[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Sc-devel] regexp support revisited :)



On Tuesday 20 November 2007, Dan Stowell wrote:
> Florian,
>
> I'm just having a look at this code. The code looks fine, but I
> wonder: why did you use boost regex rather than gnu regex? Gnu regex

Simply because of the ease of using it.. And because i thought boost_regex 
wasn't such a bad dependency because it's all standard c++ and thus pretty 
much perfectly portable.. Plus boos regex has some more features which 
weren't yet exposed..

> comes bundled with the Mac OSX dev libs (because it's bundled with gcc
> I think) so would be a really easy dependency, while boost would be
> adding a proper extra dependency. I'm not very familiar with these
> libs so maybe the answer is obvious. I'd prefer not to add
> dependencies to SC without good reason though.

I can understand that.. Here's a glibc version. This is largely untested 
because it's a ten minute hack ;) You guys probably want to take a look at 
the regexp manpage and think about exposing some of the more advanced 
options.. This code has REG_EXTENDED turned on for now.. I also feel a bit 
uneasy about the macro i define [and later undefine].. Changes anyone?

Flo

Index: Source/lang/LangPrimSource/PyrStringPrim.cpp
===================================================================
--- Source/lang/LangPrimSource/PyrStringPrim.cpp	(revision 6504)
+++ Source/lang/LangPrimSource/PyrStringPrim.cpp	(working copy)
@@ -40,6 +40,8 @@
 # include <regex.h>
 #endif
 
+#include <string>
+
 int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed);
 int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed)
 {
@@ -178,6 +180,102 @@
 	return(0);
 }
 
+int prString_FindRegexp(struct VMGlobals *g, int numArgsPushed)
+{
+	int err;
+
+	PyrSlot *a = g->sp - 2; // source string
+	PyrSlot *b = g->sp - 1; // pattern
+	PyrSlot *c = g->sp;     // offset
+		
+	// std::cout << " num of args: " <<  g->numpop << std::endl;
+
+	if (!isKindOfSlot(b, class_string) || (c->utag != tagInt)) return 
errWrongType;
+
+	int offset = c->ui;
+
+	char *string = (char*)malloc(a->uo->size + 1);
+	err = slotStrVal(a, string, a->uo->size + 1);
+	if (err) return err;
+
+	if (offset > strlen(string))
+	{
+		free(string);
+		SetNil(a);
+		return errNone;
+	}
+	
+	char *pattern = (char*)malloc(b->uo->size + 1);
+	err = slotStrVal(b, pattern, b->uo->size + 1);
+	if (err) return err;
+	
+	regex_t compiled_pattern;
+
+	/* Need different options, see man regcomp ;) */
+	if (regcomp(&compiled_pattern, pattern, REG_EXTENDED) != 0)
+	{
+		free(string);
+		free(pattern);
+		SetNil(a);
+		return errNone;
+	}
+
+	// TODO: fix arbitrary limit here.. 
+	#define MAX_NUM_OF_MATCHES 100
+	regmatch_t matches[MAX_NUM_OF_MATCHES];
+
+	/* want more options, see man regexec */
+	if (regexec(&compiled_pattern, string + offset, MAX_NUM_OF_MATCHES, matches, 
0) != 0)
+	{
+		free(string);
+		free(pattern);
+		SetNil(a);
+		return errNone;
+	}
+
+	// std::cout << "input string: " << string << std::endl;
+	// std::cout << "     pattern: " << pattern << std::endl;
+
+	// std::cout << "      offset: " << offset << std::endl;	
+
+
+	PyrObject *result_array = newPyrArray(g->gc, MAX_NUM_OF_MATCHES, 0, true);
+	result_array->size = 0;
+
+	for (size_t i = 0; (matches[i].rm_so != -1) && (i < MAX_NUM_OF_MATCHES); 
i++)
+	{
+		result_array->size++;
+
+		int match_start =  matches[i].rm_so;
+		int match_length = matches[i].rm_eo - matches[i].rm_so;
+
+		char *match = (char*)malloc(match_length + 1);
+		strncpy(match, string + offset + match_start, match_length);
+		match[match_length] = 0;
+
+		PyrObject *array = newPyrArray(g->gc, 2, 0, true);
+		array->size = 2;
+
+		SetInt(array->slots, match_start + offset);
+
+		PyrObject *matched_string = (PyrObject*)newPyrString(g->gc, match, 0, 
true);
+		SetObject(array->slots+1, matched_string);
+		g->gc->GCWrite(matched_string, array->slots + 1);
+
+		SetObject(result_array->slots + i, array);
+		g->gc->GCWrite(array, result_array->slots + i);
+	}
+
+	SetObject(a, result_array);
+	g->gc->GCWrite(result_array,a);
+
+	#undef MAX_NUM_OF_MATCHES
+	free(string);
+	free(pattern);
+
+	return errNone;
+}
+
 int prString_Regexp(struct VMGlobals *g, int numArgsPushed)
 {
 	int err, start, end;
@@ -622,11 +720,12 @@
 	definePrimitive(base, index++, "_String_AsFloat", prString_AsFloat, 1, 0);	
 	definePrimitive(base, index++, "_String_AsCompileString", 
prString_AsCompileString, 1, 0);	
 	definePrimitive(base, index++, "_String_Getenv", prString_Getenv, 1, 0);
-    definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0);
-    definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
+	definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0);
+	definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
 	definePrimitive(base, index++, "_String_FindBackwards", 
prString_FindBackwards, 4, 0);
-    definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0);
+	definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0);
 	definePrimitive(base, index++, "_String_Regexp", prString_Regexp, 4, 0);
+	definePrimitive(base, index++, "_String_FindRegexp", prString_FindRegexp, 3, 
0);
 	definePrimitive(base, index++, "_StripRtf", prStripRtf, 1, 0);
 	definePrimitive(base, index++, "_String_GetResourceDirPath", 
prString_GetResourceDirPath, 1, 0);
 	definePrimitive(base, index++, "_String_StandardizePath", 
prString_StandardizePath, 1, 0);	


-- 
Palimm Palimm!
http://tapas.affenbande.org
Index: Source/lang/LangPrimSource/PyrStringPrim.cpp
===================================================================
--- Source/lang/LangPrimSource/PyrStringPrim.cpp	(revision 6504)
+++ Source/lang/LangPrimSource/PyrStringPrim.cpp	(working copy)
@@ -40,6 +40,8 @@
 # include <regex.h>
 #endif
 
+#include <string>
+
 int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed);
 int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed)
 {
@@ -178,6 +180,102 @@
 	return(0);
 }
 
+int prString_FindRegexp(struct VMGlobals *g, int numArgsPushed)
+{
+	int err;
+
+	PyrSlot *a = g->sp - 2; // source string
+	PyrSlot *b = g->sp - 1; // pattern
+	PyrSlot *c = g->sp;     // offset
+		
+	// std::cout << " num of args: " <<  g->numpop << std::endl;
+
+	if (!isKindOfSlot(b, class_string) || (c->utag != tagInt)) return errWrongType;
+
+	int offset = c->ui;
+
+	char *string = (char*)malloc(a->uo->size + 1);
+	err = slotStrVal(a, string, a->uo->size + 1);
+	if (err) return err;
+
+	if (offset > strlen(string))
+	{
+		free(string);
+		SetNil(a);
+		return errNone;
+	}
+	
+	char *pattern = (char*)malloc(b->uo->size + 1);
+	err = slotStrVal(b, pattern, b->uo->size + 1);
+	if (err) return err;
+	
+	regex_t compiled_pattern;
+
+	/* Need different options, see man regcomp ;) */
+	if (regcomp(&compiled_pattern, pattern, REG_EXTENDED) != 0)
+	{
+		free(string);
+		free(pattern);
+		SetNil(a);
+		return errNone;
+	}
+
+	// TODO: fix arbitrary limit here.. 
+	#define MAX_NUM_OF_MATCHES 100
+	regmatch_t matches[MAX_NUM_OF_MATCHES];
+
+	/* want more options, see man regexec */
+	if (regexec(&compiled_pattern, string + offset, MAX_NUM_OF_MATCHES, matches, 0) != 0)
+	{
+		free(string);
+		free(pattern);
+		SetNil(a);
+		return errNone;
+	}
+
+	// std::cout << "input string: " << string << std::endl;
+	// std::cout << "     pattern: " << pattern << std::endl;
+
+	// std::cout << "      offset: " << offset << std::endl;	
+
+
+	PyrObject *result_array = newPyrArray(g->gc, MAX_NUM_OF_MATCHES, 0, true);
+	result_array->size = 0;
+
+	for (size_t i = 0; (matches[i].rm_so != -1) && (i < MAX_NUM_OF_MATCHES); i++)
+	{
+		result_array->size++;
+
+		int match_start =  matches[i].rm_so;
+		int match_length = matches[i].rm_eo - matches[i].rm_so;
+
+		char *match = (char*)malloc(match_length + 1);
+		strncpy(match, string + offset + match_start, match_length);
+		match[match_length] = 0;
+
+		PyrObject *array = newPyrArray(g->gc, 2, 0, true);
+		array->size = 2;
+
+		SetInt(array->slots, match_start + offset);
+
+		PyrObject *matched_string = (PyrObject*)newPyrString(g->gc, match, 0, true);
+		SetObject(array->slots+1, matched_string);
+		g->gc->GCWrite(matched_string, array->slots + 1);
+
+		SetObject(result_array->slots + i, array);
+		g->gc->GCWrite(array, result_array->slots + i);
+	}
+
+	SetObject(a, result_array);
+	g->gc->GCWrite(result_array,a);
+
+	#undef MAX_NUM_OF_MATCHES
+	free(string);
+	free(pattern);
+
+	return errNone;
+}
+
 int prString_Regexp(struct VMGlobals *g, int numArgsPushed)
 {
 	int err, start, end;
@@ -622,11 +720,12 @@
 	definePrimitive(base, index++, "_String_AsFloat", prString_AsFloat, 1, 0);	
 	definePrimitive(base, index++, "_String_AsCompileString", prString_AsCompileString, 1, 0);	
 	definePrimitive(base, index++, "_String_Getenv", prString_Getenv, 1, 0);
-    definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0);
-    definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
+	definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0);
+	definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
 	definePrimitive(base, index++, "_String_FindBackwards", prString_FindBackwards, 4, 0);
-    definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0);
+	definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0);
 	definePrimitive(base, index++, "_String_Regexp", prString_Regexp, 4, 0);
+	definePrimitive(base, index++, "_String_FindRegexp", prString_FindRegexp, 3, 0);
 	definePrimitive(base, index++, "_StripRtf", prStripRtf, 1, 0);
 	definePrimitive(base, index++, "_String_GetResourceDirPath", prString_GetResourceDirPath, 1, 0);
 	definePrimitive(base, index++, "_String_StandardizePath", prString_StandardizePath, 1, 0);