[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Sc-devel] regexp support revisited :)



ported a version to icu, so it would go nicely along with the advanced find ... diff not attached, since i had to move it into another file, because of tricky linking issues.
code is more or less taken from Flo, only replaced all the regex calls.


On Nov 21, 2007, at 1:10 AM, Florian Schmidt wrote:

On Tuesday 20 November 2007, Dan Stowell wrote:
Florian,

I'm just having a look at this code. The code looks fine, but I
wonder: why did you use boost regex rather than gnu regex? Gnu regex

Simply because of the ease of using it.. And because i thought boost_regex wasn't such a bad dependency because it's all standard c++ and thus pretty
much perfectly portable.. Plus boos regex has some more features which
weren't yet exposed..

comes bundled with the Mac OSX dev libs (because it's bundled with gcc
I think) so would be a really easy dependency, while boost would be
adding a proper extra dependency. I'm not very familiar with these
libs so maybe the answer is obvious. I'd prefer not to add
dependencies to SC without good reason though.

I can understand that.. Here's a glibc version. This is largely untested because it's a ten minute hack ;) You guys probably want to take a look at
the regexp manpage and think about exposing some of the more advanced
options.. This code has REG_EXTENDED turned on for now.. I also feel a bit
uneasy about the macro i define [and later undefine].. Changes anyone?

Flo

Index: Source/lang/LangPrimSource/PyrStringPrim.cpp
===================================================================
--- Source/lang/LangPrimSource/PyrStringPrim.cpp	(revision 6504)
+++ Source/lang/LangPrimSource/PyrStringPrim.cpp	(working copy)
@@ -40,6 +40,8 @@
 # include <regex.h>
 #endif

+#include <string>
+
 int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed);
 int prStringAsSymbol(struct VMGlobals *g, int numArgsPushed)
 {
@@ -178,6 +180,102 @@
 	return(0);
 }

+int prString_FindRegexp(struct VMGlobals *g, int numArgsPushed)
+{
+	int err;
+
+	PyrSlot *a = g->sp - 2; // source string
+	PyrSlot *b = g->sp - 1; // pattern
+	PyrSlot *c = g->sp;     // offset
+		
+	// std::cout << " num of args: " <<  g->numpop << std::endl;
+
+	if (!isKindOfSlot(b, class_string) || (c->utag != tagInt)) return
errWrongType;
+
+	int offset = c->ui;
+
+	char *string = (char*)malloc(a->uo->size + 1);
+	err = slotStrVal(a, string, a->uo->size + 1);
+	if (err) return err;
+
+	if (offset > strlen(string))
+	{
+		free(string);
+		SetNil(a);
+		return errNone;
+	}
+	
+	char *pattern = (char*)malloc(b->uo->size + 1);
+	err = slotStrVal(b, pattern, b->uo->size + 1);
+	if (err) return err;
+	
+	regex_t compiled_pattern;
+
+	/* Need different options, see man regcomp ;) */
+	if (regcomp(&compiled_pattern, pattern, REG_EXTENDED) != 0)
+	{
+		free(string);
+		free(pattern);
+		SetNil(a);
+		return errNone;
+	}
+
+	// TODO: fix arbitrary limit here..
+	#define MAX_NUM_OF_MATCHES 100
+	regmatch_t matches[MAX_NUM_OF_MATCHES];
+
+	/* want more options, see man regexec */
+ if (regexec(&compiled_pattern, string + offset, MAX_NUM_OF_MATCHES, matches,
0) != 0)
+	{
+		free(string);
+		free(pattern);
+		SetNil(a);
+		return errNone;
+	}
+
+	// std::cout << "input string: " << string << std::endl;
+	// std::cout << "     pattern: " << pattern << std::endl;
+
+	// std::cout << "      offset: " << offset << std::endl;	
+
+
+ PyrObject *result_array = newPyrArray(g->gc, MAX_NUM_OF_MATCHES, 0, true);
+	result_array->size = 0;
+
+ for (size_t i = 0; (matches[i].rm_so != -1) && (i < MAX_NUM_OF_MATCHES);
i++)
+	{
+		result_array->size++;
+
+		int match_start =  matches[i].rm_so;
+		int match_length = matches[i].rm_eo - matches[i].rm_so;
+
+		char *match = (char*)malloc(match_length + 1);
+		strncpy(match, string + offset + match_start, match_length);
+		match[match_length] = 0;
+
+		PyrObject *array = newPyrArray(g->gc, 2, 0, true);
+		array->size = 2;
+
+		SetInt(array->slots, match_start + offset);
+
+ PyrObject *matched_string = (PyrObject*)newPyrString(g->gc, match, 0,
true);
+		SetObject(array->slots+1, matched_string);
+		g->gc->GCWrite(matched_string, array->slots + 1);
+
+		SetObject(result_array->slots + i, array);
+		g->gc->GCWrite(array, result_array->slots + i);
+	}
+
+	SetObject(a, result_array);
+	g->gc->GCWrite(result_array,a);
+
+	#undef MAX_NUM_OF_MATCHES
+	free(string);
+	free(pattern);
+
+	return errNone;
+}
+
 int prString_Regexp(struct VMGlobals *g, int numArgsPushed)
 {
 	int err, start, end;
@@ -622,11 +720,12 @@
definePrimitive(base, index++, "_String_AsFloat", prString_AsFloat, 1, 0);
 	definePrimitive(base, index++, "_String_AsCompileString",
prString_AsCompileString, 1, 0);	
definePrimitive(base, index++, "_String_Getenv", prString_Getenv, 1, 0); - definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0); - definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0); + definePrimitive(base, index++, "_String_Setenv", prString_Setenv, 2, 0);
+	definePrimitive(base, index++, "_String_Find", prString_Find, 4, 0);
 	definePrimitive(base, index++, "_String_FindBackwards",
prString_FindBackwards, 4, 0);
- definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0); + definePrimitive(base, index++, "_String_Format", prString_Format, 2, 0); definePrimitive(base, index++, "_String_Regexp", prString_Regexp, 4, 0); + definePrimitive(base, index++, "_String_FindRegexp", prString_FindRegexp, 3,
0);
 	definePrimitive(base, index++, "_StripRtf", prStripRtf, 1, 0);
 	definePrimitive(base, index++, "_String_GetResourceDirPath",
prString_GetResourceDirPath, 1, 0);
 	definePrimitive(base, index++, "_String_StandardizePath",
prString_StandardizePath, 1, 0);	


--
Palimm Palimm!
http://tapas.affenbande.org
<sc-regexp-patch_posix.diff>
_______________________________________________
Sc-devel mailing list
Sc-devel@xxxxxxxxxxxxxxx
http://www.create.ucsb.edu/mailman/listinfo/sc-devel