Skip to content

Commit fde6880

Browse files
committed
regexp: Handle {...} that is not a repeat count
If the text after '{' does not begin with a number, the brace is not a repeat count; treat it as a literal character to be matched exactly. Fixes #323. Signed-off-by: Steve Bennett <[email protected]>
1 parent 2edfbb4 commit fde6880

File tree

2 files changed

+43
-17
lines changed

2 files changed

+43
-17
lines changed

jimregexp.c

Lines changed: 27 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -164,8 +164,7 @@
164164
*/
165165

166166
#define FAIL(R,M) { (R)->err = (M); return (M); }
167-
#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?' || (c) == '{')
168-
#define META "^$.[()|?{+*"
167+
#define META "^$.[()|"
169168

170169
/*
171170
* Flags to be passed up and down.
@@ -203,6 +202,22 @@ static void regdump(regex_t *preg);
203202
static const char *regprop( int op );
204203
#endif
205204

205+
/*
 * Tests whether the pattern text at s begins a repeat ("mult") operator.
 *
 * Returns 1 if *s is '*', '+' or '?', or if *s is '{' and the very next
 * character is a decimal digit (the start of a {n...} repeat count).
 * Returns 0 for anything else, including a '{' that is not followed by a digit.
 *
 * Only s[0] and, when s[0] is '{', s[1] are examined; the rest of the
 * {...} expression is not validated here.
 */
206+
static int str_is_mult(const char *s)
207+
{
208+
switch (*s) {
209+
case '*':
210+
case '+':
211+
case '?':
212+
return 1;
213+
case '{':
/* A '{' counts as a repeat operator only when a digit follows it;
 * otherwise fall out of the switch and return 0. */
214+
if (isdigit(UCHAR(s[1]))) {
215+
return 1;
216+
}
217+
break;
218+
}
219+
return 0;
220+
}
206221

207222
/**
208223
* Returns the length of the null-terminated integer sequence.
@@ -494,12 +509,12 @@ static int regpiece(regex_t *preg, int *flagp)
494509
if (ret == 0)
495510
return 0;
496511

497-
op = *preg->regparse;
498-
if (!ISMULT(op)) {
512+
if (!str_is_mult(preg->regparse)) {
499513
*flagp = flags;
500514
return(ret);
501515
}
502516

517+
op = *preg->regparse;
503518
if (!(flags&HASWIDTH) && op != '?') {
504519
preg->err = REG_ERR_OPERAND_COULD_BE_EMPTY;
505520
return 0;
@@ -568,7 +583,7 @@ static int regpiece(regex_t *preg, int *flagp)
568583
}
569584

570585
preg->regparse++;
571-
if (ISMULT(*preg->regparse)) {
586+
if (str_is_mult(preg->regparse)) {
572587
preg->err = REG_ERR_NESTED_COUNT;
573588
return 0;
574589
}
@@ -916,12 +931,6 @@ static int regatom(regex_t *preg, int *flagp)
916931
case ')':
917932
preg->err = REG_ERR_INTERNAL;
918933
return 0; /* Supposed to be caught earlier. */
919-
case '?':
920-
case '+':
921-
case '*':
922-
case '{':
923-
preg->err = REG_ERR_COUNT_FOLLOWS_NOTHING;
924-
return 0;
925934
case '\\':
926935
ch = *preg->regparse++;
927936
switch (ch) {
@@ -986,6 +995,11 @@ static int regatom(regex_t *preg, int *flagp)
986995
/* Back up to pick up the first char of interest */
987996
preg->regparse -= n;
988997

998+
if (str_is_mult(preg->regparse)) {
999+
preg->err = REG_ERR_COUNT_FOLLOWS_NOTHING;
1000+
return 0;
1001+
}
1002+
9891003
ret = regnode(preg, EXACTLY);
9901004

9911005
/* Note that a META operator such as ? or * consumes the
@@ -995,7 +1009,7 @@ static int regatom(regex_t *preg, int *flagp)
9951009
*/
9961010

9971011
/* Until end of string or a META char is reached */
998-
while (*preg->regparse && strchr(META, *preg->regparse) == NULL) {
1012+
while (*preg->regparse && strchr(META, *preg->regparse) == NULL && !str_is_mult(preg->regparse)) {
9991013
n = reg_utf8_tounicode_case(preg->regparse, &ch, (preg->cflags & REG_ICASE));
10001014
if (ch == '\\' && preg->regparse[n]) {
10011015
/* Non-trailing backslash.
@@ -1020,7 +1034,7 @@ static int regatom(regex_t *preg, int *flagp)
10201034
* Check to see if the following char is a MULT
10211035
*/
10221036

1023-
if (ISMULT(preg->regparse[n])) {
1037+
if (str_is_mult(&preg->regparse[n])) {
10241038
/* Yes. But do we already have some EXACTLY chars? */
10251039
if (added) {
10261040
/* Yes, so return what we have and pick up the current char next time around */

tests/regexp2.test

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -943,19 +943,31 @@ test regexp-25.3 {End of word} {
943943
regexp {\mcd\M} cdef
944944
} 0
945945

946-
test regexp-26.1 {regexp operator =~} {
946+
test regexp-25.4 {Braces not a repeat count} {
947+
regexp "{abc}" "test{abc}def"
948+
} 1
949+
950+
test regexp-25.5 {Repeat follows nothing} -body {
951+
regexp "{3}" "test{3}def"
952+
} -returnCodes error -match glob -result {couldn't compile regular expression pattern: *}
953+
954+
test regexp-25.6 {Meta char after nothing is error} -body {
955+
regexp "?" "te?st"
956+
} -returnCodes error -match glob -result {couldn't compile regular expression pattern: *}
957+
958+
test regexp-26.1 {regexp operator =~} jim {
947959
expr {"abc" =~ "^a"}
948960
} 1
949961

950-
test regexp-26.2 {regexp operator =~} {
962+
test regexp-26.2 {regexp operator =~} jim {
951963
expr {"abc" =~ "^b"}
952964
} 0
953965

954-
test regexp-26.2 {regexp operator =~} {
966+
test regexp-26.2 {regexp operator =~} jim {
955967
expr {"abc" =~ ".b."}
956968
} 1
957969

958-
test regexp-26.3 {regexp operator =~ invalid regexp} -body {
970+
test regexp-26.3 {regexp operator =~ invalid regexp} -constraints jim -body {
959971
expr {"abc" =~ {[}}
960972
} -returnCodes error -result {couldn't compile regular expression pattern: brackets [] not balanced}
961973

0 commit comments

Comments
 (0)