Skip to content

Commit 00b27e1

Browse files
committed
regexp, regsub: add support for -lineanchor and -linestop
Signed-off-by: Steve Bennett <[email protected]>
1 parent 3c89e1b commit 00b27e1

File tree

5 files changed

+97
-77
lines changed

5 files changed

+97
-77
lines changed

jim-regexp.c

Lines changed: 37 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
122122
int opt_indices = 0;
123123
int opt_all = 0;
124124
int opt_inline = 0;
125+
int opt_lineanchor = 0;
125126
regex_t *regex;
126127
int match, i, j;
127128
int offset = 0;
@@ -137,10 +138,10 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
137138
int eflags = 0;
138139
int option;
139140
enum {
140-
OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_INLINE, OPT_START, OPT_EXPANDED, OPT_END
141+
OPT_INDICES, OPT_NOCASE, OPT_LINE, OPT_LINESTOP, OPT_LINEANCHOR, OPT_ALL, OPT_INLINE, OPT_START, OPT_EXPANDED, OPT_END
141142
};
142143
static const char * const options[] = {
143-
"-indices", "-nocase", "-line", "-all", "-inline", "-start", "-expanded", "--", NULL
144+
"-indices", "-nocase", "-line", "-linestop", "-lineanchor", "-all", "-inline", "-start", "-expanded", "--", NULL
144145
};
145146

146147
for (i = 1; i < argc; i++) {
@@ -167,8 +168,20 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
167168

168169
case OPT_LINE:
169170
regcomp_flags |= REG_NEWLINE;
171+
opt_lineanchor = 1;
170172
break;
171173

174+
#ifdef REG_NEWLINE_STOP
175+
case OPT_LINESTOP:
176+
regcomp_flags |= REG_NEWLINE_STOP;
177+
break;
178+
#endif
179+
#ifdef REG_NEWLINE_ANCHOR
180+
case OPT_LINEANCHOR:
181+
regcomp_flags |= REG_NEWLINE_ANCHOR;
182+
opt_lineanchor = 1;
183+
break;
184+
#endif
172185
case OPT_ALL:
173186
opt_all = 1;
174187
break;
@@ -186,14 +199,15 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
186199
}
187200
break;
188201

189-
case OPT_EXPANDED:
190202
#ifdef REG_EXPANDED
203+
case OPT_EXPANDED:
191204
regcomp_flags |= REG_EXPANDED;
192205
break;
193-
#else
206+
#endif
207+
default:
208+
/* Reached when -linestop, -lineanchor or -expanded is given but the matching REG_ flag is not defined */
194209
Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]);
195210
return JIM_ERR;
196-
#endif
197211
}
198212
}
199213
if (argc - i < 2) {
@@ -313,7 +327,7 @@ int Jim_RegexpCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
313327
}
314328
}
315329

316-
if (opt_all && (pattern[0] != '^' || (regcomp_flags & REG_NEWLINE)) && *source_str) {
330+
if (opt_all && (pattern[0] != '^' || opt_lineanchor) && *source_str) {
317331
if (pmatch[0].rm_eo) {
318332
offset += utf8_strlen(source_str, pmatch[0].rm_eo);
319333
source_str += pmatch[0].rm_eo;
@@ -369,10 +383,10 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
369383
const char *pattern;
370384
int option;
371385
enum {
372-
OPT_NOCASE, OPT_LINE, OPT_ALL, OPT_START, OPT_COMMAND, OPT_EXPANDED, OPT_END
386+
OPT_NOCASE, OPT_LINE, OPT_LINESTOP, OPT_LINEANCHOR, OPT_ALL, OPT_START, OPT_COMMAND, OPT_EXPANDED, OPT_END
373387
};
374388
static const char * const options[] = {
375-
"-nocase", "-line", "-all", "-start", "-command", "-expanded", "--", NULL
389+
"-nocase", "-line", "-linestop", "-lineanchor", "-all", "-start", "-command", "-expanded", "--", NULL
376390
};
377391

378392
for (i = 1; i < argc; i++) {
@@ -397,6 +411,16 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
397411
regcomp_flags |= REG_NEWLINE;
398412
break;
399413

414+
#ifdef REG_NEWLINE_STOP
415+
case OPT_LINESTOP:
416+
regcomp_flags |= REG_NEWLINE_STOP;
417+
break;
418+
#endif
419+
#ifdef REG_NEWLINE_ANCHOR
420+
case OPT_LINEANCHOR:
421+
regcomp_flags |= REG_NEWLINE_ANCHOR;
422+
break;
423+
#endif
400424
case OPT_ALL:
401425
opt_all = 1;
402426
break;
@@ -414,14 +438,16 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
414438
opt_command = 1;
415439
break;
416440

417-
case OPT_EXPANDED:
418441
#ifdef REG_EXPANDED
442+
case OPT_EXPANDED:
419443
regcomp_flags |= REG_EXPANDED;
420444
break;
421-
#else
445+
#endif
446+
447+
default:
448+
/* Reached when -linestop, -lineanchor or -expanded is given but the matching REG_ flag is not defined */
422449
Jim_SetResultFormatted(interp, "not supported: %#s", argv[i]);
423450
return JIM_ERR;
424-
#endif
425451
}
426452
}
427453
if (argc - i != 3 && argc - i != 4) {
@@ -583,11 +609,6 @@ int Jim_RegsubCmd(Jim_Interp *interp, int argc, Jim_Obj *const *argv)
583609
break;
584610
}
585611

586-
/* An anchored pattern without -line must be done */
587-
if ((regcomp_flags & REG_NEWLINE) == 0 && pattern[0] == '^') {
588-
break;
589-
}
590-
591612
regexec_flags = 0;
592613
if (pmatch[0].rm_eo == pmatch[0].rm_so) {
593614
/* Matched a zero length string. Need to avoid matching the same position again */

jimregexp.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1234,7 +1234,7 @@ int jim_regexec(regex_t *preg, const char *string, size_t nmatch, regmatch_t
12341234
}
12351235
if (*string) {
12361236
nextline:
1237-
if (preg->cflags & REG_NEWLINE) {
1237+
if (preg->cflags & REG_NEWLINE_ANCHOR) {
12381238
/* Try the next anchor? */
12391239
string = strchr(string, '\n');
12401240
if (string) {
@@ -1369,12 +1369,12 @@ static const char *str_find(const char *string, int c, int nocase)
13691369
/**
13701370
* Returns true if 'ch' is an end-of-line char.
13711371
*
1372-
* In REG_NEWLINE mode, \n is considered EOL in
1372+
* In REG_NEWLINE_STOP mode, \n is considered EOL in
13731373
* addition to \0
13741374
*/
13751375
static int reg_iseol(regex_t *preg, int ch)
13761376
{
1377-
if (preg->cflags & REG_NEWLINE) {
1377+
if (preg->cflags & REG_NEWLINE_STOP) {
13781378
return ch == '\0' || ch == '\n';
13791379
}
13801380
else {

jimregexp.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,11 @@ typedef struct regexp {
7070
typedef regexp regex_t;
7171

7272
#define REG_EXTENDED 0
73-
#define REG_NEWLINE 1
7473
#define REG_ICASE 2
74+
#define REG_NEWLINE_ANCHOR 4
75+
#define REG_NEWLINE_STOP 8
76+
/* REG_NEWLINE is the POSIX flag; here it is the combination of both newline flags */
77+
#define REG_NEWLINE (REG_NEWLINE_ANCHOR | REG_NEWLINE_STOP)
7578

7679
#define REG_NOTBOL 16
7780
#define REG_EXPANDED 32

tests/regexp.test

Lines changed: 35 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,12 @@ test regexp-1.5 {basic regexp operation} {
3737
test regexp-1.6 {basic regexp operation} regexp_are {
3838
list [catch {regexp {} abc} msg] $msg
3939
} {0 1}
40-
#test regexp-1.7 {regexp utf compliance} {
41-
# # if not UTF-8 aware, result is "0 1"
42-
# set foo "\u4e4eb q"
43-
# regexp "\u4e4eb q" "a\u4e4eb qw\u5e4e\x4e wq" bar
44-
# list [string compare $foo $bar] [regexp 4 $bar]
45-
#} {0 0}
40+
test regexp-1.7 {regexp utf compliance} {
41+
# if not UTF-8 aware, result is "0 1"
42+
set foo "\u4e4eb q"
43+
regexp "\u4e4eb q" "a\u4e4eb qw\u5e4e\x4e wq" bar
44+
list [string compare $foo $bar] [regexp 4 $bar]
45+
} {0 0}
4646

4747

4848
test regexp-2.1 {getting substrings back from regexp} {
@@ -201,9 +201,9 @@ test regexp-6.2 {regexp errors} -body {
201201
list [catch {regexp -nocase a} msg] $msg
202202
} -result {1 {wrong # args: should be "regexp ?-option ...? exp string ?matchVar? ?subMatchVar ...?"}}
203203

204-
test regexp-6.3 {regexp errors} -constraints jim -body {
204+
test regexp-6.3 {regexp errors} -body {
205205
list [catch {regexp -gorp a} msg] $msg
206-
} -result {1 {bad option "-gorp": must be -all, -expanded, -indices, -inline, -line, -nocase, -start, or --}}
206+
} -result {1 {bad option "-gorp": must be -all, -expanded, -indices, -inline, -line, -lineanchor, -linestop, -nocase, -start, or --}}
207207

208208
test regexp-6.4 {regexp errors} {
209209
catch {regexp a( b} msg
@@ -283,12 +283,12 @@ test regexp-7.16 {basic regsub operation} {
283283
set foo xxx
284284
list [regsub x "" y foo] $foo
285285
} {0 {}}
286-
#test regexp-7.17 {regsub utf compliance} {
287-
# # if not UTF-8 aware, result is "0 1"
288-
# set foo "xyz555ijka\u4e4ebpqr"
289-
# regsub a\u4e4eb xyza\u4e4ebijka\u4e4ebpqr 555 bar
290-
# list [string compare $foo $bar] [regexp 4 $bar]
291-
#} {0 0}
286+
test regexp-7.17 {regsub utf compliance} {
287+
# if not UTF-8 aware, result is "0 1"
288+
set foo "xyz555ijka\u4e4ebpqr"
289+
regsub a\u4e4eb xyza\u4e4ebijka\u4e4ebpqr 555 bar
290+
list [string compare $foo $bar] [regexp 4 $bar]
291+
} {0 0}
292292

293293
test regexp-8.1 {case conversion in regsub} {
294294
list [regsub -nocase a(a+) xaAAaAAay & foo] $foo
@@ -348,14 +348,14 @@ test regexp-10.3 {newline sensitivity in regsub} {
348348
set foo xxx
349349
list [regsub -line {^a.*b$} "dabc\naxyb\nxb" 123 foo] $foo
350350
} "1 {dabc\n123\nxb}"
351-
#test regexp-10.4 {partial newline sensitivity in regsub} {
352-
# set foo xxx
353-
# list [regsub -lineanchor {^a.*b$} "da\naxyb\nxb" 123 foo] $foo
354-
#} "1 {da\n123}"
355-
#test regexp-10.5 {inverse partial newline sensitivity in regsub} {
356-
# set foo xxx
357-
# list [regsub -linestop {a.*b} "da\nbaxyb\nxb" 123 foo] $foo
358-
#} "1 {da\nb123\nxb}"
351+
test regexp-10.4 {partial newline sensitivity in regsub} {
352+
set foo xxx
353+
list [regsub -lineanchor {^a.*b$} "da\naxyb\nxb" 123 foo] $foo
354+
} "1 {da\n123}"
355+
test regexp-10.5 {inverse partial newline sensitivity in regsub} {
356+
set foo xxx
357+
list [regsub -linestop {a.*b} "da\nbaxyb\nxb" 123 foo] $foo
358+
} "1 {da\nb123\nxb}"
359359

360360
test regexp-11.1 {regsub errors} {
361361
list [catch {regsub a b} msg] $msg
@@ -370,13 +370,9 @@ test regexp-11.4 {regsub errors} {
370370
list [catch {regsub a b c d e f} msg] $msg
371371
} {1 {wrong # args: should be "regsub ?-option ...? exp string subSpec ?varName?"}}
372372

373-
test regexp-11.5 {regsub errors} -constraints jim -body {
374-
list [catch {regsub -gorp a b c} msg] $msg
375-
} -result {1 {bad option "-gorp": must be -all, -command, -expanded, -line, -nocase, -start, or --}}
376-
377-
test regexp-11.5 {regsub errors} -constraints tcl -body {
373+
test regexp-11.5 {regsub errors} -body {
378374
list [catch {regsub -gorp a b c} msg] $msg
379-
} -result {1 {bad option "-gorp": must be -all, -command, -expanded, -line, -linestop, -lineanchor, -nocase, -start, or --}}
375+
} -result {1 {bad option "-gorp": must be -all, -command, -expanded, -line, -lineanchor, -linestop, -nocase, -start, or --}}
380376

381377
test regexp-11.6 {regsub errors} {
382378
catch {regsub -nocase a( b c d} msg
@@ -605,14 +601,14 @@ test regexp-19.1 {regsub null replacement} {
605601
list $result [string length $result]
606602
} "\0a\0hel\0a\0lo\0a\0 14"
607603

608-
#test regexp-20.1 {regsub shared object shimmering} {
609-
# # Bug #461322
610-
# set a abcdefghijklmnopqurstuvwxyz
611-
# set b $a
612-
# set c abcdefghijklmnopqurstuvwxyz0123456789
613-
# regsub $a $c $b d
614-
# list $d [string length $d] [string bytelength $d]
615-
#} [list abcdefghijklmnopqurstuvwxyz0123456789 37 37]
604+
test regexp-20.1 {regsub shared object shimmering} {
605+
# Bug #461322
606+
set a abcdefghijklmnopqurstuvwxyz
607+
set b $a
608+
set c abcdefghijklmnopqurstuvwxyz0123456789
609+
regsub $a $c $b d
610+
list $d [string length $d] [string bytelength $d]
611+
} [list abcdefghijklmnopqurstuvwxyz0123456789 37 37]
616612
#test regexp-20.2 {regsub shared object shimmering with -about} {
617613
# eval regexp -about abc
618614
#} {0 {}}
@@ -653,9 +649,9 @@ test regexp-21.9 {regexp works with empty string offset} {
653649
regexp -start 3 -- \$ {123}
654650
} {1}
655651

656-
#test regexp-21.10 {multiple matches handle newlines} {
657-
# regsub -all -lineanchor -- {^#[^\n]*\n} "#one\n#two\n#three\n" foo\n
658-
#} "foo\nfoo\nfoo\n"
652+
test regexp-21.10 {multiple matches handle newlines} {
653+
regsub -all -lineanchor -- {^#[^\n]*\n} "#one\n#two\n#three\n" foo\n
654+
} "foo\nfoo\nfoo\n"
659655

660656
test regexp-21.11 {multiple matches handle newlines} {
661657
regsub -all -line -- ^ "a\nb\nc" \#

tests/regexp2.test

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -481,18 +481,18 @@ test regexpComp-10.3 {newline sensitivity in regsub} {
481481
list [regsub -line {^a.*b$} "dabc\naxyb\nxb" 123 foo] $foo
482482
}
483483
} "1 {dabc\n123\nxb}"
484-
#test regexpComp-10.4 {partial newline sensitivity in regsub} {
485-
# evalInProc {
486-
# set foo xxx
487-
# list [regsub -lineanchor {^a.*b$} "da\naxyb\nxb" 123 foo] $foo
488-
# }
489-
#} "1 {da\n123}"
490-
#test regexpComp-10.5 {inverse partial newline sensitivity in regsub} {
491-
# evalInProc {
492-
# set foo xxx
493-
# list [regsub -linestop {a.*b} "da\nbaxyb\nxb" 123 foo] $foo
494-
# }
495-
#} "1 {da\nb123\nxb}"
484+
test regexpComp-10.4 {partial newline sensitivity in regsub} {
485+
evalInProc {
486+
set foo xxx
487+
list [regsub -lineanchor {^a.*b$} "da\naxyb\nxb" 123 foo] $foo
488+
}
489+
} "1 {da\n123}"
490+
test regexpComp-10.5 {inverse partial newline sensitivity in regsub} {
491+
evalInProc {
492+
set foo xxx
493+
list [regsub -linestop {a.*b} "da\nbaxyb\nxb" 123 foo] $foo
494+
}
495+
} "1 {da\nb123\nxb}"
496496
test regexpComp-10.6 {\Z only matching end of string with -line} {
497497
evalInProc {
498498
set foo xxx
@@ -528,7 +528,7 @@ test regexpComp-11.5 {regsub errors} -body {
528528
evalInProc {
529529
list [catch {regsub -gorp a b c} msg] $msg
530530
}
531-
} -result {1 {bad option "-gorp": must be -all, -command, -expanded, -line, -nocase, -start, or --}}
531+
} -result {1 {bad option "-gorp": must be -all, -command, -expanded, -line, -lineanchor, -linestop, -nocase, -start, or --}}
532532

533533
test regexpComp-11.6 {regsub errors} {
534534
evalInProc {
@@ -807,11 +807,11 @@ test regexpComp-21.10 {regexp command compiling tests} {
807807
}
808808
} {3 barfbarobaro}
809809
# This useless expression fails. Jim returns "bar"
810-
#test regexpComp-21.11 {regexp command compiling tests} {
811-
# evalInProc {
812-
# list [regsub -all "" "" bar str] $str
813-
# }
814-
#} {0 {}}
810+
test regexpComp-21.11 {regexp command compiling tests} {
811+
evalInProc {
812+
list [regsub -all "" "" bar str] $str
813+
}
814+
} {0 {}}
815815
test regexpComp-21.12 {regexp empty pattern with utf8} utf8 {
816816
# Make sure the second char isn't sliced up
817817
evalInProc {

0 commit comments

Comments
 (0)