diff --git a/src/engine/builtins.cpp b/src/engine/builtins.cpp index 29f1efc578..a78a5dac5e 100644 --- a/src/engine/builtins.cpp +++ b/src/engine/builtins.cpp @@ -1018,7 +1018,12 @@ LIST * builtin_match( FRAME * frame, int flags ) b2::list_cref patterns( lol_get( frame->args, 0 ) ); for ( auto pattern : patterns ) { - b2::regex::program re( pattern->str() ); + b2::regex::program re; + { + // compilation errors print a nice error message and exit + b2::regex::frame_ctx ctx(frame); + re.reset( pattern->str() ); + } /* For each text string to match against. */ b2::list_cref texts( lol_get( frame->args, 1 ) ); @@ -1026,12 +1031,10 @@ LIST * builtin_match( FRAME * frame, int flags ) { if ( auto re_i = re.search( text->str() ) ) { - /* Find highest parameter */ - int top = NSUBEXP-1; - while ( !re_i[top].begin() ) top -= 1; - /* And add all parameters up to highest onto list. */ - /* Must have parameters to have results! */ - for ( int i = 1; i <= top ; ++i ) + /* Find total groups matched */ + int tot = re_i.count(); + /* And add all captured matches onto result list. */ + for ( int i = 1; i <= tot ; ++i ) { string_append_range( buf, re_i[i].begin(), re_i[i].end() ); result.push_back( object_new( buf->value ) ); diff --git a/src/engine/regexp.cpp b/src/engine/regexp.cpp index 632a1037df..20ee48f020 100644 --- a/src/engine/regexp.cpp +++ b/src/engine/regexp.cpp @@ -1,5 +1,5 @@ /* - * regcomp and regexec -- regsub and regerror are elsewhere + * regcomp and regexec * * Copyright (c) 1986 by University of Toronto. * Written by Henry Spencer. Not derived from licensed software. @@ -49,6 +49,7 @@ #include "jam.h" #include "output.h" #include "strview.h" +#include "frames.h" #include #include @@ -60,7 +61,22 @@ #include #include -namespace b2 { namespace regex { +/* + * All these forward declarations are only needed for the error function + * regerror below. 
+ */ +void backtrace_line(FRAME *); +void backtrace(FRAME *); +void print_source_line(FRAME *); +typedef struct _lol LOL; +void lol_print(LOL * lol); + +namespace b2 { + +// more forward declarations +void clean_exit(int exit_code); + +namespace regex { /* * The first byte of the regexp internal "program" is actually this magic @@ -68,7 +84,33 @@ namespace b2 { namespace regex { */ #define MAGIC 0234 -void regerror(char const * s); +thread_local FRAME * frame = nullptr; + +/* + * Handles any errors that occur while compiling a regex. + * Largely inspired by argument_error() from function.cpp. An alternative, + * more structured method of issuing errors would be appropriate. + */ +void regerror(char const * s) +{ + // frame comes from the thread_local variable b2::regex::frame + if (frame == nullptr) + { + // NOTE: "legacy" behaviour, but should exit here + printf("regexp error: %s\n", s); + } + else + { + backtrace_line( frame->prev ); + out_printf( "*** regexp error\n* rule %s", frame->rulename ); + out_printf( " called with: ( " ); + lol_print( frame->args ); + out_printf( " )\n* %s\n", s ); + print_source_line( frame ); + backtrace( frame->prev ); + b2::clean_exit( EXITBAD ); + } +} /* * The "internal use only" fields in regexp.h are present to pass info from @@ -167,15 +209,15 @@ void regerror(char const * s); * Utility definitions. */ #ifndef CHARBITS -#define UCHARAT(p) ((int32_t) * (const unsigned char *)(p)) +#define UCHARAT(p) (static_cast<int32_t>( * reinterpret_cast<const unsigned char *>(p))) #else -#define UCHARAT(p) ((int32_t) * (p)&CHARBITS) +#define UCHARAT(p) (static_cast<int32_t>( * (p)&CHARBITS)) #endif #define FAIL(m) \ { \ regerror(m); \ -return (NULL); \ +return nullptr; \ } #define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?') @@ -188,7 +230,7 @@ return (NULL); \ #define WORST 0 /* Worst case. 
*/ namespace { -char regdummy = 0; +char regdummy = '\0'; /* - regnext - dig the "next" pointer out of a node */ @@ -197,10 +239,10 @@ inline C * regnext(C * p) { int32_t offset; - if (p == &regdummy) return (NULL); + if (p == &regdummy) return nullptr; offset = NEXT(p); - if (offset == 0) return (NULL); + if (offset == 0) return nullptr; if (OP(p) == BACK) return (p - offset); @@ -213,8 +255,8 @@ inline C * regnext(C * p) struct regex_prog { std::string regexpr; /* The not-compiled regex. */ - char regstart = 0; /* Internal use only. */ - char reganch = 0; /* Internal use only. */ + char regstart = '\0'; /* Internal use only. */ + char reganch = '\0'; /* Internal use only. */ const char * regmust = nullptr; /* Internal use only. */ int32_t regmlen = 0; /* Internal use only. */ std::size_t progsize = 0; // The size of the program. @@ -260,41 +302,45 @@ struct compiler int32_t len; int32_t flags; - if (exp == NULL) FAIL("NULL argument"); + if (exp == nullptr) FAIL("NULL argument"); - /* First pass: determine size, legality. */ + /* First pass: determine size, legality. */ #ifdef notdef if (exp[0] == '.' && exp[1] == '*') exp += 2; /* aid grep */ #endif - regparse = (char *)exp; + regparse = const_cast<char *>(exp); regnpar = 1; regsize = 0; regcode = &regdummy; regc(MAGIC); - if (reg(0, &flags) == NULL) return (NULL); + if (reg(0, &flags) == nullptr) return nullptr; /* Small enough for pointer-storage convention? */ if (regsize >= 32767L) /* Probably could be 65535L. */ FAIL("regexp too big"); /* Allocate space. */ - r = (regex_prog *)BJAM_MALLOC(sizeof(regex_prog) + regsize); - if (r == NULL) FAIL("out of space"); + r = static_cast<regex_prog *>(BJAM_MALLOC(sizeof(regex_prog) + regsize)); + if (r == nullptr) FAIL("out of space"); b2::jam::ctor_ptr(r); r->regexpr = exp; r->progsize = regsize; /* Second pass: emit code. 
*/ - regparse = (char *)exp; + regparse = const_cast(exp); regnpar = 1; regcode = r->program; regc(MAGIC); - if (reg(0, &flags) == NULL) return (NULL); + if (reg(0, &flags) == nullptr) + { + BJAM_FREE(r); + return nullptr; + } /* Dig out information for optimizations. */ r->regstart = '\0'; /* Worst-case defaults. */ - r->reganch = 0; - r->regmust = NULL; + r->reganch = '\0'; + r->regmust = nullptr; r->regmlen = 0; scan = r->program + 1; /* First BRANCH. */ if (OP(regnext(scan)) == END) @@ -317,9 +363,9 @@ struct compiler */ if (flags & SPSTART) { - longest = NULL; + longest = nullptr; len = 0; - for (; scan != NULL; scan = regnext(scan)) + for (; scan != nullptr; scan = regnext(scan)) if (OP(scan) == EXACTLY && static_cast(strlen(OPERAND(scan))) >= len) { @@ -363,12 +409,12 @@ struct compiler ret = regnode(OPEN + parno); } else - ret = NULL; + ret = nullptr; /* Pick up the branches, linking them together. */ br = regbranch(&flags); - if (br == NULL) return (NULL); - if (ret != NULL) + if (br == nullptr) return nullptr; + if (ret != nullptr) regtail(ret, br); /* OPEN -> first. */ else ret = br; @@ -378,7 +424,7 @@ struct compiler { regparse++; br = regbranch(&flags); - if (br == NULL) return (NULL); + if (br == nullptr) return nullptr; regtail(ret, br); /* BRANCH -> BRANCH. */ if (!(flags & HASWIDTH)) *flagp &= ~HASWIDTH; *flagp |= flags & SPSTART; @@ -389,7 +435,7 @@ struct compiler regtail(ret, ender); /* Hook the tails of the branches to the closing node. */ - for (br = ret; br != NULL; br = regnext(br)) regoptail(br, ender); + for (br = ret; br != nullptr; br = regnext(br)) regoptail(br, ender); /* Check for proper termination. */ if (paren && *regparse++ != ')') @@ -425,20 +471,20 @@ struct compiler *flagp = WORST; /* Tentatively. 
*/ ret = regnode(BRANCH); - chain = NULL; + chain = nullptr; while (*regparse != '\0' && *regparse != ')' && *regparse != '\n' && *regparse != '|') { latest = regpiece(&flags); - if (latest == NULL) return (NULL); + if (latest == nullptr) return nullptr; *flagp |= flags & HASWIDTH; - if (chain == NULL) /* First piece. */ + if (chain == nullptr) /* First piece. */ *flagp |= flags & SPSTART; else regtail(chain, latest); chain = latest; } - if (chain == NULL) /* Loop ran zero times. */ + if (chain == nullptr) /* Loop ran zero times. */ (void)regnode(NOTHING); return (ret); @@ -461,7 +507,7 @@ struct compiler int32_t flags; ret = regatom(&flags); - if (ret == NULL) return (NULL); + if (ret == nullptr) return nullptr; op = *regparse; if (!ISMULT(op)) @@ -574,7 +620,7 @@ struct compiler break; case '(': ret = reg(1, &flags); - if (ret == NULL) return (NULL); + if (ret == nullptr) return nullptr; *flagp |= flags & (HASWIDTH | SPSTART); break; case '\0': @@ -630,7 +676,7 @@ struct compiler regparse--; /* Look at cur char */ ret = regnode(EXACTLY); - for (regprev = 0;;) + for (regprev = nullptr;;) { ch = *regparse++; /* Get current char */ switch (*regparse) @@ -657,7 +703,7 @@ struct compiler case '?': case '+': case '*': - if (!regprev) /* If just ch in str, */ + if (regprev == nullptr) /* If just ch in str, */ goto magic; /* use it */ /* End mult-char string one early */ regparse = regprev; /* Back up parse */ @@ -685,7 +731,7 @@ struct compiler done: regc('\0'); *flagp |= HASWIDTH; - if (!regprev) /* One char? */ + if (regprev == nullptr) /* One char? */ *flagp |= SIMPLE; } break; @@ -711,7 +757,7 @@ struct compiler } ptr = ret; - *ptr++ = op; + *ptr++ = static_cast(op); *ptr++ = '\0'; /* Null "next" pointer. 
*/ *ptr++ = '\0'; regcode = ptr; @@ -774,7 +820,7 @@ struct compiler for (;;) { temp = regnext(scan); - if (temp == NULL) break; + if (temp == nullptr) break; scan = temp; } @@ -782,8 +828,8 @@ struct compiler offset = scan - val; else offset = val - scan; - *(scan + 1) = (offset >> 8) & 0377; - *(scan + 2) = offset & 0377; + *(scan + 1) = static_cast((offset >> 8) & 0377); + *(scan + 2) = static_cast(offset & 0377); } /* @@ -793,7 +839,7 @@ struct compiler void regoptail(char * p, char * val) { /* "Operandless" and "op != BRANCH" are synonymous in practice. */ - if (p == NULL || p == &regdummy || OP(p) != BRANCH) return; + if (p == nullptr || p == &regdummy || OP(p) != BRANCH) return; regtail(OPERAND(p), val); } @@ -832,7 +878,7 @@ struct executor } /* If there is a "must appear" string, look for it. */ - if (prog.regmust != NULL + if (prog.regmust != nullptr && string.find(prog.regmust, 0, prog.regmlen) == string_view::npos) return false; /* Not present. */ @@ -897,7 +943,7 @@ struct executor const char * next; /* Next node. 
*/ scan = prog; - while (scan != NULL) + while (scan != nullptr) { next = regnext(scan); @@ -1033,7 +1079,7 @@ struct executor reg_in = save; scan = regnext(scan); } - while (scan != NULL && OP(scan) == BRANCH); + while (scan != nullptr && OP(scan) == BRANCH); return false; /* NOTREACHED */ } @@ -1112,14 +1158,14 @@ struct executor } break; case ANYOF: - while (!scan.empty() && strchr(opnd, scan[0]) != NULL) + while (!scan.empty() && strchr(opnd, scan[0]) != nullptr) { count++; scan = scan.substr(1); } break; case ANYBUT: - while (!scan.empty() && strchr(opnd, scan[0]) == NULL) + while (!scan.empty() && strchr(opnd, scan[0]) == nullptr) { count++; scan = scan.substr(1); @@ -1145,21 +1191,19 @@ bool regex_exec( return result; } -void regerror(char const * s) { out_printf("re error %s\n", s); } - -regex_prog & program::compile(const char * pattern) +regex_prog * program::compile(const char * pattern) { static std::unordered_map cache; if (cache.count(pattern) == 0) { cache[pattern] = regex_comp(pattern); } - return *cache[pattern]; + return cache[pattern].get(); } program::program(const char * pattern) { reset(pattern); } -void program::reset(const char * pattern) { compiled = &compile(pattern); } +void program::reset(const char * pattern) { compiled = compile(pattern); } program::result_iterator::result_iterator( const regex_prog & c, const string_view & s) @@ -1172,7 +1216,7 @@ program::result_iterator::result_iterator( void program::result_iterator::advance() { // We start searching for a match at the end of the previous match. - if (regex_exec(*compiled, expressions, rest)) + if ((compiled != nullptr) && regex_exec(*compiled, expressions, rest)) { // A match means the subexpressions are filled in and the first entry // is the full match. Advance `rest` to follow the match. 
diff --git a/src/engine/regexp.h b/src/engine/regexp.h index 776fee44e2..3490e07080 100644 --- a/src/engine/regexp.h +++ b/src/engine/regexp.h @@ -22,7 +22,7 @@ namespace b2 { namespace regex { // The resulting matches for a regex match. Expression 0 is the full match. -// And expressions [1,NSUBEXP] are the subexpressions matched. +// And expressions [1, NSUBEXP-1] are the subexpressions (groups) matched. struct regex_expr { string_view sub[NSUBEXP]; @@ -52,7 +52,7 @@ struct program private: const regex_prog * compiled = nullptr; - static regex_prog & compile(const char * patter); + static regex_prog * compile(const char * pattern); }; struct program::result_iterator @@ -78,11 +78,18 @@ struct program::result_iterator } inline reference operator*() const { return (*this)[0]; } inline pointer operator->() const { return &(*this)[0]; } - explicit inline operator bool() const { return !(*this)[0].empty(); } + inline operator bool() const { return !(*this)[0].empty(); } inline reference operator[](std::size_t i) const { static const value_type invalid { nullptr, 0 }; - return i <= NSUBEXP ? expressions.sub[i] : invalid; + return i < NSUBEXP ? expressions.sub[i] : invalid; + } + // total groups matched + inline int count() const + { + int i = NSUBEXP - 1; + while ( expressions.sub[i].begin() == nullptr && i ) i--; + return i; } private: @@ -111,4 +118,23 @@ inline program::result_iterator program::search(const char * str_begin) }} // namespace b2::regex +typedef struct frame FRAME; + +namespace b2 { namespace regex { + +thread_local extern FRAME * frame; + +/* + * Simple class which uses RAII to set b2::regex::frame during + * compilation phase, for proper error message emission and + * program exit. 
+ */ +struct frame_ctx +{ + frame_ctx(FRAME * frm) { frame = frm; } + ~frame_ctx() { frame = nullptr; } +}; + +}} // namespace b2::regex + #endif diff --git a/test/abs_workdir.py b/test/abs_workdir.py index 288e70af67..cd673bf030 100644 --- a/test/abs_workdir.py +++ b/test/abs_workdir.py @@ -42,4 +42,5 @@ def samefile(f1, f2): else: raise ValueError("exception expected") finally: + t.rm('file.jam') t.cleanup()