diff --git a/src/libfsm/lexer.c b/src/libfsm/lexer.c index 8bd374cec..3bf26b3b6 100644 --- a/src/libfsm/lexer.c +++ b/src/libfsm/lexer.c @@ -15,6 +15,26 @@ static enum lx_token z3(struct lx *lx); static enum lx_token z4(struct lx *lx); static enum lx_token z5(struct lx *lx); +static int +lx_advance_end(struct lx *lx, int c) +{ + lx->end.byte++; + lx->end.col++; + if (c == '\n') { + lx->end.line++; + lx->end.saved_col = lx->end.col - 1; + lx->end.col = 1; + } + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return 0; + } + } + return 1; +} + +/* This wrapper manages one character of lookahead/pushback + * and the line, column, and byte offsets. */ #if __STDC_VERSION__ >= 199901L inline #endif @@ -35,18 +55,19 @@ lx_getc(struct lx *lx) } } - lx->end.byte++; - lx->end.col++; - - if (c == '\n') { - lx->end.line++; - lx->end.saved_col = lx->end.col - 1; - lx->end.col = 1; - } + if (!lx_advance_end(lx, c)) { return EOF; } return c; } +/* This wrapper adapts calling lx_getc to the interface + * in libfsm's generated code. */ +static int +fsm_getc(void *getc_opaque) +{ + return lx_getc((struct lx *)getc_opaque); +} + #if __STDC_VERSION__ >= 199901L inline #endif @@ -55,10 +76,7 @@ lx_ungetc(struct lx *lx, int c) { assert(lx != NULL); assert(lx->c == EOF); - lx->c = c; - - lx->end.byte--; lx->end.col--; @@ -68,13 +86,20 @@ lx_ungetc(struct lx *lx, int c) } } +/* Get a character from fgetc and push it to the buffer */ int lx_fgetc(struct lx *lx) { assert(lx != NULL); assert(lx->getc_opaque != NULL); - return fgetc(lx->getc_opaque); + const int c = fgetc(lx->getc_opaque); + if (c == EOF) { + lx->c = EOF; + return EOF; + } else { + return c; + } } int @@ -119,6 +144,17 @@ lx_dynpush(void *buf_opaque, char c) return 0; } +static void +lx_dynpop(void *buf_opaque) +{ + struct lx_dynbuf *t = buf_opaque; + + assert(t != NULL); + + assert(t->p != t->a); + t->p--; +} + int lx_dynclear(void *buf_opaque) { @@ -158,44 +194,53 @@ lx_dynfree(void *buf_opaque) static enum lx_token z0(struct lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2 + } state; + + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '\n': state = S2; break; default: state = S1; break; } break; - case S1: /* e.g. "a" */ - lx_ungetc(lx, c); return lx->z(lx); + case S1: /* e.g. "" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z(lx); case S2: /* e.g. "" */ - lx_ungetc(lx, c); return lx->z = z1, lx->z(lx); + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z1, lx->z(lx); default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_UNKNOWN; + case S2: return lx->z = z1, lx->z(lx); + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } switch (state) { case S0: @@ -212,44 +257,40 @@ z0(struct lx *lx) break; } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_EOF; - case S2: return TOK_EOF; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_token z1(struct lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, S4, S5, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3, S4, S5 + } state; + + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '0': case '1': @@ -268,7 +309,9 @@ z1(struct lx *lx) case '\r': case ' ': state = S4; break; case ']': state = S5; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -284,15 +327,15 @@ z1(struct lx *lx) case '7': case '8': case '9': break; - default: lx_ungetc(lx, c); return TOK_ENDID; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_ENDID; } break; case S2: /* e.g. "," */ - lx_ungetc(lx, c); return TOK_COMMA; + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_COMMA; case S3: /* e.g. "#" */ - lx_ungetc(lx, c); return lx->z = z0, lx->z(lx); + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z0, lx->z(lx); case S4: /* e.g. "\\x09" */ switch ((unsigned char) c) { @@ -300,16 +343,29 @@ z1(struct lx *lx) case '\n': case '\r': case ' ': break; - default: lx_ungetc(lx, c); return lx->z(lx); + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z(lx); } break; case S5: /* e.g. "]" */ - lx_ungetc(lx, c); return lx->z = z5, TOK_CLOSEENDIDS; + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z5, TOK_CLOSEENDIDS; default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_ENDID; + case S2: return TOK_COMMA; + case S3: return lx->z = z0, lx->z(lx); + case S4: return TOK_EOF; + case S5: return lx->z = z5, TOK_CLOSEENDIDS; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } switch (state) { case S3: @@ -325,106 +381,105 @@ z1(struct lx *lx) break; } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_ENDID; - case S2: return TOK_COMMA; - case S3: return TOK_EOF; - case S4: return TOK_EOF; - case S5: return TOK_CLOSEENDIDS; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_token z2(struct lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2 + } state; + state = S0; + + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '\'': state = S2; break; default: state = S1; break; } break; - case S1: /* e.g. "a" */ - lx_ungetc(lx, c); return TOK_CHAR; + case S1: /* e.g. "" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_CHAR; case S2: /* e.g. "'" */ - lx_ungetc(lx, c); return lx->z = z5, TOK_LABEL; + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z5, TOK_LABEL; default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_CHAR; + case S2: return lx->z = z5, TOK_LABEL; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } if (lx->push != NULL) { if (-1 == lx->push(lx->buf_opaque, (char)c)) { return TOK_ERROR; } } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_CHAR; - case S2: return TOK_LABEL; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_token z3(struct lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, S4, S5, S6, S7, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3, S4, S5, S6, S7 + } state; + + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '\\': state = S1; break; case '"': state = S3; break; @@ -450,15 +505,15 @@ z3(struct lx *lx) case 'r': case 't': case 'v': state = S6; break; - default: lx_ungetc(lx, c); return TOK_CHAR; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_CHAR; } break; - case S2: /* e.g. "a" */ - lx_ungetc(lx, c); return TOK_CHAR; + case S2: /* e.g. "\\x00" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_CHAR; case S3: /* e.g. "\"" */ - lx_ungetc(lx, c); return lx->z = z5, TOK_LABEL; + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z5, TOK_LABEL; case S4: /* e.g. "\\x" */ switch ((unsigned char) c) { @@ -484,7 +539,9 @@ z3(struct lx *lx) case 'd': case 'e': case 'f': state = S7; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -498,14 +555,14 @@ z3(struct lx *lx) case '5': case '6': case '7': break; - default: lx_ungetc(lx, c); return TOK_OCT; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_OCT; } break; - case S6: /* e.g. "\\f" */ - lx_ungetc(lx, c); return TOK_ESC; + case S6: /* e.g. "\\\"" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_ESC; - case S7: /* e.g. "\\xa" */ + case S7: /* e.g. "\\x0" */ switch ((unsigned char) c) { case '0': case '1': @@ -529,76 +586,92 @@ z3(struct lx *lx) case 'd': case 'e': case 'f': break; - default: lx_ungetc(lx, c); return TOK_HEX; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_HEX; } break; default: ; /* unreached */ } - - if (lx->push != NULL) { - if (-1 == lx->push(lx->buf_opaque, (char)c)) { - return TOK_ERROR; - } - } } - lx->lgetc = NULL; - + /* end states */ switch (state) { - case NONE: return TOK_EOF; case S1: return TOK_CHAR; case S2: return TOK_CHAR; - case S3: return TOK_LABEL; + case S3: return lx->z = z5, TOK_LABEL; case S5: return TOK_OCT; case S6: return TOK_ESC; case S7: return TOK_HEX; - default: errno = EINVAL; return TOK_ERROR; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } + + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return TOK_ERROR; + } + } + + lx->lgetc = NULL; + + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_token z4(struct lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2 + } state; + state = S0; + + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '\n': state = S2; break; default: state = S1; break; } break; - case S1: /* e.g. "a" */ - lx_ungetc(lx, c); return lx->z(lx); + case S1: /* e.g. "" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z(lx); case S2: /* e.g. "" */ - lx_ungetc(lx, c); return lx->z = z5, lx->z(lx); + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z5, lx->z(lx); default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_UNKNOWN; + case S2: return lx->z = z5, lx->z(lx); + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } switch (state) { case S0: @@ -615,46 +688,42 @@ z4(struct lx *lx) break; } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_EOF; - case S2: return TOK_EOF; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_token z5(struct lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, - S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, - S20, S21, S22, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, + S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, + S20, S21, S22 + } state; + state = S0; + + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case ',': state = S1; break; case ';': state = S2; break; @@ -732,31 +801,35 @@ z5(struct lx *lx) case '\n': case '\r': case ' ': state = S13; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S1: /* e.g. "," */ - lx_ungetc(lx, c); return TOK_COMMA; + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_COMMA; case S2: /* e.g. ";" */ - lx_ungetc(lx, c); return TOK_SEP; + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_SEP; - case S3: /* e.g. "?" */ - lx_ungetc(lx, c); return TOK_ANY; + case S3: /* e.g. "\077" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_ANY; case S4: /* e.g. "-" */ switch ((unsigned char) c) { case '>': state = S22; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S5: /* e.g. "[" */ - lx_ungetc(lx, c); return lx->z = z1, TOK_OPENENDIDS; + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z1, TOK_OPENENDIDS; case S6: /* e.g. "=" */ - lx_ungetc(lx, c); return TOK_EQUALS; + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_EQUALS; case S7: /* e.g. "e" */ switch ((unsigned char) c) { @@ -823,7 +896,7 @@ z5(struct lx *lx) case 'y': case 'z': state = S9; break; case 'n': state = S19; break; - default: lx_ungetc(lx, c); return TOK_IDENT; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_IDENT; } break; @@ -892,11 +965,11 @@ z5(struct lx *lx) case 'y': case 'z': state = S9; break; case 't': state = S14; break; - default: lx_ungetc(lx, c); return TOK_IDENT; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_IDENT; } break; - case S9: /* e.g. "a" */ + case S9: /* e.g. "0" */ switch ((unsigned char) c) { case '0': case '1': @@ -961,18 +1034,18 @@ z5(struct lx *lx) case 'x': case 'y': case 'z': break; - default: lx_ungetc(lx, c); return TOK_IDENT; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_IDENT; } break; case S10: /* e.g. "'" */ - lx_ungetc(lx, c); return lx->z = z2, lx->z(lx); + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z2, lx->z(lx); case S11: /* e.g. "\"" */ - lx_ungetc(lx, c); return lx->z = z3, lx->z(lx); + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z3, lx->z(lx); case S12: /* e.g. "#" */ - lx_ungetc(lx, c); return lx->z = z4, lx->z(lx); + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z4, lx->z(lx); case S13: /* e.g. "\\x09" */ switch ((unsigned char) c) { @@ -980,7 +1053,7 @@ z5(struct lx *lx) case '\n': case '\r': case ' ': break; - default: lx_ungetc(lx, c); return lx->z(lx); + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z(lx); } break; @@ -1049,7 +1122,7 @@ z5(struct lx *lx) case 'y': case 'z': state = S9; break; case 'a': state = S15; break; - default: lx_ungetc(lx, c); return TOK_IDENT; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_IDENT; } break; @@ -1118,7 +1191,7 @@ z5(struct lx *lx) case 'y': case 'z': state = S9; break; case 'r': state = S16; break; - default: lx_ungetc(lx, c); return TOK_IDENT; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_IDENT; } break; @@ -1187,7 +1260,7 @@ z5(struct lx *lx) case 'y': case 'z': state = S9; break; case 't': state = S17; break; - default: lx_ungetc(lx, c); return TOK_IDENT; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_IDENT; } break; @@ -1257,12 +1330,12 @@ z5(struct lx *lx) case 'y': case 'z': state = S9; break; case ':': state = S18; break; - default: lx_ungetc(lx, c); return TOK_IDENT; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_IDENT; } break; case S18: /* e.g. "start:" */ - lx_ungetc(lx, c); return TOK_START; + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_START; case S19: /* e.g. "en" */ switch ((unsigned char) c) { @@ -1329,7 +1402,7 @@ z5(struct lx *lx) case 'y': case 'z': state = S9; break; case 'd': state = S20; break; - default: lx_ungetc(lx, c); return TOK_IDENT; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_IDENT; } break; @@ -1399,53 +1472,34 @@ z5(struct lx *lx) case 'y': case 'z': state = S9; break; case ':': state = S21; break; - default: lx_ungetc(lx, c); return TOK_IDENT; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_IDENT; } break; case S21: /* e.g. "end:" */ - lx_ungetc(lx, c); return TOK_END; + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_END; case S22: /* e.g. "->" */ - lx_ungetc(lx, c); return TOK_TO; + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_TO; default: ; /* unreached */ } - - switch (state) { - case S10: - case S11: - case S12: - case S13: - break; - - default: - if (lx->push != NULL) { - if (-1 == lx->push(lx->buf_opaque, (char)c)) { - return TOK_ERROR; - } - } - break; - - } } - lx->lgetc = NULL; - + /* end states */ switch (state) { - case NONE: return TOK_EOF; case S1: return TOK_COMMA; case S2: return TOK_SEP; case S3: return TOK_ANY; - case S5: return TOK_OPENENDIDS; + case S5: return lx->z = z1, TOK_OPENENDIDS; case S6: return TOK_EQUALS; case S7: return TOK_IDENT; case S8: return TOK_IDENT; case S9: return TOK_IDENT; - case S10: return TOK_EOF; - case S11: return TOK_EOF; - case S12: return TOK_EOF; + case S10: return lx->z = z2, lx->z(lx); + case S11: return lx->z = z3, lx->z(lx); + case S12: return lx->z = z4, lx->z(lx); case S13: return TOK_EOF; case S14: return TOK_IDENT; case S15: return TOK_IDENT; @@ -1456,8 +1510,34 @@ z5(struct lx *lx) case S20: return TOK_IDENT; case S21: return TOK_END; case S22: return TOK_TO; - default: errno = EINVAL; return TOK_ERROR; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } + + switch (state) { + case S10: + case S11: + case S12: + case S13: + break; + + default: + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return TOK_ERROR; + } + } + break; + + } + + lx->lgetc = NULL; + + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } const char * @@ -1640,6 +1720,7 @@ lx_init(struct lx *lx) lx->end.byte = 0; lx->end.line = 1; lx->end.col = 1; + (void)lx_dynpop; } enum lx_token diff --git a/src/libfsm/parser.c b/src/libfsm/parser.c index e4ac8a31b..ec9bf4f78 100644 --- a/src/libfsm/parser.c +++ b/src/libfsm/parser.c @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 153 "src/libfsm/parser.act" +#line 27 "src/libfsm/parser.act" #include @@ -179,7 +179,7 @@ p_label(fsm fsm, lex_state lex_state, act_state act_state, char *ZOc) { /* BEGINNING OF EXTRACT: CHAR */ { -#line 247 "src/libfsm/parser.act" +#line 243 "src/libfsm/parser.act" assert(lex_state->buf.a[0] != '\0'); assert(lex_state->buf.a[1] == '\0'); @@ -196,7 +196,7 @@ p_label(fsm fsm, lex_state lex_state, act_state act_state, char *ZOc) { /* BEGINNING OF EXTRACT: ESC */ { -#line 171 "src/libfsm/parser.act" +#line 167 "src/libfsm/parser.act" assert(0 == strncmp(lex_state->buf.a, "\\", 1)); assert(2 == strlen(lex_state->buf.a)); @@ -224,7 +224,7 @@ p_label(fsm fsm, lex_state lex_state, act_state act_state, char *ZOc) { /* BEGINNING OF EXTRACT: HEX */ { -#line 240 "src/libfsm/parser.act" +#line 214 "src/libfsm/parser.act" unsigned long u; char *e; @@ -263,7 +263,7 @@ p_label(fsm fsm, lex_state lex_state, act_state act_state, char *ZOc) { /* BEGINNING OF EXTRACT: OCT */ { -#line 211 "src/libfsm/parser.act" +#line 185 "src/libfsm/parser.act" unsigned long u; char *e; @@ -338,7 +338,7 @@ ZL2_items:; case (TOK_IDENT): /* BEGINNING OF EXTRACT: IDENT */ { -#line 252 "src/libfsm/parser.act" +#line 250 "src/libfsm/parser.act" /* XXX: don't exit in library code */ ZIa = xstrdup(lex_state->buf.a); @@ -366,7 +366,7 @@ ZL2_items:; goto ZL2_items; /* END OF INLINE: items */ } - /*UNREACHED*/ + /* UNREACHED */ case (ERROR_TERMINAL): return; default: @@ -451,7 +451,7 @@ ZL2_xend_C_Cend_Hstates:; } /* END OF INLINE: xend::end-states */ } - /*UNREACHED*/ + /* UNREACHED */ default: break; } @@ -489,7 +489,7 @@ p_xstart(fsm fsm, lex_state lex_state, act_state act_state) { /* BEGINNING OF ACTION: err-expected-start */ { -#line 404 "src/libfsm/parser.act" +#line 402 "src/libfsm/parser.act" err_expected(lex_state, "'start:'"); @@ -507,7 +507,7 @@ p_xstart(fsm fsm, lex_state lex_state, act_state act_state) case (TOK_IDENT): /* BEGINNING OF EXTRACT: IDENT */ { -#line 252 "src/libfsm/parser.act" +#line 250 "src/libfsm/parser.act" /* XXX: don't exit in library code */ ZIn = xstrdup(lex_state->buf.a); @@ -530,7 +530,7 @@ p_xstart(fsm fsm, lex_state lex_state, act_state act_state) } /* BEGINNING OF ACTION: add-state */ { -#line 284 "src/libfsm/parser.act" +#line 282 "src/libfsm/parser.act" struct act_statelist *p; const unsigned hash = hash_of_id((ZIn)); @@ -588,7 +588,7 @@ p_xstart(fsm fsm, lex_state lex_state, act_state act_state) /* END OF ACTION: add-state */ /* BEGINNING OF ACTION: mark-start */ { -#line 336 "src/libfsm/parser.act" +#line 335 "src/libfsm/parser.act" fsm_setstart(fsm, (ZIs)); @@ -597,7 +597,7 @@ p_xstart(fsm fsm, lex_state lex_state, act_state act_state) /* END OF ACTION: mark-start */ /* BEGINNING OF ACTION: free */ { -#line 350 "src/libfsm/parser.act" +#line 349 "src/libfsm/parser.act" free((ZIn)); @@ -639,7 +639,7 @@ p_xend(fsm fsm, lex_state lex_state, act_state act_state) { /* BEGINNING OF ACTION: err-expected-end */ { -#line 408 "src/libfsm/parser.act" +#line 406 "src/libfsm/parser.act" err_expected(lex_state, "'end:'"); @@ -687,7 +687,7 @@ p_xend_C_Cend_Hstate(fsm fsm, lex_state lex_state, act_state act_state, state *Z case (TOK_IDENT): /* BEGINNING OF EXTRACT: IDENT */ { -#line 252 "src/libfsm/parser.act" +#line 250 "src/libfsm/parser.act" /* XXX: don't exit in library code */ ZIn = xstrdup(lex_state->buf.a); @@ -705,7 +705,7 @@ p_xend_C_Cend_Hstate(fsm fsm, lex_state lex_state, act_state act_state, state *Z /* END OF INLINE: ident */ /* BEGINNING OF ACTION: add-state */ { -#line 284 "src/libfsm/parser.act" +#line 282 "src/libfsm/parser.act" struct act_statelist *p; const unsigned hash = hash_of_id((ZIn)); @@ -763,7 +763,7 @@ p_xend_C_Cend_Hstate(fsm fsm, lex_state lex_state, act_state act_state, state *Z /* END OF ACTION: add-state */ /* BEGINNING OF ACTION: mark-end */ { -#line 340 "src/libfsm/parser.act" +#line 339 "src/libfsm/parser.act" fsm_setend(fsm, (ZIs), 1); @@ -772,7 +772,7 @@ p_xend_C_Cend_Hstate(fsm fsm, lex_state lex_state, act_state act_state, state *Z /* END OF ACTION: mark-end */ /* BEGINNING OF ACTION: free */ { -#line 350 "src/libfsm/parser.act" +#line 349 "src/libfsm/parser.act" free((ZIn)); @@ -810,7 +810,7 @@ p_fsm(fsm fsm, lex_state lex_state, act_state act_state) ADVANCE_LEXER; /* BEGINNING OF ACTION: free-statelist */ { -#line 366 "src/libfsm/parser.act" +#line 353 "src/libfsm/parser.act" struct act_statelist *p; struct act_statelist *next; @@ -834,7 +834,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-syntax */ { -#line 413 "src/libfsm/parser.act" +#line 410 "src/libfsm/parser.act" err(lex_state, "Syntax error"); exit(EXIT_FAILURE); @@ -865,7 +865,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-comma */ { -#line 400 "src/libfsm/parser.act" +#line 398 "src/libfsm/parser.act" err_expected(lex_state, "','"); @@ -895,7 +895,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-sep */ { -#line 392 "src/libfsm/parser.act" +#line 390 "src/libfsm/parser.act" err_expected(lex_state, "';'"); @@ -923,7 +923,7 @@ p_78(fsm fsm, lex_state lex_state, act_state act_state, string *ZIa) case (TOK_IDENT): /* BEGINNING OF EXTRACT: IDENT */ { -#line 252 "src/libfsm/parser.act" +#line 250 "src/libfsm/parser.act" /* XXX: don't exit in library code */ ZIb = xstrdup(lex_state->buf.a); @@ -941,7 +941,7 @@ p_78(fsm fsm, lex_state lex_state, act_state act_state, string *ZIa) /* END OF INLINE: ident */ /* BEGINNING OF ACTION: add-state */ { -#line 284 "src/libfsm/parser.act" +#line 282 "src/libfsm/parser.act" struct act_statelist *p; const unsigned hash = hash_of_id((*ZIa)); @@ -999,7 +999,7 @@ p_78(fsm fsm, lex_state lex_state, act_state act_state, string *ZIa) /* END OF ACTION: add-state */ /* BEGINNING OF ACTION: add-state */ { -#line 284 "src/libfsm/parser.act" +#line 282 "src/libfsm/parser.act" struct act_statelist *p; const unsigned hash = hash_of_id((ZIb)); @@ -1057,7 +1057,7 @@ p_78(fsm fsm, lex_state lex_state, act_state act_state, string *ZIa) /* END OF ACTION: add-state */ /* BEGINNING OF ACTION: free */ { -#line 350 "src/libfsm/parser.act" +#line 349 "src/libfsm/parser.act" free((*ZIa)); @@ -1066,7 +1066,7 @@ p_78(fsm fsm, lex_state lex_state, act_state act_state, string *ZIa) /* END OF ACTION: free */ /* BEGINNING OF ACTION: free */ { -#line 350 "src/libfsm/parser.act" +#line 349 "src/libfsm/parser.act" free((ZIb)); @@ -1081,7 +1081,7 @@ p_78(fsm fsm, lex_state lex_state, act_state act_state, string *ZIa) ADVANCE_LEXER; /* BEGINNING OF ACTION: add-edge-any */ { -#line 376 "src/libfsm/parser.act" +#line 375 "src/libfsm/parser.act" if (!fsm_addedge_any(fsm, (ZIx), (ZIy))) { perror("fsm_addedge_any"); @@ -1104,7 +1104,7 @@ p_78(fsm fsm, lex_state lex_state, act_state act_state, string *ZIa) } /* BEGINNING OF ACTION: add-edge-literal */ { -#line 369 "src/libfsm/parser.act" +#line 368 "src/libfsm/parser.act" if (!fsm_addedge_literal(fsm, (ZIx), (ZIy), (ZIc))) { perror("fsm_addedge_literal"); @@ -1120,7 +1120,7 @@ p_78(fsm fsm, lex_state lex_state, act_state act_state, string *ZIa) { /* BEGINNING OF ACTION: add-edge-epsilon */ { -#line 383 "src/libfsm/parser.act" +#line 382 "src/libfsm/parser.act" if (!fsm_addedge_epsilon(fsm, (ZIx), (ZIy))) { perror("fsm_addedge_epsilon"); @@ -1138,7 +1138,7 @@ p_78(fsm fsm, lex_state lex_state, act_state act_state, string *ZIa) { /* BEGINNING OF ACTION: err-expected-trans */ { -#line 396 "src/libfsm/parser.act" +#line 394 "src/libfsm/parser.act" err_expected(lex_state, "transition"); @@ -1162,7 +1162,7 @@ p_78(fsm fsm, lex_state lex_state, act_state act_state, string *ZIa) /* BEGINNING OF ACTION: add-state */ { -#line 284 "src/libfsm/parser.act" +#line 282 "src/libfsm/parser.act" struct act_statelist *p; const unsigned hash = hash_of_id((*ZIa)); @@ -1220,7 +1220,7 @@ p_78(fsm fsm, lex_state lex_state, act_state act_state, string *ZIa) /* END OF ACTION: add-state */ /* BEGINNING OF ACTION: free */ { -#line 350 "src/libfsm/parser.act" +#line 349 "src/libfsm/parser.act" free((*ZIa)); @@ -1269,7 +1269,7 @@ ZL2_xend_C_Cend_Hids:; } /* END OF INLINE: xend::end-ids */ } - /*UNREACHED*/ + /* UNREACHED */ case (ERROR_TERMINAL): RESTORE_LEXER; goto ZL1; @@ -1298,7 +1298,7 @@ p_xend_C_Cend_Hid(fsm fsm, lex_state lex_state, act_state act_state, state ZIs) case (TOK_ENDID): /* BEGINNING OF EXTRACT: ENDID */ { -#line 277 "src/libfsm/parser.act" +#line 255 "src/libfsm/parser.act" unsigned long u; char *e; @@ -1333,7 +1333,7 @@ p_xend_C_Cend_Hid(fsm fsm, lex_state lex_state, act_state act_state, state ZIs) ADVANCE_LEXER; /* BEGINNING OF ACTION: mark-end-id */ { -#line 344 "src/libfsm/parser.act" +#line 343 "src/libfsm/parser.act" if (!fsm_endid_set(fsm, (ZIs), (ZIid))) { goto ZL1; @@ -1351,7 +1351,7 @@ ZL1:; /* BEGINNING OF TRAILER */ -#line 479 "src/libfsm/parser.act" +#line 415 "src/libfsm/parser.act" struct fsm *fsm_parse(FILE *f, const struct fsm_alloc *alloc) { diff --git a/src/libfsm/parser.h b/src/libfsm/parser.h index edeebb112..32f562c66 100644 --- a/src/libfsm/parser.h +++ b/src/libfsm/parser.h @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 163 "src/libfsm/parser.act" +#line 153 "src/libfsm/parser.act" typedef struct lex_state * lex_state; @@ -27,7 +27,7 @@ extern void p_fsm(fsm, lex_state, act_state); /* BEGINNING OF TRAILER */ -#line 480 "src/libfsm/parser.act" +#line 479 "src/libfsm/parser.act" #line 33 "src/libfsm/parser.h" diff --git a/src/libre/dialect/glob/lexer.c b/src/libre/dialect/glob/lexer.c index 843cedc4e..2a4d33a29 100644 --- a/src/libre/dialect/glob/lexer.c +++ b/src/libre/dialect/glob/lexer.c @@ -10,11 +10,31 @@ static enum lx_glob_token z0(struct lx_glob_lx *lx); +static int +lx_glob_advance_end(struct lx_glob_lx *lx, int c) +{ + lx->end.byte++; + lx->end.col++; + if (c == '\n') { + lx->end.line++; + lx->end.saved_col = lx->end.col - 1; + lx->end.col = 1; + } + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return 0; + } + } + return 1; +} + +/* This wrapper manages one character of lookahead/pushback + * and the line, column, and byte offsets. */ #if __STDC_VERSION__ >= 199901L inline #endif static int -lx_getc(struct lx_glob_lx *lx) +lx_glob_getc(struct lx_glob_lx *lx) { int c; @@ -30,18 +50,19 @@ lx_getc(struct lx_glob_lx *lx) } } - lx->end.byte++; - lx->end.col++; - - if (c == '\n') { - lx->end.line++; - lx->end.saved_col = lx->end.col - 1; - lx->end.col = 1; - } + if (!lx_glob_advance_end(lx, c)) { return EOF; } return c; } +/* This wrapper adapts calling lx_glob_getc to the interface + * in libfsm's generated code. */ +static int +fsm_getc(void *getc_opaque) +{ + return lx_glob_getc((struct lx_glob_lx *)getc_opaque); +} + #if __STDC_VERSION__ >= 199901L inline #endif @@ -50,10 +71,7 @@ lx_glob_ungetc(struct lx_glob_lx *lx, int c) { assert(lx != NULL); assert(lx->c == EOF); - lx->c = c; - - lx->end.byte--; lx->end.col--; @@ -105,6 +123,17 @@ lx_glob_dynpush(void *buf_opaque, char c) return 0; } +static void +lx_glob_dynpop(void *buf_opaque) +{ + struct lx_dynbuf *t = buf_opaque; + + assert(t != NULL); + + assert(t->p != t->a); + t->p--; +} + int lx_glob_dynclear(void *buf_opaque) { @@ -144,29 +173,28 @@ lx_glob_dynfree(void *buf_opaque) static enum lx_glob_token z0(struct lx_glob_lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3 + } state; + + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '*': state = S2; break; case '?': state = S3; break; @@ -175,34 +203,41 @@ z0(struct lx_glob_lx *lx) break; case S1: /* e.g. "\\x00" */ - lx_glob_ungetc(lx, c); return TOK_CHAR; + lx_glob_ungetc(lx, c); lx_glob_dynpop(lx->buf_opaque); return TOK_CHAR; case S2: /* e.g. "*" */ - lx_glob_ungetc(lx, c); return TOK_MANY; + lx_glob_ungetc(lx, c); lx_glob_dynpop(lx->buf_opaque); return TOK_MANY; - case S3: /* e.g. "?" */ - lx_glob_ungetc(lx, c); return TOK_ANY; + case S3: /* e.g. "\077" */ + lx_glob_ungetc(lx, c); lx_glob_dynpop(lx->buf_opaque); return TOK_ANY; default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_CHAR; + case S2: return TOK_MANY; + case S3: return TOK_ANY; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_glob_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } if (lx->push != NULL) { if (-1 == lx->push(lx->buf_opaque, (char)c)) { return TOK_ERROR; } } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_CHAR; - case S2: return TOK_MANY; - case S3: return TOK_ANY; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } const char * @@ -254,6 +289,7 @@ lx_glob_init(struct lx_glob_lx *lx) lx->end.byte = 0; lx->end.line = 1; lx->end.col = 1; + (void)lx_glob_dynpop; } enum lx_glob_token diff --git a/src/libre/dialect/glob/parser.c b/src/libre/dialect/glob/parser.c index c8f021380..b20798f4f 100644 --- a/src/libre/dialect/glob/parser.c +++ b/src/libre/dialect/glob/parser.c @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 275 "src/libre/parser.act" +#line 22 "src/libre/parser.act" #include @@ -304,7 +304,7 @@ ZL2_list_Hof_Hatoms:; } /* BEGINNING OF ACTION: ast-add-concat */ { -#line 1041 "src/libre/parser.act" +#line 1040 "src/libre/parser.act" if (!ast_add_expr_concat((ZIcat), (ZIa))) { goto ZL1; @@ -322,7 +322,7 @@ ZL2_list_Hof_Hatoms:; goto ZL2_list_Hof_Hatoms; /* END OF INLINE: list-of-atoms */ } - /*UNREACHED*/ + /* UNREACHED */ default: break; } @@ -348,7 +348,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: class-any */ { -#line 784 "src/libre/parser.act" +#line 782 "src/libre/parser.act" /* TODO: or the unicode equivalent */ (ZIa) = (*flags & RE_SINGLE) ? &class_any : &class_notnl; @@ -358,7 +358,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, /* END OF ACTION: class-any */ /* BEGINNING OF ACTION: ast-make-named */ { -#line 1034 "src/libre/parser.act" +#line 1033 "src/libre/parser.act" (ZIe) = ast_make_expr_named(act_state->poolp, *flags, (ZIa)); if ((ZIe) == NULL) { @@ -378,7 +378,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, /* BEGINNING OF EXTRACT: CHAR */ { -#line 579 "src/libre/parser.act" +#line 575 "src/libre/parser.act" /* the first byte may be '\x00' */ assert(lex_state->buf.a[1] == '\0'); @@ -397,7 +397,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZIe) = ast_make_expr_literal(act_state->poolp, *flags, (ZIc)); if ((ZIe) == NULL) { @@ -418,7 +418,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: class-any */ { -#line 784 "src/libre/parser.act" +#line 782 "src/libre/parser.act" /* TODO: or the unicode equivalent */ (ZIa) = (*flags & RE_SINGLE) ? &class_any : &class_notnl; @@ -428,7 +428,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, /* END OF ACTION: class-any */ /* BEGINNING OF ACTION: ast-make-named */ { -#line 1034 "src/libre/parser.act" +#line 1033 "src/libre/parser.act" (ZIg) = ast_make_expr_named(act_state->poolp, *flags, (ZIa)); if ((ZIg) == NULL) { @@ -440,7 +440,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, /* END OF ACTION: ast-make-named */ /* BEGINNING OF ACTION: count-zero-or-more */ { -#line 809 "src/libre/parser.act" +#line 808 "src/libre/parser.act" (ZIc) = ast_make_count(0, AST_COUNT_UNBOUNDED); @@ -449,7 +449,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, /* END OF ACTION: count-zero-or-more */ /* BEGINNING OF ACTION: ast-make-piece */ { -#line 898 "src/libre/parser.act" +#line 897 "src/libre/parser.act" if ((ZIc).min == 0 && (ZIc).max == 0) { (ZIe) = ast_make_expr_empty(act_state->poolp, *flags); @@ -478,7 +478,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-atom */ { -#line 708 "src/libre/parser.act" +#line 704 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXATOM; @@ -490,7 +490,7 @@ ZL1:; /* END OF ACTION: err-expected-atom */ /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZIe) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZIe) == NULL) { @@ -523,7 +523,7 @@ p_re__glob(flags flags, lex_state lex_state, act_state act_state, err err, t_ast /* BEGINNING OF ACTION: make-group-id */ { -#line 882 "src/libre/parser.act" +#line 881 "src/libre/parser.act" (ZIid) = act_state->group_id++; @@ -537,7 +537,7 @@ p_re__glob(flags flags, lex_state lex_state, act_state act_state, err err, t_ast { /* BEGINNING OF ACTION: ast-make-concat */ { -#line 861 "src/libre/parser.act" +#line 860 "src/libre/parser.act" (ZIe) = ast_make_expr_concat(act_state->poolp, *flags); if ((ZIe) == NULL) { @@ -558,7 +558,7 @@ p_re__glob(flags flags, lex_state lex_state, act_state act_state, err err, t_ast { /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZIe) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZIe) == NULL) { @@ -575,7 +575,7 @@ p_re__glob(flags flags, lex_state lex_state, act_state act_state, err err, t_ast /* END OF INLINE: 119 */ /* BEGINNING OF ACTION: ast-make-group */ { -#line 912 "src/libre/parser.act" +#line 911 "src/libre/parser.act" (ZInode) = ast_make_expr_group(act_state->poolp, *flags, (ZIe), (ZIid)); if ((ZInode) == NULL) { @@ -601,7 +601,7 @@ p_re__glob(flags flags, lex_state lex_state, act_state act_state, err err, t_ast { /* BEGINNING OF ACTION: err-expected-eof */ { -#line 757 "src/libre/parser.act" +#line 753 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXEOF; @@ -626,7 +626,7 @@ ZL0:; /* BEGINNING OF TRAILER */ -#line 1207 "src/libre/parser.act" +#line 1052 "src/libre/parser.act" static int diff --git a/src/libre/dialect/glob/parser.h b/src/libre/dialect/glob/parser.h index ec618caca..89800e6e9 100644 --- a/src/libre/dialect/glob/parser.h +++ b/src/libre/dialect/glob/parser.h @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 292 "src/libre/parser.act" +#line 281 "src/libre/parser.act" #include @@ -28,7 +28,7 @@ extern void p_re__glob(flags, lex_state, act_state, err, t_ast__expr *); /* BEGINNING OF TRAILER */ -#line 1209 "src/libre/parser.act" +#line 1207 "src/libre/parser.act" #line 35 "src/libre/dialect/glob/parser.h" diff --git a/src/libre/dialect/like/lexer.c b/src/libre/dialect/like/lexer.c index 4f4dcbdab..2edc365a9 100644 --- a/src/libre/dialect/like/lexer.c +++ b/src/libre/dialect/like/lexer.c @@ -10,11 +10,31 @@ static enum lx_like_token z0(struct lx_like_lx *lx); +static int +lx_like_advance_end(struct lx_like_lx *lx, int c) +{ + lx->end.byte++; + lx->end.col++; + if (c == '\n') { + lx->end.line++; + lx->end.saved_col = lx->end.col - 1; + lx->end.col = 1; + } + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return 0; + } + } + return 1; +} + +/* This wrapper manages one character of lookahead/pushback + * and the line, column, and byte offsets. */ #if __STDC_VERSION__ >= 199901L inline #endif static int -lx_getc(struct lx_like_lx *lx) +lx_like_getc(struct lx_like_lx *lx) { int c; @@ -30,18 +50,19 @@ lx_getc(struct lx_like_lx *lx) } } - lx->end.byte++; - lx->end.col++; - - if (c == '\n') { - lx->end.line++; - lx->end.saved_col = lx->end.col - 1; - lx->end.col = 1; - } + if (!lx_like_advance_end(lx, c)) { return EOF; } return c; } +/* This wrapper adapts calling lx_like_getc to the interface + * in libfsm's generated code. */ +static int +fsm_getc(void *getc_opaque) +{ + return lx_like_getc((struct lx_like_lx *)getc_opaque); +} + #if __STDC_VERSION__ >= 199901L inline #endif @@ -50,10 +71,7 @@ lx_like_ungetc(struct lx_like_lx *lx, int c) { assert(lx != NULL); assert(lx->c == EOF); - lx->c = c; - - lx->end.byte--; lx->end.col--; @@ -105,6 +123,17 @@ lx_like_dynpush(void *buf_opaque, char c) return 0; } +static void +lx_like_dynpop(void *buf_opaque) +{ + struct lx_dynbuf *t = buf_opaque; + + assert(t != NULL); + + assert(t->p != t->a); + t->p--; +} + int lx_like_dynclear(void *buf_opaque) { @@ -144,29 +173,28 @@ lx_like_dynfree(void *buf_opaque) static enum lx_like_token z0(struct lx_like_lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3 + } state; + + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '%': state = S2; break; case '_': state = S3; break; @@ -175,34 +203,41 @@ z0(struct lx_like_lx *lx) break; case S1: /* e.g. "\\x00" */ - lx_like_ungetc(lx, c); return TOK_CHAR; + lx_like_ungetc(lx, c); lx_like_dynpop(lx->buf_opaque); return TOK_CHAR; case S2: /* e.g. "%" */ - lx_like_ungetc(lx, c); return TOK_MANY; + lx_like_ungetc(lx, c); lx_like_dynpop(lx->buf_opaque); return TOK_MANY; case S3: /* e.g. "_" */ - lx_like_ungetc(lx, c); return TOK_ANY; + lx_like_ungetc(lx, c); lx_like_dynpop(lx->buf_opaque); return TOK_ANY; default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_CHAR; + case S2: return TOK_MANY; + case S3: return TOK_ANY; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_like_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } if (lx->push != NULL) { if (-1 == lx->push(lx->buf_opaque, (char)c)) { return TOK_ERROR; } } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_CHAR; - case S2: return TOK_MANY; - case S3: return TOK_ANY; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } const char * @@ -254,6 +289,7 @@ lx_like_init(struct lx_like_lx *lx) lx->end.byte = 0; lx->end.line = 1; lx->end.col = 1; + (void)lx_like_dynpop; } enum lx_like_token diff --git a/src/libre/dialect/like/parser.c b/src/libre/dialect/like/parser.c index 64bfe1078..008bd2b04 100644 --- a/src/libre/dialect/like/parser.c +++ b/src/libre/dialect/like/parser.c @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 275 "src/libre/parser.act" +#line 22 "src/libre/parser.act" #include @@ -304,7 +304,7 @@ ZL2_list_Hof_Hatoms:; } /* BEGINNING OF ACTION: ast-add-concat */ { -#line 1041 "src/libre/parser.act" +#line 1040 "src/libre/parser.act" if (!ast_add_expr_concat((ZIcat), (ZIa))) { goto ZL1; @@ -322,7 +322,7 @@ ZL2_list_Hof_Hatoms:; goto ZL2_list_Hof_Hatoms; /* END OF INLINE: list-of-atoms */ } - /*UNREACHED*/ + /* UNREACHED */ default: break; } @@ -348,7 +348,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: class-any */ { -#line 784 "src/libre/parser.act" +#line 782 "src/libre/parser.act" /* TODO: or the unicode equivalent */ (ZIa) = (*flags & RE_SINGLE) ? &class_any : &class_notnl; @@ -358,7 +358,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, /* END OF ACTION: class-any */ /* BEGINNING OF ACTION: ast-make-named */ { -#line 1034 "src/libre/parser.act" +#line 1033 "src/libre/parser.act" (ZIe) = ast_make_expr_named(act_state->poolp, *flags, (ZIa)); if ((ZIe) == NULL) { @@ -378,7 +378,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, /* BEGINNING OF EXTRACT: CHAR */ { -#line 579 "src/libre/parser.act" +#line 575 "src/libre/parser.act" /* the first byte may be '\x00' */ assert(lex_state->buf.a[1] == '\0'); @@ -397,7 +397,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZIe) = ast_make_expr_literal(act_state->poolp, *flags, (ZIc)); if ((ZIe) == NULL) { @@ -418,7 +418,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: class-any */ { -#line 784 "src/libre/parser.act" +#line 782 "src/libre/parser.act" /* TODO: or the unicode equivalent */ (ZIa) = (*flags & RE_SINGLE) ? &class_any : &class_notnl; @@ -428,7 +428,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, /* END OF ACTION: class-any */ /* BEGINNING OF ACTION: ast-make-named */ { -#line 1034 "src/libre/parser.act" +#line 1033 "src/libre/parser.act" (ZIg) = ast_make_expr_named(act_state->poolp, *flags, (ZIa)); if ((ZIg) == NULL) { @@ -440,7 +440,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, /* END OF ACTION: ast-make-named */ /* BEGINNING OF ACTION: count-zero-or-more */ { -#line 809 "src/libre/parser.act" +#line 808 "src/libre/parser.act" (ZIc) = ast_make_count(0, AST_COUNT_UNBOUNDED); @@ -449,7 +449,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, /* END OF ACTION: count-zero-or-more */ /* BEGINNING OF ACTION: ast-make-piece */ { -#line 898 "src/libre/parser.act" +#line 897 "src/libre/parser.act" if ((ZIc).min == 0 && (ZIc).max == 0) { (ZIe) = ast_make_expr_empty(act_state->poolp, *flags); @@ -478,7 +478,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-atom */ { -#line 708 "src/libre/parser.act" +#line 704 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXATOM; @@ -490,7 +490,7 @@ ZL1:; /* END OF ACTION: err-expected-atom */ /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZIe) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZIe) == NULL) { @@ -523,7 +523,7 @@ p_re__like(flags flags, lex_state lex_state, act_state act_state, err err, t_ast /* BEGINNING OF ACTION: make-group-id */ { -#line 882 "src/libre/parser.act" +#line 881 "src/libre/parser.act" (ZIid) = act_state->group_id++; @@ -537,7 +537,7 @@ p_re__like(flags flags, lex_state lex_state, act_state act_state, err err, t_ast { /* BEGINNING OF ACTION: ast-make-concat */ { -#line 861 "src/libre/parser.act" +#line 860 "src/libre/parser.act" (ZIe) = ast_make_expr_concat(act_state->poolp, *flags); if ((ZIe) == NULL) { @@ -558,7 +558,7 @@ p_re__like(flags flags, lex_state lex_state, act_state act_state, err err, t_ast { /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZIe) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZIe) == NULL) { @@ -575,7 +575,7 @@ p_re__like(flags flags, lex_state lex_state, act_state act_state, err err, t_ast /* END OF INLINE: 119 */ /* BEGINNING OF ACTION: ast-make-group */ { -#line 912 "src/libre/parser.act" +#line 911 "src/libre/parser.act" (ZInode) = ast_make_expr_group(act_state->poolp, *flags, (ZIe), (ZIid)); if ((ZInode) == NULL) { @@ -601,7 +601,7 @@ p_re__like(flags flags, lex_state lex_state, act_state act_state, err err, t_ast { /* BEGINNING OF ACTION: err-expected-eof */ { -#line 757 "src/libre/parser.act" +#line 753 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXEOF; @@ -626,7 +626,7 @@ ZL0:; /* BEGINNING OF TRAILER */ -#line 1207 "src/libre/parser.act" +#line 1052 "src/libre/parser.act" static int diff --git a/src/libre/dialect/like/parser.h b/src/libre/dialect/like/parser.h index f6c87ad7b..5294f9792 100644 --- a/src/libre/dialect/like/parser.h +++ b/src/libre/dialect/like/parser.h @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 292 "src/libre/parser.act" +#line 281 "src/libre/parser.act" #include @@ -28,7 +28,7 @@ extern void p_re__like(flags, lex_state, act_state, err, t_ast__expr *); /* BEGINNING OF TRAILER */ -#line 1209 "src/libre/parser.act" +#line 1207 "src/libre/parser.act" #line 35 "src/libre/dialect/like/parser.h" diff --git a/src/libre/dialect/literal/lexer.c b/src/libre/dialect/literal/lexer.c index f4ff77a37..f13bdbc6f 100644 --- a/src/libre/dialect/literal/lexer.c +++ b/src/libre/dialect/literal/lexer.c @@ -10,11 +10,31 @@ static enum lx_literal_token z0(struct lx_literal_lx *lx); +static int +lx_literal_advance_end(struct lx_literal_lx *lx, int c) +{ + lx->end.byte++; + lx->end.col++; + if (c == '\n') { + lx->end.line++; + lx->end.saved_col = lx->end.col - 1; + lx->end.col = 1; + } + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return 0; + } + } + return 1; +} + +/* This wrapper manages one character of lookahead/pushback + * and the line, column, and byte offsets. */ #if __STDC_VERSION__ >= 199901L inline #endif static int -lx_getc(struct lx_literal_lx *lx) +lx_literal_getc(struct lx_literal_lx *lx) { int c; @@ -30,18 +50,19 @@ lx_getc(struct lx_literal_lx *lx) } } - lx->end.byte++; - lx->end.col++; - - if (c == '\n') { - lx->end.line++; - lx->end.saved_col = lx->end.col - 1; - lx->end.col = 1; - } + if (!lx_literal_advance_end(lx, c)) { return EOF; } return c; } +/* This wrapper adapts calling lx_literal_getc to the interface + * in libfsm's generated code. */ +static int +fsm_getc(void *getc_opaque) +{ + return lx_literal_getc((struct lx_literal_lx *)getc_opaque); +} + #if __STDC_VERSION__ >= 199901L inline #endif @@ -50,10 +71,7 @@ lx_literal_ungetc(struct lx_literal_lx *lx, int c) { assert(lx != NULL); assert(lx->c == EOF); - lx->c = c; - - lx->end.byte--; lx->end.col--; @@ -105,6 +123,17 @@ lx_literal_dynpush(void *buf_opaque, char c) return 0; } +static void +lx_literal_dynpop(void *buf_opaque) +{ + struct lx_dynbuf *t = buf_opaque; + + assert(t != NULL); + + assert(t->p != t->a); + t->p--; +} + int lx_literal_dynclear(void *buf_opaque) { @@ -144,52 +173,58 @@ lx_literal_dynfree(void *buf_opaque) static enum lx_literal_token z0(struct lx_literal_lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1 + } state; + state = S0; + + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ state = S1; break; case S1: /* e.g. "" */ - lx_literal_ungetc(lx, c); return TOK_CHAR; + lx_literal_ungetc(lx, c); lx_literal_dynpop(lx->buf_opaque); return TOK_CHAR; default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_CHAR; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_literal_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } if (lx->push != NULL) { if (-1 == lx->push(lx->buf_opaque, (char)c)) { return TOK_ERROR; } } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_CHAR; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } const char * @@ -237,6 +272,7 @@ lx_literal_init(struct lx_literal_lx *lx) lx->end.byte = 0; lx->end.line = 1; lx->end.col = 1; + (void)lx_literal_dynpop; } enum lx_literal_token diff --git a/src/libre/dialect/literal/parser.c b/src/libre/dialect/literal/parser.c index 44547716b..5d1dc82f7 100644 --- a/src/libre/dialect/literal/parser.c +++ b/src/libre/dialect/literal/parser.c @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 275 "src/libre/parser.act" +#line 22 "src/libre/parser.act" #include @@ -304,7 +304,7 @@ ZL2_list_Hof_Hatoms:; } /* BEGINNING OF ACTION: ast-add-concat */ { -#line 1041 "src/libre/parser.act" +#line 1040 "src/libre/parser.act" if (!ast_add_expr_concat((ZIcat), (ZIa))) { goto ZL1; @@ -322,7 +322,7 @@ ZL2_list_Hof_Hatoms:; goto ZL2_list_Hof_Hatoms; /* END OF INLINE: list-of-atoms */ } - /*UNREACHED*/ + /* UNREACHED */ default: break; } @@ -349,7 +349,7 @@ p_re__literal(flags flags, lex_state lex_state, act_state act_state, err err, t_ /* BEGINNING OF ACTION: make-group-id */ { -#line 882 "src/libre/parser.act" +#line 881 "src/libre/parser.act" (ZIid) = act_state->group_id++; @@ -363,7 +363,7 @@ p_re__literal(flags flags, lex_state lex_state, act_state act_state, err err, t_ { /* BEGINNING OF ACTION: ast-make-concat */ { -#line 861 "src/libre/parser.act" +#line 860 "src/libre/parser.act" (ZIe) = ast_make_expr_concat(act_state->poolp, *flags); if ((ZIe) == NULL) { @@ -384,7 +384,7 @@ p_re__literal(flags flags, lex_state lex_state, act_state act_state, err err, t_ { /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZIe) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZIe) == NULL) { @@ -401,7 +401,7 @@ p_re__literal(flags flags, lex_state lex_state, act_state act_state, err err, t_ /* END OF INLINE: 117 */ /* BEGINNING OF ACTION: ast-make-group */ { -#line 912 "src/libre/parser.act" +#line 911 "src/libre/parser.act" (ZInode) = ast_make_expr_group(act_state->poolp, *flags, (ZIe), (ZIid)); if ((ZInode) == NULL) { @@ -427,7 +427,7 @@ p_re__literal(flags flags, lex_state lex_state, act_state act_state, err err, t_ { /* BEGINNING OF ACTION: err-expected-eof */ { -#line 757 "src/libre/parser.act" +#line 753 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXEOF; @@ -467,7 +467,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, case (TOK_CHAR): /* BEGINNING OF EXTRACT: CHAR */ { -#line 579 "src/libre/parser.act" +#line 575 "src/libre/parser.act" /* the first byte may be '\x00' */ assert(lex_state->buf.a[1] == '\0'); @@ -490,7 +490,7 @@ p_list_Hof_Hatoms_C_Catom(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZInode) = ast_make_expr_literal(act_state->poolp, *flags, (ZIc)); if ((ZInode) == NULL) { @@ -506,7 +506,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-atom */ { -#line 708 "src/libre/parser.act" +#line 704 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXATOM; @@ -518,7 +518,7 @@ ZL1:; /* END OF ACTION: err-expected-atom */ /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZInode) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -539,7 +539,7 @@ ZL0:; /* BEGINNING OF TRAILER */ -#line 1207 "src/libre/parser.act" +#line 1052 "src/libre/parser.act" static int diff --git a/src/libre/dialect/literal/parser.h b/src/libre/dialect/literal/parser.h index be58db4ea..7f90a15ef 100644 --- a/src/libre/dialect/literal/parser.h +++ b/src/libre/dialect/literal/parser.h @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 292 "src/libre/parser.act" +#line 281 "src/libre/parser.act" #include @@ -28,7 +28,7 @@ extern void p_re__literal(flags, lex_state, act_state, err, t_ast__expr *); /* BEGINNING OF TRAILER */ -#line 1209 "src/libre/parser.act" +#line 1207 "src/libre/parser.act" #line 35 "src/libre/dialect/literal/parser.h" diff --git a/src/libre/dialect/native/lexer.c b/src/libre/dialect/native/lexer.c index 2399683ac..b18634004 100644 --- a/src/libre/dialect/native/lexer.c +++ b/src/libre/dialect/native/lexer.c @@ -12,11 +12,31 @@ static enum lx_native_token z0(struct lx_native_lx *lx); static enum lx_native_token z1(struct lx_native_lx *lx); static enum lx_native_token z2(struct lx_native_lx *lx); +static int +lx_native_advance_end(struct lx_native_lx *lx, int c) +{ + lx->end.byte++; + lx->end.col++; + if (c == '\n') { + lx->end.line++; + lx->end.saved_col = lx->end.col - 1; + lx->end.col = 1; + } + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return 0; + } + } + return 1; +} + +/* This wrapper manages one character of lookahead/pushback + * and the line, column, and byte offsets. */ #if __STDC_VERSION__ >= 199901L inline #endif static int -lx_getc(struct lx_native_lx *lx) +lx_native_getc(struct lx_native_lx *lx) { int c; @@ -32,18 +52,19 @@ lx_getc(struct lx_native_lx *lx) } } - lx->end.byte++; - lx->end.col++; - - if (c == '\n') { - lx->end.line++; - lx->end.saved_col = lx->end.col - 1; - lx->end.col = 1; - } + if (!lx_native_advance_end(lx, c)) { return EOF; } return c; } +/* This wrapper adapts calling lx_native_getc to the interface + * in libfsm's generated code. */ +static int +fsm_getc(void *getc_opaque) +{ + return lx_native_getc((struct lx_native_lx *)getc_opaque); +} + #if __STDC_VERSION__ >= 199901L inline #endif @@ -52,10 +73,7 @@ lx_native_ungetc(struct lx_native_lx *lx, int c) { assert(lx != NULL); assert(lx->c == EOF); - lx->c = c; - - lx->end.byte--; lx->end.col--; @@ -107,6 +125,17 @@ lx_native_dynpush(void *buf_opaque, char c) return 0; } +static void +lx_native_dynpop(void *buf_opaque) +{ + struct lx_dynbuf *t = buf_opaque; + + assert(t != NULL); + + assert(t->p != t->a); + t->p--; +} + int lx_native_dynclear(void *buf_opaque) { @@ -146,29 +175,28 @@ lx_native_dynfree(void *buf_opaque) static enum lx_native_token z0(struct lx_native_lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3 + } state; + + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '0': case '1': @@ -182,7 +210,9 @@ z0(struct lx_native_lx *lx) case '9': state = S1; break; case ',': state = S2; break; case '}': state = S3; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -198,69 +228,75 @@ z0(struct lx_native_lx *lx) case '7': case '8': case '9': break; - default: lx_native_ungetc(lx, c); return TOK_COUNT; + default: lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_COUNT; } break; case S2: /* e.g. "," */ - lx_native_ungetc(lx, c); return TOK_SEP; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_SEP; case S3: /* e.g. "}" */ - lx_native_ungetc(lx, c); return lx->z = z2, TOK_CLOSECOUNT; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return lx->z = z2, TOK_CLOSECOUNT; default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_COUNT; + case S2: return TOK_SEP; + case S3: return lx->z = z2, TOK_CLOSECOUNT; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } if (lx->push != NULL) { if (-1 == lx->push(lx->buf_opaque, (char)c)) { return TOK_ERROR; } } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_COUNT; - case S2: return TOK_SEP; - case S3: return TOK_CLOSECOUNT; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_native_token z1(struct lx_native_lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, - S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, - S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, - S30, S31, S32, S33, S34, S35, S36, S37, S38, S39, - S40, S41, S42, S43, S44, S45, S46, S47, S48, S49, - S50, S51, S52, S53, S54, S55, S56, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, + S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, + S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, + S30, S31, S32, S33, S34, S35, S36, S37, S38, S39, + S40, S41, S42, S43, S44, S45, S46, S47, S48, S49, + S50, S51, S52, S53, S54, S55, S56 + } state; + state = S0; + + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '[': state = S1; break; case '\\': state = S3; break; @@ -273,12 +309,12 @@ z1(struct lx_native_lx *lx) case S1: /* e.g. "[" */ switch ((unsigned char) c) { case ':': state = S12; break; - default: lx_native_ungetc(lx, c); return TOK_CHAR; + default: lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_CHAR; } break; case S2: /* e.g. "\\x00" */ - lx_native_ungetc(lx, c); return TOK_CHAR; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_CHAR; case S3: /* e.g. "\\" */ switch ((unsigned char) c) { @@ -301,25 +337,27 @@ z1(struct lx_native_lx *lx) case '5': case '6': case '7': state = S9; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S4: /* e.g. "-" */ switch ((unsigned char) c) { case ']': state = S6; break; - default: lx_native_ungetc(lx, c); return TOK_RANGE; + default: lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_RANGE; } break; case S5: /* e.g. "]" */ - lx_native_ungetc(lx, c); return lx->z = z2, TOK_CLOSEGROUP; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return lx->z = z2, TOK_CLOSEGROUP; case S6: /* e.g. "-]" */ - lx_native_ungetc(lx, c); return lx->z = z2, TOK_CLOSEGROUPRANGE; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return lx->z = z2, TOK_CLOSEGROUPRANGE; case S7: /* e.g. "\\-" */ - lx_native_ungetc(lx, c); return TOK_ESC; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_ESC; case S8: /* e.g. "\\x" */ switch ((unsigned char) c) { @@ -345,7 +383,9 @@ z1(struct lx_native_lx *lx) case 'd': case 'e': case 'f': state = S10; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -359,7 +399,7 @@ z1(struct lx_native_lx *lx) case '5': case '6': case '7': break; - default: lx_native_ungetc(lx, c); return TOK_OCT; + default: lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_OCT; } break; @@ -387,12 +427,14 @@ z1(struct lx_native_lx *lx) case 'd': case 'e': case 'f': state = S11; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S11: /* e.g. "\\x00" */ - lx_native_ungetc(lx, c); return TOK_HEX; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_HEX; case S12: /* e.g. "[:" */ switch ((unsigned char) c) { @@ -408,35 +450,45 @@ z1(struct lx_native_lx *lx) case 'w': state = S21; break; case 'u': state = S22; break; case 'p': state = S23; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S13: /* e.g. "[:d" */ switch ((unsigned char) c) { case 'i': state = S55; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S14: /* e.g. "[:s" */ switch ((unsigned char) c) { case 'p': state = S52; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S15: /* e.g. "[:h" */ switch ((unsigned char) c) { case 's': state = S14; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S16: /* e.g. "[:g" */ switch ((unsigned char) c) { case 'r': state = S49; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -444,42 +496,54 @@ z1(struct lx_native_lx *lx) switch ((unsigned char) c) { case 's': state = S41; break; case 'l': state = S42; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S18: /* e.g. "[:c" */ switch ((unsigned char) c) { case 'n': state = S38; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S19: /* e.g. "[:l" */ switch ((unsigned char) c) { case 'o': state = S37; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S20: /* e.g. "[:x" */ switch ((unsigned char) c) { case 'd': state = S13; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S21: /* e.g. "[:w" */ switch ((unsigned char) c) { case 'o': state = S35; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S22: /* e.g. "[:u" */ switch ((unsigned char) c) { case 'p': state = S32; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -487,129 +551,165 @@ z1(struct lx_native_lx *lx) switch ((unsigned char) c) { case 'r': state = S24; break; case 'u': state = S25; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S24: /* e.g. "[:pr" */ switch ((unsigned char) c) { case 'i': state = S31; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S25: /* e.g. "[:pu" */ switch ((unsigned char) c) { case 'n': state = S26; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S26: /* e.g. "[:pun" */ switch ((unsigned char) c) { case 'c': state = S27; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S27: /* e.g. "[:digi" */ switch ((unsigned char) c) { case 't': state = S28; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S28: /* e.g. "[:word" */ switch ((unsigned char) c) { case ':': state = S29; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S29: /* e.g. "[:word:" */ switch ((unsigned char) c) { case ']': state = S30; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S30: /* e.g. "[:word:]" */ - lx_native_ungetc(lx, c); return TOK_NAMED__CLASS; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_NAMED__CLASS; case S31: /* e.g. "[:pri" */ switch ((unsigned char) c) { case 'n': state = S27; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S32: /* e.g. "[:up" */ switch ((unsigned char) c) { case 'p': state = S33; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S33: /* e.g. "[:low" */ switch ((unsigned char) c) { case 'e': state = S34; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S34: /* e.g. "[:lowe" */ switch ((unsigned char) c) { case 'r': state = S28; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S35: /* e.g. "[:wo" */ switch ((unsigned char) c) { case 'r': state = S36; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S36: /* e.g. "[:wor" */ switch ((unsigned char) c) { case 'd': state = S28; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S37: /* e.g. "[:lo" */ switch ((unsigned char) c) { case 'w': state = S33; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S38: /* e.g. "[:cn" */ switch ((unsigned char) c) { case 't': state = S39; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S39: /* e.g. "[:cnt" */ switch ((unsigned char) c) { case 'r': state = S40; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S40: /* e.g. "[:cntr" */ switch ((unsigned char) c) { case 'l': state = S28; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S41: /* e.g. "[:as" */ switch ((unsigned char) c) { case 'c': state = S47; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -617,164 +717,200 @@ z1(struct lx_native_lx *lx) switch ((unsigned char) c) { case 'p': state = S43; break; case 'n': state = S44; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S43: /* e.g. "[:alp" */ switch ((unsigned char) c) { case 'h': state = S46; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S44: /* e.g. "[:aln" */ switch ((unsigned char) c) { case 'u': state = S45; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S45: /* e.g. "[:alnu" */ switch ((unsigned char) c) { case 'm': state = S28; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S46: /* e.g. "[:alph" */ switch ((unsigned char) c) { case 'a': state = S28; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S47: /* e.g. "[:asc" */ switch ((unsigned char) c) { case 'i': state = S48; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S48: /* e.g. "[:asci" */ switch ((unsigned char) c) { case 'i': state = S28; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S49: /* e.g. "[:gr" */ switch ((unsigned char) c) { case 'a': state = S50; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S50: /* e.g. "[:gra" */ switch ((unsigned char) c) { case 'p': state = S51; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S51: /* e.g. "[:grap" */ switch ((unsigned char) c) { case 'h': state = S28; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S52: /* e.g. "[:sp" */ switch ((unsigned char) c) { case 'a': state = S53; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S53: /* e.g. "[:spa" */ switch ((unsigned char) c) { case 'c': state = S54; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S54: /* e.g. "[:spac" */ switch ((unsigned char) c) { case 'e': state = S28; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S55: /* e.g. "[:di" */ switch ((unsigned char) c) { case 'g': state = S56; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S56: /* e.g. "[:dig" */ switch ((unsigned char) c) { case 'i': state = S27; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; default: ; /* unreached */ } - - if (lx->push != NULL) { - if (-1 == lx->push(lx->buf_opaque, (char)c)) { - return TOK_ERROR; - } - } } - lx->lgetc = NULL; - + /* end states */ switch (state) { - case NONE: return TOK_EOF; case S1: return TOK_CHAR; case S2: return TOK_CHAR; case S4: return TOK_RANGE; - case S5: return TOK_CLOSEGROUP; - case S6: return TOK_CLOSEGROUPRANGE; + case S5: return lx->z = z2, TOK_CLOSEGROUP; + case S6: return lx->z = z2, TOK_CLOSEGROUPRANGE; case S7: return TOK_ESC; case S9: return TOK_OCT; case S11: return TOK_HEX; case S30: return TOK_NAMED__CLASS; - default: errno = EINVAL; return TOK_ERROR; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } + + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return TOK_ERROR; + } + } + + lx->lgetc = NULL; + + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_native_token z2(struct lx_native_lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, - S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, - S20, S21, S22, S23, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, + S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, + S20, S21, S22, S23 + } state; + + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '\\': state = S1; break; case '{': state = S3; break; @@ -820,63 +956,63 @@ z2(struct lx_native_lx *lx) case 'v': case '{': case '|': state = S19; break; - default: lx_native_ungetc(lx, c); return TOK_CHAR; + default: lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_CHAR; } break; case S2: /* e.g. "\\x00" */ - lx_native_ungetc(lx, c); return TOK_CHAR; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_CHAR; case S3: /* e.g. "{" */ - lx_native_ungetc(lx, c); return lx->z = z0, TOK_OPENCOUNT; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return lx->z = z0, TOK_OPENCOUNT; case S4: /* e.g. "[" */ switch ((unsigned char) c) { case '^': state = S14; break; case ']': state = S15; break; - default: lx_native_ungetc(lx, c); return lx->z = z1, TOK_OPENGROUP; + default: lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return lx->z = z1, TOK_OPENGROUP; } break; case S5: /* e.g. "|" */ - lx_native_ungetc(lx, c); return TOK_ALT; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_ALT; case S6: /* e.g. "." */ - lx_native_ungetc(lx, c); return TOK_ANY; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_ANY; case S7: /* e.g. "+" */ - lx_native_ungetc(lx, c); return TOK_PLUS; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_PLUS; case S8: /* e.g. "*" */ - lx_native_ungetc(lx, c); return TOK_STAR; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_STAR; - case S9: /* e.g. "?" */ - lx_native_ungetc(lx, c); return TOK_OPT; + case S9: /* e.g. "\077" */ + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_OPT; case S10: /* e.g. "$" */ - lx_native_ungetc(lx, c); return TOK_END; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_END; case S11: /* e.g. "^" */ - lx_native_ungetc(lx, c); return TOK_START; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_START; case S12: /* e.g. ")" */ - lx_native_ungetc(lx, c); return TOK_CLOSESUB; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_CLOSESUB; case S13: /* e.g. "(" */ - lx_native_ungetc(lx, c); return TOK_OPENSUB; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_OPENSUB; case S14: /* e.g. "[^" */ switch ((unsigned char) c) { case ']': state = S16; break; - default: lx_native_ungetc(lx, c); return lx->z = z1, TOK_OPENGROUPINV; + default: lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return lx->z = z1, TOK_OPENGROUPINV; } break; case S15: /* e.g. "[]" */ - lx_native_ungetc(lx, c); return lx->z = z1, TOK_OPENGROUPCB; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return lx->z = z1, TOK_OPENGROUPCB; case S16: /* e.g. "[^]" */ - lx_native_ungetc(lx, c); return lx->z = z1, TOK_OPENGROUPINVCB; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return lx->z = z1, TOK_OPENGROUPINVCB; case S17: /* e.g. "\\x" */ switch ((unsigned char) c) { @@ -902,7 +1038,9 @@ z2(struct lx_native_lx *lx) case 'd': case 'e': case 'f': state = S22; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -916,12 +1054,12 @@ z2(struct lx_native_lx *lx) case '5': case '6': case '7': state = S20; break; - default: lx_native_ungetc(lx, c); return TOK_OCT; + default: lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_OCT; } break; case S19: /* e.g. "\\$" */ - lx_native_ungetc(lx, c); return TOK_ESC; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_ESC; case S20: /* e.g. "\\00" */ switch ((unsigned char) c) { @@ -933,12 +1071,12 @@ z2(struct lx_native_lx *lx) case '5': case '6': case '7': state = S21; break; - default: lx_native_ungetc(lx, c); return TOK_OCT; + default: lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_OCT; } break; case S21: /* e.g. "\\000" */ - lx_native_ungetc(lx, c); return TOK_OCT; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_OCT; case S22: /* e.g. "\\x0" */ switch ((unsigned char) c) { @@ -964,32 +1102,26 @@ z2(struct lx_native_lx *lx) case 'd': case 'e': case 'f': state = S23; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S23: /* e.g. "\\x00" */ - lx_native_ungetc(lx, c); return TOK_HEX; + lx_native_ungetc(lx, c); lx_native_dynpop(lx->buf_opaque); return TOK_HEX; default: ; /* unreached */ } - - if (lx->push != NULL) { - if (-1 == lx->push(lx->buf_opaque, (char)c)) { - return TOK_ERROR; - } - } } - lx->lgetc = NULL; - + /* end states */ switch (state) { - case NONE: return TOK_EOF; case S1: return TOK_CHAR; case S2: return TOK_CHAR; - case S3: return TOK_OPENCOUNT; - case S4: return TOK_OPENGROUP; + case S3: return lx->z = z0, TOK_OPENCOUNT; + case S4: return lx->z = z1, TOK_OPENGROUP; case S5: return TOK_ALT; case S6: return TOK_ANY; case S7: return TOK_PLUS; @@ -999,16 +1131,31 @@ z2(struct lx_native_lx *lx) case S11: return TOK_START; case S12: return TOK_CLOSESUB; case S13: return TOK_OPENSUB; - case S14: return TOK_OPENGROUPINV; - case S15: return TOK_OPENGROUPCB; - case S16: return TOK_OPENGROUPINVCB; + case S14: return lx->z = z1, TOK_OPENGROUPINV; + case S15: return lx->z = z1, TOK_OPENGROUPCB; + case S16: return lx->z = z1, TOK_OPENGROUPINVCB; case S18: return TOK_OCT; case S19: return TOK_ESC; case S20: return TOK_OCT; case S21: return TOK_OCT; case S23: return TOK_HEX; - default: errno = EINVAL; return TOK_ERROR; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_native_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } + + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return TOK_ERROR; + } + } + + lx->lgetc = NULL; + + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } const char * @@ -1164,6 +1311,7 @@ lx_native_init(struct lx_native_lx *lx) lx->end.byte = 0; lx->end.line = 1; lx->end.col = 1; + (void)lx_native_dynpop; } enum lx_native_token diff --git a/src/libre/dialect/native/parser.c b/src/libre/dialect/native/parser.c index 809383bf8..63c5f8cb8 100644 --- a/src/libre/dialect/native/parser.c +++ b/src/libre/dialect/native/parser.c @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 275 "src/libre/parser.act" +#line 22 "src/libre/parser.act" #include @@ -326,7 +326,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hliteral(flags { /* BEGINNING OF EXTRACT: CHAR */ { -#line 579 "src/libre/parser.act" +#line 575 "src/libre/parser.act" /* the first byte may be '\x00' */ assert(lex_state->buf.a[1] == '\0'); @@ -349,7 +349,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hliteral(flags { /* BEGINNING OF EXTRACT: ESC */ { -#line 391 "src/libre/parser.act" +#line 386 "src/libre/parser.act" assert(lex_state->buf.a[0] == '\\'); assert(lex_state->buf.a[1] != '\0'); @@ -385,7 +385,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hliteral(flags { /* BEGINNING OF EXTRACT: HEX */ { -#line 535 "src/libre/parser.act" +#line 527 "src/libre/parser.act" unsigned long u; char *s, *e; @@ -443,7 +443,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hliteral(flags { /* BEGINNING OF EXTRACT: OCT */ { -#line 492 "src/libre/parser.act" +#line 484 "src/libre/parser.act" unsigned long u; char *s, *e; @@ -499,7 +499,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hliteral(flags /* END OF INLINE: 141 */ /* BEGINNING OF ACTION: ast-range-endpoint-literal */ { -#line 840 "src/libre/parser.act" +#line 839 "src/libre/parser.act" (ZIr).type = AST_ENDPOINT_LITERAL; (ZIr).u.literal.c = (unsigned char) (ZIc); @@ -531,7 +531,7 @@ p_265(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI2 /* BEGINNING OF EXTRACT: CLOSECOUNT */ { -#line 379 "src/libre/parser.act" +#line 378 "src/libre/parser.act" ZI216 = lex_state->lx.start; ZIend = lex_state->lx.end; @@ -545,7 +545,7 @@ p_265(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI2 ADVANCE_LEXER; /* BEGINNING OF ACTION: mark-count */ { -#line 778 "src/libre/parser.act" +#line 777 "src/libre/parser.act" mark(&act_state->countstart, &(*ZI263)); mark(&act_state->countend, &(ZIend)); @@ -555,7 +555,7 @@ p_265(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI2 /* END OF ACTION: mark-count */ /* BEGINNING OF ACTION: count-range */ { -#line 825 "src/libre/parser.act" +#line 824 "src/libre/parser.act" if ((*ZIm) < (*ZIm)) { err->e = RE_ENEGCOUNT; @@ -586,7 +586,7 @@ p_265(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI2 case (TOK_COUNT): /* BEGINNING OF EXTRACT: COUNT */ { -#line 636 "src/libre/parser.act" +#line 627 "src/libre/parser.act" unsigned long u; char *e; @@ -618,7 +618,7 @@ p_265(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI2 case (TOK_CLOSECOUNT): /* BEGINNING OF EXTRACT: CLOSECOUNT */ { -#line 379 "src/libre/parser.act" +#line 378 "src/libre/parser.act" ZI219 = lex_state->lx.start; ZIend = lex_state->lx.end; @@ -636,7 +636,7 @@ p_265(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI2 ADVANCE_LEXER; /* BEGINNING OF ACTION: mark-count */ { -#line 778 "src/libre/parser.act" +#line 777 "src/libre/parser.act" mark(&act_state->countstart, &(*ZI263)); mark(&act_state->countend, &(ZIend)); @@ -646,7 +646,7 @@ p_265(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI2 /* END OF ACTION: mark-count */ /* BEGINNING OF ACTION: count-range */ { -#line 825 "src/libre/parser.act" +#line 824 "src/libre/parser.act" if ((ZIn) < (*ZIm)) { err->e = RE_ENEGCOUNT; @@ -696,7 +696,7 @@ ZL2_expr_C_Ccharacter_Hclass_C_Clist_Hof_Hclass_Hterms:; } /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((ZIclass), (ZInode))) { goto ZL1; @@ -709,7 +709,7 @@ ZL2_expr_C_Ccharacter_Hclass_C_Clist_Hof_Hclass_Hterms:; goto ZL2_expr_C_Ccharacter_Hclass_C_Clist_Hof_Hclass_Hterms; /* END OF INLINE: expr::character-class::list-of-class-terms */ } - /*UNREACHED*/ + /* UNREACHED */ case (ERROR_TERMINAL): return; default: @@ -736,7 +736,7 @@ p_154(flags flags, lex_state lex_state, act_state act_state, err err) case (TOK_RANGE): /* BEGINNING OF EXTRACT: RANGE */ { -#line 309 "src/libre/parser.act" +#line 308 "src/libre/parser.act" ZI155 = '-'; ZI156 = lex_state->lx.start; @@ -760,7 +760,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-range */ { -#line 722 "src/libre/parser.act" +#line 718 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXRANGE; @@ -795,7 +795,7 @@ ZL2_expr_C_Clist_Hof_Hpieces:; } /* BEGINNING OF ACTION: ast-add-concat */ { -#line 1041 "src/libre/parser.act" +#line 1040 "src/libre/parser.act" if (!ast_add_expr_concat((ZIcat), (ZIa))) { goto ZL1; @@ -815,7 +815,7 @@ ZL2_expr_C_Clist_Hof_Hpieces:; goto ZL2_expr_C_Clist_Hof_Hpieces; /* END OF INLINE: expr::list-of-pieces */ } - /*UNREACHED*/ + /* UNREACHED */ default: break; } @@ -849,7 +849,7 @@ p_expr_C_Cliteral(flags flags, lex_state lex_state, act_state act_state, err err /* BEGINNING OF EXTRACT: CHAR */ { -#line 579 "src/libre/parser.act" +#line 575 "src/libre/parser.act" /* the first byte may be '\x00' */ assert(lex_state->buf.a[1] == '\0'); @@ -875,7 +875,7 @@ p_expr_C_Cliteral(flags flags, lex_state lex_state, act_state act_state, err err /* BEGINNING OF EXTRACT: ESC */ { -#line 391 "src/libre/parser.act" +#line 386 "src/libre/parser.act" assert(lex_state->buf.a[0] == '\\'); assert(lex_state->buf.a[1] != '\0'); @@ -914,7 +914,7 @@ p_expr_C_Cliteral(flags flags, lex_state lex_state, act_state act_state, err err /* BEGINNING OF EXTRACT: HEX */ { -#line 535 "src/libre/parser.act" +#line 527 "src/libre/parser.act" unsigned long u; char *s, *e; @@ -975,7 +975,7 @@ p_expr_C_Cliteral(flags flags, lex_state lex_state, act_state act_state, err err /* BEGINNING OF EXTRACT: OCT */ { -#line 492 "src/libre/parser.act" +#line 484 "src/libre/parser.act" unsigned long u; char *s, *e; @@ -1031,7 +1031,7 @@ p_expr_C_Cliteral(flags flags, lex_state lex_state, act_state act_state, err err /* END OF INLINE: 109 */ /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZInode) = ast_make_expr_literal(act_state->poolp, *flags, (ZIc)); if ((ZInode) == NULL) { @@ -1064,7 +1064,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: CHAR */ { -#line 579 "src/libre/parser.act" +#line 575 "src/libre/parser.act" /* the first byte may be '\x00' */ assert(lex_state->buf.a[1] == '\0'); @@ -1096,7 +1096,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: ESC */ { -#line 391 "src/libre/parser.act" +#line 386 "src/libre/parser.act" assert(lex_state->buf.a[0] == '\\'); assert(lex_state->buf.a[1] != '\0'); @@ -1141,7 +1141,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: HEX */ { -#line 535 "src/libre/parser.act" +#line 527 "src/libre/parser.act" unsigned long u; char *s, *e; @@ -1208,7 +1208,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: NAMED_CLASS */ { -#line 648 "src/libre/parser.act" +#line 647 "src/libre/parser.act" ZI243 = DIALECT_CLASS(lex_state->buf.a); if (ZI243 == NULL) { @@ -1241,7 +1241,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: OCT */ { -#line 492 "src/libre/parser.act" +#line 484 "src/libre/parser.act" unsigned long u; char *s, *e; @@ -1325,7 +1325,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hclass(flags fl case (TOK_NAMED__CLASS): /* BEGINNING OF EXTRACT: NAMED_CLASS */ { -#line 648 "src/libre/parser.act" +#line 647 "src/libre/parser.act" ZIid = DIALECT_CLASS(lex_state->buf.a); if (ZIid == NULL) { @@ -1349,7 +1349,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hclass(flags fl ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-range-endpoint-class */ { -#line 845 "src/libre/parser.act" +#line 844 "src/libre/parser.act" (ZIr).type = AST_ENDPOINT_NAMED; (ZIr).u.named.class = (ZIid); @@ -1389,7 +1389,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* BEGINNING OF EXTRACT: OPENGROUP */ { -#line 319 "src/libre/parser.act" +#line 318 "src/libre/parser.act" ZIstart = lex_state->lx.start; ZI167 = lex_state->lx.end; @@ -1403,7 +1403,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-alt */ { -#line 868 "src/libre/parser.act" +#line 867 "src/libre/parser.act" (ZInode) = ast_make_expr_alt(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -1430,7 +1430,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* BEGINNING OF EXTRACT: OPENGROUPCB */ { -#line 335 "src/libre/parser.act" +#line 334 "src/libre/parser.act" ZIstart = lex_state->lx.start; ZI186 = lex_state->lx.end; @@ -1444,7 +1444,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-alt */ { -#line 868 "src/libre/parser.act" +#line 867 "src/libre/parser.act" (ZInode) = ast_make_expr_alt(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -1457,7 +1457,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ZItmp = ZInode; /* BEGINNING OF ACTION: make-literal-cbrak */ { -#line 886 "src/libre/parser.act" +#line 885 "src/libre/parser.act" (ZIcbrak) = ']'; @@ -1471,7 +1471,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state } /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((ZItmp), (ZInode1))) { goto ZL1; @@ -1493,7 +1493,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* BEGINNING OF EXTRACT: OPENGROUPINV */ { -#line 327 "src/libre/parser.act" +#line 326 "src/libre/parser.act" ZIstart = lex_state->lx.start; ZI178 = lex_state->lx.end; @@ -1507,7 +1507,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-alt */ { -#line 868 "src/libre/parser.act" +#line 867 "src/libre/parser.act" (ZInode) = ast_make_expr_alt(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -1520,7 +1520,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ZItmp = ZInode; /* BEGINNING OF ACTION: ast-make-invert */ { -#line 995 "src/libre/parser.act" +#line 966 "src/libre/parser.act" struct ast_expr *any; @@ -1577,7 +1577,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* BEGINNING OF EXTRACT: OPENGROUPINVCB */ { -#line 343 "src/libre/parser.act" +#line 342 "src/libre/parser.act" ZIstart = lex_state->lx.start; ZI193 = lex_state->lx.end; @@ -1591,7 +1591,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-alt */ { -#line 868 "src/libre/parser.act" +#line 867 "src/libre/parser.act" (ZInode) = ast_make_expr_alt(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -1604,7 +1604,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ZItmp = ZInode; /* BEGINNING OF ACTION: ast-make-invert */ { -#line 995 "src/libre/parser.act" +#line 966 "src/libre/parser.act" struct ast_expr *any; @@ -1647,7 +1647,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* END OF ACTION: ast-make-invert */ /* BEGINNING OF ACTION: make-literal-cbrak */ { -#line 886 "src/libre/parser.act" +#line 885 "src/libre/parser.act" (ZIcbrak) = ']'; @@ -1661,7 +1661,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state } /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((ZItmp), (ZInode1))) { goto ZL1; @@ -1693,7 +1693,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* BEGINNING OF EXTRACT: CLOSEGROUP */ { -#line 351 "src/libre/parser.act" +#line 350 "src/libre/parser.act" ZI200 = ']'; ZI201 = lex_state->lx.start; @@ -1709,7 +1709,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: mark-group */ { -#line 768 "src/libre/parser.act" +#line 767 "src/libre/parser.act" mark(&act_state->groupstart, &(ZIstart)); mark(&act_state->groupend, &(ZIend)); @@ -1728,7 +1728,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* BEGINNING OF EXTRACT: CLOSEGROUPRANGE */ { -#line 361 "src/libre/parser.act" +#line 360 "src/libre/parser.act" ZIcrange = '-'; ZI203 = lex_state->lx.start; @@ -1744,7 +1744,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZIrange) = ast_make_expr_literal(act_state->poolp, *flags, (ZIcrange)); if ((ZIrange) == NULL) { @@ -1756,7 +1756,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* END OF ACTION: ast-make-literal */ /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((ZItmp), (ZIrange))) { goto ZL4; @@ -1767,7 +1767,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* END OF ACTION: ast-add-alt */ /* BEGINNING OF ACTION: mark-group */ { -#line 768 "src/libre/parser.act" +#line 767 "src/libre/parser.act" mark(&act_state->groupstart, &(ZIstart)); mark(&act_state->groupend, &(ZIend)); @@ -1785,7 +1785,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state { /* BEGINNING OF ACTION: err-expected-closegroup */ { -#line 729 "src/libre/parser.act" +#line 725 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXCLOSEGROUP; @@ -1821,7 +1821,7 @@ p_180(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp /* BEGINNING OF EXTRACT: RANGE */ { -#line 309 "src/libre/parser.act" +#line 308 "src/libre/parser.act" ZIc = '-'; ZIrstart = lex_state->lx.start; @@ -1842,7 +1842,7 @@ p_180(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp { /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZInode1) = ast_make_expr_literal(act_state->poolp, *flags, (ZIc)); if ((ZInode1) == NULL) { @@ -1865,7 +1865,7 @@ p_180(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp /* BEGINNING OF ACTION: ast-range-endpoint-literal */ { -#line 840 "src/libre/parser.act" +#line 839 "src/libre/parser.act" (ZIlower).type = AST_ENDPOINT_LITERAL; (ZIlower).u.literal.c = (unsigned char) (ZIc); @@ -1875,7 +1875,7 @@ p_180(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp /* END OF ACTION: ast-range-endpoint-literal */ /* BEGINNING OF EXTRACT: RANGE */ { -#line 309 "src/libre/parser.act" +#line 308 "src/libre/parser.act" ZI183 = '-'; ZI184 = lex_state->lx.start; @@ -1896,7 +1896,7 @@ p_180(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp } /* BEGINNING OF ACTION: ast-make-range */ { -#line 1007 "src/libre/parser.act" +#line 1004 "src/libre/parser.act" unsigned char lower, upper; @@ -1935,7 +1935,7 @@ p_180(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp /* END OF INLINE: 182 */ /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((*ZItmp), (ZInode1))) { goto ZL1; @@ -1977,7 +1977,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_Hend(flags flags, lex_state lex_st /* BEGINNING OF EXTRACT: RANGE */ { -#line 309 "src/libre/parser.act" +#line 308 "src/libre/parser.act" ZIc = '-'; ZI149 = lex_state->lx.start; @@ -1993,7 +1993,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_Hend(flags flags, lex_state lex_st ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-range-endpoint-literal */ { -#line 840 "src/libre/parser.act" +#line 839 "src/libre/parser.act" (ZIr).type = AST_ENDPOINT_LITERAL; (ZIr).u.literal.c = (unsigned char) (ZIc); @@ -2050,7 +2050,7 @@ p_expr_C_Cpiece(flags flags, lex_state lex_state, act_state act_state, err err, } /* BEGINNING OF ACTION: ast-make-piece */ { -#line 898 "src/libre/parser.act" +#line 897 "src/libre/parser.act" if ((ZIc).min == 0 && (ZIc).max == 0) { (ZInode) = ast_make_expr_empty(act_state->poolp, *flags); @@ -2088,7 +2088,7 @@ p_expr(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__ex { /* BEGINNING OF ACTION: ast-make-alt */ { -#line 868 "src/libre/parser.act" +#line 867 "src/libre/parser.act" (ZInode) = ast_make_expr_alt(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -2109,7 +2109,7 @@ p_expr(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__ex { /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZInode) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -2129,7 +2129,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-alts */ { -#line 715 "src/libre/parser.act" +#line 711 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXALTS; @@ -2141,7 +2141,7 @@ ZL1:; /* END OF ACTION: err-expected-alts */ /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZInode) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -2170,7 +2170,7 @@ p_195(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZIs { /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZInode1) = ast_make_expr_literal(act_state->poolp, *flags, (*ZIcbrak)); if ((ZInode1) == NULL) { @@ -2194,7 +2194,7 @@ p_195(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZIs /* BEGINNING OF ACTION: ast-range-endpoint-literal */ { -#line 840 "src/libre/parser.act" +#line 839 "src/libre/parser.act" (ZIr).type = AST_ENDPOINT_LITERAL; (ZIr).u.literal.c = (unsigned char) (*ZIcbrak); @@ -2204,7 +2204,7 @@ p_195(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZIs /* END OF ACTION: ast-range-endpoint-literal */ /* BEGINNING OF EXTRACT: RANGE */ { -#line 309 "src/libre/parser.act" +#line 308 "src/libre/parser.act" ZI196 = '-'; ZI197 = lex_state->lx.start; @@ -2225,7 +2225,7 @@ p_195(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZIs } /* BEGINNING OF ACTION: ast-range-endpoint-literal */ { -#line 840 "src/libre/parser.act" +#line 839 "src/libre/parser.act" (ZIlower).type = AST_ENDPOINT_LITERAL; (ZIlower).u.literal.c = (unsigned char) (*ZIcbrak); @@ -2235,7 +2235,7 @@ p_195(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZIs /* END OF ACTION: ast-range-endpoint-literal */ /* BEGINNING OF ACTION: ast-make-range */ { -#line 1007 "src/libre/parser.act" +#line 1004 "src/libre/parser.act" unsigned char lower, upper; @@ -2294,7 +2294,7 @@ p_re__native(flags flags, lex_state lex_state, act_state act_state, err err, t_a /* BEGINNING OF ACTION: make-group-id */ { -#line 882 "src/libre/parser.act" +#line 881 "src/libre/parser.act" (ZIid) = act_state->group_id++; @@ -2308,7 +2308,7 @@ p_re__native(flags flags, lex_state lex_state, act_state act_state, err err, t_a } /* BEGINNING OF ACTION: ast-make-group */ { -#line 912 "src/libre/parser.act" +#line 911 "src/libre/parser.act" (ZInode) = ast_make_expr_group(act_state->poolp, *flags, (ZIe), (ZIid)); if ((ZInode) == NULL) { @@ -2334,7 +2334,7 @@ p_re__native(flags flags, lex_state lex_state, act_state act_state, err err, t_a { /* BEGINNING OF ACTION: err-expected-eof */ { -#line 757 "src/libre/parser.act" +#line 753 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXEOF; @@ -2422,7 +2422,7 @@ ZL2_expr_C_Clist_Hof_Halts:; } /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((ZIalts), (ZIa))) { goto ZL1; @@ -2441,7 +2441,7 @@ ZL2_expr_C_Clist_Hof_Halts:; goto ZL2_expr_C_Clist_Hof_Halts; /* END OF INLINE: expr::list-of-alts */ } - /*UNREACHED*/ + /* UNREACHED */ default: break; } @@ -2453,7 +2453,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-alts */ { -#line 715 "src/libre/parser.act" +#line 711 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXALTS; @@ -2485,7 +2485,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, /* BEGINNING OF EXTRACT: OPENCOUNT */ { -#line 371 "src/libre/parser.act" +#line 370 "src/libre/parser.act" ZI263 = lex_state->lx.start; ZI264 = lex_state->lx.end; @@ -2501,7 +2501,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, case (TOK_COUNT): /* BEGINNING OF EXTRACT: COUNT */ { -#line 636 "src/libre/parser.act" +#line 627 "src/libre/parser.act" unsigned long u; char *e; @@ -2541,7 +2541,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: count-zero-or-one */ { -#line 817 "src/libre/parser.act" +#line 816 "src/libre/parser.act" (ZIc) = ast_make_count(0, 1); @@ -2555,7 +2555,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: count-one-or-more */ { -#line 813 "src/libre/parser.act" +#line 812 "src/libre/parser.act" (ZIc) = ast_make_count(1, AST_COUNT_UNBOUNDED); @@ -2569,7 +2569,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: count-zero-or-more */ { -#line 809 "src/libre/parser.act" +#line 808 "src/libre/parser.act" (ZIc) = ast_make_count(0, AST_COUNT_UNBOUNDED); @@ -2582,7 +2582,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, { /* BEGINNING OF ACTION: count-one */ { -#line 821 "src/libre/parser.act" +#line 820 "src/libre/parser.act" (ZIc) = ast_make_count(1, 1); @@ -2599,7 +2599,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-count */ { -#line 701 "src/libre/parser.act" +#line 697 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXCOUNT; @@ -2611,7 +2611,7 @@ ZL1:; /* END OF ACTION: err-expected-count */ /* BEGINNING OF ACTION: count-one */ { -#line 821 "src/libre/parser.act" +#line 820 "src/libre/parser.act" (ZIc) = ast_make_count(1, 1); @@ -2640,7 +2640,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e ADVANCE_LEXER; /* BEGINNING OF ACTION: class-any */ { -#line 784 "src/libre/parser.act" +#line 782 "src/libre/parser.act" /* TODO: or the unicode equivalent */ (ZIa) = (*flags & RE_SINGLE) ? &class_any : &class_notnl; @@ -2650,7 +2650,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: class-any */ /* BEGINNING OF ACTION: ast-make-named */ { -#line 1034 "src/libre/parser.act" +#line 1033 "src/libre/parser.act" (ZIe) = ast_make_expr_named(act_state->poolp, *flags, (ZIa)); if ((ZIe) == NULL) { @@ -2667,7 +2667,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-anchor-end */ { -#line 943 "src/libre/parser.act" +#line 942 "src/libre/parser.act" (ZIe) = ast_make_expr_anchor(act_state->poolp, *flags, AST_ANCHOR_END); if ((ZIe) == NULL) { @@ -2687,7 +2687,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e ADVANCE_LEXER; /* BEGINNING OF ACTION: make-group-id */ { -#line 882 "src/libre/parser.act" +#line 881 "src/libre/parser.act" (ZIid) = act_state->group_id++; @@ -2701,7 +2701,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e } /* BEGINNING OF ACTION: ast-make-group */ { -#line 912 "src/libre/parser.act" +#line 911 "src/libre/parser.act" (ZIe) = ast_make_expr_group(act_state->poolp, *flags, (ZIg), (ZIid)); if ((ZIe) == NULL) { @@ -2725,7 +2725,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-anchor-start */ { -#line 936 "src/libre/parser.act" +#line 935 "src/libre/parser.act" (ZIe) = ast_make_expr_anchor(act_state->poolp, *flags, AST_ANCHOR_START); if ((ZIe) == NULL) { @@ -2765,7 +2765,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-atom */ { -#line 708 "src/libre/parser.act" +#line 704 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXATOM; @@ -2777,7 +2777,7 @@ ZL1:; /* END OF ACTION: err-expected-atom */ /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZIe) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZIe) == NULL) { @@ -2806,7 +2806,7 @@ p_246(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__cla { /* BEGINNING OF ACTION: ast-make-named */ { -#line 1034 "src/libre/parser.act" +#line 1033 "src/libre/parser.act" (ZInode) = ast_make_expr_named(act_state->poolp, *flags, (*ZI243)); if ((ZInode) == NULL) { @@ -2826,7 +2826,7 @@ p_246(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__cla /* BEGINNING OF ACTION: ast-range-endpoint-class */ { -#line 845 "src/libre/parser.act" +#line 844 "src/libre/parser.act" (ZIlower).type = AST_ENDPOINT_NAMED; (ZIlower).u.named.class = (*ZI243); @@ -2842,7 +2842,7 @@ p_246(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__cla } /* BEGINNING OF ACTION: mark-range */ { -#line 773 "src/libre/parser.act" +#line 772 "src/libre/parser.act" mark(&act_state->rangestart, &(*ZI244)); mark(&act_state->rangeend, &(ZIend)); @@ -2852,7 +2852,7 @@ p_246(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__cla /* END OF ACTION: mark-range */ /* BEGINNING OF ACTION: ast-make-range */ { -#line 1007 "src/libre/parser.act" +#line 1004 "src/libre/parser.act" unsigned char lower, upper; @@ -2908,7 +2908,7 @@ p_expr_C_Calt(flags flags, lex_state lex_state, act_state act_state, err err, t_ { /* BEGINNING OF ACTION: ast-make-concat */ { -#line 861 "src/libre/parser.act" +#line 860 "src/libre/parser.act" (ZInode) = ast_make_expr_concat(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -2942,7 +2942,7 @@ p_250(flags flags, lex_state lex_state, act_state act_state, err err, t_char *ZI { /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZInode) = ast_make_expr_literal(act_state->poolp, *flags, (*ZI247)); if ((ZInode) == NULL) { @@ -2962,7 +2962,7 @@ p_250(flags flags, lex_state lex_state, act_state act_state, err err, t_char *ZI /* BEGINNING OF ACTION: ast-range-endpoint-literal */ { -#line 840 "src/libre/parser.act" +#line 839 "src/libre/parser.act" (ZIlower).type = AST_ENDPOINT_LITERAL; (ZIlower).u.literal.c = (unsigned char) (*ZI247); @@ -2978,7 +2978,7 @@ p_250(flags flags, lex_state lex_state, act_state act_state, err err, t_char *ZI } /* BEGINNING OF ACTION: mark-range */ { -#line 773 "src/libre/parser.act" +#line 772 "src/libre/parser.act" mark(&act_state->rangestart, &(*ZI248)); mark(&act_state->rangeend, &(ZIend)); @@ -2988,7 +2988,7 @@ p_250(flags flags, lex_state lex_state, act_state act_state, err err, t_char *ZI /* END OF ACTION: mark-range */ /* BEGINNING OF ACTION: ast-make-range */ { -#line 1007 "src/libre/parser.act" +#line 1004 "src/libre/parser.act" unsigned char lower, upper; @@ -3035,7 +3035,7 @@ ZL0:; /* BEGINNING OF TRAILER */ -#line 1207 "src/libre/parser.act" +#line 1052 "src/libre/parser.act" static int diff --git a/src/libre/dialect/native/parser.h b/src/libre/dialect/native/parser.h index e19648892..5cf04f6c6 100644 --- a/src/libre/dialect/native/parser.h +++ b/src/libre/dialect/native/parser.h @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 292 "src/libre/parser.act" +#line 281 "src/libre/parser.act" #include @@ -28,7 +28,7 @@ extern void p_re__native(flags, lex_state, act_state, err, t_ast__expr *); /* BEGINNING OF TRAILER */ -#line 1209 "src/libre/parser.act" +#line 1207 "src/libre/parser.act" #line 35 "src/libre/dialect/native/parser.h" diff --git a/src/libre/dialect/pcre/lexer.c b/src/libre/dialect/pcre/lexer.c index b26096785..da8f825ee 100644 --- a/src/libre/dialect/pcre/lexer.c +++ b/src/libre/dialect/pcre/lexer.c @@ -17,11 +17,31 @@ static enum lx_pcre_token z5(struct lx_pcre_lx *lx); static enum lx_pcre_token z6(struct lx_pcre_lx *lx); static enum lx_pcre_token z7(struct lx_pcre_lx *lx); +static int +lx_pcre_advance_end(struct lx_pcre_lx *lx, int c) +{ + lx->end.byte++; + lx->end.col++; + if (c == '\n') { + lx->end.line++; + lx->end.saved_col = lx->end.col - 1; + lx->end.col = 1; + } + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return 0; + } + } + return 1; +} + +/* This wrapper manages one character of lookahead/pushback + * and the line, column, and byte offsets. */ #if __STDC_VERSION__ >= 199901L inline #endif static int -lx_getc(struct lx_pcre_lx *lx) +lx_pcre_getc(struct lx_pcre_lx *lx) { int c; @@ -37,18 +57,19 @@ lx_getc(struct lx_pcre_lx *lx) } } - lx->end.byte++; - lx->end.col++; - - if (c == '\n') { - lx->end.line++; - lx->end.saved_col = lx->end.col - 1; - lx->end.col = 1; - } + if (!lx_pcre_advance_end(lx, c)) { return EOF; } return c; } +/* This wrapper adapts calling lx_pcre_getc to the interface + * in libfsm's generated code. */ +static int +fsm_getc(void *getc_opaque) +{ + return lx_pcre_getc((struct lx_pcre_lx *)getc_opaque); +} + #if __STDC_VERSION__ >= 199901L inline #endif @@ -57,10 +78,7 @@ lx_pcre_ungetc(struct lx_pcre_lx *lx, int c) { assert(lx != NULL); assert(lx->c == EOF); - lx->c = c; - - lx->end.byte--; lx->end.col--; @@ -112,6 +130,17 @@ lx_pcre_dynpush(void *buf_opaque, char c) return 0; } +static void +lx_pcre_dynpop(void *buf_opaque) +{ + struct lx_dynbuf *t = buf_opaque; + + assert(t != NULL); + + assert(t->p != t->a); + t->p--; +} + int lx_pcre_dynclear(void *buf_opaque) { @@ -151,32 +180,33 @@ lx_pcre_dynfree(void *buf_opaque) static enum lx_pcre_token z0(struct lx_pcre_lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3 + } state; + state = S0; + + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '\\': state = S1; break; - case '\x00': lx->lgetc = NULL; return TOK_UNKNOWN; + case '\x00': + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; default: state = S2; break; } break; @@ -184,19 +214,30 @@ z0(struct lx_pcre_lx *lx) case S1: /* e.g. "\\" */ switch ((unsigned char) c) { case 'E': state = S3; break; - default: lx_pcre_ungetc(lx, c); return TOK_CHAR; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_CHAR; } break; - case S2: /* e.g. "a" */ - lx_pcre_ungetc(lx, c); return TOK_CHAR; + case S2: /* e.g. "\\x01" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_CHAR; case S3: /* e.g. "\\E" */ - lx_pcre_ungetc(lx, c); return lx->z = z7, lx->z(lx); + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z7, lx->z(lx); default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_CHAR; + case S2: return TOK_CHAR; + case S3: return lx->z = z7, lx->z(lx); + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } switch (state) { case S3: @@ -211,45 +252,40 @@ z0(struct lx_pcre_lx *lx) break; } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_CHAR; - case S2: return TOK_CHAR; - case S3: return TOK_EOF; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_pcre_token z1(struct lx_pcre_lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3 + } state; + state = S0; + + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '0': case '1': @@ -263,7 +299,9 @@ z1(struct lx_pcre_lx *lx) case '9': state = S1; break; case ',': state = S2; break; case '}': state = S3; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -279,67 +317,75 @@ z1(struct lx_pcre_lx *lx) case '7': case '8': case '9': break; - default: lx_pcre_ungetc(lx, c); return TOK_COUNT; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_COUNT; } break; case S2: /* e.g. "," */ - lx_pcre_ungetc(lx, c); return TOK_SEP; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_SEP; case S3: /* e.g. "}" */ - lx_pcre_ungetc(lx, c); return lx->z = z7, TOK_CLOSECOUNT; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z7, TOK_CLOSECOUNT; default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_COUNT; + case S2: return TOK_SEP; + case S3: return lx->z = z7, TOK_CLOSECOUNT; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } if (lx->push != NULL) { if (-1 == lx->push(lx->buf_opaque, (char)c)) { return TOK_ERROR; } } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_COUNT; - case S2: return TOK_SEP; - case S3: return TOK_CLOSECOUNT; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_pcre_token z2(struct lx_pcre_lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3 + } state; + + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '\\': state = S1; break; - case '\x00': lx->lgetc = NULL; return TOK_UNKNOWN; + case '\x00': + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; default: state = S2; break; } break; @@ -347,19 +393,30 @@ z2(struct lx_pcre_lx *lx) case S1: /* e.g. "\\" */ switch ((unsigned char) c) { case 'E': state = S3; break; - default: lx_pcre_ungetc(lx, c); return TOK_CHAR; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_CHAR; } break; - case S2: /* e.g. "a" */ - lx_pcre_ungetc(lx, c); return TOK_CHAR; + case S2: /* e.g. "\\x01" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_CHAR; case S3: /* e.g. "\\E" */ - lx_pcre_ungetc(lx, c); return lx->z = z3, lx->z(lx); + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z3, lx->z(lx); default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_CHAR; + case S2: return TOK_CHAR; + case S3: return lx->z = z3, lx->z(lx); + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } switch (state) { case S3: @@ -374,24 +431,30 @@ z2(struct lx_pcre_lx *lx) break; } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_CHAR; - case S2: return TOK_CHAR; - case S3: return TOK_EOF; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_pcre_token z3(struct lx_pcre_lx *lx) { + int has_consumed_input = 0; int c; + assert(lx != NULL); + + if (lx->clear != NULL) { + lx->clear(lx->buf_opaque); + } + + lx->start = lx->end; + + void *getc_opaque = (void *)lx; enum { S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, @@ -400,32 +463,23 @@ z3(struct lx_pcre_lx *lx) S40, S41, S42, S43, S44, S45, S46, S47, S48, S49, S50, S51, S52, S53, S54, S55, S56, S57, S58, S59, S60, S61, S62, S63, S64, S65, S66, S67, S68, S69, - S70, S71, S72, NONE + S70, S71, S72 } state; - assert(lx != NULL); - - if (lx->clear != NULL) { - lx->clear(lx->buf_opaque); - } - - state = NONE; - - lx->start = lx->end; - - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '\\': state = S1; break; case '[': state = S2; break; case '-': state = S4; break; case ']': state = S5; break; - case '\x00': lx->lgetc = NULL; return TOK_UNKNOWN; + case '\x00': + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; default: state = S3; break; } break; @@ -441,22 +495,8 @@ z3(struct lx_pcre_lx *lx) case 'h': case 's': case 'v': - case 'w': state = S24; break; - case 'Q': state = S55; break; - case 'E': state = S56; break; - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': state = S57; break; - case 'x': state = S58; break; - case '0': state = S59; break; - case 'o': state = S60; break; - case 'c': state = S61; break; + case 'w': state = S23; break; + case 'c': state = S55; break; case '$': case '(': case '*': @@ -475,384 +515,492 @@ z3(struct lx_pcre_lx *lx) case 'r': case 't': case '{': - case '|': state = S63; break; - default: state = S62; break; + case '|': state = S56; break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': state = S57; break; + case 'Q': state = S58; break; + case 'E': state = S59; break; + case 'o': state = S60; break; + case 'x': state = S62; break; + case '0': state = S63; break; + default: state = S61; break; } break; case S2: /* e.g. "[" */ switch ((unsigned char) c) { case ':': state = S7; break; - default: lx_pcre_ungetc(lx, c); return TOK_CHAR; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_CHAR; } break; - case S3: /* e.g. "a" */ - lx_pcre_ungetc(lx, c); return TOK_CHAR; + case S3: /* e.g. "\\x01" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_CHAR; case S4: /* e.g. "-" */ switch ((unsigned char) c) { case ']': state = S6; break; - default: lx_pcre_ungetc(lx, c); return TOK_RANGE; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_RANGE; } break; case S5: /* e.g. "]" */ - lx_pcre_ungetc(lx, c); return lx->z = z7, TOK_CLOSEGROUP; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z7, TOK_CLOSEGROUP; case S6: /* e.g. "-]" */ - lx_pcre_ungetc(lx, c); return lx->z = z7, TOK_CLOSEGROUPRANGE; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z7, TOK_CLOSEGROUPRANGE; case S7: /* e.g. "[:" */ switch ((unsigned char) c) { case 'd': state = S8; break; - case 'u': state = S9; break; - case 'w': state = S10; break; - case 'x': state = S11; break; - case 'b': state = S12; break; - case 'c': state = S13; break; - case 'l': state = S14; break; + case 'p': state = S9; break; + case 'x': state = S10; break; + case 'c': state = S11; break; + case 'l': state = S12; break; + case 'g': state = S13; break; + case 's': state = S14; break; case 'a': state = S15; break; - case 's': state = S16; break; - case 'p': state = S17; break; - case 'g': state = S18; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'b': state = S16; break; + case 'u': state = S17; break; + case 'w': state = S18; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S8: /* e.g. "[:d" */ switch ((unsigned char) c) { case 'i': state = S53; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S9: /* e.g. "[:u" */ + case S9: /* e.g. "[:p" */ switch ((unsigned char) c) { - case 'p': state = S52; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'r': state = S48; break; + case 'u': state = S49; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S10: /* e.g. "[:w" */ + case S10: /* e.g. "[:x" */ switch ((unsigned char) c) { - case 'o': state = S50; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'd': state = S8; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S11: /* e.g. "[:x" */ + case S11: /* e.g. "[:c" */ switch ((unsigned char) c) { - case 'd': state = S8; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'n': state = S45; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S12: /* e.g. "[:b" */ + case S12: /* e.g. "[:l" */ switch ((unsigned char) c) { - case 'l': state = S47; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'o': state = S44; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S13: /* e.g. "[:c" */ + case S13: /* e.g. "[:g" */ switch ((unsigned char) c) { - case 'n': state = S44; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'r': state = S41; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S14: /* e.g. "[:l" */ + case S14: /* e.g. "[:s" */ switch ((unsigned char) c) { - case 'o': state = S41; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'p': state = S38; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S15: /* e.g. "[:a" */ switch ((unsigned char) c) { - case 's': state = S33; break; - case 'l': state = S34; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 's': state = S30; break; + case 'l': state = S31; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S16: /* e.g. "[:s" */ + case S16: /* e.g. "[:b" */ switch ((unsigned char) c) { - case 'p': state = S30; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'l': state = S27; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S17: /* e.g. "[:p" */ + case S17: /* e.g. "[:u" */ switch ((unsigned char) c) { - case 'r': state = S25; break; - case 'u': state = S26; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'p': state = S24; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S18: /* e.g. "[:g" */ + case S18: /* e.g. "[:w" */ switch ((unsigned char) c) { - case 'r': state = S19; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'o': state = S19; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S19: /* e.g. "[:gr" */ + case S19: /* e.g. "[:wo" */ switch ((unsigned char) c) { - case 'a': state = S20; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'r': state = S20; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S20: /* e.g. "[:gra" */ + case S20: /* e.g. "[:wor" */ switch ((unsigned char) c) { - case 'p': state = S21; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'd': state = S21; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S21: /* e.g. "[:grap" */ + case S21: /* e.g. "[:word" */ switch ((unsigned char) c) { - case 'h': state = S22; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case ':': state = S22; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S22: /* e.g. "[:word" */ + case S22: /* e.g. "[:word:" */ switch ((unsigned char) c) { - case ':': state = S23; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case ']': state = S23; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S23: /* e.g. "[:word:" */ + case S23: /* e.g. "\\D" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_NAMED__CLASS; + + case S24: /* e.g. "[:up" */ switch ((unsigned char) c) { - case ']': state = S24; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'p': state = S25; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S24: /* e.g. "\\d" */ - lx_pcre_ungetc(lx, c); return TOK_NAMED__CLASS; - - case S25: /* e.g. "[:pr" */ + case S25: /* e.g. "[:low" */ switch ((unsigned char) c) { - case 'i': state = S29; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'e': state = S26; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S26: /* e.g. "[:pu" */ + case S26: /* e.g. "[:lowe" */ switch ((unsigned char) c) { - case 'n': state = S27; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'r': state = S21; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S27: /* e.g. "[:pun" */ + case S27: /* e.g. "[:bl" */ switch ((unsigned char) c) { - case 'c': state = S28; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'a': state = S28; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S28: /* e.g. "[:digi" */ + case S28: /* e.g. "[:bla" */ switch ((unsigned char) c) { - case 't': state = S22; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'n': state = S29; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S29: /* e.g. "[:pri" */ + case S29: /* e.g. "[:blan" */ switch ((unsigned char) c) { - case 'n': state = S28; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'k': state = S21; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S30: /* e.g. "[:sp" */ + case S30: /* e.g. "[:as" */ switch ((unsigned char) c) { - case 'a': state = S31; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'c': state = S36; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S31: /* e.g. "[:spa" */ + case S31: /* e.g. "[:al" */ switch ((unsigned char) c) { - case 'c': state = S32; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'p': state = S32; break; + case 'n': state = S33; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S32: /* e.g. "[:spac" */ + case S32: /* e.g. "[:alp" */ switch ((unsigned char) c) { - case 'e': state = S22; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'h': state = S35; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S33: /* e.g. "[:as" */ + case S33: /* e.g. "[:aln" */ switch ((unsigned char) c) { - case 'c': state = S39; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'u': state = S34; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S34: /* e.g. "[:al" */ + case S34: /* e.g. "[:alnu" */ switch ((unsigned char) c) { - case 'n': state = S35; break; - case 'p': state = S36; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'm': state = S21; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S35: /* e.g. "[:aln" */ + case S35: /* e.g. "[:alph" */ switch ((unsigned char) c) { - case 'u': state = S38; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'a': state = S21; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S36: /* e.g. "[:alp" */ + case S36: /* e.g. "[:asc" */ switch ((unsigned char) c) { - case 'h': state = S37; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'i': state = S37; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S37: /* e.g. "[:alph" */ + case S37: /* e.g. "[:asci" */ switch ((unsigned char) c) { - case 'a': state = S22; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'i': state = S21; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S38: /* e.g. "[:alnu" */ + case S38: /* e.g. "[:sp" */ switch ((unsigned char) c) { - case 'm': state = S22; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'a': state = S39; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S39: /* e.g. "[:asc" */ + case S39: /* e.g. "[:spa" */ switch ((unsigned char) c) { - case 'i': state = S40; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'c': state = S40; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S40: /* e.g. "[:asci" */ + case S40: /* e.g. "[:spac" */ switch ((unsigned char) c) { - case 'i': state = S22; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'e': state = S21; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S41: /* e.g. "[:lo" */ + case S41: /* e.g. "[:gr" */ switch ((unsigned char) c) { - case 'w': state = S42; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'a': state = S42; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S42: /* e.g. "[:low" */ + case S42: /* e.g. "[:gra" */ switch ((unsigned char) c) { - case 'e': state = S43; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'p': state = S43; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S43: /* e.g. "[:lowe" */ + case S43: /* e.g. "[:grap" */ switch ((unsigned char) c) { - case 'r': state = S22; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'h': state = S21; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S44: /* e.g. "[:cn" */ + case S44: /* e.g. "[:lo" */ switch ((unsigned char) c) { - case 't': state = S45; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'w': state = S25; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S45: /* e.g. "[:cnt" */ + case S45: /* e.g. "[:cn" */ switch ((unsigned char) c) { - case 'r': state = S46; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 't': state = S46; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S46: /* e.g. "[:cntr" */ + case S46: /* e.g. "[:cnt" */ switch ((unsigned char) c) { - case 'l': state = S22; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'r': state = S47; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S47: /* e.g. "[:bl" */ + case S47: /* e.g. "[:cntr" */ switch ((unsigned char) c) { - case 'a': state = S48; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'l': state = S21; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S48: /* e.g. "[:bla" */ + case S48: /* e.g. "[:pr" */ switch ((unsigned char) c) { - case 'n': state = S49; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'i': state = S52; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S49: /* e.g. "[:blan" */ + case S49: /* e.g. "[:pu" */ switch ((unsigned char) c) { - case 'k': state = S22; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'n': state = S50; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S50: /* e.g. "[:wo" */ + case S50: /* e.g. "[:pun" */ switch ((unsigned char) c) { - case 'r': state = S51; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'c': state = S51; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S51: /* e.g. "[:wor" */ + case S51: /* e.g. "[:digi" */ switch ((unsigned char) c) { - case 'd': state = S22; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 't': state = S21; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S52: /* e.g. "[:up" */ + case S52: /* e.g. "[:pri" */ switch ((unsigned char) c) { - case 'p': state = S42; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'n': state = S51; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S53: /* e.g. "[:di" */ switch ((unsigned char) c) { case 'g': state = S54; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S54: /* e.g. "[:dig" */ switch ((unsigned char) c) { - case 'i': state = S28; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'i': state = S51; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S55: /* e.g. "\\Q" */ - lx_pcre_ungetc(lx, c); return lx->z = z2, lx->z(lx); + case S55: /* e.g. "\\c" */ + state = S72; break; - case S56: /* e.g. "\\E" */ - lx_pcre_ungetc(lx, c); return lx->z(lx); + case S56: /* e.g. "\\$" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_ESC; case S57: /* e.g. "\\1" */ switch ((unsigned char) c) { @@ -866,13 +1014,29 @@ z3(struct lx_pcre_lx *lx) case '7': case '8': case '9': break; - default: lx_pcre_ungetc(lx, c); return TOK_UNSUPPORTED; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_UNSUPPORTED; } break; - case S58: /* e.g. "\\x" */ + case S58: /* e.g. "\\Q" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z2, lx->z(lx); + + case S59: /* e.g. "\\E" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z(lx); + + case S60: /* e.g. "\\o" */ switch ((unsigned char) c) { - case '{': state = S69; break; + case '{': state = S70; break; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_NOESC; + } + break; + + case S61: /* e.g. "\\\\x00" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_NOESC; + + case S62: /* e.g. "\\x" */ + switch ((unsigned char) c) { + case '{': state = S66; break; case '0': case '1': case '2': @@ -894,12 +1058,12 @@ z3(struct lx_pcre_lx *lx) case 'c': case 'd': case 'e': - case 'f': state = S70; break; - default: lx_pcre_ungetc(lx, c); return TOK_HEX; + case 'f': state = S67; break; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_HEX; } break; - case S59: /* e.g. "\\0" */ + case S63: /* e.g. "\\0" */ switch ((unsigned char) c) { case '0': case '1': @@ -908,31 +1072,12 @@ z3(struct lx_pcre_lx *lx) case '4': case '5': case '6': - case '7': state = S68; break; - default: lx_pcre_ungetc(lx, c); return TOK_OCT; + case '7': state = S64; break; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_OCT; } break; - case S60: /* e.g. "\\o" */ - switch ((unsigned char) c) { - case '{': state = S65; break; - default: lx_pcre_ungetc(lx, c); return TOK_NOESC; - } - break; - - case S61: /* e.g. "\\c" */ - state = S64; break; - - case S62: /* e.g. "\\g" */ - lx_pcre_ungetc(lx, c); return TOK_NOESC; - - case S63: /* e.g. "\\a" */ - lx_pcre_ungetc(lx, c); return TOK_ESC; - - case S64: /* e.g. "\\ca" */ - lx_pcre_ungetc(lx, c); return TOK_CONTROL; - - case S65: /* e.g. "\\o{" */ + case S64: /* e.g. "\\00" */ switch ((unsigned char) c) { case '0': case '1': @@ -941,44 +1086,15 @@ z3(struct lx_pcre_lx *lx) case '4': case '5': case '6': - case '7': state = S66; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case '7': state = S65; break; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_OCT; } break; - case S66: /* e.g. "\\o{0" */ - switch ((unsigned char) c) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': break; - case '}': state = S67; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; - } - break; + case S65: /* e.g. "\\000" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_OCT; - case S67: /* e.g. "\\000" */ - lx_pcre_ungetc(lx, c); return TOK_OCT; - - case S68: /* e.g. "\\00" */ - switch ((unsigned char) c) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': state = S67; break; - default: lx_pcre_ungetc(lx, c); return TOK_OCT; - } - break; - - case S69: /* e.g. "\\x{" */ + case S66: /* e.g. "\\x{" */ switch ((unsigned char) c) { case '0': case '1': @@ -1001,12 +1117,14 @@ z3(struct lx_pcre_lx *lx) case 'c': case 'd': case 'e': - case 'f': state = S72; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case 'f': state = S69; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S70: /* e.g. "\\xa" */ + case S67: /* e.g. "\\x0" */ switch ((unsigned char) c) { case '0': case '1': @@ -1029,17 +1147,17 @@ z3(struct lx_pcre_lx *lx) case 'c': case 'd': case 'e': - case 'f': state = S71; break; - default: lx_pcre_ungetc(lx, c); return TOK_HEX; + case 'f': state = S68; break; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_HEX; } break; - case S71: /* e.g. "\\xaa" */ - lx_pcre_ungetc(lx, c); return TOK_HEX; + case S68: /* e.g. "\\x00" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_HEX; - case S72: /* e.g. "\\x{a" */ + case S69: /* e.g. "\\x{0" */ switch ((unsigned char) c) { - case '}': state = S71; break; + case '}': state = S68; break; case '0': case '1': case '2': @@ -1062,17 +1180,84 @@ z3(struct lx_pcre_lx *lx) case 'd': case 'e': case 'f': break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } + break; + + case S70: /* e.g. "\\o{" */ + switch ((unsigned char) c) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': state = S71; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; + case S71: /* e.g. "\\o{0" */ + switch ((unsigned char) c) { + case '}': state = S65; break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } + break; + + case S72: /* e.g. "\\c\\x00" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_CONTROL; + default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_INVALID; + case S2: return TOK_CHAR; + case S3: return TOK_CHAR; + case S4: return TOK_RANGE; + case S5: return lx->z = z7, TOK_CLOSEGROUP; + case S6: return lx->z = z7, TOK_CLOSEGROUPRANGE; + case S23: return TOK_NAMED__CLASS; + case S55: return TOK_NOESC; + case S56: return TOK_ESC; + case S57: return TOK_UNSUPPORTED; + case S58: return lx->z = z2, lx->z(lx); + case S59: return TOK_EOF; + case S60: return TOK_NOESC; + case S61: return TOK_NOESC; + case S62: return TOK_HEX; + case S63: return TOK_OCT; + case S64: return TOK_OCT; + case S65: return TOK_OCT; + case S67: return TOK_HEX; + case S68: return TOK_HEX; + case S72: return TOK_CONTROL; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } switch (state) { - case S55: - case S56: + case S58: + case S59: break; default: @@ -1084,64 +1269,41 @@ z3(struct lx_pcre_lx *lx) break; } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_INVALID; - case S2: return TOK_CHAR; - case S3: return TOK_CHAR; - case S4: return TOK_RANGE; - case S5: return TOK_CLOSEGROUP; - case S6: return TOK_CLOSEGROUPRANGE; - case S24: return TOK_NAMED__CLASS; - case S55: return TOK_EOF; - case S56: return TOK_EOF; - case S57: return TOK_UNSUPPORTED; - case S58: return TOK_HEX; - case S59: return TOK_OCT; - case S60: return TOK_NOESC; - case S61: return TOK_NOESC; - case S62: return TOK_NOESC; - case S63: return TOK_ESC; - case S64: return TOK_CONTROL; - case S67: return TOK_OCT; - case S68: return TOK_OCT; - case S70: return TOK_HEX; - case S71: return TOK_HEX; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_pcre_token z4(struct lx_pcre_lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, - S10, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, + S10 + } state; + + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '+': case 'R': state = S1; break; @@ -1184,12 +1346,14 @@ z4(struct lx_pcre_lx *lx) case '-': state = S8; break; case ')': state = S9; break; case ':': state = S10; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S1: /* e.g. "R" */ - lx_pcre_ungetc(lx, c); return TOK_UNSUPPORTED; + case S1: /* e.g. "+" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_UNSUPPORTED; case S2: /* e.g. "0" */ switch ((unsigned char) c) { @@ -1203,53 +1367,45 @@ z4(struct lx_pcre_lx *lx) case '7': case '8': case '9': break; - default: lx_pcre_ungetc(lx, c); return TOK_UNSUPPORTED; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_UNSUPPORTED; } break; case S3: /* e.g. "n" */ - lx_pcre_ungetc(lx, c); return TOK_FLAG__IGNORE; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_FLAG__IGNORE; case S4: /* e.g. "x" */ switch ((unsigned char) c) { case 'x': state = S7; break; - default: lx_pcre_ungetc(lx, c); return TOK_FLAG__EXTENDED; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_FLAG__EXTENDED; } break; case S5: /* e.g. "s" */ - lx_pcre_ungetc(lx, c); return TOK_FLAG__SINGLE; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_FLAG__SINGLE; case S6: /* e.g. "i" */ - lx_pcre_ungetc(lx, c); return TOK_FLAG__INSENSITIVE; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_FLAG__INSENSITIVE; case S7: /* e.g. "a" */ - lx_pcre_ungetc(lx, c); return TOK_FLAG__UNKNOWN; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_FLAG__UNKNOWN; case S8: /* e.g. "-" */ - lx_pcre_ungetc(lx, c); return TOK_NEGATE; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_NEGATE; case S9: /* e.g. ")" */ - lx_pcre_ungetc(lx, c); return lx->z = z7, TOK_CLOSE; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z7, TOK_CLOSE; case S10: /* e.g. ":" */ - lx_pcre_ungetc(lx, c); return lx->z = z7, TOK_SUB; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z7, TOK_SUB; default: ; /* unreached */ } - - if (lx->push != NULL) { - if (-1 == lx->push(lx->buf_opaque, (char)c)) { - return TOK_ERROR; - } - } } - lx->lgetc = NULL; - + /* end states */ switch (state) { - case NONE: return TOK_EOF; case S1: return TOK_UNSUPPORTED; case S2: return TOK_UNSUPPORTED; case S3: return TOK_FLAG__IGNORE; @@ -1258,38 +1414,52 @@ z4(struct lx_pcre_lx *lx) case S6: return TOK_FLAG__INSENSITIVE; case S7: return TOK_FLAG__UNKNOWN; case S8: return TOK_NEGATE; - case S9: return TOK_CLOSE; - case S10: return TOK_SUB; - default: errno = EINVAL; return TOK_ERROR; + case S9: return lx->z = z7, TOK_CLOSE; + case S10: return lx->z = z7, TOK_SUB; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } + + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return TOK_ERROR; + } + } + + lx->lgetc = NULL; + + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_pcre_token z5(struct lx_pcre_lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3 + } state; + + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '(': state = S2; break; case ')': state = S3; break; @@ -1297,23 +1467,34 @@ z5(struct lx_pcre_lx *lx) } break; - case S1: /* e.g. "a" */ + case S1: /* e.g. "\\x00" */ switch ((unsigned char) c) { case '(': - case ')': lx_pcre_ungetc(lx, c); return lx->z(lx); + case ')': lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z(lx); default: break; } break; case S2: /* e.g. "(" */ - lx_pcre_ungetc(lx, c); return TOK_INVALID__COMMENT; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_INVALID__COMMENT; case S3: /* e.g. ")" */ - lx_pcre_ungetc(lx, c); return lx->z = z7, lx->z(lx); + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z7, lx->z(lx); default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_UNKNOWN; + case S2: return TOK_INVALID__COMMENT; + case S3: return lx->z = z7, lx->z(lx); + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } switch (state) { case S1: @@ -1329,24 +1510,30 @@ z5(struct lx_pcre_lx *lx) break; } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_EOF; - case S2: return TOK_INVALID__COMMENT; - case S3: return TOK_EOF; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_pcre_token z6(struct lx_pcre_lx *lx) { + int has_consumed_input = 0; int c; + assert(lx != NULL); + + if (lx->clear != NULL) { + lx->clear(lx->buf_opaque); + } + + lx->start = lx->end; + + void *getc_opaque = (void *)lx; enum { S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, @@ -1355,26 +1542,15 @@ z6(struct lx_pcre_lx *lx) S40, S41, S42, S43, S44, S45, S46, S47, S48, S49, S50, S51, S52, S53, S54, S55, S56, S57, S58, S59, S60, S61, S62, S63, S64, S65, S66, S67, S68, S69, - S70, S71, S72, S73, S74, S75, S76, S77, S78, NONE + S70, S71, S72, S73, S74, S75, S76, S77, S78 } state; - assert(lx != NULL); - - if (lx->clear != NULL) { - lx->clear(lx->buf_opaque); - } - - state = NONE; - - lx->start = lx->end; - - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case ':': state = S1; break; case 'L': state = S2; break; @@ -1390,13 +1566,17 @@ z6(struct lx_pcre_lx *lx) case 'n': state = S12; break; case 'F': state = S13; break; case ')': state = S14; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S1: /* e.g. ":" */ switch ((unsigned char) c) { - case ')': lx->lgetc = NULL; return TOK_UNKNOWN; + case ')': + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; default: state = S78; break; } break; @@ -1404,7 +1584,9 @@ z6(struct lx_pcre_lx *lx) case S2: /* e.g. "L" */ switch ((unsigned char) c) { case 'F': state = S37; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -1412,35 +1594,45 @@ z6(struct lx_pcre_lx *lx) switch ((unsigned char) c) { case 'l': state = S18; break; case 'o': state = S76; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S4: /* e.g. "M" */ switch ((unsigned char) c) { case 'A': state = S74; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S5: /* e.g. "T" */ switch ((unsigned char) c) { case 'H': state = S72; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S6: /* e.g. "S" */ switch ((unsigned char) c) { case 'K': state = S70; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S7: /* e.g. "P" */ switch ((unsigned char) c) { case 'R': state = S67; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -1448,21 +1640,27 @@ z6(struct lx_pcre_lx *lx) switch ((unsigned char) c) { case 'C': state = S60; break; case 'N': state = S61; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S9: /* e.g. "a" */ switch ((unsigned char) c) { case 't': state = S56; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S10: /* e.g. "N" */ switch ((unsigned char) c) { case 'O': state = S46; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -1470,7 +1668,9 @@ z6(struct lx_pcre_lx *lx) switch ((unsigned char) c) { case 'R': state = S41; break; case 'O': state = S42; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -1478,7 +1678,9 @@ z6(struct lx_pcre_lx *lx) switch ((unsigned char) c) { case 'l': state = S18; break; case 'e': state = S19; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -1486,31 +1688,35 @@ z6(struct lx_pcre_lx *lx) switch ((unsigned char) c) { case ':': state = S1; break; case 'A': state = S15; break; - default: lx_pcre_ungetc(lx, c); return TOK_UNSUPPORTED; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_UNSUPPORTED; } break; case S14: /* e.g. ")" */ - lx_pcre_ungetc(lx, c); return lx->z = z7, lx->z(lx); + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z7, lx->z(lx); case S15: /* e.g. "FA" */ switch ((unsigned char) c) { case 'I': state = S16; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S16: /* e.g. "FAI" */ switch ((unsigned char) c) { case 'L': state = S17; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S17: /* e.g. "FAIL" */ switch ((unsigned char) c) { case ':': state = S1; break; - default: lx_pcre_ungetc(lx, c); return TOK_UNSUPPORTED; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_UNSUPPORTED; } break; @@ -1518,84 +1724,108 @@ z6(struct lx_pcre_lx *lx) switch ((unsigned char) c) { case 'a': case 'b': state = S36; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S19: /* e.g. "ne" */ switch ((unsigned char) c) { case 'g': state = S20; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S20: /* e.g. "neg" */ switch ((unsigned char) c) { case 'a': state = S21; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S21: /* e.g. "nega" */ switch ((unsigned char) c) { case 't': state = S22; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S22: /* e.g. "negat" */ switch ((unsigned char) c) { case 'i': state = S23; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S23: /* e.g. "negati" */ switch ((unsigned char) c) { case 'v': state = S24; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S24: /* e.g. "negativ" */ switch ((unsigned char) c) { case 'e': state = S25; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S25: /* e.g. "negative" */ switch ((unsigned char) c) { case '_': state = S26; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S26: /* e.g. "negative_" */ switch ((unsigned char) c) { case 'l': state = S27; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S27: /* e.g. "negative_l" */ switch ((unsigned char) c) { case 'o': state = S28; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S28: /* e.g. "negative_lo" */ switch ((unsigned char) c) { case 'o': state = S29; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S29: /* e.g. "negative_loo" */ switch ((unsigned char) c) { case 'k': state = S30; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -1603,338 +1833,428 @@ z6(struct lx_pcre_lx *lx) switch ((unsigned char) c) { case 'b': state = S31; break; case 'a': state = S32; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S31: /* e.g. "negative_lookb" */ switch ((unsigned char) c) { case 'e': state = S38; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S32: /* e.g. "negative_looka" */ switch ((unsigned char) c) { case 'h': state = S33; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S33: /* e.g. "negative_lookah" */ switch ((unsigned char) c) { case 'e': state = S34; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S34: /* e.g. "negative_lookahe" */ switch ((unsigned char) c) { case 'a': state = S35; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S35: /* e.g. "negative_lookahea" */ switch ((unsigned char) c) { case 'd': state = S36; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S36: /* e.g. "nla" */ switch ((unsigned char) c) { case ':': state = S37; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S37: /* e.g. "LF" */ - lx_pcre_ungetc(lx, c); return TOK_UNSUPPORTED; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_UNSUPPORTED; case S38: /* e.g. "negative_lookbe" */ switch ((unsigned char) c) { case 'h': state = S39; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S39: /* e.g. "negative_lookbeh" */ switch ((unsigned char) c) { case 'i': state = S40; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S40: /* e.g. "negative_lookbehi" */ switch ((unsigned char) c) { case 'n': state = S35; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S41: /* e.g. "CR" */ switch ((unsigned char) c) { case 'L': state = S2; break; - default: lx_pcre_ungetc(lx, c); return TOK_UNSUPPORTED; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_UNSUPPORTED; } break; case S42: /* e.g. "CO" */ switch ((unsigned char) c) { case 'M': state = S43; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S43: /* e.g. "COM" */ switch ((unsigned char) c) { case 'M': state = S44; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S44: /* e.g. "COMM" */ switch ((unsigned char) c) { case 'I': state = S45; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S45: /* e.g. "ACCEP" */ switch ((unsigned char) c) { case 'T': state = S17; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S46: /* e.g. "NO" */ switch ((unsigned char) c) { case '_': state = S47; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S47: /* e.g. "NO_" */ switch ((unsigned char) c) { case 'S': state = S48; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S48: /* e.g. "NO_S" */ switch ((unsigned char) c) { case 'T': state = S49; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S49: /* e.g. "NO_ST" */ switch ((unsigned char) c) { case 'A': state = S50; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S50: /* e.g. "NO_STA" */ switch ((unsigned char) c) { case 'R': state = S51; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S51: /* e.g. "NO_STAR" */ switch ((unsigned char) c) { case 'T': state = S52; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S52: /* e.g. "NO_START" */ switch ((unsigned char) c) { case '_': state = S53; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S53: /* e.g. "NO_START_" */ switch ((unsigned char) c) { case 'O': state = S54; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S54: /* e.g. "NO_START_O" */ switch ((unsigned char) c) { case 'P': state = S55; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S55: /* e.g. "NO_START_OP" */ switch ((unsigned char) c) { case 'T': state = S37; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S56: /* e.g. "at" */ switch ((unsigned char) c) { case 'o': state = S57; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S57: /* e.g. "ato" */ switch ((unsigned char) c) { case 'm': state = S58; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S58: /* e.g. "atom" */ switch ((unsigned char) c) { case 'i': state = S59; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S59: /* e.g. "atomi" */ switch ((unsigned char) c) { case 'c': state = S36; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S60: /* e.g. "AC" */ switch ((unsigned char) c) { case 'C': state = S65; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S61: /* e.g. "AN" */ switch ((unsigned char) c) { case 'Y': state = S62; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S62: /* e.g. "ANY" */ switch ((unsigned char) c) { case 'C': state = S63; break; - default: lx_pcre_ungetc(lx, c); return TOK_UNSUPPORTED; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_UNSUPPORTED; } break; case S63: /* e.g. "ANYC" */ switch ((unsigned char) c) { case 'R': state = S64; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S64: /* e.g. "ANYCR" */ switch ((unsigned char) c) { case 'L': state = S2; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S65: /* e.g. "ACC" */ switch ((unsigned char) c) { case 'E': state = S66; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S66: /* e.g. "ACCE" */ switch ((unsigned char) c) { case 'P': state = S45; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S67: /* e.g. "PR" */ switch ((unsigned char) c) { case 'U': state = S68; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S68: /* e.g. "PRU" */ switch ((unsigned char) c) { case 'N': state = S69; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S69: /* e.g. "PRUN" */ switch ((unsigned char) c) { case 'E': state = S17; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S70: /* e.g. "SK" */ switch ((unsigned char) c) { case 'I': state = S71; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S71: /* e.g. "SKI" */ switch ((unsigned char) c) { case 'P': state = S17; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S72: /* e.g. "TH" */ switch ((unsigned char) c) { case 'E': state = S73; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S73: /* e.g. "THE" */ switch ((unsigned char) c) { case 'N': state = S17; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S74: /* e.g. "MA" */ switch ((unsigned char) c) { case 'R': state = S75; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S75: /* e.g. "MAR" */ switch ((unsigned char) c) { case 'K': state = S17; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S76: /* e.g. "po" */ switch ((unsigned char) c) { case 's': state = S77; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S77: /* e.g. "pos" */ switch ((unsigned char) c) { case 'i': state = S21; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S78: /* e.g. ":a" */ + case S78: /* e.g. ":\\x00" */ switch ((unsigned char) c) { - case ')': lx_pcre_ungetc(lx, c); return TOK_UNSUPPORTED; + case ')': lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_UNSUPPORTED; default: break; } break; @@ -1942,6 +2262,21 @@ z6(struct lx_pcre_lx *lx) default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S13: return TOK_UNSUPPORTED; + case S14: return lx->z = z7, lx->z(lx); + case S17: return TOK_UNSUPPORTED; + case S37: return TOK_UNSUPPORTED; + case S41: return TOK_UNSUPPORTED; + case S62: return TOK_UNSUPPORTED; + case S78: return TOK_UNSUPPORTED; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } switch (state) { case S14: @@ -1956,53 +2291,44 @@ z6(struct lx_pcre_lx *lx) break; } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S13: return TOK_UNSUPPORTED; - case S14: return TOK_EOF; - case S17: return TOK_UNSUPPORTED; - case S37: return TOK_UNSUPPORTED; - case S41: return TOK_UNSUPPORTED; - case S62: return TOK_UNSUPPORTED; - case S78: return TOK_UNSUPPORTED; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_pcre_token z7(struct lx_pcre_lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, - S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, - S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, - S30, S31, S32, S33, S34, S35, S36, S37, S38, S39, - S40, S41, S42, S43, S44, S45, S46, S47, S48, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, + S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, + S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, + S30, S31, S32, S33, S34, S35, S36, S37, S38, S39, + S40, S41, S42, S43, S44, S45, S46, S47, S48 + } state; + + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '\\': state = S2; break; case '\n': @@ -2023,13 +2349,15 @@ z7(struct lx_pcre_lx *lx) case '$': state = S14; break; case '^': state = S15; break; case ')': state = S16; break; - case '\x00': lx->lgetc = NULL; return TOK_UNKNOWN; + case '\x00': + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; default: state = S1; break; } break; - case S1: /* e.g. "a" */ - lx_pcre_ungetc(lx, c); return TOK_CHAR; + case S1: /* e.g. "\\x01" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_CHAR; case S2: /* e.g. "\\" */ switch ((unsigned char) c) { @@ -2042,24 +2370,7 @@ z7(struct lx_pcre_lx *lx) case 'X': case 'b': case 'g': - case 'k': state = S20; break; - case 'Q': state = S28; break; - case 'o': state = S29; break; - case 'c': state = S30; break; - case 'x': state = S32; break; - case '0': state = S33; break; - case 'R': state = S34; break; - case 'D': - case 'H': - case 'N': - case 'S': - case 'V': - case 'W': - case 'd': - case 'h': - case 's': - case 'v': - case 'w': state = S35; break; + case 'k': state = S21; break; case '$': case '(': case ')': @@ -2077,8 +2388,9 @@ z7(struct lx_pcre_lx *lx) case 'r': case 't': case '{': - case '|': state = S36; break; - case 'E': state = S37; break; + case '|': state = S28; break; + case 'E': state = S29; break; + case 'z': state = S30; break; case '1': case '2': case '3': @@ -2087,29 +2399,45 @@ z7(struct lx_pcre_lx *lx) case '6': case '7': case '8': - case '9': state = S38; break; - case 'z': state = S39; break; - default: state = S31; break; + case '9': state = S31; break; + case 'Q': state = S32; break; + case 'c': state = S33; break; + case 'o': state = S34; break; + case 'x': state = S36; break; + case 'D': + case 'H': + case 'N': + case 'S': + case 'V': + case 'W': + case 'd': + case 'h': + case 's': + case 'v': + case 'w': state = S37; break; + case 'R': state = S38; break; + case '0': state = S39; break; + default: state = S35; break; } break; case S3: /* e.g. "\\x0a" */ - lx_pcre_ungetc(lx, c); return TOK_NEWLINE; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_NEWLINE; case S4: /* e.g. "\\x09" */ - lx_pcre_ungetc(lx, c); return TOK_WHITESPACE; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_WHITESPACE; case S5: /* e.g. "#" */ - lx_pcre_ungetc(lx, c); return TOK_MAYBE_COMMENT; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_MAYBE_COMMENT; case S6: /* e.g. "{" */ - lx_pcre_ungetc(lx, c); return lx->z = z1, TOK_OPENCOUNT; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z1, TOK_OPENCOUNT; case S7: /* e.g. "[" */ switch ((unsigned char) c) { case '^': state = S25; break; case ']': state = S26; break; - default: lx_pcre_ungetc(lx, c); return lx->z = z3, TOK_OPENGROUP; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z3, TOK_OPENGROUP; } break; @@ -2117,66 +2445,56 @@ z7(struct lx_pcre_lx *lx) switch ((unsigned char) c) { case '?': state = S17; break; case '*': state = S18; break; - default: lx_pcre_ungetc(lx, c); return TOK_OPEN; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_OPEN; } break; case S9: /* e.g. "|" */ - lx_pcre_ungetc(lx, c); return TOK_ALT; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_ALT; case S10: /* e.g. "." */ - lx_pcre_ungetc(lx, c); return TOK_ANY; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_ANY; case S11: /* e.g. "+" */ - lx_pcre_ungetc(lx, c); return TOK_PLUS; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_PLUS; case S12: /* e.g. "*" */ - lx_pcre_ungetc(lx, c); return TOK_STAR; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_STAR; - case S13: /* e.g. "?" */ - lx_pcre_ungetc(lx, c); return TOK_OPT; + case S13: /* e.g. "\077" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_OPT; case S14: /* e.g. "$" */ - lx_pcre_ungetc(lx, c); return TOK_END__NL; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_END__NL; case S15: /* e.g. "^" */ - lx_pcre_ungetc(lx, c); return TOK_START; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_START; case S16: /* e.g. ")" */ - lx_pcre_ungetc(lx, c); return TOK_CLOSE; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_CLOSE; - case S17: /* e.g. "(?" */ + case S17: /* e.g. "(\077" */ switch ((unsigned char) c) { case '#': state = S19; break; + case '<': state = S20; break; case '!': case '&': - case '=': state = S20; break; - case 'P': state = S21; break; - case '<': state = S22; break; - default: lx_pcre_ungetc(lx, c); return lx->z = z4, TOK_FLAGS; + case '=': state = S21; break; + case 'P': state = S22; break; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z4, TOK_FLAGS; } break; case S18: /* e.g. "(*" */ - lx_pcre_ungetc(lx, c); return lx->z = z6, lx->z(lx); + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z6, lx->z(lx); - case S19: /* e.g. "(?#" */ - lx_pcre_ungetc(lx, c); return lx->z = z5, lx->z(lx); + case S19: /* e.g. "(\077#" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z5, lx->z(lx); - case S20: /* e.g. "\\b" */ - lx_pcre_ungetc(lx, c); return TOK_UNSUPPORTED; - - case S21: /* e.g. "(?P" */ - switch ((unsigned char) c) { - case '>': state = S20; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; - } - break; - - case S22: /* e.g. "(?<" */ + case S20: /* e.g. "(\077<" */ switch ((unsigned char) c) { case '!': - case '=': state = S20; break; + case '=': state = S21; break; case 'A': case 'B': case 'C': @@ -2230,11 +2548,25 @@ z7(struct lx_pcre_lx *lx) case 'x': case 'y': case 'z': state = S23; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } + break; + + case S21: /* e.g. "\\B" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_UNSUPPORTED; + + case S22: /* e.g. "(\077P" */ + switch ((unsigned char) c) { + case '>': state = S21; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S23: /* e.g. "(?': state = S24; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S24: /* e.g. "(?" */ - lx_pcre_ungetc(lx, c); return TOK_OPENCAPTURE; + case S24: /* e.g. "(\077" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_OPENCAPTURE; case S25: /* e.g. "[^" */ switch ((unsigned char) c) { case ']': state = S27; break; - default: lx_pcre_ungetc(lx, c); return lx->z = z3, TOK_OPENGROUPINV; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z3, TOK_OPENGROUPINV; } break; case S26: /* e.g. "[]" */ - lx_pcre_ungetc(lx, c); return lx->z = z3, TOK_OPENGROUPCB; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z3, TOK_OPENGROUPCB; case S27: /* e.g. "[^]" */ - lx_pcre_ungetc(lx, c); return lx->z = z3, TOK_OPENGROUPINVCB; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z3, TOK_OPENGROUPINVCB; - case S28: /* e.g. "\\Q" */ - lx_pcre_ungetc(lx, c); return lx->z = z0, lx->z(lx); + case S28: /* e.g. "\\$" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_ESC; - case S29: /* e.g. "\\o" */ + case S29: /* e.g. "\\E" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z(lx); + + case S30: /* e.g. "\\z" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_END; + + case S31: /* e.g. "\\1" */ switch ((unsigned char) c) { - case '{': state = S47; break; - default: lx_pcre_ungetc(lx, c); return TOK_NOESC; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': break; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_UNSUPPORTED; } break; - case S30: /* e.g. "\\c" */ - state = S46; break; + case S32: /* e.g. "\\Q" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return lx->z = z0, lx->z(lx); - case S31: /* e.g. "\\i" */ - lx_pcre_ungetc(lx, c); return TOK_NOESC; + case S33: /* e.g. "\\c" */ + state = S48; break; - case S32: /* e.g. "\\x" */ + case S34: /* e.g. "\\o" */ + switch ((unsigned char) c) { + case '{': state = S46; break; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_NOESC; + } + break; + + case S35: /* e.g. "\\\\x00" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_NOESC; + + case S36: /* e.g. "\\x" */ switch ((unsigned char) c) { case '{': state = S42; break; case '0': @@ -2361,37 +2720,17 @@ z7(struct lx_pcre_lx *lx) case 'd': case 'e': case 'f': state = S43; break; - default: lx_pcre_ungetc(lx, c); return TOK_HEX; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_HEX; } break; - case S33: /* e.g. "\\0" */ - switch ((unsigned char) c) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': state = S40; break; - default: lx_pcre_ungetc(lx, c); return TOK_OCT; - } - break; - - case S34: /* e.g. "\\R" */ - lx_pcre_ungetc(lx, c); return TOK_EOL; - - case S35: /* e.g. "\\d" */ - lx_pcre_ungetc(lx, c); return TOK_NAMED__CLASS; - - case S36: /* e.g. "\\a" */ - lx_pcre_ungetc(lx, c); return TOK_ESC; + case S37: /* e.g. "\\D" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_NAMED__CLASS; - case S37: /* e.g. "\\E" */ - lx_pcre_ungetc(lx, c); return lx->z(lx); + case S38: /* e.g. "\\R" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_EOL; - case S38: /* e.g. "\\1" */ + case S39: /* e.g. "\\0" */ switch ((unsigned char) c) { case '0': case '1': @@ -2400,16 +2739,11 @@ z7(struct lx_pcre_lx *lx) case '4': case '5': case '6': - case '7': - case '8': - case '9': break; - default: lx_pcre_ungetc(lx, c); return TOK_UNSUPPORTED; + case '7': state = S40; break; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_OCT; } break; - case S39: /* e.g. "\\z" */ - lx_pcre_ungetc(lx, c); return TOK_END; - case S40: /* e.g. "\\00" */ switch ((unsigned char) c) { case '0': @@ -2420,12 +2754,12 @@ z7(struct lx_pcre_lx *lx) case '5': case '6': case '7': state = S41; break; - default: lx_pcre_ungetc(lx, c); return TOK_OCT; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_OCT; } break; case S41: /* e.g. "\\000" */ - lx_pcre_ungetc(lx, c); return TOK_OCT; + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_OCT; case S42: /* e.g. "\\x{" */ switch ((unsigned char) c) { @@ -2451,11 +2785,13 @@ z7(struct lx_pcre_lx *lx) case 'd': case 'e': case 'f': state = S45; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S43: /* e.g. "\\xa" */ + case S43: /* e.g. "\\x0" */ switch ((unsigned char) c) { case '0': case '1': @@ -2479,14 +2815,14 @@ z7(struct lx_pcre_lx *lx) case 'd': case 'e': case 'f': state = S44; break; - default: lx_pcre_ungetc(lx, c); return TOK_HEX; + default: lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_HEX; } break; - case S44: /* e.g. "\\xaa" */ - lx_pcre_ungetc(lx, c); return TOK_HEX; + case S44: /* e.g. "\\x00" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_HEX; - case S45: /* e.g. "\\x{a" */ + case S45: /* e.g. "\\x{0" */ switch ((unsigned char) c) { case '}': state = S44; break; case '0': @@ -2511,14 +2847,13 @@ z7(struct lx_pcre_lx *lx) case 'd': case 'e': case 'f': break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S46: /* e.g. "\\ca" */ - lx_pcre_ungetc(lx, c); return TOK_CONTROL; - - case S47: /* e.g. "\\o{" */ + case S46: /* e.g. "\\o{" */ switch ((unsigned char) c) { case '0': case '1': @@ -2527,12 +2862,14 @@ z7(struct lx_pcre_lx *lx) case '4': case '5': case '6': - case '7': state = S48; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + case '7': state = S47; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S48: /* e.g. "\\o{0" */ + case S47: /* e.g. "\\o{0" */ switch ((unsigned char) c) { case '}': state = S41; break; case '0': @@ -2543,43 +2880,29 @@ z7(struct lx_pcre_lx *lx) case '5': case '6': case '7': break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - default: - ; /* unreached */ - } - - switch (state) { - case S18: - case S19: - case S28: - case S37: - break; + case S48: /* e.g. "\\c\\x00" */ + lx_pcre_ungetc(lx, c); lx_pcre_dynpop(lx->buf_opaque); return TOK_CONTROL; default: - if (lx->push != NULL) { - if (-1 == lx->push(lx->buf_opaque, (char)c)) { - return TOK_ERROR; - } - } - break; - + ; /* unreached */ } } - lx->lgetc = NULL; - + /* end states */ switch (state) { - case NONE: return TOK_EOF; case S1: return TOK_CHAR; case S2: return TOK_INVALID; case S3: return TOK_NEWLINE; case S4: return TOK_WHITESPACE; case S5: return TOK_MAYBE_COMMENT; - case S6: return TOK_OPENCOUNT; - case S7: return TOK_OPENGROUP; + case S6: return lx->z = z1, TOK_OPENCOUNT; + case S7: return lx->z = z3, TOK_OPENGROUP; case S8: return TOK_OPEN; case S9: return TOK_ALT; case S10: return TOK_ANY; @@ -2589,33 +2912,59 @@ z7(struct lx_pcre_lx *lx) case S14: return TOK_END__NL; case S15: return TOK_START; case S16: return TOK_CLOSE; - case S17: return TOK_FLAGS; - case S18: return TOK_EOF; - case S19: return TOK_EOF; - case S20: return TOK_UNSUPPORTED; + case S17: return lx->z = z4, TOK_FLAGS; + case S18: return lx->z = z6, lx->z(lx); + case S19: return lx->z = z5, lx->z(lx); + case S21: return TOK_UNSUPPORTED; case S24: return TOK_OPENCAPTURE; - case S25: return TOK_OPENGROUPINV; - case S26: return TOK_OPENGROUPCB; - case S27: return TOK_OPENGROUPINVCB; - case S28: return TOK_EOF; - case S29: return TOK_NOESC; - case S30: return TOK_NOESC; - case S31: return TOK_NOESC; - case S32: return TOK_HEX; - case S33: return TOK_OCT; - case S34: return TOK_EOL; - case S35: return TOK_NAMED__CLASS; - case S36: return TOK_ESC; - case S37: return TOK_EOF; - case S38: return TOK_UNSUPPORTED; - case S39: return TOK_END; + case S25: return lx->z = z3, TOK_OPENGROUPINV; + case S26: return lx->z = z3, TOK_OPENGROUPCB; + case S27: return lx->z = z3, TOK_OPENGROUPINVCB; + case S28: return TOK_ESC; + case S29: return TOK_EOF; + case S30: return TOK_END; + case S31: return TOK_UNSUPPORTED; + case S32: return lx->z = z0, lx->z(lx); + case S33: return TOK_NOESC; + case S34: return TOK_NOESC; + case S35: return TOK_NOESC; + case S36: return TOK_HEX; + case S37: return TOK_NAMED__CLASS; + case S38: return TOK_EOL; + case S39: return TOK_OCT; case S40: return TOK_OCT; case S41: return TOK_OCT; case S43: return TOK_HEX; case S44: return TOK_HEX; - case S46: return TOK_CONTROL; - default: errno = EINVAL; return TOK_ERROR; + case S48: return TOK_CONTROL; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_pcre_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } + + switch (state) { + case S18: + case S19: + case S29: + case S32: + break; + + default: + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return TOK_ERROR; + } + } + break; + + } + + lx->lgetc = NULL; + + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } const char * @@ -3092,6 +3441,7 @@ lx_pcre_init(struct lx_pcre_lx *lx) lx->end.byte = 0; lx->end.line = 1; lx->end.col = 1; + (void)lx_pcre_dynpop; } enum lx_pcre_token diff --git a/src/libre/dialect/pcre/parser.c b/src/libre/dialect/pcre/parser.c index 78c11d795..e59fee020 100644 --- a/src/libre/dialect/pcre/parser.c +++ b/src/libre/dialect/pcre/parser.c @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 275 "src/libre/parser.act" +#line 22 "src/libre/parser.act" #include @@ -325,7 +325,7 @@ p_expr_C_Cflags_C_Cflag__set(flags flags, lex_state lex_state, act_state act_sta /* BEGINNING OF EXTRACT: FLAG_EXTENDED */ { -#line 666 "src/libre/parser.act" +#line 665 "src/libre/parser.act" ZIc = RE_EXTENDED; @@ -335,7 +335,7 @@ p_expr_C_Cflags_C_Cflag__set(flags flags, lex_state lex_state, act_state act_sta ADVANCE_LEXER; /* BEGINNING OF ACTION: re-flag-union */ { -#line 801 "src/libre/parser.act" +#line 800 "src/libre/parser.act" (ZIo) = (ZIi) | (ZIc); @@ -356,7 +356,7 @@ p_expr_C_Cflags_C_Cflag__set(flags flags, lex_state lex_state, act_state act_sta /* BEGINNING OF EXTRACT: FLAG_INSENSITIVE */ { -#line 662 "src/libre/parser.act" +#line 661 "src/libre/parser.act" ZIc = RE_ICASE; @@ -366,7 +366,7 @@ p_expr_C_Cflags_C_Cflag__set(flags flags, lex_state lex_state, act_state act_sta ADVANCE_LEXER; /* BEGINNING OF ACTION: re-flag-union */ { -#line 801 "src/libre/parser.act" +#line 800 "src/libre/parser.act" (ZIo) = (ZIi) | (ZIc); @@ -381,7 +381,7 @@ p_expr_C_Cflags_C_Cflag__set(flags flags, lex_state lex_state, act_state act_sta /* BEGINNING OF EXTRACT: FLAG_SINGLE */ { -#line 670 "src/libre/parser.act" +#line 669 "src/libre/parser.act" ZIc = RE_SINGLE; @@ -391,7 +391,7 @@ p_expr_C_Cflags_C_Cflag__set(flags flags, lex_state lex_state, act_state act_sta ADVANCE_LEXER; /* BEGINNING OF ACTION: re-flag-union */ { -#line 801 "src/libre/parser.act" +#line 800 "src/libre/parser.act" (ZIo) = (ZIi) | (ZIc); @@ -406,7 +406,7 @@ p_expr_C_Cflags_C_Cflag__set(flags flags, lex_state lex_state, act_state act_sta ZIo = ZIi; /* BEGINNING OF ACTION: err-unknown-flag */ { -#line 743 "src/libre/parser.act" +#line 739 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EFLAG; @@ -451,7 +451,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hliteral(flags { /* BEGINNING OF EXTRACT: CHAR */ { -#line 579 "src/libre/parser.act" +#line 575 "src/libre/parser.act" /* the first byte may be '\x00' */ assert(lex_state->buf.a[1] == '\0'); @@ -474,7 +474,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hliteral(flags { /* BEGINNING OF EXTRACT: CONTROL */ { -#line 448 "src/libre/parser.act" +#line 442 "src/libre/parser.act" assert(lex_state->buf.a[0] == '\\'); assert(lex_state->buf.a[1] == 'c'); @@ -522,7 +522,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hliteral(flags ADVANCE_LEXER; /* BEGINNING OF ACTION: err-unsupported */ { -#line 764 "src/libre/parser.act" +#line 760 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EUNSUPPORTED; @@ -538,7 +538,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hliteral(flags { /* BEGINNING OF EXTRACT: ESC */ { -#line 391 "src/libre/parser.act" +#line 386 "src/libre/parser.act" assert(lex_state->buf.a[0] == '\\'); assert(lex_state->buf.a[1] != '\0'); @@ -574,7 +574,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hliteral(flags { /* BEGINNING OF EXTRACT: HEX */ { -#line 535 "src/libre/parser.act" +#line 527 "src/libre/parser.act" unsigned long u; char *s, *e; @@ -632,7 +632,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hliteral(flags { /* BEGINNING OF EXTRACT: OCT */ { -#line 492 "src/libre/parser.act" +#line 484 "src/libre/parser.act" unsigned long u; char *s, *e; @@ -685,7 +685,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hliteral(flags { /* BEGINNING OF EXTRACT: UNSUPPORTED */ { -#line 429 "src/libre/parser.act" +#line 426 "src/libre/parser.act" /* handle \1-\9 back references */ if (lex_state->buf.a[0] == '\\' && lex_state->buf.a[1] != '\0' && lex_state->buf.a[2] == '\0') { @@ -707,7 +707,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hliteral(flags ADVANCE_LEXER; /* BEGINNING OF ACTION: err-unsupported */ { -#line 764 "src/libre/parser.act" +#line 760 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EUNSUPPORTED; @@ -726,7 +726,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hliteral(flags /* END OF INLINE: 155 */ /* BEGINNING OF ACTION: ast-range-endpoint-literal */ { -#line 840 "src/libre/parser.act" +#line 839 "src/libre/parser.act" (ZIr).type = AST_ENDPOINT_LITERAL; (ZIr).u.literal.c = (unsigned char) (ZIc); @@ -762,7 +762,7 @@ ZL2_expr_C_Ccharacter_Hclass_C_Clist_Hof_Hclass_Hterms:; } /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((ZIclass), (ZInode))) { goto ZL1; @@ -775,7 +775,7 @@ ZL2_expr_C_Ccharacter_Hclass_C_Clist_Hof_Hclass_Hterms:; goto ZL2_expr_C_Ccharacter_Hclass_C_Clist_Hof_Hclass_Hterms; /* END OF INLINE: expr::character-class::list-of-class-terms */ } - /*UNREACHED*/ + /* UNREACHED */ case (ERROR_TERMINAL): return; default: @@ -822,7 +822,7 @@ ZL2_expr_C_Clist_Hof_Hpieces:; } /* BEGINNING OF ACTION: ast-add-concat */ { -#line 1041 "src/libre/parser.act" +#line 1040 "src/libre/parser.act" if (!ast_add_expr_concat((ZIcat), (ZIa))) { goto ZL1; @@ -851,7 +851,7 @@ ZL2_expr_C_Clist_Hof_Hpieces:; goto ZL2_expr_C_Clist_Hof_Hpieces; /* END OF INLINE: expr::list-of-pieces */ } - /*UNREACHED*/ + /* UNREACHED */ default: break; } @@ -874,7 +874,7 @@ p_293(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__cla { /* BEGINNING OF ACTION: ast-make-named */ { -#line 1034 "src/libre/parser.act" +#line 1033 "src/libre/parser.act" (ZInode) = ast_make_expr_named(act_state->poolp, *flags, (*ZI290)); if ((ZInode) == NULL) { @@ -894,7 +894,7 @@ p_293(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__cla /* BEGINNING OF ACTION: ast-range-endpoint-class */ { -#line 845 "src/libre/parser.act" +#line 844 "src/libre/parser.act" (ZIlower).type = AST_ENDPOINT_NAMED; (ZIlower).u.named.class = (*ZI290); @@ -910,7 +910,7 @@ p_293(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__cla } /* BEGINNING OF ACTION: mark-range */ { -#line 773 "src/libre/parser.act" +#line 772 "src/libre/parser.act" mark(&act_state->rangestart, &(*ZI291)); mark(&act_state->rangeend, &(ZIend)); @@ -920,7 +920,7 @@ p_293(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__cla /* END OF ACTION: mark-range */ /* BEGINNING OF ACTION: ast-make-range */ { -#line 1007 "src/libre/parser.act" +#line 1004 "src/libre/parser.act" unsigned char lower, upper; @@ -980,7 +980,7 @@ p_168(flags flags, lex_state lex_state, act_state act_state, err err) case (TOK_RANGE): /* BEGINNING OF EXTRACT: RANGE */ { -#line 309 "src/libre/parser.act" +#line 308 "src/libre/parser.act" ZI169 = '-'; ZI170 = lex_state->lx.start; @@ -1004,7 +1004,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-range */ { -#line 722 "src/libre/parser.act" +#line 718 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXRANGE; @@ -1043,7 +1043,7 @@ p_expr_C_Cliteral(flags flags, lex_state lex_state, act_state act_state, err err /* BEGINNING OF EXTRACT: CHAR */ { -#line 579 "src/libre/parser.act" +#line 575 "src/libre/parser.act" /* the first byte may be '\x00' */ assert(lex_state->buf.a[1] == '\0'); @@ -1069,7 +1069,7 @@ p_expr_C_Cliteral(flags flags, lex_state lex_state, act_state act_state, err err /* BEGINNING OF EXTRACT: CONTROL */ { -#line 448 "src/libre/parser.act" +#line 442 "src/libre/parser.act" assert(lex_state->buf.a[0] == '\\'); assert(lex_state->buf.a[1] == 'c'); @@ -1124,7 +1124,7 @@ p_expr_C_Cliteral(flags flags, lex_state lex_state, act_state act_state, err err /* BEGINNING OF EXTRACT: ESC */ { -#line 391 "src/libre/parser.act" +#line 386 "src/libre/parser.act" assert(lex_state->buf.a[0] == '\\'); assert(lex_state->buf.a[1] != '\0'); @@ -1163,7 +1163,7 @@ p_expr_C_Cliteral(flags flags, lex_state lex_state, act_state act_state, err err /* BEGINNING OF EXTRACT: HEX */ { -#line 535 "src/libre/parser.act" +#line 527 "src/libre/parser.act" unsigned long u; char *s, *e; @@ -1224,7 +1224,7 @@ p_expr_C_Cliteral(flags flags, lex_state lex_state, act_state act_state, err err /* BEGINNING OF EXTRACT: NOESC */ { -#line 417 "src/libre/parser.act" +#line 412 "src/libre/parser.act" assert(lex_state->buf.a[0] == '\\'); assert(lex_state->buf.a[1] != '\0'); @@ -1251,7 +1251,7 @@ p_expr_C_Cliteral(flags flags, lex_state lex_state, act_state act_state, err err /* BEGINNING OF EXTRACT: OCT */ { -#line 492 "src/libre/parser.act" +#line 484 "src/libre/parser.act" unsigned long u; char *s, *e; @@ -1307,7 +1307,7 @@ p_expr_C_Cliteral(flags flags, lex_state lex_state, act_state act_state, err err /* BEGINNING OF EXTRACT: UNSUPPORTED */ { -#line 429 "src/libre/parser.act" +#line 426 "src/libre/parser.act" /* handle \1-\9 back references */ if (lex_state->buf.a[0] == '\\' && lex_state->buf.a[1] != '\0' && lex_state->buf.a[2] == '\0') { @@ -1329,7 +1329,7 @@ p_expr_C_Cliteral(flags flags, lex_state lex_state, act_state act_state, err err ADVANCE_LEXER; /* BEGINNING OF ACTION: err-unsupported */ { -#line 764 "src/libre/parser.act" +#line 760 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EUNSUPPORTED; @@ -1348,7 +1348,7 @@ p_expr_C_Cliteral(flags flags, lex_state lex_state, act_state act_state, err err /* END OF INLINE: 111 */ /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZInode) = ast_make_expr_literal(act_state->poolp, *flags, (ZIc)); if ((ZInode) == NULL) { @@ -1381,7 +1381,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: CHAR */ { -#line 579 "src/libre/parser.act" +#line 575 "src/libre/parser.act" /* the first byte may be '\x00' */ assert(lex_state->buf.a[1] == '\0'); @@ -1413,7 +1413,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: CONTROL */ { -#line 448 "src/libre/parser.act" +#line 442 "src/libre/parser.act" assert(lex_state->buf.a[0] == '\\'); assert(lex_state->buf.a[1] == 'c'); @@ -1461,7 +1461,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ ADVANCE_LEXER; /* BEGINNING OF ACTION: err-unsupported */ { -#line 764 "src/libre/parser.act" +#line 760 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EUNSUPPORTED; @@ -1486,7 +1486,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: ESC */ { -#line 391 "src/libre/parser.act" +#line 386 "src/libre/parser.act" assert(lex_state->buf.a[0] == '\\'); assert(lex_state->buf.a[1] != '\0'); @@ -1531,7 +1531,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: HEX */ { -#line 535 "src/libre/parser.act" +#line 527 "src/libre/parser.act" unsigned long u; char *s, *e; @@ -1598,7 +1598,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: NAMED_CLASS */ { -#line 648 "src/libre/parser.act" +#line 647 "src/libre/parser.act" ZI290 = DIALECT_CLASS(lex_state->buf.a); if (ZI290 == NULL) { @@ -1631,7 +1631,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: NOESC */ { -#line 417 "src/libre/parser.act" +#line 412 "src/libre/parser.act" assert(lex_state->buf.a[0] == '\\'); assert(lex_state->buf.a[1] != '\0'); @@ -1651,7 +1651,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZInode) = ast_make_expr_literal(act_state->poolp, *flags, (ZIc)); if ((ZInode) == NULL) { @@ -1671,7 +1671,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: OCT */ { -#line 492 "src/libre/parser.act" +#line 484 "src/libre/parser.act" unsigned long u; char *s, *e; @@ -1733,7 +1733,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: UNSUPPORTED */ { -#line 429 "src/libre/parser.act" +#line 426 "src/libre/parser.act" /* handle \1-\9 back references */ if (lex_state->buf.a[0] == '\\' && lex_state->buf.a[1] != '\0' && lex_state->buf.a[2] == '\0') { @@ -1755,7 +1755,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ ADVANCE_LEXER; /* BEGINNING OF ACTION: err-unsupported */ { -#line 764 "src/libre/parser.act" +#line 760 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EUNSUPPORTED; @@ -1801,7 +1801,7 @@ p_expr_C_Ccomment(flags flags, lex_state lex_state, act_state act_state, err err ADVANCE_LEXER; /* BEGINNING OF ACTION: err-invalid-comment */ { -#line 687 "src/libre/parser.act" +#line 683 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EBADCOMMENT; @@ -1835,7 +1835,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hclass(flags fl case (TOK_NAMED__CLASS): /* BEGINNING OF EXTRACT: NAMED_CLASS */ { -#line 648 "src/libre/parser.act" +#line 647 "src/libre/parser.act" ZIid = DIALECT_CLASS(lex_state->buf.a); if (ZIid == NULL) { @@ -1859,7 +1859,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_C_Crange_Hendpoint_Hclass(flags fl ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-range-endpoint-class */ { -#line 845 "src/libre/parser.act" +#line 844 "src/libre/parser.act" (ZIr).type = AST_ENDPOINT_NAMED; (ZIr).u.named.class = (ZIid); @@ -1899,7 +1899,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* BEGINNING OF EXTRACT: OPENGROUP */ { -#line 319 "src/libre/parser.act" +#line 318 "src/libre/parser.act" ZIstart = lex_state->lx.start; ZI181 = lex_state->lx.end; @@ -1913,7 +1913,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-alt */ { -#line 868 "src/libre/parser.act" +#line 867 "src/libre/parser.act" (ZInode) = ast_make_expr_alt(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -1940,7 +1940,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* BEGINNING OF EXTRACT: OPENGROUPCB */ { -#line 335 "src/libre/parser.act" +#line 334 "src/libre/parser.act" ZIstart = lex_state->lx.start; ZI200 = lex_state->lx.end; @@ -1954,7 +1954,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-alt */ { -#line 868 "src/libre/parser.act" +#line 867 "src/libre/parser.act" (ZInode) = ast_make_expr_alt(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -1967,7 +1967,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ZItmp = ZInode; /* BEGINNING OF ACTION: make-literal-cbrak */ { -#line 886 "src/libre/parser.act" +#line 885 "src/libre/parser.act" (ZIcbrak) = ']'; @@ -1981,7 +1981,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state } /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((ZItmp), (ZInode1))) { goto ZL1; @@ -2003,7 +2003,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* BEGINNING OF EXTRACT: OPENGROUPINV */ { -#line 327 "src/libre/parser.act" +#line 326 "src/libre/parser.act" ZIstart = lex_state->lx.start; ZI192 = lex_state->lx.end; @@ -2017,7 +2017,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-alt */ { -#line 868 "src/libre/parser.act" +#line 867 "src/libre/parser.act" (ZInode) = ast_make_expr_alt(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -2030,7 +2030,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ZItmp = ZInode; /* BEGINNING OF ACTION: ast-make-invert */ { -#line 995 "src/libre/parser.act" +#line 966 "src/libre/parser.act" struct ast_expr *any; @@ -2087,7 +2087,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* BEGINNING OF EXTRACT: OPENGROUPINVCB */ { -#line 343 "src/libre/parser.act" +#line 342 "src/libre/parser.act" ZIstart = lex_state->lx.start; ZI207 = lex_state->lx.end; @@ -2101,7 +2101,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-alt */ { -#line 868 "src/libre/parser.act" +#line 867 "src/libre/parser.act" (ZInode) = ast_make_expr_alt(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -2114,7 +2114,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ZItmp = ZInode; /* BEGINNING OF ACTION: ast-make-invert */ { -#line 995 "src/libre/parser.act" +#line 966 "src/libre/parser.act" struct ast_expr *any; @@ -2157,7 +2157,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* END OF ACTION: ast-make-invert */ /* BEGINNING OF ACTION: make-literal-cbrak */ { -#line 886 "src/libre/parser.act" +#line 885 "src/libre/parser.act" (ZIcbrak) = ']'; @@ -2171,7 +2171,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state } /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((ZItmp), (ZInode1))) { goto ZL1; @@ -2203,7 +2203,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* BEGINNING OF EXTRACT: CLOSEGROUP */ { -#line 351 "src/libre/parser.act" +#line 350 "src/libre/parser.act" ZI214 = ']'; ZI215 = lex_state->lx.start; @@ -2219,7 +2219,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: mark-group */ { -#line 768 "src/libre/parser.act" +#line 767 "src/libre/parser.act" mark(&act_state->groupstart, &(ZIstart)); mark(&act_state->groupend, &(ZIend)); @@ -2238,7 +2238,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* BEGINNING OF EXTRACT: CLOSEGROUPRANGE */ { -#line 361 "src/libre/parser.act" +#line 360 "src/libre/parser.act" ZIcrange = '-'; ZI217 = lex_state->lx.start; @@ -2254,7 +2254,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZIrange) = ast_make_expr_literal(act_state->poolp, *flags, (ZIcrange)); if ((ZIrange) == NULL) { @@ -2266,7 +2266,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* END OF ACTION: ast-make-literal */ /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((ZItmp), (ZIrange))) { goto ZL4; @@ -2277,7 +2277,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* END OF ACTION: ast-add-alt */ /* BEGINNING OF ACTION: mark-group */ { -#line 768 "src/libre/parser.act" +#line 767 "src/libre/parser.act" mark(&act_state->groupstart, &(ZIstart)); mark(&act_state->groupend, &(ZIend)); @@ -2295,7 +2295,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state { /* BEGINNING OF ACTION: err-expected-closegroup */ { -#line 729 "src/libre/parser.act" +#line 725 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXCLOSEGROUP; @@ -2338,7 +2338,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_Hend(flags flags, lex_state lex_st /* BEGINNING OF EXTRACT: RANGE */ { -#line 309 "src/libre/parser.act" +#line 308 "src/libre/parser.act" ZIc = '-'; ZI163 = lex_state->lx.start; @@ -2354,7 +2354,7 @@ p_expr_C_Ccharacter_Hclass_C_Crange_Hendpoint_Hend(flags flags, lex_state lex_st ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-range-endpoint-literal */ { -#line 840 "src/libre/parser.act" +#line 839 "src/libre/parser.act" (ZIr).type = AST_ENDPOINT_LITERAL; (ZIr).u.literal.c = (unsigned char) (ZIc); @@ -2401,7 +2401,7 @@ p_317(flags flags, lex_state lex_state, act_state act_state, err err, t_char *ZI { /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZInode) = ast_make_expr_literal(act_state->poolp, *flags, (*ZI314)); if ((ZInode) == NULL) { @@ -2421,7 +2421,7 @@ p_317(flags flags, lex_state lex_state, act_state act_state, err err, t_char *ZI /* BEGINNING OF ACTION: ast-range-endpoint-literal */ { -#line 840 "src/libre/parser.act" +#line 839 "src/libre/parser.act" (ZIlower).type = AST_ENDPOINT_LITERAL; (ZIlower).u.literal.c = (unsigned char) (*ZI314); @@ -2437,7 +2437,7 @@ p_317(flags flags, lex_state lex_state, act_state act_state, err err, t_char *ZI } /* BEGINNING OF ACTION: mark-range */ { -#line 773 "src/libre/parser.act" +#line 772 "src/libre/parser.act" mark(&act_state->rangestart, &(*ZI315)); mark(&act_state->rangeend, &(ZIend)); @@ -2447,7 +2447,7 @@ p_317(flags flags, lex_state lex_state, act_state act_state, err err, t_char *ZI /* END OF ACTION: mark-range */ /* BEGINNING OF ACTION: ast-make-range */ { -#line 1007 "src/libre/parser.act" +#line 1004 "src/libre/parser.act" unsigned char lower, upper; @@ -2522,7 +2522,7 @@ p_expr_C_Cpiece(flags flags, lex_state lex_state, act_state act_state, err err, /* BEGINNING OF ACTION: count-one */ { -#line 821 "src/libre/parser.act" +#line 820 "src/libre/parser.act" (ZIc) = ast_make_count(1, 1); @@ -2531,7 +2531,7 @@ p_expr_C_Cpiece(flags flags, lex_state lex_state, act_state act_state, err err, /* END OF ACTION: count-one */ /* BEGINNING OF ACTION: ast-make-piece */ { -#line 898 "src/libre/parser.act" +#line 897 "src/libre/parser.act" if ((ZIc).min == 0 && (ZIc).max == 0) { (ZInode) = ast_make_expr_empty(act_state->poolp, *flags); @@ -2578,7 +2578,7 @@ p_320(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI3 /* BEGINNING OF EXTRACT: CLOSECOUNT */ { -#line 379 "src/libre/parser.act" +#line 378 "src/libre/parser.act" ZI256 = lex_state->lx.start; ZIend = lex_state->lx.end; @@ -2592,7 +2592,7 @@ p_320(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI3 ADVANCE_LEXER; /* BEGINNING OF ACTION: mark-count */ { -#line 778 "src/libre/parser.act" +#line 777 "src/libre/parser.act" mark(&act_state->countstart, &(*ZI318)); mark(&act_state->countend, &(ZIend)); @@ -2602,7 +2602,7 @@ p_320(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI3 /* END OF ACTION: mark-count */ /* BEGINNING OF ACTION: count-range */ { -#line 825 "src/libre/parser.act" +#line 824 "src/libre/parser.act" if ((*ZIm) < (*ZIm)) { err->e = RE_ENEGCOUNT; @@ -2656,7 +2656,7 @@ p_expr(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__ex { /* BEGINNING OF ACTION: ast-make-alt */ { -#line 868 "src/libre/parser.act" +#line 867 "src/libre/parser.act" (ZInode) = ast_make_expr_alt(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -2677,7 +2677,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-alts */ { -#line 715 "src/libre/parser.act" +#line 711 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXALTS; @@ -2689,7 +2689,7 @@ ZL1:; /* END OF ACTION: err-expected-alts */ /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZInode) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -2722,7 +2722,7 @@ p_321(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI3 /* BEGINNING OF EXTRACT: CLOSECOUNT */ { -#line 379 "src/libre/parser.act" +#line 378 "src/libre/parser.act" ZI261 = lex_state->lx.start; ZIend = lex_state->lx.end; @@ -2736,7 +2736,7 @@ p_321(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI3 ADVANCE_LEXER; /* BEGINNING OF ACTION: mark-count */ { -#line 778 "src/libre/parser.act" +#line 777 "src/libre/parser.act" mark(&act_state->countstart, &(*ZI318)); mark(&act_state->countend, &(ZIend)); @@ -2746,7 +2746,7 @@ p_321(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI3 /* END OF ACTION: mark-count */ /* BEGINNING OF ACTION: count-unbounded */ { -#line 805 "src/libre/parser.act" +#line 804 "src/libre/parser.act" (ZIn) = AST_COUNT_UNBOUNDED; @@ -2755,7 +2755,7 @@ p_321(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI3 /* END OF ACTION: count-unbounded */ /* BEGINNING OF ACTION: count-range */ { -#line 825 "src/libre/parser.act" +#line 824 "src/libre/parser.act" if ((ZIn) < (*ZIm)) { err->e = RE_ENEGCOUNT; @@ -2783,7 +2783,7 @@ p_321(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI3 /* BEGINNING OF EXTRACT: COUNT */ { -#line 636 "src/libre/parser.act" +#line 627 "src/libre/parser.act" unsigned long u; char *e; @@ -2811,7 +2811,7 @@ p_321(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI3 case (TOK_CLOSECOUNT): /* BEGINNING OF EXTRACT: CLOSECOUNT */ { -#line 379 "src/libre/parser.act" +#line 378 "src/libre/parser.act" ZI259 = lex_state->lx.start; ZIend = lex_state->lx.end; @@ -2829,7 +2829,7 @@ p_321(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI3 ADVANCE_LEXER; /* BEGINNING OF ACTION: mark-count */ { -#line 778 "src/libre/parser.act" +#line 777 "src/libre/parser.act" mark(&act_state->countstart, &(*ZI318)); mark(&act_state->countend, &(ZIend)); @@ -2839,7 +2839,7 @@ p_321(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI3 /* END OF ACTION: mark-count */ /* BEGINNING OF ACTION: count-range */ { -#line 825 "src/libre/parser.act" +#line 824 "src/libre/parser.act" if ((ZIn) < (*ZIm)) { err->e = RE_ENEGCOUNT; @@ -2885,7 +2885,7 @@ p_194(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp /* BEGINNING OF EXTRACT: RANGE */ { -#line 309 "src/libre/parser.act" +#line 308 "src/libre/parser.act" ZIc = '-'; ZIrstart = lex_state->lx.start; @@ -2906,7 +2906,7 @@ p_194(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp { /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZInode1) = ast_make_expr_literal(act_state->poolp, *flags, (ZIc)); if ((ZInode1) == NULL) { @@ -2929,7 +2929,7 @@ p_194(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp /* BEGINNING OF ACTION: ast-range-endpoint-literal */ { -#line 840 "src/libre/parser.act" +#line 839 "src/libre/parser.act" (ZIlower).type = AST_ENDPOINT_LITERAL; (ZIlower).u.literal.c = (unsigned char) (ZIc); @@ -2939,7 +2939,7 @@ p_194(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp /* END OF ACTION: ast-range-endpoint-literal */ /* BEGINNING OF EXTRACT: RANGE */ { -#line 309 "src/libre/parser.act" +#line 308 "src/libre/parser.act" ZI197 = '-'; ZI198 = lex_state->lx.start; @@ -2960,7 +2960,7 @@ p_194(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp } /* BEGINNING OF ACTION: ast-make-range */ { -#line 1007 "src/libre/parser.act" +#line 1004 "src/libre/parser.act" unsigned char lower, upper; @@ -2999,7 +2999,7 @@ p_194(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp /* END OF INLINE: 196 */ /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((*ZItmp), (ZInode1))) { goto ZL1; @@ -3044,7 +3044,7 @@ p_expr_C_Cflags(flags flags, lex_state lex_state, act_state act_state, err err, ADVANCE_LEXER; /* BEGINNING OF ACTION: re-flag-none */ { -#line 797 "src/libre/parser.act" +#line 796 "src/libre/parser.act" (ZIempty__pos) = RE_FLAGS_NONE; @@ -3053,7 +3053,7 @@ p_expr_C_Cflags(flags flags, lex_state lex_state, act_state act_state, err err, /* END OF ACTION: re-flag-none */ /* BEGINNING OF ACTION: re-flag-none */ { -#line 797 "src/libre/parser.act" +#line 796 "src/libre/parser.act" (ZIempty__neg) = RE_FLAGS_NONE; @@ -3110,7 +3110,7 @@ p_expr_C_Cflags(flags flags, lex_state lex_state, act_state act_state, err err, ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-mask-re-flags */ { -#line 931 "src/libre/parser.act" +#line 926 "src/libre/parser.act" /* * Note: in cases like `(?i-i)`, the negative is @@ -3124,7 +3124,7 @@ p_expr_C_Cflags(flags flags, lex_state lex_state, act_state act_state, err err, /* END OF ACTION: ast-mask-re-flags */ /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZInode) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -3144,7 +3144,7 @@ p_expr_C_Cflags(flags flags, lex_state lex_state, act_state act_state, err err, ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-get-re-flags */ { -#line 919 "src/libre/parser.act" +#line 918 "src/libre/parser.act" (ZIflags) = *flags; @@ -3153,7 +3153,7 @@ p_expr_C_Cflags(flags flags, lex_state lex_state, act_state act_state, err err, /* END OF ACTION: ast-get-re-flags */ /* BEGINNING OF ACTION: ast-mask-re-flags */ { -#line 931 "src/libre/parser.act" +#line 926 "src/libre/parser.act" /* * Note: in cases like `(?i-i)`, the negative is @@ -3172,7 +3172,7 @@ p_expr_C_Cflags(flags flags, lex_state lex_state, act_state act_state, err err, } /* BEGINNING OF ACTION: ast-set-re-flags */ { -#line 923 "src/libre/parser.act" +#line 922 "src/libre/parser.act" *flags = (ZIflags); @@ -3197,7 +3197,7 @@ p_expr_C_Cflags(flags flags, lex_state lex_state, act_state act_state, err err, { /* BEGINNING OF ACTION: err-expected-closeflags */ { -#line 750 "src/libre/parser.act" +#line 746 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXCLOSEFLAGS; @@ -3209,7 +3209,7 @@ p_expr_C_Cflags(flags flags, lex_state lex_state, act_state act_state, err err, /* END OF ACTION: err-expected-closeflags */ /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZInode) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -3250,7 +3250,7 @@ p_expr_C_Cpiece_C_Clist_Hof_Hcounts(flags flags, lex_state lex_state, act_state } /* BEGINNING OF ACTION: ast-make-piece */ { -#line 898 "src/libre/parser.act" +#line 897 "src/libre/parser.act" if ((ZIc).min == 0 && (ZIc).max == 0) { (ZInode) = ast_make_expr_empty(act_state->poolp, *flags); @@ -3275,7 +3275,7 @@ p_expr_C_Cpiece_C_Clist_Hof_Hcounts(flags flags, lex_state lex_state, act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: err-unsupported */ { -#line 764 "src/libre/parser.act" +#line 760 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EUNSUPPORTED; @@ -3292,7 +3292,7 @@ p_expr_C_Cpiece_C_Clist_Hof_Hcounts(flags flags, lex_state lex_state, act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: err-unsupported */ { -#line 764 "src/libre/parser.act" +#line 760 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EUNSUPPORTED; @@ -3384,7 +3384,7 @@ p_class_Hnamed(flags flags, lex_state lex_state, act_state act_state, err err, t case (TOK_NAMED__CLASS): /* BEGINNING OF EXTRACT: NAMED_CLASS */ { -#line 648 "src/libre/parser.act" +#line 647 "src/libre/parser.act" ZIid = DIALECT_CLASS(lex_state->buf.a); if (ZIid == NULL) { @@ -3408,7 +3408,7 @@ p_class_Hnamed(flags flags, lex_state lex_state, act_state act_state, err err, t ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-named */ { -#line 1034 "src/libre/parser.act" +#line 1033 "src/libre/parser.act" (ZInode) = ast_make_expr_named(act_state->poolp, *flags, (ZIid)); if ((ZInode) == NULL) { @@ -3439,7 +3439,7 @@ p_209(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZIs { /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZInode1) = ast_make_expr_literal(act_state->poolp, *flags, (*ZIcbrak)); if ((ZInode1) == NULL) { @@ -3463,7 +3463,7 @@ p_209(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZIs /* BEGINNING OF ACTION: ast-range-endpoint-literal */ { -#line 840 "src/libre/parser.act" +#line 839 "src/libre/parser.act" (ZIr).type = AST_ENDPOINT_LITERAL; (ZIr).u.literal.c = (unsigned char) (*ZIcbrak); @@ -3473,7 +3473,7 @@ p_209(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZIs /* END OF ACTION: ast-range-endpoint-literal */ /* BEGINNING OF EXTRACT: RANGE */ { -#line 309 "src/libre/parser.act" +#line 308 "src/libre/parser.act" ZI210 = '-'; ZI211 = lex_state->lx.start; @@ -3494,7 +3494,7 @@ p_209(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZIs } /* BEGINNING OF ACTION: ast-range-endpoint-literal */ { -#line 840 "src/libre/parser.act" +#line 839 "src/libre/parser.act" (ZIlower).type = AST_ENDPOINT_LITERAL; (ZIlower).u.literal.c = (unsigned char) (*ZIcbrak); @@ -3504,7 +3504,7 @@ p_209(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZIs /* END OF ACTION: ast-range-endpoint-literal */ /* BEGINNING OF ACTION: ast-make-range */ { -#line 1007 "src/libre/parser.act" +#line 1004 "src/libre/parser.act" unsigned char lower, upper; @@ -3566,7 +3566,7 @@ ZL2_expr_C_Clist_Hof_Halts:; } /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((ZIalts), (ZIa))) { goto ZL1; @@ -3585,7 +3585,7 @@ ZL2_expr_C_Clist_Hof_Halts:; goto ZL2_expr_C_Clist_Hof_Halts; /* END OF INLINE: expr::list-of-alts */ } - /*UNREACHED*/ + /* UNREACHED */ default: break; } @@ -3597,7 +3597,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-alts */ { -#line 715 "src/libre/parser.act" +#line 711 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXALTS; @@ -3629,7 +3629,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, /* BEGINNING OF EXTRACT: OPENCOUNT */ { -#line 371 "src/libre/parser.act" +#line 370 "src/libre/parser.act" ZI318 = lex_state->lx.start; ZI319 = lex_state->lx.end; @@ -3645,7 +3645,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, case (TOK_COUNT): /* BEGINNING OF EXTRACT: COUNT */ { -#line 636 "src/libre/parser.act" +#line 627 "src/libre/parser.act" unsigned long u; char *e; @@ -3685,7 +3685,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: count-zero-or-one */ { -#line 817 "src/libre/parser.act" +#line 816 "src/libre/parser.act" (ZIc) = ast_make_count(0, 1); @@ -3699,7 +3699,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: count-one-or-more */ { -#line 813 "src/libre/parser.act" +#line 812 "src/libre/parser.act" (ZIc) = ast_make_count(1, AST_COUNT_UNBOUNDED); @@ -3713,7 +3713,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: count-zero-or-more */ { -#line 809 "src/libre/parser.act" +#line 808 "src/libre/parser.act" (ZIc) = ast_make_count(0, AST_COUNT_UNBOUNDED); @@ -3732,7 +3732,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-count */ { -#line 701 "src/libre/parser.act" +#line 697 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXCOUNT; @@ -3744,7 +3744,7 @@ ZL1:; /* END OF ACTION: err-expected-count */ /* BEGINNING OF ACTION: count-one */ { -#line 821 "src/libre/parser.act" +#line 820 "src/libre/parser.act" (ZIc) = ast_make_count(1, 1); @@ -3774,7 +3774,7 @@ p_re__pcre(flags flags, lex_state lex_state, act_state act_state, err err, t_ast /* BEGINNING OF ACTION: make-group-id */ { -#line 882 "src/libre/parser.act" +#line 881 "src/libre/parser.act" (ZIid) = act_state->group_id++; @@ -3788,7 +3788,7 @@ p_re__pcre(flags flags, lex_state lex_state, act_state act_state, err err, t_ast } /* BEGINNING OF ACTION: ast-make-group */ { -#line 912 "src/libre/parser.act" +#line 911 "src/libre/parser.act" (ZInode) = ast_make_expr_group(act_state->poolp, *flags, (ZIe), (ZIid)); if ((ZInode) == NULL) { @@ -3814,7 +3814,7 @@ p_re__pcre(flags flags, lex_state lex_state, act_state act_state, err err, t_ast { /* BEGINNING OF ACTION: err-expected-eof */ { -#line 757 "src/libre/parser.act" +#line 753 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXEOF; @@ -3850,7 +3850,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e ADVANCE_LEXER; /* BEGINNING OF ACTION: class-any */ { -#line 784 "src/libre/parser.act" +#line 782 "src/libre/parser.act" /* TODO: or the unicode equivalent */ (ZIa) = (*flags & RE_SINGLE) ? &class_any : &class_notnl; @@ -3860,7 +3860,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: class-any */ /* BEGINNING OF ACTION: ast-make-named */ { -#line 1034 "src/libre/parser.act" +#line 1033 "src/libre/parser.act" (ZIe) = ast_make_expr_named(act_state->poolp, *flags, (ZIa)); if ((ZIe) == NULL) { @@ -3877,7 +3877,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-anchor-end */ { -#line 943 "src/libre/parser.act" +#line 942 "src/libre/parser.act" (ZIe) = ast_make_expr_anchor(act_state->poolp, *flags, AST_ANCHOR_END); if ((ZIe) == NULL) { @@ -3894,7 +3894,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-anchor-end-nl */ { -#line 950 "src/libre/parser.act" +#line 949 "src/libre/parser.act" (ZIe) = ast_make_expr_anchor(act_state->poolp, *flags, AST_ANCHOR_END); if ((ZIe) == NULL) { @@ -3922,7 +3922,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e ADVANCE_LEXER; /* BEGINNING OF ACTION: class-bsr */ { -#line 789 "src/libre/parser.act" +#line 787 "src/libre/parser.act" /* TODO: or the unicode equivalent */ (ZIclass__bsr) = &class_bsr; @@ -3932,7 +3932,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: class-bsr */ /* BEGINNING OF ACTION: ast-make-named */ { -#line 1034 "src/libre/parser.act" +#line 1033 "src/libre/parser.act" (ZIbsr) = ast_make_expr_named(act_state->poolp, *flags, (ZIclass__bsr)); if ((ZIbsr) == NULL) { @@ -3944,7 +3944,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: ast-make-named */ /* BEGINNING OF ACTION: ast-make-concat */ { -#line 861 "src/libre/parser.act" +#line 860 "src/libre/parser.act" (ZIcrlf) = ast_make_expr_concat(act_state->poolp, *flags); if ((ZIcrlf) == NULL) { @@ -3956,7 +3956,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: ast-make-concat */ /* BEGINNING OF ACTION: make-literal-cr */ { -#line 890 "src/libre/parser.act" +#line 889 "src/libre/parser.act" (ZIcr) = '\r'; @@ -3965,7 +3965,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: make-literal-cr */ /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZIecr) = ast_make_expr_literal(act_state->poolp, *flags, (ZIcr)); if ((ZIecr) == NULL) { @@ -3977,7 +3977,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: ast-make-literal */ /* BEGINNING OF ACTION: make-literal-nl */ { -#line 894 "src/libre/parser.act" +#line 893 "src/libre/parser.act" (ZInl) = '\n'; @@ -3986,7 +3986,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: make-literal-nl */ /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZIenl) = ast_make_expr_literal(act_state->poolp, *flags, (ZInl)); if ((ZIenl) == NULL) { @@ -3998,7 +3998,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: ast-make-literal */ /* BEGINNING OF ACTION: ast-add-concat */ { -#line 1041 "src/libre/parser.act" +#line 1040 "src/libre/parser.act" if (!ast_add_expr_concat((ZIcrlf), (ZIecr))) { goto ZL1; @@ -4009,7 +4009,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: ast-add-concat */ /* BEGINNING OF ACTION: ast-add-concat */ { -#line 1041 "src/libre/parser.act" +#line 1040 "src/libre/parser.act" if (!ast_add_expr_concat((ZIcrlf), (ZIenl))) { goto ZL1; @@ -4020,7 +4020,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: ast-add-concat */ /* BEGINNING OF ACTION: ast-make-alt */ { -#line 868 "src/libre/parser.act" +#line 867 "src/libre/parser.act" (ZIe) = ast_make_expr_alt(act_state->poolp, *flags); if ((ZIe) == NULL) { @@ -4032,7 +4032,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: ast-make-alt */ /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((ZIe), (ZIcrlf))) { goto ZL1; @@ -4043,7 +4043,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: ast-add-alt */ /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((ZIe), (ZIbsr))) { goto ZL1; @@ -4063,7 +4063,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-get-re-flags */ { -#line 919 "src/libre/parser.act" +#line 918 "src/libre/parser.act" (ZIflags) = *flags; @@ -4072,7 +4072,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: ast-get-re-flags */ /* BEGINNING OF ACTION: make-group-id */ { -#line 882 "src/libre/parser.act" +#line 881 "src/libre/parser.act" (ZIid) = act_state->group_id++; @@ -4086,7 +4086,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e } /* BEGINNING OF ACTION: ast-set-re-flags */ { -#line 923 "src/libre/parser.act" +#line 922 "src/libre/parser.act" *flags = (ZIflags); @@ -4095,7 +4095,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: ast-set-re-flags */ /* BEGINNING OF ACTION: ast-make-group */ { -#line 912 "src/libre/parser.act" +#line 911 "src/libre/parser.act" (ZIe) = ast_make_expr_group(act_state->poolp, *flags, (ZIg), (ZIid)); if ((ZIe) == NULL) { @@ -4119,7 +4119,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-anchor-start */ { -#line 936 "src/libre/parser.act" +#line 935 "src/libre/parser.act" (ZIe) = ast_make_expr_anchor(act_state->poolp, *flags, AST_ANCHOR_START); if ((ZIe) == NULL) { @@ -4178,7 +4178,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-atom */ { -#line 708 "src/libre/parser.act" +#line 704 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXATOM; @@ -4190,7 +4190,7 @@ ZL1:; /* END OF ACTION: err-expected-atom */ /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZIe) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZIe) == NULL) { @@ -4223,7 +4223,7 @@ p_expr_C_Calt(flags flags, lex_state lex_state, act_state act_state, err err, t_ { /* BEGINNING OF ACTION: ast-make-concat */ { -#line 861 "src/libre/parser.act" +#line 860 "src/libre/parser.act" (ZInode) = ast_make_expr_concat(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -4244,7 +4244,7 @@ p_expr_C_Calt(flags flags, lex_state lex_state, act_state act_state, err err, t_ { /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZInode) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -4287,7 +4287,7 @@ p_expr_C_Ctype(flags flags, lex_state lex_state, act_state act_state, err err, t } /* BEGINNING OF ACTION: ast-make-alt */ { -#line 868 "src/libre/parser.act" +#line 867 "src/libre/parser.act" (ZInode) = ast_make_expr_alt(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -4299,7 +4299,7 @@ p_expr_C_Ctype(flags flags, lex_state lex_state, act_state act_state, err err, t /* END OF ACTION: ast-make-alt */ /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((ZInode), (ZIclass))) { goto ZL1; @@ -4319,7 +4319,7 @@ ZL0:; /* BEGINNING OF TRAILER */ -#line 1207 "src/libre/parser.act" +#line 1052 "src/libre/parser.act" static int diff --git a/src/libre/dialect/pcre/parser.h b/src/libre/dialect/pcre/parser.h index c0cfbabe3..84ef34223 100644 --- a/src/libre/dialect/pcre/parser.h +++ b/src/libre/dialect/pcre/parser.h @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 292 "src/libre/parser.act" +#line 281 "src/libre/parser.act" #include @@ -28,7 +28,7 @@ extern void p_re__pcre(flags, lex_state, act_state, err, t_ast__expr *); /* BEGINNING OF TRAILER */ -#line 1209 "src/libre/parser.act" +#line 1207 "src/libre/parser.act" #line 35 "src/libre/dialect/pcre/parser.h" diff --git a/src/libre/dialect/sql/lexer.c b/src/libre/dialect/sql/lexer.c index 6c35bf800..87459e786 100644 --- a/src/libre/dialect/sql/lexer.c +++ b/src/libre/dialect/sql/lexer.c @@ -12,11 +12,31 @@ static enum lx_sql_token z0(struct lx_sql_lx *lx); static enum lx_sql_token z1(struct lx_sql_lx *lx); static enum lx_sql_token z2(struct lx_sql_lx *lx); +static int +lx_sql_advance_end(struct lx_sql_lx *lx, int c) +{ + lx->end.byte++; + lx->end.col++; + if (c == '\n') { + lx->end.line++; + lx->end.saved_col = lx->end.col - 1; + lx->end.col = 1; + } + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return 0; + } + } + return 1; +} + +/* This wrapper manages one character of lookahead/pushback + * and the line, column, and byte offsets. */ #if __STDC_VERSION__ >= 199901L inline #endif static int -lx_getc(struct lx_sql_lx *lx) +lx_sql_getc(struct lx_sql_lx *lx) { int c; @@ -32,18 +52,19 @@ lx_getc(struct lx_sql_lx *lx) } } - lx->end.byte++; - lx->end.col++; - - if (c == '\n') { - lx->end.line++; - lx->end.saved_col = lx->end.col - 1; - lx->end.col = 1; - } + if (!lx_sql_advance_end(lx, c)) { return EOF; } return c; } +/* This wrapper adapts calling lx_sql_getc to the interface + * in libfsm's generated code. */ +static int +fsm_getc(void *getc_opaque) +{ + return lx_sql_getc((struct lx_sql_lx *)getc_opaque); +} + #if __STDC_VERSION__ >= 199901L inline #endif @@ -52,10 +73,7 @@ lx_sql_ungetc(struct lx_sql_lx *lx, int c) { assert(lx != NULL); assert(lx->c == EOF); - lx->c = c; - - lx->end.byte--; lx->end.col--; @@ -107,6 +125,17 @@ lx_sql_dynpush(void *buf_opaque, char c) return 0; } +static void +lx_sql_dynpop(void *buf_opaque) +{ + struct lx_dynbuf *t = buf_opaque; + + assert(t != NULL); + + assert(t->p != t->a); + t->p--; +} + int lx_sql_dynclear(void *buf_opaque) { @@ -146,29 +175,28 @@ lx_sql_dynfree(void *buf_opaque) static enum lx_sql_token z0(struct lx_sql_lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3 + } state; + + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case ',': state = S1; break; case '0': @@ -182,12 +210,14 @@ z0(struct lx_sql_lx *lx) case '8': case '9': state = S2; break; case '}': state = S3; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S1: /* e.g. "," */ - lx_sql_ungetc(lx, c); return TOK_SEP; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_SEP; case S2: /* e.g. "0" */ switch ((unsigned char) c) { @@ -201,64 +231,70 @@ z0(struct lx_sql_lx *lx) case '7': case '8': case '9': break; - default: lx_sql_ungetc(lx, c); return TOK_COUNT; + default: lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_COUNT; } break; case S3: /* e.g. "}" */ - lx_sql_ungetc(lx, c); return lx->z = z2, TOK_CLOSECOUNT; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return lx->z = z2, TOK_CLOSECOUNT; default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_SEP; + case S2: return TOK_COUNT; + case S3: return lx->z = z2, TOK_CLOSECOUNT; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } if (lx->push != NULL) { if (-1 == lx->push(lx->buf_opaque, (char)c)) { return TOK_ERROR; } } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_SEP; - case S2: return TOK_COUNT; - case S3: return TOK_CLOSECOUNT; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_sql_token z1(struct lx_sql_lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, - S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, - S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, - S30, S31, S32, S33, S34, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, + S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, + S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, + S30, S31, S32, S33, S34 + } state; + + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '[': state = S1; break; case '-': state = S3; break; @@ -271,21 +307,21 @@ z1(struct lx_sql_lx *lx) case S1: /* e.g. "[" */ switch ((unsigned char) c) { case ':': state = S6; break; - default: lx_sql_ungetc(lx, c); return TOK_CHAR; + default: lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_CHAR; } break; case S2: /* e.g. "\\x00" */ - lx_sql_ungetc(lx, c); return TOK_CHAR; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_CHAR; case S3: /* e.g. "-" */ - lx_sql_ungetc(lx, c); return TOK_RANGE; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_RANGE; case S4: /* e.g. "^" */ - lx_sql_ungetc(lx, c); return TOK_INVERT; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_INVERT; case S5: /* e.g. "]" */ - lx_sql_ungetc(lx, c); return lx->z = z2, TOK_CLOSEGROUP; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return lx->z = z2, TOK_CLOSEGROUP; case S6: /* e.g. "[:" */ switch ((unsigned char) c) { @@ -295,94 +331,120 @@ z1(struct lx_sql_lx *lx) case 'A': state = S10; break; case 'L': state = S11; break; case 'U': state = S12; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S7: /* e.g. "[:S" */ switch ((unsigned char) c) { case 'P': state = S32; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S8: /* e.g. "[:W" */ switch ((unsigned char) c) { case 'H': state = S28; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S9: /* e.g. "[:D" */ switch ((unsigned char) c) { case 'I': state = S25; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S10: /* e.g. "[:A" */ switch ((unsigned char) c) { case 'L': state = S20; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S11: /* e.g. "[:L" */ switch ((unsigned char) c) { case 'O': state = S19; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S12: /* e.g. "[:U" */ switch ((unsigned char) c) { case 'P': state = S13; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S13: /* e.g. "[:UP" */ switch ((unsigned char) c) { case 'P': state = S14; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S14: /* e.g. "[:LOW" */ switch ((unsigned char) c) { case 'E': state = S15; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S15: /* e.g. "[:LOWE" */ switch ((unsigned char) c) { case 'R': state = S16; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S16: /* e.g. "[:ALPHA" */ switch ((unsigned char) c) { case ':': state = S17; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S17: /* e.g. "[:ALPHA:" */ switch ((unsigned char) c) { case ']': state = S18; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S18: /* e.g. "[:ALPHA:]" */ - lx_sql_ungetc(lx, c); return TOK_NAMED__CLASS; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_NAMED__CLASS; case S19: /* e.g. "[:LO" */ switch ((unsigned char) c) { case 'W': state = S14; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; @@ -390,160 +452,196 @@ z1(struct lx_sql_lx *lx) switch ((unsigned char) c) { case 'N': state = S21; break; case 'P': state = S22; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S21: /* e.g. "[:ALN" */ switch ((unsigned char) c) { case 'U': state = S24; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S22: /* e.g. "[:ALP" */ switch ((unsigned char) c) { case 'H': state = S23; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S23: /* e.g. "[:ALPH" */ switch ((unsigned char) c) { case 'A': state = S16; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S24: /* e.g. "[:ALNU" */ switch ((unsigned char) c) { case 'M': state = S16; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S25: /* e.g. "[:DI" */ switch ((unsigned char) c) { case 'G': state = S26; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S26: /* e.g. "[:DIG" */ switch ((unsigned char) c) { case 'I': state = S27; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S27: /* e.g. "[:DIGI" */ switch ((unsigned char) c) { case 'T': state = S16; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S28: /* e.g. "[:WH" */ switch ((unsigned char) c) { case 'I': state = S29; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S29: /* e.g. "[:WHI" */ switch ((unsigned char) c) { case 'T': state = S30; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S30: /* e.g. "[:WHIT" */ switch ((unsigned char) c) { case 'E': state = S31; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S31: /* e.g. "[:WHITE" */ switch ((unsigned char) c) { case 'S': state = S7; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S32: /* e.g. "[:SP" */ switch ((unsigned char) c) { case 'A': state = S33; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S33: /* e.g. "[:SPA" */ switch ((unsigned char) c) { case 'C': state = S34; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; case S34: /* e.g. "[:SPAC" */ switch ((unsigned char) c) { case 'E': state = S16; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; default: ; /* unreached */ } - - if (lx->push != NULL) { - if (-1 == lx->push(lx->buf_opaque, (char)c)) { - return TOK_ERROR; - } - } } - lx->lgetc = NULL; - + /* end states */ switch (state) { - case NONE: return TOK_EOF; case S1: return TOK_CHAR; case S2: return TOK_CHAR; case S3: return TOK_RANGE; case S4: return TOK_INVERT; - case S5: return TOK_CLOSEGROUP; + case S5: return lx->z = z2, TOK_CLOSEGROUP; case S18: return TOK_NAMED__CLASS; - default: errno = EINVAL; return TOK_ERROR; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } + + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return TOK_ERROR; + } + } + + lx->lgetc = NULL; + + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_sql_token z2(struct lx_sql_lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, - S10, S11, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, + S10, S11 + } state; + state = S0; + + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '{': state = S2; break; case '[': state = S3; break; @@ -560,56 +658,48 @@ z2(struct lx_sql_lx *lx) break; case S1: /* e.g. "\\x00" */ - lx_sql_ungetc(lx, c); return TOK_CHAR; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_CHAR; case S2: /* e.g. "{" */ - lx_sql_ungetc(lx, c); return lx->z = z0, TOK_OPENCOUNT; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return lx->z = z0, TOK_OPENCOUNT; case S3: /* e.g. "[" */ - lx_sql_ungetc(lx, c); return lx->z = z1, TOK_OPENGROUP; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return lx->z = z1, TOK_OPENGROUP; case S4: /* e.g. "|" */ - lx_sql_ungetc(lx, c); return TOK_ALT; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_ALT; case S5: /* e.g. "+" */ - lx_sql_ungetc(lx, c); return TOK_PLUS; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_PLUS; case S6: /* e.g. "*" */ - lx_sql_ungetc(lx, c); return TOK_STAR; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_STAR; - case S7: /* e.g. "?" */ - lx_sql_ungetc(lx, c); return TOK_OPT; + case S7: /* e.g. "\077" */ + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_OPT; case S8: /* e.g. ")" */ - lx_sql_ungetc(lx, c); return TOK_CLOSESUB; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_CLOSESUB; case S9: /* e.g. "(" */ - lx_sql_ungetc(lx, c); return TOK_OPENSUB; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_OPENSUB; case S10: /* e.g. "%" */ - lx_sql_ungetc(lx, c); return TOK_MANY; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_MANY; case S11: /* e.g. "_" */ - lx_sql_ungetc(lx, c); return TOK_ANY; + lx_sql_ungetc(lx, c); lx_sql_dynpop(lx->buf_opaque); return TOK_ANY; default: ; /* unreached */ } - - if (lx->push != NULL) { - if (-1 == lx->push(lx->buf_opaque, (char)c)) { - return TOK_ERROR; - } - } } - lx->lgetc = NULL; - + /* end states */ switch (state) { - case NONE: return TOK_EOF; case S1: return TOK_CHAR; - case S2: return TOK_OPENCOUNT; - case S3: return TOK_OPENGROUP; + case S2: return lx->z = z0, TOK_OPENCOUNT; + case S3: return lx->z = z1, TOK_OPENGROUP; case S4: return TOK_ALT; case S5: return TOK_PLUS; case S6: return TOK_STAR; @@ -618,8 +708,23 @@ z2(struct lx_sql_lx *lx) case S9: return TOK_OPENSUB; case S10: return TOK_MANY; case S11: return TOK_ANY; - default: errno = EINVAL; return TOK_ERROR; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_sql_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } + + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return TOK_ERROR; + } + } + + lx->lgetc = NULL; + + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } const char * @@ -747,6 +852,7 @@ lx_sql_init(struct lx_sql_lx *lx) lx->end.byte = 0; lx->end.line = 1; lx->end.col = 1; + (void)lx_sql_dynpop; } enum lx_sql_token diff --git a/src/libre/dialect/sql/parser.c b/src/libre/dialect/sql/parser.c index d380c5b7d..e7a4c2e75 100644 --- a/src/libre/dialect/sql/parser.c +++ b/src/libre/dialect/sql/parser.c @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 275 "src/libre/parser.act" +#line 22 "src/libre/parser.act" #include @@ -311,7 +311,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hhead(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: INVERT */ { -#line 303 "src/libre/parser.act" +#line 302 "src/libre/parser.act" ZI203 = '^'; @@ -337,7 +337,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hhead(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: RANGE */ { -#line 309 "src/libre/parser.act" +#line 308 "src/libre/parser.act" ZIc = '-'; ZI114 = lex_state->lx.start; @@ -353,7 +353,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hhead(flags flags, lex_state lex_state, act_ ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZInode) = ast_make_expr_literal(act_state->poolp, *flags, (ZIc)); if ((ZInode) == NULL) { @@ -365,7 +365,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hhead(flags flags, lex_state lex_state, act_ /* END OF ACTION: ast-make-literal */ /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((*ZIclass), (ZInode))) { goto ZL1; @@ -401,7 +401,7 @@ p_re__sql(flags flags, lex_state lex_state, act_state act_state, err err, t_ast_ /* BEGINNING OF ACTION: make-group-id */ { -#line 882 "src/libre/parser.act" +#line 881 "src/libre/parser.act" (ZIid) = act_state->group_id++; @@ -415,7 +415,7 @@ p_re__sql(flags flags, lex_state lex_state, act_state act_state, err err, t_ast_ } /* BEGINNING OF ACTION: ast-make-group */ { -#line 912 "src/libre/parser.act" +#line 911 "src/libre/parser.act" (ZInode) = ast_make_expr_group(act_state->poolp, *flags, (ZIe), (ZIid)); if ((ZInode) == NULL) { @@ -441,7 +441,7 @@ p_re__sql(flags flags, lex_state lex_state, act_state act_state, err err, t_ast_ { /* BEGINNING OF ACTION: err-expected-eof */ { -#line 757 "src/libre/parser.act" +#line 753 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXEOF; @@ -484,7 +484,7 @@ ZL2_expr_C_Ccharacter_Hclass_C_Clist_Hof_Hclass_Hterms:; } /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((ZIclass), (ZInode))) { goto ZL4; @@ -499,7 +499,7 @@ ZL2_expr_C_Ccharacter_Hclass_C_Clist_Hof_Hclass_Hterms:; { /* BEGINNING OF ACTION: err-expected-term */ { -#line 694 "src/libre/parser.act" +#line 690 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXTERM; @@ -522,7 +522,7 @@ ZL2_expr_C_Ccharacter_Hclass_C_Clist_Hof_Hclass_Hterms:; goto ZL2_expr_C_Ccharacter_Hclass_C_Clist_Hof_Hclass_Hterms; /* END OF INLINE: expr::character-class::list-of-class-terms */ } - /*UNREACHED*/ + /* UNREACHED */ default: break; } @@ -560,7 +560,7 @@ ZL2_expr_C_Clist_Hof_Hpieces:; } /* BEGINNING OF ACTION: ast-add-concat */ { -#line 1041 "src/libre/parser.act" +#line 1040 "src/libre/parser.act" if (!ast_add_expr_concat((ZIcat), (ZIa))) { goto ZL1; @@ -579,7 +579,7 @@ ZL2_expr_C_Clist_Hof_Hpieces:; goto ZL2_expr_C_Clist_Hof_Hpieces; /* END OF INLINE: expr::list-of-pieces */ } - /*UNREACHED*/ + /* UNREACHED */ default: break; } @@ -606,7 +606,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hterm(flags flags, lex_state lex_state, act_ /* BEGINNING OF EXTRACT: CHAR */ { -#line 579 "src/libre/parser.act" +#line 575 "src/libre/parser.act" /* the first byte may be '\x00' */ assert(lex_state->buf.a[1] == '\0'); @@ -670,7 +670,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state case (TOK_OPENGROUP): /* BEGINNING OF EXTRACT: OPENGROUP */ { -#line 319 "src/libre/parser.act" +#line 318 "src/libre/parser.act" ZIopen__start = lex_state->lx.start; ZIopen__end = lex_state->lx.end; @@ -688,7 +688,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-alt */ { -#line 868 "src/libre/parser.act" +#line 867 "src/libre/parser.act" (ZIclass) = ast_make_expr_alt(act_state->poolp, *flags); if ((ZIclass) == NULL) { @@ -713,7 +713,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* BEGINNING OF EXTRACT: CLOSEGROUP */ { -#line 351 "src/libre/parser.act" +#line 350 "src/libre/parser.act" ZI154 = ']'; ZIclose__start = lex_state->lx.start; @@ -729,7 +729,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: mark-group */ { -#line 768 "src/libre/parser.act" +#line 767 "src/libre/parser.act" mark(&act_state->groupstart, &(ZIopen__start)); mark(&act_state->groupend, &(ZIclose__end)); @@ -748,7 +748,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* BEGINNING OF EXTRACT: INVERT */ { -#line 303 "src/libre/parser.act" +#line 302 "src/libre/parser.act" ZI158 = '^'; @@ -760,7 +760,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-alt */ { -#line 868 "src/libre/parser.act" +#line 867 "src/libre/parser.act" (ZImask) = ast_make_expr_alt(act_state->poolp, *flags); if ((ZImask) == NULL) { @@ -789,7 +789,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state case (TOK_CLOSEGROUP): /* BEGINNING OF EXTRACT: CLOSEGROUP */ { -#line 351 "src/libre/parser.act" +#line 350 "src/libre/parser.act" ZI163 = ']'; ZIclose__start = lex_state->lx.start; @@ -809,7 +809,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state ADVANCE_LEXER; /* BEGINNING OF ACTION: mark-group */ { -#line 768 "src/libre/parser.act" +#line 767 "src/libre/parser.act" mark(&act_state->groupstart, &(ZIopen__start)); mark(&act_state->groupend, &(ZIclose__end)); @@ -823,7 +823,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state { /* BEGINNING OF ACTION: err-expected-closegroup */ { -#line 729 "src/libre/parser.act" +#line 725 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXCLOSEGROUP; @@ -839,7 +839,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* END OF INLINE: 162 */ /* BEGINNING OF ACTION: ast-make-subtract */ { -#line 960 "src/libre/parser.act" +#line 959 "src/libre/parser.act" (ZInode) = ast_make_expr_subtract(act_state->poolp, *flags, (ZIclass), (ZImask)); if ((ZInode) == NULL) { @@ -862,7 +862,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state { /* BEGINNING OF ACTION: err-expected-closegroup */ { -#line 729 "src/libre/parser.act" +#line 725 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXCLOSEGROUP; @@ -874,7 +874,7 @@ p_expr_C_Ccharacter_Hclass(flags flags, lex_state lex_state, act_state act_state /* END OF ACTION: err-expected-closegroup */ /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZInode) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -917,7 +917,7 @@ p_expr_C_Cpiece(flags flags, lex_state lex_state, act_state act_state, err err, } /* BEGINNING OF ACTION: ast-make-piece */ { -#line 898 "src/libre/parser.act" +#line 897 "src/libre/parser.act" if ((ZIc).min == 0 && (ZIc).max == 0) { (ZInode) = ast_make_expr_empty(act_state->poolp, *flags); @@ -954,7 +954,7 @@ p_expr(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__ex { /* BEGINNING OF ACTION: ast-make-alt */ { -#line 868 "src/libre/parser.act" +#line 867 "src/libre/parser.act" (ZInode) = ast_make_expr_alt(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -975,7 +975,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-alts */ { -#line 715 "src/libre/parser.act" +#line 711 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXALTS; @@ -987,7 +987,7 @@ ZL1:; /* END OF ACTION: err-expected-alts */ /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZInode) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -1019,7 +1019,7 @@ p_204(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp /* BEGINNING OF EXTRACT: RANGE */ { -#line 309 "src/libre/parser.act" +#line 308 "src/libre/parser.act" ZIc = '-'; ZI117 = lex_state->lx.start; @@ -1035,7 +1035,7 @@ p_204(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZInode) = ast_make_expr_literal(act_state->poolp, *flags, (ZIc)); if ((ZInode) == NULL) { @@ -1047,7 +1047,7 @@ p_204(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp /* END OF ACTION: ast-make-literal */ /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((*ZIclass), (ZInode))) { goto ZL1; @@ -1058,7 +1058,7 @@ p_204(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp /* END OF ACTION: ast-add-alt */ /* BEGINNING OF ACTION: ast-make-invert */ { -#line 995 "src/libre/parser.act" +#line 966 "src/libre/parser.act" struct ast_expr *any; @@ -1105,7 +1105,7 @@ p_204(flags flags, lex_state lex_state, act_state act_state, err err, t_ast__exp { /* BEGINNING OF ACTION: ast-make-invert */ { -#line 995 "src/libre/parser.act" +#line 966 "src/libre/parser.act" struct ast_expr *any; @@ -1167,7 +1167,7 @@ p_208(flags flags, lex_state lex_state, act_state act_state, err err, t_char *ZI { /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZInode) = ast_make_expr_literal(act_state->poolp, *flags, (*ZI205)); if ((ZInode) == NULL) { @@ -1192,7 +1192,7 @@ p_208(flags flags, lex_state lex_state, act_state act_state, err err, t_char *ZI /* BEGINNING OF ACTION: ast-range-endpoint-literal */ { -#line 840 "src/libre/parser.act" +#line 839 "src/libre/parser.act" (ZIa).type = AST_ENDPOINT_LITERAL; (ZIa).u.literal.c = (unsigned char) (*ZI205); @@ -1202,7 +1202,7 @@ p_208(flags flags, lex_state lex_state, act_state act_state, err err, t_char *ZI /* END OF ACTION: ast-range-endpoint-literal */ /* BEGINNING OF EXTRACT: RANGE */ { -#line 309 "src/libre/parser.act" +#line 308 "src/libre/parser.act" ZI136 = '-'; ZI137 = lex_state->lx.start; @@ -1220,7 +1220,7 @@ p_208(flags flags, lex_state lex_state, act_state act_state, err err, t_char *ZI case (TOK_CHAR): /* BEGINNING OF EXTRACT: CHAR */ { -#line 579 "src/libre/parser.act" +#line 575 "src/libre/parser.act" /* the first byte may be '\x00' */ assert(lex_state->buf.a[1] == '\0'); @@ -1243,7 +1243,7 @@ p_208(flags flags, lex_state lex_state, act_state act_state, err err, t_char *ZI ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-range-endpoint-literal */ { -#line 840 "src/libre/parser.act" +#line 839 "src/libre/parser.act" (ZIz).type = AST_ENDPOINT_LITERAL; (ZIz).u.literal.c = (unsigned char) (ZIcz); @@ -1253,7 +1253,7 @@ p_208(flags flags, lex_state lex_state, act_state act_state, err err, t_char *ZI /* END OF ACTION: ast-range-endpoint-literal */ /* BEGINNING OF ACTION: mark-range */ { -#line 773 "src/libre/parser.act" +#line 772 "src/libre/parser.act" mark(&act_state->rangestart, &(*ZI206)); mark(&act_state->rangeend, &(ZIend)); @@ -1263,7 +1263,7 @@ p_208(flags flags, lex_state lex_state, act_state act_state, err err, t_char *ZI /* END OF ACTION: mark-range */ /* BEGINNING OF ACTION: ast-make-range */ { -#line 1007 "src/libre/parser.act" +#line 1004 "src/libre/parser.act" unsigned char lower, upper; @@ -1321,7 +1321,7 @@ p_211(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI2 /* BEGINNING OF EXTRACT: CLOSECOUNT */ { -#line 379 "src/libre/parser.act" +#line 378 "src/libre/parser.act" ZI176 = lex_state->lx.start; ZIend = lex_state->lx.end; @@ -1335,7 +1335,7 @@ p_211(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI2 ADVANCE_LEXER; /* BEGINNING OF ACTION: mark-count */ { -#line 778 "src/libre/parser.act" +#line 777 "src/libre/parser.act" mark(&act_state->countstart, &(*ZI209)); mark(&act_state->countend, &(ZIend)); @@ -1345,7 +1345,7 @@ p_211(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI2 /* END OF ACTION: mark-count */ /* BEGINNING OF ACTION: count-range */ { -#line 825 "src/libre/parser.act" +#line 824 "src/libre/parser.act" if ((*ZIm) < (*ZIm)) { err->e = RE_ENEGCOUNT; @@ -1376,7 +1376,7 @@ p_211(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI2 case (TOK_COUNT): /* BEGINNING OF EXTRACT: COUNT */ { -#line 636 "src/libre/parser.act" +#line 627 "src/libre/parser.act" unsigned long u; char *e; @@ -1408,7 +1408,7 @@ p_211(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI2 case (TOK_CLOSECOUNT): /* BEGINNING OF EXTRACT: CLOSECOUNT */ { -#line 379 "src/libre/parser.act" +#line 378 "src/libre/parser.act" ZI179 = lex_state->lx.start; ZIend = lex_state->lx.end; @@ -1426,7 +1426,7 @@ p_211(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI2 ADVANCE_LEXER; /* BEGINNING OF ACTION: mark-count */ { -#line 778 "src/libre/parser.act" +#line 777 "src/libre/parser.act" mark(&act_state->countstart, &(*ZI209)); mark(&act_state->countend, &(ZIend)); @@ -1436,7 +1436,7 @@ p_211(flags flags, lex_state lex_state, act_state act_state, err err, t_pos *ZI2 /* END OF ACTION: mark-count */ /* BEGINNING OF ACTION: count-range */ { -#line 825 "src/libre/parser.act" +#line 824 "src/libre/parser.act" if ((ZIn) < (*ZIm)) { err->e = RE_ENEGCOUNT; @@ -1486,7 +1486,7 @@ ZL2_expr_C_Clist_Hof_Halts:; } /* BEGINNING OF ACTION: ast-add-alt */ { -#line 1047 "src/libre/parser.act" +#line 1046 "src/libre/parser.act" if (!ast_add_expr_alt((ZIalts), (ZIa))) { goto ZL1; @@ -1505,7 +1505,7 @@ ZL2_expr_C_Clist_Hof_Halts:; goto ZL2_expr_C_Clist_Hof_Halts; /* END OF INLINE: expr::list-of-alts */ } - /*UNREACHED*/ + /* UNREACHED */ default: break; } @@ -1517,7 +1517,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-alts */ { -#line 715 "src/libre/parser.act" +#line 711 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXALTS; @@ -1549,7 +1549,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, /* BEGINNING OF EXTRACT: OPENCOUNT */ { -#line 371 "src/libre/parser.act" +#line 370 "src/libre/parser.act" ZI209 = lex_state->lx.start; ZI210 = lex_state->lx.end; @@ -1565,7 +1565,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, case (TOK_COUNT): /* BEGINNING OF EXTRACT: COUNT */ { -#line 636 "src/libre/parser.act" +#line 627 "src/libre/parser.act" unsigned long u; char *e; @@ -1605,7 +1605,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: count-zero-or-one */ { -#line 817 "src/libre/parser.act" +#line 816 "src/libre/parser.act" (ZIc) = ast_make_count(0, 1); @@ -1619,7 +1619,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: count-one-or-more */ { -#line 813 "src/libre/parser.act" +#line 812 "src/libre/parser.act" (ZIc) = ast_make_count(1, AST_COUNT_UNBOUNDED); @@ -1633,7 +1633,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, ADVANCE_LEXER; /* BEGINNING OF ACTION: count-zero-or-more */ { -#line 809 "src/libre/parser.act" +#line 808 "src/libre/parser.act" (ZIc) = ast_make_count(0, AST_COUNT_UNBOUNDED); @@ -1646,7 +1646,7 @@ p_expr_C_Cpiece_C_Ccount(flags flags, lex_state lex_state, act_state act_state, { /* BEGINNING OF ACTION: count-one */ { -#line 821 "src/libre/parser.act" +#line 820 "src/libre/parser.act" (ZIc) = ast_make_count(1, 1); @@ -1663,7 +1663,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-count */ { -#line 701 "src/libre/parser.act" +#line 697 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXCOUNT; @@ -1675,7 +1675,7 @@ ZL1:; /* END OF ACTION: err-expected-count */ /* BEGINNING OF ACTION: count-one */ { -#line 821 "src/libre/parser.act" +#line 820 "src/libre/parser.act" (ZIc) = ast_make_count(1, 1); @@ -1704,7 +1704,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e ADVANCE_LEXER; /* BEGINNING OF ACTION: class-any */ { -#line 784 "src/libre/parser.act" +#line 782 "src/libre/parser.act" /* TODO: or the unicode equivalent */ (ZIa) = (*flags & RE_SINGLE) ? &class_any : &class_notnl; @@ -1714,7 +1714,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: class-any */ /* BEGINNING OF ACTION: ast-make-named */ { -#line 1034 "src/libre/parser.act" +#line 1033 "src/libre/parser.act" (ZIe) = ast_make_expr_named(act_state->poolp, *flags, (ZIa)); if ((ZIe) == NULL) { @@ -1734,7 +1734,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* BEGINNING OF EXTRACT: CHAR */ { -#line 579 "src/libre/parser.act" +#line 575 "src/libre/parser.act" /* the first byte may be '\x00' */ assert(lex_state->buf.a[1] == '\0'); @@ -1753,7 +1753,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-literal */ { -#line 875 "src/libre/parser.act" +#line 874 "src/libre/parser.act" (ZIe) = ast_make_expr_literal(act_state->poolp, *flags, (ZIa)); if ((ZIe) == NULL) { @@ -1774,7 +1774,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e ADVANCE_LEXER; /* BEGINNING OF ACTION: class-any */ { -#line 784 "src/libre/parser.act" +#line 782 "src/libre/parser.act" /* TODO: or the unicode equivalent */ (ZIa) = (*flags & RE_SINGLE) ? &class_any : &class_notnl; @@ -1784,7 +1784,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: class-any */ /* BEGINNING OF ACTION: ast-make-named */ { -#line 1034 "src/libre/parser.act" +#line 1033 "src/libre/parser.act" (ZIg) = ast_make_expr_named(act_state->poolp, *flags, (ZIa)); if ((ZIg) == NULL) { @@ -1796,7 +1796,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: ast-make-named */ /* BEGINNING OF ACTION: count-zero-or-more */ { -#line 809 "src/libre/parser.act" +#line 808 "src/libre/parser.act" (ZIc) = ast_make_count(0, AST_COUNT_UNBOUNDED); @@ -1805,7 +1805,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e /* END OF ACTION: count-zero-or-more */ /* BEGINNING OF ACTION: ast-make-piece */ { -#line 898 "src/libre/parser.act" +#line 897 "src/libre/parser.act" if ((ZIc).min == 0 && (ZIc).max == 0) { (ZIe) = ast_make_expr_empty(act_state->poolp, *flags); @@ -1832,7 +1832,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e ADVANCE_LEXER; /* BEGINNING OF ACTION: make-group-id */ { -#line 882 "src/libre/parser.act" +#line 881 "src/libre/parser.act" (ZIid) = act_state->group_id++; @@ -1846,7 +1846,7 @@ p_expr_C_Cpiece_C_Catom(flags flags, lex_state lex_state, act_state act_state, e } /* BEGINNING OF ACTION: ast-make-group */ { -#line 912 "src/libre/parser.act" +#line 911 "src/libre/parser.act" (ZIe) = ast_make_expr_group(act_state->poolp, *flags, (ZIg), (ZIid)); if ((ZIe) == NULL) { @@ -1884,7 +1884,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-atom */ { -#line 708 "src/libre/parser.act" +#line 704 "src/libre/parser.act" if (err->e == RE_ESUCCESS) { err->e = RE_EXATOM; @@ -1896,7 +1896,7 @@ ZL1:; /* END OF ACTION: err-expected-atom */ /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZIe) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZIe) == NULL) { @@ -1932,7 +1932,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hnamed(flags flags, lex_state lex_state, act case (TOK_NAMED__CLASS): /* BEGINNING OF EXTRACT: NAMED_CLASS */ { -#line 648 "src/libre/parser.act" +#line 647 "src/libre/parser.act" ZIid = DIALECT_CLASS(lex_state->buf.a); if (ZIid == NULL) { @@ -1956,7 +1956,7 @@ p_expr_C_Ccharacter_Hclass_C_Cclass_Hnamed(flags flags, lex_state lex_state, act ADVANCE_LEXER; /* BEGINNING OF ACTION: ast-make-named */ { -#line 1034 "src/libre/parser.act" +#line 1033 "src/libre/parser.act" (ZInode) = ast_make_expr_named(act_state->poolp, *flags, (ZIid)); if ((ZInode) == NULL) { @@ -1986,7 +1986,7 @@ p_expr_C_Calt(flags flags, lex_state lex_state, act_state act_state, err err, t_ { /* BEGINNING OF ACTION: ast-make-concat */ { -#line 861 "src/libre/parser.act" +#line 860 "src/libre/parser.act" (ZInode) = ast_make_expr_concat(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -2007,7 +2007,7 @@ p_expr_C_Calt(flags flags, lex_state lex_state, act_state act_state, err err, t_ { /* BEGINNING OF ACTION: ast-make-empty */ { -#line 854 "src/libre/parser.act" +#line 853 "src/libre/parser.act" (ZInode) = ast_make_expr_empty(act_state->poolp, *flags); if ((ZInode) == NULL) { @@ -2032,7 +2032,7 @@ ZL0:; /* BEGINNING OF TRAILER */ -#line 1207 "src/libre/parser.act" +#line 1052 "src/libre/parser.act" static int diff --git a/src/libre/dialect/sql/parser.h b/src/libre/dialect/sql/parser.h index c5e885439..7825ae3af 100644 --- a/src/libre/dialect/sql/parser.h +++ b/src/libre/dialect/sql/parser.h @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 292 "src/libre/parser.act" +#line 281 "src/libre/parser.act" #include @@ -28,7 +28,7 @@ extern void p_re__sql(flags, lex_state, act_state, err, t_ast__expr *); /* BEGINNING OF TRAILER */ -#line 1209 "src/libre/parser.act" +#line 1207 "src/libre/parser.act" #line 35 "src/libre/dialect/sql/parser.h" diff --git a/src/lx/lexer.c b/src/lx/lexer.c index 03bfbf463..ed7ca6d78 100644 --- a/src/lx/lexer.c +++ b/src/lx/lexer.c @@ -14,6 +14,26 @@ static enum lx_token z2(struct lx *lx); static enum lx_token z3(struct lx *lx); static enum lx_token z4(struct lx *lx); +static int +lx_advance_end(struct lx *lx, int c) +{ + lx->end.byte++; + lx->end.col++; + if (c == '\n') { + lx->end.line++; + lx->end.saved_col = lx->end.col - 1; + lx->end.col = 1; + } + if (lx->push != NULL) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { + return 0; + } + } + return 1; +} + +/* This wrapper manages one character of lookahead/pushback + * and the line, column, and byte offsets. */ #if __STDC_VERSION__ >= 199901L inline #endif @@ -34,18 +54,19 @@ lx_getc(struct lx *lx) } } - lx->end.byte++; - lx->end.col++; - - if (c == '\n') { - lx->end.line++; - lx->end.saved_col = lx->end.col - 1; - lx->end.col = 1; - } + if (!lx_advance_end(lx, c)) { return EOF; } return c; } +/* This wrapper adapts calling lx_getc to the interface + * in libfsm's generated code. */ +static int +fsm_getc(void *getc_opaque) +{ + return lx_getc((struct lx *)getc_opaque); +} + #if __STDC_VERSION__ >= 199901L inline #endif @@ -54,10 +75,7 @@ lx_ungetc(struct lx *lx, int c) { assert(lx != NULL); assert(lx->c == EOF); - lx->c = c; - - lx->end.byte--; lx->end.col--; @@ -67,13 +85,20 @@ lx_ungetc(struct lx *lx, int c) } } +/* Get a character from fgetc and push it to the buffer */ int lx_fgetc(struct lx *lx) { assert(lx != NULL); assert(lx->getc_opaque != NULL); - return fgetc(lx->getc_opaque); + const int c = fgetc(lx->getc_opaque); + if (c == EOF) { + lx->c = EOF; + return EOF; + } else { + return c; + } } int @@ -118,6 +143,17 @@ lx_dynpush(void *buf_opaque, char c) return 0; } +static void +lx_dynpop(void *buf_opaque) +{ + struct lx_dynbuf *t = buf_opaque; + + assert(t != NULL); + + assert(t->p != t->a); + t->p--; +} + int lx_dynclear(void *buf_opaque) { @@ -157,37 +193,36 @@ lx_dynfree(void *buf_opaque) static enum lx_token z0(struct lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2 + } state; + + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '/': state = S2; break; default: state = S1; break; } break; - case S1: /* e.g. "a" */ - lx_ungetc(lx, c); return TOK_CHAR; + case S1: /* e.g. "" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_CHAR; case S2: /* e.g. "\057" */ switch ((unsigned char) c) { @@ -243,79 +278,73 @@ z0(struct lx *lx) case 'x': case 'y': case 'z': break; - default: lx_ungetc(lx, c); return lx->z = z4, TOK_RE; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z4, TOK_RE; } break; default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_CHAR; + case S2: return lx->z = z4, TOK_RE; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } if (lx->push != NULL) { - if (-1 == lx->push(lx->buf_opaque, c)) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { return TOK_ERROR; } } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_CHAR; - case S2: return TOK_RE; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_token z1(struct lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, S4, S5, S6, S7, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3, S4, S5, S6, S7 + } state; + state = S0; + + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { - case '"': state = S2; break; - case '\\': state = S3; break; - default: state = S1; break; + case '\\': state = S1; break; + case '"': state = S3; break; + default: state = S2; break; } break; - case S1: /* e.g. "a" */ - lx_ungetc(lx, c); return TOK_CHAR; - - case S2: /* e.g. "\"" */ - lx_ungetc(lx, c); return lx->z = z4, TOK_STR; - - case S3: /* e.g. "\\" */ + case S1: /* e.g. "\\" */ switch ((unsigned char) c) { - case '"': - case '\\': - case 'f': - case 'n': - case 'r': - case 't': - case 'v': state = S4; break; + case 'x': state = S4; break; case '0': case '1': case '2': @@ -324,29 +353,24 @@ z1(struct lx *lx) case '5': case '6': case '7': state = S5; break; - case 'x': state = S6; break; - default: lx_ungetc(lx, c); return TOK_CHAR; + case '"': + case '\\': + case 'f': + case 'n': + case 'r': + case 't': + case 'v': state = S6; break; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_CHAR; } break; - case S4: /* e.g. "\\f" */ - lx_ungetc(lx, c); return TOK_ESC; + case S2: /* e.g. "\\x00" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_CHAR; - case S5: /* e.g. "\\0" */ - switch ((unsigned char) c) { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': break; - default: lx_ungetc(lx, c); return TOK_OCT; - } - break; + case S3: /* e.g. "\"" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z4, TOK_STR; - case S6: /* e.g. "\\x" */ + case S4: /* e.g. "\\x" */ switch ((unsigned char) c) { case '0': case '1': @@ -370,11 +394,30 @@ z1(struct lx *lx) case 'd': case 'e': case 'f': state = S7; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } + break; + + case S5: /* e.g. "\\0" */ + switch ((unsigned char) c) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': break; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_OCT; } break; - case S7: /* e.g. "\\xa" */ + case S6: /* e.g. "\\\"" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_ESC; + + case S7: /* e.g. "\\x0" */ switch ((unsigned char) c) { case '0': case '1': @@ -398,135 +441,157 @@ z1(struct lx *lx) case 'd': case 'e': case 'f': break; - default: lx_ungetc(lx, c); return TOK_HEX; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_HEX; } break; default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_CHAR; + case S2: return TOK_CHAR; + case S3: return lx->z = z4, TOK_STR; + case S5: return TOK_OCT; + case S6: return TOK_ESC; + case S7: return TOK_HEX; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } if (lx->push != NULL) { - if (-1 == lx->push(lx->buf_opaque, c)) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { return TOK_ERROR; } } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_CHAR; - case S2: return TOK_STR; - case S3: return TOK_CHAR; - case S4: return TOK_ESC; - case S5: return TOK_OCT; - case S7: return TOK_HEX; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_token z2(struct lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2 + } state; + + state = S0; + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '\'': state = S2; break; default: state = S1; break; } break; - case S1: /* e.g. "a" */ - lx_ungetc(lx, c); return TOK_CHAR; + case S1: /* e.g. "" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_CHAR; case S2: /* e.g. "'" */ - lx_ungetc(lx, c); return lx->z = z4, TOK_STR; + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z4, TOK_STR; default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_CHAR; + case S2: return lx->z = z4, TOK_STR; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } if (lx->push != NULL) { - if (-1 == lx->push(lx->buf_opaque, c)) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { return TOK_ERROR; } } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_CHAR; - case S2: return TOK_STR; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_token z3(struct lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2 + } state; + state = S0; + + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { case '\n': state = S2; break; default: state = S1; break; } break; - case S1: /* e.g. "a" */ - lx_ungetc(lx, c); return lx->z(lx); + case S1: /* e.g. "" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z(lx); case S2: /* e.g. "" */ - lx_ungetc(lx, c); return lx->z = z4, lx->z(lx); + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z4, lx->z(lx); default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_UNKNOWN; + case S2: return lx->z = z4, lx->z(lx); + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } switch (state) { case S0: @@ -536,75 +601,52 @@ z3(struct lx *lx) default: if (lx->push != NULL) { - if (-1 == lx->push(lx->buf_opaque, c)) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { return TOK_ERROR; } } break; } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_EOF; - case S2: return TOK_EOF; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } static enum lx_token z4(struct lx *lx) { + int has_consumed_input = 0; int c; - enum { - S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, - S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, - S20, S21, S22, S23, S24, S25, S26, S27, S28, NONE - } state; - assert(lx != NULL); if (lx->clear != NULL) { lx->clear(lx->buf_opaque); } - state = NONE; - lx->start = lx->end; - while (c = lx_getc(lx), c != EOF) { - if (state == NONE) { - state = S0; - } + void *getc_opaque = (void *)lx; + enum { + S0, S1, S2, S3, S4, S5, S6, S7, S8, S9, + S10, S11, S12, S13, S14, S15, S16, S17, S18, S19, + S20, S21, S22, S23, S24, S25, S26, S27, S28 + } state; + state = S0; + + while (c = fsm_getc(getc_opaque), c != EOF) { + has_consumed_input = 1; switch (state) { - case S0: /* start */ + case S0: /* e.g. "" */ switch ((unsigned char) c) { - case '\t': - case '\n': - case '\r': - case ' ': state = S1; break; - case '!': state = S2; break; - case '"': state = S3; break; - case '#': state = S4; break; - case '$': state = S5; break; - case '&': state = S6; break; - case '\'': state = S7; break; - case '(': state = S8; break; - case ')': state = S9; break; - case '*': state = S10; break; - case '+': state = S11; break; - case ',': state = S12; break; - case '-': state = S13; break; - case '.': state = S14; break; - case '/': state = S15; break; - case ';': state = S16; break; - case '=': state = S17; break; - case '?': state = S18; break; + case ',': state = S1; break; + case '$': state = S2; break; case 'A': case 'B': case 'C': @@ -657,37 +699,42 @@ z4(struct lx *lx) case 'w': case 'x': case 'y': - case 'z': state = S19; break; - case '\\': state = S20; break; - case '^': state = S21; break; - case '{': state = S22; break; - case '|': state = S23; break; - case '}': state = S24; break; - case '~': state = S25; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; - } - break; - - case S1: /* e.g. "\\x09" */ - switch ((unsigned char) c) { + case 'z': state = S3; break; + case '&': state = S4; break; + case '|': state = S5; break; + case '.': state = S6; break; + case '-': state = S7; break; + case '\\': state = S8; break; + case '^': state = S9; break; + case '!': state = S10; break; + case '~': state = S11; break; + case '?': state = S12; break; + case '+': state = S13; break; + case '*': state = S14; break; + case ')': state = S15; break; + case '(': state = S16; break; + case '}': state = S17; break; + case '{': state = S18; break; + case ';': state = S19; break; + case '=': state = S20; break; + case '/': state = S21; break; + case '"': state = S22; break; + case '\'': state = S23; break; + case '#': state = S24; break; case '\t': case '\n': case '\r': - case ' ': break; - default: lx_ungetc(lx, c); return lx->z(lx); + case ' ': state = S25; break; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S2: /* e.g. "!" */ - lx_ungetc(lx, c); return TOK_BANG; - - case S3: /* e.g. "\"" */ - lx_ungetc(lx, c); return lx->z = z1, lx->z(lx); - - case S4: /* e.g. "#" */ - lx_ungetc(lx, c); return lx->z = z3, lx->z(lx); + case S1: /* e.g. "," */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_COMMA; - case S5: /* e.g. "$" */ + case S2: /* e.g. "$" */ switch ((unsigned char) c) { case 'A': case 'B': @@ -742,58 +789,13 @@ z4(struct lx *lx) case 'x': case 'y': case 'z': state = S28; break; - default: lx->lgetc = NULL; return TOK_UNKNOWN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; } break; - case S6: /* e.g. "&" */ - lx_ungetc(lx, c); return TOK_AND; - - case S7: /* e.g. "'" */ - lx_ungetc(lx, c); return lx->z = z2, lx->z(lx); - - case S8: /* e.g. "(" */ - lx_ungetc(lx, c); return TOK_LPAREN; - - case S9: /* e.g. ")" */ - lx_ungetc(lx, c); return TOK_RPAREN; - - case S10: /* e.g. "*" */ - lx_ungetc(lx, c); return TOK_STAR; - - case S11: /* e.g. "+" */ - lx_ungetc(lx, c); return TOK_CROSS; - - case S12: /* e.g. "," */ - lx_ungetc(lx, c); return TOK_COMMA; - - case S13: /* e.g. "-" */ - switch ((unsigned char) c) { - case '>': state = S27; break; - default: lx_ungetc(lx, c); return TOK_DASH; - } - break; - - case S14: /* e.g. "." */ - switch ((unsigned char) c) { - case '.': state = S26; break; - default: lx_ungetc(lx, c); return TOK_DOT; - } - break; - - case S15: /* e.g. "\057" */ - lx_ungetc(lx, c); return lx->z = z0, lx->z(lx); - - case S16: /* e.g. ";" */ - lx_ungetc(lx, c); return TOK_SEMI; - - case S17: /* e.g. "=" */ - lx_ungetc(lx, c); return TOK_BIND; - - case S18: /* e.g. "?" */ - lx_ungetc(lx, c); return TOK_QMARK; - - case S19: /* e.g. "a" */ + case S3: /* e.g. "A" */ switch ((unsigned char) c) { case '0': case '1': @@ -858,35 +860,98 @@ z4(struct lx *lx) case 'x': case 'y': case 'z': break; - default: lx_ungetc(lx, c); return TOK_IDENT; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_IDENT; } break; - case S20: /* e.g. "\\" */ - lx_ungetc(lx, c); return TOK_DASH; + case S4: /* e.g. "&" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_AND; + + case S5: /* e.g. "|" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_PIPE; + + case S6: /* e.g. "." */ + switch ((unsigned char) c) { + case '.': state = S27; break; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_DOT; + } + break; + + case S7: /* e.g. "-" */ + switch ((unsigned char) c) { + case '>': state = S26; break; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_DASH; + } + break; + + case S8: /* e.g. "\\" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_DASH; + + case S9: /* e.g. "^" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_HAT; - case S21: /* e.g. "^" */ - lx_ungetc(lx, c); return TOK_HAT; + case S10: /* e.g. "!" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_BANG; - case S22: /* e.g. "{" */ - lx_ungetc(lx, c); return TOK_OPEN; + case S11: /* e.g. "~" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_TILDE; - case S23: /* e.g. "|" */ - lx_ungetc(lx, c); return TOK_PIPE; + case S12: /* e.g. "\077" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_QMARK; - case S24: /* e.g. "}" */ - lx_ungetc(lx, c); return TOK_CLOSE; + case S13: /* e.g. "+" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_CROSS; - case S25: /* e.g. "~" */ - lx_ungetc(lx, c); return TOK_TILDE; + case S14: /* e.g. "*" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_STAR; - case S26: /* e.g. ".." */ - lx_ungetc(lx, c); return TOK_TO; + case S15: /* e.g. ")" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_RPAREN; - case S27: /* e.g. "->" */ - lx_ungetc(lx, c); return TOK_MAP; + case S16: /* e.g. "(" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_LPAREN; - case S28: /* e.g. "$a" */ + case S17: /* e.g. "}" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_CLOSE; + + case S18: /* e.g. "{" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_OPEN; + + case S19: /* e.g. ";" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_SEMI; + + case S20: /* e.g. "=" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_BIND; + + case S21: /* e.g. "\057" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z0, lx->z(lx); + + case S22: /* e.g. "\"" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z1, lx->z(lx); + + case S23: /* e.g. "'" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z2, lx->z(lx); + + case S24: /* e.g. "#" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z = z3, lx->z(lx); + + case S25: /* e.g. "\\x09" */ + switch ((unsigned char) c) { + case '\t': + case '\n': + case '\r': + case ' ': break; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return lx->z(lx); + } + break; + + case S26: /* e.g. "->" */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_MAP; + + case S27: /* e.g. ".." */ + lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_TO; + + case S28: /* e.g. "$A" */ switch ((unsigned char) c) { case '0': case '1': @@ -951,66 +1016,73 @@ z4(struct lx *lx) case 'x': case 'y': case 'z': break; - default: lx_ungetc(lx, c); return TOK_TOKEN; + default: lx_ungetc(lx, c); lx_dynpop(lx->buf_opaque); return TOK_TOKEN; } break; default: ; /* unreached */ } + } + + /* end states */ + switch (state) { + case S1: return TOK_COMMA; + case S3: return TOK_IDENT; + case S4: return TOK_AND; + case S5: return TOK_PIPE; + case S6: return TOK_DOT; + case S7: return TOK_DASH; + case S8: return TOK_DASH; + case S9: return TOK_HAT; + case S10: return TOK_BANG; + case S11: return TOK_TILDE; + case S12: return TOK_QMARK; + case S13: return TOK_CROSS; + case S14: return TOK_STAR; + case S15: return TOK_RPAREN; + case S16: return TOK_LPAREN; + case S17: return TOK_CLOSE; + case S18: return TOK_OPEN; + case S19: return TOK_SEMI; + case S20: return TOK_BIND; + case S21: return lx->z = z0, lx->z(lx); + case S22: return lx->z = z1, lx->z(lx); + case S23: return lx->z = z2, lx->z(lx); + case S24: return lx->z = z3, lx->z(lx); + case S25: return TOK_EOF; + case S26: return TOK_MAP; + case S27: return TOK_TO; + case S28: return TOK_TOKEN; + default: + if (!has_consumed_input) { return TOK_EOF; } + lx_ungetc(lx, c); lx->lgetc = NULL; return TOK_UNKNOWN; + } switch (state) { - case S1: - case S3: - case S4: - case S7: - case S15: + case S21: + case S22: + case S23: + case S24: + case S25: break; default: if (lx->push != NULL) { - if (-1 == lx->push(lx->buf_opaque, c)) { + if (-1 == lx->push(lx->buf_opaque, (char)c)) { return TOK_ERROR; } } break; } - } lx->lgetc = NULL; - switch (state) { - case NONE: return TOK_EOF; - case S1: return TOK_EOF; - case S2: return TOK_BANG; - case S3: return TOK_EOF; - case S4: return TOK_EOF; - case S6: return TOK_AND; - case S7: return TOK_EOF; - case S8: return TOK_LPAREN; - case S9: return TOK_RPAREN; - case S10: return TOK_STAR; - case S11: return TOK_CROSS; - case S12: return TOK_COMMA; - case S13: return TOK_DASH; - case S14: return TOK_DOT; - case S15: return TOK_EOF; - case S16: return TOK_SEMI; - case S17: return TOK_BIND; - case S18: return TOK_QMARK; - case S19: return TOK_IDENT; - case S20: return TOK_DASH; - case S21: return TOK_HAT; - case S22: return TOK_OPEN; - case S23: return TOK_PIPE; - case S24: return TOK_CLOSE; - case S25: return TOK_TILDE; - case S26: return TOK_TO; - case S27: return TOK_MAP; - case S28: return TOK_TOKEN; - default: errno = EINVAL; return TOK_ERROR; - } + if (!has_consumed_input) { + return TOK_EOF; + } + return TOK_ERROR; } const char * @@ -1238,6 +1310,7 @@ lx_init(struct lx *lx) lx->end.byte = 0; lx->end.line = 1; lx->end.col = 1; + (void)lx_dynpop; } enum lx_token diff --git a/src/lx/parser.c b/src/lx/parser.c index f6f759693..4ab69ce21 100644 --- a/src/lx/parser.c +++ b/src/lx/parser.c @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 127 "src/lx/parser.act" +#line 27 "src/lx/parser.act" #include @@ -182,7 +182,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-list */ { -#line 816 "src/lx/parser.act" +#line 814 "src/lx/parser.act" err_expected(lex_state, "list of mappings, bindings or zones"); @@ -204,7 +204,7 @@ p_pattern(lex_state lex_state, act_state act_state, zone ZIz, fsm *ZOr) /* BEGINNING OF EXTRACT: IDENT */ { -#line 228 "src/lx/parser.act" +#line 227 "src/lx/parser.act" ZIn = xstrdup(lex_state->buf.a); @@ -214,7 +214,7 @@ p_pattern(lex_state lex_state, act_state act_state, zone ZIz, fsm *ZOr) ADVANCE_LEXER; /* BEGINNING OF ACTION: deref-var */ { -#line 280 "src/lx/parser.act" +#line 277 "src/lx/parser.act" struct ast_zone *z; @@ -252,7 +252,7 @@ p_pattern(lex_state lex_state, act_state act_state, zone ZIz, fsm *ZOr) /* BEGINNING OF EXTRACT: TOKEN */ { -#line 224 "src/lx/parser.act" +#line 222 "src/lx/parser.act" /* TODO: submatch addressing */ ZIt = xstrdup(lex_state->buf.a + 1); /* +1 for '$' prefix */ @@ -263,7 +263,7 @@ p_pattern(lex_state lex_state, act_state act_state, zone ZIz, fsm *ZOr) ADVANCE_LEXER; /* BEGINNING OF ACTION: deref-token */ { -#line 308 "src/lx/parser.act" +#line 304 "src/lx/parser.act" const struct ast_mapping *m; fsm_state_t start; @@ -374,7 +374,7 @@ p_list_Hof_Hthings_C_Cthing(lex_state lex_state, act_state act_state, ast ZIa, z } /* BEGINNING OF ACTION: op-reverse */ { -#line 677 "src/lx/parser.act" +#line 676 "src/lx/parser.act" assert((ZI210) != NULL); @@ -398,7 +398,7 @@ p_list_Hof_Hthings_C_Cthing(lex_state lex_state, act_state act_state, ast ZIa, z } /* BEGINNING OF ACTION: subtract-exit */ { -#line 736 "src/lx/parser.act" +#line 735 "src/lx/parser.act" assert((ZI231) != NULL); @@ -449,7 +449,7 @@ p_list_Hof_Hthings_C_Cthing(lex_state lex_state, act_state act_state, ast ZIa, z } /* BEGINNING OF ACTION: op-complete */ { -#line 668 "src/lx/parser.act" +#line 667 "src/lx/parser.act" assert((ZI210) != NULL); @@ -473,7 +473,7 @@ p_list_Hof_Hthings_C_Cthing(lex_state lex_state, act_state act_state, ast ZIa, z } /* BEGINNING OF ACTION: subtract-exit */ { -#line 736 "src/lx/parser.act" +#line 735 "src/lx/parser.act" assert((ZI239) != NULL); @@ -509,7 +509,7 @@ p_list_Hof_Hthings_C_Cthing(lex_state lex_state, act_state act_state, ast ZIa, z /* BEGINNING OF EXTRACT: IDENT */ { -#line 228 "src/lx/parser.act" +#line 227 "src/lx/parser.act" ZIn = xstrdup(lex_state->buf.a); @@ -563,7 +563,7 @@ p_list_Hof_Hthings_C_Cthing(lex_state lex_state, act_state act_state, ast ZIa, z } /* BEGINNING OF ACTION: subtract-exit */ { -#line 736 "src/lx/parser.act" +#line 735 "src/lx/parser.act" assert((ZI248) != NULL); @@ -614,7 +614,7 @@ p_list_Hof_Hthings_C_Cthing(lex_state lex_state, act_state act_state, ast ZIa, z } /* BEGINNING OF ACTION: op-complement */ { -#line 659 "src/lx/parser.act" +#line 658 "src/lx/parser.act" assert((ZI210) != NULL); @@ -638,7 +638,7 @@ p_list_Hof_Hthings_C_Cthing(lex_state lex_state, act_state act_state, ast ZIa, z } /* BEGINNING OF ACTION: subtract-exit */ { -#line 736 "src/lx/parser.act" +#line 735 "src/lx/parser.act" assert((ZI223) != NULL); @@ -685,7 +685,7 @@ p_list_Hof_Hthings_C_Cthing(lex_state lex_state, act_state act_state, ast ZIa, z /* BEGINNING OF EXTRACT: TOKEN */ { -#line 224 "src/lx/parser.act" +#line 222 "src/lx/parser.act" /* TODO: submatch addressing */ ZI253 = xstrdup(lex_state->buf.a + 1); /* +1 for '$' prefix */ @@ -696,7 +696,7 @@ p_list_Hof_Hthings_C_Cthing(lex_state lex_state, act_state act_state, ast ZIa, z ADVANCE_LEXER; /* BEGINNING OF ACTION: deref-token */ { -#line 308 "src/lx/parser.act" +#line 304 "src/lx/parser.act" const struct ast_mapping *m; fsm_state_t start; @@ -769,7 +769,7 @@ p_list_Hof_Hthings_C_Cthing(lex_state lex_state, act_state act_state, ast ZIa, z } /* BEGINNING OF ACTION: subtract-exit */ { -#line 736 "src/lx/parser.act" +#line 735 "src/lx/parser.act" assert((ZI259) != NULL); @@ -829,7 +829,7 @@ p_list_Hof_Hthings_C_Cthing(lex_state lex_state, act_state act_state, ast ZIa, z } /* BEGINNING OF ACTION: subtract-exit */ { -#line 736 "src/lx/parser.act" +#line 735 "src/lx/parser.act" assert((ZI268) != NULL); @@ -869,7 +869,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-thing */ { -#line 812 "src/lx/parser.act" +#line 810 "src/lx/parser.act" err_expected(lex_state, "mapping, binding or zone"); @@ -899,7 +899,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-open */ { -#line 800 "src/lx/parser.act" +#line 798 "src/lx/parser.act" err_expected(lex_state, "'{'"); @@ -929,7 +929,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-close */ { -#line 804 "src/lx/parser.act" +#line 802 "src/lx/parser.act" err_expected(lex_state, "'}'"); @@ -955,7 +955,7 @@ ZL2_pattern_C_Cbody:; { /* BEGINNING OF EXTRACT: CHAR */ { -#line 219 "src/lx/parser.act" +#line 215 "src/lx/parser.act" assert(lex_state->buf.a[0] != '\0'); assert(lex_state->buf.a[1] == '\0'); @@ -972,7 +972,7 @@ ZL2_pattern_C_Cbody:; { /* BEGINNING OF EXTRACT: ESC */ { -#line 149 "src/lx/parser.act" +#line 143 "src/lx/parser.act" assert(lex_state->buf.a[0] == '\\'); assert(lex_state->buf.a[1] != '\0'); @@ -1000,7 +1000,7 @@ ZL2_pattern_C_Cbody:; { /* BEGINNING OF EXTRACT: HEX */ { -#line 212 "src/lx/parser.act" +#line 188 "src/lx/parser.act" unsigned long u; char *e; @@ -1037,7 +1037,7 @@ ZL2_pattern_C_Cbody:; { /* BEGINNING OF EXTRACT: OCT */ { -#line 185 "src/lx/parser.act" +#line 161 "src/lx/parser.act" unsigned long u; char *e; @@ -1077,7 +1077,7 @@ ZL2_pattern_C_Cbody:; /* END OF INLINE: 84 */ /* BEGINNING OF ACTION: pattern-char */ { -#line 249 "src/lx/parser.act" +#line 247 "src/lx/parser.act" /* TODO */ *lex_state->p++ = (ZIc); @@ -1089,7 +1089,7 @@ ZL2_pattern_C_Cbody:; goto ZL2_pattern_C_Cbody; /* END OF INLINE: pattern::body */ } - /*UNREACHED*/ + /* UNREACHED */ case (ERROR_TERMINAL): return; default: @@ -1124,7 +1124,7 @@ ZL2_174:; } /* END OF INLINE: 174 */ } - /*UNREACHED*/ + /* UNREACHED */ default: { ZI171 = ZI168; @@ -1211,7 +1211,7 @@ ZL2_180:; } /* BEGINNING OF ACTION: op-alt */ { -#line 725 "src/lx/parser.act" +#line 724 "src/lx/parser.act" assert((ZI177) != NULL); assert((ZIb) != NULL); @@ -1230,7 +1230,7 @@ ZL2_180:; goto ZL2_180; /* END OF INLINE: 180 */ } - /*UNREACHED*/ + /* UNREACHED */ default: { ZI178 = ZI176; @@ -1291,7 +1291,7 @@ p_expr_C_Cprefix_Hexpr(lex_state lex_state, act_state act_state, zone ZIz, fsm * } /* BEGINNING OF ACTION: op-reverse */ { -#line 677 "src/lx/parser.act" +#line 676 "src/lx/parser.act" assert((ZIq) != NULL); @@ -1315,7 +1315,7 @@ p_expr_C_Cprefix_Hexpr(lex_state lex_state, act_state act_state, zone ZIz, fsm * } /* BEGINNING OF ACTION: op-complete */ { -#line 668 "src/lx/parser.act" +#line 667 "src/lx/parser.act" assert((ZIq) != NULL); @@ -1339,7 +1339,7 @@ p_expr_C_Cprefix_Hexpr(lex_state lex_state, act_state act_state, zone ZIz, fsm * } /* BEGINNING OF ACTION: op-complement */ { -#line 659 "src/lx/parser.act" +#line 658 "src/lx/parser.act" assert((ZIq) != NULL); @@ -1398,7 +1398,7 @@ ZL2_186:; } /* BEGINNING OF ACTION: op-intersect */ { -#line 714 "src/lx/parser.act" +#line 713 "src/lx/parser.act" assert((ZI183) != NULL); assert((ZIb) != NULL); @@ -1417,7 +1417,7 @@ ZL2_186:; goto ZL2_186; /* END OF INLINE: 186 */ } - /*UNREACHED*/ + /* UNREACHED */ default: { ZI184 = ZI182; @@ -1455,7 +1455,7 @@ ZL2_list_Hof_Hthings_C_Czone_Hthing_C_Clist_Hof_Hzone_Hto_Hmappings_C_Clist_Hof_ } /* BEGINNING OF ACTION: op-alt */ { -#line 725 "src/lx/parser.act" +#line 724 "src/lx/parser.act" assert((ZIold_Hexit) != NULL); assert((ZInew_Hexit) != NULL); @@ -1473,7 +1473,7 @@ ZL2_list_Hof_Hthings_C_Czone_Hthing_C_Clist_Hof_Hzone_Hto_Hmappings_C_Clist_Hof_ goto ZL2_list_Hof_Hthings_C_Czone_Hthing_C_Clist_Hof_Hzone_Hto_Hmappings_C_Clist_Hof_Hzone_Hto_Hmappings_Hx; /* END OF INLINE: list-of-things::zone-thing::list-of-zone-to-mappings::list-of-zone-to-mappings-x */ } - /*UNREACHED*/ + /* UNREACHED */ case (ERROR_TERMINAL): return; default: @@ -1554,7 +1554,7 @@ ZL2_197:; ADVANCE_LEXER; /* BEGINNING OF ACTION: op-cross */ { -#line 575 "src/lx/parser.act" +#line 568 "src/lx/parser.act" fsm_state_t start, end; fsm_state_t old; @@ -1607,13 +1607,13 @@ ZL2_197:; goto ZL2_197; /* END OF INLINE: 197 */ } - /*UNREACHED*/ + /* UNREACHED */ case (TOK_QMARK): { ADVANCE_LEXER; /* BEGINNING OF ACTION: op-qmark */ { -#line 620 "src/lx/parser.act" +#line 613 "src/lx/parser.act" fsm_state_t start, end; fsm_state_t old; @@ -1666,13 +1666,13 @@ ZL2_197:; goto ZL2_197; /* END OF INLINE: 197 */ } - /*UNREACHED*/ + /* UNREACHED */ case (TOK_STAR): { ADVANCE_LEXER; /* BEGINNING OF ACTION: op-star */ { -#line 525 "src/lx/parser.act" +#line 518 "src/lx/parser.act" fsm_state_t start, end; fsm_state_t old; @@ -1730,14 +1730,14 @@ ZL2_197:; goto ZL2_197; /* END OF INLINE: 197 */ } - /*UNREACHED*/ + /* UNREACHED */ default: goto ZL1; } } /* END OF INLINE: 272 */ } - /*UNREACHED*/ + /* UNREACHED */ default: { ZI196 = ZI191; @@ -1795,7 +1795,7 @@ p_204(lex_state lex_state, act_state act_state, zone *ZIz, fsm *ZI202, fsm *ZOq) } /* BEGINNING OF ACTION: op-subtract */ { -#line 703 "src/lx/parser.act" +#line 702 "src/lx/parser.act" assert((*ZI202) != NULL); assert((ZIb) != NULL); @@ -1870,7 +1870,7 @@ p_208(lex_state lex_state, act_state act_state, zone *ZIz, fsm *ZI206, fsm *ZOq) } /* BEGINNING OF ACTION: op-concat */ { -#line 686 "src/lx/parser.act" +#line 685 "src/lx/parser.act" assert((*ZI206) != NULL); assert((ZIb) != NULL); @@ -1920,7 +1920,7 @@ p_212(lex_state lex_state, act_state act_state, zone *ZIz, fsm *ZI210, fsm *ZOq) } /* BEGINNING OF ACTION: op-product */ { -#line 698 "src/lx/parser.act" +#line 696 "src/lx/parser.act" fprintf(stderr, "unimplemented\n"); (ZIq) = NULL; @@ -1960,7 +1960,7 @@ p_215(lex_state lex_state, act_state act_state, fsm *ZOr) /* BEGINNING OF EXTRACT: RE */ { -#line 236 "src/lx/parser.act" +#line 231 "src/lx/parser.act" assert(lex_state->buf.a[0] == '/'); @@ -1980,7 +1980,7 @@ p_215(lex_state lex_state, act_state act_state, fsm *ZOr) ADVANCE_LEXER; /* BEGINNING OF ACTION: pattern-buffer */ { -#line 263 "src/lx/parser.act" +#line 252 "src/lx/parser.act" size_t len; @@ -2010,7 +2010,7 @@ p_215(lex_state lex_state, act_state act_state, fsm *ZOr) /* END OF ACTION: pattern-buffer */ /* BEGINNING OF ACTION: compile-regex */ { -#line 379 "src/lx/parser.act" +#line 376 "src/lx/parser.act" struct re_err err; @@ -2030,7 +2030,7 @@ p_215(lex_state lex_state, act_state act_state, fsm *ZOr) /* END OF ACTION: compile-regex */ /* BEGINNING OF ACTION: free-arr */ { -#line 766 "src/lx/parser.act" +#line 765 "src/lx/parser.act" free((ZIa)); @@ -2046,7 +2046,7 @@ p_215(lex_state lex_state, act_state act_state, fsm *ZOr) ADVANCE_LEXER; /* BEGINNING OF ACTION: pattern-buffer */ { -#line 263 "src/lx/parser.act" +#line 252 "src/lx/parser.act" size_t len; @@ -2076,7 +2076,7 @@ p_215(lex_state lex_state, act_state act_state, fsm *ZOr) /* END OF ACTION: pattern-buffer */ /* BEGINNING OF ACTION: compile-literal */ { -#line 364 "src/lx/parser.act" +#line 361 "src/lx/parser.act" struct re_err err; @@ -2096,7 +2096,7 @@ p_215(lex_state lex_state, act_state act_state, fsm *ZOr) /* END OF ACTION: compile-literal */ /* BEGINNING OF ACTION: free-arr */ { -#line 766 "src/lx/parser.act" +#line 765 "src/lx/parser.act" free((ZIa)); @@ -2164,7 +2164,7 @@ ZL2_list_Hof_Hthings_C_Czone_Hthing_C_Clist_Hof_Hzone_Hfrom_Hmappings_C_Clist_Ho } /* BEGINNING OF ACTION: subtract-exit */ { -#line 736 "src/lx/parser.act" +#line 735 "src/lx/parser.act" assert((ZIr) != NULL); @@ -2189,7 +2189,7 @@ ZL2_list_Hof_Hthings_C_Czone_Hthing_C_Clist_Hof_Hzone_Hfrom_Hmappings_C_Clist_Ho /* END OF ACTION: subtract-exit */ /* BEGINNING OF ACTION: add-mapping */ { -#line 453 "src/lx/parser.act" +#line 449 "src/lx/parser.act" struct ast_token *t; struct ast_mapping *m; @@ -2229,7 +2229,7 @@ ZL2_list_Hof_Hthings_C_Czone_Hthing_C_Clist_Hof_Hzone_Hfrom_Hmappings_C_Clist_Ho goto ZL2_list_Hof_Hthings_C_Czone_Hthing_C_Clist_Hof_Hzone_Hfrom_Hmappings_C_Clist_Hof_Hzone_Hfrom_Hmappings_Hx; /* END OF INLINE: list-of-things::zone-thing::list-of-zone-from-mappings::list-of-zone-from-mappings-x */ } - /*UNREACHED*/ + /* UNREACHED */ case (ERROR_TERMINAL): return; default: @@ -2256,7 +2256,7 @@ p_lx(lex_state lex_state, act_state act_state, ast *ZOa) /* BEGINNING OF ACTION: no-zone */ { -#line 515 "src/lx/parser.act" +#line 514 "src/lx/parser.act" (ZIparent) = NULL; @@ -2265,7 +2265,7 @@ p_lx(lex_state lex_state, act_state act_state, ast *ZOa) /* END OF ACTION: no-zone */ /* BEGINNING OF ACTION: make-ast */ { -#line 424 "src/lx/parser.act" +#line 423 "src/lx/parser.act" (ZIa) = ast_new(); if ((ZIa) == NULL) { @@ -2278,7 +2278,7 @@ p_lx(lex_state lex_state, act_state act_state, ast *ZOa) /* END OF ACTION: make-ast */ /* BEGINNING OF ACTION: make-zone */ { -#line 432 "src/lx/parser.act" +#line 431 "src/lx/parser.act" assert((ZIa) != NULL); @@ -2301,7 +2301,7 @@ p_lx(lex_state lex_state, act_state act_state, ast *ZOa) /* END OF ACTION: make-zone */ /* BEGINNING OF ACTION: no-exit */ { -#line 511 "src/lx/parser.act" +#line 510 "src/lx/parser.act" (ZIexit) = NULL; @@ -2310,7 +2310,7 @@ p_lx(lex_state lex_state, act_state act_state, ast *ZOa) /* END OF ACTION: no-exit */ /* BEGINNING OF ACTION: set-globalzone */ { -#line 500 "src/lx/parser.act" +#line 499 "src/lx/parser.act" assert((ZIa) != NULL); assert((ZIz) != NULL); @@ -2341,7 +2341,7 @@ p_lx(lex_state lex_state, act_state act_state, ast *ZOa) { /* BEGINNING OF ACTION: err-expected-eof */ { -#line 808 "src/lx/parser.act" +#line 806 "src/lx/parser.act" err_expected(lex_state, "EOF"); @@ -2358,7 +2358,7 @@ ZL1:; { /* BEGINNING OF ACTION: make-ast */ { -#line 424 "src/lx/parser.act" +#line 423 "src/lx/parser.act" (ZIa) = ast_new(); if ((ZIa) == NULL) { @@ -2371,7 +2371,7 @@ ZL1:; /* END OF ACTION: make-ast */ /* BEGINNING OF ACTION: err-syntax */ { -#line 776 "src/lx/parser.act" +#line 773 "src/lx/parser.act" err(lex_state, "Syntax error"); exit(EXIT_FAILURE); @@ -2433,7 +2433,7 @@ p_list_Hof_Hthings_C_Czone_Hthing_C_Czone_Hto_Hmapping(lex_state lex_state, act_ } /* BEGINNING OF ACTION: add-mapping */ { -#line 453 "src/lx/parser.act" +#line 449 "src/lx/parser.act" struct ast_token *t; struct ast_mapping *m; @@ -2471,7 +2471,7 @@ p_list_Hof_Hthings_C_Czone_Hthing_C_Czone_Hto_Hmapping(lex_state lex_state, act_ /* END OF ACTION: add-mapping */ /* BEGINNING OF ACTION: clone */ { -#line 756 "src/lx/parser.act" +#line 755 "src/lx/parser.act" assert((ZIr) != NULL); @@ -2517,7 +2517,7 @@ p_112(lex_state lex_state, act_state act_state, string *ZOt) { /* BEGINNING OF ACTION: err-expected-map */ { -#line 784 "src/lx/parser.act" +#line 782 "src/lx/parser.act" err_expected(lex_state, "'->'"); @@ -2532,7 +2532,7 @@ p_112(lex_state lex_state, act_state act_state, string *ZOt) case (TOK_TOKEN): /* BEGINNING OF EXTRACT: TOKEN */ { -#line 224 "src/lx/parser.act" +#line 222 "src/lx/parser.act" /* TODO: submatch addressing */ ZIt = xstrdup(lex_state->buf.a + 1); /* +1 for '$' prefix */ @@ -2551,7 +2551,7 @@ p_112(lex_state lex_state, act_state act_state, string *ZOt) { /* BEGINNING OF ACTION: no-token */ { -#line 507 "src/lx/parser.act" +#line 506 "src/lx/parser.act" (ZIt) = NULL; @@ -2644,7 +2644,7 @@ ZL1:; { /* BEGINNING OF ACTION: err-expected-semi */ { -#line 792 "src/lx/parser.act" +#line 790 "src/lx/parser.act" err_expected(lex_state, "';'"); @@ -2695,7 +2695,7 @@ p_251(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit } /* BEGINNING OF ACTION: no-zone */ { -#line 515 "src/lx/parser.act" +#line 514 "src/lx/parser.act" (ZIto) = NULL; @@ -2704,7 +2704,7 @@ p_251(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit /* END OF ACTION: no-zone */ /* BEGINNING OF ACTION: add-mapping */ { -#line 453 "src/lx/parser.act" +#line 449 "src/lx/parser.act" struct ast_token *t; struct ast_mapping *m; @@ -2749,7 +2749,7 @@ p_251(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit /* BEGINNING OF ACTION: make-zone */ { -#line 432 "src/lx/parser.act" +#line 431 "src/lx/parser.act" assert((*ZIa) != NULL); @@ -2772,7 +2772,7 @@ p_251(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit /* END OF ACTION: make-zone */ /* BEGINNING OF ACTION: add-mapping */ { -#line 453 "src/lx/parser.act" +#line 449 "src/lx/parser.act" struct ast_token *t; struct ast_mapping *m; @@ -2829,7 +2829,7 @@ p_251(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit { /* BEGINNING OF ACTION: err-expected-to */ { -#line 796 "src/lx/parser.act" +#line 794 "src/lx/parser.act" err_expected(lex_state, "'..'"); @@ -2858,7 +2858,7 @@ p_251(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit } /* BEGINNING OF ACTION: no-zone */ { -#line 515 "src/lx/parser.act" +#line 514 "src/lx/parser.act" (ZIx) = NULL; @@ -2867,7 +2867,7 @@ p_251(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit /* END OF ACTION: no-zone */ /* BEGINNING OF ACTION: no-token */ { -#line 507 "src/lx/parser.act" +#line 506 "src/lx/parser.act" (ZIy) = NULL; @@ -2876,7 +2876,7 @@ p_251(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit /* END OF ACTION: no-token */ /* BEGINNING OF ACTION: regex-any */ { -#line 395 "src/lx/parser.act" +#line 392 "src/lx/parser.act" fsm_state_t start, end; @@ -2912,7 +2912,7 @@ p_251(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit /* END OF ACTION: regex-any */ /* BEGINNING OF ACTION: subtract-exit */ { -#line 736 "src/lx/parser.act" +#line 735 "src/lx/parser.act" assert((ZIw) != NULL); @@ -2937,7 +2937,7 @@ p_251(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit /* END OF ACTION: subtract-exit */ /* BEGINNING OF ACTION: add-mapping */ { -#line 453 "src/lx/parser.act" +#line 449 "src/lx/parser.act" struct ast_token *t; struct ast_mapping *m; @@ -2997,7 +2997,7 @@ p_251(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit { /* BEGINNING OF ACTION: err-expected-list */ { -#line 816 "src/lx/parser.act" +#line 814 "src/lx/parser.act" err_expected(lex_state, "list of mappings, bindings or zones"); @@ -3017,7 +3017,7 @@ p_251(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit /* BEGINNING OF ACTION: no-exit */ { -#line 511 "src/lx/parser.act" +#line 510 "src/lx/parser.act" (ZIr2) = NULL; @@ -3026,7 +3026,7 @@ p_251(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit /* END OF ACTION: no-exit */ /* BEGINNING OF ACTION: make-zone */ { -#line 432 "src/lx/parser.act" +#line 431 "src/lx/parser.act" assert((*ZIa) != NULL); @@ -3049,7 +3049,7 @@ p_251(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit /* END OF ACTION: make-zone */ /* BEGINNING OF ACTION: add-mapping */ { -#line 453 "src/lx/parser.act" +#line 449 "src/lx/parser.act" struct ast_token *t; struct ast_mapping *m; @@ -3101,7 +3101,7 @@ p_251(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit { /* BEGINNING OF ACTION: err-expected-list */ { -#line 816 "src/lx/parser.act" +#line 814 "src/lx/parser.act" err_expected(lex_state, "list of mappings, bindings or zones"); @@ -3149,7 +3149,7 @@ p_252(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit { /* BEGINNING OF ACTION: err-expected-bind */ { -#line 788 "src/lx/parser.act" +#line 786 "src/lx/parser.act" err_expected(lex_state, "'='"); @@ -3168,7 +3168,7 @@ p_252(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit } /* BEGINNING OF ACTION: add-binding */ { -#line 485 "src/lx/parser.act" +#line 482 "src/lx/parser.act" struct var *v; @@ -3211,7 +3211,7 @@ p_252(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit /* BEGINNING OF ACTION: deref-var */ { -#line 280 "src/lx/parser.act" +#line 277 "src/lx/parser.act" struct ast_zone *z; @@ -3254,7 +3254,7 @@ p_252(lex_state lex_state, act_state act_state, ast *ZIa, zone *ZIz, fsm *ZIexit } /* BEGINNING OF ACTION: subtract-exit */ { -#line 736 "src/lx/parser.act" +#line 735 "src/lx/parser.act" assert((ZI278) != NULL); @@ -3297,7 +3297,7 @@ ZL1:; /* BEGINNING OF TRAILER */ -#line 880 "src/lx/parser.act" +#line 818 "src/lx/parser.act" struct ast *lx_parse(FILE *f, const struct fsm_alloc *alloc) { diff --git a/src/lx/parser.h b/src/lx/parser.h index fdaff9879..947b194b5 100644 --- a/src/lx/parser.h +++ b/src/lx/parser.h @@ -9,7 +9,7 @@ /* BEGINNING OF HEADER */ -#line 139 "src/lx/parser.act" +#line 127 "src/lx/parser.act" #include @@ -29,7 +29,7 @@ extern void p_lx(lex_state, act_state, ast *); /* BEGINNING OF TRAILER */ -#line 882 "src/lx/parser.act" +#line 880 "src/lx/parser.act" #line 36 "src/lx/parser.h" diff --git a/src/lx/print/c.c b/src/lx/print/c.c index 77207403a..3340f0d74 100644 --- a/src/lx/print/c.c +++ b/src/lx/print/c.c @@ -159,6 +159,28 @@ unget_character(FILE *f, bool pop, const char *cur_char_var) } } +static bool +endid_represents_dead_end(fsm_end_id_t endid, const struct ast *ast) +{ + const struct ast_mapping *m = ast_getendmappingbyendid(endid); + if (m == NULL) { + return false; + } + + /* For each zone, check if this endid is associated with its z->ml zone. + * If so, that endid is the "dead end" for that zone. + * + * The total number of zones and end ids (each corresponding to mapping) + * should stay small enough that linear search is fine. If this becomes + * prohibitively expensive, then build a bitset of dead-end IDs upfront + * in one pass. */ + for (struct ast_zone *z = ast->zl; z != NULL; z = z->next) { + if (z->ml == m) { return true; } + } + + return false; +} + static int accept_c(FILE *f, const struct fsm_options *opt, const struct fsm_state_metadata *state_metadata, @@ -193,10 +215,21 @@ accept_c(FILE *f, const struct fsm_options *opt, if (m->to == NULL) { if (m->token == NULL) { /* If accept-ing here doesn't actually map to a token or - * a different zone, then it's stuck in the middle of a - * pattern pair like `'//' .. /\n/ -> $nl;` with an EOF, - * so tokenization should still fail. */ - fprintf(f, "%sUNKNOWN", prefix.tok); + * a different zone, then check whether the endid represents + * a dead end. In that case, it's stuck in the middle of a + * pattern pair like `'//' .. /\n/ -> $nl;` with an unexpected + * EOF, so tokenization should still fail (with TOK_UNKNOWN). + * + * An example where the endid doesn't represent a dead end is + * a zone ignoring trailing whitespace in a file, such as + * `/[\r\n\t ]+/;`. In that case, the EOF is valid, so still + * return TOK_EOF. */ + const fsm_end_id_t endid = state_metadata->end_ids[0]; + if (endid_represents_dead_end(endid, ast)) { + fprintf(f, "%sUNKNOWN", prefix.tok); + } else { + fprintf(f, "%sEOF", prefix.tok); + } } else { /* yield a token */ fprintf(f, "%s", prefix.tok);