@@ -41,16 +41,18 @@ namespace nix {
4141
4242// We rely on the fact that the parser receives a private copy of the input
4343// string, so the unescaped result can be written back into it in place.
44- static StringToken unescapeStr(SymbolTable & symbols, char * s, size_t length)
44+ // Computing the position is expensive, so it is passed as a callback and thus obtained lazily.
45+ static StringToken unescapeStr(char * const s, size_t length, std::function<Pos()> && pos)
4546{
46- char * result = s ;
47+ bool noNullByte = true ;
4748 char * t = s;
48- char c;
4949 // the input string is terminated with *two* NULs, so we can safely take
5050 // *one* character after the one being checked against.
51- while ((c = *s++)) {
51+ for (size_t i = 0 ; i < length; t++) {
52+ char c = s[i++];
53+ noNullByte &= c != ' \0 ' ;
5254 if (c == ' \\ ' ) {
53- c = *s++ ;
55+ c = s[i++] ;
5456 if (c == ' n' ) *t = ' \n ' ;
5557 else if (c == ' r' ) *t = ' \r ' ;
5658 else if (c == ' t' ) *t = ' \t ' ;
@@ -59,12 +61,14 @@ static StringToken unescapeStr(SymbolTable & symbols, char * s, size_t length)
5961 else if (c == ' \r ' ) {
6062 /* Normalise CR and CR/LF into LF. */
6163 *t = ' \n ' ;
62- if (*s == ' \n ' ) s ++; /* cr/lf */
64+ if (s[i] == ' \n ' ) i ++; /* cr/lf */
6365 }
6466 else *t = c;
65- t++;
6667 }
67- return {result, size_t (t - result)};
68+ if (!noNullByte) {
69+ forceNoNullByte ({s, size_t (t - s)}, std::move (pos));
70+ }
71+ return {s, size_t (t - s)};
6872}
6973
7074static void requireExperimentalFeature (const ExperimentalFeature & feature, const Pos & pos)
@@ -175,7 +179,7 @@ or { return OR_KW; }
175179 /* It is impossible to match strings ending with '$' with one
176180 regex because trailing contexts are only valid at the end
177181 of a rule. (A sane but undocumented limitation.) */
178- yylval->str = unescapeStr (state-> symbols , yytext, yyleng);
182+ yylval->str = unescapeStr (yytext, yyleng, [&]() { return state-> positions [CUR_POS]; } );
179183 return STR;
180184 }
181185<STRING >\$\{ { PUSH_STATE (DEFAULT); return DOLLAR_CURLY; }
@@ -191,6 +195,7 @@ or { return OR_KW; }
191195\'\' (\ * \n )? { PUSH_STATE (IND_STRING); return IND_STRING_OPEN; }
192196<IND_STRING >([^ \$\' ]| \$ [^ \{\' ]| \' [^ \'\$ ])+ {
193197 yylval->str = {yytext, (size_t ) yyleng, true };
198+ forceNoNullByte (yylval->str , [&]() { return state->positions [CUR_POS]; });
194199 return IND_STR;
195200 }
196201<IND_STRING >\'\'\$ |
@@ -203,7 +208,7 @@ or { return OR_KW; }
203208 return IND_STR;
204209 }
205210<IND_STRING >\'\'\\ {ANY } {
206- yylval->str = unescapeStr (state-> symbols , yytext + 2 , yyleng - 2 );
211+ yylval->str = unescapeStr (yytext + 2 , yyleng - 2 , [&]() { return state-> positions [CUR_POS]; } );
207212 return IND_STR;
208213 }
209214<IND_STRING >\$\{ { PUSH_STATE (DEFAULT); return DOLLAR_CURLY; }
0 commit comments