@@ -29,6 +29,8 @@ const PseudoClass = selector.PseudoClass;
2929const AttributeOP = selector .AttributeOP ;
3030const Combinator = selector .Combinator ;
3131
// UTF-8 encoding of U+FFFD REPLACEMENT CHARACTER (bytes EF BF BD), written to
// the output in place of NUL bytes and of escapes that cannot be decoded.
const REPLACEMENT_CHARACTER = &.{ 0xEF, 0xBF, 0xBD };
33+
3234pub const ParseError = error {
3335 ExpectedSelector ,
3436 ExpectedIdentifier ,
@@ -217,22 +219,31 @@ pub const Parser = struct {
217219 // parseName parses a name (which is like an identifier, but doesn't have
218220 // extra restrictions on the first character).
219221 fn parseName (p : * Parser , w : anytype ) ParseError ! void {
222+ const sel = p .s ;
223+ const sel_len = sel .len ;
224+
220225 var i = p .i ;
221226 var ok = false ;
222227
223- while (i < p . s . len ) {
224- const c = p . s [i ];
228+ while (i < sel_len ) {
229+ const c = sel [i ];
225230
226231 if (nameChar (c )) {
227232 const start = i ;
228- while (i < p . s . len and nameChar (p . s [i ])) i += 1 ;
229- w .writeAll (p . s [start .. i ]) catch return ParseError .WriteError ;
233+ while (i < sel_len and nameChar (sel [i ])) i += 1 ;
234+ w .writeAll (sel [start .. i ]) catch return ParseError .WriteError ;
230235 ok = true ;
231236 } else if (c == '\\ ' ) {
232237 p .i = i ;
233238 try p .parseEscape (w );
234239 i = p .i ;
235240 ok = true ;
241+ } else if (c == 0 ) {
242+ w .writeAll (REPLACEMENT_CHARACTER ) catch return ParseError .WriteError ;
243+ i += 1 ;
244+ if (i == sel_len ) {
245+ ok = true ;
246+ }
236247 } else {
237248 // default:
238249 break ;
@@ -246,41 +257,60 @@ pub const Parser = struct {
// parseEscape parses a backslash escape starting at p.i and writes the decoded
// bytes to w. Nothing is returned; p.i is advanced past the consumed input.
//
// Handling, in order:
//   - No backslash at p.i, or no byte after it: one byte is consumed and
//     U+FFFD is written.
//   - Backslash followed by 1 to 6 hex digits: the digits, plus at most one
//     trailing whitespace (space, tab, LF, FF, CR or CRLF), are consumed. The
//     code point is written as UTF-8; U+FFFD is written instead when the value
//     is zero, does not fit the code point type, or cannot be UTF-8 encoded.
//   - Backslash followed by any other byte: that byte is written verbatim.
//
// The only error produced is ParseError.WriteError, when w fails.
fn parseEscape(p: *Parser, w: anytype) ParseError!void {
    const sel = p.s;
    const sel_len = sel.len;

    if (sel_len < p.i + 2 or sel[p.i] != '\\') {
        p.i += 1;
        w.writeAll(REPLACEMENT_CHARACTER) catch return ParseError.WriteError;
        return;
    }

    const start = p.i + 1;
    const c = sel[start];

    if (!ascii.isHex(c)) {
        // Not a hex escape: emit the literal byte after the backslash.
        p.i += 2;
        w.writeByte(c) catch return ParseError.WriteError;
        return;
    }

    // Unicode escape: scan up to six hex digits.
    var i: usize = start;
    while (i < start + 6 and i < sel_len and ascii.isHex(sel[i])) {
        i += 1;
    }
    const digits = sel[start..i];

    // A single whitespace after the digits terminates the escape and belongs
    // to it. Consume it before validating the value so that an out-of-range
    // escape does not leave the terminator behind to be parsed as a combinator.
    if (i < sel_len) {
        switch (sel[i]) {
            '\r' => {
                i += 1;
                if (i < sel_len and sel[i] == '\n') i += 1;
            },
            ' ', '\t', '\n', std.ascii.control_code.ff => i += 1,
            else => {},
        }
    }
    p.i = i;

    // Six hex digits can exceed u21; treat that like any other bad code point.
    const v = std.fmt.parseUnsigned(u21, digits, 16) catch {
        w.writeAll(REPLACEMENT_CHARACTER) catch return ParseError.WriteError;
        return;
    };

    if (v == 0) {
        w.writeAll(REPLACEMENT_CHARACTER) catch return ParseError.WriteError;
        return;
    }

    var buf: [4]u8 = undefined;
    const ln = std.unicode.utf8Encode(v, &buf) catch {
        // The value cannot be UTF-8 encoded.
        w.writeAll(REPLACEMENT_CHARACTER) catch return ParseError.WriteError;
        return;
    };
    w.writeAll(buf[0..ln]) catch return ParseError.WriteError;
}
285315
286316 // parseIDSelector parses a selector that matches by id attribute.
@@ -383,20 +413,23 @@ pub const Parser = struct {
383413
384414 // parseString parses a single- or double-quoted string.
385415 fn parseString (p : * Parser , writer : anytype ) ParseError ! void {
416+ const sel = p .s ;
417+ const sel_len = sel .len ;
418+
386419 var i = p .i ;
387- if (p . s . len < i + 2 ) return ParseError .ExpectedString ;
420+ if (sel_len < i + 2 ) return ParseError .ExpectedString ;
388421
389- const quote = p . s [i ];
422+ const quote = sel [i ];
390423 i += 1 ;
391424
392- loop : while (i < p . s . len ) {
393- switch (p . s [i ]) {
425+ loop : while (i < sel_len ) {
426+ switch (sel [i ]) {
394427 '\\ ' = > {
395- if (p . s . len > i + 1 ) {
396- const c = p . s [i + 1 ];
428+ if (sel_len > i + 1 ) {
429+ const c = sel [i + 1 ];
397430 switch (c ) {
398431 '\r ' = > {
399- if (p . s . len > i + 2 and p . s [i + 2 ] == '\n ' ) {
432+ if (sel_len > i + 2 and sel [i + 2 ] == '\n ' ) {
400433 i += 3 ;
401434 continue :loop ;
402435 }
@@ -418,17 +451,17 @@ pub const Parser = struct {
418451 else = > | c | {
419452 if (c == quote ) break :loop ;
420453 const start = i ;
421- while (i < p . s . len ) {
422- const cc = p . s [i ];
454+ while (i < sel_len ) {
455+ const cc = sel [i ];
423456 if (cc == quote or cc == '\\ ' or c == '\r ' or c == '\n ' or c == std .ascii .control_code .ff ) break ;
424457 i += 1 ;
425458 }
426- writer .writeAll (p . s [start .. i ]) catch return ParseError .WriteError ;
459+ writer .writeAll (sel [start .. i ]) catch return ParseError .WriteError ;
427460 },
428461 }
429462 }
430463
431- if (i >= p . s . len ) return ParseError .InvalidString ;
464+ if (i >= sel_len ) return ParseError .InvalidString ;
432465
433466 // Consume the final quote.
434467 i += 1 ;
0 commit comments