Skip to content

Commit d9e5821

Browse files
Merge pull request #613 from lightpanda-io/css_selector_parsing_tweaks
"Improve" css selector parsing
2 parents 17ed502 + 56eef2e commit d9e5821

File tree

1 file changed

+64
-31
lines changed

1 file changed

+64
-31
lines changed

src/browser/css/parser.zig

Lines changed: 64 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ const PseudoClass = selector.PseudoClass;
2929
const AttributeOP = selector.AttributeOP;
3030
const Combinator = selector.Combinator;
3131

32+
const REPLACEMENT_CHARACTER = &.{ 239, 191, 189 };
33+
3234
pub const ParseError = error{
3335
ExpectedSelector,
3436
ExpectedIdentifier,
@@ -217,22 +219,31 @@ pub const Parser = struct {
217219
// parseName parses a name (which is like an identifier, but doesn't have
218220
// extra restrictions on the first character).
219221
fn parseName(p: *Parser, w: anytype) ParseError!void {
222+
const sel = p.s;
223+
const sel_len = sel.len;
224+
220225
var i = p.i;
221226
var ok = false;
222227

223-
while (i < p.s.len) {
224-
const c = p.s[i];
228+
while (i < sel_len) {
229+
const c = sel[i];
225230

226231
if (nameChar(c)) {
227232
const start = i;
228-
while (i < p.s.len and nameChar(p.s[i])) i += 1;
229-
w.writeAll(p.s[start..i]) catch return ParseError.WriteError;
233+
while (i < sel_len and nameChar(sel[i])) i += 1;
234+
w.writeAll(sel[start..i]) catch return ParseError.WriteError;
230235
ok = true;
231236
} else if (c == '\\') {
232237
p.i = i;
233238
try p.parseEscape(w);
234239
i = p.i;
235240
ok = true;
241+
} else if (c == 0) {
242+
w.writeAll(REPLACEMENT_CHARACTER) catch return ParseError.WriteError;
243+
i += 1;
244+
if (i == sel_len) {
245+
ok = true;
246+
}
236247
} else {
237248
// default:
238249
break;
@@ -246,41 +257,60 @@ pub const Parser = struct {
246257
// parseEscape parses a backslash escape.
247258
// The decoded bytes are written to `w`; nothing is returned to the caller.
248259
fn parseEscape(p: *Parser, w: anytype) ParseError!void {
249-
if (p.s.len < p.i + 2 or p.s[p.i] != '\\') {
250-
return ParseError.InvalidEscape;
260+
const sel = p.s;
261+
const sel_len = sel.len;
262+
263+
if (sel_len < p.i + 2 or sel[p.i] != '\\') {
264+
p.i += 1;
265+
w.writeAll(REPLACEMENT_CHARACTER) catch return ParseError.WriteError;
266+
return;
251267
}
252268

253269
const start = p.i + 1;
254-
const c = p.s[start];
255-
if (ascii.isWhitespace(c)) return ParseError.EscapeLineEndingOutsideString;
270+
const c = sel[start];
256271

257272
// unicode escape (hex)
258273
if (ascii.isHex(c)) {
259274
var i: usize = start;
260-
while (i < start + 6 and i < p.s.len and ascii.isHex(p.s[i])) {
275+
while (i < start + 6 and i < sel_len and ascii.isHex(sel[i])) {
261276
i += 1;
262277
}
263-
const v = std.fmt.parseUnsigned(u21, p.s[start..i], 16) catch return ParseError.InvalidUnicode;
264-
if (p.s.len > i) {
265-
switch (p.s[i]) {
266-
'\r' => {
267-
i += 1;
268-
if (p.s.len > i and p.s[i] == '\n') i += 1;
269-
},
270-
' ', '\t', '\n', std.ascii.control_code.ff => i += 1,
271-
else => {},
278+
279+
const v = std.fmt.parseUnsigned(u21, sel[start..i], 16) catch {
280+
p.i = i;
281+
w.writeAll(REPLACEMENT_CHARACTER) catch return ParseError.WriteError;
282+
return;
283+
};
284+
285+
if (sel_len >= i) {
286+
if (sel_len > i) {
287+
switch (sel[i]) {
288+
'\r' => {
289+
i += 1;
290+
if (sel_len > i and sel[i] == '\n') i += 1;
291+
},
292+
' ', '\t', '\n', std.ascii.control_code.ff => i += 1,
293+
else => {},
294+
}
272295
}
273296
p.i = i;
297+
if (v == 0) {
298+
w.writeAll(REPLACEMENT_CHARACTER) catch return ParseError.WriteError;
299+
return;
300+
}
274301
var buf: [4]u8 = undefined;
275-
const ln = std.unicode.utf8Encode(v, &buf) catch return ParseError.InvalidUnicode;
302+
const ln = std.unicode.utf8Encode(v, &buf) catch {
303+
w.writeAll(REPLACEMENT_CHARACTER) catch return ParseError.WriteError;
304+
return;
305+
};
276306
w.writeAll(buf[0..ln]) catch return ParseError.WriteError;
277307
return;
278308
}
279309
}
280310

281311
// Otherwise, write the literal character after the backslash to `w`.
282312
p.i += 2;
283-
w.writeAll(p.s[start .. start + 1]) catch return ParseError.WriteError;
313+
w.writeByte(sel[start]) catch return ParseError.WriteError;
284314
}
285315

286316
// parseIDSelector parses a selector that matches by id attribute.
@@ -383,20 +413,23 @@ pub const Parser = struct {
383413

384414
// parseString parses a single- or double-quoted string.
385415
fn parseString(p: *Parser, writer: anytype) ParseError!void {
416+
const sel = p.s;
417+
const sel_len = sel.len;
418+
386419
var i = p.i;
387-
if (p.s.len < i + 2) return ParseError.ExpectedString;
420+
if (sel_len < i + 2) return ParseError.ExpectedString;
388421

389-
const quote = p.s[i];
422+
const quote = sel[i];
390423
i += 1;
391424

392-
loop: while (i < p.s.len) {
393-
switch (p.s[i]) {
425+
loop: while (i < sel_len) {
426+
switch (sel[i]) {
394427
'\\' => {
395-
if (p.s.len > i + 1) {
396-
const c = p.s[i + 1];
428+
if (sel_len > i + 1) {
429+
const c = sel[i + 1];
397430
switch (c) {
398431
'\r' => {
399-
if (p.s.len > i + 2 and p.s[i + 2] == '\n') {
432+
if (sel_len > i + 2 and sel[i + 2] == '\n') {
400433
i += 3;
401434
continue :loop;
402435
}
@@ -418,17 +451,17 @@ pub const Parser = struct {
418451
else => |c| {
419452
if (c == quote) break :loop;
420453
const start = i;
421-
while (i < p.s.len) {
422-
const cc = p.s[i];
454+
while (i < sel_len) {
455+
const cc = sel[i];
423456
if (cc == quote or cc == '\\' or c == '\r' or c == '\n' or c == std.ascii.control_code.ff) break;
424457
i += 1;
425458
}
426-
writer.writeAll(p.s[start..i]) catch return ParseError.WriteError;
459+
writer.writeAll(sel[start..i]) catch return ParseError.WriteError;
427460
},
428461
}
429462
}
430463

431-
if (i >= p.s.len) return ParseError.InvalidString;
464+
if (i >= sel_len) return ParseError.InvalidString;
432465

433466
// Consume the final quote.
434467
i += 1;

0 commit comments

Comments
 (0)