Skip to content
308 changes: 308 additions & 0 deletions src/browser/cssom/css_parser.zig
Original file line number Diff line number Diff line change
@@ -0,0 +1,308 @@
const std = @import("std");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The license header is missing.


/// Literal strings the declaration parser matches against, paired with
/// their byte lengths.
const CSSConstants = struct {
    /// Text that opens a `url(` value.
    const URL_PREFIX = "url(";
    /// Byte length of `URL_PREFIX`.
    const URL_PREFIX_LENGTH = URL_PREFIX.len;

    /// Text that flags a declaration as important.
    const IMPORTANT_KEYWORD = "!important";
    /// Byte length of `IMPORTANT_KEYWORD`.
    const IMPORTANT_LENGTH = IMPORTANT_KEYWORD.len;
};

pub const CSSParserState = enum {
seekName,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we would normally use snake_case for these, like a variable.

inName,
seekColon,
seekValue,
inValue,
inQuotedValue,
inSingleQuotedValue,
inUrl,
inImportant,
};

/// One parsed declaration: a property name, its value, and an importance flag.
pub const CSSDeclaration = struct {
    /// Property name.
    name: []const u8,
    /// Property value.
    value: []const u8,
    /// Importance flag for the declaration.
    is_important: bool,

    /// Bundles the three parts into a `CSSDeclaration`. The slices are
    /// stored as passed in; nothing is copied.
    pub fn init(name: []const u8, value: []const u8, is_important: bool) CSSDeclaration {
        return CSSDeclaration{
            .is_important = is_important,
            .value = value,
            .name = name,
        };
    }
};

pub const CSSParser = struct {
state: CSSParserState,
name_start: usize,
name_end: usize,
value_start: usize,
position: usize,
paren_depth: usize,
escape_next: bool,

/// Returns a parser in the `.seekName` state with every offset and the
/// parenthesis depth set to zero and no pending escape.
pub fn init() CSSParser {
    return CSSParser{
        .state = .seekName,
        .position = 0,
        .name_start = 0,
        .name_end = 0,
        .value_start = 0,
        .paren_depth = 0,
        .escape_next = false,
    };
}

pub fn parseDeclarations(text: []const u8, allocator: std.mem.Allocator) ![]CSSDeclaration {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As a general convention in Zig code, the allocator tends to be the first parameter (or the 2nd one if it's a method, where the 1st has to be the receiver).

var parser = init();
var declarations = std.ArrayList(CSSDeclaration).init(allocator);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a chance the "managed" structures get removed from Zig, so we tend to favor std.ArrayListUnmanaged(CSSDeclaration). But I can see how that would be a bit annoying here: since you pass it around, you'd also have to pass the allocator around.

I can think of 3 options.

1 - leave it as-is, we can deal with it if ever the stdlib removes this. I'm fine with this.

2 - Pass the allocator wherever you pass &declarations

3 - Add the allocator and declarations as fields to CSSParser, then you don't need to pass either around. You could create a helper. self.addDeclaration(...):

fn addDeclaration(self: *CSSParser, decl: CSSDeclaration) !void {
  return self.declarations.append(self.allocator, decl);
}

errdefer declarations.deinit();

while (parser.position < text.len) {
const c = text[parser.position];

switch (parser.state) {
.seekName => {
if (!std.ascii.isWhitespace(c)) {
parser.name_start = parser.position;
parser.state = .inName;
continue;
}
},
.inName => {
if (c == ':') {
parser.name_end = parser.position;
parser.state = .seekValue;
} else if (std.ascii.isWhitespace(c)) {
parser.name_end = parser.position;
parser.state = .seekColon;
}
},
.seekColon => {
if (c == ':') {
parser.state = .seekValue;
} else if (!std.ascii.isWhitespace(c)) {
parser.state = .seekName;
continue;
}
},
.seekValue => {
if (!std.ascii.isWhitespace(c)) {
parser.value_start = parser.position;
if (c == '"') {
parser.state = .inQuotedValue;
} else if (c == '\'') {
parser.state = .inSingleQuotedValue;
} else if (c == 'u' and parser.position + 3 < text.len and
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this is equivalent:

else if (c == 'u' and std.mem.startsWith(u8, text[parser.position..], CSSConstants.URL_PREFIX)) {

std.mem.eql(u8, text[parser.position .. parser.position + 4], CSSConstants.URL_PREFIX))
{
parser.state = .inUrl;
parser.paren_depth = 1;
parser.position += 3;
} else {
parser.state = .inValue;
continue;
}
}
},
.inValue => {
if (parser.escape_next) {
parser.escape_next = false;
} else if (c == '\\') {
parser.escape_next = true;
} else if (c == '(') {
parser.paren_depth += 1;
} else if (c == ')' and parser.paren_depth > 0) {
parser.paren_depth -= 1;
} else if (c == ';' and parser.paren_depth == 0) {
try parser.finishDeclaration(text, &declarations);
parser.state = .seekName;
}
},
.inQuotedValue => {
if (parser.escape_next) {
parser.escape_next = false;
} else if (c == '\\') {
parser.escape_next = true;
} else if (c == '"') {
parser.state = .inValue;
}
},
.inSingleQuotedValue => {
if (parser.escape_next) {
parser.escape_next = false;
} else if (c == '\\') {
parser.escape_next = true;
} else if (c == '\'') {
parser.state = .inValue;
}
},
.inUrl => {
if (parser.escape_next) {
parser.escape_next = false;
} else if (c == '\\') {
parser.escape_next = true;
} else if (c == '(') {
parser.paren_depth += 1;
} else if (c == ')') {
parser.paren_depth -= 1;
if (parser.paren_depth == 0) {
parser.state = .inValue;
}
}
},
.inImportant => {},
}

parser.position += 1;
}

try parser.finalize(text, &declarations);

return declarations.toOwnedSlice();
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

A bit of an eternal debate here. If this was a library, this would probably be right. But within our code, we know that allocator is actually an arena, so all this is doing is creating another copy of the slice that we don't actually need.

You can keep it as-is, which makes this code more universally usable.

You could also return declarations.items and "leak" the ArrayList, but it wouldn't really be leaking because we know we're using an arena. If you opt to go this route, you could rename allocator to arena, just to make it clear that you expect callers to provide an arena-backed allocator. In that case, you might as well remove the errdefer deinit at the top of the function.

It would make your testing a bit more complicated, but instead of const testing = std.testing, you can do:

const testing = @import("../../testing.zig");

And then do:

test "CSSParser - Property with !important" {
     defer testing.reset(); // this resets the testing.arena_allocator
     const declarations = try CSSParser.parseDeclarations(text, testing.arena_allocator);
     // ...
}

}

fn finishDeclaration(self: *CSSParser, text: []const u8, declarations: *std.ArrayList(CSSDeclaration)) !void {
const name = std.mem.trim(u8, text[self.name_start..self.name_end], &std.ascii.whitespace);
if (name.len == 0) return;

const raw_value = text[self.value_start..self.position];
const value = std.mem.trim(u8, raw_value, &std.ascii.whitespace);

var final_value = value;
var is_important = false;

if (std.mem.endsWith(u8, value, CSSConstants.IMPORTANT_KEYWORD)) {
is_important = true;
final_value = std.mem.trim(u8, value[0 .. value.len - CSSConstants.IMPORTANT_LENGTH], &std.ascii.whitespace);
}

const declaration = CSSDeclaration.init(name, final_value, is_important);
try declarations.append(declaration);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I would remove CSSDeclaration.init function and use instead

        try declarations.append(.{
                .name = name,
                .value = value,
                .is_important = is_important,
        });

}

fn finalize(self: *CSSParser, text: []const u8, declarations: *std.ArrayList(CSSDeclaration)) !void {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As-is, I think this could be a *const CSSParser, but if you create an addDeclaration method, then it would need to stay non-const.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can always alias std.ArrayList(CSSDeclaration) to const DeclarationList = std.ArrayList(CSSDeclaration) if you want

or const DeclarationList = std.ArrayListUnmanaged(CSSDeclaration) if you go that route.

if (self.state == .inValue) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since this is the only case, you could invert it.

if (self.state != .inValue) {
  return;
}

const name = text[self.name_start..self.name_end];
const trimmed_name = std.mem.trim(u8, name, &std.ascii.whitespace);

if (trimmed_name.len > 0) {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could also be inverted.

const raw_value = text[self.value_start..self.position];
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if finishDeclaration could be leveraged here to deduplicate any of this.

const value = std.mem.trim(u8, raw_value, &std.ascii.whitespace);

var final_value = value;
var is_important = false;
if (std.mem.endsWith(u8, value, CSSConstants.IMPORTANT_KEYWORD)) {
is_important = true;
final_value = std.mem.trim(u8, value[0 .. value.len - CSSConstants.IMPORTANT_LENGTH], &std.ascii.whitespace);
}

const declaration = CSSDeclaration.init(trimmed_name, final_value, is_important);
try declarations.append(declaration);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here

}
}
}

pub fn getState(self: *const CSSParser) CSSParserState {
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know it isn't pretty, but in Zig, direct field access is pretty common, so these getters aren't too useful.

Does anything call getState, getPosition and reset?

return self.state;
}

/// Returns the value currently stored in the parser's `position` field.
pub fn getPosition(self: *const CSSParser) usize {
    return self.*.position;
}

/// Overwrites every field of the parser with the values returned by `init()`.
pub fn reset(self: *CSSParser) void {
    const fresh = init();
    self.* = fresh;
}
};

const testing = std.testing;

test "CSSParser - Simple property" {
const text = "color: red;";
const allocator = testing.allocator;

const declarations = try CSSParser.parseDeclarations(text, allocator);
defer allocator.free(declarations);

try testing.expect(declarations.len == 1);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

expectEqual gives a better error message

try testing.expectEqualStrings("color", declarations[0].name);
try testing.expectEqualStrings("red", declarations[0].value);
try testing.expect(!declarations[0].is_important);
}

// A trailing `!important` must be stripped from the value and reported
// through `is_important`.
test "CSSParser - Property with !important" {
    const text = "margin: 10px !important;";
    const allocator = testing.allocator;

    const declarations = try CSSParser.parseDeclarations(text, allocator);
    defer allocator.free(declarations);

    // expectEqual reports expected vs. actual on failure; expect on a
    // boolean only reports that the check failed.
    try testing.expectEqual(@as(usize, 1), declarations.len);
    try testing.expectEqualStrings("margin", declarations[0].name);
    try testing.expectEqualStrings("10px", declarations[0].value);
    try testing.expect(declarations[0].is_important);
}

// Three `;`-separated declarations must come back in source order, with
// only the last one flagged important.
test "CSSParser - Multiple properties" {
    const text = "color: red; font-size: 12px; margin: 5px !important;";
    const allocator = testing.allocator;

    const declarations = try CSSParser.parseDeclarations(text, allocator);
    defer allocator.free(declarations);

    // expectEqual reports expected vs. actual on failure; expect on a
    // boolean only reports that the check failed.
    try testing.expectEqual(@as(usize, 3), declarations.len);

    try testing.expectEqualStrings("color", declarations[0].name);
    try testing.expectEqualStrings("red", declarations[0].value);
    try testing.expect(!declarations[0].is_important);

    try testing.expectEqualStrings("font-size", declarations[1].name);
    try testing.expectEqualStrings("12px", declarations[1].value);
    try testing.expect(!declarations[1].is_important);

    try testing.expectEqualStrings("margin", declarations[2].name);
    try testing.expectEqualStrings("5px", declarations[2].value);
    try testing.expect(declarations[2].is_important);
}

// A `;` inside a double-quoted value must not end the declaration, and the
// quotes themselves stay part of the value.
test "CSSParser - Quoted value with semicolon" {
    const text = "content: \"Hello; world!\";";
    const allocator = testing.allocator;

    const declarations = try CSSParser.parseDeclarations(text, allocator);
    defer allocator.free(declarations);

    // expectEqual reports expected vs. actual on failure; expect on a
    // boolean only reports that the check failed.
    try testing.expectEqual(@as(usize, 1), declarations.len);
    try testing.expectEqualStrings("content", declarations[0].name);
    try testing.expectEqualStrings("\"Hello; world!\"", declarations[0].value);
    try testing.expect(!declarations[0].is_important);
}

// A `url(...)` value must be returned whole, including the wrapper and the
// quoted argument.
test "CSSParser - URL value" {
    const text = "background-image: url(\"test.png\");";
    const allocator = testing.allocator;

    const declarations = try CSSParser.parseDeclarations(text, allocator);
    defer allocator.free(declarations);

    // expectEqual reports expected vs. actual on failure; expect on a
    // boolean only reports that the check failed.
    try testing.expectEqual(@as(usize, 1), declarations.len);
    try testing.expectEqualStrings("background-image", declarations[0].name);
    try testing.expectEqualStrings("url(\"test.png\")", declarations[0].value);
    try testing.expect(!declarations[0].is_important);
}

// Whitespace around names, colons, values and semicolons must be trimmed
// from the parsed names and values.
test "CSSParser - Whitespace handling" {
    const text = " color : purple ; margin : 10px ; ";
    const allocator = testing.allocator;

    const declarations = try CSSParser.parseDeclarations(text, allocator);
    defer allocator.free(declarations);

    // expectEqual reports expected vs. actual on failure; expect on a
    // boolean only reports that the check failed.
    try testing.expectEqual(@as(usize, 2), declarations.len);
    try testing.expectEqualStrings("color", declarations[0].name);
    try testing.expectEqualStrings("purple", declarations[0].value);
    try testing.expectEqualStrings("margin", declarations[1].name);
    try testing.expectEqualStrings("10px", declarations[1].value);
}
Loading