Skip to content

Commit 8d0f401

Browse files
chqrliebvdberg
authored and committed
Yaml: simplify and improve parser
* make `Data.text_cur` and `Data.nodes_cur` integers to avoid fixups
* enforce enum and names synchronisation
* remove redundant casts
* simplify tokenizer
* remove redundant enum prefixes
1 parent fe37d4f commit 8d0f401

File tree

5 files changed

+143
-192
lines changed

5 files changed

+143
-192
lines changed

common/yaml/yaml_data.c2

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ import string local;
1919
import stdlib local;
2020

2121
const u32 MaxDepth = 8;
22-
22+
const u32 MinText = 256;
23+
const u32 MinNodes = 32;
2324

2425
type NodeKind enum u8 {
2526
Unknown,
@@ -29,7 +30,12 @@ type NodeKind enum u8 {
2930
}
3031

3132
// NOTE: keep in sync with NodeKind
32-
const char*[NodeKind] node_names = { "UNK", "SCA", "MAP", "SEQ" }
33+
const char*[NodeKind] node_names = {
34+
[Unknown] = "UNK",
35+
[Scalar] = "SCA",
36+
[Map] = "MAP",
37+
[Sequence] = "SEQ",
38+
}
3339

3440
public type Node struct @(opaque) {
3541
NodeKind kind;
@@ -41,7 +47,6 @@ public type Node struct @(opaque) {
4147
}
4248
}
4349

44-
4550
type StackLevel struct {
4651
i32 indent; // -1 for root node
4752
Node* node;
@@ -52,26 +57,28 @@ type Data struct {
5257
// text
5358
char* text;
5459
u32 text_size;
55-
char* text_cur;
60+
u32 text_cur;
5661

5762
// nodes
5863
Node* nodes;
5964
u32 nodes_count;
60-
Node* nodes_cur;
65+
u32 nodes_cur;
6166

6267
// needed for node resize
6368
StackLevel* stack;
6469
}
6570

6671
fn void Data.init(Data* d, u32 text_size, u32 nodes_count, StackLevel* stack) {
72+
if (text_size < MinText) text_size = MinText;
6773
d.text = malloc(text_size);
6874
d.text_size = text_size;
69-
d.text_cur = d.text + 1; // reserve first byte for empty text
70-
d.text[0] = 0;
75+
d.text_cur = 1; // reserve first byte for empty text
76+
d.text[0] = '\0';
7177

78+
if (nodes_count < MinNodes) nodes_count = MinNodes;
7279
d.nodes = malloc(nodes_count * sizeof(Node));
7380
d.nodes_count = nodes_count;
74-
d.nodes_cur = &d.nodes[1]; // reserve first node
81+
d.nodes_cur = 1; // reserve first node
7582
memset(&d.nodes[0], 0, sizeof(Node));
7683

7784
d.stack = stack;
@@ -83,46 +90,39 @@ fn void Data.destroy(Data* d) {
8390
}
8491

8592
fn void Data.resize_nodes(Data* d) {
86-
u32 idx = (u32)(d.nodes_cur - d.nodes);
87-
8893
d.nodes_count *= 2;
8994
Node* nodes2 = malloc(d.nodes_count * sizeof(Node));
90-
memcpy(nodes2, d.nodes, idx * sizeof(Node));
95+
memcpy(nodes2, d.nodes, d.nodes_cur * sizeof(Node));
9196

9297
// fix-up stack pointers
9398
for (u32 i=0; i<MaxDepth; i++) {
9499
StackLevel* sl = &d.stack[i];
95100
if (sl.node) {
96-
u32 node_idx = (u32)(sl.node - d.nodes);
101+
isize node_idx = sl.node - d.nodes;
97102
sl.node = &nodes2[node_idx];
98103
}
99104
if (sl.last_child) {
100-
u32 last_child_idx = (u32)(sl.last_child - d.nodes);
105+
isize last_child_idx = sl.last_child - d.nodes;
101106
sl.last_child = &nodes2[last_child_idx];
102107
}
103108
}
104109

105110
free(d.nodes);
106111
d.nodes = nodes2;
107-
d.nodes_cur = &d.nodes[idx];
108112
}
109113

110114
fn void Data.resize_text(Data* d) {
111-
u32 idx = (u32)(d.text_cur - d.text);
112-
113115
d.text_size *= 2;
114116
char* text2 = malloc(d.text_size);
115-
memcpy(text2, d.text, idx + 1); // also copy 0-termination
117+
memcpy(text2, d.text, d.text_cur);
116118
free(d.text);
117119
d.text = text2;
118-
d.text_cur = &d.text[idx];
119120
}
120121

121122
fn Node* Data.add_node(Data* d, NodeKind kind, u32 name_idx) {
122-
u32 idx = (u32)(d.nodes_cur - d.nodes);
123-
if (idx >= d.nodes_count -1) d.resize_nodes();
123+
if (d.nodes_cur >= d.nodes_count - 1) d.resize_nodes();
124124

125-
Node* result = d.nodes_cur;
125+
Node* result = &d.nodes[d.nodes_cur];
126126
d.nodes_cur++;
127127
result.kind = kind;
128128
result.next_idx = 0;
@@ -136,12 +136,12 @@ fn u32 Data.node2idx(const Data* d, const Node* n) @(inline) {
136136
}
137137

138138
fn u32 Data.add_text(Data* d, const char* text, u32 len) {
139-
u32 idx = (u32)(d.text_cur - d.text);
139+
u32 idx = d.text_cur;
140140
while (idx + len + 1 >= d.text_size) d.resize_text();
141141

142-
memcpy(d.text_cur, text, len);
143-
d.text_cur[len] = 0;
144-
d.text_cur += len+1; // add 0-terminator
142+
memcpy(d.text + idx, text, len);
143+
d.text[idx + len] = '\0';
144+
d.text_cur += len + 1; // skip 0-terminator
145145
return idx;
146146
}
147147

common/yaml/yaml_dump.c2

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,12 @@ public fn void Parser.dump(const Parser* p, bool verbose) @(unused) {
2323
}
2424

2525
fn void Data.dump(const Data* d, bool verbose) {
26-
u32 node_count = (u32)(d.nodes_cur - d.nodes);
26+
u32 node_count = d.nodes_cur;
2727
if (verbose) {
28-
printf("Text %d/%d\n", (u32)(d.text_cur - d.text), d.text_size);
28+
printf("Text %d/%d\n", d.text_cur, d.text_size);
2929
const char* cp = d.text + 1;
30-
while (cp < d.text_cur) {
30+
const char* end = d.text + d.text_cur;
31+
while (cp < end) {
3132
u32 len = (u32)strlen(cp);
3233
u32 offset = (u32)(cp - d.text);
3334
printf(" [%3d] %s\n", offset, cp);

common/yaml/yaml_iterator.c2

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,17 @@ module yaml;
1717

1818
import string;
1919

20-
public fn bool Node.isMap(const Node* n) @(unused) { return n.kind == NodeKind.Map; }
20+
public fn bool Node.isMap(const Node* n) @(unused) { return n.kind == Map; }
2121

22-
public fn bool Node.isSequence(const Node* n) @(unused) { return n.kind == NodeKind.Sequence; }
22+
public fn bool Node.isSequence(const Node* n) @(unused) { return n.kind == Sequence; }
2323

24-
fn bool Node.isScalar(const Node* n) @(unused) { return n.kind == NodeKind.Scalar; }
24+
fn bool Node.isScalar(const Node* n) @(unused) { return n.kind == Scalar; }
2525

2626

2727
// TODO only pass iterators? (that way we dont need Parser* anymore)
2828
public fn const Node* Parser.getRoot(const Parser* p) {
29-
u32 node_count = (u32)(p.data.nodes_cur - p.data.nodes) - 1;
30-
if (node_count == 0) return nil;
29+
u32 node_count = p.data.nodes_cur;
30+
if (node_count <= 1) return nil;
3131
return &p.data.nodes[1];
3232
}
3333

@@ -42,10 +42,10 @@ public fn const Node* Parser.findNode(const Parser* p, const char* path) {
4242
}
4343

4444
fn const Node* Data.findNode(const Data* d, const char* path) {
45-
u32 node_count = (u32)(d.nodes_cur - d.nodes) - 1;
46-
if (node_count == 0) return nil;
45+
u32 node_count = d.nodes_cur;
46+
if (node_count <= 1) return nil;
4747
const Node* root = &d.nodes[1];
48-
if (root.kind == NodeKind.Sequence) return nil;
48+
if (root.kind == Sequence) return nil;
4949
return d.findChildNode(path, root.child_idx);
5050
}
5151

@@ -59,7 +59,7 @@ fn const Node* Data.findChildNode(const Data* d, const char* path, u32 next) {
5959
if (rest) { // match
6060
path = rest;
6161
if (path[0] == 0) return node; // found node
62-
if (node.kind == NodeKind.Sequence) return nil; // dont search in sequence
62+
if (node.kind == Sequence) return nil; // dont search in sequence
6363
next = node.child_idx;
6464
continue;
6565
}
@@ -77,7 +77,7 @@ public type Iter struct {
7777

7878
public fn Iter Parser.getNodeChildIter(const Parser* p, const Node* n) {
7979
Iter iter = { .data = &p.data, .node = nil }
80-
if (n && n.kind != NodeKind.Scalar && n.child_idx) {
80+
if (n && n.kind != Scalar && n.child_idx) {
8181
iter.node = p.data.idx2node(n.child_idx);
8282
}
8383
return iter;
@@ -103,7 +103,7 @@ public fn const char* Iter.getName(const Iter* iter) {
103103

104104
public fn const char* Iter.getValue(const Iter* iter) {
105105
const Data* d = (Data*)iter.data;
106-
if (iter.node && iter.node.kind == NodeKind.Scalar) return &d.text[iter.node.text_idx];
106+
if (iter.node && iter.node.kind == Scalar) return &d.text[iter.node.text_idx];
107107
return nil;
108108
}
109109

@@ -112,7 +112,7 @@ public fn Iter Iter.getChildIter(Iter* parent) @(unused) {
112112
if (parent.node == nil) return iter;
113113

114114
const Node* n = parent.node;
115-
if (n.kind != NodeKind.Scalar && n.child_idx) {
115+
if (n.kind != Scalar && n.child_idx) {
116116
const Data* d = (Data*)iter.data;
117117
iter.node = d.idx2node(n.child_idx);
118118
}
@@ -122,7 +122,7 @@ public fn Iter Iter.getChildIter(Iter* parent) @(unused) {
122122
public fn const char* Iter.getChildScalarValue(Iter* iter, const char* path) {
123123
if (!iter.node) return nil;
124124

125-
if (iter.node.kind == NodeKind.Sequence) return nil;
125+
if (iter.node.kind == Sequence) return nil;
126126
const Data* d = (Data*)iter.data;
127127
const Node* n = d.findChildNode(path, iter.node.child_idx);
128128
if (n && n.isScalar()) return &d.text[n.text_idx];

common/yaml/yaml_parser.c2

Lines changed: 27 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -52,13 +52,13 @@ public fn void Parser.destroy(Parser* p) {
5252
public fn bool Parser.parse(Parser* p, const char* input) {
5353
p.tokenizer.init(input, &p.data, p.message);
5454

55-
p.token.kind = TokenKind.None;
55+
p.token.kind = None;
5656

5757
i32 res = setjmp(&p.jmp_err);
5858
if (res == 0) {
5959
p.consumeToken();
6060

61-
while (p.token.kind != TokenKind.Eof) p.parse_doc();
61+
while (p.token.kind != Eof) p.parse_doc();
6262
} else {
6363
// got error, error_msg should be set
6464
return false;
@@ -75,18 +75,20 @@ fn void Parser.error(Parser* p, const char* format @(printf_format), ...) {
7575
va_list args;
7676
va_start(args, format);
7777
char* cp = p.message;
78-
cp += vsnprintf(cp, MaxDiag-1, format, args);
78+
i32 len = vsnprintf(cp, MaxDiag, format, args);
7979
va_end(args);
80-
sprintf(cp, " %s", p.token.loc.str());
80+
if ((u32)len < MaxDiag) {
81+
snprintf(cp + len, MaxDiag - len, "at line %d:%d", p.token.loc.line, p.token.loc.column);
82+
}
8183
longjmp(&p.jmp_err, 1);
8284
}
8385

8486
fn void Parser.consumeToken(Parser* p) {
8587
p.tokenizer.lex(&p.token);
8688
#if YamlPrintToken
87-
printf("%s %s %d\n", p.token.str(), p.token.loc.str(), p.token.same_line);
89+
printf("%s pos %d:%d %d\n", p.token.str(), p.token.loc.line, p.token.loc.column, p.token.same_line);
8890
#endif
89-
if (p.token.kind == TokenKind.Error) longjmp(&p.jmp_err, 1);
91+
if (p.token.kind == Error) longjmp(&p.jmp_err, 1);
9092
}
9193

9294
fn void Parser.expectAndConsume(Parser* p, TokenKind kind) {
@@ -130,16 +132,16 @@ fn void Parser.parse_node(Parser* p) {
130132
case Plain_Scalar:
131133
case Single_Quoted_Scalar:
132134
case Double_Quoted_Scalar:
133-
Node* n = p.data.add_node(NodeKind.Unknown, p.token.text_idx);
134-
p.push_node(n, NodeKind.Unknown, p.cur_indent);
135+
Node* n = p.data.add_node(Unknown, p.token.text_idx);
136+
p.push_node(n, Unknown, p.cur_indent);
135137
p.consumeToken();
136-
p.expectAndConsume(TokenKind.Colon);
138+
p.expectAndConsume(Colon);
137139
p.parse_value();
138140
break;
139141
case Dash:
140142
p.consumeToken();
141-
Node* n = p.data.add_node(NodeKind.Unknown, 0);
142-
p.push_node(n, NodeKind.Sequence, p.cur_indent + 1);
143+
Node* n = p.data.add_node(Unknown, 0);
144+
p.push_node(n, Sequence, p.cur_indent + 1);
143145
p.parse_node_or_value();
144146
break;
145147
case Indent:
@@ -175,8 +177,8 @@ fn void Parser.parse_value(Parser* p) {
175177
return;
176178
case Dash:
177179
p.consumeToken();
178-
Node* n = p.data.add_node(NodeKind.Unknown, 0);
179-
p.push_node(n, NodeKind.Sequence, p.cur_indent + 1);
180+
Node* n = p.data.add_node(Unknown, 0);
181+
p.push_node(n, Sequence, p.cur_indent + 1);
180182
p.parse_node_or_value();
181183
return;
182184
case Indent:
@@ -208,7 +210,7 @@ fn void Parser.parse_node_or_value(Parser* p) {
208210
case Single_Quoted_Scalar:
209211
case Double_Quoted_Scalar:
210212
Token* next = p.tokenizer.lex_next();
211-
if (next.kind == TokenKind.Colon) {
213+
if (next.kind == Colon) {
212214
// NOTE: this doesn't work, because tokenizer doesn't know (and doesn't give DEDENT)
213215
p.cur_indent += 2; // one for dash, one for node
214216
// TEMP DIRTY HACK, how to do properly?
@@ -231,8 +233,8 @@ fn void Parser.doc_start(Parser* p) {
231233

232234
fn void Parser.doc_end(Parser* p) {
233235
p.cur_indent = -1;
234-
if (p.stack_size == 1 && p.stack[0].node.kind == NodeKind.Unknown) {
235-
p.stack[0].node.kind = NodeKind.Map;
236+
if (p.stack_size == 1 && p.stack[0].node.kind == Unknown) {
237+
p.stack[0].node.kind = Map;
236238
}
237239
p.pop();
238240
p.cur_indent = 0;
@@ -242,11 +244,11 @@ fn void Parser.doc_end(Parser* p) {
242244
fn void Parser.add_scalar_value(Parser* p, u32 value_idx) {
243245
StackLevel* top = &p.stack[p.stack_size-1];
244246
Node* n = top.node;
245-
if (n.kind != NodeKind.Unknown) {
247+
if (n.kind != Unknown) {
246248
//p.error("%s() cannot add scalar to node", __func__);
247249
p.error("%s() cannot add scalar to node", "add_scalar_value");
248250
}
249-
n.kind = NodeKind.Scalar;
251+
n.kind = Scalar;
250252
n.text_idx = value_idx;
251253
}
252254

@@ -260,7 +262,7 @@ fn void Parser.pop(Parser* p) {
260262
StackLevel* prev = &p.stack[p.stack_size-2];
261263
prev.last_child = top.node;
262264
}
263-
if (top.node.kind == NodeKind.Unknown) top.node.kind = NodeKind.Scalar;
265+
if (top.node.kind == Unknown) top.node.kind = Scalar;
264266

265267
top.indent = 0;
266268
top.node = nil;
@@ -270,7 +272,7 @@ fn void Parser.pop(Parser* p) {
270272
}
271273

272274
fn void Parser.push_root(Parser* p) {
273-
Node* root = p.data.add_node(NodeKind.Unknown, 0);
275+
Node* root = p.data.add_node(Unknown, 0);
274276
StackLevel* top = &p.stack[0];
275277
if (p.stack_size) {
276278
top.node.next_idx = p.data.node2idx(root);
@@ -295,7 +297,7 @@ fn void Parser.push_node(Parser* p, Node* n, NodeKind parent_kind, i32 indent) {
295297
if (top.indent == indent) { // same level
296298
if (top.node) {
297299
// close old node as SCALAR with empty data
298-
if (top.node.kind == NodeKind.Unknown) top.node.kind = NodeKind.Scalar;
300+
if (top.node.kind == Unknown) top.node.kind = Scalar;
299301
top.node.next_idx = n_idx;
300302
}
301303
top.last_child = nil;
@@ -304,9 +306,9 @@ fn void Parser.push_node(Parser* p, Node* n, NodeKind parent_kind, i32 indent) {
304306
assert(indent > top.indent);
305307
Node* parent = top.node;
306308

307-
if (parent.kind == NodeKind.Unknown) {
309+
if (parent.kind == Unknown) {
308310
// just assign it
309-
if (parent_kind == NodeKind.Unknown) parent_kind = NodeKind.Map;
311+
if (parent_kind == Unknown) parent_kind = Map;
310312
parent.kind = parent_kind;
311313
}
312314
if (top.last_child) {
@@ -326,9 +328,8 @@ fn void Parser.push_node(Parser* p, Node* n, NodeKind parent_kind, i32 indent) {
326328
StackLevel* prev = &p.stack[p.stack_size-2];
327329
Node* parent = prev.node;
328330

329-
if (parent.kind != parent_kind
330-
&& !(parent.kind == NodeKind.Map && parent_kind == NodeKind.Unknown)) {
331-
if (parent.kind == NodeKind.Sequence) {
331+
if (parent.kind != parent_kind && !(parent.kind == Map && parent_kind == Unknown)) {
332+
if (parent.kind == Sequence) {
332333
p.error("invalid scalar after sequence");
333334
} else {
334335
p.error("invalid scalar after %s", node_names[parent.kind]);

0 commit comments

Comments
 (0)