Skip to content

Commit e0c9169

Browse files
committed
feat(parser): add bytes literal syntax
Implements bytes literals, which return []byte directly and interpret \x and octal escapes as raw byte values (0-255) rather than Unicode codepoints. Components: - lexer/token: add Bytes token kind - lexer/state: detect [bB]'...' and [bB]"..." prefix, emit Bytes token - lexer/utils: add unescapeBytes and unescapeByteChar functions for byte-level escape handling (no \u/\U support, 0-255 limit) - parser: parse Bytes token into BytesNode - ast/node: add BytesNode struct with []byte value - ast/visitor: handle BytesNode in AST walker - ast/print: add String() method - checker: type inference returns []byte - compiler: emit push for BytesNode value Updated docs. Signed-off-by: Ville Vesilehto <[email protected]>
1 parent c020e95 commit e0c9169

File tree

13 files changed

+336
-0
lines changed

13 files changed

+336
-0
lines changed

ast/node.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ type StringNode struct {
104104
Value string // Value of the string.
105105
}
106106

107+
// BytesNode represents a byte slice.
108+
type BytesNode struct {
109+
base
110+
Value []byte // Value of the byte slice.
111+
}
112+
107113
// ConstantNode represents a constant.
108114
// Constants are predefined values like nil, true, false, array, map, etc.
109115
// The parser.Parse will never generate ConstantNode, it is only generated

ast/print.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ func (n *StringNode) String() string {
3333
return fmt.Sprintf("%q", n.Value)
3434
}
3535

36+
func (n *BytesNode) String() string {
37+
return fmt.Sprintf("b%q", n.Value)
38+
}
39+
3640
func (n *ConstantNode) String() string {
3741
if n.Value == nil {
3842
return "nil"

ast/visitor.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ func Walk(node *Node, v Visitor) {
1717
case *FloatNode:
1818
case *BoolNode:
1919
case *StringNode:
20+
case *BytesNode:
2021
case *ConstantNode:
2122
case *UnaryNode:
2223
Walk(&n.Node, v)

checker/checker.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ var (
2424
mapType = reflect.TypeOf(map[string]any{})
2525
timeType = reflect.TypeOf(time.Time{})
2626
durationType = reflect.TypeOf(time.Duration(0))
27+
byteSliceType = reflect.TypeOf([]byte(nil))
2728

2829
anyTypeSlice = []reflect.Type{anyType}
2930
)
@@ -194,6 +195,8 @@ func (v *Checker) visit(node ast.Node) Nature {
194195
nt = v.config.NtCache.FromType(boolType)
195196
case *ast.StringNode:
196197
nt = v.config.NtCache.FromType(stringType)
198+
case *ast.BytesNode:
199+
nt = v.config.NtCache.FromType(byteSliceType)
197200
case *ast.ConstantNode:
198201
nt = v.config.NtCache.FromType(reflect.TypeOf(n.Value))
199202
case *ast.UnaryNode:

compiler/compiler.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,8 @@ func (c *compiler) compile(node ast.Node) {
254254
c.BoolNode(n)
255255
case *ast.StringNode:
256256
c.StringNode(n)
257+
case *ast.BytesNode:
258+
c.BytesNode(n)
257259
case *ast.ConstantNode:
258260
c.ConstantNode(n)
259261
case *ast.UnaryNode:
@@ -410,6 +412,10 @@ func (c *compiler) StringNode(node *ast.StringNode) {
410412
c.emitPush(node.Value)
411413
}
412414

415+
func (c *compiler) BytesNode(node *ast.BytesNode) {
416+
c.emitPush(node.Value)
417+
}
418+
413419
func (c *compiler) ConstantNode(node *ast.ConstantNode) {
414420
if node.Value == nil {
415421
c.emit(OpNil)

docs/language-definition.md

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,12 @@
5353
<code>nil</code>
5454
</td>
5555
</tr>
56+
<tr>
57+
<td><strong>Bytes</strong></td>
58+
<td>
59+
<code>b"hello"</code>, <code>b'\xff\x00'</code>
60+
</td>
61+
</tr>
5662
</table>
5763

5864
### Strings
@@ -73,6 +79,38 @@ World`
7379

7480
Backtick strings are raw strings; they do not support escape sequences.
7581

82+
### Bytes
83+
84+
Bytes literals are represented by string literals preceded by a `b` or `B` character.
85+
The bytes literal returns a `[]byte` value.
86+
87+
```expr
88+
b"abc" // []byte{97, 98, 99}
89+
```
90+
91+
Non-ASCII characters are UTF-8 encoded:
92+
93+
```expr
94+
b"ÿ" // []byte{195, 191} - UTF-8 encoding of ÿ
95+
```
96+
97+
Bytes literals support escape sequences for specifying arbitrary byte values:
98+
99+
- `\xNN` - hexadecimal escape (2 hex digits, value 0-255)
100+
- `\NNN` - octal escape (3 octal digits, value 000-377 octal, i.e. 0-255)
101+
- `\n`, `\t`, `\r`, etc. - standard escape sequences
102+
103+
```expr
104+
b"\xff" // []byte{255}
105+
b"\x00\x01" // []byte{0, 1}
106+
b"\101" // []byte{65} - octal for 'A'
107+
```
108+
109+
:::note
110+
Unlike string literals, bytes literals do not support `\u` or `\U` Unicode escapes.
111+
Use `\x` escapes for arbitrary byte values.
112+
:::
113+
76114
## Operators
77115

78116
<table>

expr_test.go

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,19 @@ func ExampleCompile() {
7070
// Output: true
7171
}
7272

73+
func ExampleEval_bytes_literal() {
74+
// Bytes literal returns []byte.
75+
output, err := expr.Eval(`b"abc"`, nil)
76+
if err != nil {
77+
fmt.Printf("%v", err)
78+
return
79+
}
80+
81+
fmt.Printf("%v", output)
82+
83+
// Output: [97 98 99]
84+
}
85+
7386
func TestDisableIfOperator_AllowsIfFunction(t *testing.T) {
7487
env := map[string]any{
7588
"if": func(x int) int { return x + 1 },
@@ -2929,3 +2942,60 @@ func TestDisableShortCircuit(t *testing.T) {
29292942
assert.Equal(t, 3, count)
29302943
assert.True(t, got.(bool))
29312944
}
2945+
2946+
func TestBytesLiteral(t *testing.T) {
2947+
tests := []struct {
2948+
code string
2949+
want []byte
2950+
}{
2951+
{`b"hello"`, []byte("hello")},
2952+
{`b'world'`, []byte("world")},
2953+
{`b""`, []byte{}},
2954+
{`b'\x00\xff'`, []byte{0, 255}},
2955+
{`b"\x41\x42\x43"`, []byte("ABC")},
2956+
{`b'\101\102\103'`, []byte("ABC")},
2957+
{`b'\n\t\r'`, []byte{'\n', '\t', '\r'}},
2958+
{`b'hello\x00world'`, []byte("hello\x00world")},
2959+
{`b"ÿ"`, []byte{0xc3, 0xbf}}, // UTF-8 encoding of ÿ
2960+
}
2961+
2962+
for _, tt := range tests {
2963+
t.Run(tt.code, func(t *testing.T) {
2964+
program, err := expr.Compile(tt.code)
2965+
require.NoError(t, err)
2966+
2967+
output, err := expr.Run(program, nil)
2968+
require.NoError(t, err)
2969+
assert.Equal(t, tt.want, output)
2970+
})
2971+
}
2972+
}
2973+
2974+
func TestBytesLiteral_type(t *testing.T) {
2975+
env := map[string]any{
2976+
"data": []byte("test"),
2977+
}
2978+
2979+
// Verify bytes literal has []byte type and can be compared with []byte
2980+
program, err := expr.Compile(`data == b"test"`, expr.Env(env))
2981+
require.NoError(t, err)
2982+
2983+
output, err := expr.Run(program, env)
2984+
require.NoError(t, err)
2985+
assert.Equal(t, true, output)
2986+
}
2987+
2988+
func TestBytesLiteral_errors(t *testing.T) {
2989+
// \u and \U escapes should not be allowed in bytes literals
2990+
errorCases := []string{
2991+
`b'\u0041'`,
2992+
`b"\U00000041"`,
2993+
}
2994+
2995+
for _, code := range errorCases {
2996+
t.Run(code, func(t *testing.T) {
2997+
_, err := expr.Compile(code)
2998+
require.Error(t, err)
2999+
})
3000+
}
3001+
}

parser/lexer/lexer_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,52 @@ func TestLex(t *testing.T) {
299299
{Kind: EOF},
300300
},
301301
},
302+
{
303+
`b"hello" b'world'`,
304+
[]Token{
305+
{Kind: Bytes, Value: "hello"},
306+
{Kind: Bytes, Value: "world"},
307+
{Kind: EOF},
308+
},
309+
},
310+
{
311+
`b"\x00\xff" b'\x41\x42\x43'`,
312+
[]Token{
313+
{Kind: Bytes, Value: "\x00\xff"},
314+
{Kind: Bytes, Value: "ABC"},
315+
{Kind: EOF},
316+
},
317+
},
318+
{
319+
`b"\101\102\103" b'\n\t\r'`,
320+
[]Token{
321+
{Kind: Bytes, Value: "ABC"},
322+
{Kind: Bytes, Value: "\n\t\r"},
323+
{Kind: EOF},
324+
},
325+
},
326+
{
327+
`b""`,
328+
[]Token{
329+
{Kind: Bytes, Value: ""},
330+
{Kind: EOF},
331+
},
332+
},
333+
{
334+
`B"hello" B'world'`,
335+
[]Token{
336+
{Kind: Bytes, Value: "hello"},
337+
{Kind: Bytes, Value: "world"},
338+
{Kind: EOF},
339+
},
340+
},
341+
{
342+
`b"ÿ"`,
343+
[]Token{
344+
{Kind: Bytes, Value: "\xc3\xbf"},
345+
{Kind: EOF},
346+
},
347+
},
302348
}
303349

304350
for _, test := range tests {
@@ -380,6 +426,16 @@ früh ♥︎
380426
unrecognized character: U+2665 '♥' (1:6)
381427
| früh ♥︎
382428
| .....^
429+
430+
b"\u0041"
431+
unable to unescape string (1:9)
432+
| b"\u0041"
433+
| ........^
434+
435+
b'\U00000041'
436+
unable to unescape string (1:13)
437+
| b'\U00000041'
438+
| ............^
383439
`
384440

385441
func TestLex_error(t *testing.T) {

parser/lexer/state.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,14 @@ func root(l *Lexer) stateFn {
2525
l.emitValue(String, str)
2626
case r == '`':
2727
l.scanRawString(r)
28+
case (r == 'b' || r == 'B') && (l.peek() == '\'' || l.peek() == '"'):
29+
quote := l.next()
30+
l.scanString(quote)
31+
str, err := unescapeBytes(l.word()[1:]) // skip 'b'
32+
if err != nil {
33+
l.error("%v", err)
34+
}
35+
l.emitValue(Bytes, str)
2836
case '0' <= r && r <= '9':
2937
l.backup()
3038
return number

parser/lexer/token.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ const (
1212
Identifier Kind = "Identifier"
1313
Number Kind = "Number"
1414
String Kind = "String"
15+
Bytes Kind = "Bytes"
1516
Operator Kind = "Operator"
1617
Bracket Kind = "Bracket"
1718
EOF Kind = "EOF"

0 commit comments

Comments
 (0)