Skip to content

Commit 0454c46

Browse files
authored
feat: optimize with branch prediction and cache-friendly structures (#13)
1 parent a4f0844 commit 0454c46

File tree

5 files changed

+1347
-77
lines changed

5 files changed

+1347
-77
lines changed

mergevalues.go

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,12 @@ func MergeValues(ar arena.Arena, a, b *Value) (v *Value, changed bool, err error
3535
case TypeObject:
3636
ao, _ := a.Object()
3737
bo, _ := b.Object()
38-
ao.unescapeKeys(ar)
39-
bo.unescapeKeys(ar)
38+
// Unescape every not-yet-unescaped key of b up front, before the merge loop below (a's keys are no longer unescaped here)
39+
for i := range bo.kvs {
40+
if !bo.kvs[i].keyUnescaped {
41+
bo.unescapeKey(ar, bo.kvs[i])
42+
}
43+
}
4044
for i := range bo.kvs {
4145
k := bo.kvs[i].k
4246
r := bo.kvs[i].v

parser.go

Lines changed: 85 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ func NewParseError(err error) *ParseError {
3636
// Parser cannot be used from concurrent goroutines.
3737
// Use per-goroutine parsers or ParserPool instead.
3838
type Parser struct {
39-
b []byte
4039
}
4140

4241
// Parse parses s containing JSON.
@@ -45,13 +44,11 @@ type Parser struct {
4544
//
4645
// Use Scanner if a stream of JSON values must be parsed.
4746
func (p *Parser) Parse(s string) (*Value, error) {
48-
p.b = append(p.b[:0], s...)
49-
return p.parse(nil, b2s(p.b))
47+
return p.parse(nil, s)
5048
}
5149

5250
func (p *Parser) ParseWithArena(a arena.Arena, s string) (*Value, error) {
53-
p.b = append(p.b[:0], s...)
54-
return p.parse(a, b2s(p.b))
51+
return p.parse(a, s)
5552
}
5653

5754
// ParseBytes parses b containing JSON.
@@ -83,27 +80,37 @@ func (p *Parser) parse(a arena.Arena, s string) (*Value, error) {
8380

8481
func skipWS(s string) string {
8582
if len(s) == 0 || s[0] > 0x20 {
86-
// Fast path.
83+
// Fast path - most common case
8784
return s
8885
}
8986
return skipWSSlow(s)
9087
}
9188

9289
func skipWSSlow(s string) string {
93-
if len(s) == 0 || s[0] != 0x20 && s[0] != 0x0A && s[0] != 0x09 && s[0] != 0x0D {
90+
if len(s) == 0 {
9491
return s
9592
}
96-
for i := 1; i < len(s); i++ {
97-
if s[i] != 0x20 && s[i] != 0x0A && s[i] != 0x09 && s[i] != 0x0D {
98-
return s[i:]
93+
94+
// Branch prediction optimization: check most common whitespace first
95+
// Space (0x20) is most common, then newline, tab, carriage return
96+
for i := 0; i < len(s); i++ {
97+
c := s[i]
98+
if c != 0x20 { // Most common whitespace
99+
if c != 0x0A && c != 0x09 && c != 0x0D {
100+
return s[i:]
101+
}
99102
}
100103
}
101104
return ""
102105
}
103106

107+
// kv represents a key-value pair in JSON objects.
108+
// Cache-friendly layout: hot data first
104109
type kv struct {
105-
k string
106-
v *Value
110+
keyUnescaped bool // 1 byte - tracks if this specific key has been unescaped
111+
k string // 16 bytes
112+
v *Value // 8 bytes
113+
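// Total: 25 bytes of field data; 32 bytes with alignment padding on 64-bit targets (the leading bool is padded to 8 bytes) - still fits in a cache line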
107114
}
108115

109116
// MaxDepth is the maximum depth for nested JSON.
@@ -118,43 +125,47 @@ func parseValue(a arena.Arena, s string, depth int) (*Value, string, error) {
118125
return nil, s, fmt.Errorf("too big depth for the nested JSON; it exceeds %d", MaxDepth)
119126
}
120127

121-
if s[0] == '{' {
128+
// Branch prediction optimization: order by frequency
129+
// Most JSON contains strings and numbers, then objects, then arrays, then literals
130+
switch s[0] {
131+
case '"':
132+
// String - most common in JSON
133+
ss, tail, err := parseRawString(s[1:])
134+
if err != nil {
135+
return nil, tail, fmt.Errorf("cannot parse string: %s", err)
136+
}
137+
v := arena.Allocate[Value](a)
138+
v.t = TypeString
139+
v.s = unescapeStringBestEffort(a, ss)
140+
return v, tail, nil
141+
case '{':
142+
// Object - very common
122143
v, tail, err := parseObject(a, s[1:], depth)
123144
if err != nil {
124145
return nil, tail, fmt.Errorf("cannot parse object: %s", err)
125146
}
126147
return v, tail, nil
127-
}
128-
if s[0] == '[' {
148+
case '[':
149+
// Array - common
129150
v, tail, err := parseArray(a, s[1:], depth)
130151
if err != nil {
131152
return nil, tail, fmt.Errorf("cannot parse array: %s", err)
132153
}
133154
return v, tail, nil
134-
}
135-
if s[0] == '"' {
136-
ss, tail, err := parseRawString(s[1:])
137-
if err != nil {
138-
return nil, tail, fmt.Errorf("cannot parse string: %s", err)
139-
}
140-
v := arena.Allocate[Value](a)
141-
v.t = TypeString
142-
v.s = unescapeStringBestEffort(a, ss)
143-
return v, tail, nil
144-
}
145-
if s[0] == 't' {
155+
case 't':
156+
// true literal - less common
146157
if len(s) < len("true") || s[:len("true")] != "true" {
147158
return nil, s, fmt.Errorf("unexpected value found: %q", s)
148159
}
149160
return valueTrue, s[len("true"):], nil
150-
}
151-
if s[0] == 'f' {
161+
case 'f':
162+
// false literal - less common
152163
if len(s) < len("false") || s[:len("false")] != "false" {
153164
return nil, s, fmt.Errorf("unexpected value found: %q", s)
154165
}
155166
return valueFalse, s[len("false"):], nil
156-
}
157-
if s[0] == 'n' {
167+
case 'n':
168+
// null literal - less common
158169
if len(s) < len("null") || s[:len("null")] != "null" {
159170
// Try parsing NaN
160171
if len(s) >= 3 && strings.EqualFold(s[:3], "nan") {
@@ -166,16 +177,17 @@ func parseValue(a arena.Arena, s string, depth int) (*Value, string, error) {
166177
return nil, s, fmt.Errorf("unexpected value found: %q", s)
167178
}
168179
return valueNull, s[len("null"):], nil
180+
default:
181+
// Number - very common, but handled last due to complex parsing
182+
ns, tail, err := parseRawNumber(s)
183+
if err != nil {
184+
return nil, tail, fmt.Errorf("cannot parse number: %s", err)
185+
}
186+
v := arena.Allocate[Value](a)
187+
v.t = TypeNumber
188+
v.s = ns
189+
return v, tail, nil
169190
}
170-
171-
ns, tail, err := parseRawNumber(s)
172-
if err != nil {
173-
return nil, tail, fmt.Errorf("cannot parse number: %s", err)
174-
}
175-
v := arena.Allocate[Value](a)
176-
v.t = TypeNumber
177-
v.s = ns
178-
return v, tail, nil
179191
}
180192

181193
func parseArray(a arena.Arena, s string, depth int) (*Value, string, error) {
@@ -296,12 +308,15 @@ func escapeString(dst []byte, s string) []byte {
296308
}
297309

298310
func hasSpecialChars(s string) bool {
311+
// Branch prediction optimization: check most common cases first
299312
for i := 0; i < len(s); i++ {
300-
if s[i] == '"' || s[i] == '\\' {
313+
c := s[i]
314+
// Most common special chars first
315+
if c == '"' || c == '\\' {
301316
return true
302317
}
303-
switch {
304-
case s[i] < 0x1a, s[i] < 0x20, s[i] < 0x10, s[i] == 0x0d, s[i] == 0x0c, s[i] == 0x0a, s[i] == 0x09, s[i] < 0x09, s[i] == 0x08:
318+
// Control characters - less common
319+
if c < 0x20 {
305320
return true
306321
}
307322
}
@@ -508,21 +523,22 @@ func parseRawNumber(s string) (string, string, error) {
508523
//
509524
// Object cannot be used from concurrent goroutines.
510525
// Use per-goroutine parsers or ParserPool instead.
526+
//
527+
// Cache-friendly layout: hot data first
511528
type Object struct {
512-
kvs []*kv
513-
keysUnescaped bool
529+
kvs []*kv // HOT: frequently accessed - 24 bytes
530+
// Total: 24 bytes - compact and cache-friendly
514531
}
515532

516533
func (o *Object) reset() {
517534
o.kvs = o.kvs[:0]
518-
o.keysUnescaped = false
519535
}
520536

521537
// MarshalTo appends marshaled o to dst and returns the result.
522538
func (o *Object) MarshalTo(dst []byte) []byte {
523539
dst = append(dst, '{')
524540
for i, kv := range o.kvs {
525-
if o.keysUnescaped {
541+
if kv.keyUnescaped {
526542
dst = escapeString(dst, kv.k)
527543
} else {
528544
dst = append(dst, '"')
@@ -558,14 +574,13 @@ func (o *Object) getKV(a arena.Arena) *kv {
558574
return o.kvs[len(o.kvs)-1]
559575
}
560576

561-
func (o *Object) unescapeKeys(a arena.Arena) {
562-
if o.keysUnescaped {
577+
// unescapeKey unescapes a specific key if it hasn't been unescaped yet.
578+
func (o *Object) unescapeKey(a arena.Arena, kv *kv) {
579+
if kv.keyUnescaped {
563580
return
564581
}
565-
for i := range o.kvs {
566-
o.kvs[i].k = unescapeStringBestEffort(a, o.kvs[i].k)
567-
}
568-
o.keysUnescaped = true
582+
kv.k = unescapeStringBestEffort(a, kv.k)
583+
kv.keyUnescaped = true
569584
}
570585

571586
// Len returns the number of items in the o.
@@ -584,19 +599,20 @@ func (o *Object) Get(key string) *Value {
584599
return nil
585600
}
586601

587-
if !o.keysUnescaped && strings.IndexByte(key, '\\') < 0 {
588-
// Fast path - try searching for the key without object keys unescaping.
602+
// Fast path - try searching for the key without unescaping if the key doesn't contain escapes
603+
if strings.IndexByte(key, '\\') < 0 {
589604
for _, kv := range o.kvs {
590-
if kv.k == key {
605+
if !kv.keyUnescaped && kv.k == key {
591606
return kv.v
592607
}
593608
}
594609
}
595610

596-
// Slow path - unescape object keys.
597-
o.unescapeKeys(nil)
598-
611+
// Slow path - unescape keys as needed and search
599612
for _, kv := range o.kvs {
613+
if !kv.keyUnescaped {
614+
o.unescapeKey(nil, kv)
615+
}
600616
if kv.k == key {
601617
return kv.v
602618
}
@@ -613,9 +629,10 @@ func (o *Object) Visit(f func(key []byte, v *Value)) {
613629
return
614630
}
615631

616-
o.unescapeKeys(nil)
617-
618632
for _, kv := range o.kvs {
633+
if !kv.keyUnescaped {
634+
o.unescapeKey(nil, kv)
635+
}
619636
f(s2b(kv.k), kv.v)
620637
}
621638
}
@@ -626,11 +643,14 @@ func (o *Object) Visit(f func(key []byte, v *Value)) {
626643
//
627644
// Value cannot be used from concurrent goroutines.
628645
// Use per-goroutine parsers or ParserPool instead.
646+
//
647+
// Cache-friendly layout: hot data first, compact structure
629648
type Value struct {
630-
o Object
631-
a []*Value
632-
s string
633-
t Type
649+
t Type // HOT: accessed on every operation - 8 bytes
650+
s string // HOT: frequently accessed for strings/numbers - 16 bytes
651+
a []*Value // HOT: frequently accessed for arrays - 24 bytes
652+
o Object // COLD: less frequently accessed - 24 bytes (only the kvs slice header remains)
653+
// Total: 72 bytes - compact and cache-friendly
634654
}
635655

636656
// MarshalTo appends marshaled v to dst and returns the result.

0 commit comments

Comments
 (0)