Skip to content

Commit 7bf14e2

Browse files
authored
feat: reduce alloc when parsing large objects and arrays (#11)
1 parent be463c9 commit 7bf14e2

File tree

5 files changed

+111617
-30
lines changed

5 files changed

+111617
-30
lines changed

parser.go

Lines changed: 121 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -97,26 +97,55 @@ func (p *Parser) ParseBytesWithoutCache(b []byte) (*Value, error) {
9797

9898
type cache struct {
9999
vs []Value
100+
nx *cache // next
101+
lt *cache // last
100102
}
101103

102104
func (c *cache) reset() {
103105
c.vs = c.vs[:0]
106+
c.lt = nil
107+
if c.nx != nil {
108+
c.nx.reset()
109+
}
104110
}
105111

112+
const (
113+
preAllocatedCacheSize = 341 // 32kb class size
114+
maxAllocatedCacheSize = 10922 // 1MB
115+
)
116+
106117
func (c *cache) getValue() *Value {
107118
if c == nil {
108119
return &Value{}
109120
}
110-
if cap(c.vs) > len(c.vs) {
111-
c.vs = c.vs[:len(c.vs)+1]
112-
} else {
113-
if len(c.vs) == 0 {
114-
c.vs = make([]Value, 4)
115-
} else {
116-
c.vs = make([]Value, 1, len(c.vs)*2)
121+
readSrc := c
122+
if readSrc.lt != nil {
123+
readSrc = readSrc.lt
124+
}
125+
switch {
126+
case cap(readSrc.vs) == 0:
127+
// initial state
128+
readSrc.vs = make([]Value, 1, preAllocatedCacheSize)
129+
130+
case cap(readSrc.vs) > len(readSrc.vs):
131+
readSrc.vs = readSrc.vs[:len(readSrc.vs)+1]
132+
133+
default:
134+
if readSrc.nx == nil {
135+
nextLen := len(readSrc.vs) * 2
136+
if nextLen > maxAllocatedCacheSize {
137+
nextLen = maxAllocatedCacheSize
138+
}
139+
readSrc.nx = &cache{
140+
vs: make([]Value, 0, nextLen),
141+
}
117142
}
143+
c.lt = readSrc.nx
144+
readSrc = readSrc.nx
145+
readSrc.vs = readSrc.vs[:len(readSrc.vs)+1]
118146
}
119-
return &c.vs[len(c.vs)-1]
147+
// Do not reset the value, since the caller must properly init it.
148+
return &readSrc.vs[len(readSrc.vs)-1]
120149
}
121150

122151
func skipWS(s string) string {
@@ -540,29 +569,41 @@ func parseRawNumber(s string) (string, string, error) {
540569
type Object struct {
541570
kvs []kv
542571
keysUnescaped bool
572+
nx *Object
573+
lt *Object
543574
}
544575

545576
func (o *Object) reset() {
546577
o.kvs = o.kvs[:0]
547578
o.keysUnescaped = false
579+
o.lt = nil
580+
if o.nx != nil {
581+
o.nx.reset()
582+
}
548583
}
549584

550585
// MarshalTo appends marshaled o to dst and returns the result.
551586
func (o *Object) MarshalTo(dst []byte) []byte {
552587
dst = append(dst, '{')
553-
for i, kv := range o.kvs {
554-
if o.keysUnescaped {
555-
dst = escapeString(dst, kv.k)
556-
} else {
557-
dst = append(dst, '"')
558-
dst = append(dst, kv.k...)
559-
dst = append(dst, '"')
560-
}
561-
dst = append(dst, ':')
562-
dst = kv.v.MarshalTo(dst)
563-
if i != len(o.kvs)-1 {
564-
dst = append(dst, ',')
588+
srcKV := o
589+
lastN := o.Len()
590+
n := 0
591+
for srcKV != nil {
592+
for _, kv := range srcKV.kvs {
593+
if srcKV.keysUnescaped {
594+
dst = escapeString(dst, kv.k)
595+
} else {
596+
dst = append(dst, '"')
597+
dst = append(dst, kv.k...)
598+
dst = append(dst, '"')
599+
}
600+
dst = append(dst, ':')
601+
dst = kv.v.MarshalTo(dst)
602+
if n++; n != lastN {
603+
dst = append(dst, ',')
604+
}
565605
}
606+
srcKV = srcKV.nx
566607
}
567608
dst = append(dst, '}')
568609
return dst
@@ -579,13 +620,45 @@ func (o *Object) String() string {
579620
return b2s(b)
580621
}
581622

623+
const (
624+
preAllocatedObjectKVs = 170 // 8kb class
625+
maxAllocatedObjectKVS = 21845 // 1MB class
626+
)
627+
582628
func (o *Object) getKV() *kv {
583-
if cap(o.kvs) > len(o.kvs) {
584-
o.kvs = o.kvs[:len(o.kvs)+1]
585-
} else {
586-
o.kvs = append(o.kvs, kv{})
629+
kvSrc := o
630+
if kvSrc.lt != nil {
631+
kvSrc = kvSrc.lt
587632
}
588-
return &o.kvs[len(o.kvs)-1]
633+
switch {
634+
case cap(kvSrc.kvs) == 0:
635+
// initial state
636+
kvSrc.kvs = append(kvSrc.kvs, kv{})
637+
638+
case cap(kvSrc.kvs) > len(kvSrc.kvs):
639+
kvSrc.kvs = kvSrc.kvs[:len(kvSrc.kvs)+1]
640+
641+
default:
642+
if cap(kvSrc.kvs) < preAllocatedObjectKVs {
643+
kvSrc.kvs = append(kvSrc.kvs, kv{})
644+
break
645+
}
646+
// new chain
647+
if kvSrc.nx == nil {
648+
nextLen := len(kvSrc.kvs) * 2
649+
if nextLen > maxAllocatedObjectKVS {
650+
nextLen = maxAllocatedObjectKVS
651+
}
652+
kvSrc.nx = &Object{
653+
kvs: make([]kv, 0, nextLen),
654+
}
655+
}
656+
kvSrc = kvSrc.nx
657+
o.lt = kvSrc
658+
kvSrc.kvs = kvSrc.kvs[:len(kvSrc.kvs)+1]
659+
}
660+
661+
return &kvSrc.kvs[len(kvSrc.kvs)-1]
589662
}
590663

591664
func (o *Object) unescapeKeys() {
@@ -597,12 +670,18 @@ func (o *Object) unescapeKeys() {
597670
kv := &kvs[i]
598671
kv.k = unescapeStringBestEffort(kv.k)
599672
}
673+
if o.nx != nil {
674+
o.nx.unescapeKeys()
675+
}
600676
o.keysUnescaped = true
601677
}
602678

603679
// Len returns the number of items in o.
604680
func (o *Object) Len() int {
605-
return len(o.kvs)
681+
if o.nx == nil {
682+
return len(o.kvs)
683+
}
684+
return len(o.kvs) + o.nx.Len()
606685
}
607686

608687
// Get returns the value for the given key in o.
@@ -618,6 +697,11 @@ func (o *Object) Get(key string) *Value {
618697
return kv.v
619698
}
620699
}
700+
if o.nx != nil {
701+
if v := o.nx.Get(key); v != nil {
702+
return v
703+
}
704+
}
621705
}
622706

623707
// Slow path - unescape object keys.
@@ -628,6 +712,13 @@ func (o *Object) Get(key string) *Value {
628712
return kv.v
629713
}
630714
}
715+
716+
if o.nx != nil {
717+
if v := o.nx.Get(key); v != nil {
718+
return v
719+
}
720+
}
721+
631722
return nil
632723
}
633724

@@ -645,6 +736,10 @@ func (o *Object) Visit(f func(key []byte, v *Value)) {
645736
for _, kv := range o.kvs {
646737
f(s2b(kv.k), kv.v)
647738
}
739+
740+
if o.nx != nil {
741+
o.nx.Visit(f)
742+
}
648743
}
649744

650745
// Value represents any JSON value.

parser_test.go

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1288,3 +1288,27 @@ func TestParseWithoutCache(t *testing.T) {
12881288
t.Fatalf("unexpected value for key=%q; got %q; want %q", "foo", sb, "bar")
12891289
}
12901290
}
1291+
1292+
func TestMarshalTo(t *testing.T) {
1293+
fileData := getFromFile("testdata/bunchFields.json")
1294+
var p Parser
1295+
v, err := p.Parse(fileData)
1296+
if err != nil {
1297+
t.Fatalf("cannot parse json: %s", err)
1298+
}
1299+
data := make([]byte, 0, len(fileData))
1300+
data = v.MarshalTo(data)
1301+
// check
1302+
var p2 Parser
1303+
v, err = p2.ParseBytes(data)
1304+
if err != nil {
1305+
t.Fatalf("cannot parse json: %s", err)
1306+
}
1307+
o, err := v.Object()
1308+
if err != nil {
1309+
t.Fatalf("expected object, got: %s", o.String())
1310+
}
1311+
if o.Len() != 871 {
1312+
t.Fatalf("expected 871 fields, got %d", o.Len())
1313+
}
1314+
}

parser_timing_test.go

Lines changed: 13 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -76,6 +76,7 @@ func BenchmarkObjectGet(b *testing.B) {
7676
}
7777

7878
func benchmarkObjectGet(b *testing.B, itemsCount, lookupsCount int) {
79+
var benchPool ParserPool
7980
b.StopTimer()
8081
var ss []string
8182
for i := 0; i < itemsCount; i++ {
@@ -127,10 +128,15 @@ func BenchmarkMarshalTo(b *testing.B) {
127128
b.Run("twitter", func(b *testing.B) {
128129
benchmarkMarshalTo(b, twitterFixture)
129130
})
131+
b.Run("20mb", func(b *testing.B) {
132+
benchmarkMarshalTo(b, huge20MbFixture)
133+
})
130134
}
131135

136+
var benchPoolMarshalTo ParserPool
137+
132138
func benchmarkMarshalTo(b *testing.B, s string) {
133-
p := benchPool.Get()
139+
p := benchPoolMarshalTo.Get()
134140
v, err := p.Parse(s)
135141
if err != nil {
136142
panic(fmt.Errorf("unexpected error: %s", err))
@@ -146,7 +152,7 @@ func benchmarkMarshalTo(b *testing.B, s string) {
146152
b = v.MarshalTo(b[:0])
147153
}
148154
})
149-
benchPool.Put(p)
155+
benchPoolMarshalTo.Put(p)
150156
}
151157

152158
func BenchmarkParse(b *testing.B) {
@@ -180,6 +186,9 @@ var (
180186
canadaFixture = getFromFile("testdata/canada.json")
181187
citmFixture = getFromFile("testdata/citm_catalog.json")
182188
twitterFixture = getFromFile("testdata/twitter.json")
189+
190+
// huge20MbFixture is a huge (stress-test) fixture from https://examplefile.com/code/json/20-mb-json
191+
huge20MbFixture = getFromFile("testdata/20mb.json")
183192
)
184193

185194
func getFromFile(filename string) string {
@@ -209,6 +218,7 @@ func benchmarkParse(b *testing.B, s string) {
209218
}
210219

211220
func benchmarkFastJSONParse(b *testing.B, s string) {
221+
var benchPool ParserPool
212222
b.ReportAllocs()
213223
b.SetBytes(int64(len(s)))
214224
b.RunParallel(func(pb *testing.PB) {
@@ -227,6 +237,7 @@ func benchmarkFastJSONParse(b *testing.B, s string) {
227237
}
228238

229239
func benchmarkFastJSONParseGet(b *testing.B, s string) {
240+
var benchPool ParserPool
230241
b.ReportAllocs()
231242
b.SetBytes(int64(len(s)))
232243
b.RunParallel(func(pb *testing.PB) {
@@ -264,8 +275,6 @@ func benchmarkFastJSONParseGet(b *testing.B, s string) {
264275
})
265276
}
266277

267-
var benchPool ParserPool
268-
269278
func benchmarkStdJSONParseMap(b *testing.B, s string) {
270279
b.ReportAllocs()
271280
b.SetBytes(int64(len(s)))

0 commit comments

Comments
 (0)