Skip to content

Commit 2455af9

Browse files
authored
Simplify usage (#54)
* Simplify usage
* Add root foreach callback.
* Simplify path specification for FindElement
* Add buger/jsonparser benchmark. Crazy fast.
1 parent d95f712 commit 2455af9

File tree

11 files changed

+321
-158
lines changed

11 files changed

+321
-158
lines changed

README.md

Lines changed: 94 additions & 83 deletions
Large diffs are not rendered by default.

benchmarks_test.go

Lines changed: 42 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"encoding/json"
2121
"testing"
2222

23+
"github.com/buger/jsonparser"
2324
jsoniter "github.com/json-iterator/go"
2425
)
2526

@@ -166,31 +167,31 @@ func BenchmarkJsonParserLarge(b *testing.B) {
166167
b.Fatal(err)
167168
}
168169
iter := pj.Iter()
169-
elem, err = iter.FindElement("users", elem)
170+
elem, err = iter.FindElement(elem, "users")
170171
if checkErrs && err != nil {
171172
b.Fatal(err)
172173
}
173174
ar, err = elem.Iter.Array(ar)
174175
if checkErrs && err != nil {
175176
b.Fatal(err)
176177
}
177-
ar.ForEach(func(t Type, i Iter) {
178-
elem, err = i.FindElement("username", elem)
178+
ar.ForEach(func(i Iter) {
179+
elem, err = i.FindElement(elem, "username")
179180
if checkErrs && err != nil {
180181
b.Fatal(err)
181182
}
182183
_, _ = elem.Iter.StringBytes()
183184
})
184185

185-
elem, err = iter.FindElement("topics/topics", elem)
186+
elem, err = iter.FindElement(elem, "topics", "topics")
186187
if checkErrs && err != nil {
187188
b.Fatal(err)
188189
}
189190
ar, err = elem.Iter.Array(ar)
190191
if checkErrs && err != nil {
191192
b.Fatal(err)
192193
}
193-
ar.ForEach(func(t Type, i Iter) {
194+
ar.ForEach(func(i Iter) {
194195
if true {
195196
// Use foreach...
196197
obj, err = i.Object(obj)
@@ -213,13 +214,13 @@ func BenchmarkJsonParserLarge(b *testing.B) {
213214

214215
}, onlyKeys)
215216
} else {
216-
elem, err = i.FindElement("id", elem)
217+
elem, err = i.FindElement(elem, "id")
217218
if checkErrs && err != nil {
218219
b.Fatal(err)
219220
}
220221
_, _ = elem.Iter.Int()
221222
//b.Log(elem.Iter.Int())
222-
elem, err = i.FindElement("slug", elem)
223+
elem, err = i.FindElement(elem, "slug")
223224
if checkErrs && err != nil {
224225
b.Fatal(err)
225226
}
@@ -230,3 +231,37 @@ func BenchmarkJsonParserLarge(b *testing.B) {
230231
}
231232
})
232233
}
234+
235+
func BenchmarkBugerJsonParserLarge(b *testing.B) {
236+
largeFixture := loadCompressed(b, "payload-large")
237+
const logVals = false
238+
b.SetBytes(int64(len(largeFixture)))
239+
b.ReportAllocs()
240+
b.ResetTimer()
241+
var dump int
242+
for i := 0; i < b.N; i++ {
243+
jsonparser.ArrayEach(largeFixture, func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
244+
sval, _, _, _ := jsonparser.Get(value, "username")
245+
if logVals && i == 0 {
246+
b.Log(string(sval))
247+
}
248+
dump += len(sval)
249+
}, "users")
250+
251+
jsonparser.ArrayEach(largeFixture, func(value []byte, dataType jsonparser.ValueType, offset int, err error) {
252+
ival, _ := jsonparser.GetInt(value, "id")
253+
if logVals && i == 0 {
254+
b.Log(ival)
255+
}
256+
dump += int(ival)
257+
sval, _, _, _ := jsonparser.Get(value, "slug")
258+
if logVals && i == 0 {
259+
b.Log(string(sval))
260+
}
261+
dump += len(sval)
262+
}, "topics", "topics")
263+
}
264+
if dump == 0 {
265+
b.Log("")
266+
}
267+
}

examples/simdjson_example.go

Lines changed: 19 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1,42 +1,30 @@
11
package main
22

33
import (
4+
"encoding/json"
45
"fmt"
56
"io/ioutil"
67
"log"
78

89
"github.com/minio/simdjson-go"
910
)
1011

11-
func printKey(iter simdjson.Iter, key string) (err error) {
12-
13-
obj, tmp, elem := &simdjson.Object{}, &simdjson.Iter{}, simdjson.Element{}
14-
15-
for {
16-
typ := iter.Advance()
17-
18-
switch typ {
19-
case simdjson.TypeRoot:
20-
if typ, tmp, err = iter.Root(tmp); err != nil {
21-
return
22-
}
23-
24-
if typ == simdjson.TypeObject {
25-
if obj, err = tmp.Object(obj); err != nil {
26-
return
27-
}
28-
29-
e := obj.FindKey(key, &elem)
30-
if e != nil && elem.Type == simdjson.TypeString {
31-
v, _ := elem.Iter.StringBytes()
32-
fmt.Println(string(v))
33-
}
34-
}
35-
36-
default:
37-
return
12+
func printKeyHistogram(pj *simdjson.ParsedJson, key string) (err error) {
13+
var elem *simdjson.Element
14+
count := make(map[string]int)
15+
err = pj.ForEach(func(i simdjson.Iter) error {
16+
if elem, err = i.FindElement(elem, key); err != nil {
17+
return nil
3818
}
39-
}
19+
if elem.Type == simdjson.TypeString {
20+
s, _ := elem.Iter.String()
21+
count[s]++
22+
}
23+
return nil
24+
})
25+
res, _ := json.Marshal(count)
26+
fmt.Println(key, ":", string(res)+"\n")
27+
return err
4028
}
4129

4230
func main() {
@@ -53,5 +41,7 @@ func main() {
5341
log.Fatalf("Failed to parse JSON: %v", err)
5442
}
5543

56-
printKey(parsed.Iter(), "Make")
44+
printKeyHistogram(parsed, "Make")
45+
printKeyHistogram(parsed, "MeterId")
46+
printKeyHistogram(parsed, "ViolationCode")
5747
}

go.mod

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
module github.com/minio/simdjson-go
22

3-
go 1.13
3+
go 1.15
44

55
require (
6+
github.com/buger/jsonparser v1.1.1
67
github.com/json-iterator/go v1.1.9
78
github.com/klauspost/compress v1.13.6
8-
github.com/klauspost/cpuid/v2 v2.0.6
9+
github.com/klauspost/cpuid/v2 v2.0.9
910
)

go.sum

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
github.com/buger/jsonparser v1.1.1 h1:2PnMjfWD7wBILjqQbt530v576A/cAbQvEW9gGIpYMUs=
2+
github.com/buger/jsonparser v1.1.1/go.mod h1:6RYKKt7H4d4+iWqouImQ9R2FZql3VbhNgx27UK13J/0=
13
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
24
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
35
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
@@ -6,8 +8,8 @@ github.com/json-iterator/go v1.1.9 h1:9yzud/Ht36ygwatGx56VwCZtlI/2AD15T1X2sjSuGn
68
github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
79
github.com/klauspost/compress v1.13.6 h1:P76CopJELS0TiO2mebmnzgWaajssP/EszplttgQxcgc=
810
github.com/klauspost/compress v1.13.6/go.mod h1:/3/Vjq9QcHkK5uEr5lBEmyoZ1iFhe47etQ6QUkpK6sk=
9-
github.com/klauspost/cpuid/v2 v2.0.6 h1:dQ5ueTiftKxp0gyjKSx5+8BtPWkyQbd95m8Gys/RarI=
10-
github.com/klauspost/cpuid/v2 v2.0.6/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
11+
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
12+
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
1113
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 h1:ZqeYNhU3OHLH3mGKHDcjJRFFRrJa6eAM5H+CtDdOsPc=
1214
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
1315
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742 h1:Esafd1046DLDQ0W1YjYsBW+p8U2u7vzgW2SQVmlNazg=

ndjson_test.go

Lines changed: 81 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -259,14 +259,60 @@ func TestNdjsonCountWhere(t *testing.T) {
259259
if err != nil {
260260
t.Fatal(err)
261261
}
262-
ser := NewSerializer()
263-
ser.CompressMode(CompressBest)
264-
b := ser.Serialize(nil, *pj)
265-
t.Log(len(b))
262+
266263
const want = 116
267-
if result := countWhere("Make", "HOND", *pj); result != want {
268-
t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want)
269-
}
264+
t.Run("countWhere", func(t *testing.T) {
265+
if result := countWhere("Make", "HOND", *pj); result != want {
266+
t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want)
267+
}
268+
})
269+
t.Run("foreach", func(t *testing.T) {
270+
var result int
271+
var elem *Element
272+
var obj *Object
273+
err := pj.ForEach(func(i Iter) error {
274+
var err error
275+
obj, err = i.Object(obj)
276+
if err == nil {
277+
elem = obj.FindKey("Make", elem)
278+
if elem != nil {
279+
bts, _ := elem.Iter.StringBytes()
280+
if string(bts) == "HOND" {
281+
result++
282+
}
283+
}
284+
}
285+
return nil
286+
})
287+
if err != nil {
288+
t.Fatal(err)
289+
}
290+
if result != want {
291+
t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want)
292+
}
293+
})
294+
t.Run("foreach-findelement", func(t *testing.T) {
295+
var result int
296+
var elem *Element
297+
err := pj.ForEach(func(i Iter) error {
298+
var err error
299+
elem, err = i.FindElement(elem, "Make")
300+
if err != nil {
301+
return nil
302+
}
303+
bts, _ := elem.Iter.StringBytes()
304+
if string(bts) == "HOND" {
305+
result++
306+
}
307+
return nil
308+
})
309+
if err != nil {
310+
t.Fatal(err)
311+
}
312+
if result != want {
313+
t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want)
314+
}
315+
})
270316
}
271317

272318
func TestNdjsonCountWhere2(t *testing.T) {
@@ -287,9 +333,34 @@ func TestNdjsonCountWhere2(t *testing.T) {
287333
t.Fatal(err)
288334
}
289335
const want = 170315
290-
if result := countWhere("subreddit", "reddit.com", *pj); result != want {
291-
t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want)
292-
}
336+
t.Run("countWhere", func(t *testing.T) {
337+
if result := countWhere("subreddit", "reddit.com", *pj); result != want {
338+
t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want)
339+
}
340+
341+
})
342+
t.Run("foreach-findelement", func(t *testing.T) {
343+
var result int
344+
var elem *Element
345+
err := pj.ForEach(func(i Iter) error {
346+
var err error
347+
elem, err = i.FindElement(elem, "subreddit")
348+
if err != nil {
349+
return nil
350+
}
351+
bts, _ := elem.Iter.StringBytes()
352+
if string(bts) == "reddit.com" {
353+
result++
354+
}
355+
return nil
356+
})
357+
if err != nil {
358+
t.Fatal(err)
359+
}
360+
if result != want {
361+
t.Errorf("TestNdjsonCountWhere: got: %d want: %d", result, want)
362+
}
363+
})
293364
}
294365

295366
func loadFile(filename string) []byte {

parsed_array.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,14 +43,14 @@ func (a *Array) Iter() Iter {
4343
}
4444

4545
// ForEach calls the provided function for every element.
46-
func (a *Array) ForEach(fn func(t Type, i Iter)) {
46+
func (a *Array) ForEach(fn func(i Iter)) {
4747
i := a.Iter()
4848
for {
4949
t := i.Advance()
5050
if t == TypeNone {
5151
break
5252
}
53-
fn(t, i)
53+
fn(i)
5454
}
5555
return
5656
}

parsed_json.go

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,24 @@ func (pj *ParsedJson) stringByteAt(offset, length uint64) ([]byte, error) {
119119
return pj.Strings.B[offset : offset+length], nil
120120
}
121121

122+
// ForEach calls fn for each line in NDJSON, or for the top element in non-NDJSON input.
123+
// This will usually be an object or an array.
124+
// If the callback returns a non-nil error, parsing stops and the error is returned.
125+
func (pj *ParsedJson) ForEach(fn func(i Iter) error) error {
126+
i := Iter{tape: *pj}
127+
var elem Iter
128+
for {
129+
t, err := i.AdvanceIter(&elem)
130+
if err != nil || t != TypeRoot {
131+
return err
132+
}
133+
elem.AdvanceInto()
134+
if err = fn(elem); err != nil {
135+
return err
136+
}
137+
}
138+
}
139+
122140
// Clone returns a deep clone of the ParsedJson.
123141
// If a nil destination is sent a new will be created.
124142
func (pj *ParsedJson) Clone(dst *ParsedJson) *ParsedJson {
@@ -751,13 +769,15 @@ func (i *Iter) Root(dst *Iter) (Type, *Iter, error) {
751769

752770
// FindElement allows searching for fields and objects by path from the iter and forward,
753771
// moving into root and objects, but not arrays.
754-
// Separate each object name by /.
755-
// For example `Image/Url` will search the current root/object for an "Image"
772+
// For example "Image", "Url" will search the current root/object for an "Image"
756773
// object and return the value of the "Url" element.
757774
// ErrPathNotFound is returned if any part of the path cannot be found.
758775
// If the tape contains an error it will be returned.
759776
// The iter will *not* be advanced.
760-
func (i *Iter) FindElement(path string, dst *Element) (*Element, error) {
777+
func (i *Iter) FindElement(dst *Element, path ...string) (*Element, error) {
778+
if len(path) == 0 {
779+
return dst, ErrPathNotFound
780+
}
761781
// Local copy.
762782
cp := *i
763783
for {
@@ -768,7 +788,7 @@ func (i *Iter) FindElement(path string, dst *Element) (*Element, error) {
768788
if err != nil {
769789
return dst, err
770790
}
771-
return obj.FindPath(path, dst)
791+
return obj.FindPath(dst, path...)
772792
case TagRoot:
773793
_, _, err := cp.Root(&cp)
774794
if err != nil {

0 commit comments

Comments
 (0)