diff --git a/Makefile b/Makefile
index a353f99..01b7811 100644
--- a/Makefile
+++ b/Makefile
@@ -68,10 +68,14 @@ imports: ## Update imports in Go source code
 vet: ## Vet Go source code
 	go vet $$(go list ./...)
 
+.PHONY: benchmarks
+benchmarks: ## Run Go benchmarks
+	go test -run XXX -benchmem -bench . github.com/spatialcurrent/go-simple-serializer/pkg/... | grep ns/op
+
 .PHONY: test_go
 test_go: ## Run Go tests
 	bash scripts/test.sh
-	
+
 .PHONY: test_cli
 test_cli: ## Run CLI tests
 	bash scripts/test-cli.sh
diff --git a/pkg/jsonl/Iterator_benchmarks_test.go b/pkg/jsonl/Iterator_benchmarks_test.go
new file mode 100644
index 0000000..c90b4ff
--- /dev/null
+++ b/pkg/jsonl/Iterator_benchmarks_test.go
@@ -0,0 +1,127 @@
+// =================================================================
+//
+// Copyright (C) 2019 Spatial Current, Inc. - All Rights Reserved
+// Released as open source under the MIT License. See LICENSE file.
+//
+// =================================================================
+
+package jsonl
+
+import (
+	"bufio"
+	"encoding/json"
+	"os"
+	"testing"
+
+	"github.com/pkg/errors"
+)
+
+// Following guidance from "How to write benchmarks in Go"
+// - https://dave.cheney.net/2013/06/30/how-to-write-benchmarks-in-go
+
+var (
+	// results is a package-level sink for benchmark output; assigning to it
+	// keeps the compiler from optimizing away the measured work.
+	results = make([]interface{}, 0)
+)
+
+// openBenchmarkInput opens the file named by the GSS_BENCHMARK_JSONL_INPUT_FILE
+// environment variable. It panics if the variable is unset or the file cannot
+// be opened. The caller is responsible for closing the returned file.
+func openBenchmarkInput() *os.File {
+	path := os.Getenv("GSS_BENCHMARK_JSONL_INPUT_FILE")
+	if len(path) == 0 {
+		panic(errors.New("missing benchmark input file: GSS_BENCHMARK_JSONL_INPUT_FILE is not set"))
+	}
+	input, err := os.Open(path)
+	if err != nil {
+		panic(errors.Wrap(err, "error opening benchmark input file"))
+	}
+	return input
+}
+
+// benchmarkIterator measures reading limit objects per benchmark iteration
+// with the Iterator. One iterator is shared by all b.N iterations, so Next is
+// called b.N*limit times in a single pass over the input file. Any error from
+// Next aborts the run, so the file presumably needs at least that many objects.
+func benchmarkIterator(limit int, b *testing.B) {
+	input := openBenchmarkInput()
+	defer input.Close()
+	it := NewIterator(&NewIteratorInput{
+		Reader:        bufio.NewReader(input),
+		SkipLines:     0,
+		Comment:       "",
+		Trim:          true,
+		SkipBlanks:    false,
+		SkipComments:  false,
+		LineSeparator: []byte("\n")[0],
+		DropCR:        true,
+	})
+	var out []interface{}
+
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		// Pre-size the slice so its growth does not add to the reported allocations.
+		out = make([]interface{}, 0, limit)
+		for i := 0; i < limit; i++ {
+			obj, err := it.Next()
+			if err != nil {
+				panic(errors.Wrapf(err, "error reading object %d of iteration %d (b.N=%d, limit=%d)", i, n, b.N, limit))
+			}
+			out = append(out, obj)
+		}
+	}
+	// defeat compiler optimizations
+	results = out
+}
+
+// benchmarkStandardLibraryDecoder is the encoding/json baseline for
+// benchmarkIterator: it decodes limit objects per benchmark iteration from a
+// single json.Decoder. Decode returns io.EOF at the end of the stream, so the
+// input file must hold at least b.N*limit objects.
+func benchmarkStandardLibraryDecoder(limit int, b *testing.B) {
+	input := openBenchmarkInput()
+	defer input.Close()
+
+	decoder := json.NewDecoder(bufio.NewReader(input))
+	var out []interface{}
+
+	b.ResetTimer()
+	for n := 0; n < b.N; n++ {
+		// Pre-size the slice so its growth does not add to the reported allocations.
+		out = make([]interface{}, 0, limit)
+		for i := 0; i < limit; i++ {
+			obj := map[string]interface{}{}
+			if err := decoder.Decode(&obj); err != nil {
+				panic(errors.Wrapf(err, "error decoding object %d of iteration %d (b.N=%d, limit=%d)", i, n, b.N, limit))
+			}
+			out = append(out, obj)
+		}
+	}
+	// defeat compiler optimizations
+	results = out
+}
+
+func BenchmarkIterator1(b *testing.B) {
+	benchmarkIterator(1, b)
+}
+
+func BenchmarkIterator1024(b *testing.B) {
+	benchmarkIterator(1024, b)
+}
+
+func BenchmarkIterator4096(b *testing.B) {
+	benchmarkIterator(4096, b)
+}
+
+func BenchmarkStandardLibraryDecoder1(b *testing.B) {
+	benchmarkStandardLibraryDecoder(1, b)
+}
+
+func BenchmarkStandardLibraryDecoder1024(b *testing.B) {
+	benchmarkStandardLibraryDecoder(1024, b)
+}
+
+func BenchmarkStandardLibraryDecoder4096(b *testing.B) {
+	benchmarkStandardLibraryDecoder(4096, b)
+}
diff --git a/pkg/jsonl/Write_benchmarks_test.go b/pkg/jsonl/Write_benchmarks_test.go
new file mode 100644
index 0000000..961ca99
--- /dev/null
+++ b/pkg/jsonl/Write_benchmarks_test.go
@@ -0,0 +1,114 @@
+// =================================================================
+//
+// Copyright (C) 2019 Spatial Current, Inc. - All Rights Reserved
+// Released as open source under the MIT License. See LICENSE file.
+//
+// =================================================================
+
+package jsonl
+
+import (
+	"bufio"
+	"fmt"
+	"io/ioutil"
+	"os"
+	"testing"
+
+	"github.com/pkg/errors"
+	"github.com/spatialcurrent/go-pipe/pkg/pipe"
+	"github.com/spatialcurrent/go-stringify/pkg/stringify"
+)
+
+// Following guidance from "How to write benchmarks in Go"
+// - https://dave.cheney.net/2013/06/30/how-to-write-benchmarks-in-go
+
+// loadBenchmarkData collects every object the Iterator yields for the file at path; it panics on error.
+func loadBenchmarkData(path string) []interface{} {
+	input, err := os.Open(path)
+	if err != nil {
+		panic(errors.Wrap(err, "error opening benchmark input file"))
+	}
+	defer input.Close()
+	data := make([]interface{}, 0)
+	err = pipe.NewBuilder().
+		Input(NewIterator(&NewIteratorInput{
+			Reader:        bufio.NewReader(input),
+			SkipLines:     0,
+			Comment:       "",
+			Trim:          true,
+			SkipBlanks:    false,
+			SkipComments:  false,
+			LineSeparator: []byte("\n")[0],
+			DropCR:        true,
+		})).
+		OutputF(func(x interface{}) error {
+			data = append(data, x)
+			return nil
+		}).
+		Run()
+	if err != nil {
+		panic(errors.Wrap(err, "error reading in data"))
+	}
+	return data
+}
+
+// benchmarkWriter times writing batch objects per iteration to a temporary file
+// in GSS_BENCHMARK_JSONL_OUTPUT_DIR, cycling through the loaded input data.
+func benchmarkWriter(batch int, b *testing.B) {
+	inputPath := os.Getenv("GSS_BENCHMARK_JSONL_INPUT_FILE")
+	if len(inputPath) == 0 {
+		panic(errors.New("missing benchmark input file: GSS_BENCHMARK_JSONL_INPUT_FILE is not set"))
+	}
+	outputDir := os.Getenv("GSS_BENCHMARK_JSONL_OUTPUT_DIR")
+	if len(outputDir) == 0 {
+		panic(errors.New("missing benchmark output directory: GSS_BENCHMARK_JSONL_OUTPUT_DIR is not set"))
+	}
+	data := loadBenchmarkData(inputPath)
+	if len(data) < batch {
+		panic(errors.Errorf("benchmark input file holds %d objects, but a batch needs %d", len(data), batch))
+	}
+	output, err := ioutil.TempFile(outputDir, fmt.Sprintf("benchmark_output_%d_%d_*.jsonl", batch, b.N))
+	if err != nil {
+		panic(errors.Wrap(err, "error creating temporary output file for running benchmarks"))
+	}
+	if os.Getenv("GSS_BENCHMARK_JSONL_OUTPUT_KEEP") != "1" {
+		defer os.Remove(output.Name())
+	}
+	defer output.Close() // registered last so it runs first: close the file before removing it
+	writer := NewWriter(output, "\n", stringify.NewDecimalStringer(), false)
+	b.ResetTimer()
+	cursor := 0 // start of the next batch; declared outside the loop so it persists across iterations
+	for n := 0; n < b.N; n++ {
+		if batch == 1 {
+			err = writer.WriteObject(data[cursor])
+		} else {
+			err = writer.WriteObjects(data[cursor : cursor+batch])
+		}
+		if err != nil {
+			panic(err)
+		}
+		if cursor += batch; cursor+batch > len(data) {
+			cursor = 0 // wrap around so the next batch stays within data
+		}
+	}
+}
+
+func BenchmarkWriter1(b *testing.B) {
+	benchmarkWriter(1, b)
+}
+
+func BenchmarkWriter64(b *testing.B) {
+	benchmarkWriter(64, b)
+}
+
+func BenchmarkWriter256(b *testing.B) {
+	benchmarkWriter(256, b)
+}
+
+func BenchmarkWriter1024(b *testing.B) {
+	benchmarkWriter(1024, b)
+}
+
+func BenchmarkWriter4096(b *testing.B) {
+	benchmarkWriter(4096, b)
+}
diff --git a/pkg/jsonl/Writer.go b/pkg/jsonl/Writer.go
index 245dfb5..fd5b8c6 100644
--- a/pkg/jsonl/Writer.go
+++ b/pkg/jsonl/Writer.go
@@ -8,6 +8,7 @@
 package jsonl
 
 import (
+	"bytes"
 	"io"
 	"reflect"
 
@@ -23,6 +24,7 @@ type Writer struct {
 	separator     string // the separator stirng to use, e.g, null byte or \n.
 	keySerializer stringify.Stringer
 	pretty        bool // write pretty output
+	buffer        *bytes.Buffer
 }
 
 // NewWriter returns a writer for formating and writing objets to the underlying writer as JSON Lines (aka jsonl).
@@ -32,6 +34,7 @@ func NewWriter(w io.Writer, separator string, keySerializer stringify.Stringer,
 		separator:     separator,
 		keySerializer: keySerializer,
 		pretty:        pretty,
+		buffer:        &bytes.Buffer{},
 	}
 }
 
@@ -66,12 +69,28 @@ func (w *Writer) WriteObjects(objects interface{}) error {
 		k = value.Type().Kind()
 	}
 	if k == reflect.Array || k == reflect.Slice {
+		// Encode the whole batch into the reusable buffer, then hand it to the
+		// underlying writer with a single Write call.
+		if w.buffer == nil {
+			w.buffer = &bytes.Buffer{} // Writer was built without NewWriter, e.g. as a struct literal
+		}
+		defer w.buffer.Reset()
+		bw := &Writer{
+			writer:        w.buffer,
+			separator:     w.separator,
+			keySerializer: w.keySerializer,
+			pretty:        w.pretty,
+		}
 		for i := 0; i < value.Len(); i++ {
-			err := w.WriteObject(value.Index(i).Interface())
+			err := bw.WriteObject(value.Index(i).Interface())
 			if err != nil {
 				return errors.Wrap(err, "error writing object")
 			}
 		}
+		_, err := w.writer.Write(w.buffer.Bytes())
+		if err != nil {
+			return errors.Wrap(err, "error writing objects from buffer to underlying writer")
+		}
 	}
 	return nil
 }
diff --git a/pkg/jsonl/doc.go b/pkg/jsonl/doc.go
new file mode 100644
index 0000000..98691d6
--- /dev/null
+++ b/pkg/jsonl/doc.go
@@ -0,0 +1,14 @@
+// =================================================================
+//
+// Copyright (C) 2019 Spatial Current, Inc. - All Rights Reserved
+// Released as open source under the MIT License. See LICENSE file.
+//
+// =================================================================
+
+// Package jsonl provides a simple API for reading and writing to JSON Lines (aka jsonl).
+// jsonl also supports iterators for efficiently reading through a stream.
+// jsonl uses the github.com/spatialcurrent/go-simple-serializer/pkg/json for marshaling/unmarshaling JSON.
+// See the examples below for usage.
+//
+// - https://pkg.go.dev/github.com/spatialcurrent/go-simple-serializer/pkg/json
+package jsonl
diff --git a/pkg/jsonl/jsonl.go b/pkg/jsonl/jsonl.go
index 79b2db0..3845c16 100644
--- a/pkg/jsonl/jsonl.go
+++ b/pkg/jsonl/jsonl.go
@@ -5,12 +5,6 @@
 //
 // =================================================================
 
-// Package jsonl provides a simple API for reading and writing to JSON Lines (aka jsonl).
-// jsonl also supports iterators for efficiently reading through a stream.
-// jsonl uses the github.com/spatialcurrent/go-simple-serializer/pkg/json for marshaling/unmarshaling JSON.
-// See the examples below for usage.
-//
-// - https://godoc.org/pkg/github.com/spatialcurrent/go-simple-serializer/pkg/json
 package jsonl
 
 import (