Skip to content

Commit 46856ca

Browse files
committed
Make InferStrings accept any valid JSON value
1 parent fe1b071 commit 46856ca

File tree

2 files changed

+173
-12
lines changed

2 files changed

+173
-12
lines changed

inferrer.go

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package jtdinfer
22

33
import (
44
"encoding/json"
5+
"strconv"
56
)
67

78
// Inferrer represents the `InferredSchema` with its state combined with the
@@ -32,27 +33,63 @@ func (i *Inferrer) IntoSchema() Schema {
3233
return i.Inference.IntoSchema(i.Hints)
3334
}
3435

35-
// InferStrings accepts a slice of strings and will convert them to either a
36-
// `map[string]any` or `[]any` and run inference on all the rows. If any of the
37-
// rows are not valid JSON object or list, the inference up to that point is
38-
// returned.
39-
//
40-
// If you need to infer simple values like strings or integers they can be
41-
// passed directly to `Infer`.
36+
// InferStrings accepts a slice of strings and will try to JSON unmarshal each
37+
// row to the type that the first row looks like. If an error occurs the
38+
// inferrer will return with the state it had when the error occurred.
39+
// If you already have the type of your data such as a slice of numbers or a map
40+
// of strings you can pass them directly to `Infer`. This is just a convenience
41+
// method if all you got is strings.
4242
func InferStrings(rows []string, hints *Hints) *Inferrer {
4343
inferrer := NewInferrer(hints)
44+
if len(rows) == 0 {
45+
return inferrer
46+
}
47+
48+
var (
49+
firstRow = rows[0]
50+
getToInfer func() any
51+
)
52+
53+
switch {
54+
case isBool(firstRow):
55+
getToInfer = func() any { return false }
56+
case isObject(firstRow):
57+
getToInfer = func() any { return make(map[string]any) }
58+
case isArray(firstRow):
59+
getToInfer = func() any { return make([]any, 0) }
60+
case isNumber(firstRow):
61+
getToInfer = func() any { return 0.0 }
62+
default:
63+
getToInfer = func() any { return "" }
64+
}
4465

4566
for _, row := range rows {
46-
var toInfer any = make(map[string]any, 0)
67+
toInfer := getToInfer()
4768
if err := json.Unmarshal([]byte(row), &toInfer); err != nil {
48-
toInfer = make([]any, 0)
49-
if err := json.Unmarshal([]byte(row), &toInfer); err != nil {
50-
return inferrer
51-
}
69+
return inferrer
5270
}
5371

5472
inferrer = inferrer.Infer(toInfer)
5573
}
5674

5775
return inferrer
5876
}
77+
78+
// isBool reports whether value is a JSON boolean literal (`true` or `false`).
//
// The value is decoded with encoding/json rather than compared byte-for-byte
// so that insignificant whitespace around the literal is tolerated, the same
// way isObject and isArray tolerate it. A pointer target is used to tell a
// decoded boolean apart from JSON `null`, which decodes without an error but
// leaves the pointer nil.
func isBool(value string) bool {
	var b *bool
	return json.Unmarshal([]byte(value), &b) == nil && b != nil
}
81+
82+
// isObject reports whether value is a JSON object.
//
// JSON `null` decodes into a map target without an error but leaves the map
// nil, whereas any object (including `{}`) makes encoding/json allocate the
// map. The nil check therefore keeps `null` from being reported as an object.
func isObject(value string) bool {
	var m map[string]any
	return json.Unmarshal([]byte(value), &m) == nil && m != nil
}
86+
87+
// isArray reports whether value is a JSON array.
//
// JSON `null` decodes into a slice target without an error but leaves the
// slice nil, whereas any array (including `[]`) yields a non-nil slice. The
// nil check therefore keeps `null` from being reported as an array.
func isArray(value string) bool {
	var a []any
	return json.Unmarshal([]byte(value), &a) == nil && a != nil
}
91+
92+
// isNumber reports whether value is a JSON number that fits in a float64.
//
// strconv.ParseFloat on its own is more permissive than the JSON grammar: it
// also accepts spellings such as "NaN", "Inf", "+1", ".5", "1.", "01" and
// hexadecimal floats. The value is therefore required to be valid JSON first,
// and ParseFloat is then used to reject everything that is not a number as
// well as numbers outside the float64 range.
func isNumber(value string) bool {
	if !json.Valid([]byte(value)) {
		return false
	}

	_, err := strconv.ParseFloat(value, 64)

	return err == nil
}

inferrer_test.go

Lines changed: 124 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,137 @@
11
package jtdinfer
22

33
import (
4+
"fmt"
5+
"math"
6+
"strconv"
47
"testing"
8+
"time"
59

610
jtd "github.com/jsontypedef/json-typedef-go"
711
"github.com/stretchr/testify/assert"
812
"github.com/stretchr/testify/require"
913
)
1014

15+
func TestInferString(t *testing.T) {
16+
cases := []struct {
17+
description string
18+
values []string
19+
expectedSchema Schema
20+
}{
21+
{
22+
description: "boolean true value",
23+
values: []string{"true"},
24+
expectedSchema: Schema{
25+
Type: jtd.TypeBoolean,
26+
},
27+
},
28+
{
29+
description: "boolean false value",
30+
values: []string{"false"},
31+
expectedSchema: Schema{
32+
Type: jtd.TypeBoolean,
33+
},
34+
},
35+
{
36+
description: "object",
37+
values: []string{`{"name":"Joe"}`},
38+
expectedSchema: Schema{
39+
Properties: map[string]Schema{
40+
"name": {
41+
Type: jtd.TypeString,
42+
},
43+
},
44+
},
45+
},
46+
{
47+
description: "array",
48+
values: []string{`[1, 2, 3]`},
49+
expectedSchema: Schema{
50+
Elements: &Schema{
51+
Type: jtd.TypeUint8,
52+
},
53+
},
54+
},
55+
{
56+
description: "unsigned integer",
57+
values: []string{"1"},
58+
expectedSchema: Schema{
59+
Type: jtd.TypeUint8,
60+
},
61+
},
62+
{
63+
description: "signed integer",
64+
values: []string{"-1"},
65+
expectedSchema: Schema{
66+
Type: jtd.TypeInt8,
67+
},
68+
},
69+
{
70+
description: "signed max integer",
71+
values: []string{strconv.Itoa(math.MinInt32)},
72+
expectedSchema: Schema{
73+
Type: jtd.TypeInt32,
74+
},
75+
},
76+
{
77+
description: "float without fraction",
78+
values: []string{"1.0"},
79+
expectedSchema: Schema{
80+
Type: jtd.TypeUint8,
81+
},
82+
},
83+
{
84+
description: "positive float",
85+
values: []string{"1.1"},
86+
expectedSchema: Schema{
87+
Type: jtd.TypeFloat64,
88+
},
89+
},
90+
{
91+
description: "negative float",
92+
values: []string{"-1.1"},
93+
expectedSchema: Schema{
94+
Type: jtd.TypeFloat64,
95+
},
96+
},
97+
{
98+
description: "string",
99+
values: []string{`"string"`},
100+
expectedSchema: Schema{
101+
Type: jtd.TypeString,
102+
},
103+
},
104+
{
105+
description: "number in string is still string",
106+
values: []string{`"2.2"`},
107+
expectedSchema: Schema{
108+
Type: jtd.TypeString,
109+
},
110+
},
111+
{
112+
description: "timestamp",
113+
values: []string{fmt.Sprintf(`"%s"`, time.Now().Format(time.RFC3339))},
114+
expectedSchema: Schema{
115+
Type: jtd.TypeTimestamp,
116+
},
117+
},
118+
{
119+
description: "null",
120+
values: []string{"null"},
121+
expectedSchema: Schema{
122+
Nullable: true,
123+
},
124+
},
125+
}
126+
127+
for _, tc := range cases {
128+
t.Run(tc.description, func(t *testing.T) {
129+
gotSchema := InferStrings(tc.values, NewHints()).IntoSchema()
130+
assert.EqualValues(t, tc.expectedSchema, gotSchema)
131+
})
132+
}
133+
}
134+
11135
func TestJTDInfer(t *testing.T) {
12136
rows := []string{
13137
`{"name": "Joe", "age": 42, "hobbies": ["code", "animals"]}`,

0 commit comments

Comments
 (0)