Skip to content

Commit f32fac3

Browse files
authored
feat(arrow): Streamline Apache Arrow extension types (#823)
### Common Use pointer receivers: 1. For Array types: to have all functions have the same receiver type (other functions do have pointer receivers, so updating `String()` method receiver is natural, see `net.IPNet` as a reference. 2. For `Type` implementations: we do return a pointer from `NewXYZType` function, so it's only natural to define the receiver as pointer as well. ### `Inet` #### `InetBuilder` 1. Now accepts `*net.IPNet` (pointer) in `Append`, `UnsafeAppend` & `AppendValues` 3. `AppendValues` now accounts for `valid` param #### `InetArray` 1. `String` method uses `%q` format as mainstream Apache Arrow types 2. Added `Value(i int) *net.IPNet` function 4. `GetOneForMarshal` utilizes `Value` implementation ### `JSON` #### `JSONBuilder` 1. Marshaling with `json.DisableHTMLEscape()` option per #622 2. `AppendValues` now accounts for `valid` param 3. `UnmarshalJSON` implementation is streamlined with helper `Unmarshal(dec *json.Decoder) error` function (as mainstream Apache Arrow types) #### `JSONArray` 1. `String` method uses `%q` format as mainstream Apache Arrow types 2. Added `Value(i int) any` function that will unmarshal the stored data & return the concrete type 3. Use `json.UnmarshalNoEscape` per #622 5. `GetOneForMarshal` now doesn't perform unmarshaling and, instead, returns the stored `json.RawMessage` ### `Mac` #### `MacBuilder` 1. `AppendValues` now accounts for `valid` param #### `MacArray` 1. `String` method uses `%q` format as mainstream Apache Arrow types 2. Added `Value(i int) net.HardwareAddr` function ### `UUID` #### `UUIDBuilder` 1. `AppendValues` now accounts for `valid` param 2. Use `uuid.Must` func instead of panic in code ### `UUIDArray` 1. `String` method uses `%q` format as mainstream Apache Arrow types 2. Added `Value(i int) uuid.UUID` func 3. Use `uuid.Must` func instead of panic in code
1 parent 1a8072f commit f32fac3

File tree

12 files changed

+499
-237
lines changed

12 files changed

+499
-237
lines changed

go.mod

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -28,37 +28,34 @@ require (
2828
gopkg.in/yaml.v3 v3.0.1
2929
)
3030

31+
replace github.com/apache/arrow/go/v12 => github.com/cloudquery/arrow/go/v12 v12.0.0-20230425184555-43f156fcdec9
32+
3133
require (
3234
github.com/andybalholm/brotli v1.0.5 // indirect
3335
github.com/apache/thrift v0.16.0 // indirect
36+
github.com/davecgh/go-spew v1.1.1 // indirect
37+
github.com/golang/protobuf v1.5.3 // indirect
3438
github.com/golang/snappy v0.0.4 // indirect
3539
github.com/google/flatbuffers v2.0.8+incompatible // indirect
40+
github.com/inconshreveable/mousetrap v1.1.0 // indirect
3641
github.com/klauspost/asmfmt v1.3.2 // indirect
3742
github.com/klauspost/compress v1.16.0 // indirect
3843
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
39-
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect
40-
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect
41-
github.com/zeebo/xxh3 v1.0.2 // indirect
42-
golang.org/x/mod v0.8.0 // indirect
43-
golang.org/x/tools v0.6.0 // indirect
44-
golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect
45-
)
46-
47-
replace github.com/apache/arrow/go/v12 => github.com/cloudquery/arrow/go/v12 v12.0.0-20230417154311-f9add0212acd
48-
49-
require (
50-
github.com/davecgh/go-spew v1.1.1 // indirect
51-
github.com/golang/protobuf v1.5.3 // indirect
52-
github.com/inconshreveable/mousetrap v1.1.0 // indirect; indirect // indirect
5344
github.com/mattn/go-colorable v0.1.13 // indirect
54-
github.com/mattn/go-isatty v0.0.18 // indirect; indirect // indirect
45+
github.com/mattn/go-isatty v0.0.18 // indirect
5546
github.com/mattn/go-runewidth v0.0.14 // indirect
47+
github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect
48+
github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3 // indirect
5649
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
5750
github.com/pmezard/go-difflib v1.0.0 // indirect
58-
github.com/rivo/uniseg v0.4.4 // indirect; indirect // indirect
51+
github.com/rivo/uniseg v0.4.4 // indirect
5952
github.com/spf13/pflag v1.0.5 // indirect
53+
github.com/zeebo/xxh3 v1.0.2 // indirect
54+
golang.org/x/mod v0.8.0 // indirect
6055
golang.org/x/sys v0.7.0 // indirect
61-
golang.org/x/term v0.7.0 // indirect; indirect // indirect
62-
google.golang.org/genproto v0.0.0-20230331144136-dcfb400f0633 // indirect; indirect // indirect
56+
golang.org/x/term v0.7.0 // indirect
57+
golang.org/x/tools v0.6.0 // indirect
58+
golang.org/x/xerrors v0.0.0-20220609144429-65e65417b02f // indirect
59+
google.golang.org/genproto v0.0.0-20230331144136-dcfb400f0633 // indirect
6360
gopkg.in/yaml.v2 v2.4.0 // indirect
6461
)

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,8 @@ github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWR
4747
github.com/chzyer/readline v0.0.0-20180603132655-2972be24d48e/go.mod h1:nSuG5e5PlCu98SY8svDHJxuZscDgtXS6KTTbou5AhLI=
4848
github.com/chzyer/test v0.0.0-20180213035817-a1ea475d72b1/go.mod h1:Q3SI9o4m/ZMnBNeIyt5eFwwo7qiLfzFZmjNmxjkiQlU=
4949
github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw=
50-
github.com/cloudquery/arrow/go/v12 v12.0.0-20230417154311-f9add0212acd h1:G093N165IqQvq84MK3Ozi7QwfAWyfYywXijkOyxGJdI=
51-
github.com/cloudquery/arrow/go/v12 v12.0.0-20230417154311-f9add0212acd/go.mod h1:d+tV/eHZZ7Dz7RPrFKtPK02tpr+c9/PEd/zm8mDS9Vg=
50+
github.com/cloudquery/arrow/go/v12 v12.0.0-20230425184555-43f156fcdec9 h1:DOmgyWSIXR8FLBS23UgIwCkEE/JPBWztzTjFHyj5lCw=
51+
github.com/cloudquery/arrow/go/v12 v12.0.0-20230425184555-43f156fcdec9/go.mod h1:d+tV/eHZZ7Dz7RPrFKtPK02tpr+c9/PEd/zm8mDS9Vg=
5252
github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc=
5353
github.com/cncf/udpa/go v0.0.0-20201120205902-5459f2c99403/go.mod h1:WmhPx2Nbnhtbo57+VJT5O0JRkEi1Wbu0z5j0R8u5Hbk=
5454
github.com/coreos/go-systemd v0.0.0-20190321100706-95778dfbb74e/go.mod h1:F5haX7vjVVG0kc13fIWeqUViNPyEJxv/OmvnBo0Yme4=

plugins/destination/plugin_testing_overwrite.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwrite(ctx context.Context,
7373
SourceName: sourceName,
7474
SyncTime: secondSyncTime,
7575
MaxRows: 1,
76-
StableUUID: *u,
76+
StableUUID: u,
7777
}
7878
updatedResource := testdata.GenTestData(table, opts)[0]
7979
// write second time

plugins/destination/plugin_testing_overwrite_delete_stale.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,7 +85,7 @@ func (*PluginTestSuite) destinationPluginTestWriteOverwriteDeleteStale(ctx conte
8585
opts = testdata.GenTestDataOptions{
8686
SourceName: sourceName,
8787
SyncTime: secondSyncTime,
88-
StableUUID: *u,
88+
StableUUID: u,
8989
MaxRows: 1,
9090
}
9191
updatedResources := testdata.GenTestData(table, opts)[0]

schema/arrow.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -398,7 +398,7 @@ func CQTypesToRecord(mem memory.Allocator, c []CQTypes, arrowSchema *arrow.Schem
398398
}
399399
case TypeInet:
400400
if c[j][i].(*Inet).Status == Present {
401-
bldr.Field(i).(*types.InetBuilder).Append(*c[j][i].(*Inet).IPNet)
401+
bldr.Field(i).(*types.InetBuilder).Append(c[j][i].(*Inet).IPNet)
402402
} else {
403403
bldr.Field(i).(*types.InetBuilder).AppendNull()
404404
}
@@ -407,14 +407,14 @@ func CQTypesToRecord(mem memory.Allocator, c []CQTypes, arrowSchema *arrow.Schem
407407
listBldr := bldr.Field(i).(*array.ListBuilder)
408408
listBldr.Append(true)
409409
for _, e := range c[j][i].(*InetArray).Elements {
410-
listBldr.ValueBuilder().(*types.InetBuilder).Append(*e.IPNet)
410+
listBldr.ValueBuilder().(*types.InetBuilder).Append(e.IPNet)
411411
}
412412
} else {
413413
bldr.Field(i).(*array.ListBuilder).AppendNull()
414414
}
415415
case TypeCIDR:
416416
if c[j][i].(*CIDR).Status == Present {
417-
bldr.Field(i).(*types.InetBuilder).Append(*c[j][i].(*CIDR).IPNet)
417+
bldr.Field(i).(*types.InetBuilder).Append(c[j][i].(*CIDR).IPNet)
418418
} else {
419419
bldr.Field(i).(*types.InetBuilder).AppendNull()
420420
}
@@ -423,7 +423,7 @@ func CQTypesToRecord(mem memory.Allocator, c []CQTypes, arrowSchema *arrow.Schem
423423
listBldr := bldr.Field(i).(*array.ListBuilder)
424424
listBldr.Append(true)
425425
for _, e := range c[j][i].(*CIDRArray).Elements {
426-
listBldr.ValueBuilder().(*types.InetBuilder).Append(*e.IPNet)
426+
listBldr.ValueBuilder().(*types.InetBuilder).Append(e.IPNet)
427427
}
428428
} else {
429429
bldr.Field(i).(*array.ListBuilder).AppendNull()

testdata/testdata.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -213,19 +213,19 @@ func GenTestData(sc *arrow.Schema, opts GenTestDataOptions) []arrow.Record {
213213
if err != nil {
214214
panic(err)
215215
}
216-
bldr.Field(i).(*types.InetBuilder).Append(*ipnet)
216+
bldr.Field(i).(*types.InetBuilder).Append(ipnet)
217217
} else if arrow.TypeEqual(c.Type, arrow.ListOf(types.ExtensionTypes.Inet)) {
218218
bldr.Field(i).(*array.ListBuilder).Append(true)
219219
_, ipnet, err := net.ParseCIDR("192.0.2.1/24")
220220
if err != nil {
221221
panic(err)
222222
}
223-
bldr.Field(i).(*array.ListBuilder).ValueBuilder().(*types.InetBuilder).Append(*ipnet)
223+
bldr.Field(i).(*array.ListBuilder).ValueBuilder().(*types.InetBuilder).Append(ipnet)
224224
_, ipnet, err = net.ParseCIDR("192.0.2.1/24")
225225
if err != nil {
226226
panic(err)
227227
}
228-
bldr.Field(i).(*array.ListBuilder).ValueBuilder().(*types.InetBuilder).Append(*ipnet)
228+
bldr.Field(i).(*array.ListBuilder).ValueBuilder().(*types.InetBuilder).Append(ipnet)
229229
} else if arrow.TypeEqual(c.Type, types.ExtensionTypes.Mac) {
230230
mac, err := net.ParseMAC("aa:bb:cc:dd:ee:ff")
231231
if err != nil {

types/inet.go

Lines changed: 44 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -16,24 +16,28 @@ type InetBuilder struct {
1616
*array.ExtensionBuilder
1717
}
1818

19-
func NewInetBuilder(bldr *array.ExtensionBuilder) *InetBuilder {
20-
b := &InetBuilder{
21-
ExtensionBuilder: bldr,
22-
}
23-
return b
19+
func NewInetBuilder(builder *array.ExtensionBuilder) *InetBuilder {
20+
return &InetBuilder{ExtensionBuilder: builder}
2421
}
2522

26-
func (b *InetBuilder) Append(v net.IPNet) {
23+
func (b *InetBuilder) Append(v *net.IPNet) {
24+
if v == nil {
25+
b.AppendNull()
26+
return
27+
}
2728
b.ExtensionBuilder.Builder.(*array.StringBuilder).Append(v.String())
2829
}
2930

30-
func (b *InetBuilder) UnsafeAppend(v net.IPNet) {
31+
func (b *InetBuilder) UnsafeAppend(v *net.IPNet) {
3132
b.ExtensionBuilder.Builder.(*array.StringBuilder).UnsafeAppend([]byte(v.String()))
3233
}
3334

34-
func (b *InetBuilder) AppendValues(v []net.IPNet, valid []bool) {
35+
func (b *InetBuilder) AppendValues(v []*net.IPNet, valid []bool) {
3536
data := make([]string, len(v))
3637
for i, v := range v {
38+
if !valid[i] {
39+
continue
40+
}
3741
data[i] = v.String()
3842
}
3943
b.ExtensionBuilder.Builder.(*array.StringBuilder).AppendValues(data, valid)
@@ -48,7 +52,7 @@ func (b *InetBuilder) AppendValueFromString(s string) error {
4852
if err != nil {
4953
return err
5054
}
51-
b.Append(*data)
55+
b.Append(data)
5256
return nil
5357
}
5458

@@ -58,20 +62,18 @@ func (b *InetBuilder) UnmarshalOne(dec *json.Decoder) error {
5862
return err
5963
}
6064

61-
var val net.IPNet
65+
var val *net.IPNet
6266
switch v := t.(type) {
6367
case string:
64-
_, data, err := net.ParseCIDR(v)
68+
_, val, err = net.ParseCIDR(v)
6569
if err != nil {
6670
return err
6771
}
68-
val = *data
6972
case []byte:
70-
_, data, err := net.ParseCIDR(string(v))
73+
_, val, err = net.ParseCIDR(string(v))
7174
if err != nil {
7275
return err
7376
}
74-
val = *data
7577
case nil:
7678
b.AppendNull()
7779
return nil
@@ -116,7 +118,7 @@ type InetArray struct {
116118
array.ExtensionArrayBase
117119
}
118120

119-
func (a InetArray) String() string {
121+
func (a *InetArray) String() string {
120122
arr := a.Storage().(*array.String)
121123
o := new(strings.Builder)
122124
o.WriteString("[")
@@ -126,33 +128,39 @@ func (a InetArray) String() string {
126128
}
127129
switch {
128130
case a.IsNull(i):
129-
o.WriteString("(null)")
131+
o.WriteString(array.NullValueStr)
130132
default:
131-
fmt.Fprintf(o, "\"%s\"", arr.Value(i))
133+
fmt.Fprintf(o, "%q", a.ValueStr(i))
132134
}
133135
}
134136
o.WriteString("]")
135137
return o.String()
136138
}
137139

140+
func (a *InetArray) Value(i int) *net.IPNet {
141+
if a.IsNull(i) {
142+
return nil
143+
}
144+
_, ipnet, err := net.ParseCIDR(a.Storage().(*array.String).Value(i))
145+
if err != nil {
146+
panic(fmt.Errorf("invalid ip+net: %w", err))
147+
}
148+
149+
return ipnet
150+
}
151+
138152
func (a *InetArray) ValueStr(i int) string {
139-
arr := a.Storage().(*array.String)
140153
switch {
141154
case a.IsNull(i):
142-
return "(null)"
155+
return array.NullValueStr
143156
default:
144-
return arr.Value(i)
157+
return a.Value(i).String()
145158
}
146159
}
147160

148161
func (a *InetArray) GetOneForMarshal(i int) any {
149-
arr := a.Storage().(*array.String)
150-
if a.IsValid(i) {
151-
_, ipnet, err := net.ParseCIDR(arr.Value(i))
152-
if err != nil {
153-
panic(fmt.Errorf("invalid ip+net: %w", err))
154-
}
155-
return ipnet.String()
162+
if val := a.Value(i); val != nil {
163+
return val.String()
156164
}
157165
return nil
158166
}
@@ -166,27 +174,26 @@ type InetType struct {
166174
// NewInetType is a convenience function to create an instance of InetType
167175
// with the correct storage type
168176
func NewInetType() *InetType {
169-
return &InetType{
170-
ExtensionBase: arrow.ExtensionBase{
171-
Storage: &arrow.StringType{}}}
177+
return &InetType{ExtensionBase: arrow.ExtensionBase{Storage: &arrow.StringType{}}}
172178
}
173179

174-
func (InetType) ArrayType() reflect.Type {
180+
// ArrayType returns TypeOf(InetArray{}) for constructing Inet arrays
181+
func (*InetType) ArrayType() reflect.Type {
175182
return reflect.TypeOf(InetArray{})
176183
}
177184

178-
func (InetType) ExtensionName() string {
185+
func (*InetType) ExtensionName() string {
179186
return "inet"
180187
}
181188

182189
// Serialize returns "inet-serialized" for testing proper metadata passing
183-
func (InetType) Serialize() string {
190+
func (*InetType) Serialize() string {
184191
return "inet-serialized"
185192
}
186193

187194
// Deserialize expects storageType to be StringType and the data to be
188195
// "inet-serialized" in order to correctly create a InetType for testing deserialize.
189-
func (InetType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error) {
196+
func (*InetType) Deserialize(storageType arrow.DataType, data string) (arrow.ExtensionType, error) {
190197
if data != "inet-serialized" {
191198
return nil, fmt.Errorf("type identifier did not match: '%s'", data)
192199
}
@@ -196,11 +203,11 @@ func (InetType) Deserialize(storageType arrow.DataType, data string) (arrow.Exte
196203
return NewInetType(), nil
197204
}
198205

199-
// InetType are equal if both are named "inet"
200-
func (u InetType) ExtensionEquals(other arrow.ExtensionType) bool {
206+
// ExtensionEquals returns true if both extensions have the same name
207+
func (u *InetType) ExtensionEquals(other arrow.ExtensionType) bool {
201208
return u.ExtensionName() == other.ExtensionName()
202209
}
203210

204-
func (InetType) NewBuilder(bldr *array.ExtensionBuilder) array.Builder {
211+
func (*InetType) NewBuilder(bldr *array.ExtensionBuilder) array.Builder {
205212
return NewInetBuilder(bldr)
206213
}

types/inet_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,12 +9,12 @@ import (
99
"github.com/stretchr/testify/require"
1010
)
1111

12-
func mustParseInet(s string) net.IPNet {
12+
func mustParseInet(s string) *net.IPNet {
1313
_, ipnet, err := net.ParseCIDR(s)
1414
if err != nil {
1515
panic(err)
1616
}
17-
return *ipnet
17+
return ipnet
1818
}
1919

2020
func TestInetBuilder(t *testing.T) {
@@ -31,7 +31,7 @@ func TestInetBuilder(t *testing.T) {
3131
require.Equal(t, 4, b.Len(), "unexpected Len()")
3232
require.Equal(t, 2, b.NullN(), "unexpected NullN()")
3333

34-
values := []net.IPNet{
34+
values := []*net.IPNet{
3535
mustParseInet("192.168.0.0/26"),
3636
mustParseInet("192.168.0.0/27"),
3737
}

0 commit comments

Comments
 (0)