Skip to content

Commit e2855eb

Browse files
Bug fix for null values in complex types (#128)
GitHub issue #126: Null member values in complex types (i.e. array, map, and struct) were being scanned as the zero value of the member type. Updated the column value containers for complex types to generate correct JSON for null member values. In cases where no type could be determined for a member, an error was being thrown for an unhandled data type. For example, in the query "select map('red', NULL, 'green', NULL) as sample_map" there is no determined type for the map values. Added a new column value container that corresponds to the arrow NullType. Updated the expected values in the existing list test now that null elements are no longer treated as the zero value of the type. Added a new test for the case where the type of elements/members cannot be determined. Signed-off-by: Raymond Cypher <[email protected]>
2 parents 2661951 + 17897c1 commit e2855eb

File tree

4 files changed

+211
-39
lines changed

4 files changed

+211
-39
lines changed

internal/rows/arrowbased/arrowRows.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -701,6 +701,9 @@ func (vcm *arrowValueContainerMaker) makeColumnValueContainer(t arrow.DataType,
701701

702702
return svc, nil
703703

704+
case *arrow.NullType:
705+
return nullContainer, nil
706+
704707
default:
705708
return nil, errors.Errorf(errArrowRowsUnhandledArrowType(t.String()))
706709
}

internal/rows/arrowbased/arrowRows_test.go

Lines changed: 45 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1210,21 +1210,21 @@ func TestArrowRowScanner(t *testing.T) {
12101210
// array_interval_ym array<interval year>,
12111211
// array_interval_dt array<interval day>
12121212
expected := []driver.Value{
1213-
"[true,false,false]",
1214-
"[1,2,0,3]",
1215-
"[4,5,0,6]",
1216-
"[7,8,0,9]",
1217-
"[10,11,0,12]",
1218-
"[0,1.1,2.2]",
1219-
"[3.3,0,4.4]",
1220-
"[\"s1\",\"s2\",\"\"]",
1221-
"[\"2021-07-01 05:43:28 +0000 UTC\",\"-2022-08-13 14:01:01 +0000 UTC\",\"1970-01-01 00:00:00 +0000 UTC\"]",
1222-
"[\"Gr8=\",\"D/8=\",\"\"]",
1223-
"[[1,2,3],[4,5,6],[]]",
1224-
"[{\"key1\":1,\"key2\":2},{\"key1\":1,\"key2\":2},{}]",
1225-
"[{\"Field1\":77,\"Field2\":\"2020-12-31 00:00:00 +0000 UTC\"},{\"Field1\":13,\"Field2\":\"-2020-12-31 00:00:00 +0000 UTC\"},{\"Field1\":0,\"Field2\":\"1970-01-01 00:00:00 +0000 UTC\"}]",
1226-
"[5.15,123.45,0]",
1227-
"[\"2020-12-31 00:00:00 +0000 UTC\",\"-2020-12-31 00:00:00 +0000 UTC\",\"1970-01-01 00:00:00 +0000 UTC\"]",
1213+
"[true,false,null]",
1214+
"[1,2,null,3]",
1215+
"[4,5,null,6]",
1216+
"[7,8,null,9]",
1217+
"[10,11,null,12]",
1218+
"[null,1.1,2.2]",
1219+
"[3.3,null,4.4]",
1220+
"[\"s1\",\"s2\",null]",
1221+
"[\"2021-07-01 05:43:28 +0000 UTC\",\"-2022-08-13 14:01:01 +0000 UTC\",null]",
1222+
"[\"Gr8=\",\"D/8=\",null]",
1223+
"[[1,2,3],[4,5,6],null]",
1224+
"[{\"key1\":1,\"key2\":2},{\"key1\":1,\"key2\":2},null]",
1225+
"[{\"Field1\":77,\"Field2\":\"2020-12-31 00:00:00 +0000 UTC\"},{\"Field1\":13,\"Field2\":\"-2020-12-31 00:00:00 +0000 UTC\"},{\"Field1\":null,\"Field2\":null}]",
1226+
"[5.15,123.45,null]",
1227+
"[\"2020-12-31 00:00:00 +0000 UTC\",\"-2020-12-31 00:00:00 +0000 UTC\",null]",
12281228
}
12291229

12301230
executeStatementResp := cli_service.TExecuteStatementResp{}
@@ -1319,6 +1319,36 @@ func TestArrowRowScanner(t *testing.T) {
13191319

13201320
})
13211321

1322+
t.Run("Retrieve null values in complex types", func(t *testing.T) {
1323+
// results of executing query:
1324+
// "select map('red', NULL, 'green', NULL) as sample_map, named_struct('Field1', NULL, 'Field2', NULL) as sample_struct, ARRAY(NULL, NULL, NULL) as sample_list"
1325+
executeStatementResp := cli_service.TExecuteStatementResp{}
1326+
loadTestData(t, "nullsInComplexTypes.json", &executeStatementResp)
1327+
1328+
expected := []driver.Value{
1329+
"{\"red\":null,\"green\":null}",
1330+
"{\"Field1\":null,\"Field2\":null}",
1331+
"[null,null,null]",
1332+
}
1333+
1334+
config := config.WithDefaults()
1335+
config.UseArrowNativeTimestamp = true
1336+
config.UseArrowNativeComplexTypes = true
1337+
config.UseArrowNativeDecimal = false
1338+
config.UseArrowNativeIntervalTypes = false
1339+
d, err := NewArrowRowScanner(executeStatementResp.DirectResults.ResultSetMetadata, executeStatementResp.DirectResults.ResultSet.Results, config, nil, context.Background())
1340+
assert.Nil(t, err)
1341+
1342+
ars := d.(*arrowRowScanner)
1343+
1344+
dest := make([]driver.Value, len(executeStatementResp.DirectResults.ResultSetMetadata.Schema.Columns))
1345+
err = ars.ScanRow(dest, 0)
1346+
assert.Nil(t, err)
1347+
1348+
for i := range expected {
1349+
assert.Equal(t, expected[i], dest[i])
1350+
}
1351+
})
13221352
}
13231353

13241354
type fakeColumnValues struct {

internal/rows/arrowbased/columnValues.go

Lines changed: 57 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -135,19 +135,24 @@ func (lvc *listValueContainer) Value(i int) (any, error) {
135135
len := int(e - s)
136136

137137
for i := 0; i < len; i++ {
138-
val, err := lvc.values.Value(i + int(s))
139-
if err != nil {
140-
return nil, err
141-
}
138+
if lvc.values.IsNull(i + int(s)) {
139+
r = r + "null"
140+
} else {
142141

143-
if !lvc.complexValue {
144-
vb, err := marshal(val)
142+
val, err := lvc.values.Value(i + int(s))
145143
if err != nil {
146144
return nil, err
147145
}
148-
r = r + string(vb)
149-
} else {
150-
r = r + val.(string)
146+
147+
if !lvc.complexValue {
148+
vb, err := marshal(val)
149+
if err != nil {
150+
return nil, err
151+
}
152+
r = r + string(vb)
153+
} else {
154+
r = r + val.(string)
155+
}
151156
}
152157

153158
if i < len-1 {
@@ -204,18 +209,20 @@ func (mvc *mapValueContainer) Value(i int) (any, error) {
204209
return nil, err
205210
}
206211

207-
v, err := mvc.values.Value(int(i))
212+
key, err := marshal(k)
208213
if err != nil {
209214
return nil, err
210215
}
211216

212-
key, err := marshal(k)
217+
v, err := mvc.values.Value(int(i))
213218
if err != nil {
214219
return nil, err
215220
}
216221

217222
var b string
218-
if mvc.complexValue {
223+
if mvc.values.IsNull(int(i)) {
224+
b = "null"
225+
} else if mvc.complexValue {
219226
b = v.(string)
220227
} else {
221228
vb, err := marshal(v)
@@ -288,23 +295,27 @@ func (svc *structValueContainer) Value(i int) (any, error) {
288295
for j := range svc.fieldValues {
289296
r = r + "\"" + svc.fieldNames[j] + "\":"
290297

291-
v, err := svc.fieldValues[j].Value(int(i))
292-
if err != nil {
293-
return nil, err
294-
}
295-
296-
var b string
297-
if svc.complexValue[j] {
298-
b = v.(string)
298+
if svc.fieldValues[j].IsNull(int(i)) {
299+
r = r + "null"
299300
} else {
300-
vb, err := marshal(v)
301+
v, err := svc.fieldValues[j].Value(int(i))
301302
if err != nil {
302303
return nil, err
303304
}
304-
b = string(vb)
305-
}
306305

307-
r = r + b
306+
var b string
307+
if svc.complexValue[j] {
308+
b = v.(string)
309+
} else {
310+
vb, err := marshal(v)
311+
if err != nil {
312+
return nil, err
313+
}
314+
b = string(vb)
315+
}
316+
317+
r = r + b
318+
}
308319
if j < len(svc.fieldValues)-1 {
309320
r = r + ","
310321
}
@@ -473,3 +484,25 @@ func marshal(val any) ([]byte, error) {
473484
vb, err := json.Marshal(val)
474485
return vb, err
475486
}
487+
488+
var nullContainer *nullContainer_ = &nullContainer_{}
489+
490+
type nullContainer_ struct {
491+
}
492+
493+
var _ columnValues = (*nullContainer_)(nil)
494+
495+
func (tvc *nullContainer_) Value(i int) (any, error) {
496+
return nil, nil
497+
}
498+
499+
func (tvc *nullContainer_) IsNull(i int) bool {
500+
return true
501+
}
502+
503+
func (tvc *nullContainer_) Release() {
504+
}
505+
506+
func (tvc *nullContainer_) SetValueArray(colData arrow.ArrayData) error {
507+
return nil
508+
}
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
{
2+
"status": {
3+
"statusCode": "SUCCESS_STATUS"
4+
},
5+
"operationHandle": {
6+
"operationId": {
7+
"guid": "Ae3/5kgLHHCdBXUYvmHrVA==",
8+
"secret": "M41SnYJyRuuEgstBlGaDnQ=="
9+
},
10+
"operationType": "EXECUTE_STATEMENT",
11+
"hasResultSet": true
12+
},
13+
"directResults": {
14+
"operationStatus": {
15+
"status": {
16+
"statusCode": "SUCCESS_STATUS"
17+
},
18+
"operationState": "FINISHED_STATE",
19+
"operationStarted": 1685560003513,
20+
"operationCompleted": 1685560003570
21+
},
22+
"resultSetMetadata": {
23+
"status": {
24+
"statusCode": "SUCCESS_STATUS"
25+
},
26+
"schema": {
27+
"columns": [
28+
{
29+
"columnName": "sample_map",
30+
"typeDesc": {
31+
"types": [
32+
{
33+
"primitiveEntry": {
34+
"type": "MAP_TYPE"
35+
}
36+
}
37+
]
38+
},
39+
"position": 1,
40+
"comment": ""
41+
},
42+
{
43+
"columnName": "sample_struct",
44+
"typeDesc": {
45+
"types": [
46+
{
47+
"primitiveEntry": {
48+
"type": "STRUCT_TYPE"
49+
}
50+
}
51+
]
52+
},
53+
"position": 2,
54+
"comment": ""
55+
},
56+
{
57+
"columnName": "sample_list",
58+
"typeDesc": {
59+
"types": [
60+
{
61+
"primitiveEntry": {
62+
"type": "ARRAY_TYPE"
63+
}
64+
}
65+
]
66+
},
67+
"position": 3,
68+
"comment": ""
69+
}
70+
]
71+
},
72+
"resultFormat": "ARROW_BASED_SET",
73+
"lz4Compressed": false,
74+
"arrowSchema": "/////9gEAAAQAAAAAAAKAA4ABgANAAgACgAAAAAABAAQAAAAAAEKAAwAAAAIAAQACgAAAAgAAAAIAAAAAAAAAAMAAADcAgAAGAEAAAQAAADW+///FAAAALwAAADwAAAAAAAADOwAAAACAAAAcAAAAAQAAAC0/P//CAAAAEQAAAA5AAAAeyJ0eXBlIjoiYXJyYXkiLCJlbGVtZW50VHlwZSI6InZvaWQiLCJjb250YWluc051bGwiOnRydWV9AAAAFwAAAFNwYXJrOkRhdGFUeXBlOkpzb25UeXBlABz9//8IAAAAFAAAAAsAAABBUlJBWTxWT0lEPgAWAAAAU3Bhcms6RGF0YVR5cGU6U3FsTmFtZQAAAQAAAAQAAADi/P//FAAAABQAAAAUAAAAAAABARAAAAAAAAAAAAAAAHT8//8HAAAAZWxlbWVudACE/P//CwAAAHNhbXBsZV9saXN0AOb8//8UAAAANAEAAJwBAAAAAAANmAEAAAIAAADQAAAABAAAAMT9//8IAAAApAAAAJgAAAB7InR5cGUiOiJzdHJ1Y3QiLCJmaWVsZHMiOlt7Im5hbWUiOiJGaWVsZDEiLCJ0eXBlIjoidm9pZCIsIm51bGxhYmxlIjp0cnVlLCJtZXRhZGF0YSI6e319LHsibmFtZSI6IkZpZWxkMiIsInR5cGUiOiJ2b2lkIiwibnVsbGFibGUiOnRydWUsIm1ldGFkYXRhIjp7fX1dfQAAAAAXAAAAU3Bhcms6RGF0YVR5cGU6SnNvblR5cGUAjP7//wgAAAAsAAAAIgAAAFNUUlVDVDxGaWVsZDE6IFZPSUQsIEZpZWxkMjogVk9JRD4AABYAAABTcGFyazpEYXRhVHlwZTpTcWxOYW1lAAACAAAAOAAAAAQAAABu/v//FAAAABQAAAAUAAAAAAABARAAAAAAAAAAAAAAAAD+//8GAAAARmllbGQyAACe/v//FAAAABQAAAAUAAAAAAABARAAAAAAAAAAAAAAADD+//8GAAAARmllbGQxAABA/v//DQAAAHNhbXBsZV9zdHJ1Y3QAAACm/v//FAAAAOAAAACgAQAAAAAAEZwBAAACAAAAjAAAAAQAAACE////CAAAAFgAAABNAAAAeyJ0eXBlIjoibWFwIiwia2V5VHlwZSI6InN0cmluZyIsInZhbHVlVHlwZSI6InZvaWQiLCJ2YWx1ZUNvbnRhaW5zTnVsbCI6dHJ1ZX0AAAAXAAAAU3Bhcms6RGF0YVR5cGU6SnNvblR5cGUACAAMAAgABAAIAAAACAAAABwAAAARAAAATUFQPFNUUklORywgVk9JRD4AAAAWAAAAU3Bhcms6RGF0YVR5cGU6U3FsTmFtZQAAAQAAAAQAAACW////FAAAABQAAACcAAAAAAAADZgAAAAAAAAAAgAAAFwAAAAYAAAAAAASABgAFAATABIADAAAAAgABAASAAAAFAAAABQAAAAUAAAAAAABARAAAAAAAAAAAAAAAKT///8FAAAAdmFsdWUAEgAYABQAAAATAAwAAAAIAAQAEgAAABQAAAAUAAAAFAAAAAAAAAUQAAAAAAAAAAAAAADk////AwAAAGtleQDw////BwAAAGVudHJpZXMABAAEAAQAAAAKAAAAc2FtcGxlX21hcAAA",
75+
"cacheLookupResult": "CACHE_INELIGIBLE",
76+
"uncompressedBytes": 464,
77+
"compressedBytes": 464
78+
},
79+
"resultSet": {
80+
"status": {
81+
"statusCode": "SUCCESS_STATUS"
82+
},
83+
"hasMoreRows": false,
84+
"results": {
85+
"startRowOffset": 0,
86+
"rows": [],
87+
"arrowBatches": [
88+
{
89+
"batch": "/////3gBAAAUAAAAAAAAAAwAFgAOABUAEAAEAAwAAABQAAAAAAAAAAAABAAQAAAAAAMKABgADAAIAAQACgAAABQAAACoAAAAAQAAAAAAAAAAAAAACQAAAAAAAAAAAAAAAQAAAAAAAAAIAAAAAAAAAAgAAAAAAAAAEAAAAAAAAAABAAAAAAAAABgAAAAAAAAAAQAAAAAAAAAgAAAAAAAAAAwAAAAAAAAAMAAAAAAAAAAIAAAAAAAAADgAAAAAAAAAAQAAAAAAAABAAAAAAAAAAAEAAAAAAAAASAAAAAAAAAAIAAAAAAAAAAAAAAAJAAAAAQAAAAAAAAAAAAAAAAAAAAIAAAAAAAAAAAAAAAAAAAACAAAAAAAAAAAAAAAAAAAAAgAAAAAAAAACAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAQAAAAAAAAABAAAAAAAAAAEAAAAAAAAAAAAAAAAAAAADAAAAAAAAAAMAAAAAAAAAAQAAAAAAAAAAAAAAAgAAAAMAAAAAAAAAAwAAAAAAAAAAAAAAAwAAAAgAAAAAAAAAcmVkZ3JlZW4BAAAAAAAAAAEAAAAAAAAAAAAAAAMAAAA=",
90+
"rowCount": 1
91+
}
92+
]
93+
}
94+
},
95+
"closeOperation": {
96+
"status": {
97+
"statusCode": "SUCCESS_STATUS"
98+
}
99+
}
100+
},
101+
"executionRejected": false,
102+
"maxClusterCapacity": 280,
103+
"queryCost": 0.5,
104+
"currentClusterLoad": 1,
105+
"idempotencyType": "IDEMPOTENT"
106+
}

0 commit comments

Comments
 (0)