Skip to content

Commit 637fc28

Browse files
felipecrv and xuliangs authored
fix(go/adbc/driver/bigquery): fix parsing repeated records with nested fields (apache#3240)
Co-authored-by: Xuliang (Harry) Sun <32334165+xuliangs@users.noreply.github.com>
1 parent 16269af commit 637fc28

File tree

2 files changed

+159
-31
lines changed

2 files changed

+159
-31
lines changed

go/adbc/driver/bigquery/connection.go

Lines changed: 19 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -805,40 +805,23 @@ func buildField(schema *bigquery.FieldSchema, level uint) (arrow.Field, error) {
805805
case bigquery.TimestampFieldType:
806806
field.Type = arrow.FixedWidthTypes.Timestamp_ms
807807
case bigquery.RecordFieldType:
808-
if schema.Repeated {
809-
if len(schema.Schema) == 1 {
810-
arrayField, err := buildField(schema.Schema[0], level+1)
811-
if err != nil {
812-
return arrow.Field{}, err
813-
}
814-
field.Type = arrow.ListOf(arrayField.Type)
815-
field.Metadata = arrayField.Metadata
816-
field.Nullable = arrayField.Nullable
817-
} else {
818-
return arrow.Field{}, adbc.Error{
819-
Code: adbc.StatusInvalidArgument,
820-
Msg: fmt.Sprintf("Cannot create array schema for filed `%s`: len(schema.Schema) != 1", schema.Name),
821-
}
822-
}
823-
} else {
824-
nestedFields := make([]arrow.Field, len(schema.Schema))
825-
for i, nestedSchema := range schema.Schema {
826-
f, err := buildField(nestedSchema, level+1)
827-
if err != nil {
828-
return arrow.Field{}, err
829-
}
830-
nestedFields[i] = f
808+
// create an Arrow struct for BigQuery Record fields
809+
nestedFields := make([]arrow.Field, len(schema.Schema))
810+
for i, nestedFieldSchema := range schema.Schema {
811+
f, err := buildField(nestedFieldSchema, level+1)
812+
if err != nil {
813+
return arrow.Field{}, err
831814
}
832-
structType := arrow.StructOf(nestedFields...)
833-
if structType == nil {
834-
return arrow.Field{}, adbc.Error{
835-
Code: adbc.StatusInvalidArgument,
836-
Msg: fmt.Sprintf("Cannot create a struct schema for record `%s`", schema.Name),
837-
}
815+
nestedFields[i] = f
816+
}
817+
structType := arrow.StructOf(nestedFields...)
818+
if structType == nil {
819+
return arrow.Field{}, adbc.Error{
820+
Code: adbc.StatusInvalidArgument,
821+
Msg: fmt.Sprintf("Cannot create a struct schema for record `%s`", schema.Name),
838822
}
839-
field.Type = structType
840823
}
841-
824+
field.Type = structType
842825
case bigquery.DateFieldType:
843826
field.Type = arrow.FixedWidthTypes.Date32
844827
case bigquery.TimeFieldType:
@@ -870,6 +853,11 @@ func buildField(schema *bigquery.FieldSchema, level uint) (arrow.Field, error) {
870853
}
871854
}
872855

856+
// if the field is repeated, then it's a list of the type we just built
857+
if schema.Repeated {
858+
field.Type = arrow.ListOf(field.Type)
859+
}
860+
873861
if level == 0 {
874862
metadata["DefaultValueExpression"] = schema.DefaultValueExpression
875863
}
Lines changed: 140 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,140 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
package bigquery
19+
20+
import (
21+
"testing"
22+
23+
"cloud.google.com/go/bigquery"
24+
)
25+
26+
// TestBuildField is a table-driven test for buildField. Each case passes a
// BigQuery field schema to buildField at level 0 and checks that the returned
// Arrow field keeps the schema's name and that its type renders to the
// expected string.
func TestBuildField(t *testing.T) {
27+
// Each case pairs an input schema with the expected String() rendering of
// the resulting Arrow type, or flags that an error is expected instead.
tests := []struct {
28+
name string
29+
schema *bigquery.FieldSchema
30+
expectedTypeStr string
31+
expectError bool
32+
}{
33+
// Repeated scalar: a repeated INTEGER field is expected to become a list of int64.
{
34+
name: "ArrayOfScalar",
35+
schema: &bigquery.FieldSchema{
36+
Name: "test_array_scalar_field",
37+
Type: bigquery.IntegerFieldType,
38+
Repeated: true,
39+
Required: false,
40+
Description: "Test array field with scalar type",
41+
Schema: nil,
42+
},
43+
expectedTypeStr: "list<item: int64, nullable>",
44+
expectError: false,
45+
},
46+
// Repeated record with two nested fields: expected to become a list whose
// item is a struct holding both fields.
{
47+
name: "ArrayOfRecordWithMultipleFields",
48+
schema: &bigquery.FieldSchema{
49+
Name: "test_array_field",
50+
Type: bigquery.RecordFieldType,
51+
Repeated: true,
52+
Required: false,
53+
Description: "Test array field with multiple nested fields",
54+
Schema: []*bigquery.FieldSchema{
55+
{
56+
Name: "field1",
57+
Type: bigquery.StringFieldType,
58+
Required: false,
59+
},
60+
{
61+
Name: "field2",
62+
Type: bigquery.IntegerFieldType,
63+
Required: false,
64+
},
65+
},
66+
},
67+
expectedTypeStr: "list<item: struct<field1: utf8, field2: int64>, nullable>",
68+
expectError: false,
69+
},
70+
// Repeated record with a single nested field: the expected item type is
// still a struct with one field, not the bare nested type.
{
71+
name: "ArrayOfRecordWithSingleField",
72+
schema: &bigquery.FieldSchema{
73+
Name: "test_single_array_field",
74+
Type: bigquery.RecordFieldType,
75+
Repeated: true,
76+
Required: false,
77+
Description: "Test array field with single nested field",
78+
Schema: []*bigquery.FieldSchema{
79+
{
80+
Name: "single_field",
81+
Type: bigquery.StringFieldType,
82+
Required: false,
83+
},
84+
},
85+
},
86+
expectedTypeStr: "list<item: struct<single_field: utf8>, nullable>",
87+
expectError: false,
88+
},
89+
// Non-repeated record: expected to become a plain struct with no list wrapper.
{
90+
name: "NonRepeatedRecord",
91+
schema: &bigquery.FieldSchema{
92+
Name: "test_struct_field",
93+
Type: bigquery.RecordFieldType,
94+
Repeated: false,
95+
Required: false,
96+
Description: "Test struct field with multiple nested fields",
97+
Schema: []*bigquery.FieldSchema{
98+
{
99+
Name: "nested_string",
100+
Type: bigquery.StringFieldType,
101+
Required: false,
102+
},
103+
{
104+
Name: "nested_int",
105+
Type: bigquery.IntegerFieldType,
106+
Required: true,
107+
},
108+
},
109+
},
110+
expectedTypeStr: "struct<nested_string: utf8, nested_int: int64>",
111+
expectError: false,
112+
},
113+
}
114+
115+
// Run every case as its own subtest, named after the case.
for _, tt := range tests {
116+
t.Run(tt.name, func(t *testing.T) {
117+
field, err := buildField(tt.schema, 0)
118+
119+
// Error-expecting cases only require a non-nil error; the returned field
// is not inspected. No case in the table above currently sets expectError.
if tt.expectError {
120+
if err == nil {
121+
t.Fatalf("Expected error for test case %s, but got nil", tt.name)
122+
}
123+
return
124+
}
125+
126+
if err != nil {
127+
t.Fatalf("Expected no error for test case %s, got: %v", tt.name, err)
128+
}
129+
130+
// The Arrow field name must match the BigQuery schema name.
if field.Name != tt.schema.Name {
131+
t.Errorf("Expected field name '%s', got '%s'", tt.schema.Name, field.Name)
132+
}
133+
134+
// Types are compared by their String() rendering, not structurally.
typeStr := field.Type.String()
135+
if typeStr != tt.expectedTypeStr {
136+
t.Errorf("Expected field type string to be '%s', got '%s'", tt.expectedTypeStr, typeStr)
137+
}
138+
})
139+
}
140+
}

0 commit comments

Comments
 (0)