Skip to content

Commit 027273c

Browse files
authored
fix(tests): Find empty columns for JSON types (#1418)
I'm not sure if there's a better way, but this seems to work for me. ~Still testing cases.~ Fixes cloudquery/cloudquery#15707
1 parent 36128dd commit 027273c

File tree

2 files changed

+79
-0
lines changed

2 files changed

+79
-0
lines changed

schema/validators.go

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,10 @@
11
package schema
22

33
import (
4+
"encoding/json"
5+
46
"github.com/apache/arrow/go/v14/arrow"
7+
"github.com/cloudquery/plugin-sdk/v4/types"
58
)
69

710
func FindEmptyColumns(table *Table, records []arrow.Record) []string {
@@ -12,6 +15,14 @@ func FindEmptyColumns(table *Table, records []arrow.Record) []string {
1215
for colIndex, arr := range resource.Columns() {
1316
for i := 0; i < arr.Len(); i++ {
1417
if arr.IsValid(i) {
18+
if arrow.TypeEqual(arr.DataType(), types.ExtensionTypes.JSON) {
19+
// JSON column shouldn't be empty
20+
val := arr.GetOneForMarshal(i).(json.RawMessage)
21+
if isEmptyJSON(val) {
22+
continue
23+
}
24+
}
25+
1526
columnsWithValues[colIndex] = true
1627
}
1728
}
@@ -28,3 +39,15 @@ func FindEmptyColumns(table *Table, records []arrow.Record) []string {
2839
}
2940
return emptyColumns
3041
}
42+
43+
// isEmptyJSON reports whether msg holds no data. A message counts as empty
// when it has zero length (nil included) or when it is exactly one of the
// literals `null`, `{}` or `[]`.
//
// The comparison is byte-exact: a payload with any surrounding or interior
// whitespace (for example `{ }`) is reported as non-empty.
func isEmptyJSON(msg json.RawMessage) bool {
	raw := string(msg)
	return raw == "" || raw == "null" || raw == "{}" || raw == "[]"
}

schema/validators_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package schema
2+
3+
import (
4+
"fmt"
5+
"testing"
6+
7+
"github.com/apache/arrow/go/v14/arrow"
8+
"github.com/apache/arrow/go/v14/arrow/array"
9+
"github.com/apache/arrow/go/v14/arrow/memory"
10+
"github.com/cloudquery/plugin-sdk/v4/types"
11+
"github.com/stretchr/testify/require"
12+
)
13+
14+
func TestFindEmptyColumns(t *testing.T) {
15+
table := TestTable("test", TestSourceOptions{})
16+
tg := NewTestDataGenerator()
17+
record := tg.Generate(table, GenTestDataOptions{
18+
MaxRows: 1,
19+
NullRows: true,
20+
})
21+
v := FindEmptyColumns(table, []arrow.Record{record})
22+
require.NotEmpty(t, v)
23+
require.Len(t, v, len(table.Columns)-1) // exclude "id"
24+
}
25+
26+
func TestFindEmptyColumnsNotEmpty(t *testing.T) {
27+
table := TestTable("test", TestSourceOptions{})
28+
tg := NewTestDataGenerator()
29+
record := tg.Generate(table, GenTestDataOptions{
30+
MaxRows: 1,
31+
NullRows: false,
32+
})
33+
v := FindEmptyColumns(table, []arrow.Record{record})
34+
require.Empty(t, v)
35+
}
36+
37+
func TestFindEmptyColumnsJSON(t *testing.T) {
38+
table := &Table{
39+
Name: "test",
40+
Columns: ColumnList{
41+
{Name: "json", Type: types.ExtensionTypes.JSON},
42+
},
43+
}
44+
sc := table.ToArrowSchema()
45+
bldr := array.NewRecordBuilder(memory.DefaultAllocator, sc)
46+
err := bldr.Field(0).UnmarshalJSON([]byte(`[{}]`))
47+
if err != nil {
48+
panic(fmt.Sprintf("failed to unmarshal json for column: %v", err))
49+
}
50+
records := []arrow.Record{bldr.NewRecord()}
51+
bldr.Release()
52+
53+
v := FindEmptyColumns(table, records)
54+
require.NotEmpty(t, v)
55+
require.Len(t, v, 1)
56+
}

0 commit comments

Comments
 (0)