Skip to content

Commit 591502f

Browse files
authored
fix: Disallow null character in strings per utf8 spec (#797)
This should solve a bunch of issues that we have with the NullValues and StripNulls logic propagated to all our destination plugins. Apparently a [valid](apache/arrow#35161 (comment)) UTF-8 string can't have null characters in the middle of the string, so we should just strip those in our type system. (It can have them at the end or the start, but I suggest stripping those as well, since they are really unnecessary as far as I know and probably exist for some archaic reason, like sending a string over the wire without any additional encoding/protocol, which we already have via Arrow.) Once we move sources to Arrow, we will add the validation/stripping on the source side.
1 parent b1baab6 commit 591502f

File tree

1 file changed

+5
-2
lines changed

1 file changed

+5
-2
lines changed

schema/arrow.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package schema
22

33
import (
44
"fmt"
5+
"strings"
56

67
"github.com/goccy/go-json"
78

@@ -270,7 +271,9 @@ func CQTypesToRecord(mem memory.Allocator, c []CQTypes, arrowSchema *arrow.Schem
270271
}
271272
case TypeString:
272273
if c[j][i].(*Text).Status == Present {
273-
bldr.Field(i).(*array.StringBuilder).Append(c[j][i].(*Text).Str)
274+
// In the new type system we won't allow null characters in strings, as they are not valid UTF-8
275+
// https://github.com/apache/arrow/pull/35161#discussion_r1170516104
276+
bldr.Field(i).(*array.StringBuilder).Append(strings.ReplaceAll(c[j][i].(*Text).Str, "\x00", ""))
274277
} else {
275278
bldr.Field(i).(*array.StringBuilder).AppendNull()
276279
}
@@ -285,7 +288,7 @@ func CQTypesToRecord(mem memory.Allocator, c []CQTypes, arrowSchema *arrow.Schem
285288
listBldr := bldr.Field(i).(*array.ListBuilder)
286289
listBldr.Append(true)
287290
for _, str := range c[j][i].(*TextArray).Elements {
288-
listBldr.ValueBuilder().(*array.StringBuilder).Append(str.Str)
291+
listBldr.ValueBuilder().(*array.StringBuilder).Append(strings.ReplaceAll(str.Str, "\x00", ""))
289292
}
290293
} else {
291294
bldr.Field(i).(*array.ListBuilder).AppendNull()

0 commit comments

Comments
 (0)