-
Notifications
You must be signed in to change notification settings - Fork 85
Open
Labels
Type: bugSomething isn't workingSomething isn't working
Description
Describe the bug, including details regarding any error messages, version, and platform.
Issue:
array.JSONReader returns an error when unmarshalling a value written in exponent notation into an int64 or uint64 field, even though the value it represents is an integer.
Repro:
package main
import (
"bufio"
"fmt"
"strings"
"github.com/apache/arrow-go/v18/arrow"
"github.com/apache/arrow-go/v18/arrow/array"
)
func main() {
input := `{"IntegerId":"669996774"}
{"IntegerId":"669996773"}
{"IntegerId":"6.6999677E+8"} // integer value 669996770
`
fmt.Println("----- With int64 -----")
r2 := bufio.NewReaderSize(strings.NewReader(input), 1024*32)
fields := []arrow.Field{
{Name: "IntegerId", Type: arrow.PrimitiveTypes.Int64, Nullable: true},
}
schema := arrow.NewSchema(fields, nil)
rdr := array.NewJSONReader(r2, schema, array.WithChunk(1))
defer rdr.Release()
for rdr.Next() {
rec := rdr.Record()
if rdr.Err() != nil {
fmt.Println("Error reading record:", rdr.Err())
continue
}
if rec == nil {
fmt.Println("No records found")
continue
}
for x := 0; x < int(rec.NumRows()); x++ {
for i := 0; i < int(rec.NumCols()); i++ {
col := rec.Column(i)
fmt.Printf("Column %d: %v\n", i, col.ValueStr(x))
}
}
}
if rdr.Err() != nil {
fmt.Println("Error reading JSON:", rdr.Err())
} else {
fmt.Println("All records processed successfully")
}
fmt.Println("----- Now with uint64 -----")
r2 = bufio.NewReaderSize(strings.NewReader(input), 1024*32)
fields = []arrow.Field{
{Name: "IntegerId", Type: arrow.PrimitiveTypes.Uint64, Nullable: true},
}
schema = arrow.NewSchema(fields, nil)
rdr = array.NewJSONReader(r2, schema, array.WithChunk(1))
defer rdr.Release()
for rdr.Next() {
rec := rdr.Record()
if rdr.Err() != nil {
fmt.Println("Error reading record:", rdr.Err())
continue
}
if rec == nil {
fmt.Println("No records found")
continue
}
for x := 0; x < int(rec.NumRows()); x++ {
for i := 0; i < int(rec.NumCols()); i++ {
col := rec.Column(i)
fmt.Printf("Column %d: %v\n", i, col.ValueStr(x))
}
}
}
if rdr.Err() != nil {
fmt.Println("Error reading JSON:", rdr.Err())
} else {
fmt.Println("All records processed successfully")
}
}Output:
----- With int64 -----
Column 0: 669996774
Column 0: 669996773
Error reading JSON: json: cannot unmarshal 6.6999677E+8 into Go value of type int64
----- Now with uint64 -----
Column 0: 669996774
Column 0: 669996773
Error reading JSON: json: cannot unmarshal 6.6999677E+8 into Go value of type uint64
Cause:
The UnmarshalOne methods of Int64Builder and Uint64Builder use strconv.ParseInt and strconv.ParseUint to convert a json.Number to int64/uint64. These functions do not accept exponent notation.
Proposed solution:
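Since JSON does not distinguish integers from floating-point numbers, modify Int64Builder.UnmarshalOne and Uint64Builder.UnmarshalOne to use strconv.ParseFloat, which accepts exponent notation, and then convert the resulting float64 to int64/uint64. Note that float64 represents integers exactly only up to 2^53, so the float path should be a fallback used when strconv.ParseInt/ParseUint fails, and it should reject values that have a fractional part or fall outside the target type's range.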
Component(s)
Other
Metadata
Metadata
Assignees
Labels
Type: bugSomething isn't workingSomething isn't working