Skip to content

array.JSONReader error on unmarshal data in exponent notation to int64 field #474

@loicalleyne

Description

@loicalleyne

Describe the bug, including details regarding any error messages, version, and platform.

Issue:
array.JSONReader returns an error when unmarshalling a number written in exponent notation into an int64 or uint64 field, even though the number represents an integer value.

Repro:

package main

import (
	"bufio"
	"fmt"
	"strings"

	"github.com/apache/arrow-go/v18/arrow"
	"github.com/apache/arrow-go/v18/arrow/array"
)

// main runs the same newline-delimited JSON input through array.JSONReader
// twice: once with an Int64 column and once with a Uint64 column.
func main() {
	// The third record carries the integer value 669996770 in exponent
	// notation. This note lives here rather than after the record itself:
	// text inside the raw string is part of the JSON input, and a `//`
	// comment is not valid JSON.
	input := `{"IntegerId":"669996774"}
{"IntegerId":"669996773"}
{"IntegerId":"6.6999677E+8"}
`
	readIntegerIDs("----- With int64 -----", arrow.PrimitiveTypes.Int64, input)
	readIntegerIDs("----- Now with uint64 -----", arrow.PrimitiveTypes.Uint64, input)
}

// readIntegerIDs prints banner, then reads input through an array.JSONReader
// whose schema has a single nullable "IntegerId" field of type typ. Every
// column value of every row is printed, followed by either the reader's error
// or a success message.
func readIntegerIDs(banner string, typ arrow.DataType, input string) {
	fmt.Println(banner)

	src := bufio.NewReaderSize(strings.NewReader(input), 1024*32)
	schema := arrow.NewSchema([]arrow.Field{
		{Name: "IntegerId", Type: typ, Nullable: true},
	}, nil)

	rdr := array.NewJSONReader(src, schema, array.WithChunk(1))
	// Release is scoped to this pass so each reader is freed before the next
	// one is created.
	defer rdr.Release()

	for rdr.Next() {
		rec := rdr.Record()
		if err := rdr.Err(); err != nil {
			fmt.Println("Error reading record:", err)
			continue
		}
		if rec == nil {
			fmt.Println("No records found")
			continue
		}

		for row := 0; row < int(rec.NumRows()); row++ {
			for i := 0; i < int(rec.NumCols()); i++ {
				fmt.Printf("Column %d: %v\n", i, rec.Column(i).ValueStr(row))
			}
		}
	}

	// Next returning false covers both end of input and failure; Err tells
	// the two apart.
	if err := rdr.Err(); err != nil {
		fmt.Println("Error reading JSON:", err)
		return
	}
	fmt.Println("All records processed successfully")
}

Output:

----- With int64 -----
Column 0: 669996774
Column 0: 669996773
Error reading JSON: json: cannot unmarshal 6.6999677E+8 into Go value of type int64
----- Now with uint64 -----
Column 0: 669996774
Column 0: 669996773
Error reading JSON: json: cannot unmarshal 6.6999677E+8 into Go value of type uint64

Cause:
The UnmarshalOne methods of Int64Builder and Uint64Builder use strconv.ParseInt and strconv.ParseUint to convert a json.Number to int64/uint64. Neither strconv function accepts exponent notation.

Proposed solution:
Since all numbers in JSON are technically floats, modify Int64Builder.UnmarshalOne and Uint64Builder.UnmarshalOne to use strconv.ParseFloat, which supports exponent notation, to obtain a float64 and then cast it to int64/uint64.

Component(s)

Other

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type: bug (Something isn't working)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions