Skip to content

Commit c15b214

Browse files
authored
fix: Scalar timestamp parsing (#1109)
Timestamp parsing was broken by #1095 (at least for the test source, which gets its data from testdata).
1 parent 221cd41 commit c15b214

File tree

2 files changed

+86
-50
lines changed

2 files changed

+86
-50
lines changed

scalar/timestamp.go

Lines changed: 19 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -9,21 +9,13 @@ import (
99
"github.com/apache/arrow/go/v13/arrow"
1010
)
1111

12-
// const pgTimestamptzHourFormat = "2006-01-02 15:04:05.999999999Z07"
13-
// const pgTimestamptzMinuteFormat = "2006-01-02 15:04:05.999999999Z07:00"
14-
// const pgTimestamptzSecondFormat = "2006-01-02 15:04:05.999999999Z07:00:00"
15-
16-
// this is the default format used by time.Time.String()
17-
const defaultStringFormat = "2006-01-02 15:04:05.999999999 -0700 MST"
18-
19-
// this is used by arrow string format (time is in UTC)
20-
const arrowStringFormat = "2006-01-02 15:04:05.999999999"
21-
22-
// const microsecFromUnixEpochToY2K = 946684800 * 1000000
23-
2412
const (
25-
// negativeInfinityMicrosecondOffset = -9223372036854775808
26-
// infinityMicrosecondOffset = 9223372036854775807
13+
// this is the default format used by time.Time.String()
14+
defaultStringFormat = "2006-01-02 15:04:05.999999999 -0700 MST"
15+
16+
// these are the layouts of the Arrow string format (time is in UTC), without and with a trailing "Z"
17+
arrowStringFormat = "2006-01-02 15:04:05.999999999"
18+
arrowStringFormatNew = "2006-01-02 15:04:05.999999999Z"
2719
)
2820

2921
type Timestamp struct {
@@ -140,24 +132,19 @@ func (s *Timestamp) DecodeText(src []byte) error {
140132
sbuf = sbuf[:len(defaultStringFormat)]
141133
}
142134

143-
// there is no good way of detecting format so we just try few of them
144-
tim, err = time.Parse(time.RFC3339, sbuf)
145-
if err == nil {
146-
s.Value = tim.UTC()
147-
s.Valid = true
148-
return nil
149-
}
150-
tim, err = time.Parse(defaultStringFormat, sbuf)
151-
if err == nil {
152-
s.Value = tim.UTC()
153-
s.Valid = true
154-
return nil
155-
}
156-
tim, err = time.Parse(arrowStringFormat, sbuf)
157-
if err == nil {
158-
s.Value = tim.UTC()
159-
s.Valid = true
160-
return nil
135+
// there is no good way of detecting the format, so we just try a few of them
136+
for _, format := range []string{
137+
time.RFC3339,
138+
defaultStringFormat,
139+
arrowStringFormat,
140+
arrowStringFormatNew,
141+
} {
142+
tim, err = time.Parse(format, sbuf)
143+
if err == nil {
144+
s.Value = tim.UTC()
145+
s.Valid = true
146+
return nil
147+
}
161148
}
162149
return &ValidationError{Type: s.DataType(), Msg: "cannot parse timestamp", Value: sbuf, Err: err}
163150
}

scalar/timestamp_test.go

Lines changed: 67 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package scalar
22

33
import (
4+
"strconv"
45
"testing"
56
"time"
67

@@ -69,26 +70,74 @@ func TestTimestampDoubleSet(t *testing.T) {
6970
}
7071

7172
func TestAppendToBuilderTimestamp(t *testing.T) {
72-
units := []arrow.TimeUnit{arrow.Second, arrow.Millisecond, arrow.Microsecond, arrow.Nanosecond}
73-
expected := []string{"1999-01-08 04:05:06Z", "1999-01-08 04:05:06.123Z", "1999-01-08 04:05:06.123456Z", "1999-01-08 04:05:06.123456789Z"}
74-
for i, unit := range units {
75-
timestamp := Timestamp{
76-
Type: &arrow.TimestampType{
77-
Unit: unit,
78-
TimeZone: "UTC",
79-
},
80-
}
81-
err := timestamp.Set("1999-01-08 04:05:06.123456789")
82-
if err != nil {
83-
t.Fatal(err)
84-
}
73+
for idx, tc := range []struct {
74+
Unit arrow.TimeUnit
75+
Input string
76+
Expected string
77+
}{
78+
// Input format: arrowStringFormat
79+
{
80+
Unit: arrow.Second,
81+
Input: "1999-01-08 04:05:06.123456789",
82+
Expected: "1999-01-08 04:05:06Z",
83+
},
84+
{
85+
Unit: arrow.Millisecond,
86+
Input: "1999-01-08 04:05:06.123456789",
87+
Expected: "1999-01-08 04:05:06.123Z",
88+
},
89+
{
90+
Unit: arrow.Microsecond,
91+
Input: "1999-01-08 04:05:06.123456789",
92+
Expected: "1999-01-08 04:05:06.123456Z",
93+
},
94+
{
95+
Unit: arrow.Nanosecond,
96+
Input: "1999-01-08 04:05:06.123456789",
97+
Expected: "1999-01-08 04:05:06.123456789Z",
98+
},
99+
// Input format: arrowStringFormatNew
100+
{
101+
Unit: arrow.Second,
102+
Input: "1999-01-08 04:05:06.123456789Z",
103+
Expected: "1999-01-08 04:05:06Z",
104+
},
105+
{
106+
Unit: arrow.Millisecond,
107+
Input: "1999-01-08 04:05:06.123456789Z",
108+
Expected: "1999-01-08 04:05:06.123Z",
109+
},
110+
{
111+
Unit: arrow.Microsecond,
112+
Input: "1999-01-08 04:05:06.123456789Z",
113+
Expected: "1999-01-08 04:05:06.123456Z",
114+
},
115+
{
116+
Unit: arrow.Nanosecond,
117+
Input: "1999-01-08 04:05:06.123456789Z",
118+
Expected: "1999-01-08 04:05:06.123456789Z",
119+
},
120+
} {
121+
tc := tc
122+
t.Run(strconv.FormatInt(int64(idx), 10), func(t *testing.T) {
123+
timestamp := Timestamp{
124+
Type: &arrow.TimestampType{
125+
Unit: tc.Unit,
126+
TimeZone: "UTC",
127+
},
128+
}
129+
err := timestamp.Set(tc.Input)
130+
if err != nil {
131+
t.Fatal(err)
132+
}
85133

86-
bldr := array.NewTimestampBuilder(memory.DefaultAllocator, timestamp.Type)
87-
AppendToBuilder(bldr, &timestamp)
134+
bldr := array.NewTimestampBuilder(memory.DefaultAllocator, timestamp.Type)
135+
AppendToBuilder(bldr, &timestamp)
88136

89-
arr := bldr.NewArray().(*array.Timestamp)
90-
actual := arr.ValueStr(0)
137+
arr := bldr.NewArray().(*array.Timestamp)
138+
actual := arr.ValueStr(0)
91139

92-
require.Equal(t, expected[i], actual)
140+
require.Equal(t, tc.Expected, actual)
141+
})
93142
}
94143
}

0 commit comments

Comments
 (0)