Skip to content

Commit 97ea97c

Browse files
authored
GH-48553: [Ruby] Add support for reading timestamp array (#48554)
### Rationale for this change

Timestamp data needs its own array class so that timestamp arrays can be read.

### What changes are included in this PR?

* Add `ArrowFormat::TimestampType`
* Add `ArrowFormat::TimestampArray`

### Are these changes tested?

Yes.

### Are there any user-facing changes?

Yes.

* GitHub Issue: #48553

Authored-by: Sutou Kouhei <[email protected]>
Signed-off-by: Sutou Kouhei <[email protected]>
1 parent 4cbe26d commit 97ea97c

File tree

4 files changed

+195
-4
lines changed

4 files changed

+195
-4
lines changed

ruby/red-arrow-format/lib/arrow-format/array.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,12 @@ def to_a
183183
end
184184
end
185185

186+
# Array class for timestamp data; built by TimestampType#build_array.
class TimestampArray < TemporalArray
187+
# Returns the array contents as a Ruby array.
#
# Reads @size values in :s64 format from @values_buffer, starting at
# offset 0, and hands them to apply_validity. The values are returned in
# the stored unit; nothing here converts them to Time objects.
# NOTE(review): apply_validity is defined outside this view; presumably it
# substitutes nil for null slots (the tests expect nil there) - confirm.
def to_a
188+
apply_validity(@values_buffer.values(:s64, 0, @size))
189+
end
190+
end
191+
186192
class VariableSizeBinaryLayoutArray < Array
187193
def initialize(type, size, validity_buffer, offsets_buffer, values_buffer)
188194
super(type, size, validity_buffer)

ruby/red-arrow-format/lib/arrow-format/file-reader.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
require_relative "org/apache/arrow/flatbuf/schema"
4242
require_relative "org/apache/arrow/flatbuf/struct_"
4343
require_relative "org/apache/arrow/flatbuf/time"
44+
require_relative "org/apache/arrow/flatbuf/timestamp"
4445
require_relative "org/apache/arrow/flatbuf/time_unit"
4546
require_relative "org/apache/arrow/flatbuf/union"
4647
require_relative "org/apache/arrow/flatbuf/union_mode"
@@ -210,6 +211,9 @@ def read_field(fb_field)
210211
type = Time64Type.new(:nanosecond)
211212
end
212213
end
214+
when Org::Apache::Arrow::Flatbuf::Timestamp
215+
unit = fb_type.unit.name.downcase.to_sym
216+
type = TimestampType.new(unit, fb_type.timezone)
213217
when Org::Apache::Arrow::Flatbuf::List
214218
type = ListType.new(read_field(fb_field.children[0]))
215219
when Org::Apache::Arrow::Flatbuf::LargeList

ruby/red-arrow-format/lib/arrow-format/type.rb

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -274,12 +274,16 @@ def build_array(size, validity_buffer, values_buffer)
274274
end
275275

276276
# Common base for the time types. It owns the unit so that subclasses only
# have to forward it through super.
class TimeType < TemporalType
277+
# The unit given to the constructor (the reader passes symbols such as
# :nanosecond).
attr_reader :unit
278+
# name: type name forwarded to TemporalType#initialize.
# unit: stored as-is and exposed through #unit.
def initialize(name, unit)
279+
super(name)
280+
@unit = unit
281+
end
277282
end
278283

279284
class Time32Type < TimeType
280285
def initialize(unit)
281-
super("Time32")
282-
@unit = unit
286+
super("Time32", unit)
283287
end
284288

285289
def build_array(size, validity_buffer, values_buffer)
@@ -289,15 +293,28 @@ def build_array(size, validity_buffer, values_buffer)
289293

290294
# Time64 type.
#
# Reading this diff: the lines following the `-` markers are the removed
# implementation (the unit was assigned here), and the line following the
# `+` marker is its replacement (the unit is passed up to TimeType).
class Time64Type < TimeType
291295
# unit: forwarded to TimeType together with the fixed name "Time64".
def initialize(unit)
292-
super("Time64")
293-
@unit = unit
296+
super("Time64", unit)
294297
end
295298

296299
# Builds a Time64Array of this type from the given size and buffers.
def build_array(size, validity_buffer, values_buffer)
297300
Time64Array.new(self, size, validity_buffer, values_buffer)
298301
end
299302
end
300303

304+
# Timestamp type: a unit plus a timezone value, registered under the name
# "Timestamp".
class TimestampType < TemporalType
305+
# The unit given to the constructor (the file reader passes a downcased
# symbol derived from the FlatBuffers unit name).
attr_reader :unit
306+
# The timezone given to the constructor, stored unchanged.
# NOTE(review): presumably nil or empty when the field has no timezone -
# confirm against the FlatBuffers accessor.
attr_reader :timezone
307+
# unit: stored as-is and exposed through #unit.
# timezone: stored as-is and exposed through #timezone.
def initialize(unit, timezone)
308+
super("Timestamp")
309+
@unit = unit
310+
@timezone = timezone
311+
end
312+
313+
# Builds a TimestampArray of this type from the given size and buffers.
def build_array(size, validity_buffer, values_buffer)
314+
TimestampArray.new(self, size, validity_buffer, values_buffer)
315+
end
316+
end
317+
301318
class VariableSizeBinaryType < Type
302319
end
303320

ruby/red-arrow-format/test/test-file-reader.rb

Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,10 @@ def read
4040
end
4141
end
4242

43+
# Test helper: the type of field 0 in the schema of the first item yielded
# by @reader. Memoized in @type, so the reader is consulted once per test
# instance.
# NOTE(review): @reader.first is presumably the first record batch - confirm.
def type
44+
@type ||= @reader.first.schema.fields[0].type
45+
end
46+
4347
sub_test_case("Null") do
4448
def build_array
4549
Arrow::NullArray.new(3)
@@ -245,6 +249,10 @@ def test_read
245249
assert_equal([{"value" => [@time_00_00_10, nil, @time_00_01_10]}],
246250
read)
247251
end
252+
253+
# The type read back from the file must report :second as its unit.
def test_type
254+
assert_equal(:second, type.unit)
255+
end
248256
end
249257

250258
sub_test_case("Time32(:millisecond)") do
@@ -263,6 +271,10 @@ def test_read
263271
assert_equal([{"value" => [@time_00_00_10_000, nil, @time_00_01_10_000]}],
264272
read)
265273
end
274+
275+
# The type read back from the file must report :millisecond as its unit.
def test_type
276+
assert_equal(:millisecond, type.unit)
277+
end
266278
end
267279

268280
sub_test_case("Time64(:microsecond)") do
@@ -293,6 +305,10 @@ def test_read
293305
],
294306
read)
295307
end
308+
309+
# The type read back from the file must report :microsecond as its unit.
def test_type
310+
assert_equal(:microsecond, type.unit)
311+
end
296312
end
297313

298314
sub_test_case("Time64(:nanosecond)") do
@@ -323,6 +339,154 @@ def test_read
323339
],
324340
read)
325341
end
342+
343+
# The type read back from the file must report :nanosecond as its unit.
def test_type
344+
assert_equal(:nanosecond, type.unit)
345+
end
346+
end
347+
348+
# Round-trip test for a second-resolution timestamp array with a null in the
# middle.
sub_test_case("Timestamp(:second)") do
349+
# Fixture values are plain integer epoch seconds, assigned before super.
# NOTE(review): the parent setup presumably calls build_array, which is why
# the values must exist first - confirm.
# NOTE(review): 1574003351 is 2019-11-17 15:09:11 UTC (2019-11-18 00:09:11
# at +09:00), whereas 1765863238 is 2025-12-16 05:33:58 UTC; the two names
# appear to follow different zone conventions - confirm.
def setup(&block)
350+
@timestamp_2019_11_18_00_09_11 = 1574003351
351+
@timestamp_2025_12_16_05_33_58 = 1765863238
352+
super(&block)
353+
end
354+
355+
# Source array: [value, nil, value] with unit :second.
def build_array
356+
Arrow::TimestampArray.new(:second,
357+
[
358+
@timestamp_2019_11_18_00_09_11,
359+
nil,
360+
@timestamp_2025_12_16_05_33_58,
361+
])
362+
end
363+
364+
# Expects one result hash whose "value" column holds the raw integers
# with nil preserved for the null slot.
def test_read
365+
assert_equal([
366+
{
367+
"value" => [
368+
@timestamp_2019_11_18_00_09_11,
369+
nil,
370+
@timestamp_2025_12_16_05_33_58,
371+
],
372+
},
373+
],
374+
read)
375+
end
376+
end
377+
378+
# Round-trip test for a millisecond-resolution timestamp array with a null in
# the middle.
sub_test_case("Timestamp(:millisecond)") do
379+
# Fixture values are integer epoch seconds scaled by 1_000, assigned
# before super.
# NOTE(review): the parent setup presumably calls build_array - confirm.
def setup(&block)
380+
@timestamp_2019_11_18_00_09_11 = 1574003351 * 1_000
381+
@timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000
382+
super(&block)
383+
end
384+
385+
# Source array: [value, nil, value]; the unit is given as :milli.
def build_array
386+
Arrow::TimestampArray.new(:milli,
387+
[
388+
@timestamp_2019_11_18_00_09_11,
389+
nil,
390+
@timestamp_2025_12_16_05_33_58,
391+
])
392+
end
393+
394+
# Expects one result hash whose "value" column holds the raw integers
# with nil preserved for the null slot.
def test_read
395+
assert_equal([
396+
{
397+
"value" => [
398+
@timestamp_2019_11_18_00_09_11,
399+
nil,
400+
@timestamp_2025_12_16_05_33_58,
401+
],
402+
},
403+
],
404+
read)
405+
end
406+
end
407+
408+
# Round-trip test for a microsecond-resolution timestamp array with a null in
# the middle.
sub_test_case("Timestamp(:microsecond)") do
409+
# Fixture values are integer epoch seconds scaled by 1_000_000, assigned
# before super.
# NOTE(review): the parent setup presumably calls build_array - confirm.
def setup(&block)
410+
@timestamp_2019_11_18_00_09_11 = 1574003351 * 1_000_000
411+
@timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000
412+
super(&block)
413+
end
414+
415+
# Source array: [value, nil, value]; the unit is given as :micro.
def build_array
416+
Arrow::TimestampArray.new(:micro,
417+
[
418+
@timestamp_2019_11_18_00_09_11,
419+
nil,
420+
@timestamp_2025_12_16_05_33_58,
421+
])
422+
end
423+
424+
# Expects one result hash whose "value" column holds the raw integers
# with nil preserved for the null slot.
def test_read
425+
assert_equal([
426+
{
427+
"value" => [
428+
@timestamp_2019_11_18_00_09_11,
429+
nil,
430+
@timestamp_2025_12_16_05_33_58,
431+
],
432+
},
433+
],
434+
read)
435+
end
436+
end
437+
438+
# Round-trip test for a nanosecond-resolution timestamp array with a null in
# the middle.
sub_test_case("Timestamp(:nanosecond)") do
439+
# Fixture values are integer epoch seconds scaled by 1_000_000_000,
# assigned before super.
# NOTE(review): the parent setup presumably calls build_array - confirm.
def setup(&block)
440+
@timestamp_2019_11_18_00_09_11 = 1574003351 * 1_000_000_000
441+
@timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000_000
442+
super(&block)
443+
end
444+
445+
# Source array: [value, nil, value]; the unit is given as :nano.
def build_array
446+
Arrow::TimestampArray.new(:nano,
447+
[
448+
@timestamp_2019_11_18_00_09_11,
449+
nil,
450+
@timestamp_2025_12_16_05_33_58,
451+
])
452+
end
453+
454+
# Expects one result hash whose "value" column holds the raw integers
# with nil preserved for the null slot.
def test_read
455+
assert_equal([
456+
{
457+
"value" => [
458+
@timestamp_2019_11_18_00_09_11,
459+
nil,
460+
@timestamp_2025_12_16_05_33_58,
461+
],
462+
},
463+
],
464+
read)
465+
end
466+
end
467+
468+
# Checks that both the unit and the timezone survive a round trip. This case
# defines only test_type; it has no test_read of its own.
sub_test_case("Timestamp(timezone)") do
469+
# Fixture timezone and integer epoch-second values, assigned before super.
# NOTE(review): the parent setup presumably calls build_array - confirm.
# NOTE(review): 1574003351 is 2019-11-17 15:09:11 UTC (2019-11-18 00:09:11
# at +09:00), whereas 1765863238 is 2025-12-16 05:33:58 UTC; with @timezone
# set to "UTC" the first name looks off by nine hours - confirm.
def setup(&block)
470+
@timezone = "UTC"
471+
@timestamp_2019_11_18_00_09_11 = 1574003351
472+
@timestamp_2025_12_16_05_33_58 = 1765863238
473+
super(&block)
474+
end
475+
476+
# Source array: [value, nil, value] built from an explicit data type that
# carries unit :second and @timezone.
def build_array
477+
data_type = Arrow::TimestampDataType.new(:second, @timezone)
478+
Arrow::TimestampArray.new(data_type,
479+
[
480+
@timestamp_2019_11_18_00_09_11,
481+
nil,
482+
@timestamp_2025_12_16_05_33_58,
483+
])
484+
end
485+
486+
# Compares unit and timezone together so that a failure shows both.
def test_type
487+
assert_equal([:second, @timezone],
488+
[type.unit, type.timezone])
489+
end
326490
end
327491

328492
sub_test_case("Binary") do

0 commit comments

Comments
 (0)