Skip to content

Commit f97fba5

Browse files
Use time-millis for time (sec) vectors instead of time-micros (consistent with timestamp and should not roll over for time of day)
1 parent 90c72f4 commit f97fba5

File tree

4 files changed

+21
-16
lines changed

4 files changed

+21
-16
lines changed

adapter/avro/src/main/java/org/apache/arrow/adapter/avro/ArrowToAvroUtils.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -132,8 +132,8 @@ public class ArrowToAvroUtils {
132132
* <li>ArrowType.FixedSizeBinary --> FIXED
133133
* <li>ArrowType.Decimal --> decimal (FIXED)
134134
* <li>ArrowType.Date --> date (INT)
135-
* <li>ArrowType.Time (MILLI) --> time-millis (INT)
136-
* <li>ArrowType.Time (SEC | MICRO | NANO) --> time-micros (LONG)
135+
* <li>ArrowType.Time (SEC | MILLI) --> time-millis (INT)
136+
* <li>ArrowType.Time (MICRO | NANO) --> time-micros (LONG)
137137
* <li>ArrowType.Timestamp (NANOSECONDS, TZ != NULL) --> time-nanos (LONG)
138138
* <li>ArrowType.Timestamp (MICROSECONDS, TZ != NULL) --> time-micros (LONG)
139139
* <li>ArrowType.Timestamp (MILLISECONDS | SECONDS, TZ != NULL) --> time-millis (LONG)
@@ -326,7 +326,7 @@ private static <T> T buildBaseTypeSchema(
326326

327327
case Time:
328328
ArrowType.Time timeType = (ArrowType.Time) field.getType();
329-
if (timeType.getUnit() == TimeUnit.MILLISECOND) {
329+
if ((timeType.getUnit() == TimeUnit.SECOND || timeType.getUnit() == TimeUnit.MILLISECOND)) {
330330
return builder.intBuilder().prop("logicalType", "time-millis").endInt();
331331
} else {
332332
// All other time types (sec, micro, nano) are encoded as time-micros (LONG)
@@ -410,7 +410,7 @@ private static <T> SchemaBuilder.FieldAssembler<T> buildBaseFieldSchema(
410410

411411
case Time:
412412
ArrowType.Time timeType = (ArrowType.Time) field.getType();
413-
if (timeType.getUnit() == TimeUnit.MILLISECOND) {
413+
if ((timeType.getUnit() == TimeUnit.SECOND || timeType.getUnit() == TimeUnit.MILLISECOND)) {
414414
return builder.intBuilder().prop("logicalType", "time-millis").endInt().noDefault();
415415
} else {
416416
// All other time types (sec, micro, nano) are encoded as time-micros (LONG)
@@ -504,7 +504,7 @@ private static <T> SchemaBuilder.FieldAssembler<T> buildBaseFieldSchema(
504504

505505
case Time:
506506
ArrowType.Time timeType = (ArrowType.Time) field.getType();
507-
if (timeType.getUnit() == TimeUnit.MILLISECOND) {
507+
if ((timeType.getUnit() == TimeUnit.SECOND || timeType.getUnit() == TimeUnit.MILLISECOND)) {
508508
return (SchemaBuilder.UnionAccumulator)
509509
builder.intBuilder().prop("logicalType", "time-millis").endInt();
510510
} else {

adapter/avro/src/main/java/org/apache/arrow/adapter/avro/producers/logical/AvroTimeSecProducer.java

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,12 @@
2727
*/
2828
public class AvroTimeSecProducer extends BaseAvroProducer<TimeSecVector> {
2929

30-
// Convert seconds to microseconds for Avro time-micros (LONG) type
31-
// Range is 1000 times more than for milliseconds, so won't fit into time-millis (INT)
30+
// Convert seconds to milliseconds for Avro time-millis (INT) type
31+
// INT is enough to cover the number of milliseconds in a day
32+
// So overflows should not happen if values are valid times of day
3233

33-
private static final long MICROS_PER_SECOND = 1000000;
34+
private static final int MILLIS_PER_SECOND = 1000;
35+
private static final long OVERFLOW_LIMIT = Integer.MAX_VALUE / 1000;
3436

3537
/** Instantiate an AvroTimeSecProducer. */
3638
public AvroTimeSecProducer(TimeSecVector vector) {
@@ -40,8 +42,11 @@ public AvroTimeSecProducer(TimeSecVector vector) {
4042
@Override
4143
public void produce(Encoder encoder) throws IOException {
4244
int seconds = vector.getDataBuffer().getInt(currentIndex * (long) TimeSecVector.TYPE_WIDTH);
43-
long micros = seconds * MICROS_PER_SECOND;
44-
encoder.writeLong(micros);
45+
if (Math.abs(seconds) > OVERFLOW_LIMIT) {
46+
throw new ArithmeticException("Time value is too large for Avro encoding");
47+
}
48+
int millis = seconds * MILLIS_PER_SECOND;
49+
encoder.writeInt(millis);
4550
currentIndex++;
4651
}
4752
}

adapter/avro/src/test/java/org/apache/arrow/adapter/avro/ArrowToAvroDataTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1240,7 +1240,7 @@ public void testWriteTimes() throws Exception {
12401240
// Read and check values
12411241
for (int row = 0; row < rowCount; row++) {
12421242
record = datumReader.read(record, decoder);
1243-
assertEquals(timeSecVector.get(row), (int) ((long) record.get("timeSec") / 1000000));
1243+
assertEquals(timeSecVector.get(row), (int) (record.get("timeSec")) / 1000);
12441244
assertEquals(timeMillisVector.get(row), record.get("timeMillis"));
12451245
assertEquals(timeMicrosVector.get(row), record.get("timeMicros"));
12461246
assertEquals(timeNanosVector.get(row), (long) record.get("timeNanos") * 1000);
@@ -1321,7 +1321,7 @@ public void testWriteNullableTimes() throws Exception {
13211321

13221322
for (int row = 1; row < rowCount; row++) {
13231323
record = datumReader.read(record, decoder);
1324-
assertEquals(timeSecVector.get(row), (int) ((long) record.get("timeSec") / 1000000));
1324+
assertEquals(timeSecVector.get(row), ((int) record.get("timeSec") / 1000));
13251325
assertEquals(timeMillisVector.get(row), record.get("timeMillis"));
13261326
assertEquals(timeMicrosVector.get(row), record.get("timeMicros"));
13271327
assertEquals(timeNanosVector.get(row), (long) record.get("timeNanos") * 1000);

adapter/avro/src/test/java/org/apache/arrow/adapter/avro/ArrowToAvroSchemaTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -515,15 +515,15 @@ public void testConvertTimeTypes() {
515515
assertEquals(Schema.Type.UNION, schema.getField("nullableTimeSec").schema().getType());
516516
assertEquals(2, schema.getField("nullableTimeSec").schema().getTypes().size());
517517
Schema nullableTimeSecSchema = schema.getField("nullableTimeSec").schema().getTypes().get(0);
518-
assertEquals(Schema.Type.LONG, nullableTimeSecSchema.getType());
519-
assertEquals("time-micros", nullableTimeSecSchema.getProp("logicalType"));
518+
assertEquals(Schema.Type.INT, nullableTimeSecSchema.getType());
519+
assertEquals("time-millis", nullableTimeSecSchema.getProp("logicalType"));
520520
assertEquals(
521521
Schema.Type.NULL, schema.getField("nullableTimeSec").schema().getTypes().get(1).getType());
522522

523523
// Assertions for nonNullableTimeSec
524524
Schema nonNullableTimeSecSchema = schema.getField("nonNullableTimeSec").schema();
525-
assertEquals(Schema.Type.LONG, nonNullableTimeSecSchema.getType());
526-
assertEquals("time-micros", nonNullableTimeSecSchema.getProp("logicalType"));
525+
assertEquals(Schema.Type.INT, nonNullableTimeSecSchema.getType());
526+
assertEquals("time-millis", nonNullableTimeSecSchema.getProp("logicalType"));
527527

528528
// Assertions for nullableTimeMillis
529529
assertEquals(Schema.Type.UNION, schema.getField("nullableTimeMillis").schema().getType());

0 commit comments

Comments
 (0)