Skip to content

Commit a8da1b5

Browse files
committed
PARQUET-2249: Add column order for IEEE 754 total order
1 parent 00b6bab commit a8da1b5

File tree

9 files changed

+292
-36
lines changed

9 files changed

+292
-36
lines changed

parquet-column/src/main/java/org/apache/parquet/schema/ColumnOrder.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,11 +36,16 @@ public enum ColumnOrderName {
3636
/**
3737
* Type defined order, meaning that the comparison order of the elements is based on their type.
3838
*/
39-
TYPE_DEFINED_ORDER
39+
TYPE_DEFINED_ORDER,
40+
/**
41+
* IEEE 754 total order, meaning that floating-point values are compared using the total order defined by the IEEE 754 standard.
42+
*/
43+
IEEE_754_TOTAL_ORDER,
4044
}
4145

4246
private static final ColumnOrder UNDEFINED_COLUMN_ORDER = new ColumnOrder(ColumnOrderName.UNDEFINED);
4347
private static final ColumnOrder TYPE_DEFINED_COLUMN_ORDER = new ColumnOrder(ColumnOrderName.TYPE_DEFINED_ORDER);
48+
private static final ColumnOrder IEEE_754_TOTAL_ORDER = new ColumnOrder(ColumnOrderName.IEEE_754_TOTAL_ORDER);
4449

4550
/**
4651
* @return a {@link ColumnOrder} instance representing an undefined order
@@ -58,6 +63,14 @@ public static ColumnOrder typeDefined() {
5863
return TYPE_DEFINED_COLUMN_ORDER;
5964
}
6065

66+
/**
67+
* @return a {@link ColumnOrder} instance representing an IEEE 754 total order
68+
* @see ColumnOrderName#IEEE_754_TOTAL_ORDER
69+
*/
70+
public static ColumnOrder ieee754TotalOrder() {
71+
return IEEE_754_TOTAL_ORDER;
72+
}
73+
6174
private final ColumnOrderName columnOrderName;
6275

6376
private ColumnOrder(ColumnOrderName columnOrderName) {

parquet-column/src/main/java/org/apache/parquet/schema/Float16.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@
4848
public class Float16 {
4949
// Positive infinity of type half-precision float.
5050
private static final short POSITIVE_INFINITY = (short) 0x7c00;
51+
// Negative infinity of type half-precision float.
52+
private static final short NEGATIVE_INFINITY = (short) 0xfc00;
5153
// A Not-a-Number representation of a half-precision float.
5254
private static final short NaN = (short) 0x7e00;
5355
// The bitmask to AND with a number to obtain its sign bit.

parquet-column/src/main/java/org/apache/parquet/schema/LogicalTypeAnnotation.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020

2121
import static java.util.Arrays.asList;
2222
import static java.util.Optional.empty;
23+
import static org.apache.parquet.schema.ColumnOrder.ColumnOrderName.IEEE_754_TOTAL_ORDER;
2324
import static org.apache.parquet.schema.ColumnOrder.ColumnOrderName.TYPE_DEFINED_ORDER;
2425
import static org.apache.parquet.schema.ColumnOrder.ColumnOrderName.UNDEFINED;
2526
import static org.apache.parquet.schema.PrimitiveStringifier.TIMESTAMP_MICROS_STRINGIFIER;
@@ -997,6 +998,13 @@ LogicalTypeToken getType() {
997998
PrimitiveStringifier valueStringifier(PrimitiveType primitiveType) {
998999
return PrimitiveStringifier.FLOAT16_STRINGIFIER;
9991000
}
1001+
1002+
@Override
1003+
boolean isValidColumnOrder(ColumnOrder columnOrder) {
1004+
return columnOrder.getColumnOrderName() == UNDEFINED
1005+
|| columnOrder.getColumnOrderName() == TYPE_DEFINED_ORDER
1006+
|| columnOrder.getColumnOrderName() == IEEE_754_TOTAL_ORDER;
1007+
}
10001008
}
10011009

10021010
public static class UnknownLogicalTypeAnnotation extends LogicalTypeAnnotation {

parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveComparator.java

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -293,4 +293,65 @@ public String toString() {
293293
return "BINARY_AS_FLOAT16_COMPARATOR";
294294
}
295295
};
296+
297+
static final PrimitiveComparator<Float> FLOAT_IEEE_754_TOTAL_ORDER_COMPARATOR = new PrimitiveComparator<Float>() {
298+
@Override
299+
int compareNotNulls(Float o1, Float o2) {
300+
return compare(o1.floatValue(), o2.floatValue());
301+
}
302+
303+
@Override
304+
public int compare(float f1, float f2) {
305+
int f1Int = Float.floatToRawIntBits(f1);
306+
int f2Int = Float.floatToRawIntBits(f2);
307+
f1Int ^= ((f1Int >> 31) >>> 1);
308+
f2Int ^= ((f2Int >> 31) >>> 1);
309+
return Integer.compare(f1Int, f2Int);
310+
}
311+
312+
@Override
313+
public String toString() {
314+
return "FLOAT_IEEE_754_TOTAL_ORDER_COMPARATOR";
315+
}
316+
};
317+
318+
static final PrimitiveComparator<Double> DOUBLE_IEEE_754_TOTAL_ORDER_COMPARATOR =
319+
new PrimitiveComparator<Double>() {
320+
@Override
321+
int compareNotNulls(Double o1, Double o2) {
322+
return compare(o1.doubleValue(), o2.doubleValue());
323+
}
324+
325+
@Override
326+
public int compare(double d1, double d2) {
327+
long d1Long = Double.doubleToRawLongBits(d1);
328+
long d2Long = Double.doubleToRawLongBits(d2);
329+
d1Long ^= ((d1Long >> 63) >>> 1);
330+
d2Long ^= ((d2Long >> 63) >>> 1);
331+
return Long.compare(d1Long, d2Long);
332+
}
333+
334+
@Override
335+
public String toString() {
336+
return "DOUBLE_IEEE_754_TOTAL_ORDER_COMPARATOR";
337+
}
338+
};
339+
340+
static final PrimitiveComparator<Binary> BINARY_AS_FLOAT16_IEEE_754_TOTAL_ORDER_COMPARATOR =
341+
new BinaryComparator() {
342+
343+
@Override
344+
int compareBinary(Binary b1, Binary b2) {
345+
int b1Short = b1.get2BytesLittleEndian();
346+
int b2Short = b2.get2BytesLittleEndian();
347+
b1Short ^= ((b1Short >> 15) >>> 1);
348+
b2Short ^= ((b2Short >> 15) >>> 1);
349+
return Integer.compare(b1Short, b2Short);
350+
}
351+
352+
@Override
353+
public String toString() {
354+
return "BINARY_AS_FLOAT16_IEEE_754_TOTAL_ORDER_COMPARATOR";
355+
}
356+
};
296357
}

parquet-column/src/main/java/org/apache/parquet/schema/PrimitiveType.java

Lines changed: 47 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
8888
}
8989

9090
@Override
91-
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) {
91+
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType, ColumnOrder columnOrder) {
9292
if (logicalType == null) {
9393
return PrimitiveComparator.SIGNED_INT64_COMPARATOR;
9494
}
@@ -146,7 +146,7 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
146146
}
147147

148148
@Override
149-
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) {
149+
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType, ColumnOrder columnOrder) {
150150
if (logicalType == null) {
151151
return PrimitiveComparator.SIGNED_INT32_COMPARATOR;
152152
}
@@ -210,7 +210,7 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
210210
}
211211

212212
@Override
213-
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) {
213+
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType, ColumnOrder columnOrder) {
214214
return PrimitiveComparator.BOOLEAN_COMPARATOR;
215215
}
216216
},
@@ -236,7 +236,7 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
236236
}
237237

238238
@Override
239-
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) {
239+
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType, ColumnOrder columnOrder) {
240240
if (logicalType == null) {
241241
return PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR;
242242
}
@@ -298,8 +298,10 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
298298
}
299299

300300
@Override
301-
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) {
302-
return PrimitiveComparator.FLOAT_COMPARATOR;
301+
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType, ColumnOrder columnOrder) {
302+
return columnOrder.getColumnOrderName() == ColumnOrderName.IEEE_754_TOTAL_ORDER
303+
? PrimitiveComparator.FLOAT_IEEE_754_TOTAL_ORDER_COMPARATOR
304+
: PrimitiveComparator.FLOAT_COMPARATOR;
303305
}
304306
},
305307
DOUBLE("getDouble", Double.TYPE) {
@@ -324,8 +326,10 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
324326
}
325327

326328
@Override
327-
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) {
328-
return PrimitiveComparator.DOUBLE_COMPARATOR;
329+
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType, ColumnOrder columnOrder) {
330+
return columnOrder.getColumnOrderName() == ColumnOrderName.IEEE_754_TOTAL_ORDER
331+
? PrimitiveComparator.DOUBLE_IEEE_754_TOTAL_ORDER_COMPARATOR
332+
: PrimitiveComparator.DOUBLE_COMPARATOR;
329333
}
330334
},
331335
INT96("getBinary", Binary.class) {
@@ -350,7 +354,7 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
350354
}
351355

352356
@Override
353-
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) {
357+
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType, ColumnOrder columnOrder) {
354358
return PrimitiveComparator.BINARY_AS_SIGNED_INTEGER_COMPARATOR;
355359
}
356360
},
@@ -376,11 +380,16 @@ public <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> conve
376380
}
377381

378382
@Override
379-
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType) {
383+
PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType, ColumnOrder columnOrder) {
380384
if (logicalType == null) {
381385
return PrimitiveComparator.UNSIGNED_LEXICOGRAPHICAL_BINARY_COMPARATOR;
382386
}
383387

388+
// FLOAT16 values are read as 2-byte little-endian binaries. When the column declares the
// IEEE 754 total order they must go through the dedicated FLOAT16 total-order comparator;
// an unsigned lexicographic byte comparison does not order the encoded half-floats correctly.
if (logicalType.getType() == LogicalTypeAnnotation.LogicalTypeToken.FLOAT16
    && columnOrder.getColumnOrderName() == ColumnOrderName.IEEE_754_TOTAL_ORDER) {
  return PrimitiveComparator.BINARY_AS_FLOAT16_IEEE_754_TOTAL_ORDER_COMPARATOR;
}
392+
384393
return logicalType
385394
.accept(new LogicalTypeAnnotation.LogicalTypeAnnotationVisitor<PrimitiveComparator>() {
386395
@Override
@@ -441,7 +450,7 @@ public abstract void addValueToPrimitiveConverter(
441450

442451
public abstract <T, E extends Exception> T convert(PrimitiveTypeNameConverter<T, E> converter) throws E;
443452

444-
abstract PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType);
453+
abstract PrimitiveComparator<?> comparator(LogicalTypeAnnotation logicalType, ColumnOrder columnOrder);
445454
}
446455

447456
private final PrimitiveTypeName primitive;
@@ -533,6 +542,12 @@ public PrimitiveType(
533542
columnOrder = primitive == PrimitiveTypeName.INT96 || originalType == OriginalType.INTERVAL
534543
? ColumnOrder.undefined()
535544
: ColumnOrder.typeDefined();
545+
} else if (columnOrder.getColumnOrderName() == ColumnOrderName.IEEE_754_TOTAL_ORDER) {
546+
Preconditions.checkArgument(
547+
primitive == PrimitiveTypeName.FLOAT || primitive == PrimitiveTypeName.DOUBLE,
548+
"The column order %s is not supported by type %s",
549+
columnOrder,
550+
primitive);
536551
}
537552
this.columnOrder = requireValidColumnOrder(columnOrder);
538553
}
@@ -579,6 +594,17 @@ public PrimitiveType(
579594
|| logicalTypeAnnotation instanceof LogicalTypeAnnotation.IntervalLogicalTypeAnnotation
580595
? ColumnOrder.undefined()
581596
: ColumnOrder.typeDefined();
597+
} else if (columnOrder.getColumnOrderName() == ColumnOrderName.IEEE_754_TOTAL_ORDER) {
598+
Preconditions.checkArgument(
599+
primitive == PrimitiveTypeName.FLOAT
600+
|| primitive == PrimitiveTypeName.DOUBLE
601+
|| (logicalTypeAnnotation != null
602+
&& logicalTypeAnnotation.getType()
603+
== LogicalTypeAnnotation.LogicalTypeToken.FLOAT16),
604+
"The column order %s is not supported by type %s logical type %s",
605+
columnOrder,
606+
primitive,
607+
logicalTypeAnnotation);
582608
}
583609
this.columnOrder = requireValidColumnOrder(columnOrder);
584610
}
@@ -619,6 +645,15 @@ public PrimitiveType withLogicalTypeAnnotation(LogicalTypeAnnotation logicalType
619645
return new PrimitiveType(getRepetition(), primitive, length, getName(), logicalType, getId());
620646
}
621647

648+
/**
649+
* @param columnOrder the column order
650+
* @return the same type with the column order set
651+
*/
652+
public Type withColumnOrder(ColumnOrder columnOrder) {
653+
return new PrimitiveType(
654+
getRepetition(), primitive, length, getName(), getLogicalTypeAnnotation(), getId(), columnOrder);
655+
}
656+
622657
/**
623658
* @return the primitive type
624659
*/
@@ -833,7 +868,7 @@ protected Type union(Type toMerge, boolean strict) {
833868
*/
834869
@SuppressWarnings("unchecked")
835870
public <T> PrimitiveComparator<T> comparator() {
836-
return (PrimitiveComparator<T>) getPrimitiveTypeName().comparator(getLogicalTypeAnnotation());
871+
return (PrimitiveComparator<T>) getPrimitiveTypeName().comparator(getLogicalTypeAnnotation(), columnOrder());
837872
}
838873

839874
/**

0 commit comments

Comments
 (0)