Skip to content

Commit 16ffd28

Browse files
authored
feat (schema): Add fetching default values for FIXED, DECIMAL, TIME, TIMESTAMP, DATE, and UUID (apache#17892)
1 parent 2352496 commit 16ffd28

File tree

2 files changed

+152
-3
lines changed

2 files changed

+152
-3
lines changed

hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaUtils.java

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -801,9 +801,10 @@ public static boolean isMetadataField(String fieldName) {
801801
* This is equivalent to {@link org.apache.hudi.avro.HoodieAvroUtils#toJavaDefaultValue(org.apache.avro.Schema.Field)}
802802
* but operates on HoodieSchemaField.
803803
*
804-
* <p>For primitive types (STRING, INT, LONG, FLOAT, DOUBLE, BOOLEAN, ENUM, BYTES),
805-
* the default value is returned as-is. For complex types (ARRAY, MAP, RECORD),
806-
* Avro's GenericData utility is used to properly construct the default value.</p>
804+
* <p>For primitive types (STRING, INT, LONG, FLOAT, DOUBLE, BOOLEAN, ENUM, BYTES, FIXED)
805+
* and logical types (DECIMAL, TIME, TIMESTAMP, DATE, UUID), the default value is returned as-is.
806+
* For complex types (ARRAY, MAP, RECORD), Avro's GenericData utility is used
807+
* to properly construct the default value.</p>
807808
*
808809
* @param field the HoodieSchemaField containing the default value
809810
* @return the Java representation of the default value, or null if no default value exists
@@ -830,6 +831,12 @@ public static Object toJavaDefaultValue(HoodieSchemaField field) {
830831
case BOOLEAN:
831832
case ENUM:
832833
case BYTES:
834+
case FIXED:
835+
case DECIMAL:
836+
case TIME:
837+
case TIMESTAMP:
838+
case DATE:
839+
case UUID:
833840
return defaultVal;
834841
case ARRAY:
835842
case MAP:

hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchemaUtils.java

Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
import org.junit.jupiter.params.provider.MethodSource;
3434

3535
import java.math.BigDecimal;
36+
import java.math.BigInteger;
3637
import java.nio.ByteBuffer;
3738
import java.sql.Timestamp;
3839
import java.time.LocalDate;
@@ -43,7 +44,9 @@
4344
import java.util.HashSet;
4445
import java.util.List;
4546
import java.util.Map;
47+
import java.util.Random;
4648
import java.util.Set;
49+
import java.util.UUID;
4750
import java.util.stream.Collectors;
4851
import java.util.stream.Stream;
4952

@@ -1989,6 +1992,145 @@ public void testToJavaDefaultValueConsistencyWithAvro() {
19891992
assertEquals(avroIntResult, hoodieIntResult);
19901993
}
19911994

1995+
@Test
1996+
public void testToJavaDefaultValueFixed() {
1997+
// Create a fixed schema with size 4
1998+
HoodieSchema fixedSchema = HoodieSchema.createFixed("FixedType", null, null, 4);
1999+
byte[] defaultBytes = new byte[]{1, 2, 3, 4};
2000+
HoodieSchemaField field = HoodieSchemaField.of("fixedField",
2001+
fixedSchema,
2002+
null,
2003+
defaultBytes);
2004+
Object result = HoodieSchemaUtils.toJavaDefaultValue(field);
2005+
assertArrayEquals(defaultBytes, (byte[]) result);
2006+
}
2007+
2008+
@Test
2009+
public void testToJavaDefaultValueDecimal() {
2010+
// Create a decimal schema with precision 10 and scale 2
2011+
HoodieSchema decimalSchema = HoodieSchema.createDecimal(10, 2);
2012+
byte[] decimalBytes = BigInteger.valueOf(12345).toByteArray();
2013+
HoodieSchemaField field = HoodieSchemaField.of("decimalField",
2014+
decimalSchema,
2015+
null,
2016+
decimalBytes);
2017+
Object result = HoodieSchemaUtils.toJavaDefaultValue(field);
2018+
assertArrayEquals(decimalBytes, (byte[]) result);
2019+
}
2020+
2021+
@Test
2022+
public void testToJavaDefaultValueTimeMillis() {
2023+
// Create time-millis schema
2024+
HoodieSchema timeSchema = HoodieSchema.createTimeMillis();
2025+
// Time is stored as milliseconds since midnight
2026+
int defaultTime = 43200000; // 12:00:00 in millis
2027+
HoodieSchemaField field = HoodieSchemaField.of("timeField",
2028+
timeSchema,
2029+
null,
2030+
defaultTime);
2031+
Object result = HoodieSchemaUtils.toJavaDefaultValue(field);
2032+
assertEquals(defaultTime, result);
2033+
}
2034+
2035+
@Test
2036+
public void testToJavaDefaultValueTimeMicros() {
2037+
// Create time-micros schema
2038+
HoodieSchema timeSchema = HoodieSchema.createTimeMicros();
2039+
// Time is stored as microseconds since midnight
2040+
long defaultTime = 43200000000L; // 12:00:00 in micros
2041+
HoodieSchemaField field = HoodieSchemaField.of("timeField",
2042+
timeSchema,
2043+
null,
2044+
defaultTime);
2045+
Object result = HoodieSchemaUtils.toJavaDefaultValue(field);
2046+
assertEquals(defaultTime, result);
2047+
}
2048+
2049+
@Test
2050+
public void testToJavaDefaultValueTimestampMillis() {
2051+
// Create timestamp-millis schema
2052+
HoodieSchema timestampSchema = HoodieSchema.createTimestampMillis();
2053+
// Timestamp as milliseconds since epoch
2054+
long defaultTimestamp = 1609459200000L; // 2021-01-01 00:00:00 UTC
2055+
HoodieSchemaField field = HoodieSchemaField.of("timestampField",
2056+
timestampSchema,
2057+
null,
2058+
defaultTimestamp);
2059+
Object result = HoodieSchemaUtils.toJavaDefaultValue(field);
2060+
assertEquals(defaultTimestamp, result);
2061+
}
2062+
2063+
@Test
2064+
public void testToJavaDefaultValueTimestampMicros() {
2065+
// Create timestamp-micros schema
2066+
HoodieSchema timestampSchema = HoodieSchema.createTimestampMicros();
2067+
// Timestamp as microseconds since epoch
2068+
long defaultTimestamp = 1609459200000000L; // 2021-01-01 00:00:00 UTC in micros
2069+
HoodieSchemaField field = HoodieSchemaField.of("timestampField",
2070+
timestampSchema,
2071+
null,
2072+
defaultTimestamp);
2073+
Object result = HoodieSchemaUtils.toJavaDefaultValue(field);
2074+
assertEquals(defaultTimestamp, result);
2075+
}
2076+
2077+
@Test
2078+
public void testToJavaDefaultValueDate() {
2079+
// Create date schema
2080+
HoodieSchema dateSchema = HoodieSchema.createDate();
2081+
// Date is stored as days since epoch
2082+
int defaultDate = 18628; // 2021-01-01
2083+
HoodieSchemaField field = HoodieSchemaField.of("dateField",
2084+
dateSchema,
2085+
null,
2086+
defaultDate);
2087+
Object result = HoodieSchemaUtils.toJavaDefaultValue(field);
2088+
assertEquals(defaultDate, result);
2089+
}
2090+
2091+
@Test
2092+
public void testToJavaDefaultValueUUID() {
2093+
// Create UUID schema
2094+
HoodieSchema uuidSchema = HoodieSchema.createUUID();
2095+
long seed = 123456L;
2096+
Random random = new Random(seed);
2097+
long mostSigBits = random.nextLong();
2098+
long leastSigBits = random.nextLong();
2099+
String defaultUuid = new UUID(mostSigBits, leastSigBits).toString();
2100+
HoodieSchemaField field = HoodieSchemaField.of("uuidField",
2101+
uuidSchema,
2102+
null,
2103+
defaultUuid);
2104+
Object result = HoodieSchemaUtils.toJavaDefaultValue(field);
2105+
assertEquals(defaultUuid, result);
2106+
}
2107+
2108+
@Test
2109+
public void testToJavaDefaultValueLocalTimestampMillis() {
2110+
// Create local-timestamp-millis schema
2111+
HoodieSchema localTimestampSchema = HoodieSchema.createLocalTimestampMillis();
2112+
long defaultTimestamp = 1609459200000L;
2113+
HoodieSchemaField field = HoodieSchemaField.of("localTimestampField",
2114+
localTimestampSchema,
2115+
null,
2116+
defaultTimestamp);
2117+
Object result = HoodieSchemaUtils.toJavaDefaultValue(field);
2118+
assertEquals(defaultTimestamp, result);
2119+
}
2120+
2121+
@Test
2122+
public void testToJavaDefaultValueLocalTimestampMicros() {
2123+
// Create local-timestamp-micros schema
2124+
HoodieSchema localTimestampSchema = HoodieSchema.createLocalTimestampMicros();
2125+
long defaultTimestamp = 1609459200000000L;
2126+
HoodieSchemaField field = HoodieSchemaField.of("localTimestampField",
2127+
localTimestampSchema,
2128+
null,
2129+
defaultTimestamp);
2130+
Object result = HoodieSchemaUtils.toJavaDefaultValue(field);
2131+
assertEquals(defaultTimestamp, result);
2132+
}
2133+
19922134
@Test
19932135
void testLogicalTypesRetainedAfterPruneWithNestedRecords() {
19942136
final String logicalTypeKey = "logicalType";

0 commit comments

Comments
 (0)