Skip to content

Commit 5d3b0ef

Browse files
committed
Address comments
1 parent ed2bd55 commit 5d3b0ef

File tree

4 files changed

+118
-31
lines changed

4 files changed

+118
-31
lines changed

hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchema.java

Lines changed: 4 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1033,8 +1033,7 @@ public HoodieSchema parse(String jsonSchema) {
10331033
Schema avroSchema = avroParser.parse(jsonSchema);
10341034
return fromAvroSchema(avroSchema);
10351035
} catch (IllegalArgumentException e) {
1036-
// Wrap validation exceptions to preserve the detailed error message
1037-
throw new HoodieAvroSchemaException(e.getMessage(), e);
1036+
throw new HoodieAvroSchemaException("Invalid schema string format", e);
10381037
} catch (Exception e) {
10391038
throw new HoodieAvroSchemaException("Failed to parse schema: " + jsonSchema, e);
10401039
}
@@ -1056,10 +1055,9 @@ public HoodieSchema parse(InputStream inputStream) {
10561055
} catch (IOException e) {
10571056
throw new HoodieIOException("Failed to parse schema from InputStream", e);
10581057
} catch (IllegalArgumentException e) {
1059-
// Wrap validation exceptions to preserve the detailed error message
1060-
throw new HoodieAvroSchemaException(e.getMessage(), e);
1058+
throw new HoodieAvroSchemaException("Invalid schema format in InputStream", e);
10611059
} catch (Exception e) {
1062-
throw new HoodieAvroSchemaException("Failed to parse schema", e);
1060+
throw new HoodieAvroSchemaException("Failed to parse schema from InputStream", e);
10631061
}
10641062
}
10651063
}
@@ -1486,7 +1484,7 @@ public enum TimePrecision {
14861484
*
14871485
* <p>This is a singleton type - use {@link #variant()} to get the instance.</p>
14881486
*/
1489-
public static class VariantLogicalType extends LogicalType {
1487+
static class VariantLogicalType extends LogicalType {
14901488

14911489
private static final String VARIANT_LOGICAL_TYPE_NAME = "variant";
14921490
// Eager initialization of singleton
@@ -1661,21 +1659,6 @@ private void validateVariantSchema(Schema avroSchema) {
16611659
}
16621660
}
16631661

1664-
/**
1665-
* Checks if the given Avro schema is a Variant schema.
1666-
* This checks for the Variant logical type.
1667-
*
1668-
* @param avroSchema the schema to check
1669-
* @return true if the schema has a Variant logical type
1670-
*/
1671-
public static boolean isVariantSchema(Schema avroSchema) {
1672-
if (avroSchema == null || avroSchema.getType() != Schema.Type.RECORD) {
1673-
return false;
1674-
}
1675-
LogicalType logicalType = avroSchema.getLogicalType();
1676-
return logicalType instanceof VariantLogicalType;
1677-
}
1678-
16791662
/**
16801663
* Checks if this is a shredded variant (has typed_value field or nullable value field).
16811664
*

hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchema.java

Lines changed: 96 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
package org.apache.hudi.common.schema;
2020

21+
import org.apache.hudi.common.schema.HoodieSchema.VariantLogicalType;
2122
import org.apache.hudi.common.util.Option;
2223
import org.apache.hudi.exception.HoodieAvroSchemaException;
2324

@@ -69,6 +70,20 @@ public class TestHoodieSchema {
6970
+ " ]"
7071
+ "}";
7172

73+
/**
74+
* Checks if the given Avro schema is a Variant schema. This checks for the Variant logical type.
75+
*
76+
* @param avroSchema the schema to check
77+
* @return true if the schema has a Variant logical type
78+
*/
79+
public static boolean isVariantSchema(Schema avroSchema) {
80+
if (avroSchema == null || avroSchema.getType() != Schema.Type.RECORD) {
81+
return false;
82+
}
83+
LogicalType logicalType = avroSchema.getLogicalType();
84+
return logicalType instanceof VariantLogicalType;
85+
}
86+
7287
@Test
7388
public void testSchemaCreationFromAvroSchema() {
7489
Schema avroSchema = Schema.create(Schema.Type.STRING);
@@ -151,10 +166,10 @@ public void testPrimitiveSchemaCreation(HoodieSchemaType type) {
151166
assertEquals(type.toAvroType(), schema.getAvroSchema().getType());
152167
} else {
153168
// FIXED throws AvroRuntimeException, others throw IllegalArgumentException
154-
Class<? extends Exception> expectedExceptionType = (type == HoodieSchemaType.FIXED)
155-
? org.apache.avro.AvroRuntimeException.class
169+
Class<? extends Exception> expectedExceptionType = (type == HoodieSchemaType.FIXED)
170+
? org.apache.avro.AvroRuntimeException.class
156171
: IllegalArgumentException.class;
157-
172+
158173
assertThrows(expectedExceptionType, () -> HoodieSchema.create(type), "Should throw exception for non-primitive type or type requiring additional arguments: " + type);
159174
}
160175
}
@@ -1145,7 +1160,7 @@ public void testVariantLogicalTypeDetection() {
11451160
assertEquals("variant", avroSchema.getLogicalType().getName());
11461161

11471162
// Verify isVariantSchema detection
1148-
assertTrue(HoodieSchema.Variant.isVariantSchema(avroSchema));
1163+
assertTrue(isVariantSchema(avroSchema));
11491164
}
11501165

11511166
@Test
@@ -1164,7 +1179,7 @@ public void testVariantRoundTripSerializationToJson() {
11641179
assertEquals(HoodieSchemaType.VARIANT, parsedVariant.getType());
11651180

11661181
// Verify logical type is preserved
1167-
assertTrue(HoodieSchema.Variant.isVariantSchema(parsedVariant.getAvroSchema()));
1182+
assertTrue(isVariantSchema(parsedVariant.getAvroSchema()));
11681183
}
11691184

11701185
@Test
@@ -1185,6 +1200,79 @@ public void testVariantShreddedRoundTripSerializationToJson() {
11851200
assertEquals(HoodieSchemaType.STRING, parsedVariant.getTypedValueField().get().getType());
11861201
}
11871202

1203+
@Test
1204+
public void testVariantFieldInRecordRoundTripSerialization() {
1205+
// Create a record schema with a variant field to simulate a table schema
1206+
HoodieSchema.Variant variantSchema = HoodieSchema.createVariant();
1207+
List<HoodieSchemaField> fields = Arrays.asList(
1208+
HoodieSchemaField.of("id", HoodieSchema.create(HoodieSchemaType.LONG), "Primary key", null),
1209+
HoodieSchemaField.of("name", HoodieSchema.create(HoodieSchemaType.STRING), "User name", null),
1210+
HoodieSchemaField.of("metadata", variantSchema, "Dynamic metadata stored as variant", null)
1211+
);
1212+
1213+
HoodieSchema originalRecord = new HoodieSchema.Builder(HoodieSchemaType.RECORD)
1214+
.setName("UserTable")
1215+
.setNamespace("com.example")
1216+
.setDoc("User table with variant field")
1217+
.setFields(fields)
1218+
.build();
1219+
1220+
String jsonSchema = originalRecord.toString();
1221+
1222+
// Parse back from JSON
1223+
HoodieSchema parsedSchema = HoodieSchema.parse(jsonSchema);
1224+
1225+
// Verify record structure is preserved
1226+
assertEquals(HoodieSchemaType.RECORD, parsedSchema.getType());
1227+
assertEquals("UserTable", parsedSchema.getName());
1228+
assertEquals(3, parsedSchema.getFields().size());
1229+
1230+
// Verify variant field is correctly detected
1231+
Option<HoodieSchemaField> parsedVariantFieldOpt = parsedSchema.getField("metadata");
1232+
assertTrue(parsedVariantFieldOpt.isPresent());
1233+
assertInstanceOf(HoodieSchema.Variant.class, parsedVariantFieldOpt.get().schema());
1234+
HoodieSchema.Variant parsedVariant = (HoodieSchema.Variant) parsedVariantFieldOpt.get().schema();
1235+
assertFalse(parsedVariant.isShredded());
1236+
assertEquals(HoodieSchemaType.VARIANT, parsedVariant.getType());
1237+
assertTrue(isVariantSchema(parsedVariant.getAvroSchema()));
1238+
}
1239+
1240+
@Test
1241+
public void testShreddedVariantFieldInRecordRoundTripSerialization() {
1242+
// Create a record schema with a shredded variant field
1243+
HoodieSchema typedValueSchema = HoodieSchema.create(HoodieSchemaType.STRING);
1244+
HoodieSchema.Variant shreddedVariant = HoodieSchema.createVariantShredded(typedValueSchema);
1245+
List<HoodieSchemaField> fields = Arrays.asList(
1246+
HoodieSchemaField.of("id", HoodieSchema.create(HoodieSchemaType.LONG), "Primary key", null),
1247+
HoodieSchemaField.of("json_data", shreddedVariant, "JSON data with string optimization", null)
1248+
);
1249+
1250+
HoodieSchema originalRecord = new HoodieSchema.Builder(HoodieSchemaType.RECORD)
1251+
.setName("EventTable")
1252+
.setNamespace("com.example")
1253+
.setFields(fields)
1254+
.build();
1255+
1256+
String jsonSchema = originalRecord.toString();
1257+
1258+
// Parse back from JSON
1259+
HoodieSchema parsedSchema = HoodieSchema.parse(jsonSchema);
1260+
1261+
// Verify record structure
1262+
assertEquals(HoodieSchemaType.RECORD, parsedSchema.getType());
1263+
assertEquals("EventTable", parsedSchema.getName());
1264+
assertEquals(2, parsedSchema.getFields().size());
1265+
1266+
// Verify shredded variant field is correctly detected
1267+
Option<HoodieSchemaField> parsedVariantFieldOpt = parsedSchema.getField("json_data");
1268+
assertTrue(parsedVariantFieldOpt.isPresent());
1269+
assertInstanceOf(HoodieSchema.Variant.class, parsedVariantFieldOpt.get().schema());
1270+
HoodieSchema.Variant parsedVariant = (HoodieSchema.Variant) parsedVariantFieldOpt.get().schema();
1271+
assertTrue(parsedVariant.isShredded());
1272+
assertTrue(parsedVariant.getTypedValueField().isPresent());
1273+
assertEquals(HoodieSchemaType.STRING, parsedVariant.getTypedValueField().get().getType());
1274+
}
1275+
11881276
@Test
11891277
public void testVariantBackwardsCompatibility() {
11901278
// Create a variant schema
@@ -1328,7 +1416,7 @@ public void testVariantEquality() {
13281416
// Note: equality depends on underlying Avro schema, which will be different due to nullable value field in shredded variant
13291417
assertNotEquals(shreddedVariant, variant1);
13301418
// Not really required since variant1 == variant2 is established
1331-
assertNotEquals(shreddedVariant, variant2);
1419+
assertNotEquals(shreddedVariant, variant2);
13321420
}
13331421

13341422
@Test
@@ -1341,7 +1429,7 @@ public void testVariantIsNotDetectedForRegularRecord() {
13411429
HoodieSchema regularRecord = HoodieSchema.createRecord("NotAVariant", null, null, fields);
13421430

13431431
// Should not be detected as Variant
1344-
assertFalse(HoodieSchema.Variant.isVariantSchema(regularRecord.getAvroSchema()));
1432+
assertFalse(isVariantSchema(regularRecord.getAvroSchema()));
13451433
assertEquals(HoodieSchemaType.RECORD, regularRecord.getType());
13461434
assertFalse(regularRecord instanceof HoodieSchema.Variant);
13471435
}
@@ -1527,7 +1615,7 @@ public void testVariantCustomNameAndNamespace() {
15271615
assertEquals(customDoc, customVariant.getAvroSchema().getDoc());
15281616

15291617
// Verify it's still recognized as a Variant
1530-
assertTrue(HoodieSchema.Variant.isVariantSchema(customVariant.getAvroSchema()));
1618+
assertTrue(isVariantSchema(customVariant.getAvroSchema()));
15311619
assertEquals(HoodieSchemaType.VARIANT, customVariant.getType());
15321620
}
15331621

@@ -1556,7 +1644,6 @@ public void testVariantLogicalTypeSingleton() {
15561644

15571645
// Both should reference the same singleton instance
15581646
assertSame(logicalType1, logicalType2);
1559-
assertTrue(logicalType1 == logicalType2);
15601647
assertEquals(HoodieSchema.VariantLogicalType.variant(), logicalType1);
15611648

15621649
// Verify reference equality check works in fromAvroSchema

hudi-common/src/test/java/org/apache/hudi/common/schema/TestHoodieSchemaType.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ public void testFromAvro() {
170170
assertEquals(HoodieSchemaType.DECIMAL, HoodieSchemaType.fromAvro(LogicalTypes.decimal(10, 5).addToSchema(Schema.create(Schema.Type.BYTES))));
171171
assertEquals(HoodieSchemaType.DATE, HoodieSchemaType.fromAvro(LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT))));
172172
assertEquals(HoodieSchemaType.UUID, HoodieSchemaType.fromAvro(LogicalTypes.uuid().addToSchema(Schema.create(Schema.Type.STRING))));
173+
assertEquals(HoodieSchemaType.VARIANT, HoodieSchemaType.fromAvro(createVariantSchemaForTest()));
173174
}
174175

175176
private static Map<HoodieSchemaType, Schema> buildSampleSchemasForType() {
@@ -200,6 +201,22 @@ private static Map<HoodieSchemaType, Schema> buildSampleSchemasForType() {
200201
LogicalTypes.date().addToSchema(Schema.create(Schema.Type.INT)));
201202
map.put(HoodieSchemaType.UUID,
202203
LogicalTypes.uuid().addToSchema(Schema.create(Schema.Type.STRING)));
204+
map.put(HoodieSchemaType.VARIANT, createVariantSchemaForTest());
203205
return map;
204206
}
207+
208+
/**
209+
* Creates a variant schema manually using Avro APIs.
210+
*
211+
* @return a variant schema with value and metadata fields
212+
*/
213+
private static Schema createVariantSchemaForTest() {
214+
Schema variantRecord = Schema.createRecord("variant", null, null, false);
215+
variantRecord.setFields(Arrays.asList(
216+
new Schema.Field("value", Schema.create(Schema.Type.BYTES), "Variant value component", null),
217+
new Schema.Field("metadata", Schema.create(Schema.Type.BYTES), "Variant metadata component", null)
218+
));
219+
HoodieSchema.VariantLogicalType.variant().addToSchema(variantRecord);
220+
return variantRecord;
221+
}
205222
}

hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/spark/sql/hudi/feature/index/TestExpressionIndex.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -159,7 +159,7 @@ class TestExpressionIndex extends HoodieSparkSqlTestBase with SparkAdapterSuppor
159159
}
160160

161161
test("Test Create Expression Index Syntax") {
162-
withTempDir { tmp =>2
162+
withTempDir { tmp =>
163163
Seq("cow", "mor").foreach { tableType =>
164164
val databaseName = "default"
165165
val tableName = generateTableName

0 commit comments

Comments
 (0)