Skip to content

Commit 7d16660

Browse files
committed
Addressed comments
1 parent 5472853 commit 7d16660

File tree

2 files changed

+9
-5
lines changed

2 files changed

+9
-5
lines changed

hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,7 @@ public static void addToVector(TypeDescription type, ColumnVector colVector, Hoo
353353
*
354354
* @param unionVector The vector to store value.
355355
* @param unionChildTypes All possible types for the value Object.
356-
* @param schema Avro union schema for the value Object.
356+
* @param schema Union schema for the value Object.
357357
* @param value Object to be added to the unionVector.
358358
* @param vectorPos The position in the vector at which the value will be stored.
359359
* @return succeeded or failed
@@ -801,7 +801,7 @@ public static HoodieSchema createSchema(TypeDescription orcSchema) {
801801
* the nullability of an Avro type. To achieve consistency between the Avro and ORC schema,
802802
* non-NULL types are extracted from the union type.
803803
* @param unionSchema A schema of union type.
804-
* @return An Avro schema that is either NULL or a UNION without NULL fields.
804+
* @return A schema that is either NULL or a UNION without NULL fields.
805805
*/
806806
private static HoodieSchema getActualSchemaType(HoodieSchema unionSchema) {
807807
final List<HoodieSchema> nonNullMembers = unionSchema.getTypes().stream()

hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/hadoop/HoodieAvroParquetReader.java

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,6 @@ protected ClosableIterator<IndexedRecord> getIndexedRecordIterator(HoodieSchema
112112

113113
@Override
114114
public ClosableIterator<IndexedRecord> getIndexedRecordIterator(HoodieSchema readerSchema, HoodieSchema requestedSchema) throws IOException {
115-
//TODO boundary for now to revisit in later pr to use HoodieSchema
116115
return getIndexedRecordIteratorInternal(requestedSchema, Collections.emptyMap());
117116
}
118117

@@ -123,8 +122,13 @@ public ClosableIterator<IndexedRecord> getIndexedRecordIterator(HoodieSchema rea
123122

124123
@Override
125124
public HoodieSchema getSchema() {
126-
fileSchema = fileSchema.or(() -> Option.ofNullable(parquetUtils.readSchema(storage, path)));
127-
return fileSchema.get();
125+
// Lazy initialization with caching: read schema from parquet file footer on first call,
126+
// then cache it in fileSchema to avoid repeated I/O on subsequent calls
127+
return fileSchema.orElseGet(() -> {
128+
HoodieSchema schema = parquetUtils.readSchema(storage, path);
129+
fileSchema = Option.ofNullable(schema);
130+
return schema;
131+
});
128132
}
129133

130134
@Override

0 commit comments

Comments
 (0)