
Commit 594dcdf

Address comments
1 parent 1123c23 commit 594dcdf

File tree

23 files changed: +193, -144 lines changed

hudi-client/hudi-client-common/src/main/java/org/apache/hudi/io/HoodieBinaryCopyHandle.java

Lines changed: 1 addition & 1 deletion
@@ -69,7 +69,7 @@ private MessageType getWriteSchema(HoodieWriteConfig config, List<StoragePath> i
       // All files should have the same schema in this case
       try {
         ParquetUtils parquetUtils = new ParquetUtils();
-        MessageType fileSchema = parquetUtils.readSchema(table.getStorage(), inputFiles.get(0));
+        MessageType fileSchema = parquetUtils.readMessageType(table.getStorage(), inputFiles.get(0));
         log.info("Binary copy schema evolution disabled. Using schema from input file: " + inputFiles.get(0));
         return fileSchema;
       } catch (Exception e) {

hudi-client/hudi-client-common/src/main/java/org/apache/hudi/metadata/SecondaryIndexRecordGenerationUtils.java

Lines changed: 1 addition & 1 deletion
@@ -263,7 +263,7 @@ public static <T> HoodieData<HoodieRecord> readSecondaryKeysFromFileSlices(Hoodi
       if (dataFilePath.isPresent()) {
         readerSchema = HoodieIOFactory.getIOFactory(metaClient.getStorage())
             .getFileFormatUtils(baseFileFormat)
-            .readHoodieSchema(metaClient.getStorage(), dataFilePath.get());
+            .readSchema(metaClient.getStorage(), dataFilePath.get());
       } else {
         readerSchema = tableSchema;
       }

hudi-client/hudi-client-common/src/test/java/org/apache/hudi/io/TestHoodieBinaryCopyHandleSchemaEvolution.java

Lines changed: 2 additions & 2 deletions
@@ -113,7 +113,7 @@ public void testSchemaEvolutionDisabled_UsesFileSchema() throws Exception {
     // Mock ParquetUtils to return file schema
     try (MockedConstruction<ParquetUtils> parquetUtilsConstruction = mockConstruction(ParquetUtils.class,
         (mock, context) -> {
-          when(mock.readSchema(eq(storage), eq(inputFiles.get(0)))).thenReturn(fileSchema);
+          when(mock.readMessageType(eq(storage), eq(inputFiles.get(0)))).thenReturn(fileSchema);
         })) {

       // When: Creating HoodieBinaryCopyHandle (we can't instantiate directly due to complex dependencies,
@@ -201,7 +201,7 @@ public MessageType testGetWriteSchema(HoodieWriteConfig config, List<StoragePath
         throw new IOException("Simulated file read error");
       }
       ParquetUtils parquetUtils = new ParquetUtils();
-      MessageType fileSchema = parquetUtils.readSchema(table.getStorage(), inputFiles.get(0));
+      MessageType fileSchema = parquetUtils.readMessageType(table.getStorage(), inputFiles.get(0));
       return fileSchema;
     } catch (Exception e) {
       throw new HoodieIOException("Failed to read schema from input file when schema evolution is disabled: " + inputFiles.get(0),

hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkParquetReader.java

Lines changed: 1 addition & 1 deletion
@@ -189,7 +189,7 @@ public ClosableIterator<UnsafeRow> getUnsafeRowIterator(HoodieSchema requestedSc

   private MessageType getFileSchema() {
     if (fileSchemaOption.isEmpty()) {
-      MessageType messageType = ((ParquetUtils) parquetUtils).readSchema(storage, path);
+      MessageType messageType = ((ParquetUtils) parquetUtils).readMessageType(storage, path);
       fileSchemaOption = Option.of(messageType);
     }
     return fileSchemaOption.get();

hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaUtils.java

Lines changed: 1 addition & 1 deletion
@@ -659,7 +659,7 @@ public static Object convertValueForSpecificDataTypes(HoodieSchema fieldSchema,
    * @return HoodieSchema containing record key and partition path fields
    */
   public static HoodieSchema getRecordKeyPartitionPathSchema() {
-    List<HoodieSchemaField> toBeAddedFields = new ArrayList<>();
+    List<HoodieSchemaField> toBeAddedFields = new ArrayList<>(2);

     HoodieSchemaField recordKeyField =
         createNewSchemaField(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE);

hudi-common/src/main/java/org/apache/hudi/common/table/TableSchemaResolver.java

Lines changed: 2 additions & 2 deletions
@@ -384,7 +384,7 @@ public Schema readSchemaFromLastCompaction(Option<HoodieInstant> lastCompactionC
         + lastCompactionCommit + ", could not get schema for table " + metaClient.getBasePath()));
     StoragePath path = new StoragePath(filePath);
     return HoodieIOFactory.getIOFactory(metaClient.getStorage())
-        .getFileFormatUtils(path).readHoodieSchema(metaClient.getStorage(), path).toAvroSchema();
+        .getFileFormatUtils(path).readSchema(metaClient.getStorage(), path).toAvroSchema();
   }

   private Schema readSchemaFromLogFile(StoragePath path) throws IOException {
@@ -568,7 +568,7 @@ private Schema fetchSchemaFromFiles(Stream<StoragePath> filePaths) {
           return readSchemaFromLogFile(filePath);
         } else {
           HoodieSchema hoodieSchema = HoodieIOFactory.getIOFactory(metaClient.getStorage())
-              .getFileFormatUtils(filePath).readHoodieSchema(metaClient.getStorage(), filePath);
+              .getFileFormatUtils(filePath).readSchema(metaClient.getStorage(), filePath);
           return hoodieSchema != null ? hoodieSchema.toAvroSchema() : null;
         }
       } catch (IOException e) {
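
Both hunks above follow the same pattern: resolve the format-specific FileFormatUtils for a base file, read its HoodieSchema through the renamed readSchema, and convert to Avro only at the boundary. A minimal sketch of that pattern, assuming a HoodieTableMetaClient and a base-file StoragePath are already in hand; the helper name and the import paths are inferred from the file locations in this diff, not stated in it:

import org.apache.avro.Schema;
import org.apache.hudi.common.schema.HoodieSchema;
import org.apache.hudi.common.table.HoodieTableMetaClient;
import org.apache.hudi.io.storage.HoodieIOFactory;
import org.apache.hudi.storage.StoragePath;

final class BaseFileSchemaSketch {
  // Illustrative helper, not part of this commit: read a base file's schema and hand back Avro.
  static Schema readAvroSchema(HoodieTableMetaClient metaClient, StoragePath baseFilePath) {
    HoodieSchema hoodieSchema = HoodieIOFactory.getIOFactory(metaClient.getStorage())
        .getFileFormatUtils(baseFilePath)                   // picks the Parquet/HFile/ORC utils for the path
        .readSchema(metaClient.getStorage(), baseFilePath); // formerly readHoodieSchema(...)
    return hoodieSchema != null ? hoodieSchema.toAvroSchema() : null;
  }
}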

hudi-common/src/main/java/org/apache/hudi/common/util/FileFormatUtils.java

Lines changed: 2 additions & 2 deletions
@@ -288,7 +288,7 @@ protected HoodieSchema getKeyIteratorSchema(HoodieStorage storage, StoragePath f
           List<String> fields = new ArrayList<>();
           fields.addAll(keyGenerator.getRecordKeyFieldNames());
           fields.addAll(keyGenerator.getPartitionPathFields());
-          return HoodieSchemaUtils.projectSchema(readHoodieSchema(storage, filePath), fields);
+          return HoodieSchemaUtils.projectSchema(readSchema(storage, filePath), fields);
         })
         .orElse(partitionPath.isPresent() ? HoodieSchemaUtils.getRecordKeySchema() : HoodieSchemaUtils.getRecordKeyPartitionPathSchema());
   }
@@ -314,7 +314,7 @@ public abstract ClosableIterator<Pair<HoodieKey, Long>> fetchRecordKeysWithPosit
    * @param filePath the data file path.
    * @return the Avro schema of the data file.
    */
-  public abstract HoodieSchema readHoodieSchema(HoodieStorage storage, StoragePath filePath);
+  public abstract HoodieSchema readSchema(HoodieStorage storage, StoragePath filePath);

   /**
    * Reads column statistics stored in the metadata.
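
After this rename the two schema readers are told apart by return type rather than by a "Hoodie" prefix: readMessageType stays Parquet-specific and returns the raw Parquet MessageType, while readSchema is the format-agnostic FileFormatUtils contract returning HoodieSchema. A minimal sketch of the distinction; the package paths, and the assumption that ParquetUtils implements the abstract readSchema just as HFileUtils does below, are inferred from this diff rather than stated in it:

import org.apache.hudi.common.schema.HoodieSchema;
import org.apache.hudi.common.util.ParquetUtils;
import org.apache.hudi.storage.HoodieStorage;
import org.apache.hudi.storage.StoragePath;
import org.apache.parquet.schema.MessageType;

final class SchemaReadNamingSketch {
  // Parquet-specific read: the raw Parquet schema, renamed from readSchema(...) in this commit.
  static MessageType rawParquetSchema(HoodieStorage storage, StoragePath path) {
    return new ParquetUtils().readMessageType(storage, path);
  }

  // Format-agnostic read: Hudi's own schema type, renamed from readHoodieSchema(...) in this commit.
  static HoodieSchema hudiSchema(HoodieStorage storage, StoragePath path) {
    return new ParquetUtils().readSchema(storage, path);
  }
}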

hudi-common/src/main/java/org/apache/hudi/common/util/HFileUtils.java

Lines changed: 1 addition & 1 deletion
@@ -152,7 +152,7 @@ public ClosableIterator<Pair<HoodieKey, Long>> fetchRecordKeysWithPositions(Hood
   }

   @Override
-  public HoodieSchema readHoodieSchema(HoodieStorage storage, StoragePath filePath) {
+  public HoodieSchema readSchema(HoodieStorage storage, StoragePath filePath) {
     LOG.info("Reading schema from {}", filePath);

     try (HoodieFileReader fileReader =

hudi-flink-datasource/hudi-flink/src/main/java/org/apache/hudi/table/format/HoodieRowDataParquetReader.java

Lines changed: 1 addition & 1 deletion
@@ -116,7 +116,7 @@ public DataType getRowType() {
       // Some types in avro are not compatible with parquet.
       // Avro only supports representing Decimals as fixed byte array
       // and therefore if we convert to Avro directly we'll lose logical type-info.
-      MessageType messageType = parquetUtils.readSchema(storage, path);
+      MessageType messageType = parquetUtils.readMessageType(storage, path);
       RowType rowType = ParquetSchemaConverter.convertToRowType(messageType);
       fileRowType = DataTypes.of(rowType);
     }

hudi-hadoop-common/src/main/java/org/apache/hudi/common/util/AvroOrcUtils.java

Lines changed: 10 additions & 1 deletion
@@ -79,7 +79,7 @@ public class AvroOrcUtils {
    *
    * @param type ORC schema of the value Object.
    * @param colVector The column vector to store the value Object.
-   * @param schema Avro schema of the value Object.
+   * @param schema Schema of the value Object.
    *               Only used to check logical types for timestamp unit conversion.
    * @param value Object to be added to the column vector
    * @param vectorPos The position in the vector where value will be stored at.
@@ -657,6 +657,15 @@ public static TypeDescription createOrcSchema(HoodieSchema schema) {
         }
       case TIMESTAMP:
         HoodieSchema.Timestamp timestampSchema = (HoodieSchema.Timestamp) schema;
+        // NOTE: Preserving old behavior from before HoodieSchema refactoring:
+        // - UTC-adjusted timestamps (TimestampMillis/Micros) are converted to ORC Timestamp
+        // - Local timestamps (LocalTimestampMillis/Micros) are converted to ORC Long
+        // This is because the old code did not handle LocalTimestamp logical types explicitly,
+        // causing them to fall through to the base type (LONG) conversion.
+        if (!timestampSchema.isUtcAdjusted()) {
+          // Local timestamp - treat as ORC Long (old behavior)
+          return TypeDescription.createLong();
+        }
         if (timestampSchema.getPrecision() == TimePrecision.MILLIS) {
           // The timestamp-millis logical type represents an instant on the global timeline, independent of a
           // particular time zone or calendar, with a precision of one millisecond.
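
The branch added above reduces to a small mapping from the timestamp's UTC flag to an ORC type. A minimal sketch of that mapping using ORC's TypeDescription factory methods; the class and method names in the sketch are illustrative, not part of the commit:

import org.apache.orc.TypeDescription;

final class TimestampOrcMappingSketch {
  static TypeDescription orcTypeForTimestamp(boolean isUtcAdjusted) {
    if (!isUtcAdjusted) {
      // local-timestamp-millis / local-timestamp-micros: the pre-refactoring code never matched
      // these logical types, so they fell through to the base type and became ORC LONG.
      return TypeDescription.createLong();
    }
    // timestamp-millis / timestamp-micros: instants on the global timeline map to ORC TIMESTAMP.
    return TypeDescription.createTimestamp();
  }
}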
