Commit b30dc60

Address partial comments
1 parent d10a5e2 commit b30dc60

6 files changed: +0 -88 lines changed

hudi-common/src/avro/java/org/apache/parquet/schema/AvroSchemaRepair.java

Lines changed: 0 additions & 15 deletions
@@ -32,7 +32,6 @@
 import java.util.List;
 
 public class AvroSchemaRepair {
-  public static boolean isLocalTimestampSupported = isLocalTimestampMillisSupported();
 
   public static Schema repairLogicalTypes(Schema fileSchema, Schema tableSchema) {
     Schema repairedSchema = repairAvroSchema(fileSchema, tableSchema);
@@ -242,18 +241,4 @@ public static boolean hasTimestampMillisField(Schema tableSchema) {
         && (tableSchema.getLogicalType() instanceof LogicalTypes.TimestampMillis || tableSchema.getLogicalType() instanceof LogicalTypes.LocalTimestampMillis);
   }
 }
-
-  /**
-   * Check if LogicalTypes.LocalTimestampMillis is supported in the current Avro version
-   *
-   * @return true if LocalTimestampMillis is available, false otherwise
-   */
-  public static boolean isLocalTimestampMillisSupported() {
-    try {
-      return Arrays.stream(LogicalTypes.class.getDeclaredClasses())
-          .anyMatch(c -> c.getSimpleName().equals("LocalTimestampMillis"));
-    } catch (Exception e) {
-      return false;
-    }
-  }
 }
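
For reference, the guard removed above is a reflection-based capability probe: it detects whether the running Avro version declares the LogicalTypes.LocalTimestampMillis inner class (introduced in Avro 1.10). A minimal standalone sketch of that pattern, reconstructed from the removed lines; the class name here is illustrative:

    import java.util.Arrays;

    import org.apache.avro.LogicalTypes;

    public class AvroFeatureProbe {
      // True when the running Avro version declares the
      // LogicalTypes.LocalTimestampMillis inner class (Avro 1.10+).
      public static boolean isLocalTimestampMillisSupported() {
        try {
          return Arrays.stream(LogicalTypes.class.getDeclaredClasses())
              .anyMatch(c -> c.getSimpleName().equals("LocalTimestampMillis"));
        } catch (Exception e) {
          return false;
        }
      }

      public static void main(String[] args) {
        System.out.println(isLocalTimestampMillisSupported());
      }
    }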

hudi-common/src/main/java/org/apache/hudi/BaseHoodieTableFileIndex.java

Lines changed: 0 additions & 5 deletions
@@ -483,11 +483,6 @@ protected boolean shouldReadAsPartitionedTable() {
     return (partitionColumns.length > 0 && canParsePartitionValues()) || HoodieTableMetadata.isMetadataTable(basePath);
   }
 
-  protected PartitionPath convertToPartitionPath(String partitionPath) {
-    Object[] partitionColumnValues = parsePartitionColumnValues(partitionColumns, partitionPath);
-    return new PartitionPath(partitionPath, partitionColumnValues);
-  }
-
   private static long fileSliceSize(FileSlice fileSlice) {
     long logFileSize = fileSlice.getLogFiles().map(HoodieLogFile::getFileSize)
         .filter(s -> s > 0)
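
The removed helper only composed two steps: parse the partition column values, then pair the path with those values. A self-contained sketch of that composition; the PartitionPath holder and the col=value parser below are illustrative stand-ins, not Hudi's implementations:

    import java.util.Arrays;

    public class PartitionPathSketch {
      // Illustrative stand-in for Hudi's PartitionPath pair of (path, values).
      static class PartitionPath {
        final String path;
        final Object[] columnValues;

        PartitionPath(String path, Object[] columnValues) {
          this.path = path;
          this.columnValues = columnValues;
        }
      }

      // Illustrative parser: assumes one Hive-style "col=value" segment per column.
      static Object[] parsePartitionColumnValues(String[] partitionColumns, String partitionPath) {
        String[] segments = partitionPath.split("/");
        Object[] values = new Object[partitionColumns.length];
        for (int i = 0; i < partitionColumns.length; i++) {
          values[i] = segments[i].substring(segments[i].indexOf('=') + 1);
        }
        return values;
      }

      public static void main(String[] args) {
        String[] partitionColumns = {"dt", "hh"};
        String relativePath = "dt=2024-01-01/hh=05";
        // The removed convertToPartitionPath was exactly this two-step composition:
        PartitionPath pp = new PartitionPath(relativePath,
            parsePartitionColumnValues(partitionColumns, relativePath));
        System.out.println(pp.path + " -> " + Arrays.toString(pp.columnValues));
      }
    }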

hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaCache.java

Lines changed: 0 additions & 2 deletions
@@ -31,7 +31,6 @@
  */
 public class AvroSchemaCache {
 
-
   // Ensure that there is only one variable instance of the same schema within an entire JVM lifetime
   private static final LoadingCache<Schema, Schema> SCHEMA_CACHE = Caffeine.newBuilder().weakValues().maximumSize(1024).build(k -> k);
 
@@ -43,5 +42,4 @@ public class AvroSchemaCache {
   public static Schema intern(Schema schema) {
     return SCHEMA_CACHE.get(schema);
   }
-
 }
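
The comment kept above describes schema interning. A minimal runnable sketch of the same Caffeine pattern, assuming the Avro and Caffeine dependencies are on the classpath: the identity loader (k -> k) pins each structurally equal schema to its first-seen instance, and weakValues() lets unused canonical instances be garbage-collected.

    import com.github.benmanes.caffeine.cache.Caffeine;
    import com.github.benmanes.caffeine.cache.LoadingCache;
    import org.apache.avro.Schema;
    import org.apache.avro.SchemaBuilder;

    public class SchemaInternDemo {
      private static final LoadingCache<Schema, Schema> SCHEMA_CACHE =
          Caffeine.newBuilder().weakValues().maximumSize(1024).build(k -> k);

      public static void main(String[] args) {
        Schema a = SchemaBuilder.record("r").fields().requiredInt("f").endRecord();
        Schema b = SchemaBuilder.record("r").fields().requiredInt("f").endRecord();
        // Structurally equal schemas intern to one canonical instance per JVM.
        System.out.println(SCHEMA_CACHE.get(a) == SCHEMA_CACHE.get(b)); // true
      }
    }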

hudi-common/src/main/java/org/apache/hudi/avro/AvroSchemaUtils.java

Lines changed: 0 additions & 4 deletions
@@ -223,10 +223,6 @@ public static Option<Schema> findNestedFieldSchema(Schema schema, String fieldNa
     return Option.of(getNonNullTypeFromUnion(schema));
   }
 
-  public static Option<Schema.Type> findNestedFieldType(Schema schema, String fieldName) {
-    return findNestedFieldSchema(schema, fieldName).map(Schema::getType);
-  }
-
   /**
    * Appends provided new fields at the end of the given schema
    *
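
Callers of the removed findNestedFieldType can inline the same one-line composition over findNestedFieldSchema. A runnable sketch reduced to top-level record fields, with java.util.Optional standing in for Hudi's Option:

    import java.util.Optional;

    import org.apache.avro.Schema;
    import org.apache.avro.SchemaBuilder;

    public class NestedFieldTypeDemo {
      // Stand-in for AvroSchemaUtils.findNestedFieldSchema, reduced to
      // top-level fields for the sake of a self-contained example.
      static Optional<Schema> findNestedFieldSchema(Schema schema, String fieldName) {
        return Optional.ofNullable(schema.getField(fieldName)).map(Schema.Field::schema);
      }

      public static void main(String[] args) {
        Schema record = SchemaBuilder.record("r").fields().requiredLong("ts").endRecord();
        // The removed findNestedFieldType was exactly this map over the schema:
        Optional<Schema.Type> type = findNestedFieldSchema(record, "ts").map(Schema::getType);
        System.out.println(type); // Optional[LONG]
      }
    }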

hudi-common/src/main/java/org/apache/hudi/common/util/DateTimeUtils.java

Lines changed: 0 additions & 6 deletions
@@ -52,12 +52,6 @@ public static Instant microsToInstant(long microsFromEpoch) {
     return Instant.ofEpochSecond(epochSeconds, nanoAdjustment);
   }
 
-  public static Instant nanosToInstant(long nanosFromEpoch) {
-    long epochSeconds = nanosFromEpoch / (1_000_000_000L);
-    long nanoAdjustment = nanosFromEpoch % (1_000_000_000L);
-    return Instant.ofEpochSecond(epochSeconds, nanoAdjustment);
-  }
-
   /**
    * Converts provided {@link Instant} to microseconds (from epoch)
    */
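
Since Instant.ofEpochSecond(seconds, nanoAdjustment) normalizes an arbitrary nano adjustment itself, the removed nanosToInstant reduces to a one-liner; a quick check of the equivalence:

    import java.time.Instant;

    public class NanosToInstantDemo {
      public static void main(String[] args) {
        long nanosFromEpoch = 1_700_000_000_123_456_789L;
        // The removed helper's divide/modulo split...
        Instant split = Instant.ofEpochSecond(
            nanosFromEpoch / 1_000_000_000L, nanosFromEpoch % 1_000_000_000L);
        // ...is equivalent to letting ofEpochSecond normalize the whole value:
        Instant direct = Instant.ofEpochSecond(0, nanosFromEpoch);
        System.out.println(split.equals(direct)); // true
      }
    }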

hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/ColumnStatsIndexSupport.scala

Lines changed: 0 additions & 56 deletions
@@ -147,62 +147,6 @@ class ColumnStatsIndexSupport(spark: SparkSession,
     }
   }
 
-  /**
-   * Loads view of the Column Stats Index in a transposed format where single row coalesces every columns'
-   * statistics for a single file, returning it as [[DataFrame]]
-   *
-   * Please check out scala-doc of the [[transpose]] method explaining this view in more details
-   */
-  def loadTransposed[T](targetColumns: Seq[String],
-                        shouldReadInMemory: Boolean,
-                        prunedPartitions: Option[Set[String]] = None,
-                        prunedFileNamesOpt: Option[Set[String]] = None)(block: DataFrame => T): T = {
-    cachedColumnStatsIndexViews.get(targetColumns) match {
-      case Some(cachedDF) =>
-        block(cachedDF)
-      case None =>
-        val colStatsRecords: HoodieData[HoodieMetadataColumnStats] = prunedFileNamesOpt match {
-          case Some(prunedFileNames) =>
-            val filterFunction = new SerializableFunction[HoodieMetadataColumnStats, java.lang.Boolean] {
-              override def apply(r: HoodieMetadataColumnStats): java.lang.Boolean = {
-                prunedFileNames.contains(r.getFileName)
-              }
-            }
-            loadColumnStatsIndexRecords(targetColumns, shouldReadInMemory).filter(filterFunction)
-          case None =>
-            loadColumnStatsIndexRecords(targetColumns, shouldReadInMemory)
-        }
-
-        withPersistedData(colStatsRecords, StorageLevel.MEMORY_ONLY) {
-          val (transposedRows, indexSchema) = transpose(colStatsRecords, targetColumns)
-          val df = if (shouldReadInMemory) {
-            // NOTE: This will instantiate a [[Dataset]] backed by [[LocalRelation]] holding all of the rows
-            //       of the transposed table in memory, facilitating execution of the subsequently chained operations
-            //       on it locally (on the driver; all such operations are actually going to be performed by Spark's
-            //       Optimizer)
-            HoodieUnsafeUtils.createDataFrameFromRows(spark, transposedRows.collectAsList().asScala.toSeq, indexSchema)
-          } else {
-            val rdd = HoodieJavaRDD.getJavaRDD(transposedRows)
-            spark.createDataFrame(rdd, indexSchema)
-          }
-
-          if (allowCaching) {
-            cachedColumnStatsIndexViews.put(targetColumns, df)
-            // NOTE: Instead of collecting the rows from the index and hold them in memory, we instead rely
-            //       on Spark as (potentially distributed) cache managing data lifecycle, while we simply keep
-            //       the referenced to persisted [[DataFrame]] instance
-            df.persist(StorageLevel.MEMORY_ONLY)
-
-            block(df)
-          } else {
-            withPersistedDataset(df) {
-              block(df)
-            }
-          }
-        }
-    }
-  }
-
   /**
    * Loads a view of the Column Stats Index in a raw format, returning it as [[DataFrame]]
    *
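
The NOTE comments in the removed loadTransposed capture its two materialization strategies: collect the transposed rows into a driver-local DataFrame, or persist a distributed one and hand out only the reference. A minimal sketch of the persist-and-reuse half via Spark's Java API (session setup is illustrative):

    import org.apache.spark.sql.Dataset;
    import org.apache.spark.sql.Row;
    import org.apache.spark.sql.SparkSession;
    import org.apache.spark.storage.StorageLevel;

    public class PersistSketch {
      public static void main(String[] args) {
        SparkSession spark = SparkSession.builder()
            .master("local[1]").appName("persist-sketch").getOrCreate();
        Dataset<Row> df = spark.range(100).toDF("id");
        // Persist is lazy: Spark caches the partitions on the first action and
        // manages their lifecycle; the caller keeps only the DataFrame reference.
        df.persist(StorageLevel.MEMORY_ONLY());
        System.out.println(df.count()); // first action populates the cache
        System.out.println(df.count()); // served from cached blocks
        df.unpersist();
        spark.stop();
      }
    }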
