
Commit 79c4a88

Fix cherry-pick error

1 parent b891bf2 commit 79c4a88

File tree: 34 files changed, +685 -520 lines changed

hudi-client/hudi-client-common/src/main/java/org/apache/hudi/table/HoodieTable.java

Lines changed: 1 addition & 65 deletions
@@ -46,6 +46,7 @@
 import org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy;
 import org.apache.hudi.common.model.HoodieFileFormat;
 import org.apache.hudi.common.model.HoodieKey;
+import org.apache.hudi.common.model.HoodieRecordLocation;
 import org.apache.hudi.common.model.HoodieWriteStat;
 import org.apache.hudi.common.table.HoodieTableConfig;
 import org.apache.hudi.common.table.HoodieTableMetaClient;
@@ -108,7 +109,6 @@
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
-import static org.apache.hudi.avro.AvroSchemaUtils.getNonNullTypeFromUnion;
 import static org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy.EAGER;
 import static org.apache.hudi.common.model.HoodieFailedWritesCleaningPolicy.LAZY;
 import static org.apache.hudi.common.table.HoodieTableConfig.TABLE_METADATA_PARTITIONS;
@@ -891,70 +891,6 @@ private void validateSchema() throws HoodieUpsertException, HoodieInsertException
     }
   }
 
-  /**
-   * Validates that columns with secondary indexes are not evolved in an incompatible way.
-   *
-   * @param tableSchema the current table schema
-   * @param writerSchema the new writer schema
-   * @param indexMetadata the index metadata containing all index definitions
-   * @throws SchemaCompatibilityException if a secondary index column has incompatible evolution
-   */
-  static void validateSecondaryIndexSchemaEvolution(
-      Schema tableSchema,
-      Schema writerSchema,
-      HoodieIndexMetadata indexMetadata) throws SchemaCompatibilityException {
-
-    // Filter for secondary index definitions
-    List<HoodieIndexDefinition> secondaryIndexDefs = indexMetadata.getIndexDefinitions().values().stream()
-        .filter(indexDef -> MetadataPartitionType.fromPartitionPath(indexDef.getIndexName()).equals(MetadataPartitionType.SECONDARY_INDEX))
-        .collect(Collectors.toList());
-
-    if (secondaryIndexDefs.isEmpty()) {
-      return;
-    }
-
-    // Create a map from source field to index name for efficient lookup
-    Map<String, String> columnToIndexName = new HashMap<>();
-    for (HoodieIndexDefinition indexDef : secondaryIndexDefs) {
-      String indexName = indexDef.getIndexName();
-      for (String sourceField : indexDef.getSourceFields()) {
-        // Note: If a column is part of multiple indexes, this will use the last one
-        // This is fine since we just need any index name for error reporting
-        columnToIndexName.put(sourceField, indexName);
-      }
-    }
-
-    // Check each indexed column for schema evolution
-    for (Map.Entry<String, String> entry : columnToIndexName.entrySet()) {
-      String columnName = entry.getKey();
-      String indexName = entry.getValue();
-
-      Schema.Field tableField = tableSchema.getField(columnName);
-
-      if (tableField == null) {
-        // This shouldn't happen as indexed columns should exist in table schema
-        LOG.warn("Secondary index '{}' references non-existent column: {}", indexName, columnName);
-        continue;
-      }
-
-      // Use AvroSchemaCompatibility's field lookup logic to handle aliases
-      Schema.Field writerField = AvroSchemaCompatibility.lookupWriterField(writerSchema, tableField);
-
-      if (writerField != null && !tableField.schema().equals(writerField.schema())) {
-        // Check if this is just making the field nullable/non-nullable, which is safe from SI perspective
-        if (getNonNullTypeFromUnion(tableField.schema()).equals(getNonNullTypeFromUnion(writerField.schema()))) {
-          continue;
-        }
-
-        String errorMessage = String.format(
-            "Column '%s' has secondary index '%s' and cannot evolve from schema '%s' to '%s'. "
-                + "Please drop the secondary index before changing the column type.",
-            columnName, indexName, tableField.schema(), writerField.schema());
-        throw new SchemaCompatibilityException(errorMessage);
-      }
-    }
-  }
-
   public void validateUpsertSchema() throws HoodieUpsertException {
     if (isMetadataTable) {
       return;
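
Note on the removed check above: it only tolerated an evolution of a secondary-index column when the change was purely about nullability, which it decided by comparing the non-null branches of the old and new Avro schemas. A minimal, self-contained Scala sketch of that comparison follows; the nonNullBranch helper is a hypothetical stand-in for AvroSchemaUtils.getNonNullTypeFromUnion, and the example schemas are invented for illustration.

import org.apache.avro.Schema

import scala.collection.JavaConverters._

object SecondaryIndexEvolutionSketch {

  // Hypothetical stand-in for AvroSchemaUtils.getNonNullTypeFromUnion:
  // for a union such as ["null", "long"], return the non-null branch; otherwise return the schema itself.
  def nonNullBranch(schema: Schema): Schema =
    if (schema.getType == Schema.Type.UNION) {
      schema.getTypes.asScala.find(_.getType != Schema.Type.NULL).getOrElse(schema)
    } else {
      schema
    }

  // The removed validation accepted a change to an indexed column only when the
  // non-null branches still matched, i.e. the change merely added or dropped nullability.
  def isNullabilityOnlyChange(tableFieldSchema: Schema, writerFieldSchema: Schema): Boolean =
    nonNullBranch(tableFieldSchema) == nonNullBranch(writerFieldSchema)

  def main(args: Array[String]): Unit = {
    val longType = Schema.create(Schema.Type.LONG)
    val nullableLong = Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.LONG))
    val stringType = Schema.create(Schema.Type.STRING)

    println(isNullabilityOnlyChange(longType, nullableLong)) // true: long -> nullable long would have been allowed
    println(isNullabilityOnlyChange(longType, stringType))   // false: would have triggered SchemaCompatibilityException
  }
}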

hudi-client/hudi-spark-client/pom.xml

Lines changed: 20 additions & 0 deletions
@@ -281,6 +281,26 @@
         <groupId>org.apache.rat</groupId>
         <artifactId>apache-rat-plugin</artifactId>
       </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>build-helper-maven-plugin</artifactId>
+        <version>3.5.0</version>
+        <executions>
+          <execution>
+            <id>add-spark32plus-parquet-sources</id>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>add-source</goal>
+            </goals>
+            <configuration>
+              <skipAddSource>${spark31orEarlier}</skipAddSource>
+              <sources>
+                <source>src/parquet/scala</source>
+              </sources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
     </plugins>
 
     <resources>

hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkParquetReader.java

Lines changed: 1 addition & 1 deletion
@@ -177,7 +177,7 @@ private StructType convertToStruct(MessageType messageType) {
 
   @Override
   public void close() {
-    readerIterators.forEach(ParquetReaderIterator::close);
+    readerIterators.forEach(it -> it.close());
   }
 
   @Override

hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/row/HoodieRowParquetWriteSupport.java

Lines changed: 1 addition & 14 deletions
@@ -18,32 +18,19 @@
 
 package org.apache.hudi.io.storage.row;
 
-import org.apache.avro.LogicalTypes;
-import org.apache.avro.Schema;
-import org.apache.hadoop.conf.Configuration;
-
-import org.apache.hudi.SparkAdapterSupport$;
 import org.apache.hudi.avro.HoodieBloomFilterWriteSupport;
 import org.apache.hudi.common.bloom.BloomFilter;
 import org.apache.hudi.common.config.HoodieConfig;
 import org.apache.hudi.common.config.HoodieStorageConfig;
 import org.apache.hudi.common.util.Option;
 import org.apache.hudi.common.util.ReflectionUtils;
 
+import org.apache.hadoop.conf.Configuration;
 import org.apache.parquet.hadoop.api.WriteSupport;
-import org.apache.parquet.schema.GroupType;
-import org.apache.parquet.schema.LogicalTypeAnnotation;
-import org.apache.parquet.schema.Type;
-import org.apache.parquet.schema.Types;
-import org.apache.spark.sql.execution.datasources.parquet.ParquetUtils;
 import org.apache.spark.sql.execution.datasources.parquet.ParquetWriteSupport;
-import org.apache.spark.sql.types.DataTypes;
-import org.apache.spark.sql.types.Decimal;
-import org.apache.spark.sql.types.Metadata;
 import org.apache.spark.sql.types.StructType;
 import org.apache.spark.unsafe.types.UTF8String;
 
-import java.util.Arrays;
 import java.util.Collections;
 import java.util.Map;

hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala

Lines changed: 4 additions & 8 deletions
@@ -19,19 +19,15 @@
 package org.apache.spark.sql.hudi
 
 import org.apache.avro.Schema
-import org.apache.hadoop.fs.{FileStatus, Path}
 import org.apache.hudi.client.utils.SparkRowSerDe
 import org.apache.hudi.common.table.HoodieTableMetaClient
 import org.apache.hudi.storage.StoragePath
 
-import org.apache.avro.Schema
-import org.apache.hadoop.conf.Configuration
 import org.apache.spark.sql._
 import org.apache.spark.sql.avro.{HoodieAvroDeserializer, HoodieAvroSchemaConverters, HoodieAvroSerializer}
 import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
 import org.apache.spark.sql.catalyst.catalog.CatalogTable
-import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
-import org.apache.spark.sql.catalyst.expressions.{Attribute, AttributeReference, Expression, InterpretedPredicate}
+import org.apache.spark.sql.catalyst.expressions.{AttributeReference, Expression, InterpretedPredicate}
 import org.apache.spark.sql.catalyst.parser.ParserInterface
 import org.apache.spark.sql.catalyst.planning.PhysicalOperation
 import org.apache.spark.sql.catalyst.plans.logical.{Command, LogicalPlan}
@@ -53,7 +49,7 @@ import java.util.{Locale, TimeZone}
 trait SparkAdapter extends Serializable {
 
   /**
-   * Checks whether provided instance of [[InternalRow]] is actually an instance of [[ColumnarBatchRow]]
+   * Checks whether provided instance of [[InternalRow]] is actually an instance of [[org.apache.spark.sql.vectorized.ColumnarBatchRow]]
    */
   def isColumnarBatchRow(r: InternalRow): Boolean
 
@@ -72,7 +68,7 @@
 
   /**
    * Returns an instance of [[HoodieCatalogUtils]] providing for common utils operating on Spark's
-   * [[TableCatalog]]s
+   * [[org.apache.spark.sql.connector.catalog.TableCatalog]]s
    */
   def getCatalogUtils: HoodieCatalogUtils
 
@@ -207,7 +203,7 @@
                       metadataColumns: Seq[AttributeReference] = Seq.empty): FileScanRDD
 
   /**
-   * Extract condition in [[DeleteFromTable]]
+   * Extract condition in [[org.apache.spark.sql.catalyst.plans.logical.DeleteFromTable]]
    * SPARK-38626 condition is no longer Option in Spark 3.3
    */
   def extractDeleteCondition(deleteFromTable: Command): Expression
HoodieParquetReadSupport.scala (new file)

Lines changed: 114 additions & 0 deletions

@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.execution.datasources.parquet
+
+import org.apache.hudi.SparkAdapterSupport
+import org.apache.hudi.common.util.ValidationUtils
+import org.apache.parquet.hadoop.api.InitContext
+import org.apache.parquet.hadoop.api.ReadSupport.ReadContext
+import org.apache.parquet.schema.{GroupType, MessageType, Type, Types}
+import org.apache.spark.sql.catalyst.util.RebaseDateTime.RebaseSpec
+
+import java.time.ZoneId
+import scala.collection.JavaConverters._
+
+class HoodieParquetReadSupport(
+    convertTz: Option[ZoneId],
+    enableVectorizedReader: Boolean,
+    val enableTimestampFieldRepair: Boolean,
+    datetimeRebaseSpec: RebaseSpec,
+    int96RebaseSpec: RebaseSpec,
+    tableSchemaOpt: org.apache.hudi.common.util.Option[MessageType] = org.apache.hudi.common.util.Option.empty())
+  extends ParquetReadSupport(convertTz, enableVectorizedReader, datetimeRebaseSpec, int96RebaseSpec) with SparkAdapterSupport {
+
+  override def init(context: InitContext): ReadContext = {
+    val readContext = super.init(context)
+    // repair is needed here because this is the schema that is used by the reader to decide what
+    // conversions are necessary
+    val requestedParquetSchema = if (enableTimestampFieldRepair) {
+      HoodieParquetReadSupport.getRepairedSchema(readContext.getRequestedSchema, tableSchemaOpt)
+    } else {
+      readContext.getRequestedSchema
+    }
+    val trimmedParquetSchema = HoodieParquetReadSupport.trimParquetSchema(requestedParquetSchema, context.getFileSchema)
+    new ReadContext(trimmedParquetSchema, readContext.getReadSupportMetadata)
+  }
+}
+
+object HoodieParquetReadSupport {
+  /**
+   * Removes any fields from the parquet schema that do not have any child fields in the actual file schema after the
+   * schema is trimmed down to the requested fields. This can happen when the table schema evolves and only a subset of
+   * the nested fields are required by the query.
+   *
+   * @param requestedSchema the initial parquet schema requested by Spark
+   * @param fileSchema the actual parquet schema of the file
+   * @return a potentially updated schema with empty struct fields removed
+   */
+  def trimParquetSchema(requestedSchema: MessageType, fileSchema: MessageType): MessageType = {
+    val trimmedFields = requestedSchema.getFields.asScala.map(field => {
+      if (fileSchema.containsField(field.getName)) {
+        trimParquetType(field, fileSchema.asGroupType().getType(field.getName))
+      } else {
+        Some(field)
+      }
+    }).filter(_.isDefined).map(_.get).toArray[Type]
+    Types.buildMessage().addFields(trimmedFields: _*).named(requestedSchema.getName)
+  }
+
+  private def trimParquetType(requestedType: Type, fileType: Type): Option[Type] = {
+    if (requestedType.equals(fileType)) {
+      Some(requestedType)
+    } else {
+      requestedType match {
+        case groupType: GroupType =>
+          ValidationUtils.checkState(!fileType.isPrimitive,
+            "Group type provided by requested schema but existing type in the file is a primitive")
+          val fileTypeGroup = fileType.asGroupType()
+          var hasMatchingField = false
+          val fields = groupType.getFields.asScala.map(field => {
+            if (fileTypeGroup.containsField(field.getName)) {
+              hasMatchingField = true
+              trimParquetType(field, fileType.asGroupType().getType(field.getName))
+            } else {
+              Some(field)
+            }
+          }).filter(_.isDefined).map(_.get).asJava
+          if (hasMatchingField && !fields.isEmpty) {
+            Some(groupType.withNewFields(fields))
+          } else {
+            None
+          }
+        case _ => Some(requestedType)
+      }
+    }
+  }
+
+  def getRepairedSchema(fileSchema: MessageType, tableSchema: org.apache.hudi.common.util.Option[MessageType]): MessageType = {
+    try {
+      val schemaRepairClass = Class.forName("org.apache.parquet.schema.SchemaRepair")
+      val repairMethod = schemaRepairClass.getMethod(
+        "repairLogicalTypes", classOf[MessageType], classOf[org.apache.hudi.common.util.Option[MessageType]])
+      repairMethod.invoke(null, fileSchema, tableSchema).asInstanceOf[MessageType]
+    } catch {
+      case _: Exception => fileSchema
+    }
+  }
+}
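
To illustrate the trimming behaviour described in the Scaladoc above: if a requested nested group has no child that still exists in the file, the whole group is dropped from the requested schema. (getRepairedSchema is looked up reflectively, presumably to avoid a hard compile-time dependency on a SchemaRepair class that may not be present in every Parquet build on the classpath.) Below is a small sketch of trimParquetSchema using the standard Parquet Types builder; the column names are invented for the example.

import org.apache.parquet.schema.PrimitiveType.PrimitiveTypeName
import org.apache.parquet.schema.{MessageType, Types}
import org.apache.spark.sql.execution.datasources.parquet.HoodieParquetReadSupport

object TrimParquetSchemaSketch {
  def main(args: Array[String]): Unit = {
    // Requested schema: top-level "id" plus a nested group "extra" that only asks for "note".
    val requested: MessageType = Types.buildMessage()
      .addField(Types.optional(PrimitiveTypeName.INT64).named("id"))
      .addField(Types.optionalGroup()
        .addField(Types.optional(PrimitiveTypeName.BINARY).named("note"))
        .named("extra"))
      .named("spark_schema")

    // File schema: "extra" exists but contains a different child, so "note" has no match in the file.
    val fileSchema: MessageType = Types.buildMessage()
      .addField(Types.optional(PrimitiveTypeName.INT64).named("id"))
      .addField(Types.optionalGroup()
        .addField(Types.optional(PrimitiveTypeName.INT32).named("flag"))
        .named("extra"))
      .named("spark_schema")

    // No requested child of "extra" matches the file, so the empty group is removed
    // and only "id" survives in the trimmed request.
    val trimmed = HoodieParquetReadSupport.trimParquetSchema(requested, fileSchema)
    println(trimmed)
  }
}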

hudi-common/pom.xml

Lines changed: 33 additions & 0 deletions
@@ -54,6 +54,39 @@
           <skip>false</skip>
         </configuration>
       </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>build-helper-maven-plugin</artifactId>
+        <version>3.5.0</version>
+        <executions>
+          <execution>
+            <id>add-spark34plus-avro-sources</id>
+            <phase>generate-sources</phase>
+            <goals>
+              <goal>add-source</goal>
+            </goals>
+            <configuration>
+              <skipAddSource>${spark33orEarlier}</skipAddSource>
+              <sources>
+                <source>src/avro/java</source>
+              </sources>
+            </configuration>
+          </execution>
+          <execution>
+            <id>add-spark34plus-avro-test-sources</id>
+            <phase>generate-test-sources</phase>
+            <goals>
+              <goal>add-test-source</goal>
+            </goals>
+            <configuration>
+              <skipAddTestSource>${spark33orEarlier}</skipAddTestSource>
+              <sources>
+                <source>src/avro/test/java</source>
+              </sources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
       <plugin>
         <groupId>org.apache.rat</groupId>
         <artifactId>apache-rat-plugin</artifactId>
