
Commit 8583da1

Fix more issues
1 parent 8559d6a commit 8583da1

19 files changed: +360 -282 lines changed

hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/io/storage/HoodieSparkParquetReader.java

Lines changed: 14 additions & 5 deletions
@@ -19,6 +19,7 @@
 package org.apache.hudi.io.storage;
 
 import org.apache.hudi.SparkAdapterSupport$;
+import org.apache.hudi.avro.AvroSchemaUtils;
 import org.apache.hudi.avro.HoodieAvroUtils;
 import org.apache.hudi.common.bloom.BloomFilter;
 import org.apache.hudi.common.model.HoodieFileFormat;
@@ -31,6 +32,7 @@
 import org.apache.hudi.common.util.StringUtils;
 import org.apache.hudi.common.util.collection.ClosableIterator;
 import org.apache.hudi.common.util.collection.CloseableMappingIterator;
+import org.apache.hudi.common.util.collection.Pair;
 import org.apache.hudi.storage.HoodieStorage;
 import org.apache.hudi.storage.StoragePath;

@@ -56,6 +58,7 @@
 
 import static org.apache.hudi.common.util.TypeUtils.unsafeCast;
 import static org.apache.parquet.avro.AvroSchemaConverter.ADD_LIST_ELEMENT_RECORDS;
+import static org.apache.parquet.avro.HoodieAvroParquetSchemaConverter.getAvroSchemaConverter;
 
 public class HoodieSparkParquetReader implements HoodieSparkFileReader {

@@ -126,13 +129,19 @@ private ClosableIterator<InternalRow> getInternalRowIterator(Schema readerSchema
     if (requestedSchema == null) {
       requestedSchema = readerSchema;
     }
-    StructType readerStructType = HoodieInternalRowUtils.getCachedSchema(readerSchema);
-    StructType requestedStructType = HoodieInternalRowUtils.getCachedSchema(requestedSchema);
-    storage.getConf().set(ParquetReadSupport.PARQUET_READ_SCHEMA, readerStructType.json());
-    storage.getConf().set(ParquetReadSupport.SPARK_ROW_REQUESTED_SCHEMA(), requestedStructType.json());
+
+    MessageType fileSchema = getFileSchema();
+    Schema nonNullSchema = AvroSchemaUtils.getNonNullTypeFromUnion(requestedSchema);
+    Option<MessageType> messageSchema = Option.of(getAvroSchemaConverter(storage.getConf().unwrapAs(Configuration.class)).convert(nonNullSchema));
+    Pair<StructType, StructType> readerSchemas =
+        SparkAdapterSupport$.MODULE$.sparkAdapter().getReaderSchemas(storage, readerSchema, requestedSchema, fileSchema);
+    storage.getConf().set(ParquetReadSupport.PARQUET_READ_SCHEMA, readerSchemas.getLeft().json());
+    storage.getConf().set(ParquetReadSupport.SPARK_ROW_REQUESTED_SCHEMA(), readerSchemas.getRight().json());
     storage.getConf().set(SQLConf.PARQUET_BINARY_AS_STRING().key(), SQLConf.get().getConf(SQLConf.PARQUET_BINARY_AS_STRING()).toString());
     storage.getConf().set(SQLConf.PARQUET_INT96_AS_TIMESTAMP().key(), SQLConf.get().getConf(SQLConf.PARQUET_INT96_AS_TIMESTAMP()).toString());
-    ParquetReader<InternalRow> reader = ParquetReader.<InternalRow>builder((ReadSupport) new ParquetReadSupport(), new Path(path.toUri()))
+    ParquetReader<InternalRow> reader = ParquetReader.<InternalRow>builder(
+        (ReadSupport) SparkAdapterSupport$.MODULE$.sparkAdapter().getParquetReadSupport(messageSchema),
+        new Path(path.toUri()))
         .withConf(storage.getConf().unwrapAs(Configuration.class))
         .build();
     ParquetReaderIterator<InternalRow> parquetReaderIterator = new ParquetReaderIterator<>(reader);
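For context on the change above: the reader now unwraps the nullable union from the requested Avro schema before handing it to the Parquet schema converter. A minimal standalone sketch of that unwrapping step (an illustration of what AvroSchemaUtils.getNonNullTypeFromUnion is presumably doing, not Hudi's actual implementation; the object and method names below are made up for the example):

import org.apache.avro.Schema

import scala.collection.JavaConverters._

// Nullable Avro fields are modeled as a union ["null", T]; a Parquet schema
// converter needs the non-null branch T. This helper returns that branch,
// or the schema itself when it is not a union.
object NonNullUnionSketch {
  def nonNullBranch(schema: Schema): Schema =
    if (schema.getType == Schema.Type.UNION) {
      schema.getTypes.asScala.find(_.getType != Schema.Type.NULL).getOrElse(schema)
    } else {
      schema
    }

  def main(args: Array[String]): Unit = {
    val nullableString = new Schema.Parser().parse("""["null", "string"]""")
    println(nonNullBranch(nullableString)) // prints "string"
  }
}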

hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieParquetFileFormatHelper.scala renamed to hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/HoodieParquetFileFormatHelper.scala

Lines changed: 23 additions & 15 deletions
@@ -1,34 +1,41 @@
 /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
  *
- *    http://www.apache.org/licenses/LICENSE-2.0
+ *   http://www.apache.org/licenses/LICENSE-2.0
  *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
  */
 
 package org.apache.spark.sql.execution.datasources.parquet
 
 import org.apache.hadoop.conf.Configuration
 import org.apache.parquet.hadoop.metadata.FileMetaData
-import org.apache.spark.sql.types.{ArrayType, DataType, MapType, StructField, StructType}
+import org.apache.spark.sql.types._
 
 object HoodieParquetFileFormatHelper {
-
   def buildImplicitSchemaChangeInfo(hadoopConf: Configuration,
                                     parquetFileMetaData: FileMetaData,
                                     requiredSchema: StructType): (java.util.Map[Integer, org.apache.hudi.common.util.collection.Pair[DataType, DataType]], StructType) = {
-    val implicitTypeChangeInfo: java.util.Map[Integer, org.apache.hudi.common.util.collection.Pair[DataType, DataType]] = new java.util.HashMap()
     val convert = new ParquetToSparkSchemaConverter(hadoopConf)
     val fileStruct = convert.convert(parquetFileMetaData.getSchema)
+    buildImplicitSchemaChangeInfo(fileStruct, requiredSchema)
+  }
+
+  def buildImplicitSchemaChangeInfo(fileStruct: StructType,
+                                    requiredSchema: StructType): (java.util.Map[Integer, org.apache.hudi.common.util.collection.Pair[DataType, DataType]], StructType) = {
+    val implicitTypeChangeInfo: java.util.Map[Integer, org.apache.hudi.common.util.collection.Pair[DataType, DataType]] = new java.util.HashMap()
+
     val fileStructMap = fileStruct.fields.map(f => (f.name, f.dataType)).toMap
     // if there are missing fields or if field's data type needs to be changed while reading, we handle it here.
    val sparkRequestStructFields = requiredSchema.map(f => {
@@ -45,6 +52,7 @@ object HoodieParquetFileFormatHelper {
   }
 
   def isDataTypeEqual(requiredType: DataType, fileType: DataType): Boolean = (requiredType, fileType) match {
+
     case (requiredType, fileType) if requiredType == fileType => true
 
     case (ArrayType(rt, _), ArrayType(ft, _)) =>
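The refactoring above splits buildImplicitSchemaChangeInfo so the comparison logic can be reused when the file's StructType is already known, instead of always being derived from the Parquet footer metadata. A simplified, self-contained sketch of the underlying idea (illustrative names only, not the Hudi helper itself):

import org.apache.spark.sql.types._

// Compare the schema found in the file with the schema the query asks for and
// record fields whose types differ, so a reader can later up-cast them
// (e.g. an int written in the file but a long requested).
object ImplicitTypeChangeSketch {
  def typeChanges(fileStruct: StructType, requiredSchema: StructType): Map[String, (DataType, DataType)] = {
    val fileTypes = fileStruct.fields.map(f => f.name -> f.dataType).toMap
    requiredSchema.fields.flatMap { f =>
      fileTypes.get(f.name) match {
        case Some(fileType) if fileType != f.dataType => Some(f.name -> (f.dataType, fileType))
        case _ => None // field missing in the file, or types already match
      }
    }.toMap
  }

  def main(args: Array[String]): Unit = {
    val fileStruct = StructType(Seq(StructField("id", IntegerType), StructField("name", StringType)))
    val required = StructType(Seq(StructField("id", LongType), StructField("name", StringType)))
    println(typeChanges(fileStruct, required)) // Map(id -> (LongType,IntegerType))
  }
}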

hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/hudi/SparkAdapter.scala

Lines changed: 13 additions & 3 deletions
@@ -18,11 +18,12 @@
 
 package org.apache.spark.sql.hudi
 
-import org.apache.avro.Schema
 import org.apache.hudi.client.utils.SparkRowSerDe
 import org.apache.hudi.common.table.HoodieTableMetaClient
-import org.apache.hudi.storage.StoragePath
+import org.apache.hudi.storage.{HoodieStorage, StoragePath}
 
+import org.apache.avro.Schema
+import org.apache.parquet.schema.MessageType
 import org.apache.spark.sql._
 import org.apache.spark.sql.avro.{HoodieAvroDeserializer, HoodieAvroSchemaConverters, HoodieAvroSerializer}
 import org.apache.spark.sql.catalyst.analysis.EliminateSubqueryAliases
@@ -55,6 +56,15 @@ trait SparkAdapter extends Serializable {
 
   def isTimestampNTZType(dataType: DataType): Boolean
 
+  def getParquetReadSupport(messageSchema: org.apache.hudi.common.util.Option[MessageType]): org.apache.parquet.hadoop.api.ReadSupport[_]
+
+  def repairSchemaIfSpecified(shouldRepair: Boolean,
+                              fileSchema: MessageType,
+                              tableSchemaOpt: org.apache.hudi.common.util.Option[MessageType]): MessageType
+
+  def getReaderSchemas(storage: HoodieStorage, readerSchema: Schema, requestedSchema: Schema, fileSchema: MessageType):
+    org.apache.hudi.common.util.collection.Pair[StructType, StructType]
+
   /**
    * Creates Catalyst [[Metadata]] for Hudi's meta-fields (designating these w/
    * [[METADATA_COL_ATTR_KEY]] if available (available in Spark >= 3.2)
@@ -172,7 +182,7 @@ trait SparkAdapter extends Serializable {
   /**
    * Create instance of [[ParquetFileFormat]]
    */
-  def createLegacyHoodieParquetFileFormat(appendPartitionValues: Boolean): Option[ParquetFileFormat]
+  def createLegacyHoodieParquetFileFormat(appendPartitionValues: Boolean, tableAvroSchema: Schema): Option[ParquetFileFormat]
 
   def makeColumnarBatch(vectors: Array[ColumnVector], numRows: Int): ColumnarBatch

hudi-client/hudi-spark-client/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/SparkBasicSchemaEvolution.scala renamed to hudi-client/hudi-spark-client/src/parquet/scala/org/apache/spark/sql/execution/datasources/parquet/SparkBasicSchemaEvolution.scala

Lines changed: 0 additions & 1 deletion
@@ -21,7 +21,6 @@ package org.apache.spark.sql.execution.datasources.parquet
 
 import org.apache.spark.sql.types.StructType
 
-
 /**
  * Intended to be used just with HoodieSparkParquetReader to avoid any java/scala issues
  */

hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieBaseRelation.scala

Lines changed: 4 additions & 2 deletions
@@ -244,7 +244,8 @@ abstract class HoodieBaseRelation(val sqlContext: SQLContext,
       case HoodieFileFormat.PARQUET =>
         // We're delegating to Spark to append partition values to every row only in cases
         // when these corresponding partition-values are not persisted w/in the data file itself
-        val parquetFileFormat = sparkAdapter.createLegacyHoodieParquetFileFormat(shouldExtractPartitionValuesFromPartitionPath).get
+        val parquetFileFormat = sparkAdapter.createLegacyHoodieParquetFileFormat(
+          shouldExtractPartitionValuesFromPartitionPath, tableAvroSchema).get
         (parquetFileFormat, LegacyHoodieParquetFileFormat.FILE_FORMAT_ID)
     }

@@ -552,7 +553,8 @@ abstract class HoodieBaseRelation(val sqlContext: SQLContext,
       hadoopConf = hadoopConf,
       // We're delegating to Spark to append partition values to every row only in cases
       // when these corresponding partition-values are not persisted w/in the data file itself
-      appendPartitionValues = shouldAppendPartitionValuesOverride.getOrElse(shouldExtractPartitionValuesFromPartitionPath)
+      appendPartitionValues = shouldAppendPartitionValuesOverride.getOrElse(shouldExtractPartitionValuesFromPartitionPath),
+      tableAvroSchema
     )
     // Since partition values by default are omitted, and not persisted w/in data-files by Spark,
     // data-file readers (such as [[ParquetFileFormat]]) have to inject partition values while reading

hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/HoodieDataSourceHelper.scala

Lines changed: 3 additions & 2 deletions
@@ -52,8 +52,9 @@ object HoodieDataSourceHelper extends PredicateHelper with SparkAdapterSupport {
                               filters: Seq[Filter],
                               options: Map[String, String],
                               hadoopConf: Configuration,
-                              appendPartitionValues: Boolean = false): PartitionedFile => Iterator[InternalRow] = {
-    val parquetFileFormat: ParquetFileFormat = sparkAdapter.createLegacyHoodieParquetFileFormat(appendPartitionValues).get
+                              appendPartitionValues: Boolean = false,
+                              avroTableSchema: Schema): PartitionedFile => Iterator[InternalRow] = {
+    val parquetFileFormat: ParquetFileFormat = sparkAdapter.createLegacyHoodieParquetFileFormat(appendPartitionValues, avroTableSchema).get
     val readParquetFile: PartitionedFile => Iterator[Any] = parquetFileFormat.buildReaderWithPartitionValues(
       sparkSession = sparkSession,
       dataSchema = dataSchema,

hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/hudi/cdc/CDCRelation.scala

Lines changed: 2 additions & 1 deletion
@@ -102,7 +102,8 @@ class CDCRelation(
       requiredSchema = tableStructSchema,
       filters = Nil,
       options = options,
-      hadoopConf = spark.sessionState.newHadoopConf()
+      hadoopConf = spark.sessionState.newHadoopConf(),
+      avroTableSchema = tableAvroSchema
     )
 
     val changes = cdcExtractor.extractCDCFileSplits().values().asScala.map { splits =>

hudi-spark-datasource/hudi-spark-common/src/main/scala/org/apache/spark/sql/execution/datasources/parquet/LegacyHoodieParquetFileFormat.scala

Lines changed: 2 additions & 2 deletions
@@ -39,7 +39,7 @@ class LegacyHoodieParquetFileFormat extends ParquetFileFormat with SparkAdapterS
 
   override def supportBatch(sparkSession: SparkSession, schema: StructType): Boolean = {
     sparkAdapter
-      .createLegacyHoodieParquetFileFormat(true).get.supportBatch(sparkSession, schema)
+      .createLegacyHoodieParquetFileFormat(true, null).get.supportBatch(sparkSession, schema)
   }
 
   override def buildReaderWithPartitionValues(sparkSession: SparkSession,
@@ -54,7 +54,7 @@ class LegacyHoodieParquetFileFormat extends ParquetFileFormat with SparkAdapterS
       DataSourceReadOptions.EXTRACT_PARTITION_VALUES_FROM_PARTITION_PATH.defaultValue.toString).toBoolean
 
     sparkAdapter
-      .createLegacyHoodieParquetFileFormat(shouldExtractPartitionValuesFromPartitionPath).get
+      .createLegacyHoodieParquetFileFormat(shouldExtractPartitionValuesFromPartitionPath, null).get
       .buildReaderWithPartitionValues(sparkSession, dataSchema, partitionSchema, requiredSchema, filters, options, hadoopConf)
   }
 }

hudi-spark-datasource/hudi-spark/src/test/scala/org/apache/hudi/functional/TestCOWDataSource.scala

Lines changed: 12 additions & 13 deletions
@@ -45,16 +45,13 @@ import org.apache.hudi.hive.HiveSyncConfigHolder
 import org.apache.hudi.keygen.constant.KeyGeneratorOptions
 import org.apache.hudi.keygen.{ComplexKeyGenerator, CustomKeyGenerator, GlobalDeleteKeyGenerator, NonpartitionedKeyGenerator, SimpleKeyGenerator, TimestampBasedKeyGenerator}
 import org.apache.hudi.metrics.{Metrics, MetricsReporterType}
-import org.apache.hudi.storage.{HoodieStorageUtils, StoragePath, StoragePathFilter}
+import org.apache.hudi.storage.{StoragePath, StoragePathFilter}
 import org.apache.hudi.table.HoodieSparkTable
 import org.apache.hudi.testutils.HoodieSparkClientTestBase
 import org.apache.hudi.util.JFunction
-import org.apache.hudi.{AvroConversionUtils, DataSourceReadOptions, DataSourceWriteOptions, HoodieDataSourceHelpers, QuickstartUtils, ScalaAssertionSupport}
 
 import org.apache.hadoop.fs.FileSystem
 import org.apache.spark.sql.functions.{col, concat, lit, udf, when}
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.fs.{FileSystem, Path, PathFilter}
 import org.apache.spark.sql._
 import org.apache.spark.sql.functions._
 import org.apache.spark.sql.hudi.HoodieSparkSessionExtension
@@ -66,6 +63,7 @@ import org.junit.jupiter.api.function.Executable
 import org.junit.jupiter.api.{AfterEach, BeforeEach, Disabled, Test}
 import org.junit.jupiter.params.ParameterizedTest
 import org.junit.jupiter.params.provider.{CsvSource, EnumSource, ValueSource}
+import org.slf4j.LoggerFactory
 
 import java.net.URI
 import java.nio.file.Paths
@@ -1699,7 +1697,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
   }
 
   @ParameterizedTest
-  @CsvSource(Array("true, 6", "false, 6", "true, 8", "false, 8", "true, 9", "false, 9"))
+  @CsvSource(Array("true, 6", "false, 6"))
   def testLogicalTypesReadRepair(vectorizedReadEnabled: Boolean, tableVersion: Int): Unit = {
     // Note: for spark 3.3 and 3.4 we should fall back to nonvectorized reader
     // if that is not happening then this test will fail
@@ -1712,6 +1710,7 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
     }
     spark.conf.set("spark.sql.parquet.enableVectorizedReader", vectorizedReadEnabled.toString)
     spark.conf.set("spark.sql.session.timeZone", "UTC")
+    spark.conf.set("spark.sql.parquet.inferTimestampNTZ.enabled", "true")
     val tableName = "trips_logical_types_json_cow_read_v" + tableVersion
     val dataPath = "file://" + basePath + "/" + tableName
     val zipOutput = Paths.get(new URI(dataPath))
@@ -1725,15 +1724,15 @@ class TestCOWDataSource extends HoodieSparkClientTestBase with ScalaAssertionSup
     for (row <- rows) {
       val hash = row.get(6).asInstanceOf[String].hashCode()
       if ((hash & 1) == 0) {
-        assertEquals("2020-01-01T00:00:00.001Z", row.get(15).asInstanceOf[Timestamp].toInstant.toString)
-        assertEquals("2020-06-01T12:00:00.000001Z", row.get(16).asInstanceOf[Timestamp].toInstant.toString)
-        assertEquals("2015-05-20T12:34:56.001", row.get(17).toString)
-        assertEquals("2017-07-07T07:07:07.000001", row.get(18).toString)
+        assertEquals("2020-01-01T00:00:00.001Z", row.get(14).asInstanceOf[Timestamp].toInstant.toString)
+        assertEquals("2020-06-01T12:00:00.000001Z", row.get(15).asInstanceOf[Timestamp].toInstant.toString)
+        assertEquals("2015-05-20T12:34:56.001", row.get(16).toString)
+        assertEquals("2017-07-07T07:07:07.000001", row.get(17).toString)
       } else {
-        assertEquals("2019-12-31T23:59:59.999Z", row.get(15).asInstanceOf[Timestamp].toInstant.toString)
-        assertEquals("2020-06-01T11:59:59.999999Z", row.get(16).asInstanceOf[Timestamp].toInstant.toString)
-        assertEquals("2015-05-20T12:34:55.999", row.get(17).toString)
-        assertEquals("2017-07-07T07:07:06.999999", row.get(18).toString)
+        assertEquals("2019-12-31T23:59:59.999Z", row.get(14).asInstanceOf[Timestamp].toInstant.toString)
+        assertEquals("2020-06-01T11:59:59.999999Z", row.get(15).asInstanceOf[Timestamp].toInstant.toString)
+        assertEquals("2015-05-20T12:34:55.999", row.get(16).toString)
+        assertEquals("2017-07-07T07:07:06.999999", row.get(17).toString)
       }
     }
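The test now enables Parquet TimestampNTZ inference before reading. A rough standalone sketch of what that flag controls, assuming Spark 3.4 or later where the flag exists; the path /tmp/ts_ntz is hypothetical and stands for any Parquet file with isAdjustedToUTC=false timestamp columns:

import org.apache.spark.sql.SparkSession

object InferTimestampNtzSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local[1]").appName("ntz-sketch").getOrCreate()

    // With inference on, Parquet timestamps that are not adjusted to UTC are read
    // back as TIMESTAMP_NTZ (no time zone) instead of the session-zone TIMESTAMP type.
    spark.conf.set("spark.sql.parquet.inferTimestampNTZ.enabled", "true")
    spark.read.parquet("/tmp/ts_ntz").printSchema()

    // With inference off, the same columns surface as plain timestamp values.
    spark.conf.set("spark.sql.parquet.inferTimestampNTZ.enabled", "false")
    spark.read.parquet("/tmp/ts_ntz").printSchema()

    spark.stop()
  }
}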

hudi-spark-datasource/hudi-spark2/src/main/scala/org/apache/spark/sql/adapter/Spark2Adapter.scala

Lines changed: 26 additions & 5 deletions
@@ -18,16 +18,16 @@
 
 package org.apache.spark.sql.adapter
 
-import org.apache.avro.Schema
-import org.apache.hadoop.fs.FileStatus
-import org.apache.hadoop.fs.Path
 import org.apache.hudi.client.utils.SparkRowSerDe
 import org.apache.hudi.common.table.HoodieTableMetaClient
-import org.apache.hudi.storage.StoragePath
+import org.apache.hudi.storage.{HoodieStorage, StoragePath}
 import org.apache.hudi.{AvroConversionUtils, DefaultSource, Spark2HoodieFileScanRDD, Spark2RowSerDe}
 
 import org.apache.avro.Schema
 import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.fs.FileStatus
+import org.apache.hadoop.fs.Path
+import org.apache.parquet.schema.MessageType
 import org.apache.spark.sql._
 import org.apache.spark.sql.avro._
 import org.apache.spark.sql.catalyst.InternalRow
@@ -83,6 +83,19 @@ class Spark2Adapter extends SparkAdapter {
     dataType.getClass.getSimpleName.startsWith("TimestampNTZType")
   }
 
+  override def getParquetReadSupport(messageScheme: org.apache.hudi.common.util.Option[MessageType]):
+    org.apache.parquet.hadoop.api.ReadSupport[_] = {
+    // ParquetReadSupport is package-private in Spark 2.4, so we use reflection to instantiate it
+    val clazz = Class.forName("org.apache.spark.sql.execution.datasources.parquet.ParquetReadSupport")
+    clazz.getDeclaredConstructor().newInstance().asInstanceOf[org.apache.parquet.hadoop.api.ReadSupport[_]]
+  }
+
+  override def repairSchemaIfSpecified(shouldRepair: Boolean,
+                                       fileSchema: MessageType,
+                                       tableSchemaOpt: org.apache.hudi.common.util.Option[MessageType]): MessageType = {
+    fileSchema
+  }
+
   override def getCatalystPlanUtils: HoodieCatalystPlansUtils = HoodieSpark2CatalystPlanUtils
 
   override def getCatalystExpressionUtils: HoodieCatalystExpressionUtils = HoodieSpark2CatalystExpressionUtils
@@ -153,7 +166,7 @@ class Spark2Adapter extends SparkAdapter {
     partitions.toSeq
   }
 
-  override def createLegacyHoodieParquetFileFormat(appendPartitionValues: Boolean): Option[ParquetFileFormat] = {
+  override def createLegacyHoodieParquetFileFormat(appendPartitionValues: Boolean, tableAvroSchema: Schema): Option[ParquetFileFormat] = {
     Some(new Spark24LegacyHoodieParquetFileFormat(appendPartitionValues))
   }

@@ -216,4 +229,12 @@ class Spark2Adapter extends SparkAdapter {
     batch.setNumRows(numRows)
     batch
   }
+
+  override def getReaderSchemas(storage: HoodieStorage, readerSchema: Schema, requestedSchema: Schema, fileSchema: MessageType):
+    org.apache.hudi.common.util.collection.Pair[StructType, StructType] = {
+    org.apache.hudi.common.util.collection.Pair.of(
+      HoodieInternalRowUtils.getCachedSchema(readerSchema),
+      HoodieInternalRowUtils.getCachedSchema(requestedSchema)
+    )
+  }
 }
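On the Spark2Adapter change above: looking a class up by its fully qualified name and calling its no-arg constructor is a standard JVM reflection pattern. A tiny generic illustration of that pattern, using java.util.ArrayList as a stand-in target rather than Hudi or Spark code:

object ReflectiveInstantiationSketch {
  // Looks a class up by name and invokes its no-arg constructor; constructors that
  // are not accessible from the caller would additionally need setAccessible(true).
  def newInstanceOf[T](className: String): T = {
    val clazz = Class.forName(className)
    clazz.getDeclaredConstructor().newInstance().asInstanceOf[T]
  }

  def main(args: Array[String]): Unit = {
    val list = newInstanceOf[java.util.List[String]]("java.util.ArrayList")
    list.add("instantiated via reflection")
    println(list) // [instantiated via reflection]
  }
}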
