6969import io .trino .plugin .deltalake .transactionlog .DeltaLakeSchemaSupport .ColumnMappingMode ;
7070import io .trino .plugin .deltalake .transactionlog .DeltaLakeSchemaSupport .UnsupportedTypeException ;
7171import io .trino .plugin .deltalake .transactionlog .DeltaLakeTransactionLogEntry ;
72+ import io .trino .plugin .deltalake .transactionlog .DeltaLakeVersionChecksum ;
7273import io .trino .plugin .deltalake .transactionlog .MetadataEntry ;
7374import io .trino .plugin .deltalake .transactionlog .ProtocolEntry ;
7475import io .trino .plugin .deltalake .transactionlog .RemoveFileEntry ;
7576import io .trino .plugin .deltalake .transactionlog .TableSnapshot ;
7677import io .trino .plugin .deltalake .transactionlog .Transaction ;
7778import io .trino .plugin .deltalake .transactionlog .TransactionLogAccess ;
7879import io .trino .plugin .deltalake .transactionlog .TransactionLogEntries ;
80+ import io .trino .plugin .deltalake .transactionlog .TransactionLogParser .CommitVersionChecksumFileInfo ;
7981import io .trino .plugin .deltalake .transactionlog .checkpoint .CheckpointWriterManager ;
8082import io .trino .plugin .deltalake .transactionlog .checkpoint .LastCheckpoint ;
8183import io .trino .plugin .deltalake .transactionlog .checkpoint .MetadataAndProtocolEntries ;
162164import io .trino .spi .type .VarcharType ;
163165
164166import java .io .IOException ;
167+ import java .io .UncheckedIOException ;
165168import java .net .URI ;
166169import java .net .URISyntaxException ;
167170import java .time .Duration ;
241244import static io .trino .plugin .deltalake .DeltaLakeSessionProperties .getHiveCatalogName ;
242245import static io .trino .plugin .deltalake .DeltaLakeSessionProperties .isCollectExtendedStatisticsColumnStatisticsOnWrite ;
243246import static io .trino .plugin .deltalake .DeltaLakeSessionProperties .isExtendedStatisticsEnabled ;
247+ import static io .trino .plugin .deltalake .DeltaLakeSessionProperties .isLoadMetadataFromChecksumFile ;
244248import static io .trino .plugin .deltalake .DeltaLakeSessionProperties .isProjectionPushdownEnabled ;
245249import static io .trino .plugin .deltalake .DeltaLakeSessionProperties .isQueryPartitionFilterRequired ;
246250import static io .trino .plugin .deltalake .DeltaLakeSessionProperties .isStoreTableMetadataInMetastoreEnabled ;
298302import static io .trino .plugin .deltalake .transactionlog .MetadataEntry .DELTA_CHANGE_DATA_FEED_ENABLED_PROPERTY ;
299303import static io .trino .plugin .deltalake .transactionlog .MetadataEntry .configurationForNewTable ;
300304import static io .trino .plugin .deltalake .transactionlog .TemporalTimeTravelUtil .findLatestVersionUsingTemporal ;
305+ import static io .trino .plugin .deltalake .transactionlog .TransactionLogParser .findLatestCommitVersionChecksumFileInfo ;
301306import static io .trino .plugin .deltalake .transactionlog .TransactionLogParser .getMandatoryCurrentVersion ;
302307import static io .trino .plugin .deltalake .transactionlog .TransactionLogParser .readLastCheckpoint ;
308+ import static io .trino .plugin .deltalake .transactionlog .TransactionLogParser .readVersionChecksumFile ;
303309import static io .trino .plugin .deltalake .transactionlog .TransactionLogUtil .getTransactionLogDir ;
304310import static io .trino .plugin .deltalake .transactionlog .TransactionLogUtil .getTransactionLogJsonEntryPath ;
305311import static io .trino .plugin .deltalake .transactionlog .checkpoint .TransactionLogTail .getEntriesFromJson ;
@@ -480,6 +486,15 @@ private record QueriedTable(SchemaTableName schemaTableName, long version)
480486 }
481487 }
482488
489+ private record DeltaLakeTableDescriptor (long version , MetadataEntry metadataEntry , ProtocolEntry protocolEntry )
490+ {
491+ DeltaLakeTableDescriptor
492+ {
493+ requireNonNull (metadataEntry , "metadataEntry is null" );
494+ requireNonNull (protocolEntry , "protocolEntry is null" );
495+ }
496+ }
497+
483498 public DeltaLakeMetadata (
484499 DeltaLakeMetastore metastore ,
485500 TransactionLogAccess transactionLogAccess ,
@@ -717,27 +732,22 @@ public LocatedTableHandle getTableHandle(
717732
718733 String tableLocation = table .location ();
719734 TrinoFileSystem fileSystem = fileSystemFactory .create (session , table );
720- TableSnapshot tableSnapshot = getSnapshot (session , table , endVersion .map (version -> getVersion (session , fileSystem , tableLocation , version , metadataFetchingExecutor )));
721735
722- MetadataAndProtocolEntries logEntries ;
736+ DeltaLakeTableDescriptor descriptor ;
723737 try {
724- logEntries = transactionLogAccess . getMetadataAndProtocolEntry (session , fileSystem , tableSnapshot );
738+ descriptor = loadDescriptor (session , tableName , table , fileSystem , tableLocation , startVersion , endVersion );
725739 }
726740 catch (TrinoException e ) {
727741 if (e .getErrorCode ().equals (DELTA_LAKE_INVALID_SCHEMA .toErrorCode ())) {
728- return new CorruptedDeltaLakeTableHandle (tableName , table .catalogOwned (), managed , tableLocation , e );
742+ return new CorruptedDeltaLakeTableHandle (tableName , table .catalogOwned (), table . managed () , tableLocation , e );
729743 }
730744 throw e ;
731745 }
732- MetadataEntry metadataEntry = logEntries .metadata ().orElse (null );
733- if (metadataEntry == null ) {
734- return new CorruptedDeltaLakeTableHandle (tableName , table .catalogOwned (), managed , tableLocation , new TrinoException (DELTA_LAKE_INVALID_SCHEMA , "Metadata not found in transaction log for " + tableSnapshot .getTable ()));
735- }
736746
737- ProtocolEntry protocolEntry = logEntries . protocol (). orElse ( null );
738- if ( protocolEntry == null ) {
739- return new CorruptedDeltaLakeTableHandle ( tableName , table . catalogOwned (), managed , tableLocation , new TrinoException ( DELTA_LAKE_INVALID_SCHEMA , "Protocol not found in transaction log for " + tableSnapshot . getTable ()) );
740- }
747+ MetadataEntry metadataEntry = descriptor . metadataEntry ( );
748+ ProtocolEntry protocolEntry = descriptor . protocolEntry ();
749+ long snapshotVersion = descriptor . version ( );
750+
741751 if (protocolEntry .minReaderVersion () > MAX_READER_VERSION ) {
742752 LOG .debug ("Skip %s because the reader version is unsupported: %d" , tableName , protocolEntry .minReaderVersion ());
743753 return null ;
@@ -750,8 +760,8 @@ public LocatedTableHandle getTableHandle(
750760 verifySupportedColumnMapping (getColumnMappingMode (metadataEntry , protocolEntry ));
751761 if (metadataScheduler .canStoreTableMetadata (session , metadataEntry .getSchemaString (), Optional .ofNullable (metadataEntry .getDescription ())) &&
752762 endVersion .isEmpty () &&
753- !isSameTransactionVersion (metastoreTable .get (), tableSnapshot )) {
754- tableUpdateInfos .put (tableName , new TableUpdateInfo (session , tableSnapshot . getVersion () , metadataEntry .getSchemaString (), Optional .ofNullable (metadataEntry .getDescription ())));
763+ !isSameTransactionVersion (metastoreTable .get (), snapshotVersion )) {
764+ tableUpdateInfos .put (tableName , new TableUpdateInfo (session , snapshotVersion , metadataEntry .getSchemaString (), Optional .ofNullable (metadataEntry .getDescription ())));
755765 }
756766 return new DeltaLakeTableHandle (
757767 tableName .getSchemaName (),
@@ -767,10 +777,119 @@ public LocatedTableHandle getTableHandle(
767777 Optional .empty (),
768778 Optional .empty (),
769779 Optional .empty (),
770- tableSnapshot . getVersion () ,
780+ snapshotVersion ,
771781 endVersion .isPresent ());
772782 }
773783
784+ private DeltaLakeTableDescriptor loadDescriptor (ConnectorSession session , SchemaTableName tableName , DeltaMetastoreTable table , TrinoFileSystem fileSystem , String tableLocation , Optional <ConnectorTableVersion > startVersion , Optional <ConnectorTableVersion > endVersion )
785+ {
786+ Optional <Long > endTableVersion = endVersion .map (version -> getVersion (session , fileSystem , tableLocation , version , metadataFetchingExecutor ));
787+
788+ // Load descriptor from the latest checksum file, if enabled and available
789+ if (isLoadMetadataFromChecksumFile (session )) {
790+ Optional <Long > startTableVersion = startVersion .map (version -> getVersion (session , fileSystem , tableLocation , version , metadataFetchingExecutor ));
791+
792+ Optional <DeltaLakeTableDescriptor > descriptor = loadDescriptorFromChecksum (tableName , fileSystem , tableLocation , startTableVersion , endTableVersion );
793+ if (descriptor .isPresent ()) {
794+ return descriptor .get ();
795+ }
796+ }
797+
798+ // Fall back to scanning the transaction log if checksum file reading is disabled, if the latest checksum file is
799+ // missing, or if the checksum file does not capture the relevant information
800+ return loadDescriptorFromTransactionLog (session , table , fileSystem , endTableVersion );
801+ }
802+
803+ private Optional <DeltaLakeTableDescriptor > loadDescriptorFromChecksum (
804+ SchemaTableName tableName ,
805+ TrinoFileSystem fileSystem ,
806+ String tableLocation ,
807+ Optional <Long > startTableVersion ,
808+ Optional <Long > endTableVersion )
809+ {
810+ long latestEligibleCommit ;
811+
812+ if (endTableVersion .isPresent ()) {
813+ // Optimization: we already validated the existence of endTableVersion in getVersion, so endTableVersion is
814+ // definitionally the latest eligible commit. Attempt to read the latest checksum file directly without an
815+ // additional list operation
816+ latestEligibleCommit = endTableVersion .orElseThrow ();
817+ }
818+ else {
819+ Optional <CommitVersionChecksumFileInfo > checksumFileInfo ;
820+ try {
821+ checksumFileInfo = findLatestCommitVersionChecksumFileInfo (fileSystem , tableLocation , startTableVersion , endTableVersion );
822+ }
823+ catch (IOException | UncheckedIOException e ) {
824+ // If we hit an IO-related error when determining the latest eligible commit, treat this as a hard failure;
825+ // falling back to scanning the Delta log is unlikely to help
826+ throw new TrinoException (DELTA_LAKE_FILESYSTEM_ERROR , "Failed to determine latest commit version for " + tableName , e );
827+ }
828+
829+ if (checksumFileInfo .isEmpty ()) {
830+ // If there are absolutely no commits in the specified range in the Delta log, fail fast to avoid an
831+ // additional useless scan over the log. For consistency, use the same error message as on Delta log scan
832+ // codepath
833+ throw new TrinoException (DELTA_LAKE_INVALID_SCHEMA , "Metadata not found in transaction log for " + tableName );
834+ }
835+
836+ CommitVersionChecksumFileInfo info = checksumFileInfo .orElseThrow ();
837+ if (!info .hasVersionChecksumFile ()) {
838+ // If there exists a commit in the specified range, but there is no version checksum file available for that
839+ // commit, fall back to scanning the Delta log. Version checksum files are optional per the Delta spec
840+ return Optional .empty ();
841+ }
842+
843+ latestEligibleCommit = info .version ();
844+ }
845+
846+ Optional <DeltaLakeVersionChecksum > versionChecksum ;
847+ try {
848+ versionChecksum = readVersionChecksumFile (fileSystem , tableLocation , latestEligibleCommit );
849+ }
850+ catch (IOException | UncheckedIOException e ) {
851+ throw new TrinoException (DELTA_LAKE_FILESYSTEM_ERROR , format ("Failed to read checksum file for version %d of table %s" , latestEligibleCommit , tableName ), e );
852+ }
853+
854+ if (versionChecksum .isEmpty ()) {
855+ // Nonexistent or structurally-invalid version checksum file; fall back to scanning the Delta log
856+ return Optional .empty ();
857+ }
858+
859+ DeltaLakeVersionChecksum checksum = versionChecksum .orElseThrow ();
860+
861+ MetadataEntry metadataEntry = checksum .getMetadata ();
862+ ProtocolEntry protocolEntry = checksum .getProtocol ();
863+ if (metadataEntry == null || protocolEntry == null ) {
864+ // Version checksum file is missing critical information; fall back to scanning the Delta log
865+ return Optional .empty ();
866+ }
867+
868+ return Optional .of (new DeltaLakeTableDescriptor (latestEligibleCommit , metadataEntry , protocolEntry ));
869+ }
870+
871+ private DeltaLakeTableDescriptor loadDescriptorFromTransactionLog (
872+ ConnectorSession session ,
873+ DeltaMetastoreTable table ,
874+ TrinoFileSystem fileSystem ,
875+ Optional <Long > endTableVersion )
876+ {
877+ TableSnapshot tableSnapshot = getSnapshot (session , table , endTableVersion );
878+ MetadataAndProtocolEntries logEntries = transactionLogAccess .getMetadataAndProtocolEntry (session , fileSystem , tableSnapshot );
879+
880+ MetadataEntry metadataEntry = logEntries .metadata ().orElse (null );
881+ if (metadataEntry == null ) {
882+ throw new TrinoException (DELTA_LAKE_INVALID_SCHEMA , "Metadata not found in transaction log for " + tableSnapshot .getTable ());
883+ }
884+
885+ ProtocolEntry protocolEntry = logEntries .protocol ().orElse (null );
886+ if (protocolEntry == null ) {
887+ throw new TrinoException (DELTA_LAKE_INVALID_SCHEMA , "Protocol not found in transaction log for " + tableSnapshot .getTable ());
888+ }
889+
890+ return new DeltaLakeTableDescriptor (tableSnapshot .getVersion (), metadataEntry , protocolEntry );
891+ }
892+
774893 @ Override
775894 public ConnectorTableProperties getTableProperties (ConnectorSession session , ConnectorTableHandle tableHandle )
776895 {
0 commit comments