6969import io .trino .plugin .deltalake .transactionlog .DeltaLakeSchemaSupport .ColumnMappingMode ;
7070import io .trino .plugin .deltalake .transactionlog .DeltaLakeSchemaSupport .UnsupportedTypeException ;
7171import io .trino .plugin .deltalake .transactionlog .DeltaLakeTransactionLogEntry ;
72+ import io .trino .plugin .deltalake .transactionlog .DeltaLakeVersionChecksum ;
7273import io .trino .plugin .deltalake .transactionlog .MetadataEntry ;
7374import io .trino .plugin .deltalake .transactionlog .ProtocolEntry ;
7475import io .trino .plugin .deltalake .transactionlog .RemoveFileEntry ;
162163import io .trino .spi .type .VarcharType ;
163164
164165import java .io .IOException ;
166+ import java .io .UncheckedIOException ;
165167import java .net .URI ;
166168import java .net .URISyntaxException ;
167169import java .time .Duration ;
241243import static io .trino .plugin .deltalake .DeltaLakeSessionProperties .getHiveCatalogName ;
242244import static io .trino .plugin .deltalake .DeltaLakeSessionProperties .isCollectExtendedStatisticsColumnStatisticsOnWrite ;
243245import static io .trino .plugin .deltalake .DeltaLakeSessionProperties .isExtendedStatisticsEnabled ;
246+ import static io .trino .plugin .deltalake .DeltaLakeSessionProperties .isLoadMetadataFromChecksumFile ;
244247import static io .trino .plugin .deltalake .DeltaLakeSessionProperties .isProjectionPushdownEnabled ;
245248import static io .trino .plugin .deltalake .DeltaLakeSessionProperties .isQueryPartitionFilterRequired ;
246249import static io .trino .plugin .deltalake .DeltaLakeSessionProperties .isStoreTableMetadataInMetastoreEnabled ;
298301import static io .trino .plugin .deltalake .transactionlog .MetadataEntry .DELTA_CHANGE_DATA_FEED_ENABLED_PROPERTY ;
299302import static io .trino .plugin .deltalake .transactionlog .MetadataEntry .configurationForNewTable ;
300303import static io .trino .plugin .deltalake .transactionlog .TemporalTimeTravelUtil .findLatestVersionUsingTemporal ;
304+ import static io .trino .plugin .deltalake .transactionlog .TransactionLogParser .getLatestCommitVersion ;
301305import static io .trino .plugin .deltalake .transactionlog .TransactionLogParser .getMandatoryCurrentVersion ;
302306import static io .trino .plugin .deltalake .transactionlog .TransactionLogParser .readLastCheckpoint ;
307+ import static io .trino .plugin .deltalake .transactionlog .TransactionLogParser .readVersionChecksumFile ;
303308import static io .trino .plugin .deltalake .transactionlog .TransactionLogUtil .getTransactionLogDir ;
304309import static io .trino .plugin .deltalake .transactionlog .TransactionLogUtil .getTransactionLogJsonEntryPath ;
305310import static io .trino .plugin .deltalake .transactionlog .checkpoint .TransactionLogTail .getEntriesFromJson ;
@@ -480,6 +485,15 @@ private record QueriedTable(SchemaTableName schemaTableName, long version)
480485 }
481486 }
482487
// Bundles a table's metadata entry and protocol entry with the table version they were loaded for.
// The compact constructor rejects a null metadataEntry or protocolEntry.
488+ private record MetadataAndProtocolAndVersion (long version , MetadataEntry metadataEntry , ProtocolEntry protocolEntry )
489+ {
490+ MetadataAndProtocolAndVersion
491+ {
492+ requireNonNull (metadataEntry , "metadataEntry is null" );
493+ requireNonNull (protocolEntry , "protocolEntry is null" );
494+ }
495+ }
496+
483497 public DeltaLakeMetadata (
484498 DeltaLakeMetastore metastore ,
485499 TransactionLogAccess transactionLogAccess ,
@@ -717,27 +731,39 @@ public LocatedTableHandle getTableHandle(
717731
718732 String tableLocation = table .location ();
719733 TrinoFileSystem fileSystem = fileSystemFactory .create (session , table );
720- TableSnapshot tableSnapshot = getSnapshot ( session , table , endVersion .map (version -> getVersion (session , fileSystem , tableLocation , version , metadataFetchingExecutor ) ));
734+ Optional < Long > endTableVersion = endVersion .map (version -> getVersion (session , fileSystem , tableLocation , version , metadataFetchingExecutor ));
721735
722- MetadataAndProtocolEntries logEntries ;
723- try {
724- logEntries = transactionLogAccess .getMetadataAndProtocolEntry (session , fileSystem , tableSnapshot );
725- }
726- catch (TrinoException e ) {
727- if (e .getErrorCode ().equals (DELTA_LAKE_INVALID_SCHEMA .toErrorCode ())) {
728- return new CorruptedDeltaLakeTableHandle (tableName , table .catalogOwned (), managed , tableLocation , e );
736+ Optional <MetadataAndProtocolAndVersion > metadataAndProtocol = Optional .empty ();
737+ if (isLoadMetadataFromChecksumFile (session )) {
738+ Optional <Long > startTableVersion = startVersion .map (version -> getVersion (session , fileSystem , tableLocation , version , metadataFetchingExecutor ));
739+
740+ try {
741+ metadataAndProtocol = loadMetadataAndProtocolFromChecksum (tableName , fileSystem , tableLocation , startTableVersion , endTableVersion );
742+ }
743+ catch (TrinoException e ) {
744+ if (e .getErrorCode ().equals (DELTA_LAKE_INVALID_SCHEMA .toErrorCode ())) {
745+ return new CorruptedDeltaLakeTableHandle (tableName , table .catalogOwned (), managed , tableLocation , e );
746+ }
747+ throw e ;
729748 }
730- throw e ;
731- }
732- MetadataEntry metadataEntry = logEntries .metadata ().orElse (null );
733- if (metadataEntry == null ) {
734- return new CorruptedDeltaLakeTableHandle (tableName , table .catalogOwned (), managed , tableLocation , new TrinoException (DELTA_LAKE_INVALID_SCHEMA , "Metadata not found in transaction log for " + tableSnapshot .getTable ()));
735749 }
736750
737- ProtocolEntry protocolEntry = logEntries .protocol ().orElse (null );
738- if (protocolEntry == null ) {
739- return new CorruptedDeltaLakeTableHandle (tableName , table .catalogOwned (), managed , tableLocation , new TrinoException (DELTA_LAKE_INVALID_SCHEMA , "Protocol not found in transaction log for " + tableSnapshot .getTable ()));
751+ if (metadataAndProtocol .isEmpty ()) {
752+ try {
753+ metadataAndProtocol = Optional .of (loadMetadataAndProtocolFromTransactionLog (session , table , fileSystem , endTableVersion ));
754+ }
755+ catch (TrinoException e ) {
756+ if (e .getErrorCode ().equals (DELTA_LAKE_INVALID_SCHEMA .toErrorCode ())) {
757+ return new CorruptedDeltaLakeTableHandle (tableName , table .catalogOwned (), managed , tableLocation , e );
758+ }
759+ throw e ;
760+ }
740761 }
762+ MetadataAndProtocolAndVersion tableState = metadataAndProtocol .orElseThrow ();
763+ MetadataEntry metadataEntry = tableState .metadataEntry ();
764+ ProtocolEntry protocolEntry = tableState .protocolEntry ();
765+ long snapshotVersion = tableState .version ();
766+
741767 if (protocolEntry .minReaderVersion () > MAX_READER_VERSION ) {
742768 LOG .debug ("Skip %s because the reader version is unsupported: %d" , tableName , protocolEntry .minReaderVersion ());
743769 return null ;
@@ -750,8 +776,8 @@ public LocatedTableHandle getTableHandle(
750776 verifySupportedColumnMapping (getColumnMappingMode (metadataEntry , protocolEntry ));
751777 if (metadataScheduler .canStoreTableMetadata (session , metadataEntry .getSchemaString (), Optional .ofNullable (metadataEntry .getDescription ())) &&
752778 endVersion .isEmpty () &&
753- !isSameTransactionVersion (metastoreTable .get (), tableSnapshot )) {
754- tableUpdateInfos .put (tableName , new TableUpdateInfo (session , tableSnapshot . getVersion () , metadataEntry .getSchemaString (), Optional .ofNullable (metadataEntry .getDescription ())));
779+ !isSameTransactionVersion (metastoreTable .get (), snapshotVersion )) {
780+ tableUpdateInfos .put (tableName , new TableUpdateInfo (session , snapshotVersion , metadataEntry .getSchemaString (), Optional .ofNullable (metadataEntry .getDescription ())));
755781 }
756782 return new DeltaLakeTableHandle (
757783 tableName .getSchemaName (),
@@ -767,10 +793,84 @@ public LocatedTableHandle getTableHandle(
767793 Optional .empty (),
768794 Optional .empty (),
769795 Optional .empty (),
770- tableSnapshot . getVersion () ,
796+ snapshotVersion ,
771797 endVersion .isPresent ());
772798 }
773799
// Tries to obtain the metadata and protocol entries from the version checksum file of the latest eligible commit.
// Returns Optional.empty() when the checksum file is absent or lacks metadata/protocol, so the caller can fall back
// to another source. Throws TrinoException with DELTA_LAKE_FILESYSTEM_ERROR when the latest commit version cannot
// be determined because of an I/O failure, and with DELTA_LAKE_INVALID_SCHEMA when no commit is found.
800+ private Optional <MetadataAndProtocolAndVersion > loadMetadataAndProtocolFromChecksum (
801+ SchemaTableName tableName ,
802+ TrinoFileSystem fileSystem ,
803+ String tableLocation ,
804+ Optional <Long > startTableVersion ,
805+ Optional <Long > endTableVersion )
806+ {
807+ // If we fail to identify the latest commit in the specified range (either due to an IO-related error or a
808+ // total lack of commits), treat this as a hard failure. Falling back to scanning the log is unlikely to help
809+ 
810+ OptionalLong latestCommitVersion ;
811+ 
812+ if (endTableVersion .isPresent ()) {
813+ // Optimization: we already validated the existence of endTableVersion in getVersion, so endTableVersion is
814+ // by definition the latest eligible commit
815+ latestCommitVersion = OptionalLong .of (endTableVersion .get ());
816+ }
817+ else {
818+ try {
819+ latestCommitVersion = getLatestCommitVersion (fileSystem , tableLocation , startTableVersion , endTableVersion );
820+ }
821+ catch (IOException | UncheckedIOException e ) {
822+ throw new TrinoException (DELTA_LAKE_FILESYSTEM_ERROR , "Failed to determine latest commit version for " + tableName , e );
823+ }
824+ }
825+ 
// NOTE(review): only the getLatestCommitVersion branch can leave this empty (the other branch uses OptionalLong.of),
// so endTableVersion is always empty when this error is raised and the message always reports "latest".
826+ if (latestCommitVersion .isEmpty ()) {
827+ String startTableVersionMemo = startTableVersion .map (Object ::toString ).orElse ("earliest" );
828+ String endTableVersionMemo = endTableVersion .map (Object ::toString ).orElse ("latest" );
829+ throw new TrinoException (DELTA_LAKE_INVALID_SCHEMA , format ("Delta table %s has no commits between %s and %s" , tableName , startTableVersionMemo , endTableVersionMemo ));
830+ }
831+ 
832+ // Valid Delta tables needn't have checksum files, and valid checksum files needn't have metadata and protocol --
833+ // in this case, we should fall back gracefully to scanning the Delta log
834+ 
835+ long latestChecksumVersion = latestCommitVersion .getAsLong ();
836+ Optional <DeltaLakeVersionChecksum > versionChecksum = readVersionChecksumFile (fileSystem , tableLocation , latestChecksumVersion );
837+ if (versionChecksum .isEmpty ()) {
838+ return Optional .empty ();
839+ }
840+ 
841+ DeltaLakeVersionChecksum checksum = versionChecksum .get ();
842+ 
843+ MetadataEntry metadataEntry = checksum .getMetadata ();
844+ ProtocolEntry protocolEntry = checksum .getProtocol ();
845+ if (metadataEntry == null || protocolEntry == null ) {
846+ return Optional .empty ();
847+ }
848+ 
849+ return Optional .of (new MetadataAndProtocolAndVersion (latestChecksumVersion , metadataEntry , protocolEntry ));
850+ }
851+
// Loads the metadata and protocol entries through transactionLogAccess for the snapshot returned by getSnapshot
// (optionally bounded by endTableVersion) and pairs them with that snapshot's version.
// Throws TrinoException with DELTA_LAKE_INVALID_SCHEMA when either entry is missing.
852+ private MetadataAndProtocolAndVersion loadMetadataAndProtocolFromTransactionLog (
853+ ConnectorSession session ,
854+ DeltaMetastoreTable table ,
855+ TrinoFileSystem fileSystem ,
856+ Optional <Long > endTableVersion )
857+ {
858+ TableSnapshot tableSnapshot = getSnapshot (session , table , endTableVersion );
859+ MetadataAndProtocolEntries logEntries = transactionLogAccess .getMetadataAndProtocolEntry (session , fileSystem , tableSnapshot );
860+ 
861+ MetadataEntry metadataEntry = logEntries .metadata ().orElse (null );
862+ if (metadataEntry == null ) {
863+ throw new TrinoException (DELTA_LAKE_INVALID_SCHEMA , "Metadata not found in transaction log for " + tableSnapshot .getTable ());
864+ }
865+ 
866+ ProtocolEntry protocolEntry = logEntries .protocol ().orElse (null );
867+ if (protocolEntry == null ) {
868+ throw new TrinoException (DELTA_LAKE_INVALID_SCHEMA , "Protocol not found in transaction log for " + tableSnapshot .getTable ());
869+ }
870+ 
871+ return new MetadataAndProtocolAndVersion (tableSnapshot .getVersion (), metadataEntry , protocolEntry );
872+ }
873+
774874 @ Override
775875 public ConnectorTableProperties getTableProperties (ConnectorSession session , ConnectorTableHandle tableHandle )
776876 {
0 commit comments