@@ -133,6 +133,17 @@ public class IcebergScanNode extends FileQueryScanNode {
133133 private Map <String , String > backendStorageProperties ;
134134
135135 private Boolean isBatchMode = null ;
136+ // Cached values for LocationPath creation optimization
137+ // These are lazily initialized on first use to avoid parsing overhead for each file
138+ private volatile StorageProperties cachedStorageProperties ;
139+ private volatile String cachedSchema ;
140+ private volatile String cachedFsIdPrefix ;
141+ private volatile boolean locationPathCacheInitialized = false ;
142+ // Cache for path prefix transformation to avoid repeated S3URI parsing
143+ // Maps original path prefix (e.g., "https://bucket.s3.amazonaws.com/") to normalized prefix (e.g., "s3://bucket/")
144+ private volatile String cachedOriginalPathPrefix ;
145+ private volatile String cachedNormalizedPathPrefix ;
146+ private volatile String cachedFsIdentifier ;
136147
137148 // for test
138149 @ VisibleForTesting
@@ -547,9 +558,83 @@ private CloseableIterable<FileScanTask> planFileScanTaskWithManifestCache(TableS
547558 return TableScanUtil .splitFiles (CloseableIterable .withNoopClose (tasks ), targetSplitSize );
548559 }
549560
561+ /**
562+ * Initialize cached values for LocationPath creation on first use.
563+ * This avoids repeated StorageProperties lookup, scheme parsing, and S3URI regex parsing for each file.
564+ */
565+ private void initLocationPathCache (String samplePath ) {
566+ if (locationPathCacheInitialized ) {
567+ return ;
568+ }
569+ synchronized (this ) {
570+ if (locationPathCacheInitialized ) {
571+ return ;
572+ }
573+ try {
574+ // Create a LocationPath using the full method to get all cached values
575+ LocationPath sampleLocationPath = LocationPath .of (samplePath , storagePropertiesMap );
576+ cachedStorageProperties = sampleLocationPath .getStorageProperties ();
577+ cachedSchema = sampleLocationPath .getSchema ();
578+ cachedFsIdentifier = sampleLocationPath .getFsIdentifier ();
579+
580+ // Extract fsIdPrefix like "s3://" from fsIdentifier like "s3://bucket"
581+ int schemeEnd = cachedFsIdentifier .indexOf ("://" );
582+ if (schemeEnd > 0 ) {
583+ cachedFsIdPrefix = cachedFsIdentifier .substring (0 , schemeEnd + 3 );
584+ }
585+
586+ // Cache path prefix mapping for fast transformation
587+ // This allows subsequent files to skip S3URI regex parsing entirely
588+ String normalizedPath = sampleLocationPath .getNormalizedLocation ();
589+
590+ // Find the common prefix by looking for the last '/' before the filename
591+ int lastSlashInOriginal = samplePath .lastIndexOf ('/' );
592+ int lastSlashInNormalized = normalizedPath .lastIndexOf ('/' );
593+
594+ if (lastSlashInOriginal > 0 && lastSlashInNormalized > 0 ) {
595+ cachedOriginalPathPrefix = samplePath .substring (0 , lastSlashInOriginal + 1 );
596+ cachedNormalizedPathPrefix = normalizedPath .substring (0 , lastSlashInNormalized + 1 );
597+ }
598+
599+ locationPathCacheInitialized = true ;
600+ } catch (Exception e ) {
601+ // If caching fails, we'll fall back to the full method each time
602+ LOG .warn ("Failed to initialize LocationPath cache, will use full parsing" , e );
603+ locationPathCacheInitialized = true ;
604+ }
605+ }
606+ }
607+
608+ /**
609+ * Create a LocationPath with cached values for better performance.
610+ * Uses cached path prefix mapping to completely bypass S3URI regex parsing for most files.
611+ * Falls back to full parsing if cache is not available or path doesn't match cached prefix.
612+ */
613+ private LocationPath createLocationPathWithCache (String path ) {
614+ // Initialize cache on first call
615+ if (!locationPathCacheInitialized ) {
616+ initLocationPathCache (path );
617+ }
618+
619+ // Fast path: if path starts with cached original prefix, directly transform without any parsing
620+ if (cachedOriginalPathPrefix != null && path .startsWith (cachedOriginalPathPrefix )) {
621+ // Transform: replace original prefix with normalized prefix
622+ String normalizedPath = cachedNormalizedPathPrefix + path .substring (cachedOriginalPathPrefix .length ());
623+ return LocationPath .ofDirect (normalizedPath , cachedSchema , cachedFsIdentifier , cachedStorageProperties );
624+ }
625+
626+ // Medium path: use cached StorageProperties but still need validateAndNormalizeUri
627+ if (cachedStorageProperties != null ) {
628+ return LocationPath .ofWithCache (path , cachedStorageProperties , cachedSchema , cachedFsIdPrefix );
629+ }
630+
631+ // Fallback to full parsing
632+ return LocationPath .of (path , storagePropertiesMap );
633+ }
634+
550635 private Split createIcebergSplit (FileScanTask fileScanTask ) {
551636 String originalPath = fileScanTask .file ().path ().toString ();
552- LocationPath locationPath = LocationPath . of (originalPath , storagePropertiesMap );
637+ LocationPath locationPath = createLocationPathWithCache (originalPath );
553638 IcebergSplit split = new IcebergSplit (
554639 locationPath ,
555640 fileScanTask .start (),
0 commit comments