Skip to content

Commit 5f36bf7

Browse files
fix(plugin-delta): Fix problem reading tables with spaces in location or partition values (#26397)
1 parent 5efa463 commit 5f36bf7

File tree

19 files changed

+13
-11
lines changed

19 files changed

+13
-11
lines changed

presto-delta/src/main/java/com/facebook/presto/delta/DeltaExpressionUtils.java

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,7 @@
3232
import io.delta.kernel.utils.CloseableIterator;
3333

3434
import java.io.IOException;
35+
import java.net.URI;
3536
import java.sql.Date;
3637
import java.sql.Timestamp;
3738
import java.util.Iterator;
@@ -239,7 +240,7 @@ private static boolean evaluatePartitionPredicate(
239240
for (DeltaColumnHandle partitionColumn : partitionColumns) {
240241
String columnName = partitionColumn.getName();
241242
String partitionValue = InternalScanFileUtils.getPartitionValues(row).get(columnName);
242-
String filePath = InternalScanFileUtils.getAddFileStatus(row).getPath();
243+
String filePath = URI.create(InternalScanFileUtils.getAddFileStatus(row).getPath()).getPath();
243244
logger.debug("Obtaining domain of file: " + filePath);
244245
Domain domain = getDomain(partitionColumn, partitionValue, typeManager, filePath);
245246
Optional<Map<String, Domain>> domains = partitionPredicate.getDomains();

presto-delta/src/main/java/com/facebook/presto/delta/DeltaSplitManager.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030

3131
import java.io.IOException;
3232
import java.io.UncheckedIOException;
33+
import java.net.URI;
3334
import java.util.Map;
3435
import java.util.Map.Entry;
3536
import java.util.concurrent.CompletableFuture;
@@ -97,7 +98,7 @@ public CompletableFuture<ConnectorSplitBatch> getNextBatch(ConnectorPartitionHan
9798
connectorId,
9899
deltaTable.getSchemaName(),
99100
deltaTable.getTableName(),
100-
addFileStatus.getPath(),
101+
URI.create(addFileStatus.getPath()).getPath(),
101102
0, /* start */
102103
addFileStatus.getSize() /* split length - default is read the entire file in one split */,
103104
addFileStatus.getSize(),
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"txnId":"43073c59-3636-4134-9608-1f30c3f32ec2","tableSizeBytes":0,"numFiles":0,"numDeletedRecordsOpt":0,"numDeletionVectorsOpt":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"379c3206-6fc5-4c37-887b-8128df98f1f5","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"as_int\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_long\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_byte\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_short\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_boolean\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_float\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_double\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_string\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_timestamp\",\"type\":\"timestamp_ntz\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_big_decimal\",\"type\":\"decimal(38,18)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"value\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["as_int","as_long","as_byte","as_short","as_boolean","as_float","as_double","as_string","as_date","as_timestamp","as_big_decimal"],"configuration":{"delta.checkpoint.writeStatsAsJson":"false","delta.checkpoint.writeStatsAsStruct":"true","delta.enableDeletionVectors":"true"},"createdTime":1761674769368},"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","timestampNtz"],"writerFeatures":["deletionVectors","timestampNtz","appendOnly","invariants"]},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,419
43040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},"deletedRecordCountsHistogramOpt":{"deletedRecordCounts":[0,0,0,0,0,0,0,0,0,0]},"allFiles":[]}
Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,3 @@
1-
{"commitInfo":{"timestamp":1713955885069,"operation":"WRITE","operationParameters":{"mode":"Overwrite","partitionBy":"[\"as_int\",\"as_long\",\"as_byte\",\"as_short\",\"as_boolean\",\"as_float\",\"as_double\",\"as_string\",\"as_date\",\"as_timestamp\",\"as_big_decimal\"]"},"isolationLevel":"Serializable","isBlindAppend":false,"operationMetrics":{"numFiles":"3","numOutputRows":"3","numOutputBytes":"1347"},"engineInfo":"Apache-Spark/3.5.1 Delta-Lake/3.1.0","txnId":"9b250066-b90d-4fe5-abf7-04990dc85713"}}
2-
{"metaData":{"id":"13d0a1fb-53aa-49f6-b5f0-c5347c805d6f","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"as_int\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_long\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_byte\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_short\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_boolean\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_float\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_double\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_string\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_timestamp\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_big_decimal\",\"type\":\"decimal(1,0)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"value\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["as_int","as_long","as_byte","as_short","as_boolean","as_float","as_double","as_string","as_date","as_timestamp","as_big_decimal"],"configuration":{},"createdTime":1713955883329}}
3-
{"protocol":{"minReaderVersion":1,"minWriterVersion":2}}
4-
{"add":{"path":"as_int=1/as_long=1/as_byte=1/as_short=1/as_boolean=false/as_float=1.0/as_double=1.0/as_string=1/as_date=2021-09-08/as_timestamp=2021-09-08%252011%253A11%253A11/as_big_decimal=1/part-00000-889df1d9-6d79-4ea3-8b69-971cec9bf618.c000.snappy.parquet","partitionValues":{"as_big_decimal":"1","as_int":"1","as_byte":"1","as_long":"1","as_date":"2021-09-08","as_string":"1","as_timestamp":"2021-09-08 11:11:11","as_float":"1.0","as_short":"1","as_boolean":"false","as_double":"1.0"},"size":449,"modificationTime":1713955884743,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"value\":\"1\"},\"maxValues\":{\"value\":\"1\"},\"nullCount\":{\"value\":0}}"}}
5-
{"add":{"path":"as_int=__HIVE_DEFAULT_PARTITION__/as_long=__HIVE_DEFAULT_PARTITION__/as_byte=__HIVE_DEFAULT_PARTITION__/as_short=__HIVE_DEFAULT_PARTITION__/as_boolean=__HIVE_DEFAULT_PARTITION__/as_float=__HIVE_DEFAULT_PARTITION__/as_double=__HIVE_DEFAULT_PARTITION__/as_string=__HIVE_DEFAULT_PARTITION__/as_date=__HIVE_DEFAULT_PARTITION__/as_timestamp=__HIVE_DEFAULT_PARTITION__/as_big_decimal=__HIVE_DEFAULT_PARTITION__/part-00001-6c239ff5-7f3e-4bbd-b06a-4ea89364c08a.c000.snappy.parquet","partitionValues":{"as_big_decimal":null,"as_int":null,"as_byte":null,"as_long":null,"as_date":null,"as_string":null,"as_timestamp":null,"as_float":null,"as_short":null,"as_boolean":null,"as_double":null},"size":449,"modificationTime":1713955884743,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"value\":\"2\"},\"maxValues\":{\"value\":\"2\"},\"nullCount\":{\"value\":0}}"}}
6-
{"add":{"path":"as_int=0/as_long=0/as_byte=0/as_short=0/as_boolean=true/as_float=0.0/as_double=0.0/as_string=0/as_date=2021-09-08/as_timestamp=2021-09-08%252011%253A11%253A11/as_big_decimal=0/part-00002-8e5e2719-f9d0-4e23-8c27-1ac72563c6ab.c000.snappy.parquet","partitionValues":{"as_big_decimal":"0","as_int":"0","as_byte":"0","as_long":"0","as_date":"2021-09-08","as_string":"0","as_timestamp":"2021-09-08 11:11:11","as_float":"0.0","as_short":"0","as_boolean":"true","as_double":"0.0"},"size":449,"modificationTime":1713955884743,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"value\":\"0\"},\"maxValues\":{\"value\":\"0\"},\"nullCount\":{\"value\":0}}"}}
1+
{"commitInfo":{"timestamp":1761674769606,"userId":"6777253476263814","userName":"[email protected]","operation":"CREATE TABLE","operationParameters":{"partitionBy":"[\"as_int\",\"as_long\",\"as_byte\",\"as_short\",\"as_boolean\",\"as_float\",\"as_double\",\"as_string\",\"as_date\",\"as_timestamp\",\"as_big_decimal\"]","clusterBy":"[]","description":null,"isManaged":"false","properties":"{\"delta.checkpoint.writeStatsAsJson\":\"false\",\"delta.checkpoint.writeStatsAsStruct\":\"true\",\"delta.enableDeletionVectors\":\"true\"}","statsOnLoad":false},"job":{"jobId":"","runId":""},"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/17.2.x-photon-scala2.13","txnId":"43073c59-3636-4134-9608-1f30c3f32ec2"}}
2+
{"metaData":{"id":"379c3206-6fc5-4c37-887b-8128df98f1f5","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"as_int\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_long\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_byte\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_short\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_boolean\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_float\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_double\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_string\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_timestamp\",\"type\":\"timestamp_ntz\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_big_decimal\",\"type\":\"decimal(38,18)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"value\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["as_int","as_long","as_byte","as_short","as_boolean","as_float","as_double","as_string","as_date","as_timestamp","as_big_decimal"],"configuration":{"delta.checkpoint.writeStatsAsJson":"false","delta.checkpoint.writeStatsAsStruct":"true","delta.enableDeletionVectors":"true"},"createdTime":1761674769368}}
3+
{"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","timestampNtz"],"writerFeatures":["deletionVectors","timestampNtz","appendOnly","invariants"]}}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"txnId":"65f81a4f-7285-45dd-8ce7-8a20f8aa8ef8","tableSizeBytes":1935,"numFiles":3,"numDeletedRecordsOpt":0,"numDeletionVectorsOpt":0,"numMetadata":1,"numProtocol":1,"setTransactions":[],"domainMetadata":[],"metadata":{"id":"379c3206-6fc5-4c37-887b-8128df98f1f5","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"as_int\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_long\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_byte\",\"type\":\"byte\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_short\",\"type\":\"short\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_boolean\",\"type\":\"boolean\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_float\",\"type\":\"float\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_double\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_string\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_date\",\"type\":\"date\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_timestamp\",\"type\":\"timestamp_ntz\",\"nullable\":true,\"metadata\":{}},{\"name\":\"as_big_decimal\",\"type\":\"decimal(38,18)\",\"nullable\":true,\"metadata\":{}},{\"name\":\"value\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["as_int","as_long","as_byte","as_short","as_boolean","as_float","as_double","as_string","as_date","as_timestamp","as_big_decimal"],"configuration":{"delta.checkpoint.writeStatsAsJson":"false","delta.checkpoint.writeStatsAsStruct":"true","delta.enableDeletionVectors":"true"},"createdTime":1761674769368},"protocol":{"minReaderVersion":3,"minWriterVersion":7,"readerFeatures":["deletionVectors","timestampNtz"],"writerFeatures":["deletionVectors","timestampNtz","appendOnly","invariants"]},"histogramOpt":{"sortedBinBoundaries":[0,8192,16384,32768,65536,131072,262144,524288,1048576,2097152,4194304,8388608,12582912,16777216,20971520,25165824,29360128,33554432,37748736,
41943040,50331648,58720256,67108864,75497472,83886080,92274688,100663296,109051904,117440512,125829120,130023424,134217728,138412032,142606336,146800640,150994944,167772160,184549376,201326592,218103808,234881024,251658240,268435456,285212672,301989888,318767104,335544320,352321536,369098752,385875968,402653184,419430400,436207616,452984832,469762048,486539264,503316480,520093696,536870912,553648128,570425344,587202560,603979776,671088640,738197504,805306368,872415232,939524096,1006632960,1073741824,1140850688,1207959552,1275068416,1342177280,1409286144,1476395008,1610612736,1744830464,1879048192,2013265920,2147483648,2415919104,2684354560,2952790016,3221225472,3489660928,3758096384,4026531840,4294967296,8589934592,17179869184,34359738368,68719476736,137438953472,274877906944],"fileCounts":[3,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0],"totalBytes":[1935,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]},"deletedRecordCountsHistogramOpt":{"deletedRecordCounts":[3,0,0,0,0,0,0,0,0,0]},"allFiles":[{"path":"as_int=__HIVE_DEFAULT_PARTITION__/as_long=__HIVE_DEFAULT_PARTITION__/as_byte=__HIVE_DEFAULT_PARTITION__/as_short=__HIVE_DEFAULT_PARTITION__/as_boolean=__HIVE_DEFAULT_PARTITION__/as_float=__HIVE_DEFAULT_PARTITION__/as_double=__HIVE_DEFAULT_PARTITION__/as_string=__HIVE_DEFAULT_PARTITION__/as_date=__HIVE_DEFAULT_PARTITION__/as_timestamp=__HIVE_DEFAULT_PARTITION__/as_big_decimal=__HIVE_DEFAULT_PARTITION__/part-00000-16f19de3-66b6-4c24-8558-f9d84ff07d49.c000.snappy.parquet","partitionValues":{"as_big_decimal":null,"as_int":null,"as_byte":null,"as_string":null,"as_timestamp":null,"as_float":null,"as_short":null,"as_boolean":null,"as_double":null,"as_long":null,"as_date":null},"size":645,"modifi
cationTime":1761674772000,"dataChange":false,"stats":"{\"numRecords\":1,\"minValues\":{\"value\":\"2\"},\"maxValues\":{\"value\":\"2\"},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1761674772000000","MIN_INSERTION_TIME":"1761674772000000","MAX_INSERTION_TIME":"1761674772000000","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"as_int=1/as_long=1/as_byte=1/as_short=1/as_boolean=false/as_float=1.0/as_double=1.0/as_string=1/as_date=2021-09-08/as_timestamp=2021-09-08%2006%253A11%253A11/as_big_decimal=1.000000000000000000/part-00002-6c7a17a8-afab-413a-96af-894d78ab408e.c000.snappy.parquet","partitionValues":{"as_big_decimal":"1.000000000000000000","as_int":"1","as_byte":"1","as_string":"1","as_timestamp":"2021-09-08 06:11:11","as_float":"1.0","as_short":"1","as_boolean":"false","as_double":"1.0","as_long":"1","as_date":"2021-09-08"},"size":645,"modificationTime":1761674772000,"dataChange":false,"stats":"{\"numRecords\":1,\"minValues\":{\"value\":\"1\"},\"maxValues\":{\"value\":\"1\"},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1761674772000002","MIN_INSERTION_TIME":"1761674772000002","MAX_INSERTION_TIME":"1761674772000002","OPTIMIZE_TARGET_SIZE":"268435456"}},{"path":"as_int=0/as_long=0/as_byte=0/as_short=0/as_boolean=true/as_float=0.0/as_double=0.0/as_string=0/as_date=2021-09-08/as_timestamp=2021-09-08%2006%253A11%253A11/as_big_decimal=0.000000000000000000/part-00001-60f500b5-1817-43e3-863c-92012bcb0486.c000.snappy.parquet","partitionValues":{"as_big_decimal":"0.000000000000000000","as_int":"0","as_byte":"0","as_string":"0","as_timestamp":"2021-09-08 
06:11:11","as_float":"0.0","as_short":"0","as_boolean":"true","as_double":"0.0","as_long":"0","as_date":"2021-09-08"},"size":645,"modificationTime":1761674772000,"dataChange":false,"stats":"{\"numRecords\":1,\"minValues\":{\"value\":\"0\"},\"maxValues\":{\"value\":\"0\"},\"nullCount\":{\"value\":0},\"tightBounds\":true}","tags":{"INSERTION_TIME":"1761674772000001","MIN_INSERTION_TIME":"1761674772000001","MAX_INSERTION_TIME":"1761674772000001","OPTIMIZE_TARGET_SIZE":"268435456"}}]}

0 commit comments

Comments (0)