Skip to content

Commit 2908d6d

Browse files
committed
Fixed s3 decoding
Signed-off-by: Kanthi Subramanian <subkanthi@gmail.com>
1 parent 216182f commit 2908d6d

File tree

2 files changed

+52
-4
lines changed
  • src/Storages/ObjectStorage
  • tests/integration/test_database_iceberg

2 files changed

+52
-4
lines changed

src/Storages/ObjectStorage/Utils.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -513,9 +513,11 @@ std::pair<DB::ObjectStoragePtr, std::string> resolveObjectStorageForPath(
513513
normalized_path = "gs://" + target_decomposed.authority + "/" + target_decomposed.key;
514514
}
515515
S3::URI s3_uri(normalized_path);
516-
517-
std::string key_to_use = s3_uri.key;
518-
516+
517+
// Use target_decomposed.key (parsed without URI decoding) so that percent-encoded
518+
// characters in object keys (e.g. %2F in Iceberg partition paths) are preserved.
519+
std::string key_to_use = target_decomposed.key;
520+
519521
bool use_base_storage = false;
520522
if (base_storage->getType() == ObjectStorageType::S3)
521523
{

tests/integration/test_database_iceberg/test.py

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -625,6 +625,52 @@ def test_table_with_slash(started_cluster):
625625
assert node.query(f"SELECT * FROM {CATALOG_NAME}.`{root_namespace}.{table_encoded_name}`") == "\\N\tAAPL\t193.24\t193.31\t('bot')\n"
626626

627627

628+
def test_partition_value_with_slash(started_cluster):
629+
"""Partition value containing '/' produces object keys with %2F; reading must preserve encoding."""
630+
node = started_cluster.instances["node1"]
631+
632+
test_ref = f"test_partition_slash_{uuid.uuid4()}"
633+
table_name = f"{test_ref}_table"
634+
root_namespace = f"{test_ref}_namespace"
635+
636+
# Partition by symbol (string) so partition value "us/west" becomes path segment symbol=us%2Fwest
637+
partition_spec = PartitionSpec(
638+
PartitionField(
639+
source_id=2, field_id=1000, transform=IdentityTransform(), name="symbol"
640+
)
641+
)
642+
schema = DEFAULT_SCHEMA
643+
644+
catalog = load_catalog_impl(started_cluster)
645+
catalog.create_namespace(root_namespace)
646+
647+
table = create_table(
648+
catalog,
649+
root_namespace,
650+
table_name,
651+
schema,
652+
partition_spec=partition_spec,
653+
sort_order=DEFAULT_SORT_ORDER,
654+
)
655+
656+
# Write a row whose partition value contains a slash (the S3 key path will contain %2F)
657+
data = [
658+
{
659+
"datetime": datetime.now(),
660+
"symbol": "us/west",
661+
"bid": 100.0,
662+
"ask": 101.0,
663+
"details": {"created_by": "test"},
664+
}
665+
]
666+
df = pa.Table.from_pylist(data)
667+
table.append(df)
668+
669+
create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME)
670+
assert 1 == int(node.query(f"SELECT count() FROM {CATALOG_NAME}.`{root_namespace}.{table_name}`"))
671+
assert "us/west" in node.query(f"SELECT symbol FROM {CATALOG_NAME}.`{root_namespace}.{table_name}`")
672+
673+
628674
def test_cluster_select(started_cluster):
629675
node1 = started_cluster.instances["node1"]
630676
node2 = started_cluster.instances["node2"]
@@ -665,7 +711,7 @@ def test_cluster_select(started_cluster):
665711
assert len(cluster_secondary_queries) == 1
666712

667713
assert node2.query(f"SELECT * FROM {CATALOG_NAME}.`{root_namespace}.{table_name}`", settings={"parallel_replicas_for_cluster_engines":1, 'enable_parallel_replicas': 2, 'cluster_for_parallel_replicas': 'cluster_simple', 'parallel_replicas_for_cluster_engines' : 1}) == 'pablo\n'
668-
714+
669715
def test_not_specified_catalog_type(started_cluster):
670716
node = started_cluster.instances["node1"]
671717
settings = {

0 commit comments

Comments
 (0)