Skip to content

Commit 6fac0ee

Browse files
authored
Merge pull request #1127 from Altinity/backports/25.3.8/87020
25.3 Backport of 87020: Fix table name encoding in data lake rest catalog
2 parents 7e66622 + f3f830d commit 6fac0ee

File tree

2 files changed

+43
-2
lines changed

2 files changed

+43
-2
lines changed

src/Databases/DataLake/RestCatalog.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,8 @@ DB::ReadWriteBufferFromHTTPPtr RestCatalog::createReadBuffer(
263263
{
264264
const auto & context = getContext();
265265

266-
Poco::URI url(base_url / endpoint);
266+
/// enable_url_encoding=false to allow using tables with encoded sequences in their names, like 'foo%2Fbar'
267+
Poco::URI url(base_url / endpoint, /* enable_url_encoding */ false);
267268
if (!params.empty())
268269
url.setQueryParameters(params);
269270

@@ -496,7 +497,12 @@ DB::Names RestCatalog::parseTables(DB::ReadBuffer & buf, const std::string & bas
496497
for (size_t i = 0; i < identifiers_object->size(); ++i)
497498
{
498499
const auto current_table_json = identifiers_object->get(static_cast<int>(i)).extract<Poco::JSON::Object::Ptr>();
499-
const auto table_name = current_table_json->get("name").extract<String>();
500+
/// If a table name has an encoded sequence (like 'foo%2Fbar'),
501+
/// the catalog returns the decoded character instead of the sequence ('foo/bar').
502+
/// Here the name is encoded back to the 'foo%2Fbar' format.
503+
const auto table_name_raw = current_table_json->get("name").extract<String>();
504+
std::string table_name;
505+
Poco::URI::encode(table_name_raw, "/", table_name);
500506

501507
tables.push_back(base_namespace + "." + table_name);
502508
if (limit && tables.size() >= limit)

tests/integration/test_database_iceberg/test.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -384,3 +384,38 @@ def record(key):
384384

385385
assert 'aaa\naaa\naaa' == node.query(f"SELECT symbol FROM {CATALOG_NAME}.`{namespace}.{table_name}`").strip()
386386
assert 'bbb\nbbb\nbbb' == node.query(f"SELECT symbol FROM {CATALOG_NAME}.`{namespace}.{table_name_2}`").strip()
387+
388+
389+
def test_table_with_slash(started_cluster):
390+
node = started_cluster.instances["node1"]
391+
392+
# pyiceberg currently (version 0.9.1) has a bug with table names containing slashes,
393+
# see https://github.com/apache/iceberg-python/issues/2462
394+
# so we need to encode the name manually
395+
table_raw_suffix = "table/foo"
396+
table_encoded_suffix = "table%2Ffoo"
397+
398+
test_ref = f"test_list_tables_{uuid.uuid4()}"
399+
table_name = f"{test_ref}_{table_raw_suffix}"
400+
table_encoded_name = f"{test_ref}_{table_encoded_suffix}"
401+
root_namespace = f"{test_ref}_namespace"
402+
403+
catalog = load_catalog_impl(started_cluster)
404+
catalog.create_namespace(root_namespace)
405+
406+
create_table(catalog, root_namespace, table_name, DEFAULT_SCHEMA, PartitionSpec(), DEFAULT_SORT_ORDER)
407+
table = catalog.load_table(f"{root_namespace}.{table_encoded_name}")
408+
data = [
409+
{
410+
"datetime": datetime.strptime("2025-01-01 12:00:00", "%Y-%m-%d %H:%M:%S"),
411+
"symbol": "AAPL",
412+
"bid": 193.24,
413+
"ask": 193.31,
414+
"details": {"created_by": "bot"},
415+
}
416+
]
417+
df = pa.Table.from_pylist(data)
418+
table.append(df)
419+
420+
create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME)
421+
assert node.query(f"SELECT * FROM {CATALOG_NAME}.`{root_namespace}.{table_encoded_name}`") == "2025-01-01 12:00:00.000000\tAAPL\t193.24\t193.31\t('bot')\n"

0 commit comments

Comments
 (0)