diff --git a/src/Databases/DataLake/RestCatalog.cpp b/src/Databases/DataLake/RestCatalog.cpp
index 8b0d643f3ec1..dd71315fa6f9 100644
--- a/src/Databases/DataLake/RestCatalog.cpp
+++ b/src/Databases/DataLake/RestCatalog.cpp
@@ -263,7 +263,8 @@ DB::ReadWriteBufferFromHTTPPtr RestCatalog::createReadBuffer(
 {
     const auto & context = getContext();
 
-    Poco::URI url(base_url / endpoint);
+    /// enable_url_encoding=false allows using tables with encoded sequences in their names, like 'foo%2Fbar'
+    Poco::URI url(base_url / endpoint, /* enable_url_encoding */ false);
 
     if (!params.empty())
         url.setQueryParameters(params);
@@ -496,7 +497,12 @@ DB::Names RestCatalog::parseTables(DB::ReadBuffer & buf, const std::string & base_namespace, size_t limit) const
     for (size_t i = 0; i < identifiers_object->size(); ++i)
     {
         const auto current_table_json = identifiers_object->get(static_cast<UInt32>(i)).extract<Poco::JSON::Object::Ptr>();
-        const auto table_name = current_table_json->get("name").extract<String>();
+        /// If a table name contains an encoded sequence (like 'foo%2Fbar'),
+        /// the catalog returns the decoded character instead of the sequence ('foo/bar').
+        /// Here the name is encoded back to the 'foo%2Fbar' form.
+        const auto table_name_raw = current_table_json->get("name").extract<String>();
+        std::string table_name;
+        Poco::URI::encode(table_name_raw, "/", table_name);
         tables.push_back(base_namespace + "." + table_name);
 
         if (limit && tables.size() >= limit)
diff --git a/tests/integration/test_database_iceberg/test.py b/tests/integration/test_database_iceberg/test.py
index 373f98cedc1c..4d492e62572b 100644
--- a/tests/integration/test_database_iceberg/test.py
+++ b/tests/integration/test_database_iceberg/test.py
@@ -384,3 +384,38 @@ def record(key):
 
     assert 'aaa\naaa\naaa' == node.query(f"SELECT symbol FROM {CATALOG_NAME}.`{namespace}.{table_name}`").strip()
     assert 'bbb\nbbb\nbbb' == node.query(f"SELECT symbol FROM {CATALOG_NAME}.`{namespace}.{table_name_2}`").strip()
+
+
+def test_table_with_slash(started_cluster):
+    node = started_cluster.instances["node1"]
+
+    # pyiceberg currently (version 0.9.1) has a bug with table names containing slashes,
+    # see https://github.com/apache/iceberg-python/issues/2462,
+    # so the slash has to be encoded manually
+    table_raw_suffix = "table/foo"
+    table_encoded_suffix = "table%2Ffoo"
+
+    test_ref = f"test_list_tables_{uuid.uuid4()}"
+    table_name = f"{test_ref}_{table_raw_suffix}"
+    table_encoded_name = f"{test_ref}_{table_encoded_suffix}"
+    root_namespace = f"{test_ref}_namespace"
+
+    catalog = load_catalog_impl(started_cluster)
+    catalog.create_namespace(root_namespace)
+
+    create_table(catalog, root_namespace, table_name, DEFAULT_SCHEMA, PartitionSpec(), DEFAULT_SORT_ORDER)
+    table = catalog.load_table(f"{root_namespace}.{table_encoded_name}")
+    data = [
+        {
+            "datetime": datetime.strptime("2025-01-01 12:00:00", "%Y-%m-%d %H:%M:%S"),
+            "symbol": "AAPL",
+            "bid": 193.24,
+            "ask": 193.31,
+            "details": {"created_by": "bot"},
+        }
+    ]
+    df = pa.Table.from_pylist(data)
+    table.append(df)
+
+    create_clickhouse_iceberg_database(started_cluster, node, CATALOG_NAME)
+    assert node.query(f"SELECT * FROM {CATALOG_NAME}.`{root_namespace}.{table_encoded_name}`") == "2025-01-01 12:00:00.000000\tAAPL\t193.24\t193.31\t('bot')\n"
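
Side note on the re-encoding step in parseTables: Poco::URI::encode percent-encodes every character listed in its "reserved" argument, so passing "/" restores the 'foo%2Fbar' form of a name the catalog returned decoded. A minimal standalone sketch of that round trip, using only the stock Poco::URI helpers (the test program itself is illustrative and not code from this patch):

    #include <Poco/URI.h>
    #include <cassert>
    #include <string>

    int main()
    {
        /// Re-encode a decoded table name ('foo/bar') with '/' in the
        /// reserved set, restoring the 'foo%2Fbar' form used to address it.
        std::string encoded;
        Poco::URI::encode("foo/bar", "/", encoded);
        assert(encoded == "foo%2Fbar");

        /// The opposite direction, as applied by the catalog itself.
        std::string decoded;
        Poco::URI::decode(encoded, decoded);
        assert(decoded == "foo/bar");
        return 0;
    }

Unreserved characters (letters, digits, '-', '_', '.', '~') pass through encode() unchanged, so ordinary table names are unaffected by the extra encoding step.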