@@ -8,6 +8,7 @@
 import uuid
 
 import pytest
+from pathlib import Path
 
 import helpers.client
 from helpers.cluster import ClickHouseCluster, ClickHouseInstance
@@ -2791,3 +2792,129 @@ def test_key_value_args(started_cluster):
         f"S3(\\'{url}\\', \\'TSVRaw\\', format = \\'TSVRaw\\', access_key_id = \\'minio\\', secret_access_key = \\'[HIDDEN]\\', compression_method = \\'gzip\\')"
         in node.query(f"SHOW CREATE TABLE {table_name}")
     )
+
+
+def test_file_pruning_with_hive_style_partitioning(started_cluster):
+    node = started_cluster.instances["dummy"]
+    table_name = f"test_pruning_with_hive_style_partitioning_{generate_random_string()}"
+    bucket = started_cluster.minio_bucket
+    minio = started_cluster.minio_client
+
+    url = f"http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{table_name}"
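+    # With partition_strategy = 'hive', files are written under key=value
+    # directories, e.g. {table_name}/b=3/c=1/<file>.parquet (asserted below).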
+    node.query(
+        f"""
+        CREATE TABLE {table_name} (a Int32, b Int32, c String) ENGINE = S3('{url}', format = 'Parquet', partition_strategy = 'hive')
+        PARTITION BY (b, c)
+        """
+    )
+    node.query(
+        f"INSERT INTO {table_name} SELECT number, number % 5, toString(number % 2) FROM numbers(20)",
+        settings={"use_hive_partitioning": True},
+    )
+
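+    # 20 rows with b = number % 5 and c = number % 2 -> 10 (b, c) partitions,
+    # 2 rows each.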
+    objects = sorted(
+        obj.object_name
+        for obj in minio.list_objects(bucket, prefix=table_name, recursive=True)
+    )
+    assert len(objects) == 10
+
+    prefixes = []
+    for obj in objects:
+        assert obj.endswith(".parquet")
+        prefixes.append(str(Path(obj).parent))
+
+    assert len(prefixes) == 10
+    assert prefixes == [
+        f"{table_name}/b=0/c=0",
+        f"{table_name}/b=0/c=1",
+        f"{table_name}/b=1/c=0",
+        f"{table_name}/b=1/c=1",
+        f"{table_name}/b=2/c=0",
+        f"{table_name}/b=2/c=1",
+        f"{table_name}/b=3/c=0",
+        f"{table_name}/b=3/c=1",
+        f"{table_name}/b=4/c=0",
+        f"{table_name}/b=4/c=1",
+    ]
+
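+    # EngineFileLikeReadFiles counts the files a query actually opened, so it
+    # tells us how many files were left after partition pruning.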
+    def check_read_files(expected, query_id):
+        node.query("SYSTEM FLUSH LOGS")
+        assert expected == int(
+            node.query(
+                f"SELECT ProfileEvents['EngineFileLikeReadFiles'] FROM system.query_log WHERE query_id = '{query_id}' AND type='QueryFinish'"
+            )
+        )
+
+    # c == '0' matches 5 of the 10 files (one per value of b), 2 rows each.
+    assert 5 == int(
+        node.query(f"SELECT uniqExact(_path) FROM {table_name} WHERE c == '0'")
+    )
+
+    query_id = f"{table_name}_query_1"
+    assert 10 == int(
+        node.query(
+            f"SELECT count() FROM {table_name} WHERE c == '0'", query_id=query_id
+        )
+    )
+    # Check files are pruned.
+    check_read_files(5, query_id)
+
+    # b == 3 matches 2 files (c = '0' and c = '1'), each with 2 rows.
+    assert 2 == int(
+        node.query(f"SELECT uniqExact(_path) FROM {table_name} WHERE b == 3")
+    )
+
+    query_id = f"{table_name}_query_2"
+    assert 4 == int(
+        node.query(f"SELECT count() FROM {table_name} WHERE b == 3", query_id=query_id)
+    )
+    # Check files are pruned.
+    check_read_files(2, query_id)
+
+    # b == 3 AND c == '1' pins down a single file with 2 rows.
+    assert 1 == int(
+        node.query(
+            f"SELECT uniqExact(_path) FROM {table_name} WHERE b == 3 AND c == '1'"
+        )
+    )
+
+    query_id = f"{table_name}_query_3"
+    assert 2 == int(
+        node.query(
+            f"SELECT count() FROM {table_name} WHERE b == 3 AND c == '1'",
+            query_id=query_id,
+        )
+    )
+    # Check files are pruned.
+    check_read_files(1, query_id)
+
+    query_id = f"{table_name}_query_4"
+    assert 1 == int(
+        node.query(f"SELECT count() FROM {table_name} WHERE a == 1", query_id=query_id)
+    )
+    # Nothing is pruned, because `a` is not a partition column.
+    check_read_files(10, query_id)
+
+
+def test_partition_by_without_wildcard(started_cluster):
+    node = started_cluster.instances["dummy"]
+    table_name = f"test_partition_by_without_wildcard_{generate_random_string()}"
+    bucket = started_cluster.minio_bucket
+
+    url = f"http://{started_cluster.minio_host}:{started_cluster.minio_port}/{bucket}/{table_name}"
+    # This must not throw "Partition strategy wildcard can not be used
+    # without a '_partition_id' wildcard".
+    node.query(
+        f"""
+CREATE TABLE {table_name} (a Int32, b Int32, c String) ENGINE = S3('{url}', format = 'Parquet')
+PARTITION BY (b, c)
+"""
+    )