Skip to content

Commit 1ee7da1

Browse files
authored
Merge pull request #1037 from Altinity/backports/25.6.5/87515_and_87621_do_not_put_hive_partition_columns_in_format_header
Antalya 25.6.5 - Backport of ClickHouse#87515 and ClickHouse#87621 - Never put hive partition columns in format_header / Do not read hive columns as LowCardinality by default
2 parents 51e73b1 + f5182ca commit 1ee7da1

File tree

5 files changed

+25
-3
lines changed

5 files changed

+25
-3
lines changed

src/Storages/HivePartitioningUtils.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include <Formats/EscapingRuleUtils.h>
1010
#include <Formats/FormatFactory.h>
1111
#include <Processors/Chunk.h>
12+
#include <DataTypes/IDataType.h>
1213

1314
namespace DB
1415
{
@@ -85,7 +86,7 @@ NamesAndTypesList extractHivePartitionColumnsFromPath(
8586
{
8687
if (const auto type = tryInferDataTypeByEscapingRule(value, format_settings ? *format_settings : getFormatSettings(context), FormatSettings::EscapingRule::Raw))
8788
{
88-
if (type->canBeInsideLowCardinality())
89+
if (type->canBeInsideLowCardinality() && isStringOrFixedString(type))
8990
{
9091
hive_partition_columns_to_read_from_file_path.emplace_back(key, std::make_shared<DataTypeLowCardinality>(type));
9192
}

src/Storages/prepareReadingFromFormat.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,13 @@ ReadFromFormatInfo prepareReadingFromFormat(
8686
}
8787

8888
/// Create header for InputFormat with columns that will be read from the data.
89-
info.format_header = storage_snapshot->getSampleBlockForColumns(info.columns_description.getNamesOfPhysical());
89+
for (const auto & column : info.columns_description)
90+
{
91+
/// Never read hive partition columns from the data file. This fixes https://github.com/ClickHouse/ClickHouse/issues/87515
92+
if (!hive_parameters.hive_partition_columns_to_read_from_file_path_map.contains(column.name))
93+
info.format_header.insert(ColumnWithTypeAndName{column.type, column.name});
94+
}
95+
9096
info.serialization_hints = getSerializationHintsForFileLikeStorage(storage_snapshot->metadata, context);
9197
return info;
9298
}

tests/queries/0_stateless/03203_hive_style_partitioning.reference

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Elizabeth Delgado
3131
Elizabeth Cross
3232
42 2020-01-01
3333
[1,2,3] 42.42
34-
Array(Int64) LowCardinality(Float64)
34+
Array(Int64) Float64
3535
101
3636
2071
3737
2071
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
1
2+
raw_blob String
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
-- Tags: no-parallel, no-fasttest, no-random-settings
2+
3+
INSERT INTO FUNCTION s3(
4+
s3_conn,
5+
filename='03631',
6+
format=Parquet,
7+
partition_strategy='hive',
8+
partition_columns_in_data_file=1) PARTITION BY (year, country) SELECT 'Brazil' as country, 2025 as year, 1 as id;
9+
10+
-- distinct because minio isn't cleaned up
11+
SELECT count(distinct year) FROM s3(s3_conn, filename='03631/**.parquet', format=RawBLOB) SETTINGS use_hive_partitioning=1;
12+
13+
DESCRIBE s3(s3_conn, filename='03631/**.parquet', format=RawBLOB) SETTINGS use_hive_partitioning=1;

0 commit comments

Comments
 (0)