Skip to content

Commit 14d8cd0

Browse files
authored
Merge pull request ClickHouse#78816 from arthurpassos/allow_empty_value_hive_partitioning
Allow empty value on hive partitioning
2 parents 8781e61 + e0cb1a6 commit 14d8cd0

File tree

7 files changed

+51
-2
lines changed

7 files changed

+51
-2
lines changed

src/Storages/VirtualColumnUtils.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -144,9 +144,9 @@ NameSet getVirtualNamesForFileLikeStorage()
144144
return getCommonVirtualsForFileLikeStorage().getNameSet();
145145
}
146146

147-
static std::unordered_map<std::string, std::string> parseHivePartitioningKeysAndValues(const String & path)
147+
std::unordered_map<std::string, std::string> parseHivePartitioningKeysAndValues(const String & path)
148148
{
149-
std::string pattern = "([^/]+)=([^/]+)/";
149+
std::string pattern = "([^/]+)=([^/]*)/";
150150
re2::StringPiece input_piece(path);
151151

152152
std::unordered_map<std::string, std::string> key_values;

src/Storages/VirtualColumnUtils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,9 @@ struct VirtualsForFileLikeStorage
110110
void addRequestedFileLikeStorageVirtualsToChunk(
111111
Chunk & chunk, const NamesAndTypesList & requested_virtual_columns,
112112
VirtualsForFileLikeStorage virtual_values, ContextPtr context);
113+
114+
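// Parses hive-style `key=value/` path segments into a key -> value map (the value may be empty). Exposed in the header only so that unit tests can call it.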
115+
std::unordered_map<std::string, std::string> parseHivePartitioningKeysAndValues(const String & path);
113116
}
114117

115118
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
#include <gtest/gtest.h>
2+
#include <Common/tests/gtest_global_context.h>
3+
#include <Storages/VirtualColumnUtils.h>
4+
#include <DataTypes/DataTypeString.h>
5+
6+
using namespace DB;
7+
8+
/// Checks that a hive-style path segment with an empty value (`country=`)
/// is parsed as a key mapped to an empty string.
TEST(VirtualColumnUtils, parseHivePartitioningKeysAndValuesEmptyValue)
{
    const std::string empty_value_path = "/output_data/year=2022/country=/data_0.parquet";

    const auto map = VirtualColumnUtils::parseHivePartitioningKeysAndValues(empty_value_path);

    ASSERT_EQ(map.size(), 2u);

    /// Look keys up with find() rather than operator[]: operator[] default-inserts
    /// an empty string for a missing key, which would make the emptiness check on
    /// "country" pass even if that key had never been parsed.
    const auto year_it = map.find("year");
    ASSERT_TRUE(year_it != map.end());
    ASSERT_EQ(year_it->second, "2022");

    const auto country_it = map.find("country");
    ASSERT_TRUE(country_it != map.end());
    ASSERT_TRUE(country_it->second.empty());
}
19+
20+
/// Checks that the virtual columns description built for a path containing an
/// empty hive partition value (`country=`) reports both partition keys.
TEST(VirtualColumnUtils, getVirtualsForFileLikeStorageEmptyValue)
{
    const std::string path_with_empty_value = "/output_data/year=2022/country=/data_0.parquet";

    /// Storage schema: the two partition keys plus one unrelated column.
    ColumnsDescription storage_columns;
    storage_columns.add(ColumnDescription("year", std::make_shared<DataTypeString>()));
    storage_columns.add(ColumnDescription("country", std::make_shared<DataTypeString>()));
    storage_columns.add(ColumnDescription("non_partition", std::make_shared<DataTypeString>()));

    const auto & holder = getContext();
    auto virtuals = VirtualColumnUtils::getVirtualsForFileLikeStorage(storage_columns, holder.context, path_with_empty_value);

    ASSERT_TRUE(virtuals.has("year"));
    ASSERT_TRUE(virtuals.has("country"));
}

tests/queries/0_stateless/03203_hive_style_partitioning.reference

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,9 @@ Gibson Elizabeth
9191
Greer Elizabeth
9292
Delgado Elizabeth
9393
Cross Elizabeth
94+
test/hive_partitioning/column0=Arthur/column1=/sample.parquet
95+
test/hive_partitioning/column0=Arthur/column1=ABC/sample.parquet
96+
test/hive_partitioning/column0=Arthur/column1=/sample.parquet
9497
OK
9598
TESTING THE S3CLUSTER PARTITIONING
9699
last Elizabeth

tests/queries/0_stateless/03203_hive_style_partitioning.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,9 @@ SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0
6969
7070
SELECT *, non_existing_column FROM s3('http://localhost:11111/test/hive_partitioning/non_existing_column=Elizabeth/sample.parquet') LIMIT 10;
7171
SELECT *, column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=*/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10;
72+
73+
SELECT _path FROM s3('http://localhost:11111/test/hive_partitioning/column0=Arthur/**.parquet') order by _path;
74+
SELECT _path FROM s3('http://localhost:11111/test/hive_partitioning/column0=Arthur/**.parquet') where column1 = '';
7275
"""
7376

7477
$CLICKHOUSE_CLIENT -q """
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)