Skip to content

Commit b737d09

Browse files
committed
Revert "tmp"
This reverts commit ee9e4e5.
1 parent d23a5f6 commit b737d09

File tree

5 files changed

+31
-124
lines changed

5 files changed

+31
-124
lines changed

src/Storages/VirtualColumnUtils.cpp

Lines changed: 0 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
#include <stack>
33

44
#include <Storages/VirtualColumnUtils.h>
5-
#include <absl/container/flat_hash_set.h>
65
#include "Formats/NumpyDataTypes.h"
76

87
#include <Core/NamesAndTypes.h>
@@ -154,27 +153,6 @@ static auto makeExtractor()
154153
return KeyValuePairExtractorBuilder().withItemDelimiters({'/'}).withKeyValueDelimiter('=').buildWithReferenceMap();
155154
}
156155

157-
HivePartitioningKeysAndValues parseHivePartitioningKeysAndValuesRegex(const String & path)
158-
{
159-
const static RE2 pattern_re("([^/]+)=([^/]*)/");
160-
re2::StringPiece input_piece(path);
161-
162-
HivePartitioningKeysAndValues result;
163-
std::string_view key;
164-
std::string_view value;
165-
166-
while (RE2::FindAndConsume(&input_piece, pattern_re, &key, &value))
167-
{
168-
auto it = result.find(key);
169-
if (it != result.end() && it->second != value)
170-
throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {} with different values, only unique keys are allowed", path, key);
171-
172-
auto col_name = key;
173-
result[col_name] = value;
174-
}
175-
return result;
176-
}
177-
178156
HivePartitioningKeysAndValues parseHivePartitioningKeysAndValues(const String & path)
179157
{
180158
static auto extractor = makeExtractor();
@@ -205,45 +183,6 @@ HivePartitioningKeysAndValues parseHivePartitioningKeysAndValues(const String &
205183
return key_values;
206184
}
207185

208-
std::pair<ColumnPtr, ColumnPtr> parseHivePartitioningKeysAndValuesOldExtractkv(const String & path)
209-
{
210-
static auto extractor = KeyValuePairExtractorBuilder().withItemDelimiters({'/'}).withKeyValueDelimiter('=').buildWithoutEscaping();
211-
212-
auto keys = ColumnString::create();
213-
auto values = ColumnString::create();
214-
215-
// cutting the filename to prevent malformed filenames that contain key-value-pairs from being extracted
216-
// not sure if we actually need to do that, but just in case. Plus, the previous regex impl took care of it
217-
const auto last_slash_pos = path.find_last_of('/');
218-
219-
if (last_slash_pos == std::string::npos)
220-
{
221-
// nothing to extract, there is no path, just a filename
222-
return std::make_pair(std::move(keys), std::move(values));
223-
}
224-
225-
std::string_view path_without_filename(path.data(), last_slash_pos);
226-
227-
extractor.extract(path_without_filename, keys, values);
228-
229-
keys->validate();
230-
values->validate();
231-
232-
absl::flat_hash_set<StringRef> check_for_duplicates_set;
233-
234-
for (auto i = 0u; i < keys->size(); i++)
235-
{
236-
auto [_, inserted] = check_for_duplicates_set.insert(keys->getDataAt(i));
237-
238-
if (!inserted)
239-
{
240-
throw Exception(ErrorCodes::INCORRECT_DATA, "Path '{}' to file with enabled hive-style partitioning contains duplicated partition key {} with different values, only unique keys are allowed", path, keys->getDataAt(i).toString());
241-
}
242-
}
243-
244-
return std::make_pair(std::move(keys), std::move(values));
245-
}
246-
247186
VirtualColumnsDescription getVirtualsForFileLikeStorage(ColumnsDescription & storage_columns, const ContextPtr & context, const std::string & path, std::optional<FormatSettings> format_settings_)
248187
{
249188
VirtualColumnsDescription desc;

src/Storages/VirtualColumnUtils.h

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -112,9 +112,7 @@ void addRequestedFileLikeStorageVirtualsToChunk(
112112

113113
using HivePartitioningKeysAndValues = absl::flat_hash_map<std::string_view, std::string_view>;
114114

115-
HivePartitioningKeysAndValues parseHivePartitioningKeysAndValuesRegex(const String & path);
116115
HivePartitioningKeysAndValues parseHivePartitioningKeysAndValues(const String & path);
117-
std::pair<ColumnPtr, ColumnPtr> parseHivePartitioningKeysAndValuesOldExtractkv(const String & path);
118116

119117
}
120118

src/Storages/tests/gtest_virtual_column_utils.cpp

Lines changed: 0 additions & 61 deletions
Original file line number | Diff line number | Diff line change
@@ -85,64 +85,3 @@ TEST(VirtualColumnUtils, getVirtualsForFileLikeStorageEmptyValue)
8585
ASSERT_TRUE(res.has("year"));
8686
ASSERT_TRUE(res.has("country"));
8787
}
88-
89-
static std::vector<std::string> test_paths = {
90-
"/some/folder/key1=val1/key2=val2/file1.txt",
91-
"/data/keyA=valA/keyB=valB/keyC=valC/file2.txt",
92-
"/another/dir/x=1/y=2/z=3/file3.txt",
93-
"/tiny/path/a=b/file4.txt",
94-
"/yet/another/path/k1=v1/k2=v2/k3=v3/k4=v4/k5=v5/"
95-
};
96-
97-
TEST(VirtualColumnUtils, BenchmarkRegexParser)
98-
{
99-
static constexpr int iterations = 100000;
100-
101-
auto start = std::chrono::steady_clock::now();
102-
103-
for (int i = 0; i < iterations; ++i)
104-
{
105-
const auto & path = test_paths[i % 5];
106-
auto result = parseHivePartitioningKeysAndValuesRegex(path);
107-
ASSERT_TRUE(!result.empty());
108-
}
109-
110-
auto end = std::chrono::steady_clock::now();
111-
auto duration_ms = std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count();
112-
113-
std::cout << "[BenchmarkRegexParser] "
114-
<< iterations << " iterations across 5 paths took "
115-
<< duration_ms << " ms\n";
116-
117-
auto start_extractkv = std::chrono::steady_clock::now();
118-
119-
for (int i = 0; i < iterations; ++i)
120-
{
121-
const auto & path = test_paths[i % 5];
122-
auto result = parseHivePartitioningKeysAndValues(path);
123-
ASSERT_TRUE(!result.empty());
124-
}
125-
126-
auto end_extractkv = std::chrono::steady_clock::now();
127-
auto duration_ms_extractkv = std::chrono::duration_cast<std::chrono::milliseconds>(end_extractkv - start_extractkv).count();
128-
129-
std::cout << "[BenchmarkExtractkvParser] "
130-
<< iterations << " iterations across 5 paths took "
131-
<< duration_ms_extractkv << " ms\n";
132-
133-
auto start_extractkv_old = std::chrono::steady_clock::now();
134-
135-
for (int i = 0; i < iterations; ++i)
136-
{
137-
const auto & path = test_paths[i % 5];
138-
auto result = parseHivePartitioningKeysAndValuesOldExtractkv(path);
139-
ASSERT_TRUE(!result.first->empty());
140-
}
141-
142-
auto end_extractkv_old = std::chrono::steady_clock::now();
143-
auto duration_ms_extractkv_old = std::chrono::duration_cast<std::chrono::milliseconds>(end_extractkv_old - start_extractkv_old).count();
144-
145-
std::cout << "[BenchmarkExtractkvOldParser] "
146-
<< iterations << " iterations across 5 paths took "
147-
<< duration_ms_extractkv_old << " ms\n";
148-
}

tests/queries/0_stateless/03203_hive_style_partitioning.reference

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,3 +95,24 @@ test/hive_partitioning/column0=Arthur/column1=/sample.parquet
9595
test/hive_partitioning/column0=Arthur/column1=ABC/sample.parquet
9696
test/hive_partitioning/column0=Arthur/column1=/sample.parquet
9797
OK
98+
TESTING THE S3CLUSTER PARTITIONING
99+
last Elizabeth
100+
Frank Elizabeth
101+
Moreno Elizabeth
102+
Guzman Elizabeth
103+
Stephens Elizabeth
104+
Franklin Elizabeth
105+
Gibson Elizabeth
106+
Greer Elizabeth
107+
Delgado Elizabeth
108+
Cross Elizabeth
109+
last Elizabeth
110+
Frank Elizabeth
111+
Moreno Elizabeth
112+
Guzman Elizabeth
113+
Stephens Elizabeth
114+
Franklin Elizabeth
115+
Gibson Elizabeth
116+
Greer Elizabeth
117+
Delgado Elizabeth
118+
Cross Elizabeth

tests/queries/0_stateless/03203_hive_style_partitioning.sh

Lines changed: 10 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,3 +79,13 @@ set use_hive_partitioning = 0;
7979
8080
SELECT *, _column0 FROM s3('http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
8181
""" 2>&1 | grep -F -q "UNKNOWN_IDENTIFIER" && echo "OK" || echo "FAIL";
82+
83+
$CLICKHOUSE_LOCAL -q "SELECT 'TESTING THE S3CLUSTER PARTITIONING'"
84+
85+
$CLICKHOUSE_CLIENT -q """
86+
set use_hive_partitioning = 1;
87+
88+
SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') LIMIT 10;
89+
90+
SELECT *, column0 FROM s3Cluster(test_cluster_one_shard_three_replicas_localhost, 'http://localhost:11111/test/hive_partitioning/column0=Elizabeth/sample.parquet') WHERE column0 = 'Elizabeth' LIMIT 10;
91+
"""

0 commit comments

Comments
 (0)