Skip to content

Commit e900946

Browse files
Merge pull request ClickHouse#80171 from ClickHouse/remove-global-regexps
Remove regular expressions from global variables
2 parents d0e7a3a + 9d8897d commit e900946

File tree

3 files changed

+32
-16
lines changed

3 files changed

+32
-16
lines changed

src/Common/parseGlobs.cpp

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,27 @@ namespace ErrorCodes
1515
extern const int BAD_ARGUMENTS;
1616
}
1717

18-
static const re2::RE2 range_regex(R"({([\d]+\.\.[\d]+)})"); /// regexp for {M..N}, where M and N - non-negative integers
19-
static const re2::RE2 enum_regex(R"({([^{}*,]+[^{}*]*[^{}*,])})"); /// regexp for {expr1,expr2,expr3}, expr's should be without "{", "}", "*" and ","
18+
namespace
19+
{
20+
struct Regexps
21+
{
22+
static const Regexps & instance()
23+
{
24+
static Regexps regexps;
25+
return regexps;
26+
}
27+
28+
/// regexp for {M..N}, where M and N are non-negative integers
29+
re2::RE2 range_regex{R"({([\d]+\.\.[\d]+)})"};
30+
31+
/// regexp for {expr1,expr2,expr3}, where each expr must not contain "{", "}", "*" or ","
32+
re2::RE2 enum_regex{R"({([^{}*,]+[^{}*]*[^{}*,])})"};
33+
};
34+
}
2035

2136
bool containsRangeGlob(const std::string & input)
2237
{
23-
return RE2::PartialMatch(input, range_regex);
38+
return RE2::PartialMatch(input, Regexps::instance().range_regex);
2439
}
2540

2641
bool containsOnlyEnumGlobs(const std::string & input)
@@ -67,8 +82,8 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob
6782
std::string_view matched_range;
6883
std::string_view matched_enum;
6984

70-
auto did_match_range = RE2::PartialMatch(input, range_regex, &matched_range);
71-
auto did_match_enum = RE2::PartialMatch(input, enum_regex, &matched_enum);
85+
auto did_match_range = RE2::PartialMatch(input, Regexps::instance().range_regex, &matched_range);
86+
auto did_match_enum = RE2::PartialMatch(input, Regexps::instance().enum_regex, &matched_enum);
7287

7388
/// Enum regex matches ranges, so if they both match and point to the same data,
7489
/// it is a range.
@@ -78,7 +93,7 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob
7893
/// We matched a range, and range comes earlier than enum
7994
if (did_match_range && (!did_match_enum || matched_range.data() < matched_enum.data()))
8095
{
81-
RE2::FindAndConsume(&input, range_regex, &matched);
96+
RE2::FindAndConsume(&input, Regexps::instance().range_regex, &matched);
8297
std::string buffer(matched);
8398
oss_for_replacing << escaped_with_globs.substr(current_index, matched_range.data() - escaped_with_globs.data() - current_index - 1) << '(';
8499

@@ -122,7 +137,7 @@ std::string makeRegexpPatternFromGlobs(const std::string & initial_str_with_glob
122137
/// We matched enum, and it comes earlier than range.
123138
else if (did_match_enum && (!did_match_range || matched_enum.data() < matched_range.data()))
124139
{
125-
RE2::FindAndConsume(&input, enum_regex, &matched);
140+
RE2::FindAndConsume(&input, Regexps::instance().enum_regex, &matched);
126141
std::string buffer(matched);
127142

128143
oss_for_replacing << escaped_with_globs.substr(current_index, matched.data() - escaped_with_globs.data() - current_index - 1) << '(';

src/Common/parseGlobs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
#pragma once
2+
23
#include <string>
34
#include <vector>
45

6+
57
namespace DB
68
{
79
bool containsRangeGlob(const std::string & input);

src/Storages/StorageURL.cpp

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,18 +9,15 @@
99
#include <Parsers/ASTInsertQuery.h>
1010
#include <Parsers/ASTLiteral.h>
1111
#include <Parsers/ASTFunction.h>
12-
#include <Parsers/ASTIdentifier.h>
1312

1413
#include <IO/ConnectionTimeouts.h>
1514
#include <IO/WriteBufferFromHTTP.h>
16-
#include <IO/WriteHelpers.h>
1715

1816
#include <Formats/FormatFactory.h>
1917
#include <Formats/ReadSchemaUtils.h>
2018
#include <Processors/Formats/IInputFormat.h>
2119
#include <Processors/Formats/IOutputFormat.h>
2220
#include <Processors/Executors/PullingPipelineExecutor.h>
23-
#include <Processors/ISource.h>
2421
#include <Processors/Sources/NullSource.h>
2522
#include <Processors/Transforms/AddingDefaultsTransform.h>
2623
#include <Processors/Transforms/ExtractColumnsTransform.h>
@@ -111,12 +108,6 @@ static const std::unordered_set<std::string_view> optional_configuration_keys =
111108
"headers.header.value",
112109
};
113110

114-
/// Headers in config file will have structure "headers.header.name" and "headers.header.value".
115-
/// But Poco::AbstractConfiguration converts them into "header", "header[1]", "header[2]".
116-
static const std::vector<std::shared_ptr<re2::RE2>> optional_regex_keys = {
117-
std::make_shared<re2::RE2>(R"(headers.header\[[0-9]*\].name)"),
118-
std::make_shared<re2::RE2>(R"(headers.header\[[0-9]*\].value)"),
119-
};
120111

121112
bool urlWithGlobs(const String & uri)
122113
{
@@ -1569,6 +1560,14 @@ size_t StorageURL::evalArgsAndCollectHeaders(
15691560

15701561
void StorageURL::processNamedCollectionResult(Configuration & configuration, const NamedCollection & collection)
15711562
{
1563+
/// Headers in config file will have structure "headers.header.name" and "headers.header.value".
1564+
/// But Poco::AbstractConfiguration converts them into "header", "header[1]", "header[2]".
1565+
static const std::vector<std::shared_ptr<re2::RE2>> optional_regex_keys
1566+
{
1567+
std::make_shared<re2::RE2>(R"(headers.header\[[0-9]*\].name)"),
1568+
std::make_shared<re2::RE2>(R"(headers.header\[[0-9]*\].value)"),
1569+
};
1570+
15721571
validateNamedCollection(collection, required_configuration_keys, optional_configuration_keys, optional_regex_keys);
15731572

15741573
configuration.url = collection.get<String>("url");

0 commit comments

Comments
 (0)