Skip to content

Commit 272f2ea

Browse files
Merge pull request ClickHouse#78094 from ClickHouse/pwd-leak
Query masking rules are now able to throw on match
2 parents 302f7c3 + eaca551 commit 272f2ea

File tree

6 files changed

+46
-15
lines changed

6 files changed

+46
-15
lines changed

docs/ru/operations/server-configuration-parameters/settings.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1547,6 +1547,7 @@ ClickHouse использует потоки из глобального пул
15471547
<name>hide SSN</name>
15481548
<regexp>(^|\D)\d{3}-\d{2}-\d{4}($|\D)</regexp>
15491549
<replace>000-00-0000</replace>
1550+
<throw_on_match>false</throw_on_match>
15501551
</rule>
15511552
</query_masking_rules>
15521553
```
@@ -1555,6 +1556,7 @@ ClickHouse использует потоки из глобального пул
15551556
- `name` - имя правила (необязательно)
15561557
- `regexp` - совместимое с RE2 регулярное выражение (обязательное)
15571558
- `replace` - строка замены для конфиденциальных данных (опционально, по умолчанию - шесть звездочек)
1559+
- `throw_on_match` - нужно ли бросать исключение, если правило сработало на строке лога (опционально, по умолчанию - `false`)
15581560

15591561
Правила маскировки применяются ко всему запросу (для предотвращения утечки конфиденциальных данных из неправильно оформленных / не интерпретируемых запросов).
15601562

docs/zh/operations/server-configuration-parameters/settings.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -627,6 +627,7 @@ SSL客户端/服务器配置。
627627
<name>hide SSN</name>
628628
<regexp>(^|\D)\d{3}-\d{2}-\d{4}($|\D)</regexp>
629629
<replace>000-00-0000</replace>
630+
<throw_on_match>false</throw_on_match>
630631
</rule>
631632
</query_masking_rules>
632633
```
@@ -635,6 +636,7 @@ SSL客户端/服务器配置。
635636
- `name` -规则的名称(可选)
636637
- `regexp` -RE2兼容正则表达式(强制性)
637638
- `replace` -敏感数据的替换字符串(可选,默认情况下-六个星号)
639+
- `throw_on_match` -如果在日志中匹配到这样的字符串,是否抛出异常(可选,默认为 `false`)
638640

639641
屏蔽规则应用于整个查询(以防止敏感数据从格式错误/不可解析的查询泄漏)。
640642

src/Common/SensitiveDataMasker.cpp

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,8 @@ class SensitiveDataMasker::MaskingRule
4141
const std::string replacement_string;
4242
const std::string regexp_string;
4343

44+
const bool throw_on_match;
45+
4446
const RE2 regexp;
4547
const std::string_view replacement;
4648

@@ -52,10 +54,16 @@ class SensitiveDataMasker::MaskingRule
5254
//* TODO: option with hyperscan? https://software.intel.com/en-us/articles/why-and-how-to-replace-pcre-with-hyperscan
5355
// re2::set should also work quite fast, but it doesn't return the match position, only which regexp was matched
5456

55-
MaskingRule(const std::string & name_, const std::string & regexp_string_, const std::string & replacement_string_)
57+
MaskingRule(
58+
const std::string & name_,
59+
const std::string & regexp_string_,
60+
const std::string & replacement_string_,
61+
bool throw_on_match_
62+
)
5663
: name(name_)
5764
, replacement_string(replacement_string_)
5865
, regexp_string(regexp_string_)
66+
, throw_on_match(throw_on_match_)
5967
, regexp(regexp_string, RE2::Quiet)
6068
, replacement(replacement_string)
6169
{
@@ -69,6 +77,12 @@ class SensitiveDataMasker::MaskingRule
6977
uint64_t apply(std::string & data) const
7078
{
7179
auto m = RE2::GlobalReplace(&data, regexp, replacement);
80+
81+
if (throw_on_match && m > 0)
82+
throw Exception(ErrorCodes::LOGICAL_ERROR,
83+
"The rule {} was triggered on the log line {}",
84+
name, data);
85+
7286
#ifndef NDEBUG
7387
matches_count += m;
7488
#endif
@@ -140,10 +154,11 @@ SensitiveDataMasker::SensitiveDataMasker(const Poco::Util::AbstractConfiguration
140154
}
141155

142156
auto replace = config.getString(rule_config_prefix + ".replace", "******");
157+
auto throw_on_match = config.getBool(rule_config_prefix + ".throw_on_match", false);
143158

144159
try
145160
{
146-
addMaskingRule(rule_name, regexp, replace);
161+
addMaskingRule(rule_name, regexp, replace, throw_on_match);
147162
}
148163
catch (DB::Exception & e)
149164
{
@@ -163,9 +178,12 @@ SensitiveDataMasker::SensitiveDataMasker(const Poco::Util::AbstractConfiguration
163178
}
164179

165180
void SensitiveDataMasker::addMaskingRule(
166-
const std::string & name, const std::string & regexp_string, const std::string & replacement_string)
181+
const std::string & name,
182+
const std::string & regexp_string,
183+
const std::string & replacement_string,
184+
bool throw_on_match)
167185
{
168-
all_masking_rules.push_back(std::make_unique<MaskingRule>(name, regexp_string, replacement_string));
186+
all_masking_rules.push_back(std::make_unique<MaskingRule>(name, regexp_string, replacement_string, throw_on_match));
169187
}
170188

171189

src/Common/SensitiveDataMasker.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,12 @@ class SensitiveDataMasker
6161
static MaskerMultiVersion::Version getInstance();
6262

6363
/// Used in tests.
64-
void addMaskingRule(const std::string & name, const std::string & regexp_string, const std::string & replacement_string);
64+
void addMaskingRule(
65+
const std::string & name,
66+
const std::string & regexp_string,
67+
const std::string & replacement_string,
68+
bool throw_on_match
69+
);
6570

6671
#ifndef NDEBUG
6772
void printStats();

src/Common/tests/gtest_sensitive_data_masker.cpp

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@
99
#pragma clang diagnostic ignored "-Wundef"
1010

1111
#include <gtest/gtest.h>
12-
#include <chrono>
1312

1413

1514
namespace DB
@@ -28,11 +27,11 @@ TEST(Common, SensitiveDataMasker)
2827

2928
Poco::AutoPtr<Poco::Util::XMLConfiguration> empty_xml_config = new Poco::Util::XMLConfiguration();
3029
DB::SensitiveDataMasker masker(*empty_xml_config, "");
31-
masker.addMaskingRule("all a letters", "a+", "--a--");
32-
masker.addMaskingRule("all b letters", "b+", "--b--");
33-
masker.addMaskingRule("all d letters", "d+", "--d--");
34-
masker.addMaskingRule("all x letters", "x+", "--x--");
35-
masker.addMaskingRule("rule \"d\" result", "--d--", "*****"); // RE2 regexps are applied one-by-one in order
30+
masker.addMaskingRule("all a letters", "a+", "--a--", /*throw_on_match=*/false);
31+
masker.addMaskingRule("all b letters", "b+", "--b--", /*throw_on_match=*/false);
32+
masker.addMaskingRule("all d letters", "d+", "--d--", /*throw_on_match=*/false);
33+
masker.addMaskingRule("all x letters", "x+", "--x--", /*throw_on_match=*/false);
34+
masker.addMaskingRule("rule \"d\" result", "--d--", "*****", /*throw_on_match=*/false); // RE2 regexps are applied one-by-one in order
3635
std::string x = "aaaaaaaaaaaaa bbbbbbbbbb cccc aaaaaaaaaaaa d ";
3736
EXPECT_EQ(masker.wipeSensitiveData(x), 5);
3837
EXPECT_EQ(x, "--a-- --b-- cccc --a-- ***** ");
@@ -46,9 +45,9 @@ TEST(Common, SensitiveDataMasker)
4645
#endif
4746

4847
DB::SensitiveDataMasker masker2(*empty_xml_config, "");
49-
masker2.addMaskingRule("hide root password", "qwerty123", "******");
50-
masker2.addMaskingRule("hide SSN", "[0-9]{3}-[0-9]{2}-[0-9]{4}", "000-00-0000");
51-
masker2.addMaskingRule("hide email", "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}", "[email protected]");
48+
masker2.addMaskingRule("hide root password", "qwerty123", "******", /*throw_on_match=*/false);
49+
masker2.addMaskingRule("hide SSN", "[0-9]{3}-[0-9]{2}-[0-9]{4}", "000-00-0000", /*throw_on_match=*/false);
50+
masker2.addMaskingRule("hide email", "[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\\.[A-Za-z]{2,4}", "[email protected]", /*throw_on_match=*/false);
5251

5352
std::string query = "SELECT id FROM mysql('localhost:3308', 'database', 'table', 'root', 'qwerty123') WHERE ssn='123-45-6789' or "
5453
"email='[email protected]'";
@@ -63,7 +62,7 @@ TEST(Common, SensitiveDataMasker)
6362
// gtest has no good way to check exception content, so just do it manually (see https://github.com/google/googletest/issues/952 )
6463
try
6564
{
66-
maskerbad.addMaskingRule("bad regexp", "**", "");
65+
maskerbad.addMaskingRule("bad regexp", "**", "", /*throw_on_match=*/false);
6766
ADD_FAILURE() << "addMaskingRule() should throw an error" << std::endl;
6867
}
6968
catch (const DB::Exception & e)

tests/config/config.d/query_masking_rules.xml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,10 @@
55
<regexp>TOPSECRET.TOPSECRET</regexp>
66
<replace>[hidden]</replace>
77
</rule>
8+
<rule>
9+
<name>Detect passwords in tests</name>
10+
<regexp>(?i)P@ssw0rd</regexp>
11+
<throw_on_match>true</throw_on_match>
12+
</rule>
813
</query_masking_rules>
914
</clickhouse>

0 commit comments

Comments
 (0)