Skip to content

Commit 185112e

Browse files
committed
fix formats/yaml: check for key uniqueness in mappings
commit_hash:18d56cb82d6bdccbc3e28c26aa528208a31a733e
1 parent 7fac552 commit 185112e

File tree

6 files changed

+294
-9
lines changed

6 files changed

+294
-9
lines changed

core/src/components/common_server_component_list_test.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,6 @@ constexpr std::string_view kStaticConfig = R"(
6060
overflow_behavior: $overflow_behavior
6161
tracer:
6262
service-name: config-service
63-
statistics-storage:
64-
# Nothing
6563
dynamic-config:
6664
updates-enabled: true
6765
fs-cache-path: $dynamic-config-cache-path
@@ -109,8 +107,6 @@ constexpr std::string_view kStaticConfig = R"(
109107
config-settings: false
110108
additional-cleanup-interval: 5m
111109
first-update-fail-ok: true
112-
tracer:
113-
service-name: config-service
114110
statistics-storage:
115111
# Nothing
116112
http-client-statistics-core:

redis/functional_tests/pytest_redis_cluster_topology_plugin/pytest_plugin.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,9 @@ def stop(self):
189189

190190
def assert_running(self):
191191
pid = self._read_pid()
192-
assert _is_pid_running(pid), f'Redis server process is dead, please look at the logfile {self.logpath}'
192+
assert pid is not None and _is_pid_running(pid), (
193+
f'Redis server process is dead, please look at the logfile {self.log_path}'
194+
)
193195

194196
def _check_instance(self):
195197
with self.get_client() as client:

universal/include/userver/formats/yaml/serialize.hpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,10 @@ namespace blocking {
3030
formats::yaml::Value FromFile(const std::string& path);
3131
} // namespace blocking
3232

33+
namespace impl {
34+
formats::yaml::Value FromStringAllowRepeatedKeys(const std::string& doc);
35+
} // namespace impl
36+
3337
} // namespace formats::yaml
3438

3539
USERVER_NAMESPACE_END

universal/include/userver/formats/yaml/value.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@ namespace formats::yaml {
1919

2020
class ValueBuilder;
2121

22+
namespace impl {
23+
formats::yaml::Value FromStringAllowRepeatedKeys(const std::string& doc);
24+
} // namespace impl
25+
2226
/// @ingroup userver_universal userver_containers userver_formats
2327
///
2428
/// @brief Non-mutable YAML value representation.
@@ -291,6 +295,7 @@ class Value final {
291295
friend formats::yaml::Value FromString(const std::string&);
292296
friend formats::yaml::Value FromStream(std::istream&);
293297
friend void Serialize(const formats::yaml::Value&, std::ostream&);
298+
friend formats::yaml::Value impl::FromStringAllowRepeatedKeys(const std::string& doc);
294299
};
295300

296301
template <typename T>

universal/src/formats/yaml/serialize.cpp

Lines changed: 182 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,187 @@
11
#include <userver/formats/yaml/serialize.hpp>
22

3+
#include <algorithm>
34
#include <array>
45
#include <fstream>
6+
#include <functional>
57
#include <memory>
68
#include <sstream>
9+
#include <string_view>
10+
#include <type_traits>
711

812
#include <fmt/format.h>
13+
#include <yaml-cpp/yaml.h>
14+
#include <boost/container/small_vector.hpp>
915

16+
#include <userver/formats/common/path.hpp>
1017
#include <userver/formats/yaml/exception.hpp>
11-
12-
#include <yaml-cpp/yaml.h>
18+
#include <userver/formats/yaml/value.hpp>
19+
#include <userver/utils/enumerate.hpp>
20+
#include <userver/utils/not_null.hpp>
1321

1422
USERVER_NAMESPACE_BEGIN
1523

1624
namespace formats::yaml {
1725

26+
namespace {
27+
28+
constexpr std::size_t kInitialStackDepth = 16;
29+
constexpr std::size_t kInitialSortedKeysSize = 32;
30+
31+
struct PathElement {
32+
explicit PathElement(YAML::Node& parent)
33+
: current(parent.begin()),
34+
end(parent.end())
35+
{}
36+
37+
// Cannot store Node[&], because it has broken copy/move semantics for iteration.
38+
// https://github.com/jbeder/yaml-cpp/issues/928
39+
//
40+
// Cannot store const_iterator, because incurs extra copies.
41+
YAML::Node::iterator current;
42+
YAML::Node::iterator end;
43+
};
44+
45+
class RawYamlVisitor final {
46+
public:
47+
explicit RawYamlVisitor(YAML::Node& root)
48+
: root_(root)
49+
{}
50+
51+
template <typename Visitor, typename = std::enable_if_t<std::is_invocable_v<Visitor&, YAML::Node&>>>
52+
void VisitPreOrder(Visitor visitor) {
53+
if (!root_.IsSequence() && !root_.IsMap()) {
54+
return;
55+
}
56+
57+
visitor(root_);
58+
path_stack_.emplace_back(root_);
59+
60+
while (true) {
61+
if (path_stack_.back().current == path_stack_.back().end) {
62+
path_stack_.pop_back();
63+
if (path_stack_.empty()) {
64+
break;
65+
}
66+
++path_stack_.back().current;
67+
continue;
68+
}
69+
70+
auto item = *path_stack_.back().current;
71+
auto& node = item.IsDefined() ? item : item.second;
72+
73+
visitor(node);
74+
if (node.IsSequence() || node.IsMap()) {
75+
path_stack_.emplace_back(node);
76+
} else {
77+
++path_stack_.back().current;
78+
}
79+
}
80+
}
81+
82+
// Complexity: O(whole-yaml-size)
83+
std::string ComputeCurrentPath() const {
84+
if (path_stack_.empty()) {
85+
return common::kPathRoot;
86+
}
87+
88+
std::string path;
89+
for (const auto [idx, element] : utils::enumerate(path_stack_)) {
90+
if (element.current->IsDefined()) {
91+
// Sequence iterator.
92+
const auto begin = idx == 0 ? root_.begin() : path_stack_[idx - 1].current->begin();
93+
common::AppendPath(path, std::distance(begin, element.current));
94+
} else {
95+
// Map iterator.
96+
const auto map_item = *element.current;
97+
const auto& key = map_item.first;
98+
switch (key.Type()) {
99+
case YAML::NodeType::Null:
100+
common::AppendPath(path, "null");
101+
break;
102+
case YAML::NodeType::Scalar:
103+
common::AppendPath(path, key.Scalar());
104+
break;
105+
default: {
106+
// YAML can have non-scalar keys:
107+
// https://yaml.org/spec/1.2.2/#mapping
108+
std::ostringstream stream;
109+
stream << key;
110+
common::AppendPath(path, stream.str());
111+
break;
112+
}
113+
}
114+
}
115+
}
116+
return path;
117+
}
118+
119+
private:
120+
YAML::Node& root_;
121+
boost::container::small_vector<PathElement, kInitialStackDepth> path_stack_;
122+
};
123+
124+
// Non-unique keys in mappings are not allowed, but yaml-cpp does not check for them:
125+
// https://github.com/jbeder/yaml-cpp/issues/60
126+
class UniquenessChecker final {
127+
public:
128+
explicit UniquenessChecker(YAML::Node& root)
129+
: visitor_(root)
130+
{}
131+
132+
void CheckKeyUniquenessIterative() {
133+
visitor_.VisitPreOrder([this](const YAML::Node& node) {
134+
if (node.IsMap()) {
135+
CheckSingleMapKeyUniqueness(node);
136+
}
137+
});
138+
}
139+
140+
private:
141+
void CheckSingleMapKeyUniqueness(const YAML::Node& node) {
142+
UASSERT(node.IsMap());
143+
sorted_keys_.clear();
144+
sorted_keys_.reserve(node.size());
145+
for (const auto item : node) {
146+
const auto& key = item.first;
147+
switch (key.Type()) {
148+
case YAML::NodeType::Null:
149+
sorted_keys_.push_back("null");
150+
break;
151+
case YAML::NodeType::Scalar:
152+
sorted_keys_.push_back(key.Scalar());
153+
break;
154+
default:
155+
// This is a valid key, but too complex to check. YAML nodes may be recursive and infinitely deep.
156+
break;
157+
}
158+
}
159+
160+
std::sort(sorted_keys_.begin(), sorted_keys_.end());
161+
if (const auto duplicate = std::adjacent_find(sorted_keys_.begin(), sorted_keys_.end());
162+
duplicate != sorted_keys_.end())
163+
{
164+
throw ParseException(
165+
fmt::format("Duplicate mapping key '{}' at path '{}'", *duplicate, visitor_.ComputeCurrentPath())
166+
);
167+
}
168+
}
169+
170+
RawYamlVisitor visitor_;
171+
boost::container::small_vector<std::string_view, kInitialSortedKeysSize> sorted_keys_;
172+
};
173+
174+
} // namespace
175+
18176
formats::yaml::Value FromString(const std::string& doc) {
19177
if (doc.empty()) {
20178
throw ParseException("YAML document is empty");
21179
}
22180

23181
try {
24-
return YAML::Load(doc);
182+
auto node = YAML::Load(doc);
183+
UniquenessChecker{node}.CheckKeyUniquenessIterative();
184+
return formats::yaml::Value(node);
25185
} catch (const YAML::ParserException& e) {
26186
throw ParseException(e.what());
27187
}
@@ -33,7 +193,9 @@ formats::yaml::Value FromStream(std::istream& is) {
33193
}
34194

35195
try {
36-
return YAML::Load(is);
196+
auto node = YAML::Load(is);
197+
UniquenessChecker{node}.CheckKeyUniquenessIterative();
198+
return formats::yaml::Value(node);
37199
} catch (const YAML::ParserException& e) {
38200
throw ParseException(e.what());
39201
}
@@ -63,6 +225,22 @@ formats::yaml::Value FromFile(const std::string& path) {
63225
}
64226
} // namespace blocking
65227

228+
namespace impl {
229+
230+
formats::yaml::Value FromStringAllowRepeatedKeys(const std::string& doc) {
231+
if (doc.empty()) {
232+
throw ParseException("YAML document is empty");
233+
}
234+
235+
try {
236+
return formats::yaml::Value(YAML::Load(doc));
237+
} catch (const YAML::ParserException& e) {
238+
throw ParseException(e.what());
239+
}
240+
}
241+
242+
} // namespace impl
243+
66244
} // namespace formats::yaml
67245

68246
USERVER_NAMESPACE_END

universal/src/formats/yaml/serialize_test.cpp

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <userver/formats/yaml/exception.hpp>
44
#include <userver/formats/yaml/serialize.hpp>
5+
#include <userver/utest/assert_macros.hpp>
56

67
#include <formats/common/serialize_test.hpp>
78

@@ -27,4 +28,103 @@ struct Serialization<formats::yaml::Value> : public ::testing::Test {
2728

2829
INSTANTIATE_TYPED_TEST_SUITE_P(FormatsYaml, Serialization, formats::yaml::Value);
2930

31+
TEST(FormatsYamlDuplicateKeys, AtRoot) {
32+
const std::string yaml_string = R"(
33+
Key1: 1
34+
Key2: 2
35+
Key3: 3
36+
Key1: 1
37+
)";
38+
UEXPECT_THROW_MSG(
39+
formats::yaml::FromString(yaml_string),
40+
formats::yaml::ParseException,
41+
"Duplicate mapping key 'Key1' at path '/'"
42+
);
43+
}
44+
45+
TEST(FormatsYamlDuplicateKeys, InObject) {
46+
const std::string yaml_string = R"(
47+
Key1:
48+
Key2: 2
49+
Key3:
50+
Key4: 4
51+
Key5: 5
52+
Key4: 4
53+
Key6: 6
54+
Key7: 7)";
55+
UEXPECT_THROW_MSG(
56+
formats::yaml::FromString(yaml_string),
57+
formats::yaml::ParseException,
58+
"Duplicate mapping key 'Key4' at path 'Key1.Key3'"
59+
);
60+
}
61+
62+
TEST(FormatsYamlDuplicateKeys, InArray) {
63+
const std::string yaml_string = R"(
64+
- 1
65+
- 2
66+
- - 3
67+
- Key4: 1
68+
Key4: 1
69+
)";
70+
UEXPECT_THROW_MSG(
71+
formats::yaml::FromString(yaml_string),
72+
formats::yaml::ParseException,
73+
"Duplicate mapping key 'Key4' at path '[2][1]'"
74+
);
75+
}
76+
77+
TEST(FormatsYamlDuplicateKeys, VariousScalarsInMapKeys) {
78+
{
79+
const std::string yaml_string = R"(
80+
1: foo
81+
1: foo
82+
)";
83+
UEXPECT_THROW_MSG(
84+
formats::yaml::FromString(yaml_string),
85+
formats::yaml::ParseException,
86+
"Duplicate mapping key '1' at path '/'"
87+
);
88+
}
89+
90+
{
91+
const std::string yaml_string = R"(
92+
true: foo
93+
true: foo
94+
)";
95+
UEXPECT_THROW_MSG(
96+
formats::yaml::FromString(yaml_string),
97+
formats::yaml::ParseException,
98+
"Duplicate mapping key 'true' at path '/'"
99+
);
100+
}
101+
102+
{
103+
const std::string yaml_string = R"(
104+
null: foo
105+
null: foo
106+
)";
107+
UEXPECT_THROW_MSG(
108+
formats::yaml::FromString(yaml_string),
109+
formats::yaml::ParseException,
110+
"Duplicate mapping key 'null' at path '/'"
111+
);
112+
}
113+
}
114+
115+
TEST(FormatsYamlDuplicateKeys, NonScalarsInMapKeys) {
116+
{
117+
const std::string yaml_string = R"(
118+
[1, 2, 3]: foo
119+
[1, 2, 3]: bar
120+
)";
121+
formats::yaml::Value value;
122+
// This check relies on implementation of FromString. Currently, we do not throw an exception in this case,
123+
// but we could do so in the future. In that case the test should be changed.
124+
// Anyway, we should not hang or crash here.
125+
UEXPECT_NO_THROW(value = formats::yaml::FromString(yaml_string));
126+
EXPECT_EQ(value.GetSize(), 2);
127+
}
128+
}
129+
30130
USERVER_NAMESPACE_END

0 commit comments

Comments
 (0)