From 6bbc603b22b9a91b748fcdf22cc82650e44d1a91 Mon Sep 17 00:00:00 2001 From: Nicolas Le Scouarnec Date: Sat, 4 Nov 2023 09:22:05 +0100 Subject: [PATCH 1/5] Adding support for handling YAML Merge Key (#41) Support for YAML Merge keys ( <<: [*dict1, *dict2] ) is added. The merge key is a specific scalar with value << (and tag !!merge) that implies that during node construction, the map (or sequence of maps) are merged into the current map. The priority rules are that each key from maps within the value associated with << are added iff the key is not yet present in the current map (and first map gets higher priority). Test cases have been added accordingly. --- include/yaml-cpp/exceptions.h | 2 ++ src/nodebuilder.cpp | 48 +++++++++++++++++++++++++++-- src/nodebuilder.h | 1 + test/integration/load_node_test.cpp | 37 ++++++++++++++++++++++ 4 files changed, 85 insertions(+), 3 deletions(-) diff --git a/include/yaml-cpp/exceptions.h b/include/yaml-cpp/exceptions.h index f6b2602ae..9c8da4ce9 100644 --- a/include/yaml-cpp/exceptions.h +++ b/include/yaml-cpp/exceptions.h @@ -87,6 +87,8 @@ const char* const INVALID_ANCHOR = "invalid anchor"; const char* const INVALID_ALIAS = "invalid alias"; const char* const INVALID_TAG = "invalid tag"; const char* const BAD_FILE = "bad file"; +const char* const MERGE_KEY_NEEDS_SINGLE_OR_SEQUENCE_OF_MAPS = + "merge key needs either single map or sequence of maps"; template inline const std::string KEY_NOT_FOUND_WITH_KEY( diff --git a/src/nodebuilder.cpp b/src/nodebuilder.cpp index bbaefac8a..dcddd05c4 100644 --- a/src/nodebuilder.cpp +++ b/src/nodebuilder.cpp @@ -15,6 +15,7 @@ NodeBuilder::NodeBuilder() m_stack{}, m_anchors{}, m_keys{}, + m_mergeDicts{}, m_mapDepth(0) { m_anchors.push_back(nullptr); // since the anchors start at 1 } @@ -71,8 +72,24 @@ void NodeBuilder::OnMapStart(const Mark& mark, const std::string& tag, m_mapDepth++; } +void MergeMapCollection(detail::node& map_to, detail::node& map_from, + detail::shared_memory_holder& pMemory) { + const detail::node& const_map_to = map_to; + for (auto j = map_from.begin(); j != map_from.end(); j++) { + detail::node* s = const_map_to.get(*j->first, pMemory); + if (s == nullptr) { + map_to.insert(*j->first, *j->second, pMemory); + } + } +} + void NodeBuilder::OnMapEnd() { assert(m_mapDepth > 0); + detail::node& collection = *m_stack.back(); + for (detail::node* n : m_mergeDicts) { + MergeMapCollection(collection, *n, m_pMemory); + } + m_mergeDicts.clear(); m_mapDepth--; Pop(); } @@ -107,15 +124,40 @@ void NodeBuilder::Pop() { m_stack.pop_back(); detail::node& collection = *m_stack.back(); - if (collection.type() == NodeType::Sequence) { collection.push_back(node, m_pMemory); } else if (collection.type() == NodeType::Map) { assert(!m_keys.empty()); PushedKey& key = m_keys.back(); if (key.second) { - collection.insert(*key.first, node, m_pMemory); - m_keys.pop_back(); + detail::node& nk = *key.first; + if (nk.type() == NodeType::Scalar && + ((nk.tag() == "tag:yaml.org,2002:merge" && nk.scalar() == "<<") || + (nk.tag() == "?" && nk.scalar() == "<<"))) { + if (node.type() == NodeType::Map) { + m_mergeDicts.emplace_back(&node); + m_keys.pop_back(); + } else if (node.type() == NodeType::Sequence) { + for (auto i = node.begin(); i != node.end(); i++) { + auto v = *i; + if ((*v).type() == NodeType::Map) { + m_mergeDicts.emplace_back(&(*v)); + } else { + throw ParserException( + node.mark(), + ErrorMsg::MERGE_KEY_NEEDS_SINGLE_OR_SEQUENCE_OF_MAPS); + } + } + m_keys.pop_back(); + } else { + throw ParserException( + node.mark(), + ErrorMsg::MERGE_KEY_NEEDS_SINGLE_OR_SEQUENCE_OF_MAPS); + } + } else { + collection.insert(*key.first, node, m_pMemory); + m_keys.pop_back(); + } } else { key.second = true; } diff --git a/src/nodebuilder.h b/src/nodebuilder.h index c580d40e2..c8f280435 100644 --- a/src/nodebuilder.h +++ b/src/nodebuilder.h @@ -67,6 +67,7 @@ class NodeBuilder : public EventHandler { using PushedKey = std::pair; std::vector m_keys; + Nodes m_mergeDicts; std::size_t m_mapDepth; }; } // namespace YAML diff --git a/test/integration/load_node_test.cpp b/test/integration/load_node_test.cpp index 9d0c790fd..b670b64dc 100644 --- a/test/integration/load_node_test.cpp +++ b/test/integration/load_node_test.cpp @@ -173,6 +173,43 @@ TEST(LoadNodeTest, CloneAlias) { EXPECT_EQ(clone[0], clone); } +TEST(LoadNodeTest, MergeKeyA) { + Node node = Load( + "{x: &foo {a : 1,b : 1,c : 1}, y: &bar {d: 2, e : 2, f : 2, a : 2}, z: " + "&stuff { << : *foo, b : 3} }"); + EXPECT_EQ(NodeType::Map, node["z"].Type()); + EXPECT_FALSE(node["z"]["<<"]); + EXPECT_EQ(1, node["z"]["a"].as()); + EXPECT_EQ(3, node["z"]["b"].as()); + EXPECT_EQ(1, node["z"]["c"].as()); +} + +TEST(LoadNodeTest, MergeKeyB) { + Node node = Load( + "{x: &foo {a : 1,b : 1,c : 1}, y: &bar {d: 2, e : 2, f : 2, a : 2}, z: " + "&stuff { << : *foo, b : 3}, w: { << : [*stuff, *bar], c: 4 }, v: { '<<' " + ": *foo } , u : {!!merge << : *bar} }"); + EXPECT_EQ(NodeType::Map, node["z"].Type()); + EXPECT_EQ(NodeType::Map, node["w"].Type()); + EXPECT_FALSE(node["z"]["<<"]); + EXPECT_EQ(1, node["z"]["a"].as()); + EXPECT_EQ(3, node["z"]["b"].as()); + EXPECT_EQ(1, node["z"]["c"].as()); + + EXPECT_EQ(1, node["w"]["a"].as()); + EXPECT_EQ(3, node["w"]["b"].as()); + EXPECT_EQ(4, node["w"]["c"].as()); + EXPECT_EQ(2, node["w"]["d"].as()); + EXPECT_EQ(2, node["w"]["e"].as()); + EXPECT_EQ(2, node["w"]["f"].as()); + + EXPECT_TRUE(node["v"]["<<"]); + EXPECT_EQ(1, node["v"]["<<"]["a"].as()); + + EXPECT_FALSE(node["u"]["<<"]); + EXPECT_EQ(2, node["u"]["d"].as()); +} + TEST(LoadNodeTest, ForceInsertIntoMap) { Node node; node["a"] = "b"; From 629297180d25a5fa5ddc018454700c70c9a3a88a Mon Sep 17 00:00:00 2001 From: Nicolas Le Scouarnec Date: Fri, 8 Dec 2023 20:42:30 +0100 Subject: [PATCH 2/5] Fix merge-key handling in case the dictionary contains a sub-dictionary --- src/nodebuilder.cpp | 10 ++++++---- src/nodebuilder.h | 2 +- test/integration/load_node_test.cpp | 6 +++++- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/nodebuilder.cpp b/src/nodebuilder.cpp index dcddd05c4..a1daf8ec7 100644 --- a/src/nodebuilder.cpp +++ b/src/nodebuilder.cpp @@ -70,6 +70,7 @@ void NodeBuilder::OnMapStart(const Mark& mark, const std::string& tag, node.set_tag(tag); node.set_style(style); m_mapDepth++; + m_mergeDicts.emplace_back(); } void MergeMapCollection(detail::node& map_to, detail::node& map_from, @@ -86,11 +87,12 @@ void MergeMapCollection(detail::node& map_to, detail::node& map_from, void NodeBuilder::OnMapEnd() { assert(m_mapDepth > 0); detail::node& collection = *m_stack.back(); - for (detail::node* n : m_mergeDicts) { + auto& toMerge = *m_mergeDicts.rbegin(); + for (detail::node* n : toMerge) { MergeMapCollection(collection, *n, m_pMemory); } - m_mergeDicts.clear(); m_mapDepth--; + m_mergeDicts.pop_back(); Pop(); } @@ -135,13 +137,13 @@ void NodeBuilder::Pop() { ((nk.tag() == "tag:yaml.org,2002:merge" && nk.scalar() == "<<") || (nk.tag() == "?" && nk.scalar() == "<<"))) { if (node.type() == NodeType::Map) { - m_mergeDicts.emplace_back(&node); + m_mergeDicts.rbegin()->emplace_back(&node); m_keys.pop_back(); } else if (node.type() == NodeType::Sequence) { for (auto i = node.begin(); i != node.end(); i++) { auto v = *i; if ((*v).type() == NodeType::Map) { - m_mergeDicts.emplace_back(&(*v)); + m_mergeDicts.rbegin()->emplace_back(&(*v)); } else { throw ParserException( node.mark(), diff --git a/src/nodebuilder.h b/src/nodebuilder.h index c8f280435..b053d58e4 100644 --- a/src/nodebuilder.h +++ b/src/nodebuilder.h @@ -67,7 +67,7 @@ class NodeBuilder : public EventHandler { using PushedKey = std::pair; std::vector m_keys; - Nodes m_mergeDicts; + std::vector m_mergeDicts; std::size_t m_mapDepth; }; } // namespace YAML diff --git a/test/integration/load_node_test.cpp b/test/integration/load_node_test.cpp index b670b64dc..aa81faa9d 100644 --- a/test/integration/load_node_test.cpp +++ b/test/integration/load_node_test.cpp @@ -188,7 +188,7 @@ TEST(LoadNodeTest, MergeKeyB) { Node node = Load( "{x: &foo {a : 1,b : 1,c : 1}, y: &bar {d: 2, e : 2, f : 2, a : 2}, z: " "&stuff { << : *foo, b : 3}, w: { << : [*stuff, *bar], c: 4 }, v: { '<<' " - ": *foo } , u : {!!merge << : *bar} }"); + ": *foo } , u : {!!merge << : *bar}, t: {!!merge << : *bar, h: 3} }"); EXPECT_EQ(NodeType::Map, node["z"].Type()); EXPECT_EQ(NodeType::Map, node["w"].Type()); EXPECT_FALSE(node["z"]["<<"]); @@ -208,6 +208,10 @@ TEST(LoadNodeTest, MergeKeyB) { EXPECT_FALSE(node["u"]["<<"]); EXPECT_EQ(2, node["u"]["d"].as()); + + EXPECT_FALSE(node["t"]["<<"]); + EXPECT_EQ(2, node["t"]["d"].as()); + EXPECT_EQ(3, node["t"]["h"].as()); } TEST(LoadNodeTest, ForceInsertIntoMap) { From bcf7604479236b16622fd0b01f4d4353ef21ce43 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 May 2024 09:23:41 +0200 Subject: [PATCH 3/5] Fix merge operator support (that can be visible by iterating through the node) The problem is that const_map_to.get(*j->first) compares only the shared_ptr's, while we need to compare the key itself. v2: remove const iterator v3: fix use-after-free due to const reference --- src/nodebuilder.cpp | 19 ++++++++++++++----- test/integration/load_node_test.cpp | 15 +++++++++++++++ 2 files changed, 29 insertions(+), 5 deletions(-) diff --git a/src/nodebuilder.cpp b/src/nodebuilder.cpp index a1daf8ec7..aed6581b0 100644 --- a/src/nodebuilder.cpp +++ b/src/nodebuilder.cpp @@ -1,3 +1,4 @@ +#include #include #include "nodebuilder.h" @@ -75,12 +76,20 @@ void NodeBuilder::OnMapStart(const Mark& mark, const std::string& tag, void MergeMapCollection(detail::node& map_to, detail::node& map_from, detail::shared_memory_holder& pMemory) { - const detail::node& const_map_to = map_to; for (auto j = map_from.begin(); j != map_from.end(); j++) { - detail::node* s = const_map_to.get(*j->first, pMemory); - if (s == nullptr) { - map_to.insert(*j->first, *j->second, pMemory); - } + const auto from_key = j->first; + /// NOTE: const_map_to.get(*j->first) cannot be used here, since it + /// compares only the shared_ptr's, while we need to compare the key + /// itself. + /// + /// NOTE: get() also iterates over elements + bool found = std::any_of(map_to.begin(), map_to.end(), [&](const detail::node_iterator_value & kv) + { + const auto key_node = kv.first; + return key_node->scalar() == from_key->scalar(); + }); + if (!found) + map_to.insert(*from_key, *j->second, pMemory); } } diff --git a/test/integration/load_node_test.cpp b/test/integration/load_node_test.cpp index aa81faa9d..8ca2d3527 100644 --- a/test/integration/load_node_test.cpp +++ b/test/integration/load_node_test.cpp @@ -1,6 +1,7 @@ #include "yaml-cpp/yaml.h" // IWYU pragma: keep #include "gtest/gtest.h" +#include namespace YAML { namespace { @@ -184,6 +185,20 @@ TEST(LoadNodeTest, MergeKeyA) { EXPECT_EQ(1, node["z"]["c"].as()); } +TEST(LoadNodeTest, MergeKeyAIterator) { + Node node = Load( + "{x: &foo {a : 1,b : 1,c : 1}, y: &bar {d: 2, e : 2, f : 2, a : 2}, z: " + "&stuff { << : *foo, b : 3} }"); + EXPECT_EQ(NodeType::Map, node["z"].Type()); + + const auto& z = node["z"]; + size_t z_b_keys = std::count_if(z.begin(), z.end(), [&](const detail::iterator_value & kv) + { + return kv.first.as() == "b"; + }); + ASSERT_EQ(z_b_keys, 1); +} + TEST(LoadNodeTest, MergeKeyB) { Node node = Load( "{x: &foo {a : 1,b : 1,c : 1}, y: &bar {d: 2, e : 2, f : 2, a : 2}, z: " From 2acec3d15b76846cd05f63536e883237dbaf421a Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Fri, 3 May 2024 10:55:38 +0200 Subject: [PATCH 4/5] Fix order for merging iterator Consider the following YAML: trait1: &t1 foo: 1 trait2: &t2 foo: 2 merged: <<: *t1 <<: *t2 yq reports: $ yq .merged.foo < /tmp/yaml 2 while the order that yaml-cpp returns is different, since it will firstly handle 1, and will not replace it with 2: $ util/parse < /tmp/yaml trait1: ? &1 foo : &2 1 trait2: foo: 2 merged: *1 : *2 (Don't mix up "*2" with "2", it is trait1) --- src/nodebuilder.cpp | 5 +++-- test/integration/load_node_test.cpp | 19 ++++++++++++++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/nodebuilder.cpp b/src/nodebuilder.cpp index aed6581b0..837dfdb54 100644 --- a/src/nodebuilder.cpp +++ b/src/nodebuilder.cpp @@ -97,8 +97,9 @@ void NodeBuilder::OnMapEnd() { assert(m_mapDepth > 0); detail::node& collection = *m_stack.back(); auto& toMerge = *m_mergeDicts.rbegin(); - for (detail::node* n : toMerge) { - MergeMapCollection(collection, *n, m_pMemory); + /// The elements for merging should be traversed in reverse order to prefer last values. + for (auto it = toMerge.rbegin(); it != toMerge.rend(); ++it) { + MergeMapCollection(collection, **it, m_pMemory); } m_mapDepth--; m_mergeDicts.pop_back(); diff --git a/test/integration/load_node_test.cpp b/test/integration/load_node_test.cpp index 8ca2d3527..f6dab4945 100644 --- a/test/integration/load_node_test.cpp +++ b/test/integration/load_node_test.cpp @@ -199,6 +199,23 @@ TEST(LoadNodeTest, MergeKeyAIterator) { ASSERT_EQ(z_b_keys, 1); } +TEST(LoadNodeTest, MergeKeyTwoOverrides) { + Node node = Load(R"( +trait1: &t1 + foo: 1 + +trait2: &t2 + foo: 2 + +merged: + <<: *t1 + <<: *t2 +)"); + EXPECT_EQ(NodeType::Map, node["merged"].Type()); + EXPECT_FALSE(node["merged"]["<<"]); + EXPECT_EQ(2, node["merged"]["foo"].as()); +} + TEST(LoadNodeTest, MergeKeyB) { Node node = Load( "{x: &foo {a : 1,b : 1,c : 1}, y: &bar {d: 2, e : 2, f : 2, a : 2}, z: " @@ -211,7 +228,7 @@ TEST(LoadNodeTest, MergeKeyB) { EXPECT_EQ(3, node["z"]["b"].as()); EXPECT_EQ(1, node["z"]["c"].as()); - EXPECT_EQ(1, node["w"]["a"].as()); + EXPECT_EQ(2, node["w"]["a"].as()); EXPECT_EQ(3, node["w"]["b"].as()); EXPECT_EQ(4, node["w"]["c"].as()); EXPECT_EQ(2, node["w"]["d"].as()); From 89452973d814e6d60b129c2a223920018190f565 Mon Sep 17 00:00:00 2001 From: Azat Khuzhin Date: Mon, 23 Sep 2024 10:29:34 +0200 Subject: [PATCH 5/5] Mark MergeMapCollection() static --- src/nodebuilder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/nodebuilder.cpp b/src/nodebuilder.cpp index 837dfdb54..cc1765005 100644 --- a/src/nodebuilder.cpp +++ b/src/nodebuilder.cpp @@ -74,7 +74,7 @@ void NodeBuilder::OnMapStart(const Mark& mark, const std::string& tag, m_mergeDicts.emplace_back(); } -void MergeMapCollection(detail::node& map_to, detail::node& map_from, +static void MergeMapCollection(detail::node& map_to, detail::node& map_from, detail::shared_memory_holder& pMemory) { for (auto j = map_from.begin(); j != map_from.end(); j++) { const auto from_key = j->first;