From 3b29886ae9d9d11c290117c94225147736bf562c Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Wed, 1 Oct 2025 21:21:53 +0000 Subject: [PATCH 1/2] [MsgPack] Use JSON schema boolean resolution rules Since YAMLIO generally knows the types ahead of time (since it primarily functions to (de)serialize concrete C++ types) "tag resolution" isn't really a meaningful isssue. With MsgPackDocument we permit arbitrary documents with arbitrary type nodes, and so the YAML "NO" problem is an issue. Address this in MsgPackDocument itself to avoid a significant and far-reaching backwards-incompatible change to YAMLIO (although we could still consider tightening things up there in the future). Use the "JSON Schema" for tag resolution where the only literals to resolve to bool by default are "true" and "false". --- llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp | 19 ++++- .../BinaryFormat/MsgPackDocumentTest.cpp | 74 +++++++++++++++++++ 2 files changed, 89 insertions(+), 4 deletions(-) diff --git a/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp b/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp index 80b421d5f752e..fbf871f709a64 100644 --- a/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp +++ b/llvm/lib/BinaryFormat/MsgPackDocumentYAML.cpp @@ -29,6 +29,10 @@ struct ScalarDocNode : DocNode { StringRef getYAMLTag() const; }; +bool isJSONSchemaBoolLiteral(StringRef S) { + return S == "true" || S == "false"; +} + } // namespace /// Convert this DocNode to a string, assuming it is scalar. @@ -84,11 +88,18 @@ StringRef DocNode::fromString(StringRef S, StringRef Tag) { *this = getDocument()->getNode(); return ""; } - if (Tag == "!bool" || Tag == "") { + if (Tag == "!bool") { *this = getDocument()->getNode(false); - StringRef Err = yaml::ScalarTraits::input(S, nullptr, getBool()); - if (Err == "" || Tag != "") - return Err; + return yaml::ScalarTraits::input(S, nullptr, getBool()); + } + // FIXME: This enforces the "JSON Schema" tag resolution for boolean literals, + // defined at https://yaml.org/spec/1.2.2/#json-schema which we adopt because + // the more general pre-1.2 resolution includes many common strings (e.g. "n", + // "no", "y", "yes", ...). This should be handled at the YAMLTraits level, but + // that change would have a much broader impact. + if (Tag == "" && isJSONSchemaBoolLiteral(S)) { + *this = getDocument()->getNode(S == "true"); + return ""; } if (Tag == "!float" || Tag == "") { *this = getDocument()->getNode(0.0); diff --git a/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp b/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp index 6a6ad7010f629..5aace87cb3b6b 100644 --- a/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp +++ b/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp @@ -420,3 +420,77 @@ TEST(MsgPackDocument, TestInputYAMLMap) { ASSERT_EQ(SS.getKind(), Type::String); ASSERT_EQ(SS.getString(), "2"); } + +TEST(MsgPackDocument, TestInputYAMLBoolean) { + Document Doc; + auto GetFirst = [](Document &Doc) { return Doc.getRoot().getArray()[0]; }; + auto ToYAML = [](Document &Doc) { + std::string S; + raw_string_ostream OS(S); + Doc.toYAML(OS); + return S; + }; + + bool Ok; + + Ok = Doc.fromYAML("- n\n"); + ASSERT_TRUE(Ok); + ASSERT_EQ(GetFirst(Doc).getKind(), Type::String); + ASSERT_EQ(GetFirst(Doc).getString(), "n"); + ASSERT_EQ(ToYAML(Doc), "---\n- n\n...\n"); + + Ok = Doc.fromYAML("- y\n"); + ASSERT_TRUE(Ok); + ASSERT_EQ(GetFirst(Doc).getKind(), Type::String); + ASSERT_EQ(GetFirst(Doc).getString(), "y"); + ASSERT_EQ(ToYAML(Doc), "---\n- y\n...\n"); + + Ok = Doc.fromYAML("- no\n"); + ASSERT_TRUE(Ok); + ASSERT_EQ(GetFirst(Doc).getKind(), Type::String); + ASSERT_EQ(GetFirst(Doc).getString(), "no"); + ASSERT_EQ(ToYAML(Doc), "---\n- no\n...\n"); + + Ok = Doc.fromYAML("- yes\n"); + ASSERT_TRUE(Ok); + ASSERT_EQ(GetFirst(Doc).getKind(), Type::String); + ASSERT_EQ(GetFirst(Doc).getString(), "yes"); + ASSERT_EQ(ToYAML(Doc), "---\n- yes\n...\n"); + + Ok = Doc.fromYAML("- false\n"); + ASSERT_TRUE(Ok); + ASSERT_EQ(GetFirst(Doc).getKind(), Type::Boolean); + ASSERT_EQ(GetFirst(Doc).getBool(), false); + ASSERT_EQ(ToYAML(Doc), "---\n- false\n...\n"); + + Ok = Doc.fromYAML("- true\n"); + ASSERT_TRUE(Ok); + ASSERT_EQ(GetFirst(Doc).getKind(), Type::Boolean); + ASSERT_EQ(GetFirst(Doc).getBool(), true); + ASSERT_EQ(ToYAML(Doc), "---\n- true\n...\n"); + + Ok = Doc.fromYAML("- !str false\n"); + ASSERT_TRUE(Ok); + ASSERT_EQ(GetFirst(Doc).getKind(), Type::String); + ASSERT_EQ(GetFirst(Doc).getString(), "false"); + ASSERT_EQ(ToYAML(Doc), "---\n- !str 'false'\n...\n"); + + Ok = Doc.fromYAML("- !str true\n"); + ASSERT_TRUE(Ok); + ASSERT_EQ(GetFirst(Doc).getKind(), Type::String); + ASSERT_EQ(GetFirst(Doc).getString(), "true"); + ASSERT_EQ(ToYAML(Doc), "---\n- !str 'true'\n...\n"); + + // FIXME: A fix for these requires changes in YAMLParser/YAMLTraits. + Ok = Doc.fromYAML("- \"false\"\n"); + ASSERT_TRUE(Ok); + ASSERT_EQ(GetFirst(Doc).getKind(), Type::Boolean); + ASSERT_EQ(GetFirst(Doc).getBool(), false); + ASSERT_EQ(ToYAML(Doc), "---\n- false\n...\n"); + + Ok = Doc.fromYAML("- \"true\"\n"); + ASSERT_TRUE(Ok); + ASSERT_EQ(GetFirst(Doc).getKind(), Type::Boolean); + ASSERT_EQ(GetFirst(Doc).getBool(), true); + ASSERT_EQ(ToYAML(Doc), "---\n- true\n...\n"); +} From 8fb115616eccb9d2c266e83114d7314c91927620 Mon Sep 17 00:00:00 2001 From: Scott Linder Date: Thu, 4 Dec 2025 21:50:50 +0000 Subject: [PATCH 2/2] Rename test and add !bool tag cases --- .../unittests/BinaryFormat/MsgPackDocumentTest.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp b/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp index 5aace87cb3b6b..60b2b28c7fac9 100644 --- a/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp +++ b/llvm/unittests/BinaryFormat/MsgPackDocumentTest.cpp @@ -421,7 +421,7 @@ TEST(MsgPackDocument, TestInputYAMLMap) { ASSERT_EQ(SS.getString(), "2"); } -TEST(MsgPackDocument, TestInputYAMLBoolean) { +TEST(MsgPackDocument, TestYAMLBoolean) { Document Doc; auto GetFirst = [](Document &Doc) { return Doc.getRoot().getArray()[0]; }; auto ToYAML = [](Document &Doc) { @@ -481,6 +481,18 @@ TEST(MsgPackDocument, TestInputYAMLBoolean) { ASSERT_EQ(GetFirst(Doc).getString(), "true"); ASSERT_EQ(ToYAML(Doc), "---\n- !str 'true'\n...\n"); + Ok = Doc.fromYAML("- !bool false\n"); + ASSERT_TRUE(Ok); + ASSERT_EQ(GetFirst(Doc).getKind(), Type::Boolean); + ASSERT_EQ(GetFirst(Doc).getBool(), false); + ASSERT_EQ(ToYAML(Doc), "---\n- false\n...\n"); + + Ok = Doc.fromYAML("- !bool true\n"); + ASSERT_TRUE(Ok); + ASSERT_EQ(GetFirst(Doc).getKind(), Type::Boolean); + ASSERT_EQ(GetFirst(Doc).getBool(), true); + ASSERT_EQ(ToYAML(Doc), "---\n- true\n...\n"); + // FIXME: A fix for these requires changes in YAMLParser/YAMLTraits. Ok = Doc.fromYAML("- \"false\"\n"); ASSERT_TRUE(Ok);