Skip to content

Commit 2021012

Browse files
committed
test: output normalization
1 parent 14fc45a commit 2021012

File tree

3 files changed

+313
-15
lines changed

3 files changed

+313
-15
lines changed
Lines changed: 262 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,262 @@
1+
//
2+
// Licensed under the Apache License v2.0 with LLVM Exceptions.
3+
// See https://llvm.org/LICENSE.txt for license information.
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
//
6+
// Copyright (c) 2024 Alan de Freitas ([email protected])
7+
//
8+
// Official repository: https://github.com/cppalliance/mrdocs
9+
//
10+
11+
#include "TextNormalization.hpp"
12+
#include <algorithm>
13+
#include <cctype>
14+
#include <initializer_list>
15+
#include <string>
16+
#include <string_view>
17+
#include <vector>
18+
#include <llvm/Support/Path.h>
19+
20+
namespace mrdocs::test_support {
21+
namespace {
22+
23+
/** Tell whether a character is horizontal whitespace.

    Only the space and the horizontal tab qualify; line
    terminators such as `\n` and `\r` do not.

    @param c The character to classify.
    @return `true` if `c` is `' '` or `'\t'`, `false` otherwise.
 */
bool
isHorizontalSpace(char c)
{
    switch (c)
    {
    case ' ':
    case '\t':
        return true;
    default:
        return false;
    }
}
28+
29+
/** Convert every line terminator in `text` to a single `\n`.

    Both the two-character sequence `\r\n` and a lone `\r` are
    rewritten as `\n`. Existing `\n` characters are kept as they are.

    @param text The string to rewrite; it is replaced with the
    normalized content.
 */
void
normalizeNewlines(std::string& text)
{
    std::string result;
    result.reserve(text.size());

    std::size_t cursor = 0;
    while (cursor < text.size())
    {
        std::size_t const cr = text.find('\r', cursor);
        if (cr == std::string::npos)
        {
            // No carriage return left: copy the tail verbatim.
            result.append(text, cursor, std::string::npos);
            break;
        }

        // Copy everything up to the carriage return, then emit one '\n'.
        result.append(text, cursor, cr - cursor);
        result.push_back('\n');
        cursor = cr + 1;

        // A '\n' right after the '\r' belongs to the same terminator.
        if (cursor < text.size() && text[cursor] == '\n')
        {
            ++cursor;
        }
    }

    text.swap(result);
}
51+
52+
/** Remove trailing whitespace from every line of `text`.

    Lines are delimited by `\n`. Trailing spaces, tabs and carriage
    returns are dropped from each line. The `\n` separators are
    preserved, and no terminator is added after the last line if the
    input did not have one.

    @param text The string to rewrite; it is replaced with the
    trimmed content.
 */
void
rstripEachLine(std::string& text)
{
    std::string_view const source(text);
    std::string result;
    result.reserve(source.size());

    std::size_t begin = 0;
    for (;;)
    {
        std::size_t const newline = source.find('\n', begin);
        bool const isLastLine = newline == std::string_view::npos;
        std::string_view const line = source.substr(
            begin,
            isLastLine ? std::string_view::npos : newline - begin);

        // Keep the line up to and including its last non-trimmable
        // character; a line made only of trimmable characters vanishes.
        std::size_t const lastKept = line.find_last_not_of(" \t\r");
        if (lastKept != std::string_view::npos)
        {
            result.append(line.substr(0, lastKept + 1));
        }

        if (isLastLine)
        {
            break;
        }
        result.push_back('\n');
        begin = newline + 1;
    }

    text.swap(result);
}
80+
81+
/** Limit the length of runs of consecutive blank lines.

    A line is blank only when it is completely empty, so lines that
    contain whitespace are treated as content. In every run of blank
    lines, at most `maxBlankLines` are kept and the rest are removed
    together with their `\n`.

    @param text The string to rewrite; it is replaced with the
    collapsed content. An empty string is left untouched.
    @param maxBlankLines Maximum number of consecutive blank lines
    to keep.
 */
void
collapseBlankLines(std::string& text, std::size_t maxBlankLines)
{
    std::string_view remaining(text);
    if (remaining.empty())
    {
        return;
    }

    std::string result;
    result.reserve(remaining.size());

    std::size_t consecutiveBlank = 0;
    while (!remaining.empty())
    {
        std::size_t const newline = remaining.find('\n');
        bool const terminated = newline != std::string_view::npos;
        // With no '\n' left, substr() clamps to the rest of the text.
        std::string_view const line = remaining.substr(0, newline);
        bool const blank = line.empty();

        bool const keep = !blank || consecutiveBlank < maxBlankLines;
        if (keep)
        {
            result.append(line);
            if (terminated)
            {
                result.push_back('\n');
            }
        }

        consecutiveBlank = blank ? consecutiveBlank + 1 : 0;
        if (!terminated)
        {
            break;
        }
        remaining.remove_prefix(newline + 1);
    }

    text.swap(result);
}
117+
118+
std::string
119+
collapseSpacesOutsideVerbatim(
120+
std::string_view text,
121+
std::initializer_list<llvm::StringRef> verbatimTags)
122+
{
123+
std::vector<std::string> verbatim;
124+
verbatim.reserve(verbatimTags.size());
125+
for (auto const tag : verbatimTags)
126+
{
127+
verbatim.emplace_back(tag.lower());
128+
}
129+
130+
std::vector<std::string> verbatimStack;
131+
std::string out;
132+
out.reserve(text.size());
133+
134+
bool previousSpace = false;
135+
std::size_t i = 0;
136+
while (i < text.size())
137+
{
138+
if (text[i] == '<')
139+
{
140+
auto close = text.find('>', i);
141+
if (close == std::string::npos)
142+
{
143+
out.append(text.substr(i));
144+
break;
145+
}
146+
147+
llvm::StringRef tag(text.data() + i + 1, close - i - 1);
148+
tag = tag.ltrim();
149+
bool isClosing = tag.consume_front("/");
150+
tag = tag.ltrim();
151+
llvm::StringRef tagBody = tag.rtrim();
152+
bool selfClosing = tagBody.ends_with("/");
153+
llvm::StringRef name = tag.take_while([](char c) {
154+
return std::isalnum(static_cast<unsigned char>(c)) ||
155+
c == '-' || c == ':';
156+
});
157+
std::string lowerName = name.lower();
158+
159+
if (isClosing)
160+
{
161+
if (!verbatimStack.empty() &&
162+
verbatimStack.back() == lowerName)
163+
{
164+
verbatimStack.pop_back();
165+
}
166+
}
167+
else
168+
{
169+
bool isVerbatim = std::find(
170+
verbatim.begin(), verbatim.end(), lowerName) != verbatim.end();
171+
if (isVerbatim && !selfClosing)
172+
{
173+
verbatimStack.push_back(lowerName);
174+
}
175+
}
176+
177+
out.append(text.substr(i, close - i + 1));
178+
previousSpace = false;
179+
i = close + 1;
180+
continue;
181+
}
182+
183+
char c = text[i];
184+
if (verbatimStack.empty() && isHorizontalSpace(c))
185+
{
186+
if (!previousSpace)
187+
{
188+
out.push_back(' ');
189+
}
190+
previousSpace = true;
191+
++i;
192+
continue;
193+
}
194+
195+
previousSpace = false;
196+
out.push_back(c);
197+
++i;
198+
}
199+
200+
return out;
201+
}
202+
203+
} // namespace
204+
205+
/*  Classify an output file by its extension.

    The extension is taken from `pathOrExtension` with
    `llvm::sys::path::extension`. When that yields nothing, the whole
    argument is treated as the extension. Leading dots are ignored and
    the comparison is case-insensitive.
 */
OutputFormat
guessOutputFormat(llvm::StringRef pathOrExtension)
{
    llvm::StringRef suffix = llvm::sys::path::extension(pathOrExtension);
    if (suffix.empty())
    {
        // No extension found: assume the caller passed a bare extension.
        suffix = pathOrExtension;
    }

    std::string const key = suffix.ltrim(".").lower();

    if (key == "html" || key == "htm")
    {
        return OutputFormat::html;
    }
    if (key == "adoc" || key == "asciidoc")
    {
        return OutputFormat::adoc;
    }
    if (key == "xml")
    {
        return OutputFormat::xml;
    }
    return OutputFormat::other;
}
223+
224+
std::string
225+
normalizeForComparison(std::string_view text, OutputFormat format)
226+
{
227+
std::string normalized(text);
228+
normalizeNewlines(normalized);
229+
230+
switch (format)
231+
{
232+
case OutputFormat::html:
233+
normalized = collapseSpacesOutsideVerbatim(
234+
normalized, { "pre", "code", "textarea" });
235+
rstripEachLine(normalized);
236+
break;
237+
238+
case OutputFormat::xml:
239+
rstripEachLine(normalized);
240+
collapseBlankLines(normalized, 1);
241+
break;
242+
243+
case OutputFormat::adoc:
244+
rstripEachLine(normalized);
245+
collapseBlankLines(normalized, 1);
246+
break;
247+
248+
case OutputFormat::other:
249+
rstripEachLine(normalized);
250+
break;
251+
}
252+
253+
return normalized;
254+
}
255+
256+
std::string
257+
normalizeForComparison(std::string_view text, llvm::StringRef pathOrExtension)
258+
{
259+
return normalizeForComparison(text, guessOutputFormat(pathOrExtension));
260+
}
261+
262+
} // namespace mrdocs::test_support
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
//
2+
// Licensed under the Apache License v2.0 with LLVM Exceptions.
3+
// See https://llvm.org/LICENSE.txt for license information.
4+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
5+
//
6+
// Copyright (c) 2024 Alan de Freitas ([email protected])
7+
//
8+
// Official repository: https://github.com/cppalliance/mrdocs
9+
//
10+
11+
#ifndef MRDOCS_TEST_SUPPORT_TEXTNORMALIZATION_HPP
12+
#define MRDOCS_TEST_SUPPORT_TEXTNORMALIZATION_HPP
13+
14+
#include <llvm/ADT/StringRef.h>
15+
#include <string>
16+
#include <string_view>
17+
18+
namespace mrdocs::test_support {
19+
20+
/** File format classification used by test normalizers.

    The format selects which whitespace normalizations
    `normalizeForComparison` applies before two documents are compared.
 */
21+
enum class OutputFormat
22+
{
23+
/** HTML output; `guessOutputFormat` selects it for `html` and `htm`. */
html,
24+
/** AsciiDoc output; `guessOutputFormat` selects it for `adoc` and `asciidoc`. */
adoc,
25+
/** XML output; `guessOutputFormat` selects it for `xml`. */
xml,
26+
/** Any extension `guessOutputFormat` does not recognize. */
other,
27+
};
28+
29+
/** Deduce the output format from a path or extension.

    The extension is compared case-insensitively and leading dots
    are ignored. An argument with no extension is treated as being
    the extension itself, so `"out.html"`, `".html"` and `"html"`
    are all accepted.

    @param pathOrExtension A file path or a bare extension.
    @return The matching format, or `OutputFormat::other` when the
    extension is not recognized.
 */
30+
OutputFormat
31+
guessOutputFormat(llvm::StringRef pathOrExtension);
32+
33+
/** Normalize text for comparison in tests based on the output format.

    Line terminators are converted to `\n` and trailing whitespace
    is removed from each line for every format. In addition, HTML
    has runs of spaces and tabs collapsed outside `pre`, `code`
    and `textarea` elements, and XML and AsciiDoc keep at most
    one blank line in a row.

    @param text The text to normalize.
    @param format The format that selects the normalization steps.
    @return The normalized copy of `text`.
 */
34+
std::string
35+
normalizeForComparison(std::string_view text, OutputFormat format);
36+
37+
/** Convenience overload that accepts a path or extension directly.

    The format is deduced with `guessOutputFormat` and the text is
    then normalized as by the overload taking an `OutputFormat`.

    @param text The text to normalize.
    @param pathOrExtension A file path or a bare extension used to
    deduce the format.
    @return The normalized copy of `text`.
 */
38+
std::string
39+
normalizeForComparison(std::string_view text, llvm::StringRef pathOrExtension);
40+
41+
} // namespace mrdocs::test_support
42+
43+
#endif

src/test/TestRunner.cpp

Lines changed: 8 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include <mrdocs/Platform.hpp>
1212
#include "TestArgs.hpp"
1313
#include "TestRunner.hpp"
14+
#include "Support/TextNormalization.hpp"
1415
#include <lib/ConfigImpl.hpp>
1516
#include <lib/CorpusImpl.hpp>
1617
#include <lib/Gen/hbs/HandlebarsGenerator.hpp>
@@ -56,16 +57,6 @@ writeFile(
5657
}
5758

5859
namespace {
59-
void
60-
replaceCRLFWithLF(std::string &str)
61-
{
62-
std::string::size_type pos = 0;
63-
while ((pos = str.find("\r\n", pos)) != std::string::npos) {
64-
str.replace(pos, 2, "\n");
65-
pos += 1; // Move past the '\n' character
66-
}
67-
}
68-
6960
SingleFileDB
7061
makeSingleFileDB(llvm::StringRef pathName, std::vector<std::string> cmds)
7162
{
@@ -181,7 +172,9 @@ TestRunner::handleCompilationDatabase(
181172
{
182173
return report::error("{}: \"{}\"", exp.error(), filePath);
183174
}
184-
replaceCRLFWithLF(generatedDocs);
175+
auto const format = test_support::guessOutputFormat(expectedPath.str());
176+
std::string normalizedGenerated = test_support::normalizeForComparison(
177+
generatedDocs, format);
185178

186179
// Generate tagfile
187180
if (auto hbsGen = dynamic_cast<hbs::HandlebarsGenerator const*>(gen_))
@@ -234,9 +227,9 @@ TestRunner::handleCompilationDatabase(
234227
}
235228

236229
// Analyse results
237-
std::string expectedDocs = expectedDocsBuf->getBuffer().str();
238-
replaceCRLFWithLF(expectedDocs);
239-
if (generatedDocs == expectedDocs)
230+
std::string const expectedDocs = test_support::normalizeForComparison(
231+
expectedDocsBuf->getBuffer(), format);
232+
if (normalizedGenerated == expectedDocs)
240233
{
241234
report::info("\"{}\" passed", filePath);
242235
++results.expectedDocsMatching;
@@ -259,7 +252,7 @@ TestRunner::handleCompilationDatabase(
259252
}
260253
report::error("{}: \"{}\"",
261254
Error("Incorrect results"), filePathSv);
262-
auto res = test_suite::diffStrings(expectedDocs, generatedDocs);
255+
auto res = test_suite::diffStrings(expectedDocs, normalizedGenerated);
263256
report::error("{} lines added", res.added);
264257
report::error("{} lines removed", res.removed);
265258

0 commit comments

Comments
 (0)