Skip to content

Commit a64caf2

Browse files
committed
Support whitespace around tag names
1 parent 73fb10a commit a64caf2

File tree

2 files changed

+47
-6
lines changed

2 files changed

+47
-6
lines changed

src/html2md.cpp

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -416,21 +416,28 @@ Converter *Converter::UpdatePrevChFromMd() {
416416
}
417417

418418
bool Converter::ParseCharInTag(char ch) {
419+
static bool skipping_leading_whitespace = true;
420+
419421
if (ch == '/' && !is_in_attribute_value_) {
420422
is_closing_tag_ = current_tag_.empty();
421423
is_self_closing_tag_ = !is_closing_tag_;
422-
424+
skipping_leading_whitespace = true; // Reset for next tag
423425
return true;
424426
}
425427

426-
if (ch == '>')
428+
if (ch == '>') {
429+
// Trim trailing whitespace by removing characters from current_tag_
430+
while (!current_tag_.empty() && std::isspace(current_tag_.back())) {
431+
current_tag_.pop_back();
432+
}
433+
skipping_leading_whitespace = true; // Reset for next tag
427434
return OnHasLeftTag();
435+
}
428436

429437
if (ch == '"') {
430438
if (is_in_attribute_value_) {
431439
is_in_attribute_value_ = false;
432440
} else {
433-
// Look backwards for '=' possibly with whitespace after it
434441
size_t pos = current_tag_.length();
435442
while (pos > 0 && isspace(current_tag_[pos - 1])) {
436443
pos--;
@@ -439,12 +446,18 @@ bool Converter::ParseCharInTag(char ch) {
439446
is_in_attribute_value_ = true;
440447
}
441448
}
449+
skipping_leading_whitespace = false; // Stop skipping after attribute
442450
return true;
443451
}
444452

445-
// Convert tag characters to lowercase as we build them
446-
current_tag_ += tolower(ch);
453+
// Handle whitespace: skip leading whitespace, keep others
454+
if (isspace(ch) && skipping_leading_whitespace) {
455+
return true; // Ignore leading whitespace
456+
}
447457

458+
// Once we encounter a non-whitespace character, stop skipping
459+
skipping_leading_whitespace = false;
460+
current_tag_ += tolower(ch);
448461
return false;
449462
}
450463

tests/main.cpp

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,32 @@ bool testSelfClosingUppercaseTags() {
326326
return true;
327327
}
328328

329+
bool testWhitespaceTags() {
330+
testOption("whitespaceTags");
331+
332+
// Test cases with various tags containing whitespace
333+
vector<std::pair<string, string>> testCases = {
334+
// { HTML input, Expected Markdown output }
335+
{"< p >Hello</ p >", "Hello\n"},
336+
{"< p>Text</ p >", "Text\n"},
337+
{"<p >Text</p >", "Text\n"}
338+
};
339+
340+
for (const auto &[html, expectedMd] : testCases) {
341+
html2md::Converter c(html);
342+
auto md = c.convert();
343+
344+
if (md != expectedMd) {
345+
cout << "Failed to convert whitespace tag: " << html << "\n"
346+
<< "Expected Markdown: " << expectedMd << "\n"
347+
<< "Generated Markdown: " << md << "\n";
348+
return false;
349+
}
350+
}
351+
352+
return true;
353+
}
354+
329355
int main(int argc, const char **argv) {
330356
// List to store all markdown files in this dir
331357
vector<string> files;
@@ -382,7 +408,9 @@ int main(int argc, const char **argv) {
382408
&testUppercaseTags,
383409
&testUppercaseAttributes,
384410
&testMixedCaseTags,
385-
&testSelfClosingUppercaseTags};
411+
&testSelfClosingUppercaseTags,
412+
&testWhitespaceTags
413+
};
386414

387415
for (const auto &test : tests)
388416
if (!test()) {

0 commit comments

Comments
 (0)