Skip to content

Commit ac565d9

Browse files
committed
Fix attribute parsing with whitespace around equals sign
1 parent f3ecc17 commit ac565d9

File tree

2 files changed

+42
-4
lines changed

2 files changed

+42
-4
lines changed

src/html2md.cpp

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -418,10 +418,16 @@ bool Converter::ParseCharInTag(char ch) {
418418
if (ch == '"') {
419419
if (is_in_attribute_value_) {
420420
is_in_attribute_value_ = false;
421-
} else if (current_tag_[current_tag_.length() - 1] == '=') {
422-
is_in_attribute_value_ = true;
421+
} else {
422+
// Look backwards for '=' possibly with whitespace after it
423+
size_t pos = current_tag_.length();
424+
while (pos > 0 && isspace(current_tag_[pos - 1])) {
425+
pos--;
426+
}
427+
if (pos > 0 && current_tag_[pos - 1] == '=') {
428+
is_in_attribute_value_ = true;
429+
}
423430
}
424-
425431
return true;
426432
}
427433

tests/main.cpp

Lines changed: 33 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -170,6 +170,38 @@ bool testFormatTable() {
170170
return formattedTable == expectedOutput;
171171
}
172172

173+
bool testAttributeWhitespace() {
174+
testOption("attributeWhitespace");
175+
176+
// Test different variations of whitespace around equals sign
177+
vector<string> testCases = {
178+
"<a href=\"http://example.com/\">no space</a>",
179+
"<a href =\"http://example.com/\">space before</a>",
180+
"<a href= \"http://example.com/\">space after</a>",
181+
"<a href = \"http://example.com/\">space both sides</a>"};
182+
183+
for (const auto &html : testCases) {
184+
html2md::Converter c(html);
185+
auto md = c.convert();
186+
187+
// Basic check that the conversion worked
188+
if (md.empty()) {
189+
cerr << "Failed to convert: " << html << "\n";
190+
return false;
191+
}
192+
193+
// For anchor tags, check if URL was properly extracted
194+
if (html.find("<a") != string::npos) {
195+
if (md.find("http://example.com/") == string::npos) {
196+
cerr << "Failed to extract URL from: " << html << "\n";
197+
return false;
198+
}
199+
}
200+
}
201+
202+
return true;
203+
}
204+
173205
int main(int argc, const char **argv) {
174206
// List to store all markdown files in this dir
175207
vector<string> files;
@@ -219,7 +251,7 @@ int main(int argc, const char **argv) {
219251

220252
// Test the options
221253
auto tests = {&testDisableTitle, &testUnorderedList, &testOrderedList,
222-
&testFormatTable};
254+
&testFormatTable, &testAttributeWhitespace};
223255

224256
for (const auto &test : tests)
225257
if (!test()) {

0 commit comments

Comments
 (0)