Skip to content

Commit 3420758

Browse files
authored
fix: handle invalid description term headers when generating documentation (#645)
1 parent ae7bfba commit 3420758

File tree

3 files changed

+49
-17
lines changed

3 files changed

+49
-17
lines changed
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"id": "8ca59a5e-f734-4b5f-877e-848740bca124",
3+
"type": "bugfix",
4+
"description": "Handle invalid (empty) description term headers when generating documentation."
5+
}

smithy-kotlin-codegen/src/main/kotlin/software/amazon/smithy/kotlin/codegen/lang/DocumentationPreprocessor.kt

Lines changed: 25 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -27,13 +27,7 @@ class DocumentationPreprocessor : KotlinIntegration {
2727
return transformer.mapTraits(model) { _, trait ->
2828
when (trait) {
2929
is DocumentationTrait -> {
30-
// There's definitely some improperly escaped HTML characters within preformat blocks in existing
31-
// models. Ensure we strip those now, the parser is VERY forgiving and will mistreat any sequences
32-
// of characters that happen to form tags as such.
33-
val sanitizedDoc = trait.value
34-
.applyWithin("<code>", "</code>", String::escapeHtml)
35-
.applyWithin("<pre>", "</pre>", String::escapeHtml)
36-
val docs = toKdoc(sanitizedDoc)
30+
val docs = toKdoc(trait.value)
3731
DocumentationTrait(docs, trait.sourceLocation)
3832
}
3933
else -> trait
@@ -49,10 +43,28 @@ class DocumentationPreprocessor : KotlinIntegration {
4943
return renderer.text()
5044
}
5145

46+
/**
47+
* Parse the raw documentation into an HTML structure from which we can deterministically generate valid markdown.
48+
* This method accepts the model documentation as-is and is designed to handle any input or parsing quirks that
49+
* might occur.
50+
*/
5251
private fun parseClean(rawDoc: String): Document {
53-
val parsed = Jsoup.parse(rawDoc)
54-
55-
parsed.body().stripBlankTextNodes()
52+
// There are definitely some improperly escaped HTML characters within preformat blocks in existing
53+
// models. Ensure we strip those now, the parser is VERY forgiving and will mistreat any sequences
54+
// of characters that happen to form tags as such.
55+
val sanitized = rawDoc
56+
.applyWithin("<code>", "</code>", String::escapeHtml)
57+
.applyWithin("<pre>", "</pre>", String::escapeHtml)
58+
val parsed = Jsoup.parse(sanitized)
59+
60+
parsed.body().filterDescendants(
61+
// Jsoup will preserve newlines between elements as blank text nodes. These have zero bearing on the content
62+
// of the document to begin with and only serve to complicate traversal.
63+
{ it is TextNode && it.isBlank },
64+
// Some docs contain empty definition terms, which we render as section headers. An empty section header
65+
// (literal "## \n") is invalid markdown according to dokka.
66+
{ it.nodeName() == "dt" && it.childNodes().isEmpty() }
67+
)
5668

5769
return parsed
5870
}
@@ -181,17 +193,13 @@ class DocumentationPreprocessor : KotlinIntegration {
181193
}
182194
}
183195

184-
/**
185-
* Jsoup will preserve newlines between elements as blank text nodes. These have zero bearing on the content of the
186-
* document to begin with and only serve to complicate traversal.
187-
*/
188-
private fun Node.stripBlankTextNodes() {
189-
if (this is TextNode && isBlank) {
196+
private fun Node.filterDescendants(vararg matchers: (Node) -> Boolean) {
197+
if (matchers.any { it(this) }) {
190198
remove()
191199
return
192200
}
193201

194-
childNodes().forEach(Node::stripBlankTextNodes)
202+
childNodes().forEach { it.filterDescendants(*matchers) }
195203
}
196204

197205
private fun Node.hasAncestor(predicate: (Node) -> Boolean): Boolean =

smithy-kotlin-codegen/src/test/kotlin/software/amazon/smithy/kotlin/codegen/lang/DocumentationPreprocessorTest.kt

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,25 @@ class DocumentationPreprocessorTest {
176176
inputTest(input, expected)
177177
}
178178

179+
@Test
180+
fun `it renders description list section headers`() {
181+
val input = """
182+
<dl>
183+
<dt>term1</dt>
184+
<dd><p>description1</p></dd>
185+
<dt/>
186+
<dd><p>definition of a nonexistent term...</p></dd>
187+
<dl>
188+
""".trimIndent()
189+
val expected = """
190+
## term1
191+
description1
192+
193+
definition of a nonexistent term...
194+
""".trimIndent()
195+
inputTest(input, expected)
196+
}
197+
179198
@Test
180199
fun `it fully renders S3 CreateMultipartUpload`() {
181200
val input = """

0 commit comments

Comments
 (0)