Skip to content

Commit 769bb62

Browse files
authored
feat: generate convert html to markdown for generated docs (#381)
1 parent f2e074a commit 769bb62

File tree

10 files changed

+304
-63
lines changed

10 files changed

+304
-63
lines changed

gradle.properties

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,8 @@ kotlin.native.ignoreDisabledTargets=true
1010

1111
# kotlin libraries
1212
coroutinesVersion=1.5.0
13-
13+
commonMarkParserVersion=0.15.2
14+
jsoupVersion=1.14.3
1415
# testing/utility
1516
# FIXME - junit5 not working
1617
junitVersion=5.6.2

smithy-swift-codegen/build.gradle.kts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ group = "software.amazon.smithy"
1818
version = "0.1.0"
1919

2020
val smithyVersion: String by project
21+
val commonMarkParserVersion: String by project
22+
val jsoupVersion: String by project
2123
val kotestVersion: String by project
2224
val junitVersion: String by project
2325
val jacocoVersion: String by project
@@ -26,6 +28,8 @@ dependencies {
2628
implementation(kotlin("stdlib-jdk8"))
2729
api("software.amazon.smithy:smithy-codegen-core:$smithyVersion")
2830
api("software.amazon.smithy:smithy-waiters:$smithyVersion")
31+
api("com.atlassian.commonmark:commonmark:$commonMarkParserVersion")
32+
api("org.jsoup:jsoup:$jsoupVersion")
2933
implementation("software.amazon.smithy:smithy-protocol-test-traits:$smithyVersion")
3034
implementation("software.amazon.smithy:smithy-aws-traits:$smithyVersion")
3135
testImplementation("org.junit.jupiter:junit-jupiter:$junitVersion")
Lines changed: 225 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,225 @@
1+
package software.amazon.smithy.swift.cod
2+
3+
import org.commonmark.node.BlockQuote
4+
import org.commonmark.node.FencedCodeBlock
5+
import org.commonmark.node.Heading
6+
import org.commonmark.node.HtmlBlock
7+
import org.commonmark.node.ListBlock
8+
import org.commonmark.node.ThematicBreak
9+
import org.commonmark.parser.Parser
10+
import org.commonmark.renderer.html.HtmlRenderer
11+
import org.jsoup.Jsoup
12+
import org.jsoup.nodes.Node
13+
import org.jsoup.nodes.TextNode
14+
import org.jsoup.safety.Safelist
15+
import org.jsoup.select.NodeTraversor
16+
import org.jsoup.select.NodeVisitor
17+
import software.amazon.smithy.utils.CodeWriter
18+
import software.amazon.smithy.utils.SetUtils
19+
import software.amazon.smithy.utils.StringUtils
20+
21+
// Inspired by Go's implementation:
22+
// https://github.com/aws/smithy-go/blob/main/codegen/smithy-go-codegen/src/main/java/software/amazon/smithy/go/codegen/DocumentationConverter.java
23+
class DocumentationConverter {
24+
companion object {
    /**
     * Markdown parser restricted to the block types listed below
     * (headings, HTML blocks, thematic breaks, fenced code, block quotes and lists).
     */
    val MARKDOWN_PARSER: Parser = Parser.builder()
        .enabledBlockTypes(
            SetUtils.of(
                Heading::class.java,
                HtmlBlock::class.java,
                ThematicBreak::class.java,
                FencedCodeBlock::class.java,
                BlockQuote::class.java,
                ListBlock::class.java
            )
        ).build()

    /**
     * Tags, attributes and link protocols that survive the jsoup cleaning step in [convert].
     */
    val SWIFTDOC_ALLOWLIST: Safelist = Safelist()
        .addTags("code", "pre", "ul", "ol", "li", "a", "br", "h1", "h2", "h3", "h4", "h5", "h6")
        .addAttributes("a", "href")
        .addProtocols("a", "href", "http", "https", "mailto")

    // Built once and shared: the renderer configuration never changes between calls,
    // so there is no need to rebuild it for every documentation string.
    private val HTML_RENDERER: HtmlRenderer = HtmlRenderer.builder().escapeHtml(false).build()

    /**
     * Converts a documentation string to the text form produced by [FormattingVisitor].
     *
     * The input is parsed with [MARKDOWN_PARSER] and rendered to HTML (embedded HTML is not
     * escaped), cleaned against [SWIFTDOC_ALLOWLIST], and then walked with a
     * [FormattingVisitor] starting at the parsed document's body.
     *
     * @param docs the raw documentation text.
     * @return the formatted documentation, with every `$` doubled so that the result can be
     * passed to a writer that treats `$` as a format character.
     */
    fun convert(docs: String): String {
        val renderedHtml = HTML_RENDERER.render(MARKDOWN_PARSER.parse(docs))
        val cleanedHtml = Jsoup.clean(renderedHtml, SWIFTDOC_ALLOWLIST)
        val visitor = FormattingVisitor()
        val body: Node = Jsoup.parse(cleanedHtml).body()
        NodeTraversor.traverse(visitor, body)
        return visitor.toString().replace("\$", "\$\$")
    }
}
49+
50+
/**
 * jsoup [NodeVisitor] that writes the visited HTML tree into [writer] as plain text with
 * `* ` list bullets, `[text](url)` links and indented top-level code blocks.
 *
 * @property writer the writer that accumulates the output.
 * @property needsListPrefix set when an `li` has been entered but its `* ` prefix has not been written yet.
 * @property needsBracketsForLink set when an `a` has been entered and the next text must be wrapped in `[...]`.
 * @property shouldStripPrefixWhitespace set after a newline so the next text has its leading spaces/tabs removed.
 */
class FormattingVisitor(
    val writer: CodeWriter = CodeWriter(),
    var needsListPrefix: Boolean = false,
    var needsBracketsForLink: Boolean = false,
    var shouldStripPrefixWhitespace: Boolean = false,
) : NodeVisitor {
    private val TEXT_BLOCK_NODES = SetUtils.of("br", "p", "h1", "h2", "h3", "h4", "h5", "h6")
    private val LIST_BLOCK_NODES = SetUtils.of("ul", "ol")
    private val CODE_BLOCK_NODES = SetUtils.of("pre", "code")

    /**
     * Called when a node is entered: writes text nodes, starts new lines for block
     * elements, and records pending list/link state for `li` and `a`.
     */
    override fun head(node: Node, depth: Int) {
        val name = node.nodeName()
        // Evaluated once; it only inspects the node tree, so indenting first does not affect it.
        val topLevelCodeBlock = isTopLevelCodeBlock(node, depth)
        if (topLevelCodeBlock) {
            writer.indent()
        }

        when {
            node is TextNode -> writeText(node)
            name in TEXT_BLOCK_NODES || topLevelCodeBlock -> {
                writeNewline()
                writeIndent()
            }
            name in LIST_BLOCK_NODES -> writeNewline()
            // We don't actually write out the list prefix here in case the list element
            // starts with one or more text blocks. By deferring writing those out until
            // the first bit of actual text, we can ensure that no intermediary newlines
            // are kept. It also has the added benefit of eliminating empty list elements.
            name == "li" -> needsListPrefix = true
            name == "a" -> needsBracketsForLink = true
        }
    }

    /**
     * Ends the current line and flags the next text node to have its leading whitespace stripped.
     */
    private fun writeNewline() {
        // While jsoup will strip out redundant whitespace, it will still leave some. If we
        // start a new line then we want to make sure we don't keep any prefixing whitespace.
        shouldStripPrefixWhitespace = true
        writer.write("")
    }

    /**
     * Writes a non-blank text node inline, applying any pending whitespace stripping,
     * link brackets and list prefix (in that order).
     */
    private fun writeText(node: TextNode) {
        if (node.isBlank) return

        // Docs can have valid $ characters that shouldn't run through formatters.
        var content = node.text().replace("$", "$$")
        if (shouldStripPrefixWhitespace) {
            shouldStripPrefixWhitespace = false
            content = StringUtils.stripStart(content, " \t")
        }
        if (needsBracketsForLink) {
            needsBracketsForLink = false
            content = "[$content]"
        }
        if (needsListPrefix) {
            needsListPrefix = false
            writer.write("")
            writeIndent()
            content = "* " + StringUtils.stripStart(content, " \t")
        }
        writer.writeInline(content)
    }

    /**
     * Writes an empty line while the writer's newline is temporarily set to the empty
     * string, then restores `"\n"` — presumably so that only the writer's current
     * indentation is emitted without a line break.
     */
    fun writeIndent() {
        writer.setNewline("").write("").setNewline("\n")
    }

    /**
     * Returns true when [node] is a `pre`/`code` element that is at, or effectively at,
     * the top level of the document.
     */
    private fun isTopLevelCodeBlock(node: Node, depth: Int): Boolean {
        // The node must be a code block node.
        if (node.nodeName() !in CODE_BLOCK_NODES) return false

        // It must either have no siblings or its siblings must be separate blocks.
        if (!allSiblingsAreBlocks(node)) return false

        // Depth 0 will always be a "body" element, so depth 1 means it's top level.
        if (depth == 1) return true

        // If its depth is 2, it could still be effectively top level if its parent is a p
        // node whose siblings are all blocks.
        if (depth != 2) return false
        val parent = node.parent() ?: return false
        return parent.nodeName() == "p" && allSiblingsAreBlocks(parent)
    }

    /**
     * Determines whether a given node's siblings are all text blocks, code blocks, or lists.
     *
     * Only the nearest sibling on each side is checked; siblings that are blank text
     * nodes are skipped.
     *
     * @param node The node whose siblings should be checked.
     * @return true if the node's siblings are blocks (or absent), otherwise false.
     */
    private fun allSiblingsAreBlocks(node: Node): Boolean {
        // Nearest sibling to the left which is not a blank text node.
        val previous = generateSequence(node.previousSibling()) { it.previousSibling() }
            .firstOrNull { !isBlankText(it) }

        // Nearest sibling to the right which is not a blank text node.
        val next = generateSequence(node.nextSibling()) { it.nextSibling() }
            .firstOrNull { !isBlankText(it) }

        return (previous == null || isBlockNode(previous)) && (next == null || isBlockNode(next))
    }

    /** Returns true when [node] is a text node whose content is blank. */
    private fun isBlankText(node: Node): Boolean = node is TextNode && node.isBlank

    /** Returns true when [node] is one of the text-block, list-block or code-block elements. */
    private fun isBlockNode(node: Node): Boolean {
        val name = node.nodeName()
        return name in TEXT_BLOCK_NODES || name in LIST_BLOCK_NODES || name in CODE_BLOCK_NODES
    }

    /**
     * Called when a node is left: closes block elements with blank lines, appends the
     * `(url)` part of links, and resets pending list state for `li`.
     */
    override fun tail(node: Node, depth: Int) {
        val name = node.nodeName()
        val topLevelCodeBlock = isTopLevelCodeBlock(node, depth)
        if (topLevelCodeBlock) {
            writer.dedent()
        }

        when {
            name in TEXT_BLOCK_NODES || topLevelCodeBlock || name in LIST_BLOCK_NODES -> {
                writeNewline()
                writeNewline()
            }
            name == "a" -> {
                val url = node.absUrl("href")
                if (url.isNotEmpty()) {
                    writer.writeInline("(\$L)", url)
                }
            }
            name == "li" -> {
                // Clear out the expectation of a list element if the element's body is empty.
                needsListPrefix = false
                writer.write("")
            }
        }
    }

    /**
     * Returns the accumulated text with trailing spaces/tabs removed from every line and
     * leading/trailing newlines removed, or an empty string when the output is blank.
     */
    override fun toString(): String {
        val raw = writer.toString()
        if (StringUtils.isBlank(raw)) return ""

        // Strip trailing whitespace from every line. We can't use the codewriter for this due to
        // not knowing when a line will end, as we typically build them up over many elements.
        val joined = raw.split("\n")
            .dropLastWhile { it.isEmpty() }
            .joinToString("\n") { StringUtils.stripEnd(it, " \t") }

        // Strip out leading and trailing newlines.
        return StringUtils.strip(joined, "\n")
    }
}
225+
}

smithy-swift-codegen/src/main/kotlin/software/amazon/smithy/swift/codegen/ServiceGenerator.kt

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,9 @@ class ServiceGenerator(
5959
val outputParam = "completion: @escaping ($outputType) -> Void"
6060

6161
val paramTerminator = ", "
62-
62+
if (op.id.name == "createBucket") {
63+
print("we are here")
64+
}
6365
writer.writeShapeDocs(op)
6466
writer.writeAvailableAttribute(model, op)
6567

smithy-swift-codegen/src/main/kotlin/software/amazon/smithy/swift/codegen/SwiftWriter.kt

Lines changed: 3 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import software.amazon.smithy.model.traits.DeprecatedTrait
1717
import software.amazon.smithy.model.traits.DocumentationTrait
1818
import software.amazon.smithy.model.traits.EnumDefinition
1919
import software.amazon.smithy.model.traits.RequiredTrait
20+
import software.amazon.smithy.swift.cod.DocumentationConverter
2021
import software.amazon.smithy.swift.codegen.integration.SectionId
2122
import software.amazon.smithy.swift.codegen.integration.SectionWriter
2223
import software.amazon.smithy.swift.codegen.model.defaultValue
@@ -187,51 +188,13 @@ class SwiftWriter(private val fullPackageName: String) : CodeWriter() {
187188
popState()
188189
}
189190

190-
// Most commonly occurring (but not exhaustive) set of HTML tags found in AWS models
191-
private val commonHtmlTags = setOf(
192-
"a",
193-
"b",
194-
"code",
195-
"dd",
196-
"dl",
197-
"dt",
198-
"i",
199-
"important",
200-
"li",
201-
"note",
202-
"p",
203-
"strong",
204-
"ul"
205-
).map { listOf("<$it>", "</$it>") }.flatten()
206-
207-
// Replace characters in the input documentation to prevent issues in codegen or rendering.
208-
// NOTE: Currently we look for specific strings of Html tags commonly found in docs
209-
// and remove them. A better solution would be to generally convert from HTML to "pure"
210-
// markdown such that formatting is preserved.
211-
// TODO: https://github.com/awslabs/aws-sdk-swift/issues/329
212191
fun writeDocs(docs: String) {
192+
val convertedDocs = DocumentationConverter.convert(docs)
213193
writeSingleLineDocs {
214-
write(sanitizeDocumentation(docs))
194+
write(convertedDocs)
215195
}
216196
}
217197

218-
/**
219-
* This function escapes "$" characters so formatters are not run.
220-
*/
221-
private fun sanitizeDocumentation(doc: String): String {
222-
return doc
223-
.stripAll(commonHtmlTags)
224-
.replace("\$", "\$\$")
225-
}
226-
227-
// Remove all strings from source string and return the result
228-
private fun String.stripAll(stripList: List<String>): String {
229-
var newStr = this
230-
for (item in stripList) newStr = newStr.replace(item, "")
231-
232-
return newStr
233-
}
234-
235198
/**
236199
* Writes shape documentation comments if docs are present.
237200
*/

0 commit comments

Comments
 (0)