Skip to content

Commit 0707ada

Browse files
committed
XSLT stylesheet to convert cmark XML back to Commonmark
Initial version of an XSLT stylesheet that converts the XML format produced by `cmark -t xml` back to Commonmark. Fixes commonmark#264
1 parent 63c675f commit 0707ada

File tree

1 file changed

+308
-0
lines changed

1 file changed

+308
-0
lines changed

tools/xml2md.xsl

Lines changed: 308 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,308 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
3+
<!--
4+
5+
xml2md.xsl
6+
==========
7+
8+
This XSLT stylesheet transforms the cmark XML format back to Commonmark.
9+
Since the XML output is lossy, a lossless MD->XML->MD roundtrip isn't
10+
possible. The XML->MD->XML roundtrip should produce the original XML,
11+
though.
12+
13+
HTML blocks and inlines aren't supported.
14+
15+
Example usage with xsltproc:
16+
17+
cmark -t xml doc.md | xsltproc -novalid xml2md.xsl -
18+
19+
-->
20+
21+
<xsl:stylesheet
22+
version="1.0"
23+
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
24+
xmlns:md="http://commonmark.org/xml/1.0">
25+
26+
<xsl:output method="text" encoding="utf-8"/>
27+
28+
<!-- Generic templates -->
29+
30+
<!--
    Entry points: the document root and the md:document element produce no
    output of their own and simply process their md:* children.

    md:list is deliberately not part of this pattern. It has its own
    dedicated template, and two rules of equal priority matching the same
    node are a conflict that an XSLT 1.0 processor may either report as an
    error or resolve silently by picking the last rule.
-->
<xsl:template match="/ | md:document">
    <xsl:apply-templates select="md:*"/>
</xsl:template>
33+
34+
<!--
    Fallback for any md:* element without a more specific rule: nothing is
    written to the result, only a diagnostic message naming the element.
-->
<xsl:template match="md:*">
    <xsl:variable name="element-name" select="local-name()"/>
    <xsl:message>
        <xsl:text>Unsupported element '</xsl:text>
        <xsl:value-of select="$element-name"/>
        <xsl:text>'</xsl:text>
    </xsl:message>
</xsl:template>
37+
38+
<!--
    Default rule for the "indent" mode: an element contributes no line
    prefix unless a more specific indent-mode template says otherwise.
-->
<xsl:template match="md:*" mode="indent">
    <!-- Intentionally empty. -->
</xsl:template>
39+
40+
<!-- Indent blocks -->
41+
42+
<!--
    Emits the separator that precedes a block. Nothing is written for the
    first md:* child of its parent. For later siblings, a prefixed blank
    line is written unless the nearest enclosing list is tight, and then
    the line prefix contributed by all ancestors (mode "indent").
-->
<xsl:template match="md:*" mode="indent-block">
    <xsl:variable name="has-previous" select="boolean(preceding-sibling::md:*)"/>
    <xsl:variable name="in-tight-list" select="boolean(ancestor::md:list[1][@tight='true'])"/>

    <!-- Blank separator line, carrying the ancestors' prefix. -->
    <xsl:if test="$has-previous and not($in-tight-list)">
        <xsl:apply-templates select="ancestor::md:*" mode="indent"/>
        <xsl:text>&#10;</xsl:text>
    </xsl:if>

    <!-- Prefix for the first line of the block itself. -->
    <xsl:if test="$has-previous">
        <xsl:apply-templates select="ancestor::md:*" mode="indent"/>
    </xsl:if>
</xsl:template>
51+
52+
<!-- Heading -->
53+
54+
<!--
    ATX heading: block separator, then @level '#' characters and a space,
    the inline content, and a newline.
-->
<xsl:template match="md:heading">
    <!-- Taking the tail of '###### ' yields @level hashes plus the space. -->
    <xsl:variable name="marker" select="substring('###### ', 7 - @level)"/>

    <xsl:apply-templates select="." mode="indent-block"/>
    <xsl:value-of select="$marker"/>
    <xsl:apply-templates select="md:*"/>
    <xsl:value-of select="'&#10;'"/>
</xsl:template>
60+
61+
<!-- Paragraph -->
62+
63+
<!-- Paragraph: block separator, inline content, terminating newline. -->
<xsl:template match="md:paragraph">
    <xsl:apply-templates select="self::node()" mode="indent-block"/>
    <xsl:apply-templates select="child::md:*"/>
    <xsl:value-of select="'&#10;'"/>
</xsl:template>
68+
69+
<!-- Thematic break -->
70+
71+
<!-- Thematic break: block separator followed by a line of three asterisks. -->
<xsl:template match="md:thematic_break">
    <xsl:apply-templates select="self::node()" mode="indent-block"/>
    <xsl:value-of select="'***&#10;'"/>
</xsl:template>
75+
76+
<!-- List -->
77+
78+
<!--
    List container: writes the block separator for the list as a whole and
    then processes its md:* children; the list itself has no marker.
-->
<xsl:template match="md:list">
    <xsl:apply-templates select="self::node()" mode="indent-block"/>
    <xsl:apply-templates select="child::md:*"/>
</xsl:template>
82+
83+
<!--
    List item: block separator, the list marker, one space, then the item's
    block content. Bullet lists use '-'. Ordered lists use the item number
    (the parent's @start plus the item's position minus one) followed by
    '.' or ')' according to the parent's @delim.
-->
<xsl:template match="md:item">
    <xsl:variable name="list-type" select="../@type"/>
    <xsl:variable name="delimiter" select="../@delim"/>

    <xsl:apply-templates select="." mode="indent-block"/>

    <xsl:if test="$list-type = 'bullet'">
        <xsl:text>-</xsl:text>
    </xsl:if>
    <xsl:if test="$list-type = 'ordered'">
        <xsl:value-of select="../@start + position() - 1"/>
        <xsl:if test="$delimiter = 'period'">
            <xsl:text>.</xsl:text>
        </xsl:if>
        <xsl:if test="$delimiter = 'paren'">
            <xsl:text>)</xsl:text>
        </xsl:if>
    </xsl:if>

    <xsl:text> </xsl:text>
    <xsl:apply-templates select="md:*"/>
</xsl:template>
98+
99+
<!--
    Line prefix contributed by a list item to every continuation line of
    its content (mode "indent").

    CommonMark only treats a line as part of a list item when it is
    indented at least as far as the item's content, i.e. by the width of
    the marker plus the space after it. The prefix therefore has to be as
    wide as the marker written by the md:item template:

      * bullet items  ("- ")  : two spaces
      * ordered items ("N. ") : number of digits in N, plus two

    Spaces are written as character references so that they cannot be
    lost to whitespace normalisation of the stylesheet source.
-->
<xsl:template match="md:item" mode="indent">
    <xsl:choose>
        <xsl:when test="../@type = 'bullet'">
            <xsl:text>&#32;&#32;</xsl:text>
        </xsl:when>
        <xsl:when test="../@type = 'ordered'">
            <!-- Same number the md:item template prints for this item:
                 the list's start value plus the count of earlier items. -->
            <xsl:variable name="number" select="../@start + count(preceding-sibling::md:item)"/>
            <!-- Twelve spaces: enough for a ten-digit number plus delimiter and space. -->
            <xsl:variable name="spaces" select="'&#32;&#32;&#32;&#32;&#32;&#32;&#32;&#32;&#32;&#32;&#32;&#32;'"/>
            <xsl:value-of select="substring($spaces, 1, string-length(string($number)) + 2)"/>
        </xsl:when>
    </xsl:choose>
</xsl:template>
109+
110+
<!-- Block quote -->
111+
112+
<!--
    Block quote: block separator, the '> ' marker for the first line, then
    the quoted blocks. Later lines get their marker from the indent-mode
    template for md:block_quote.
-->
<xsl:template match="md:block_quote">
    <xsl:apply-templates select="self::node()" mode="indent-block"/>
    <xsl:value-of select="'&gt; '"/>
    <xsl:apply-templates select="child::md:*"/>
</xsl:template>
117+
118+
<!-- Line prefix contributed by a block quote to each line of its content. -->
<xsl:template match="md:block_quote" mode="indent">
    <xsl:value-of select="'&gt; '"/>
</xsl:template>
121+
122+
<!-- Code block -->
123+
124+
<!--
    Fenced code block: block separator, opening fence followed by the info
    string (copied verbatim from @info), the code lines each prefixed with
    the ancestors' indent, and the closing fence on its own prefixed line.

    The fence is at least three backticks and is lengthened until it does
    not occur anywhere in the code, so that backtick runs inside the code
    cannot close the block early.
-->
<xsl:template match="md:code_block">
    <xsl:variable name="fence">
        <xsl:call-template name="code-fence">
            <xsl:with-param name="code" select="string(.)"/>
        </xsl:call-template>
    </xsl:variable>

    <xsl:apply-templates select="." mode="indent-block"/>
    <xsl:value-of select="$fence"/>
    <xsl:value-of select="@info"/>
    <xsl:text>&#10;</xsl:text>
    <xsl:call-template name="indent-lines">
        <xsl:with-param name="code" select="."/>
    </xsl:call-template>
    <xsl:apply-templates select="ancestor::md:*" mode="indent"/>
    <xsl:value-of select="$fence"/>
    <xsl:text>&#10;</xsl:text>
</xsl:template>

<!--
    Helper for md:code_block. Returns the shortest run of backticks, three
    or more, that is not a substring of $code. Each recursion appends one
    backtick to the candidate $fence.
-->
<xsl:template name="code-fence">
    <xsl:param name="code"/>
    <xsl:param name="fence" select="'```'"/>

    <xsl:choose>
        <xsl:when test="contains($code, $fence)">
            <xsl:call-template name="code-fence">
                <xsl:with-param name="code" select="$code"/>
                <xsl:with-param name="fence" select="concat($fence, '`')"/>
            </xsl:call-template>
        </xsl:when>
        <xsl:otherwise>
            <xsl:value-of select="$fence"/>
        </xsl:otherwise>
    </xsl:choose>
</xsl:template>
137+
138+
<!-- HTML block -->
139+
140+
<!--
    HTML block: block separator, then the content copied through. The
    first line follows the separator directly; the remaining lines are
    passed to indent-lines so each one receives the ancestors' prefix.
-->
<xsl:template match="md:html_block">
    <xsl:variable name="html" select="string(.)"/>
    <xsl:variable name="first-line" select="substring-before($html, '&#10;')"/>
    <xsl:variable name="remaining" select="substring-after($html, '&#10;')"/>

    <xsl:apply-templates select="." mode="indent-block"/>
    <xsl:value-of select="concat($first-line, '&#10;')"/>
    <xsl:call-template name="indent-lines">
        <xsl:with-param name="code" select="$remaining"/>
    </xsl:call-template>
</xsl:template>
148+
149+
<!-- Indent multiple lines -->
150+
151+
<!--
    Writes every newline-terminated line of $code, each preceded by the
    indent prefix of the current node's ancestors. Recurses on the text
    after the first newline and stops when no newline is left, so any
    trailing text without a final newline is not written.
-->
<xsl:template name="indent-lines">
    <xsl:param name="code"/>
    <xsl:variable name="newline" select="'&#10;'"/>

    <xsl:if test="contains($code, $newline)">
        <xsl:variable name="line" select="substring-before($code, $newline)"/>
        <xsl:variable name="rest" select="substring-after($code, $newline)"/>

        <xsl:apply-templates select="ancestor::md:*" mode="indent"/>
        <xsl:value-of select="concat($line, $newline)"/>
        <xsl:call-template name="indent-lines">
            <xsl:with-param name="code" select="$rest"/>
        </xsl:call-template>
    </xsl:if>
</xsl:template>
162+
163+
<!-- Text -->
164+
165+
<!--
    Text node: writes the text with Markdown-significant characters
    escaped. Besides the general escaping done by escape-text, the start
    of the text is checked for sequences that could open a block:

      * digits followed by '.' or ')' (ordered list marker): the
        delimiter is escaped;
      * one of - + > # = ~ as the first character: that character is
        escaped.

    An empty text node writes nothing. The explicit emptiness check is
    needed because contains() is true when its second argument is the
    empty string, which would otherwise emit a lone backslash.
-->
<xsl:template match="md:text">
    <xsl:variable name="t" select="string(.)"/>
    <xsl:variable name="first" select="substring($t, 1, 1)"/>
    <!-- First ten characters with all digits removed. If this starts with
         a delimiter although the text itself does not, the text begins
         with one or more digits followed by that delimiter. -->
    <xsl:variable name="marker-check" select="translate(substring($t, 1, 10), '0123456789', '')"/>
    <xsl:choose>
        <!-- Escape ordered list markers -->
        <xsl:when test="starts-with($marker-check, '.') and $first != '.'">
            <xsl:value-of select="substring-before($t, '.')"/>
            <xsl:text>\.</xsl:text>
            <xsl:call-template name="escape-text">
                <xsl:with-param name="text" select="substring-after($t, '.')"/>
            </xsl:call-template>
        </xsl:when>
        <xsl:when test="starts-with($marker-check, ')') and $first != ')'">
            <xsl:value-of select="substring-before($t, ')')"/>
            <xsl:text>\)</xsl:text>
            <xsl:call-template name="escape-text">
                <xsl:with-param name="text" select="substring-after($t, ')')"/>
            </xsl:call-template>
        </xsl:when>
        <!-- Escape leading block characters -->
        <xsl:when test="$first != '' and contains('-+>#=~', $first)">
            <xsl:text>\</xsl:text>
            <xsl:value-of select="$first"/>
            <xsl:call-template name="escape-text">
                <xsl:with-param name="text" select="substring($t, 2)"/>
            </xsl:call-template>
        </xsl:when>
        <!-- Otherwise -->
        <xsl:otherwise>
            <xsl:call-template name="escape-text">
                <xsl:with-param name="text" select="$t"/>
            </xsl:call-template>
        </xsl:otherwise>
    </xsl:choose>
</xsl:template>
201+
202+
<!-- Breaks -->
203+
204+
<!--
    Soft line break: a newline, then the ancestors' indent prefix so the
    continuation line stays inside its enclosing containers.
-->
<xsl:template match="md:softbreak">
    <xsl:value-of select="'&#10;'"/>
    <xsl:apply-templates select="ancestor::md:*" mode="indent"/>
</xsl:template>
208+
209+
<!--
    Hard line break: two spaces before the newline, then the ancestors'
    indent prefix for the continuation line.

    CommonMark requires at least two trailing spaces for a hard break; a
    single space would be read back as a soft break. The spaces are
    written as character references so they cannot be collapsed or
    trimmed in the stylesheet source.
-->
<xsl:template match="md:linebreak">
    <xsl:text>&#32;&#32;&#10;</xsl:text>
    <xsl:apply-templates select="ancestor::md:*" mode="indent"/>
</xsl:template>
213+
214+
<!-- Emphasis -->
215+
216+
<!-- Emphasis: inline content wrapped in single asterisks. -->
<xsl:template match="md:emph">
    <xsl:variable name="mark" select="'*'"/>
    <xsl:value-of select="$mark"/>
    <xsl:apply-templates select="md:*"/>
    <xsl:value-of select="$mark"/>
</xsl:template>
221+
222+
<!-- Strong emphasis: inline content wrapped in double asterisks. -->
<xsl:template match="md:strong">
    <xsl:variable name="mark" select="'**'"/>
    <xsl:value-of select="$mark"/>
    <xsl:apply-templates select="md:*"/>
    <xsl:value-of select="$mark"/>
</xsl:template>
227+
228+
<!-- Inline code -->
229+
230+
<!--
    Inline code span: hands the element's string value to escape-code,
    which chooses the backtick delimiter and writes the span.
-->
<xsl:template match="md:code">
    <xsl:variable name="content" select="string(.)"/>
    <xsl:call-template name="escape-code">
        <xsl:with-param name="text" select="$content"/>
    </xsl:call-template>
</xsl:template>
235+
236+
<!--
    Writes $text as an inline code span.

    Parameters:
      text   the code span content
      delim  candidate backtick delimiter; callers normally omit it. It
             starts as a single backtick and grows by one backtick per
             recursion until it no longer occurs in $text.

    If the content begins or ends with a backtick, one space is inserted
    between the delimiter and the content on both sides. Without it the
    content's backtick would merge with the delimiter and change the
    delimiter length. CommonMark strips that padding again when parsing.
-->
<xsl:template name="escape-code">
    <xsl:param name="text"/>
    <xsl:param name="delim" select="'`'"/>

    <xsl:choose>
        <xsl:when test="contains($text, $delim)">
            <xsl:call-template name="escape-code">
                <xsl:with-param name="text" select="$text"/>
                <xsl:with-param name="delim" select="concat($delim, '`')"/>
            </xsl:call-template>
        </xsl:when>
        <xsl:otherwise>
            <xsl:variable name="pad">
                <!-- substring($text, string-length($text)) is the last character. -->
                <xsl:if test="starts-with($text, '`') or substring($text, string-length($text)) = '`'">
                    <xsl:text>&#32;</xsl:text>
                </xsl:if>
            </xsl:variable>
            <xsl:value-of select="concat($delim, $pad, $text, $pad, $delim)"/>
        </xsl:otherwise>
    </xsl:choose>
</xsl:template>
254+
255+
<!-- Links and images -->
256+
257+
<!--
    Inline link or image: an optional '!' for images, the link text in
    square brackets, then the destination and, if @title is non-empty,
    the double-quoted title in parentheses.

    Parentheses in the destination and double quotes in the title are
    backslash-escaped. Literal backslashes are escaped in both as well,
    so that a backslash followed by punctuation is not read back as an
    escape sequence.
-->
<xsl:template match="md:link | md:image">
    <xsl:if test="self::md:image">!</xsl:if>
    <xsl:text>[</xsl:text>
    <xsl:apply-templates select="md:*"/>
    <xsl:text>](</xsl:text>
    <xsl:call-template name="escape-text">
        <xsl:with-param name="text" select="string(@destination)"/>
        <xsl:with-param name="escape" select="'()\'"/>
    </xsl:call-template>
    <xsl:if test="string(@title)">
        <xsl:text>&#32;"</xsl:text>
        <xsl:call-template name="escape-text">
            <xsl:with-param name="text" select="string(@title)"/>
            <xsl:with-param name="escape" select="'&quot;\'"/>
        </xsl:call-template>
        <xsl:text>"</xsl:text>
    </xsl:if>
    <xsl:text>)</xsl:text>
</xsl:template>
276+
277+
<!-- Inline HTML -->
278+
279+
<!-- Inline HTML: the element's string value is written unchanged. -->
<xsl:template match="md:html_inline">
    <xsl:value-of select="string(.)"/>
</xsl:template>
282+
283+
<!-- Escape text -->
284+
285+
<!--
    Writes $text with a backslash in front of every character that occurs
    in $escape.

    Parameters:
      text    the string to write
      escape  the set of characters to escape (at most 16 characters).
              The default covers inline syntax characters and the
              backslash itself, so a literal backslash in the input is
              not read back as the start of an escape sequence.

    How it works: translate() maps every character of $escape to a
    backslash, leaving all other characters and the string length
    unchanged. The text before the first backslash in the translated
    copy is therefore safe to copy, and the character at the same offset
    in the original $text is the one to escape. The template then
    recurses on the remainder.

    The replacement string must be at least as long as $escape:
    translate() deletes characters that have no counterpart, which would
    shift the offsets. Surplus replacement characters are ignored.
-->
<xsl:template name="escape-text">
    <xsl:param name="text"/>
    <xsl:param name="escape" select="'*_`&lt;[]&amp;\'"/>

    <xsl:variable name="trans" select="translate($text, $escape, '\\\\\\\\\\\\\\\\')"/>
    <xsl:choose>
        <xsl:when test="contains($trans, '\')">
            <xsl:variable name="safe" select="substring-before($trans, '\')"/>
            <xsl:variable name="l" select="string-length($safe)"/>
            <xsl:value-of select="$safe"/>
            <xsl:text>\</xsl:text>
            <xsl:value-of select="substring($text, $l + 1, 1)"/>
            <xsl:call-template name="escape-text">
                <xsl:with-param name="text" select="substring($text, $l + 2)"/>
                <xsl:with-param name="escape" select="$escape"/>
            </xsl:call-template>
        </xsl:when>
        <xsl:otherwise>
            <xsl:value-of select="$text"/>
        </xsl:otherwise>
    </xsl:choose>
</xsl:template>
307+
308+
</xsl:stylesheet>

0 commit comments

Comments
 (0)