Skip to content

Commit c44bced

Browse files
authored
Merge pull request commonmark#267 from nwellnhof/xml2md
XSLT stylesheet to convert cmark XML back to Commonmark
2 parents 63c675f + 75870fb commit c44bced

File tree

1 file changed

+319
-0
lines changed

1 file changed

+319
-0
lines changed

tools/xml2md.xsl

Lines changed: 319 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,319 @@
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
3+
<!--
4+
5+
xml2md.xsl
6+
==========
7+
8+
This XSLT stylesheet transforms the cmark XML format back to Commonmark.
9+
Since the XML output is lossy, a lossless MD->XML->MD roundtrip isn't
10+
possible. The XML->MD->XML roundtrip should produce the original XML,
11+
though.
12+
13+
Example usage with xsltproc:
14+
15+
cmark -t xml doc.md | xsltproc -novalid xml2md.xsl -
16+
17+
-->
18+
19+
<xsl:stylesheet
20+
version="1.0"
21+
xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
22+
xmlns:md="http://commonmark.org/xml/1.0">
23+
24+
<xsl:output method="text" encoding="utf-8"/>
25+
26+
<!-- Generic templates -->
27+
28+
<!--
    Entry point: the document node and the md:document element only recurse
    into their CommonMark element children.

    md:list is deliberately not matched here. It has a dedicated template
    later in the stylesheet, and two rules of equal priority matching the
    same node are an error under XSLT 1.0 conflict resolution (a processor
    may report it, or recover by silently picking the last rule).
-->
<xsl:template match="/ | md:document">
    <xsl:apply-templates select="md:*"/>
</xsl:template>
31+
32+
<!--
    Fallback for any CommonMark element without a dedicated template:
    report its local name through xsl:message and produce no output.
-->
<xsl:template match="md:*">
    <xsl:variable name="element-name" select="local-name(.)"/>
    <xsl:message>
        <xsl:value-of select="concat('Unsupported element ', &quot;'&quot;, $element-name, &quot;'&quot;)"/>
    </xsl:message>
</xsl:template>
35+
36+
<!--
    Mode "indent": by default an element contributes no line prefix.
    Templates in this mode for specific elements (md:item, md:block_quote)
    supply the prefix that continuation lines inside them need.
-->
<xsl:template mode="indent" match="md:*"></xsl:template>
37+
38+
<!-- Indent blocks -->
39+
40+
<!--
    Mode "indent-block": emits whatever must precede a block-level element.

    The first element child of a container emits nothing. Any later sibling
    is preceded by the prefixes contributed by its ancestors (mode "indent").
    Unless the nearest enclosing list is tight, a separator line made of
    those prefixes followed by a newline is emitted first.
-->
<xsl:template match="md:*" mode="indent-block">
    <xsl:variable name="has-predecessor" select="boolean(preceding-sibling::md:*)"/>
    <xsl:variable name="in-tight-list" select="boolean(ancestor::md:list[1][@tight = 'true'])"/>
    <!-- Separator line between sibling blocks (skipped in tight lists) -->
    <xsl:if test="$has-predecessor and not($in-tight-list)">
        <xsl:apply-templates select="ancestor::md:*" mode="indent"/>
        <xsl:text>&#10;</xsl:text>
    </xsl:if>
    <!-- Prefix for the first line of this block -->
    <xsl:if test="$has-predecessor">
        <xsl:apply-templates select="ancestor::md:*" mode="indent"/>
    </xsl:if>
</xsl:template>
49+
50+
<!-- Heading -->
51+
52+
<!--
    ATX heading: block prefix, then the heading marker, the inline content
    and a terminating newline. The marker is taken from the tail of the
    string '###### ', starting at offset 7 - @level, which leaves @level
    hash characters followed by one space.
-->
<xsl:template match="md:heading">
    <xsl:variable name="marker" select="substring('###### ', 7 - @level)"/>
    <xsl:apply-templates mode="indent-block" select="."/>
    <xsl:value-of select="$marker"/>
    <xsl:apply-templates select="child::md:*"/>
    <xsl:value-of select="'&#10;'"/>
</xsl:template>
58+
59+
<!-- Paragraph -->
60+
61+
<!--
    Paragraph: block prefix, inline content, terminating newline.
-->
<xsl:template match="md:paragraph">
    <xsl:apply-templates mode="indent-block" select="."/>
    <xsl:apply-templates select="child::md:*"/>
    <xsl:value-of select="'&#10;'"/>
</xsl:template>
66+
67+
<!-- Thematic break -->
68+
69+
<!--
    Thematic break: block prefix followed by a line of three asterisks.
-->
<xsl:template match="md:thematic_break">
    <xsl:apply-templates mode="indent-block" select="."/>
    <xsl:text>***</xsl:text>
    <xsl:text>&#10;</xsl:text>
</xsl:template>
73+
74+
<!-- List -->
75+
76+
<!--
    List container: emits its own block prefix, then processes its element
    children. The markers themselves are produced by the md:item template.
-->
<xsl:template match="md:list">
    <xsl:apply-templates mode="indent-block" select="."/>
    <xsl:apply-templates select="child::md:*"/>
</xsl:template>
80+
81+
<!--
    List item: block prefix, list marker, then the item's blocks.

    Bullet lists use "-". Ordered lists use the running number
    (@start of the list plus the item's position minus one) followed by
    "." or ")" according to the list's @delim.

    An item with element children gets a single space after the marker and
    its first block continues on the same line. An item without children
    emits only the marker and a newline, so that the following item does
    not end up on the same output line.
-->
<xsl:template match="md:item">
    <xsl:apply-templates select="." mode="indent-block"/>
    <xsl:choose>
        <xsl:when test="../@type = 'bullet'">-</xsl:when>
        <xsl:when test="../@type = 'ordered'">
            <!-- position() is the index within the list's children here,
                 because items are reached through the list's md:* selection -->
            <xsl:value-of select="../@start + position() - 1"/>
            <xsl:choose>
                <xsl:when test="../@delim = 'period'">.</xsl:when>
                <xsl:when test="../@delim = 'paren'">)</xsl:when>
            </xsl:choose>
        </xsl:when>
    </xsl:choose>
    <xsl:choose>
        <xsl:when test="md:*">
            <xsl:text> </xsl:text>
            <xsl:apply-templates select="md:*"/>
        </xsl:when>
        <xsl:otherwise>
            <!-- Empty item: terminate the marker line ourselves -->
            <xsl:text>&#10;</xsl:text>
        </xsl:otherwise>
    </xsl:choose>
</xsl:template>
96+
97+
<!--
    Mode "indent" for list items: the prefix for continuation lines inside
    an item. Continuation lines must be indented to the column where the
    item's content starts, i.e. the width of the marker plus one space.

      bullet  : "-" plus a space                      = 2 columns
      ordered : digits of the number, delimiter, space = digits + 2 columns

    The item number is recomputed as @start plus the count of preceding
    sibling items, which matches the number printed by the md:item template.
    position() cannot be used here because this template is applied to an
    ancestor node set, not to the list's children.
-->
<xsl:template match="md:item" mode="indent">
    <xsl:choose>
        <xsl:when test="../@type = 'bullet'">
            <!-- Character references keep the exact space count explicit -->
            <xsl:text>&#32;&#32;</xsl:text>
        </xsl:when>
        <xsl:when test="../@type = 'ordered'">
            <xsl:variable name="number" select="../@start + count(preceding-sibling::md:item)"/>
            <xsl:variable name="width" select="string-length(string($number)) + 2"/>
            <!-- Twelve spaces cover numbers of up to ten digits -->
            <xsl:value-of select="substring('            ', 1, $width)"/>
        </xsl:when>
    </xsl:choose>
</xsl:template>
107+
108+
<!-- Block quote -->
109+
110+
<!--
    Block quote: block prefix, the ">" marker, then the quoted blocks.

    When the quote has element children, the marker is followed by a space
    and the first child continues on the same line. An empty block quote
    emits only the marker and a newline, so that whatever follows does not
    end up on the same output line.
-->
<xsl:template match="md:block_quote">
    <xsl:apply-templates select="." mode="indent-block"/>
    <xsl:text>&gt;</xsl:text>
    <xsl:choose>
        <xsl:when test="md:*">
            <xsl:text> </xsl:text>
            <xsl:apply-templates select="md:*"/>
        </xsl:when>
        <xsl:otherwise>
            <!-- Empty quote: terminate the marker line ourselves -->
            <xsl:text>&#10;</xsl:text>
        </xsl:otherwise>
    </xsl:choose>
</xsl:template>
115+
116+
<!--
    Mode "indent" for block quotes: every continuation line inside the
    quote is prefixed with the quote marker and a space.
-->
<xsl:template mode="indent" match="md:block_quote">
    <xsl:value-of select="'&gt; '"/>
</xsl:template>
119+
120+
<!-- Code block -->
121+
122+
<!--
    Code block, always written as a fenced block.

    The fence starts as three backticks and is lengthened by the
    "code-delim" template until it no longer occurs inside the code. The
    opening fence is followed by the @info string. Each code line is
    written through "indent-lines", which prefixes it with the ancestors'
    indentation; the closing fence gets the same prefix.
-->
<xsl:template match="md:code_block">
    <xsl:variable name="content" select="string(.)"/>
    <xsl:variable name="fence">
        <xsl:call-template name="code-delim">
            <xsl:with-param name="delim" select="'```'"/>
            <xsl:with-param name="text" select="$content"/>
        </xsl:call-template>
    </xsl:variable>

    <xsl:apply-templates mode="indent-block" select="."/>

    <!-- Opening fence with info string -->
    <xsl:value-of select="concat($fence, @info, '&#10;')"/>

    <!-- Code lines -->
    <xsl:call-template name="indent-lines">
        <xsl:with-param name="code" select="$content"/>
    </xsl:call-template>

    <!-- Closing fence -->
    <xsl:apply-templates mode="indent" select="ancestor::md:*"/>
    <xsl:value-of select="concat($fence, '&#10;')"/>
</xsl:template>
143+
144+
<!-- HTML block -->
145+
146+
<!--
    HTML block: the raw HTML is copied through unescaped.

    The text before the first newline follows the block prefix directly;
    the text after that newline goes through "indent-lines", so each
    further line is prefixed with the ancestors' indentation.
-->
<xsl:template match="md:html_block">
    <xsl:variable name="html" select="string(.)"/>
    <xsl:variable name="first-line" select="substring-before($html, '&#10;')"/>
    <xsl:variable name="remaining" select="substring-after($html, '&#10;')"/>

    <xsl:apply-templates mode="indent-block" select="."/>
    <xsl:value-of select="concat($first-line, '&#10;')"/>
    <xsl:call-template name="indent-lines">
        <xsl:with-param name="code" select="$remaining"/>
    </xsl:call-template>
</xsl:template>
154+
155+
<!-- Indent multiple lines -->
156+
157+
<!--
    Named template "indent-lines".

    Parameter:
      code : text consisting of newline-terminated lines.

    Writes each newline-terminated line of $code, prefixed with the
    indentation contributed by the ancestors of the current node (mode
    "indent"). Recursion stops as soon as the remaining text contains no
    newline, so any trailing text without a final newline is not written.
-->
<xsl:template name="indent-lines">
    <xsl:param name="code"/>
    <xsl:variable name="line" select="substring-before($code, '&#10;')"/>
    <xsl:variable name="rest" select="substring-after($code, '&#10;')"/>
    <xsl:if test="contains($code, '&#10;')">
        <xsl:apply-templates mode="indent" select="ancestor::md:*"/>
        <xsl:value-of select="concat($line, '&#10;')"/>
        <xsl:call-template name="indent-lines">
            <xsl:with-param name="code" select="$rest"/>
        </xsl:call-template>
    </xsl:if>
</xsl:template>
168+
169+
<!-- Text -->
170+
171+
<!--
    Text node: writes the text with Markdown-significant characters
    backslash-escaped.

    Besides the general escaping done by "escape-text", the start of the
    text is checked for sequences that could be read as block syntax:
      * digits followed by "." or ")" (ordered list marker): the delimiter
        is escaped;
      * a leading "-", "+", ">", "#", "=" or "~": that character is escaped.
    The check is applied to every text node, whether or not it actually
    starts a line.
-->
<xsl:template match="md:text">
    <xsl:variable name="t" select="string(.)"/>
    <xsl:variable name="first" select="substring($t, 1, 1)"/>
    <!-- First ten characters with all digits removed. If this starts with
         a delimiter that is not the first character of the text itself,
         the text begins with one or more digits followed by the delimiter. -->
    <xsl:variable name="marker-check" select="translate(substring($t, 1, 10), '0123456789', '')"/>
    <xsl:choose>
        <!-- Escape ordered list markers -->
        <xsl:when test="starts-with($marker-check, '.') and $first != '.'">
            <xsl:value-of select="substring-before($t, '.')"/>
            <xsl:text>\.</xsl:text>
            <xsl:call-template name="escape-text">
                <xsl:with-param name="text" select="substring-after($t, '.')"/>
            </xsl:call-template>
        </xsl:when>
        <xsl:when test="starts-with($marker-check, ')') and $first != ')'">
            <xsl:value-of select="substring-before($t, ')')"/>
            <xsl:text>\)</xsl:text>
            <xsl:call-template name="escape-text">
                <xsl:with-param name="text" select="substring-after($t, ')')"/>
            </xsl:call-template>
        </xsl:when>
        <!-- Escape leading block characters. The explicit non-empty test is
             required because contains() is true for an empty second
             argument, which would emit a lone backslash for empty text. -->
        <xsl:when test="$first != '' and contains('-+>#=~', $first)">
            <xsl:text>\</xsl:text>
            <xsl:value-of select="$first"/>
            <xsl:call-template name="escape-text">
                <xsl:with-param name="text" select="substring($t, 2)"/>
            </xsl:call-template>
        </xsl:when>
        <!-- Otherwise -->
        <xsl:otherwise>
            <xsl:call-template name="escape-text">
                <xsl:with-param name="text" select="$t"/>
            </xsl:call-template>
        </xsl:otherwise>
    </xsl:choose>
</xsl:template>
207+
208+
<!-- Breaks -->
209+
210+
<!--
    Soft line break: a plain newline, followed by the ancestors'
    indentation so the continuation line stays inside its containers.
-->
<xsl:template match="md:softbreak">
    <xsl:value-of select="'&#10;'"/>
    <xsl:apply-templates mode="indent" select="ancestor::md:*"/>
</xsl:template>
214+
215+
<!--
    Hard line break: two trailing spaces and a newline, followed by the
    ancestors' indentation for the continuation line.

    CommonMark requires at least two spaces before the line ending for a
    hard break; a single space would be read back as a soft break. The
    spaces are written as character references so their count is explicit
    and cannot be lost to whitespace normalisation of the stylesheet source.
-->
<xsl:template match="md:linebreak">
    <xsl:text>&#32;&#32;&#10;</xsl:text>
    <xsl:apply-templates select="ancestor::md:*" mode="indent"/>
</xsl:template>
219+
220+
<!-- Emphasis -->
221+
222+
<!--
    Emphasis: inline content wrapped in single asterisks.
-->
<xsl:template match="md:emph">
    <xsl:value-of select="'*'"/>
    <xsl:apply-templates select="child::md:*"/>
    <xsl:value-of select="'*'"/>
</xsl:template>
227+
228+
<!--
    Strong emphasis: inline content wrapped in double asterisks.
-->
<xsl:template match="md:strong">
    <xsl:value-of select="'**'"/>
    <xsl:apply-templates select="child::md:*"/>
    <xsl:value-of select="'**'"/>
</xsl:template>
233+
234+
<!-- Inline code -->
235+
236+
<!--
    Inline code span.

    The delimiter starts as a single backtick and is lengthened by the
    "code-delim" template until it no longer occurs inside the code.

    If the code itself starts or ends with a backtick, it would merge with
    the adjacent delimiter run and change the span's boundaries. In that
    case the content is padded with one space on both sides; a CommonMark
    parser strips that padding again when reading the span.
-->
<xsl:template match="md:code">
    <xsl:variable name="t" select="string(.)"/>
    <xsl:variable name="delim">
        <xsl:call-template name="code-delim">
            <xsl:with-param name="text" select="$t"/>
            <xsl:with-param name="delim" select="'`'"/>
        </xsl:call-template>
    </xsl:variable>
    <!-- One space when the content touches a delimiter with a backtick,
         otherwise empty -->
    <xsl:variable name="pad">
        <xsl:if test="starts-with($t, '`') or substring($t, string-length($t)) = '`'">
            <xsl:text> </xsl:text>
        </xsl:if>
    </xsl:variable>
    <xsl:value-of select="$delim"/>
    <xsl:value-of select="$pad"/>
    <xsl:value-of select="$t"/>
    <xsl:value-of select="$pad"/>
    <xsl:value-of select="$delim"/>
</xsl:template>
248+
249+
<!-- Links and images -->
250+
251+
<!--
    Links and images, written in inline form:

        [content](destination "title")

    Images are prefixed with "!". Parentheses in the destination and
    double quotes in the title are backslash-escaped via "escape-text".
    The quoted title is only written when @title is non-empty.
-->
<xsl:template match="md:link | md:image">
    <xsl:variable name="destination" select="string(@destination)"/>
    <xsl:variable name="title" select="string(@title)"/>

    <xsl:if test="self::md:image">
        <xsl:text>!</xsl:text>
    </xsl:if>

    <!-- Link text or image description -->
    <xsl:text>[</xsl:text>
    <xsl:apply-templates select="child::md:*"/>
    <xsl:text>](</xsl:text>

    <!-- Destination -->
    <xsl:call-template name="escape-text">
        <xsl:with-param name="escape" select="'()'"/>
        <xsl:with-param name="text" select="$destination"/>
    </xsl:call-template>

    <!-- Optional title -->
    <xsl:if test="$title != ''">
        <xsl:text> "</xsl:text>
        <xsl:call-template name="escape-text">
            <xsl:with-param name="escape" select="'&quot;'"/>
            <xsl:with-param name="text" select="$title"/>
        </xsl:call-template>
        <xsl:text>"</xsl:text>
    </xsl:if>

    <xsl:text>)</xsl:text>
</xsl:template>
270+
271+
<!-- Inline HTML -->
272+
273+
<!--
    Inline HTML: the raw HTML is copied through without any escaping.
-->
<xsl:template match="md:html_inline">
    <xsl:value-of select="string(.)"/>
</xsl:template>
276+
277+
<!-- Escaping helpers -->
278+
279+
<!--
    Named template "escape-text".

    Parameters:
      text   : the string to write.
      escape : the set of characters to backslash-escape; defaults to the
               characters * _ ` < [ ] and the ampersand.

    Writes $text with a backslash in front of every character from $escape.
    A backslash already present in $text is escaped as well, because the
    search below looks for backslashes in the masked copy.

    Method: translate() produces a copy of $text in which every character
    of $escape is replaced by a backslash, so both strings have the same
    length and the first backslash in the copy marks the first character
    needing an escape. Everything before it is written as is, then a
    backslash and the original character, and the rest is handled by
    recursion.

    Note: the replacement string holds seven backslashes, so $escape may
    contain at most seven characters; translate() would delete any further
    ones instead of masking them.
-->
<xsl:template name="escape-text">
    <xsl:param name="text"/>
    <xsl:param name="escape" select="'*_`&lt;[]&amp;'"/>

    <xsl:variable name="masked" select="translate($text, $escape, '\\\\\\\')"/>
    <xsl:choose>
        <xsl:when test="not(contains($masked, '\'))">
            <!-- Nothing left to escape -->
            <xsl:value-of select="$text"/>
        </xsl:when>
        <xsl:otherwise>
            <xsl:variable name="prefix" select="substring-before($masked, '\')"/>
            <xsl:variable name="prefix-length" select="string-length($prefix)"/>
            <xsl:value-of select="$prefix"/>
            <xsl:text>\</xsl:text>
            <!-- The character itself comes from the unmasked text -->
            <xsl:value-of select="substring($text, $prefix-length + 1, 1)"/>
            <xsl:call-template name="escape-text">
                <xsl:with-param name="escape" select="$escape"/>
                <xsl:with-param name="text" select="substring($text, $prefix-length + 2)"/>
            </xsl:call-template>
        </xsl:otherwise>
    </xsl:choose>
</xsl:template>
301+
302+
<!--
    Named template "code-delim".

    Parameters:
      text  : the code the delimiter has to enclose.
      delim : the candidate delimiter, a run of backticks.

    Writes the shortest extension of $delim that does not occur inside
    $text: as long as $text contains the candidate, one more backtick is
    appended and the check is repeated.
-->
<xsl:template name="code-delim">
    <xsl:param name="text"/>
    <xsl:param name="delim"/>

    <xsl:choose>
        <xsl:when test="not(contains($text, $delim))">
            <xsl:value-of select="$delim"/>
        </xsl:when>
        <xsl:otherwise>
            <xsl:call-template name="code-delim">
                <xsl:with-param name="delim" select="concat($delim, '`')"/>
                <xsl:with-param name="text" select="$text"/>
            </xsl:call-template>
        </xsl:otherwise>
    </xsl:choose>
</xsl:template>
318+
319+
</xsl:stylesheet>

0 commit comments

Comments
 (0)