1
1
open Omd
2
2
open Base
3
3
4
- let escape_url_chars = Staged. unstage @@ String.Escaping. escape ~escapeworthy: [ '<' ; '>' ; '|' ] ~escape_char: '\\'
5
-
6
4
(* https://api.slack.com/reference/surfaces/formatting#escaping *)
7
5
let escape_mrkdwn =
8
6
String. concat_map ~f: (function
@@ -11,56 +9,198 @@ let escape_mrkdwn =
11
9
| '&' -> " &"
12
10
| c -> String. make 1 c)
13
11
14
- (* * Unescape markdown characters escaped because "any ASCII punctuation
15
- character may be backslash-escaped"
16
- https://spec.commonmark.org/0.30/#backslash-escapes
17
-
18
- This pertains to '\\', '[', ']', '(', ')', '`', '*' unconditionally,
19
- and '.', '-', '+', '!', '<', '>', '#' depending on chars before/after.
20
- Argument escapeworthy_map can be left blank because escaped chars are
21
- unescaped to themselves. *)
22
- let unescape_omd = Staged. unstage @@ String.Escaping. unescape_gen_exn ~escapeworthy_map: [] ~escape_char: '\\'
23
-
24
- (* * Escape the `escape_char` '\\' for use with `unescape_omd` later *)
25
- let escape_omd = Staged. unstage @@ String.Escaping. escape_gen_exn ~escapeworthy_map: [] ~escape_char: '\\'
26
-
27
- let transform_text = escape_mrkdwn
28
-
29
- (* * `Omd.to_markdown` escapes backslash (and other applicable chars) in
30
- `Text` elements but not `Code` elements, so do the same for the latter so
31
- that `unescape_omd` can apply uniformly to the whole mrkdwn string later *)
32
- let transform_code s = escape_omd @@ escape_mrkdwn s
33
-
34
- let rec transform_list = List. map ~f: transform
35
-
36
- and transform_flatten = List. map ~f: transform_list
37
-
38
- and surround s t =
39
- let t = to_markdown @@ transform_list t in
40
- Raw (Printf. sprintf " %s%s%s" s t s)
12
+ (* * Translates omd AST to a Slack mrkdwn string. Code heavily adapted
13
+ from omd 1.3.1 source.
14
+ https://github.com/ocaml/omd/blob/1.3.1/src/omd_backend.ml#L872
15
+ *)
16
+ let rec mrkdwn_of_md md =
17
+ let b = Buffer. create 128 in
18
+ let references : ref_container option ref = ref None in
19
+ let nl b = Buffer. add_char b '\n' in
20
+ let nl_if_needed_above b =
21
+ if Buffer. length b > 0 && (not @@ Char. equal '\n' (Buffer. nth b (Buffer. length b - 1 ))) then nl b
22
+ in
23
+ let add_spaces n =
24
+ for _i = 1 to n do
25
+ Buffer. add_char b ' '
26
+ done
27
+ in
28
+ let rec loop ?(fst_p_in_li = true ) ?(is_in_list = false ) list_indent =
29
+ (* [list_indent: int] is the indentation level in number of spaces.
30
+ [fst_p_in_li: bool] is used to apply different indentation to the first
31
+ paragraph in a list items.
32
+ [is_in_list: bool] is necessary to know if we are inside a paragraph
33
+ which is inside a list item because those need to be indented!
34
+ *)
35
+ let loop ?(fst_p_in_li = fst_p_in_li) ?(is_in_list = is_in_list) list_indent l =
36
+ loop ~fst_p_in_li ~is_in_list list_indent l
37
+ in
38
+ function
39
+ | [] -> ()
40
+ | el :: tl ->
41
+ match el with
42
+ | X _ -> loop list_indent tl
43
+ | Blockquote q ->
44
+ (* mrkdwn doesn't support nested quotes, but output '>' chars anyway*)
45
+ let quote s =
46
+ let b = Buffer. create (String. length s) in
47
+ let l = String. length s in
48
+ let rec loop is_nl i =
49
+ if i < l then begin
50
+ if is_nl && i < l - 1 then Buffer. add_string b " > " ;
51
+ match s.[i] with
52
+ | '\n' ->
53
+ nl b;
54
+ loop true (i + 1 )
55
+ | c ->
56
+ Buffer. add_char b c;
57
+ loop false (i + 1 )
58
+ end
59
+ else Buffer. contents b
60
+ in
61
+ loop true 0
62
+ in
63
+ Buffer. add_string b (quote @@ mrkdwn_of_md q);
64
+ if not @@ List. is_empty tl then nl_if_needed_above b;
65
+ loop list_indent tl
66
+ | Ref (rc , _name , _text , fallback ) | Img_ref (rc , _name , _text , fallback ) ->
67
+ (* [rc] stores all refs from document, so it's enough to record just the
68
+ first encounter
69
+ *)
70
+ if Option. is_empty ! references then references := Some rc;
71
+ (* [fallback#to_string] renders as
72
+ [<text>][<name>] for Ref, e.g., [interesting fact][1]
73
+ and
74
+ ![<text>][<name>] for Img_ref, e.g., ![image of cat][1]
75
+ *)
76
+ loop list_indent (Raw fallback#to_string :: tl)
77
+ | Paragraph [] -> loop list_indent tl
78
+ | Paragraph md ->
79
+ (* indent if inside a list (Olp or Ulp) *)
80
+ if is_in_list then if fst_p_in_li then add_spaces (list_indent - 2 ) else add_spaces list_indent;
81
+ (* paragraph body + skip line *)
82
+ loop ~fst_p_in_li: false list_indent md;
83
+ nl b;
84
+ nl b;
85
+ loop ~fst_p_in_li: false list_indent tl
86
+ | Img (alt , src , title ) -> loop list_indent (Url (src, [ Text alt ], title) :: tl)
87
+ | Text t ->
88
+ Buffer. add_string b @@ escape_mrkdwn t;
89
+ loop list_indent tl
90
+ | Raw s ->
91
+ Buffer. add_string b s;
92
+ loop list_indent tl
93
+ | Raw_block s ->
94
+ nl b;
95
+ Buffer. add_string b s;
96
+ nl b;
97
+ loop list_indent tl
98
+ | Emph md' ->
99
+ Buffer. add_string b " _" ;
100
+ loop list_indent md';
101
+ Buffer. add_string b " _" ;
102
+ loop list_indent tl
103
+ | Bold md' ->
104
+ Buffer. add_string b " *" ;
105
+ loop list_indent md';
106
+ Buffer. add_string b " *" ;
107
+ loop list_indent tl
108
+ | Ul l ->
109
+ nl_if_needed_above b;
110
+ List. iter l ~f: (fun li ->
111
+ add_spaces list_indent;
112
+ Buffer. add_string b " - " ;
113
+ loop ~is_in_list: true (list_indent + 4 ) li;
114
+ nl_if_needed_above b);
115
+ if list_indent = 0 then nl b;
116
+ loop list_indent tl
117
+ | Ol l ->
118
+ nl_if_needed_above b;
119
+ List. iteri l ~f: (fun i li ->
120
+ add_spaces list_indent;
121
+ Printf. bprintf b " %d. " (i + 1 );
122
+ loop ~is_in_list: true (list_indent + 4 ) li;
123
+ nl_if_needed_above b);
124
+ if list_indent = 0 then nl b;
125
+ loop list_indent tl
126
+ | Ulp l ->
127
+ List. iter l ~f: (fun li ->
128
+ nl_if_needed_above b;
129
+ add_spaces list_indent;
130
+ Buffer. add_string b " - " ;
131
+ loop ~is_in_list: true (list_indent + 4 ) li (* Paragraphs => No need of '\n' *) );
132
+ loop list_indent tl
133
+ | Olp l ->
134
+ List. iteri l ~f: (fun i li ->
135
+ nl_if_needed_above b;
136
+ add_spaces list_indent;
137
+ Printf. bprintf b " %d. " i;
138
+ loop ~is_in_list: true (list_indent + 4 ) li (* Paragraphs => No need of '\n' *) );
139
+ loop list_indent tl
140
+ | Code_block (_lang , c ) ->
141
+ (* unlike commonmark, can't have code block inside lists, so print code block with
142
+ zero indent, but continue rest of the list at correct indent after
41
143
42
- (* * massage markdown AST so that rendered result looks like slack mrkdwn *)
43
- and transform = function
44
- | H1 t | H2 t | H3 t | H4 t | H5 t | H6 t -> Paragraph (transform_list [ Bold t ])
45
- | Paragraph t -> Paragraph (transform_list t)
46
- | Emph t -> surround " _" (transform_list t)
47
- | Bold t -> surround " *" (transform_list t)
48
- | Ul ts -> Ul (transform_flatten ts)
49
- | Ol ts -> Ol (transform_flatten ts)
50
- | Ulp ts -> Ulp (transform_flatten ts)
51
- | Olp ts -> Olp (transform_flatten ts)
52
- | Url (href , label , title ) ->
53
- let label = escape_url_chars @@ to_markdown @@ transform_list label in
54
- let title = if String. length title > 0 then Printf. sprintf " %s - " @@ escape_url_chars title else title in
55
- Raw (Printf. sprintf " <%s|%s%s>" href title label)
56
- | Html _ as e -> Raw (Printf. sprintf " `%s`" @@ to_markdown [ e ])
57
- | Html_comment _ -> Br
58
- | Html_block _ as e -> Code_block (" " , to_markdown [ e ])
59
- | Blockquote t -> Blockquote (transform_list t)
60
- | Img (alt , src , title ) -> transform @@ Url (src, [ Text alt ], title)
61
- | Code_block (_ , str ) -> Code_block (" " , transform_code str)
62
- | Code (_ , str ) -> Code (" " , transform_code str)
63
- | Text s -> Text (transform_text s)
64
- | (Br | Hr | NL | Ref _ | Img_ref _ | Raw _ | Raw_block _ | X _ ) as e -> e
144
+ note: sometimes indentation intended as list item paragraph is wrongly
145
+ interpreted as code block - an issue with Omd.of_string
146
+ e.g., both should be parsed the second way, but aren't:
147
+ # of_string " - foo\n\n bar";;
148
+ - : t = [Ul [[Text "foo"]]; NL; NL; Code_block ("", "bar")]
149
+ # of_string "- foo\n\n bar";;
150
+ - : t = [Ulp [[Paragraph [Text "foo"]; Paragraph [Text "bar"]]]]
151
+ *)
152
+ nl_if_needed_above b;
153
+ Buffer. add_string b " ```\n " ;
154
+ Buffer. add_string b (escape_mrkdwn c);
155
+ nl_if_needed_above b;
156
+ Buffer. add_string b " ```\n " ;
157
+ loop list_indent tl
158
+ | Code (_lang , c ) ->
159
+ (* sadly, slack mrkdwn has no way to escape backticks within in-line code,
160
+ so broken markup is unavoidable
161
+ *)
162
+ Buffer. add_char b '`' ;
163
+ Buffer. add_string b (escape_mrkdwn c);
164
+ Buffer. add_char b '`' ;
165
+ loop list_indent tl
166
+ | Hr ->
167
+ Buffer. add_string b " * * *\n " ;
168
+ loop list_indent tl
169
+ | Html (_tagname , _attrs , _body ) as html -> loop list_indent (Code (" " , to_html [ html ]) :: tl)
170
+ | Html_block (_tagname , _attrs , _body ) as html -> loop list_indent (Code_block (" " , to_html [ html ]) :: tl)
171
+ | Html_comment _s -> loop list_indent tl
172
+ | Url (href , s , title ) ->
173
+ Buffer. add_char b '<' ;
174
+ Buffer. add_string b href;
175
+ Buffer. add_char b '|' ;
176
+ if String. length title > 0 then Printf. bprintf b " %s - " @@ escape_mrkdwn title;
177
+ loop list_indent s;
178
+ Buffer. add_char b '>' ;
179
+ loop list_indent tl
180
+ | H1 md' | H2 md' | H3 md' | H4 md' | H5 md' | H6 md' -> loop list_indent (Paragraph [ Bold md' ] :: tl)
181
+ | Br ->
182
+ (* the string "\\n" (backslash-newline) or end of line double-space renders Br *)
183
+ nl b;
184
+ loop list_indent tl
185
+ | NL ->
186
+ (* the string "\n" renders NL *)
187
+ nl_if_needed_above b;
188
+ loop list_indent tl
189
+ in
190
+ (* print the document *)
191
+ loop 0 md;
192
+ (* print any references *)
193
+ begin
194
+ match ! references with
195
+ | None -> ()
196
+ | Some r ->
197
+ let print_ref (name , (url , title )) =
198
+ if String. equal title " " then Printf. bprintf b " [%s]: %s \n " name url
199
+ else Printf. bprintf b " [%s]: %s \" %s\"\n " name url title
200
+ in
201
+ nl b;
202
+ List. iter ~f: print_ref r#get_all
203
+ end ;
204
+ Buffer. contents b
65
205
66
- let mrkdwn_of_markdown str = unescape_omd @@ to_markdown @@ transform_list @@ of_string str
206
+ let mrkdwn_of_markdown str = mrkdwn_of_md @@ of_string str
0 commit comments