@@ -3,8 +3,9 @@ defmodule Html2Markdown.Converter do
33 Handles the conversion of HTML nodes to Markdown format.
44 """
55
6- alias Html2Markdown.TableConverter
6+ alias Html2Markdown . { TableConverter , Options }
77
8+ @ spec convert_to_markdown ( list ( Floki . html_node ( ) ) , Options . t ( ) ) :: String . t ( )
89 def convert_to_markdown ( document , opts ) do
910 document
1011 |> build_markdown_iolist ( opts )
@@ -16,8 +17,12 @@ defmodule Html2Markdown.Converter do
1617 nodes
1718 |> Enum . reduce ( [ ] , fn node , acc ->
1819 case process_node_to_iolist ( node , opts ) do
19- [ ] -> acc
20- "" -> acc
20+ [ ] ->
21+ acc
22+
23+ "" ->
24+ acc
25+
2126 iodata ->
2227 if acc == [ ] do
2328 [ iodata ]
@@ -150,6 +155,7 @@ defmodule Html2Markdown.Converter do
150155 case List . keyfind ( attrs , "title" , 0 ) do
151156 { "title" , title } ->
152157 [ "" , process_children_to_iolist ( children , opts ) , " (" , title , ")" ]
158+
153159 _ ->
154160 process_children_to_iolist ( children , opts )
155161 end
@@ -161,9 +167,11 @@ defmodule Html2Markdown.Converter do
161167 defp process_node_to_iolist ( { "q" , attrs , children } , opts ) do
162168 # Handle cite attribute if present
163169 quote_content = [ "\" " , process_children_to_iolist ( children , opts ) , "\" " ]
170+
164171 case List . keyfind ( attrs , "cite" , 0 ) do
165172 { "cite" , url } ->
166173 [ quote_content , " (" , url , ")" ]
174+
167175 _ ->
168176 quote_content
169177 end
@@ -174,6 +182,7 @@ defmodule Html2Markdown.Converter do
174182 { "datetime" , datetime } ->
175183 # Include datetime as title attribute in markdown
176184 [ "" , process_children_to_iolist ( children , opts ) , " <time datetime=\" " , datetime , "\" >" ]
185+
177186 _ ->
178187 process_children_to_iolist ( children , opts )
179188 end
@@ -184,6 +193,7 @@ defmodule Html2Markdown.Converter do
184193 { "src" , src } ->
185194 # Convert video to a link
186195 [ "[Video](" , src , ")" ]
196+
187197 _ ->
188198 # Check for source children
189199 "[Video]"
@@ -200,17 +210,18 @@ defmodule Html2Markdown.Converter do
200210 do: [ "\n " , process_children_to_iolist ( children , opts ) , "\n " ]
201211
202212 defp process_node_to_iolist ( { "picture" , _ , children } , opts ) do
203- case Enum . find ( children , fn
204- { tag , _ , _ } when is_binary ( tag ) -> tag == "img"
205- _ -> false
206- end ) do
213+ case Enum . find ( children , fn
214+ { tag , _ , _ } when is_binary ( tag ) -> tag == "img"
215+ _ -> false
216+ end ) do
207217 { "img" , attrs , _ } ->
208218 case { List . keyfind ( attrs , "src" , 0 ) , List . keyfind ( attrs , "alt" , 0 ) } do
209219 { { "src" , src } , { "alt" , alt } } -> [ "" ]
210220 { { "src" , src } , _ } -> [ "" ]
211221 _ -> [ ]
212222 end
213- _ ->
223+
224+ _ ->
214225 # No img found, process children normally
215226 process_children_to_iolist ( children , opts )
216227 end
@@ -236,11 +247,13 @@ defmodule Html2Markdown.Converter do
236247 case List . keyfind ( attrs , "href" , 0 ) do
237248 { "href" , url } ->
238249 children_text = IO . iodata_to_binary ( process_children_to_iolist ( children , opts ) )
250+
239251 if children_text == "" do
240252 [ "[" , url , "](" , url , ")" ]
241253 else
242254 [ "[" , children_text , "](" , url , ")" ]
243255 end
256+
244257 _ ->
245258 process_children_to_iolist ( children , opts )
246259 end
@@ -269,106 +282,116 @@ defmodule Html2Markdown.Converter do
269282
# Renders the children of a definition list as Markdown iodata.
#
# The children are first grouped in document order:
#
#   * a "dt" node opens a new group, closing whichever group was open;
#   * a "dd" node joins the open group, or becomes a term-less group of its
#     own when no group is open;
#   * any other node closes the open group and is rendered by itself.
#
# Each group is rendered as the term followed by its definitions, one per
# line. Groups are separated by a blank line and the whole list is wrapped in
# newlines.
#
# Groups and definitions are accumulated by prepending and reversed once,
# so grouping stays linear in the number of children.
defp process_definition_list_to_iolist(children, opts) when is_list(children) do
  # Pushes the open `{dt, reversed_dds}` group, if any, onto the finished stack,
  # restoring document order of its definitions.
  flush = fn
    finished, nil -> finished
    finished, {dt, dds_rev} -> [{:group, dt, Enum.reverse(dds_rev)} | finished]
  end

  {finished, open} =
    Enum.reduce(children, {[], nil}, fn
      {"dt", _, _} = dt, {finished, open} ->
        # A new term always starts a fresh group.
        {flush.(finished, open), {dt, []}}

      {"dd", _, _} = dd, {finished, {dt, dds_rev}} ->
        # Definition belonging to the currently open term.
        {finished, {dt, [dd | dds_rev]}}

      {"dd", _, _} = dd, {finished, nil} ->
        # Definition without a preceding term: a closed, term-less group.
        {[{:group, nil, [dd]} | finished], nil}

      other, {finished, open} ->
        # Any other node ends the open group and stands alone.
        {[{:other, other} | flush.(finished, open)], nil}
    end)

  # Close the trailing group (if any) and restore document order.
  ordered_groups = Enum.reverse(flush.(finished, open))

  rendered =
    ordered_groups
    |> Enum.map(fn
      {:other, node} ->
        process_node_to_iolist(node, opts)

      {:group, nil, dds} ->
        dds
        |> Enum.map(&process_node_to_iolist(&1, opts))
        |> Enum.intersperse("\n")

      {:group, dt, []} ->
        process_node_to_iolist(dt, opts)

      {:group, dt, dds} ->
        dt_iolist = process_node_to_iolist(dt, opts)
        dd_iolists = Enum.map(dds, &process_node_to_iolist(&1, opts))
        [dt_iolist, "\n", Enum.intersperse(dd_iolists, "\n")]
    end)
    |> Enum.intersperse("\n\n")

  ["\n", rendered, "\n"]
end
338356
# Renders the children of an unordered list: each child goes through
# process_list_item_to_iolist/2, the results are separated by newlines, and
# the whole list is wrapped in a leading and trailing newline.
defp process_ul_list_to_iolist(children, opts) when is_list(children) do
  rendered_items = for child <- children, do: process_list_item_to_iolist(child, opts)

  ["\n", Enum.intersperse(rendered_items, "\n"), "\n"]
end
346365
# Renders the children of an ordered list. Children are numbered from 1 in
# document order, rendered through process_ordered_list_item_to_iolist/3,
# separated by newlines, and wrapped in a leading and trailing newline.
defp process_ol_list_to_iolist(children, opts) when is_list(children) do
  numbered_items =
    for {child, position} <- Enum.with_index(children, 1) do
      process_ordered_list_item_to_iolist(child, position, opts)
    end

  ["\n", Enum.intersperse(numbered_items, "\n"), "\n"]
end
357377
# Renders one child of an unordered list. An "li" node becomes a "- " bullet
# followed by its rendered children; anything else is rendered as a regular node.
defp process_list_item_to_iolist({"li", _attrs, item_children}, opts) do
  ["- ", process_children_to_iolist(item_children, opts)]
end

defp process_list_item_to_iolist(non_item, opts) do
  process_node_to_iolist(non_item, opts)
end
362383
# Renders one child of an ordered list. An "li" node becomes "<index>. "
# followed by its rendered children; anything else is rendered as a regular
# node and the index is ignored.
defp process_ordered_list_item_to_iolist({"li", _attrs, item_children}, index, opts) do
  [Integer.to_string(index), ". ", process_children_to_iolist(item_children, opts)]
end

defp process_ordered_list_item_to_iolist(non_item, _index, opts) do
  process_node_to_iolist(non_item, opts)
end
367389
368390 defp process_children_to_iolist ( children , opts ) do
369- iolist = children
370- |> Enum . map ( & process_node_to_iolist ( & 1 , opts ) )
371- |> Enum . intersperse ( " " )
391+ iolist =
392+ children
393+ |> Enum . map ( & process_node_to_iolist ( & 1 , opts ) )
394+ |> Enum . intersperse ( " " )
372395
373396 # Only trim if we're normalizing whitespace
374397 if opts . normalize_whitespace do
@@ -391,30 +414,35 @@ defmodule Html2Markdown.Converter do
391414
392415 # Process details/summary elements
# Renders the children of a details element.
#
# The first "summary" child supplies the bold heading; when there is none the
# heading falls back to "**Details**". Every "summary" child is excluded from
# the body, which is rendered from the remaining children.
defp process_details_to_iolist(children, opts) do
  summary_node? = &match?({"summary", _, _}, &1)

  heading =
    case Enum.find(children, summary_node?) do
      {"summary", _, summary_children} ->
        ["**", process_children_to_iolist(summary_children, opts), "**"]

      nil ->
        ["**Details**"]
    end

  body =
    children
    |> Enum.reject(summary_node?)
    |> process_children_to_iolist(opts)

  ["\n", heading, "\n", body, "\n"]
end
410436
411437 # Compatibility wrapper functions
@doc """
Converts a single HTML node to a Markdown string.

Compatibility wrapper: renders the node to iodata and flattens it to a binary.
"""
@spec process_node(Floki.html_node(), Options.t()) :: String.t()
def process_node(node, opts) do
  IO.iodata_to_binary(process_node_to_iolist(node, opts))
end
417444
445+ @ spec process_children ( list ( Floki . html_node ( ) ) , Options . t ( ) ) :: String . t ( )
418446 def process_children ( children , opts ) do
419447 children
420448 |> process_children_to_iolist ( opts )
0 commit comments