Skip to content

Commit 4677384

Browse files
committed
feat: add type specifications for better code clarity
Add @spec annotations to all public functions across modules:
- Html2Markdown module with html_content and conversion_options types
- Options module with t() type for configuration
- Parser module with html_tree type
- Converter and TableConverter with proper return types

Also apply consistent code formatting across all files.
1 parent 615a289 commit 4677384

File tree

11 files changed

+719
-566
lines changed

11 files changed

+719
-566
lines changed

lib/html2markdown.ex

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,21 @@ defmodule Html2Markdown do
1818

1919
alias Html2Markdown.{Options, Parser, Converter}
2020

21+
@type html_content :: String.t()
22+
@type markdown_content :: String.t()
23+
@type conversion_options :: %{
24+
optional(:navigation_classes) => [String.t()],
25+
optional(:non_content_tags) => [String.t()],
26+
optional(:markdown_flavor) => :basic | :gfm,
27+
optional(:normalize_whitespace) => boolean()
28+
}
29+
2130
@doc """
2231
Converts the content from an HTML document to Markdown (removing non-content sections and tags)
2332
2433
Uses default options for conversion. To customize behavior, use `convert/2`.
2534
"""
35+
@spec convert(html_content()) :: markdown_content()
2636
def convert(document) when is_binary(document) do
2737
convert(document, %{})
2838
end
@@ -48,6 +58,7 @@ defmodule Html2Markdown do
4858
"\\nHello\\n"
4959
5060
"""
61+
@spec convert(html_content(), conversion_options()) :: markdown_content()
5162
def convert(document, options) when is_binary(document) and is_map(options) do
5263
opts = Options.merge(options)
5364

@@ -56,5 +67,6 @@ defmodule Html2Markdown do
5667
|> Converter.convert_to_markdown(opts)
5768
end
5869

70+
@spec convert(any(), any()) :: {:error, String.t()}
5971
def convert(_document, _options), do: {:error, "Could not convert HTML to Markdown"}
6072
end

lib/html2markdown/converter.ex

Lines changed: 116 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@ defmodule Html2Markdown.Converter do
33
Handles the conversion of HTML nodes to Markdown format.
44
"""
55

6-
alias Html2Markdown.TableConverter
6+
alias Html2Markdown.{TableConverter, Options}
77

8+
@spec convert_to_markdown(list(Floki.html_node()), Options.t()) :: String.t()
89
def convert_to_markdown(document, opts) do
910
document
1011
|> build_markdown_iolist(opts)
@@ -16,8 +17,12 @@ defmodule Html2Markdown.Converter do
1617
nodes
1718
|> Enum.reduce([], fn node, acc ->
1819
case process_node_to_iolist(node, opts) do
19-
[] -> acc
20-
"" -> acc
20+
[] ->
21+
acc
22+
23+
"" ->
24+
acc
25+
2126
iodata ->
2227
if acc == [] do
2328
[iodata]
@@ -150,6 +155,7 @@ defmodule Html2Markdown.Converter do
150155
case List.keyfind(attrs, "title", 0) do
151156
{"title", title} ->
152157
["", process_children_to_iolist(children, opts), " (", title, ")"]
158+
153159
_ ->
154160
process_children_to_iolist(children, opts)
155161
end
@@ -161,9 +167,11 @@ defmodule Html2Markdown.Converter do
161167
defp process_node_to_iolist({"q", attrs, children}, opts) do
162168
# Handle cite attribute if present
163169
quote_content = ["\"", process_children_to_iolist(children, opts), "\""]
170+
164171
case List.keyfind(attrs, "cite", 0) do
165172
{"cite", url} ->
166173
[quote_content, " (", url, ")"]
174+
167175
_ ->
168176
quote_content
169177
end
@@ -174,6 +182,7 @@ defmodule Html2Markdown.Converter do
174182
{"datetime", datetime} ->
175183
# Include datetime as title attribute in markdown
176184
["", process_children_to_iolist(children, opts), " <time datetime=\"", datetime, "\">"]
185+
177186
_ ->
178187
process_children_to_iolist(children, opts)
179188
end
@@ -184,6 +193,7 @@ defmodule Html2Markdown.Converter do
184193
{"src", src} ->
185194
# Convert video to a link
186195
["[Video](", src, ")"]
196+
187197
_ ->
188198
# Check for source children
189199
"[Video]"
@@ -200,17 +210,18 @@ defmodule Html2Markdown.Converter do
200210
do: ["\n", process_children_to_iolist(children, opts), "\n"]
201211

202212
defp process_node_to_iolist({"picture", _, children}, opts) do
203-
case Enum.find(children, fn
204-
{tag, _, _} when is_binary(tag) -> tag == "img"
205-
_ -> false
206-
end) do
213+
case Enum.find(children, fn
214+
{tag, _, _} when is_binary(tag) -> tag == "img"
215+
_ -> false
216+
end) do
207217
{"img", attrs, _} ->
208218
case {List.keyfind(attrs, "src", 0), List.keyfind(attrs, "alt", 0)} do
209219
{{"src", src}, {"alt", alt}} -> ["![", alt, "](", src, ")"]
210220
{{"src", src}, _} -> ["![](", src, ")"]
211221
_ -> []
212222
end
213-
_ ->
223+
224+
_ ->
214225
# No img found, process children normally
215226
process_children_to_iolist(children, opts)
216227
end
@@ -236,11 +247,13 @@ defmodule Html2Markdown.Converter do
236247
case List.keyfind(attrs, "href", 0) do
237248
{"href", url} ->
238249
children_text = IO.iodata_to_binary(process_children_to_iolist(children, opts))
250+
239251
if children_text == "" do
240252
["[", url, "](", url, ")"]
241253
else
242254
["[", children_text, "](", url, ")"]
243255
end
256+
244257
_ ->
245258
process_children_to_iolist(children, opts)
246259
end
@@ -269,106 +282,116 @@ defmodule Html2Markdown.Converter do
269282

270283
defp process_definition_list_to_iolist(children, opts) when is_list(children) do
271284
# Group elements into definition groups (dt followed by its dd elements)
272-
{groups, last_group} = children
273-
|> Enum.reduce({[], nil}, fn
274-
{"dt", _, _} = dt, {groups, current_group} ->
275-
# Start a new group with this dt
276-
new_group = %{dt: dt, dds: []}
277-
if current_group do
278-
{groups ++ [current_group], new_group}
279-
else
280-
{groups, new_group}
281-
end
285+
{groups, last_group} =
286+
children
287+
|> Enum.reduce({[], nil}, fn
288+
{"dt", _, _} = dt, {groups, current_group} ->
289+
# Start a new group with this dt
290+
new_group = %{dt: dt, dds: []}
291+
292+
if current_group do
293+
{groups ++ [current_group], new_group}
294+
else
295+
{groups, new_group}
296+
end
282297

283-
{"dd", _, _} = dd, {groups, current_group} when not is_nil(current_group) ->
284-
# Add dd to current group
285-
updated_group = Map.update!(current_group, :dds, &(&1 ++ [dd]))
286-
{groups, updated_group}
298+
{"dd", _, _} = dd, {groups, current_group} when not is_nil(current_group) ->
299+
# Add dd to current group
300+
updated_group = Map.update!(current_group, :dds, &(&1 ++ [dd]))
301+
{groups, updated_group}
287302

288-
{"dd", _, _} = dd, {groups, nil} ->
289-
# dd without preceding dt - create a group with no dt
290-
{groups ++ [%{dt: nil, dds: [dd]}], nil}
303+
{"dd", _, _} = dd, {groups, nil} ->
304+
# dd without preceding dt - create a group with no dt
305+
{groups ++ [%{dt: nil, dds: [dd]}], nil}
291306

292-
other, {groups, current_group} ->
293-
# Other elements get their own group
294-
groups_with_current = if current_group, do: groups ++ [current_group], else: groups
295-
{groups_with_current ++ [%{dt: nil, dds: [], other: other}], nil}
296-
end)
307+
other, {groups, current_group} ->
308+
# Other elements get their own group
309+
groups_with_current = if current_group, do: groups ++ [current_group], else: groups
310+
{groups_with_current ++ [%{dt: nil, dds: [], other: other}], nil}
311+
end)
297312

298313
# Add the last group if any
299-
all_groups = if last_group do
300-
groups ++ [last_group]
301-
else
302-
groups
303-
end
314+
all_groups =
315+
if last_group do
316+
groups ++ [last_group]
317+
else
318+
groups
319+
end
304320

305321
# Process each group
306-
result = all_groups
307-
|> Enum.reduce([], fn group, acc ->
308-
group_iolist = case group do
309-
%{dt: nil, dds: [], other: other} ->
310-
# Just process the other element
311-
process_node_to_iolist(other, opts)
312-
313-
%{dt: nil, dds: dds} ->
314-
# Just dd elements without dt
315-
Enum.map(dds, &process_node_to_iolist(&1, opts))
316-
|> Enum.intersperse("\n")
317-
318-
%{dt: dt, dds: []} ->
319-
# Just dt without dd
320-
process_node_to_iolist(dt, opts)
321-
322-
%{dt: dt, dds: dds} ->
323-
# dt with dd elements
324-
dt_iolist = process_node_to_iolist(dt, opts)
325-
dd_iolists = Enum.map(dds, &process_node_to_iolist(&1, opts))
326-
[dt_iolist, "\n", Enum.intersperse(dd_iolists, "\n")]
327-
end
322+
result =
323+
all_groups
324+
|> Enum.reduce([], fn group, acc ->
325+
group_iolist =
326+
case group do
327+
%{dt: nil, dds: [], other: other} ->
328+
# Just process the other element
329+
process_node_to_iolist(other, opts)
330+
331+
%{dt: nil, dds: dds} ->
332+
# Just dd elements without dt
333+
Enum.map(dds, &process_node_to_iolist(&1, opts))
334+
|> Enum.intersperse("\n")
335+
336+
%{dt: dt, dds: []} ->
337+
# Just dt without dd
338+
process_node_to_iolist(dt, opts)
339+
340+
%{dt: dt, dds: dds} ->
341+
# dt with dd elements
342+
dt_iolist = process_node_to_iolist(dt, opts)
343+
dd_iolists = Enum.map(dds, &process_node_to_iolist(&1, opts))
344+
[dt_iolist, "\n", Enum.intersperse(dd_iolists, "\n")]
345+
end
328346

329-
if acc == [] do
330-
[group_iolist]
331-
else
332-
[acc, "\n\n", group_iolist]
333-
end
334-
end)
347+
if acc == [] do
348+
[group_iolist]
349+
else
350+
[acc, "\n\n", group_iolist]
351+
end
352+
end)
335353

336354
["\n", result, "\n"]
337355
end
338356

339357
defp process_ul_list_to_iolist(children, opts) when is_list(children) do
340-
items = children
341-
|> Enum.map(&process_list_item_to_iolist(&1, opts))
342-
|> Enum.intersperse("\n")
358+
items =
359+
children
360+
|> Enum.map(&process_list_item_to_iolist(&1, opts))
361+
|> Enum.intersperse("\n")
343362

344363
["\n", items, "\n"]
345364
end
346365

347366
defp process_ol_list_to_iolist(children, opts) when is_list(children) do
348-
items = children
349-
|> Enum.with_index(1)
350-
|> Enum.map(fn {child, index} ->
351-
process_ordered_list_item_to_iolist(child, index, opts)
352-
end)
353-
|> Enum.intersperse("\n")
367+
items =
368+
children
369+
|> Enum.with_index(1)
370+
|> Enum.map(fn {child, index} ->
371+
process_ordered_list_item_to_iolist(child, index, opts)
372+
end)
373+
|> Enum.intersperse("\n")
354374

355375
["\n", items, "\n"]
356376
end
357377

358378
defp process_list_item_to_iolist({"li", _, children}, opts),
359379
do: ["- ", process_children_to_iolist(children, opts)]
380+
360381
defp process_list_item_to_iolist(other, opts),
361382
do: process_node_to_iolist(other, opts)
362383

363384
defp process_ordered_list_item_to_iolist({"li", _, children}, index, opts),
364385
do: [Integer.to_string(index), ". ", process_children_to_iolist(children, opts)]
386+
365387
defp process_ordered_list_item_to_iolist(other, _index, opts),
366388
do: process_node_to_iolist(other, opts)
367389

368390
defp process_children_to_iolist(children, opts) do
369-
iolist = children
370-
|> Enum.map(&process_node_to_iolist(&1, opts))
371-
|> Enum.intersperse(" ")
391+
iolist =
392+
children
393+
|> Enum.map(&process_node_to_iolist(&1, opts))
394+
|> Enum.intersperse(" ")
372395

373396
# Only trim if we're normalizing whitespace
374397
if opts.normalize_whitespace do
@@ -391,30 +414,35 @@ defmodule Html2Markdown.Converter do
391414

392415
# Process details/summary elements
393416
defp process_details_to_iolist(children, opts) do
394-
{summary, content} = Enum.split_with(children, fn
395-
{"summary", _, _} -> true
396-
_ -> false
397-
end)
398-
399-
summary_iolist = case summary do
400-
[{"summary", _, summary_children} | _] ->
401-
["**", process_children_to_iolist(summary_children, opts), "**"]
402-
_ ->
403-
["**Details**"]
404-
end
405-
417+
{summary, content} =
418+
Enum.split_with(children, fn
419+
{"summary", _, _} -> true
420+
_ -> false
421+
end)
422+
423+
summary_iolist =
424+
case summary do
425+
[{"summary", _, summary_children} | _] ->
426+
["**", process_children_to_iolist(summary_children, opts), "**"]
427+
428+
_ ->
429+
["**Details**"]
430+
end
431+
406432
content_iolist = process_children_to_iolist(content, opts)
407-
433+
408434
["\n", summary_iolist, "\n", content_iolist, "\n"]
409435
end
410436

411437
# Compatibility wrapper functions
438+
@spec process_node(Floki.html_node(), Options.t()) :: String.t()
412439
def process_node(node, opts) do
413440
node
414441
|> process_node_to_iolist(opts)
415442
|> IO.iodata_to_binary()
416443
end
417444

445+
@spec process_children(list(Floki.html_node()), Options.t()) :: String.t()
418446
def process_children(children, opts) do
419447
children
420448
|> process_children_to_iolist(opts)

lib/html2markdown/options.ex

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,13 @@ defmodule Html2Markdown.Options do
33
Handles configuration options for HTML to Markdown conversion.
44
"""
55

6+
@type t :: %{
7+
navigation_classes: [String.t()],
8+
non_content_tags: [String.t()],
9+
markdown_flavor: :basic | :gfm,
10+
normalize_whitespace: boolean()
11+
}
12+
613
@default_options %{
714
navigation_classes: ["footer", "menu", "nav", "sidebar", "aside"],
815
non_content_tags: [
@@ -36,13 +43,15 @@ defmodule Html2Markdown.Options do
3643
@doc """
3744
Returns the default options map.
3845
"""
46+
@spec defaults() :: t()
3947
def defaults do
4048
@default_options
4149
end
4250

4351
@doc """
4452
Merges user options with defaults.
4553
"""
54+
@spec merge(map()) :: t()
4655
def merge(user_options) when is_map(user_options) do
4756
Map.merge(@default_options, user_options)
4857
end

0 commit comments

Comments
 (0)