Consistent search data rendering

josevalim · josevalim · commit 603afc520d25 · 2025-05-08T18:12:01.000+02:00
diff --git a/lib/ex_doc/doc_ast.ex b/lib/ex_doc/doc_ast.ex
@@ -183,16 +183,21 @@ defmodule ExDoc.DocAST do
   @doc """
   Returns text content from the given AST.
   """
-  def text(ast) do
+  def text(ast, joiner \\ "") do
     ast
-    |> do_text()
+    |> do_text(joiner)
     |> IO.iodata_to_binary()
     |> String.trim()
   end
 
-  defp do_text(ast) when is_list(ast), do: Enum.map(ast, &do_text/1)
-  defp do_text(ast) when is_binary(ast), do: ast
-  defp do_text({_tag, _attr, ast, _meta}), do: text(ast)
+  defp do_text(ast, joiner) when is_list(ast),
+    do: Enum.map_intersperse(ast, joiner, &do_text(&1, joiner))
+
+  defp do_text(ast, _joiner) when is_binary(ast),
+    do: ast
+
+  defp do_text({_tag, _attr, ast, _meta}, joiner),
+    do: do_text(ast, joiner)
 
   @doc """
   Wraps a list of HTML nodes into `<section>` tags whenever `headers` returns true.
diff --git a/lib/ex_doc/formatter/html.ex b/lib/ex_doc/formatter/html.ex
@@ -22,7 +22,6 @@ defmodule ExDoc.Formatter.HTML do
     project_nodes = render_all(project_nodes, filtered_modules, ".html", config, [])
     extras = build_extras(config, ".html")
 
-    # Generate search early on without api reference in extras
     static_files = generate_assets(".", default_assets(config), config)
     search_data = generate_search_data(project_nodes, extras, config)
 
@@ -62,6 +61,7 @@ defmodule ExDoc.Formatter.HTML do
   @doc """
   Autolinks and renders all docs.
   """
+  # TODO: Move this outside of the formatter
   def render_all(project_nodes, filtered_modules, ext, config, opts) do
     base = [
       apps: config.apps,
@@ -120,7 +120,7 @@ defmodule ExDoc.Formatter.HTML do
 
   defp render_doc(%{doc: doc} = node, language, autolink_opts, opts) do
     doc = autolink_and_highlight(doc, language, autolink_opts, opts)
-    %{node | doc: doc, rendered_doc: ExDoc.DocAST.to_string(doc)}
+    %{node | doc: doc}
   end
 
   defp id(%{id: mod_id}, %{id: "c:" <> id}) do
diff --git a/lib/ex_doc/formatter/html/search_data.ex b/lib/ex_doc/formatter/html/search_data.ex
@@ -1,6 +1,5 @@
 defmodule ExDoc.Formatter.HTML.SearchData do
   @moduledoc false
-  alias ExDoc.Utils
 
   def create(nodes, extras, proglang) do
     items = Enum.flat_map(nodes, &module/1) ++ Enum.flat_map(extras, &extra/1)
@@ -24,147 +23,92 @@ defmodule ExDoc.Formatter.HTML.SearchData do
     Enum.map(search_data, fn item ->
       link =
         if item.anchor === "" do
-          "#{map.id}.html"
+          "#{URI.encode(map.id)}.html"
         else
-          "#{map.id}.html##{item.anchor}"
+          "#{URI.encode(map.id)}.html##{URI.encode(item.anchor)}"
         end
 
       encode(link, item.title <> " - #{map.id}", item.type, clean_markdown(item.body))
     end)
   end
 
   defp extra(map) do
-    {intro, sections} = extract_sections_from_markdown(map.source)
-
-    intro_json_item =
-      encode(
-        "#{map.id}.html",
-        map.title,
-        :extras,
-        intro
-      )
-
-    section_json_items =
-      for {header, body} <- sections do
-        encode(
-          "#{map.id}.html##{Utils.text_to_id(header)}",
-          header <> " - #{map.title}",
-          :extras,
-          body
-        )
-      end
-
-    [intro_json_item | section_json_items]
+    page = URI.encode(map.id) <> ".html"
+    {intro, sections} = extract_sections_from_markdown(map.source, "")
+
+    intro = encode(page, map.title, :extras, intro)
+    [intro | render_sections(sections, page, map.title, :extras)]
   end
 
   defp module(%ExDoc.ModuleNode{} = node) do
-    # TODO: This should work on DocAST
-    {intro, sections} = extract_sections(node.doc_format, node)
-
-    module =
-      encode(
-        "#{node.id}.html",
-        node.title,
-        node.type,
-        intro
-      )
-
-    module_sections =
-      for {header, body} <- sections do
-        encode(
-          "#{node.id}.html#module-#{Utils.text_to_id(header)}",
-          header <> " - #{node.title}",
-          node.type,
-          body
-        )
-      end
-
-    docs = Enum.flat_map(node.docs, &node_child(&1, node))
-    [module] ++ module_sections ++ docs
+    page = URI.encode(node.id) <> ".html"
+    {intro, sections} = extract_sections(node.source_format, node, "module-")
+    module = encode(page, node.title, node.type, intro)
+    docs = Enum.flat_map(node.docs, &node_child(&1, node, page))
+    [module] ++ render_sections(sections, page, node.title, node.type) ++ docs
   end
 
-  defp node_child(node, module_node) do
-    {intro, sections} = extract_sections(module_node.doc_format, node)
-
-    child =
-      encode(
-        "#{module_node.id}.html##{node.id}",
-        "#{module_node.id}.#{node.name}/#{node.arity}",
-        node.type,
-        intro
-      )
-
-    child_sections =
-      for {header, body} <- sections do
-        encode(
-          "#{module_node.id}.html##{node.id}-#{Utils.text_to_id(header)}",
-          header <> " - #{module_node.id}.#{node.name}/#{node.arity}",
-          node.type,
-          body
-        )
-      end
-
-    [child] ++ child_sections
+  defp node_child(node, module_node, page) do
+    title = "#{module_node.id}.#{node.name}/#{node.arity}"
+    {intro, sections} = extract_sections(module_node.source_format, node, node.id <> "-")
+
+    child = encode("#{page}##{URI.encode(node.id)}", title, node.type, intro)
+    [child | render_sections(sections, page, title, node.type)]
   end
 
   defp encode(ref, title, type, doc) do
-    %{
-      ref: URI.encode(ref),
-      title: title,
-      type: type,
-      doc: doc
-    }
+    %{ref: ref, title: title, type: type, doc: doc}
   end
 
-  # TODO: Perform this via DocAST and remove doc_format (and perhaps source_doc)
-  defp extract_sections("text/markdown", %{source_doc: %{"en" => doc}}) do
-    extract_sections_from_markdown(doc)
+  # TODO: Perform this via DocAST and remove source_format (and perhaps source_doc)
+  defp extract_sections("text/markdown", %{source_doc: %{"en" => doc}}, prefix) do
+    extract_sections_from_markdown(doc, prefix)
   end
 
-  defp extract_sections("application/erlang+html", %{rendered_doc: nil}) do
-    {nil, []}
+  defp extract_sections(_format, %{doc: nil}, _prefix) do
+    {"", []}
   end
 
-  defp extract_sections("application/erlang+html", %{rendered_doc: doc}) do
-    {clean_html(doc), []}
+  defp extract_sections(_format, %{doc: doc}, _prefix) do
+    {ExDoc.DocAST.text(doc, " "), []}
   end
 
-  defp extract_sections(_format, _doc) do
-    {"", []}
-  end
-
-  # TODO: This should work on DocAST when we prebuild extra.
-  defp extract_sections_from_markdown(string) do
-    [intro | sections] =
+  defp extract_sections_from_markdown(string, prefix) do
+    [intro | headers_sections] =
       Regex.split(~r/(?<!#)###? (?<header>\b.+)/, string, include_captures: true)
 
-    sections =
-      for [header, section] <- Enum.chunk_every(sections, 2) do
-        header = String.trim_leading(header, "#")
+    {headers, sections} =
+      headers_sections
+      |> Enum.chunk_every(2)
+      |> Enum.map(fn [header, section] -> {header, section} end)
+      |> Enum.unzip()
 
-        section =
-          section
-          |> ExDoc.Utils.strip_tags(" ")
-          |> drop_ignorable_codeblocks()
-          |> String.trim()
+    # Join the headers into a single markdown document and parse it to get their ids
+    header_tags =
+      headers
+      |> Enum.join("\n\n")
+      |> ExDoc.Markdown.to_ast()
+      |> ExDoc.DocAST.add_ids_to_headers([:h2, :h3], prefix)
 
-        {clean_markdown(header), section}
-      end
+    sections =
+      Enum.zip_with(header_tags, sections, fn {_, attrs, inner, _}, section ->
+        {ExDoc.DocAST.text(inner), Keyword.fetch!(attrs, :id), clean_markdown(section)}
+      end)
 
     {clean_markdown(intro), sections}
   end
 
-  defp clean_markdown(doc) do
-    doc
+  defp clean_markdown(text) do
+    text
     |> ExDoc.Utils.strip_tags(" ")
+    |> drop_ignorable_codeblocks()
     |> String.trim()
   end
 
-  defp clean_html(doc) do
-    doc
-    |> ExDoc.Utils.strip_tags(" ")
-    |> String.replace(~r/\s+/, " ")
-    |> String.trim()
+  defp render_sections(sections, page, title, type) do
+    for {header, anchor, body} <- sections do
+      encode("#{page}##{anchor}", header <> " - " <> title, type, body)
+    end
   end
 
   @ignored_codeblocks ~w[vega-lite]
diff --git a/lib/ex_doc/nodes.ex b/lib/ex_doc/nodes.ex
@@ -9,10 +9,9 @@ defmodule ExDoc.ModuleNode do
             module: nil,
             group: nil,
             deprecated: nil,
-            doc_format: nil,
             doc: nil,
             source_doc: nil,
-            rendered_doc: nil,
+            source_format: nil,
             moduledoc_line: nil,
             moduledoc_file: nil,
             source_path: nil,
@@ -35,10 +34,9 @@ defmodule ExDoc.ModuleNode do
           module: module(),
           group: atom() | nil,
           deprecated: String.t() | nil,
-          doc_format: String.t() | nil,
           doc: ExDoc.DocAST.t() | nil,
           source_doc: term() | nil,
-          rendered_doc: String.t() | nil,
+          source_format: String.t() | nil,
           moduledoc_line: non_neg_integer(),
           moduledoc_file: String.t(),
           source_path: String.t() | nil,
@@ -63,7 +61,6 @@ defmodule ExDoc.DocNode do
             deprecated: nil,
             doc: nil,
             source_doc: nil,
-            rendered_doc: nil,
             type: nil,
             signature: nil,
             specs: [],
@@ -84,7 +81,6 @@ defmodule ExDoc.DocNode do
           deprecated: String.t() | nil,
           doc: ExDoc.DocAST.t() | nil,
           source_doc: term() | nil,
-          rendered_doc: String.t() | nil,
           type: atom(),
           signature: String.t(),
           specs: [ExDoc.Language.spec_ast()],
diff --git a/lib/ex_doc/retriever.ex b/lib/ex_doc/retriever.ex
@@ -157,9 +157,9 @@ defmodule ExDoc.Retriever do
       deprecated: metadata[:deprecated],
       docs_groups: config.docs_groups ++ module_data.default_groups,
       docs: ExDoc.Utils.natural_sort_by(docs, &"#{&1.name}/#{&1.arity}"),
-      doc_format: format,
       doc: normalize_doc_ast(doc_ast, "module-"),
       source_doc: source_doc,
+      source_format: format,
       moduledoc_line: doc_line,
       moduledoc_file: doc_file,
       source_url: source_link(source, module_data.source_line),
diff --git a/test/ex_doc/formatter/html/search_data_test.exs b/test/ex_doc/formatter/html/search_data_test.exs
@@ -112,6 +112,7 @@ defmodule ExDoc.Formatter.HTML.SearchDataTest do
       erlc(c, :search_foo, """
       %% @doc
       %% Hello <em>world</em>.
+      %% Newline.
       -module(search_foo).
       """)
 
@@ -122,7 +123,7 @@ defmodule ExDoc.Formatter.HTML.SearchDataTest do
     assert item["ref"] == "search_foo.html"
     assert item["type"] == "module"
     assert item["title"] == "search_foo"
-    assert item["doc"] == "Hello world ."
+    assert item["doc"] == "Hello  world . Newline."
   end
 
   test "function", c do
diff --git a/test/ex_doc/retriever/elixir_test.exs b/test/ex_doc/retriever/elixir_test.exs
@@ -51,7 +51,6 @@ defmodule ExDoc.Retriever.ElixirTest do
                group: "Functions",
                id: "function/0",
                name: :function,
-               rendered_doc: nil,
                signature: "function()",
                source_url: nil,
                specs: [spec],
diff --git a/test/ex_doc/retriever/erlang_test.exs b/test/ex_doc/retriever/erlang_test.exs
@@ -66,7 +66,6 @@ defmodule ExDoc.Retriever.ErlangTest do
                module: :mod,
                nested_context: nil,
                nested_title: nil,
-               rendered_doc: nil,
                source_path: _,
                source_url: _,
                title: "mod",
@@ -87,7 +86,6 @@ defmodule ExDoc.Retriever.ErlangTest do
                group: "Functions",
                id: "function1/0",
                name: :function1,
-               rendered_doc: nil,
                signature: _,
                source_url: _,
                specs: _,
@@ -163,7 +161,6 @@ defmodule ExDoc.Retriever.ErlangTest do
                module: :mod,
                nested_context: nil,
                nested_title: nil,
-               rendered_doc: nil,
                source_path: _,
                source_url: "module.hrl:2",
                title: "mod",
@@ -183,7 +180,6 @@ defmodule ExDoc.Retriever.ErlangTest do
                group: "Functions",
                id: "function/0",
                name: :function,
-               rendered_doc: nil,
                signature: _,
                source_url: "function.hrl:24",
                specs: _,
@@ -404,7 +400,6 @@ defmodule ExDoc.Retriever.ErlangTest do
         module: :mod,
         nested_context: nil,
         nested_title: nil,
-        rendered_doc: nil,
         source_path: _,
         source_url: _,
         title: "mod",
@@ -424,7 +419,6 @@ defmodule ExDoc.Retriever.ErlangTest do
         group: "Functions",
         id: "function1/0",
         name: :function1,
-        rendered_doc: nil,
         signature: _,
         source_url: _,
         specs: _,