Skip to content

Commit 52aa72b

Browse files
committed
Merge #95 from 86-anchor-escape
2 parents 63cdf8e + d3e77d0 commit 52aa72b

File tree

14 files changed

+1228
-19
lines changed

14 files changed

+1228
-19
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ jobs:
8282
Pkg.PackageSpec(name="Documenter", version="1.0.0"),
8383
Pkg.PackageSpec(name="MarkdownAST", version="0.1.2"),
8484
Pkg.PackageSpec(name="OrderedCollections", version="1.6.0"),
85+
Pkg.PackageSpec(name="Bijections", version="0.1.4"),
8586
])
8687
Pkg.precompile()
8788
Pkg.status()

NEWS.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
66

77
## [Unreleased][]
88

9+
### Added
10+
11+
* The `CitationBibliography` plugin object now has an internal field `anchor_keys` that is a bijective mapping of citation keys to HTML anchor names. The anchor names are normalized versions of the citation keys that are restricted to ASCII alphanumerics, dashes (`-`) and underscores (`_`). This provides [compatibility with HTML4](https://www.w3.org/TR/html4/types.html#type-id) and additionally [avoids issues with CSS selectors](https://stackoverflow.com/a/79022). It also works around restrictions of the `Documenter.DOM` framework that is used internally to render HTML content. [[#95][]]
12+
13+
14+
### Fixed
15+
16+
* Citation keys that contain special characters (like colons) no longer produce broken links. This is achieved by normalizing HTML anchor names to contain only alphanumeric ASCII characters, dashes, and underscores [[#86][], [#95][]]
17+
918

1019
## [Version 1.3.7][1.3.7] - 2025-03-29
1120

@@ -198,8 +207,10 @@ There were several bugs and limitations in version `1.2.x` for which some existi
198207
[1.2.0]: https://github.com/JuliaDocs/DocumenterCitations.jl/compare/v1.1.0...v1.2.0
199208
[1.1.0]: https://github.com/JuliaDocs/DocumenterCitations.jl/compare/v1.0.0...v1.1.0
200209
[1.0.0]: https://github.com/JuliaDocs/DocumenterCitations.jl/compare/v0.2.12...v1.0.0
210+
[#95]: https://github.com/JuliaDocs/DocumenterCitations.jl/pull/95
201211
[#89]: https://github.com/JuliaDocs/DocumenterCitations.jl/pull/89
202212
[#87]: https://github.com/JuliaDocs/DocumenterCitations.jl/pull/87
213+
[#86]: https://github.com/JuliaDocs/DocumenterCitations.jl/issues/86
203214
[#83]: https://github.com/JuliaDocs/DocumenterCitations.jl/pull/83
204215
[#80]: https://github.com/JuliaDocs/DocumenterCitations.jl/issues/80
205216
[#79]: https://github.com/JuliaDocs/DocumenterCitations.jl/pull/79

Project.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ version = "1.3.7+dev"
66
[deps]
77
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
88
Bibliography = "f1be7e48-bf82-45af-a471-ae754a193061"
9+
Bijections = "e2ed5e7c-b2de-5872-ae92-c73ca462fb04"
910
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
1011
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
1112
Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
@@ -17,6 +18,7 @@ Unicode = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
1718
[compat]
1819
AbstractTrees = "0.4"
1920
Bibliography = "0.2.15, 0.3"
21+
Bijections = "0.1.4"
2022
Dates = "1"
2123
Documenter = "1"
2224
Logging = "1"

src/DocumenterCitations.jl

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ using Documenter.Writers.HTMLWriter
99
import MarkdownAST
1010
import AbstractTrees
1111

12+
using Bijections: Bijections
1213
using Logging
1314
using Markdown
1415
using Bibliography: Bibliography, xyear, xlink, xtitle
@@ -48,6 +49,11 @@ should not be considered part of the stable API.
4849
* `anchor_map`: an [`AnchorMap`](https://documenter.juliadocs.org/stable/lib/internals/anchors/#Documenter.AnchorMap)
4950
object that keeps track of the link anchors for references in bibliography
5051
blocks
52+
* `anchor_keys`: a [bijective map](https://github.com/scheinerman/Bijections.jl?tab=readme-ov-file#bijections)
53+
of citation keys to HTML anchor names. Whenever possible, an anchor name is
54+
identical to the citation key, but anchor names are restricted to consist
55+
only of ASCII letters, digits, and the symbols `-`, `_`. Thus, citation keys
56+
are normalized to meet that restriction.
5157
"""
5258
struct CitationBibliography <: Documenter.Plugin
5359

@@ -72,6 +78,9 @@ struct CitationBibliography <: Documenter.Plugin
7278
# canonical bibliography blocks
7379
anchor_map::Documenter.AnchorMap
7480

81+
# Map citation key => anchor name
82+
anchor_keys::Bijections.Bijection{String,String}
83+
7584
end
7685

7786
function CitationBibliography(bibfile::AbstractString=""; style=nothing)
@@ -117,13 +126,15 @@ function CitationBibliography(bibfile::AbstractString=""; style=nothing)
117126
citations = OrderedDict{String,Int64}()
118127
page_citations = Dict{String,Set{String}}()
119128
anchor_map = Documenter.AnchorMap()
129+
anchor_keys = Bijections.Bijection{String,String}()
120130
return CitationBibliography(
121131
bibfile,
122132
style,
123133
entries,
124134
citations,
125135
page_citations,
126-
anchor_map
136+
anchor_map,
137+
anchor_keys
127138
)
128139
end
129140

src/expand_bibliography.jl

Lines changed: 77 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ _ALLOW_PRE_13_FALLBACK = true
44
55
Runs after [`CollectCitations`](@ref) but before [`ExpandCitations`](@ref).
66
7-
Each bibliography is rendered into HTML as a a [definition
7+
Each bibliography is rendered into HTML as a [definition
88
list](https://www.w3schools.com/tags/tag_dl.asp), a [bullet
99
list](https://www.w3schools.com/tags/tag_ul.asp), or an
1010
[enumeration](https://www.w3schools.com/tags/tag_ol.asp) depending on
@@ -362,18 +362,18 @@ function expand_bibliography(node::MarkdownAST.Node, meta, page, doc)
362362
end
363363
for (key, entry) in entries_to_show
364364
if fields[:Canonical]
365-
anchor_key = key
365+
anchor_key = get_anchor_key(key, bib.anchor_keys)
366366
# Add anchor that citations can link to from anywhere in the docs.
367-
if Documenter.anchor_exists(anchors, key)
367+
if Documenter.anchor_exists(anchors, anchor_key)
368368
# Skip entries that already have a canonical bib entry
369369
# elsewhere. This is expected behavior, not an error/warning,
370370
# allowing to split the canonical bibliography in multiple
371371
# parts.
372372
@debug "Skipping key=$(key) (existing anchor)"
373373
continue
374374
else
375-
@debug "Defining anchor for key=$(key)"
376-
Documenter.anchor_add!(anchors, entry, key, page.build)
375+
@debug "Defining anchor $(repr(anchor_key)) for key=$(repr(key))"
376+
Documenter.anchor_add!(anchors, entry, anchor_key, page.build)
377377
end
378378
else
379379
anchor_key = nothing
@@ -406,6 +406,78 @@ function expand_bibliography(node::MarkdownAST.Node, meta, page, doc)
406406
end
407407

408408

409+
# Generate a suitably normalized (restricted ASCII) HTML anchor name from a
410+
# citation key.
411+
#
412+
# The [HTML4 standard requires](https://www.w3.org/TR/html4/types.html#type-id)
413+
# that anchor names must begin with a letter ([A-Za-z]) and may be followed by
414+
# any number of letters, digits ([0-9]), hyphens ("-"), underscores ("_"),
415+
# colons (":"), and periods ("."). Dots and colons are further problematic for
416+
# compatibility with CSS selectors, see https://stackoverflow.com/a/79022.
417+
# Even more importantly, these characters are not supported by the
418+
# `Documenter.DOM` framework that we use to generate HTML: it will silently
419+
# drop anything after a colon or period. The numerical `suffix_index` gets
420+
# appended to the anchor name if > 1 and can be used to disambiguate labels.
421+
function get_anchor_key(
422+
citation_key::String,
423+
cache::Bijections.Bijection{String,String};
424+
suffix_index::Int64=1
425+
)
426+
if haskey(cache, citation_key)
427+
anchor_key = cache[citation_key]
428+
else
429+
anchor_key = normalize_anchor(citation_key) # => [A-Za-z0-9_-]
430+
if suffix_index > 1
431+
anchor_key *= "-$suffix_index"
432+
end
433+
if !startswith(anchor_key, r"[A-Za-z]")
434+
# Anchors must start with a letter. Instead of rejecting "invalid"
435+
# anchors, we just prepend something arbitrary.
436+
anchor_key = "cit-" * anchor_key
437+
end
438+
try
439+
# The Bijection type takes care of all the work of checking for
440+
# duplicates here.
441+
cache[citation_key] = anchor_key
442+
catch
443+
suffix_index += 1
444+
msg = "HTML anchor for citation key $(repr(citation_key)) normalizes to ambiguous $(repr(anchor_key)) conflicting with citation key $(repr(cache(anchor_key))). Disambiguating with suffix \"-$(suffix_index)\""
445+
@warn(msg)
446+
if suffix_index < 100
447+
return get_anchor_key(citation_key, cache; suffix_index)
448+
else
449+
error("Internal error: cannot find disambiguated anchor key")
450+
end
451+
end
452+
@debug "Generated anchor key $(repr(anchor_key)) for citation key $(repr(citation_key))"
453+
end
454+
return anchor_key
455+
end
456+
457+
458+
# Transform an arbitrary string `s` into a normalized string containing only
459+
# ASCII letters, numbers, and the symbols `_` and `-`, i.e., matching the
460+
# regex `r"^[A-Za-z0-9_-]*$"`. Letters with diacritics are normalized into
461+
# their ASCII equivalents, `:`, `/`, and `.` are converted to `_`, and all other
462+
# characters are dropped.
463+
function normalize_anchor(s::AbstractString)
464+
s_norm = Unicode.normalize(s, :NFKD) # decompose diacritics
465+
chars = Char[]
466+
for c in s_norm
467+
if ('A' <= c <= 'Z') ||
468+
('a' <= c <= 'z') ||
469+
('0' <= c <= '9') ||
470+
c == '_' ||
471+
c == '-'
472+
push!(chars, c)
473+
elseif (c == ':') || (c == '/') || (c == '.')
474+
push!(chars, '_')
475+
end
476+
end
477+
return String(chars)
478+
end
479+
480+
409481
# Deal with `@__FILE__` in `Pages`, convert it to the name of the current file.
410482
function _resolve__FILE__(Pages, page)
411483
__FILE__ = let ex = Meta.parse("_ = @__FILE__", 1; raise=false)[1]

src/expand_citations.jl

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -133,18 +133,19 @@ function expand_citation(
133133
@assert cit isa DirectCitationLink
134134
# E.g., "[Semi-AD paper](@cite GoerzQ2022)"
135135
key = cit.key
136-
anchor = Documenter.anchor(anchors, key)
137-
if isnothing(anchor)
138-
link_text = ast_linktext(cit.node)
139-
@error "expand_citation$rec: No destination for key=$(repr(key)) → unlinked text $(repr(link_text))"
140-
return Documenter.mdparse(link_text; mode=:span)
141-
else
136+
if haskey(bib.anchor_keys, key)
137+
anchor_key = bib.anchor_keys[key]
138+
anchor = Documenter.anchor(anchors, anchor_key)
142139
expanded_node = MarkdownAST.copy_tree(node)
143140
path = relpath(anchor.file, dirname(page.build))
144141
expanded_node.element.destination =
145142
string(path, Documenter.anchor_fragment(anchor))
146143
@debug "expand_citation$rec: $cit → link to $(expanded_node.element.destination)"
147144
return expanded_node
145+
else
146+
link_text = ast_linktext(cit.node)
147+
@error "expand_citation$rec: No destination for key=$(repr(key)) → unlinked text $(repr(link_text))"
148+
return Documenter.mdparse(link_text; mode=:span)
148149
end
149150
end
150151
end

test/Project.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
[deps]
22
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
33
Bibliography = "f1be7e48-bf82-45af-a471-ae754a193061"
4+
Bijections = "e2ed5e7c-b2de-5872-ae92-c73ca462fb04"
45
Coverage = "a2441757-f6aa-5fb2-8edb-039e3f45d037"
56
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
67
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"

test/runtests.jl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ using DocumenterCitations
5656
include("test_keys_with_underscores.jl")
5757
end
5858

59+
println("\n* anchor_keys (test_anchor_keys.jl):")
60+
@time @safetestset "anchor_keys" begin
61+
include("test_anchor_keys.jl")
62+
end
63+
5964
println("\n* integration test (test_integration.jl):")
6065
@time @safetestset "integration" begin
6166
include("test_integration.jl")

test/test_anchor_keys.jl

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
using DocumenterCitations
2+
using DocumenterCitations: get_anchor_key
3+
using Test
4+
using Bijections
5+
using TestingUtilities: @Test # much better at comparing strings
6+
using IOCapture: IOCapture
7+
8+
include("run_makedocs.jl")
9+
10+
11+
@testset "Documenter.DOM anchors" begin
12+
Documenter.DOM.@tags div
13+
@test string(div["#anchor"]("")) == "<div id=\"anchor\"></div>"
14+
@test string(div["#anchor-suffix"]("")) == "<div id=\"anchor-suffix\"></div>"
15+
@test string(div["#anchor_suffix"]("")) == "<div id=\"anchor_suffix\"></div>"
16+
# The way that Documenter.DOM eats `:` and `.` is the reason we have to
17+
# strip those characters from citation keys. If that behavior ever changes,
18+
# we can reconsider.
19+
@test_broken string(div["#anchor:suffix"]("")) == "<div id=\"anchor:suffix\"></div>"
20+
@test_broken string(div["#anchor.suffix"]("")) == "<div id=\"anchor.suffix\"></div>"
21+
end
22+
23+
24+
@testset "anchor key ambiguity" begin
25+
26+
cache = Bijections.Bijection{String,String}()
27+
28+
anchor_key = get_anchor_key("AbsilMahonySepulchre:2008", cache)
29+
@test anchor_key == "AbsilMahonySepulchre_2008"
30+
31+
# cache hit
32+
@test cache("AbsilMahonySepulchre_2008") == "AbsilMahonySepulchre:2008"
33+
anchor_key = get_anchor_key("AbsilMahonySepulchre:2008", cache)
34+
@test anchor_key == "AbsilMahonySepulchre_2008"
35+
36+
# Invalid: starts with number
37+
anchor_key = get_anchor_key("2008_AbsilMahonySepulchre", cache)
38+
@test anchor_key == "cit-2008_AbsilMahonySepulchre"
39+
40+
# Ambiguous: periods are not allowed (substituted with '_')
41+
c = IOCapture.capture(rethrow=Union{}) do
42+
get_anchor_key("AbsilMahonySepulchre.2008", cache)
43+
end
44+
@test c.value == "AbsilMahonySepulchre_2008-2"
45+
msg = "Warning: HTML anchor for citation key \"AbsilMahonySepulchre.2008\" normalizes to ambiguous \"AbsilMahonySepulchre_2008\" conflicting with citation key \"AbsilMahonySepulchre:2008\". Disambiguating with suffix \"-2\""
46+
@test contains(c.output, msg)
47+
48+
# Ambiguous key: `=` is not allowed (dropped)
49+
c = IOCapture.capture(rethrow=Union{}) do
50+
get_anchor_key("AbsilMahonySepulchre_=2008", cache)
51+
end
52+
@test c.value == "AbsilMahonySepulchre_2008-3"
53+
msg = "Warning: HTML anchor for citation key \"AbsilMahonySepulchre_=2008\" normalizes to ambiguous \"AbsilMahonySepulchre_2008\" conflicting with citation key \"AbsilMahonySepulchre:2008\". Disambiguating with suffix \"-2\""
54+
@test contains(c.output, msg)
55+
msg = "Warning: HTML anchor for citation key \"AbsilMahonySepulchre_=2008\" normalizes to ambiguous \"AbsilMahonySepulchre_2008-2\" conflicting with citation key \"AbsilMahonySepulchre.2008\". Disambiguating with suffix \"-3\""
56+
@test contains(c.output, msg)
57+
58+
end
59+
60+
61+
@testset "keys with symbols" begin
62+
63+
# https://github.com/JuliaDocs/DocumenterCitations.jl/issues/86
64+
65+
bib = CitationBibliography(
66+
joinpath(@__DIR__, "test_anchor_keys", "src", "refs.bib"),
67+
style=:numeric
68+
)
69+
70+
run_makedocs(
71+
joinpath(@__DIR__, "test_anchor_keys");
72+
sitename="Test",
73+
plugins=[bib],
74+
pages=["Home" => "index.md", "References" => "references.md",],
75+
warnonly=true,
76+
check_success=true
77+
) do dir, result, success, backtrace, output
78+
79+
@test success
80+
81+
@test bib.anchor_keys["Chirikjian:2012"] == "Chirikjian_2012"
82+
@test bib.anchor_keys["Chirikjian_2012"] == "Chirikjian_2012-2"
83+
84+
@test contains(output, "normalizes to ambiguous \"Chirikjian_2012\"")
85+
86+
#! format: off
87+
index_html = read(joinpath(dir, "build", "index.html"), String)
88+
@Test contains(index_html, "<a href=\"references/#Chirikjian_2012\">")
89+
@Test contains(index_html, "<a href=\"references/#Chirikjian_2012-2\">")
90+
91+
references_html = read(joinpath(dir, "build", "references", "index.html"), String)
92+
@Test contains(references_html, "<div id=\"Chirikjian_2012\">")
93+
@Test contains(references_html, "<div id=\"Chirikjian_2012-2\">")
94+
#! format: on
95+
96+
end
97+
98+
end

test/test_anchor_keys/src/index.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Testing citation keys with special symbols
2+
3+
You can read more about the theory of Lie groups for example in [Chirikjian:2012](@cite).
4+
5+
Note that ambiguous citation keys, as for Ref. [Chirikjian_2012](@cite), are automatically disambiguated.

0 commit comments

Comments
 (0)