Skip to content

Commit 332ad64

Browse files
committed
more tests, reduce verbosity
1 parent 55ad49b commit 332ad64

File tree

18 files changed

+225
-85
lines changed

18 files changed

+225
-85
lines changed

src/documentertools/canonical_urls.jl

Lines changed: 100 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ function update_canonical_links_for_version(
4040
canonical_href = Gumbo.getattr(e, "href", nothing)
4141
if canonical_href != new_canonical_href
4242
Gumbo.setattr!(e, "href", new_canonical_href)
43-
@warn "canonical_href updated" canonical_href new_canonical_href fileinfo.relpath
43+
@debug "update_canonical_links_for_version: canonical_href updated" canonical_href new_canonical_href fileinfo.relpath
4444
dom_updated = true
4545
end
4646
end
@@ -55,7 +55,7 @@ function update_canonical_links_for_version(
5555
)
5656
)
5757
push!(e.children, canonical_href_element)
58-
@warn "Added new canonical_href" new_canonical_href fileinfo.relpath
58+
@debug "update_canonical_links_for_version: added new canonical_href" new_canonical_href fileinfo.relpath
5959
dom_updated = true
6060
break
6161
end
@@ -72,6 +72,10 @@ end
7272
"""
    is_canonical_element(e)

Return `true` when `e` is a `Gumbo.HTMLElement` whose tag is `link` and whose `rel`
attribute equals `"canonical"`; return `false` otherwise.
"""
function is_canonical_element(e)
    e isa Gumbo.HTMLElement || return false
    Gumbo.tag(e) == :link || return false
    return Gumbo.getattr(e, "rel", nothing) == "canonical"
end
7373
"""
    joinurl(ps::AbstractString...)

Concatenate the URL segments `ps` into a single string, placing a `/` between
consecutive segments.
"""
function joinurl(ps::AbstractString...)
    return join(ps, '/')
end
7474

75+
"""
76+
Takes the multi-versioned Documenter site in `docs_directory` and updates the HTML canonical URLs
77+
to point to `canonical`.
78+
"""
7579
function update_canonical_links(
7680
docs_directory::AbstractString;
7781
canonical::AbstractString
@@ -85,67 +89,7 @@ function update_canonical_links(
8589
redirect_url = get_meta_redirect_url(redirect_index_html_path)
8690
splitpath(normpath(redirect_url))
8791
else
88-
# Try to extract the list of versions from versions.js
89-
versions_js = joinpath(docs_directory, "versions.js")
90-
isfile(versions_js) || throw(ArgumentError("versions.js is missing in $(docs_directory)"))
91-
versions = map(extract_versions_list(versions_js)) do version_str
92-
isversion, version_number = if occursin(Base.VERSION_REGEX, version_str)
93-
true, VersionNumber(version_str)
94-
else
95-
false, nothing
96-
end
97-
fullpath = joinpath(docs_directory, version_str)
98-
return (;
99-
path = version_str,
100-
path_exists = isdir(fullpath) || islink(fullpath),
101-
symlink = islink(fullpath),
102-
isversion,
103-
version_number,
104-
fullpath,
105-
)
106-
end
107-
# We'll filter out a couple of potential bad cases and issue warnings
108-
filter(versions) do vi
109-
if !vi.path_exists
110-
@warn "update_canonical_links: path does not exists or is not a directory" docs_directory vi
111-
return false
112-
end
113-
return true
114-
end
115-
# We need to determine the canonical path. This would usually be something like the stable/
116-
# directory, but it can have a different name, including being a version number. So first we
117-
# try to find a non-version directory _that is a symlink_ (so that it wouldn't get confused)
118-
# previews/ or dev builds. If that fails, we try to find the directory matching `v[0-9]+`,
119-
# with the highest version number. This does not cover all possible cases, but should be good
120-
# enough for now.
121-
if isempty(versions)
122-
error("Unable to determine the canonical path. Found no version directories")
123-
end
124-
125-
non_version_symlinks = filter(vi -> !vi.isversion && vi.symlink, versions)
126-
canonical_version = if isempty(non_version_symlinks)
127-
# We didn't find any non-version symlinks, so we'll try to find the vN directory now
128-
# as a fallback.
129-
version_symlinks = map(versions) do vi
130-
if !(vi.symlink && vi.isversion)
131-
return nothing
132-
end
133-
m = match(r"^([0-9]+)$", vi.path)
134-
isnothing(m) && return nothing
135-
parse(Int, m[1]) => vi
136-
end
137-
filter!(!isnothing, version_symlinks)
138-
if isempty(version_symlinks)
139-
error("Unable to determine the canonical path. Found no version directories")
140-
end
141-
_, idx = findmax(first, version_symlinks)
142-
version_symlinks[idx][2]
143-
elseif length(non_version_symlinks) > 1
144-
error("Unable to determine the canonical path. Found multiple non-version symlinks.\n$(non_version_symlinks)")
145-
else
146-
only(non_version_symlinks)
147-
end
148-
(canonical_version.path,)
92+
canonical_version_from_versions_js(docs_directory)
14993
end
15094
canonical_full_root = joinurl(canonical, canonical_path...)
15195
# If we have determined which version should be the canonical version, we can actually
@@ -170,31 +114,22 @@ function update_canonical_links(
170114
continue
171115
end
172116
# Finally, we can run update_canonical_links_for_version on the directory.
173-
@info "Updating canonical URLs for" docs_directory filename canonical_full_root
117+
@debug "Updating canonical URLs for version" docs_directory filename canonical_full_root
174118
update_canonical_links_for_version(path; canonical = canonical_full_root)
175119
end
176120
end
177121

178-
function extract_versions_list(versions_js::AbstractString)
179-
versions_js = abspath(versions_js)
180-
isfile(versions_js) || throw(ArgumentError("No such file: $(versions_js)"))
181-
versions_js_content = read(versions_js, String)
182-
m = match(r"var\s+DOC_VERSIONS\s*=\s*\[([0-9A-Za-z\"\s.,+-]+)\]", versions_js_content)
183-
if isnothing(m)
184-
throw(ArgumentError("""
185-
Could not find DOC_VERSIONS in $(versions_js):
186-
$(versions_js_content)"""))
187-
end
188-
versions = strip.(c -> isspace(c) || (c == '"'), split(m[1], ","))
189-
filter!(!isempty, versions)
190-
if isempty(versions)
191-
throw(ArgumentError("""
192-
DOC_VERSIONS empty in $(versions_js):
193-
$(versions_js_content)"""))
194-
end
195-
return versions
122+
"""
    canonical_directory_from_redirect_index_html(docs_directory::AbstractString)

Determine the canonical directory from the redirect `index.html` file located at the
root of `docs_directory`.

Returns the redirect target as a vector of path components (the result of `splitpath`
applied to the normalized redirect URL), or `nothing` if `docs_directory` contains no
`index.html` file or no redirect URL could be extracted from it.
"""
function canonical_directory_from_redirect_index_html(docs_directory::AbstractString)
    redirect_index_html_path = joinpath(docs_directory, "index.html")
    isfile(redirect_index_html_path) || return nothing
    redirect_url = get_meta_redirect_url(redirect_index_html_path)
    # `get_meta_redirect_url` returns `nothing` when it does not find a redirect tag;
    # handing that to `normpath` would raise a `MethodError`.
    isnothing(redirect_url) && return nothing
    return splitpath(normpath(redirect_url))
end
197128

129+
"""
130+
Parses the HTML file at `indexhtml_path` and tries to extract the `url=...` value
131+
of the redirect `<meta http-equiv="refresh" ...>` tag.
132+
"""
198133
function get_meta_redirect_url(indexhtml_path::AbstractString)
199134
html = Gumbo.parsehtml(read(indexhtml_path, String))
200135
for e in AbstractTrees.PreOrderDFS(html.root)
@@ -215,3 +150,86 @@ function get_meta_redirect_url(indexhtml_path::AbstractString)
215150
end
216151
return nothing
217152
end
153+
154+
"""
    canonical_version_from_versions_js(docs_directory)

Determine the name of the canonical version directory of the multi-versioned
documentation site in `docs_directory`, based on the entries listed in its
`versions.js` file.

Entries listed in `versions.js` that do not exist in `docs_directory` are dropped (with
a warning). From the remaining entries the canonical version is chosen as follows:

1. If exactly one entry is a symlink whose name is not a version number (e.g.
   `stable`), that entry is used. More than one such entry is an error.
2. If there is no such entry, the entry named `vN` (`N` an integer) with the highest
   `N` is used.

Returns the directory name as a string.

Throws an `ArgumentError` if `docs_directory` is not a directory or has no
`versions.js` file, and an `ErrorException` if no canonical version can be determined.
"""
function canonical_version_from_versions_js(docs_directory)
    isdir(docs_directory) || throw(ArgumentError("Not a directory: $(docs_directory)"))
    # Try to extract the list of versions from versions.js
    versions_js = joinpath(docs_directory, "versions.js")
    isfile(versions_js) || throw(ArgumentError("versions.js is missing in $(docs_directory)"))
    versions = map(extract_versions_list(versions_js)) do version_str
        isversion, version_number = if occursin(Base.VERSION_REGEX, version_str)
            true, VersionNumber(version_str)
        else
            false, nothing
        end
        fullpath = joinpath(docs_directory, version_str)
        return (;
            path = version_str,
            path_exists = isdir(fullpath) || islink(fullpath),
            symlink = islink(fullpath),
            isversion,
            version_number,
            fullpath,
        )
    end
    # Drop entries that are listed in versions.js but are missing on disk, warning about
    # each one. `filter` is non-mutating, so its result has to be kept; otherwise a
    # missing directory could still be picked as the canonical version below.
    existing_versions = filter(versions) do vi
        if !vi.path_exists
            @warn "update_canonical_links: path does not exists or is not a directory" docs_directory vi
            return false
        end
        return true
    end
    # We need to determine the canonical path. This would usually be something like the
    # stable/ directory, but it can have a different name, including being a version
    # number. So first we try to find a non-version directory _that is a symlink_ (so
    # that it does not get confused with previews/ or dev builds). If that fails, we try
    # to find the directory matching `v[0-9]+` with the highest version number. This
    # does not cover all possible cases, but should be good enough for now.
    if isempty(existing_versions)
        error("Unable to determine the canonical path. Found no version directories")
    end

    non_version_symlinks = filter(vi -> !vi.isversion && vi.symlink, existing_versions)
    canonical_version = if isempty(non_version_symlinks)
        # We didn't find any non-version symlinks, so we'll try to find the vN directory
        # now as a fallback. Each candidate is stored as `N => entry`.
        major_versions = map(existing_versions) do vi
            m = match(r"^v([0-9]+)$", vi.path)
            isnothing(m) && return nothing
            parse(Int, m[1]) => vi
        end
        filter!(!isnothing, major_versions)
        if isempty(major_versions)
            error("Unable to determine the canonical path. Found no version directories")
        end
        _, idx = findmax(first, major_versions)
        major_versions[idx][2]
    elseif length(non_version_symlinks) > 1
        error("Unable to determine the canonical path. Found multiple non-version symlinks.\n$(non_version_symlinks)")
    else
        only(non_version_symlinks)
    end

    return canonical_version.path
end
216+
217+
"""
    extract_versions_list(versions_js::AbstractString)

Read the file at `versions_js` and return the entries of its `DOC_VERSIONS` array, in
the order they appear in the file, with surrounding whitespace and double quotes
stripped. Empty entries (e.g. from a trailing comma) are discarded.

Throws an `ArgumentError` if the file does not exist, if no `DOC_VERSIONS` array can be
found in it, or if the array has no non-empty entries.
"""
function extract_versions_list(versions_js::AbstractString)
    js_path = abspath(versions_js)
    if !isfile(js_path)
        throw(ArgumentError("No such file: $(js_path)"))
    end
    content = read(js_path, String)
    doc_versions = match(r"var\s+DOC_VERSIONS\s*=\s*\[([0-9A-Za-z\"\s.,+-]+)\]", content)
    isnothing(doc_versions) && throw(ArgumentError("""
        Could not find DOC_VERSIONS in $(js_path):
        $(content)"""))
    # Each comma-separated item may be padded with whitespace and wrapped in quotes.
    is_padding(c) = isspace(c) || (c == '"')
    entries = [strip(is_padding, item) for item in split(doc_versions[1], ",")]
    filter!(!isempty, entries)
    isempty(entries) && throw(ArgumentError("""
        DOC_VERSIONS empty in $(js_path):
        $(content)"""))
    return entries
end

test/documentertools.jl

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,3 +37,85 @@ FIXTURES = joinpath(@__DIR__, "fixtures")
3737
end
3838

3939
end
40+
41+
"""
    withfiles(f, files::Pair...)

Create a temporary directory (via the do-block form of `mktempdir`), populate it
according to `files`, call `f(path)` with the path of that directory, and return the
result of `f`.

Each element of `files` is a `filename => content` pair, where `filename` is relative to
the temporary directory and `content` is one of:

- an `AbstractString`: a file with that content is written;
- `:dir`: a directory is created;
- `(:symlink, target)`: a symlink pointing to `target` is created.

Parent directories of nested filenames (e.g. `"v1/index.html"`) are created as needed.
Any other `content` value raises an error.
"""
function withfiles(f, files::Pair...)
    mktempdir() do path
        for (filename, content) in files
            filepath = joinpath(path, filename)
            # Make nested entries work without requiring a separate `:dir` entry for
            # every parent directory.
            mkpath(dirname(filepath))
            if content isa AbstractString
                write(filepath, content)
            elseif content === :dir
                # `mkpath` rather than `mkdir`, since the directory may already have been
                # created as the parent of an earlier nested entry.
                mkpath(filepath)
            elseif content isa Tuple && content[1] === :symlink
                symlink(content[2], filepath)
            else
                error("Invalid content: $content")
            end
        end
        # List the directory with `readdir` instead of shelling out to `ls`, which is
        # not available on every platform.
        @debug "withfiles: populated $path" entries = readdir(path)
        return f(path)
    end
end
59+
60+
# Tests for the canonical URL tooling: parsing versions.js, parsing the redirect
# index.html, and the end-to-end rewrite checked against the pre/post fixtures.
@testset "canonical_urls" begin
    @testset "parsing versions.js" begin
        # A `stable` symlink next to regular version directories: the symlink is
        # expected to be reported as the canonical version.
        versions_js_with_stable = """
            var DOC_VERSIONS = [
            "stable",
            "v0.27",
            "v0.1",
            "dev",
            ];
            """
        withfiles(
            "versions.js" => versions_js_with_stable,
            "v0.27" => :dir, "v0.1" => :dir, "dev" => :dir,
            "stable" => (:symlink, "v0.27"),
        ) do path
            listed = DocumenterTools.extract_versions_list(joinpath(path, "versions.js"))
            @test listed == ["stable", "v0.27", "v0.1", "dev"]
            @test DocumenterTools.canonical_version_from_versions_js(path) == "stable"
        end

        # No symlinks at all: the highest `vN` directory is expected.
        versions_js_major_only = """
            var DOC_VERSIONS = [
            "v1",
            "v2",
            "dev",
            ];
            """
        withfiles(
            "versions.js" => versions_js_major_only,
            "v1" => :dir, "v2" => :dir, "dev" => :dir,
        ) do path
            listed = DocumenterTools.extract_versions_list(joinpath(path, "versions.js"))
            @test listed == ["v1", "v2", "dev"]
            @test DocumenterTools.canonical_version_from_versions_js(path) == "v2"
        end
    end

    @testset "parsing redirect index.html" begin
        # A directory without an index.html yields `nothing`.
        mktempdir() do path
            @test DocumenterTools.canonical_directory_from_redirect_index_html(path) === nothing
        end

        mktempdir() do path
            file = joinpath(path, "index.html")
            redirect_html = """
                <!--This file is automatically generated by Documenter.jl-->
                <meta http-equiv="refresh" content="0; url=./stable/"/>
                """
            write(file, redirect_html)
            @test DocumenterTools.get_meta_redirect_url(file) == "./stable/"
            @test DocumenterTools.canonical_directory_from_redirect_index_html(path) == ["stable"]
        end
    end

    @testset "update_canonical_links" begin
        # Work on a copy of the `pre` fixture, then compare every file found in the
        # `post` fixture against the corresponding updated file.
        out = tempname()
        cp(joinpath(FIXTURES, "pre"), out)
        @test DocumenterTools.canonical_directory_from_redirect_index_html(out) == ["stable"]
        DocumenterTools.update_canonical_links(out; canonical = "https://example.org/this-is-test")
        DocumenterTools.walkdocs(joinpath(FIXTURES, "post")) do fileinfo
            expected = read(fileinfo.fullpath, String)
            actual = read(joinpath(out, fileinfo.relpath), String)
            if actual != expected
                @error "update_canonical_links: change and post not matching" out fileinfo
            end
            @test actual == expected
        end
    end
end

test/fixtures/post/index.html

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
<!--This file is automatically generated by Documenter.jl-->
2+
<meta http-equiv="refresh" content="0; url=./stable/"/>
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<!DOCTYPE ><HTML><head><link href="https://example.org/this-is-test/stable/bar/" rel="canonical"/></head><body>
2+
nothing much in here
3+
4+
</body></HTML>

test/fixtures/post/stable/baz.html

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<!DOCTYPE ><HTML><head><link href="https://example.org/this-is-test/stable/baz.html" rel="canonical"/></head><body>
2+
nothing much in here
3+
4+
5+
</body></HTML>

test/fixtures/pre/v1.0.0/index.html renamed to test/fixtures/post/stable/index.html

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,9 @@
22
<HTML>
33
<head>
44
<script data-outdated-warner></script>
5+
<link href="https://example.org/this-is-test/stable/" rel="canonical"/>
56
</head>
67
<body>
78
nothing much in here
89
</body>
9-
</HTML>
10+
</HTML>

test/fixtures/post/stable/siteinfo.js

Whitespace-only changes.

test/fixtures/post/v0.5.0/asset.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
<!DOCTYPE ><HTML><head><link href="https://example.org/this-is-test/stable/foo/" rel="canonical"/></head><body>
2+
nothing much in here
3+
4+
</body></HTML>

test/fixtures/post/v0.5.0/index.html

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
<!DOCTYPE ><HTML><head><link <="" head="" href="https://example.org/this-is-test/stable/" rel="canonical"/></head><body>
2+
nothing much in here
3+
4+
5+
</body></HTML>

0 commit comments

Comments
 (0)