Skip to content

Commit 55ad49b

Browse files
committed
wip
1 parent 98d6b9d commit 55ad49b

File tree

11 files changed

+388
-3
lines changed

11 files changed

+388
-3
lines changed

Project.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@ version = "0.6.1"
55

66
[deps]
77
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
8-
DocumenterTools = "35a29f4d-8980-5a13-9543-d66fff28ecb8"
98
Git = "d7ba0133-e1db-5d97-8f8c-041e4b3a1eb2"
109
Gumbo = "708ec375-b3d6-5a57-a7ce-8257bf98657a"
1110
HypertextLiteral = "ac1192a8-f4b3-4bfe-ba22-af5b92cd3ab2"

src/MultiDocumenter.jl

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,15 @@
11
module MultiDocumenter
22

3-
import DocumenterTools
43
import Gumbo, AbstractTrees
54
using HypertextLiteral
65
import Git: git
76

7+
module DocumenterTools
8+
import Gumbo, AbstractTrees
9+
include("documentertools/walkdocs.jl")
10+
include("documentertools/canonical_urls.jl")
11+
end
12+
813
"""
914
SearchConfig(index_versions = ["stable"], engine = MultiDocumenter.FlexSearch, lowfi = false)
1015
@@ -129,7 +134,9 @@ function make(
129134
)
130135
maybe_clone(flatten_multidocrefs(docs))
131136

132-
canonical = rstrip(canonical, '/')
137+
if !isnothing(canonical)
138+
canonical = rstrip(canonical, '/')
139+
end
133140
dir = make_output_structure(flatten_multidocrefs(docs), prettyurls, hide_previews; canonical)
134141
out_assets = joinpath(dir, "assets")
135142
if assets_dir !== nothing && isdir(assets_dir)

src/documentertools/canonical_urls.jl

Lines changed: 217 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,217 @@
1+
# This is a vendored version of code that should eventually be moved into DocumenterTools.jl
2+
# once the generic interface has crystallized, and then DocumenterTools should be added
3+
# as a dependency here.
4+
#
5+
# WIP upstream PR: https://github.com/JuliaDocs/DocumenterTools.jl/pull/78
6+
#
7+
# Note: these functions are not part of MultiDocumenter public API.
8+
9+
"""
10+
DocumenterTools.update_canonical_links_for_version(
11+
docs_directory::AbstractString;
12+
canonical::AbstractString,
13+
)
14+
15+
- **`canonical`**: corresponds to the `canonical` attribute of `Documenter.HTML`,
16+
specifying the root of the canonical URL.
17+
"""
18+
function update_canonical_links_for_version(
    docs_directory::AbstractString;
    canonical::AbstractString
)
    # Drop trailing slashes so that joining path components never produces "//".
    canonical = rstrip(canonical, '/')

    walkdocs(docs_directory, isdochtml) do fileinfo
        @debug "update_canonical_links: checking $(fileinfo.relpath)"
        # Determine the canonical URL of this page: index.html files map to their
        # directory URL (with a trailing slash), every other file maps to its own path.
        components = splitpath(fileinfo.relpath)
        new_canonical_href = if last(components) == "index.html"
            joinurl(canonical, components[1:end-1]...) * '/'
        else
            joinurl(canonical, components...)
        end

        html = Gumbo.parsehtml(read(fileinfo.fullpath, String))
        n_canonical_tags = 0
        dom_updated = false

        # Pass 1: rewrite every existing <link rel="canonical"> that has a different href.
        for e in AbstractTrees.PreOrderDFS(html.root)
            if is_canonical_element(e)
                n_canonical_tags += 1
                canonical_href = Gumbo.getattr(e, "href", nothing)
                if canonical_href != new_canonical_href
                    Gumbo.setattr!(e, "href", new_canonical_href)
                    @warn "canonical_href updated" canonical_href new_canonical_href fileinfo.relpath
                    dom_updated = true
                end
            end
        end

        # Pass 2: if the page had no canonical link at all, append one to the first <head>.
        if n_canonical_tags == 0
            for e in AbstractTrees.PreOrderDFS(html.root)
                if e isa Gumbo.HTMLElement && Gumbo.tag(e) == :head
                    canonical_href_element = Gumbo.HTMLElement{:link}(
                        [],
                        e,
                        Dict("rel" => "canonical", "href" => new_canonical_href),
                    )
                    push!(e.children, canonical_href_element)
                    @warn "Added new canonical_href" new_canonical_href fileinfo.relpath
                    dom_updated = true
                    break
                end
            end
        end

        # Only touch the file on disk when the DOM was actually modified.
        if dom_updated
            open(fileinfo.fullpath, "w") do io
                print(io, html)
            end
        end

        # More than one canonical tag is reported, but all of them have been updated above.
        if n_canonical_tags > 1
            @error "Multiple canonical tags!" file = fileinfo.relpath
        end
    end
end
71+
72+
"""
    is_canonical_element(e) -> Bool

Return `true` if `e` is a `Gumbo.HTMLElement` with tag `link` whose `rel` attribute
equals `"canonical"`, and `false` otherwise.
"""
function is_canonical_element(e)
    e isa Gumbo.HTMLElement || return false
    Gumbo.tag(e) == :link || return false
    return Gumbo.getattr(e, "rel", nothing) == "canonical"
end
73+
"""
    joinurl(ps::AbstractString...) -> String

Concatenate the given URL components, separating them with a single `/`.
No normalization is performed on the components themselves.
"""
function joinurl(ps::AbstractString...)
    return join(ps, '/')
end
74+
75+
"""
    update_canonical_links(docs_directory::AbstractString; canonical::AbstractString)

Update the canonical links of the Documenter builds found directly under
`docs_directory`, so that they all point to one canonical version rooted at `canonical`.

The path of the canonical version is determined as follows:

1. If `docs_directory/index.html` exists, the target of its
   `<meta http-equiv="refresh">` redirect is used.
2. Otherwise the version list is read from `docs_directory/versions.js`. Entries whose
   path does not exist are dropped (with a warning). The single symlink that is _not_
   a version number (e.g. `stable`) is used; if there is none, the symlink named
   `vN` (or `N`) with the highest `N` is used as a fallback.

`update_canonical_links_for_version` is then run on every real (non-symlink)
subdirectory that contains a `siteinfo.js` file.

Throws an `ArgumentError` if `docs_directory` is not a directory, if neither
`index.html` nor `versions.js` is available, or if no redirect URL can be extracted
from `index.html`. Raises an error if the canonical version cannot be determined.
"""
function update_canonical_links(
    docs_directory::AbstractString;
    canonical::AbstractString
)
    canonical = rstrip(canonical, '/')
    docs_directory = abspath(docs_directory)
    isdir(docs_directory) || throw(ArgumentError("No such directory: $(docs_directory)"))

    redirect_index_html_path = joinpath(docs_directory, "index.html")
    canonical_path = if isfile(redirect_index_html_path)
        redirect_url = get_meta_redirect_url(redirect_index_html_path)
        # get_meta_redirect_url returns `nothing` when there is no parseable redirect;
        # fail with a descriptive error rather than a MethodError from normpath.
        if isnothing(redirect_url)
            throw(ArgumentError(
                "Unable to determine the redirect URL from $(redirect_index_html_path)"
            ))
        end
        splitpath(normpath(redirect_url))
    else
        # Try to extract the list of versions from versions.js
        versions_js = joinpath(docs_directory, "versions.js")
        isfile(versions_js) || throw(ArgumentError("versions.js is missing in $(docs_directory)"))
        versions = map(extract_versions_list(versions_js)) do version_str
            isversion, version_number = if occursin(Base.VERSION_REGEX, version_str)
                true, VersionNumber(version_str)
            else
                false, nothing
            end
            fullpath = joinpath(docs_directory, version_str)
            return (;
                path = version_str,
                path_exists = isdir(fullpath) || islink(fullpath),
                symlink = islink(fullpath),
                isversion,
                version_number,
                fullpath,
            )
        end
        # We'll filter out a couple of potential bad cases and issue warnings.
        # Note: this must be the in-place filter!, otherwise the result is discarded
        # and the non-existent entries stay in the list.
        filter!(versions) do vi
            if !vi.path_exists
                @warn "update_canonical_links: path does not exists or is not a directory" docs_directory vi
                return false
            end
            return true
        end
        # We need to determine the canonical path. This would usually be something like the
        # stable/ directory, but it can have a different name, including being a version
        # number. So first we try to find a non-version directory _that is a symlink_ (so
        # that it wouldn't get confused with previews/ or dev builds). If that fails, we try
        # to find the directory matching `v[0-9]+` with the highest version number. This
        # does not cover all possible cases, but should be good enough for now.
        if isempty(versions)
            error("Unable to determine the canonical path. Found no version directories")
        end

        non_version_symlinks = filter(vi -> !vi.isversion && vi.symlink, versions)
        canonical_version = if isempty(non_version_symlinks)
            # We didn't find any non-version symlinks, so we'll try to find the vN directory
            # now as a fallback.
            version_symlinks = map(versions) do vi
                if !(vi.symlink && vi.isversion)
                    return nothing
                end
                # Accept both `vN` (the usual Documenter naming) and a bare `N`.
                m = match(r"^v?([0-9]+)$", vi.path)
                isnothing(m) && return nothing
                parse(Int, m[1]) => vi
            end
            filter!(!isnothing, version_symlinks)
            if isempty(version_symlinks)
                error("Unable to determine the canonical path. Found no version directories")
            end
            _, idx = findmax(first, version_symlinks)
            version_symlinks[idx][2]
        elseif length(non_version_symlinks) > 1
            error("Unable to determine the canonical path. Found multiple non-version symlinks.\n$(non_version_symlinks)")
        else
            only(non_version_symlinks)
        end
        (canonical_version.path,)
    end
    canonical_full_root = joinurl(canonical, canonical_path...)
    # If we have determined which version should be the canonical version, we can actually
    # go and run update_canonical_links_for_version on each directory.
    for filename in readdir(docs_directory)
        path = joinpath(docs_directory, filename)
        # We'll skip all files. This includes files such as index.html, which in this
        # directory will likely be the redirect. Also, links should be pointing to other
        # versions, so we'll skip them too.
        if islink(path) || !isdir(path)
            continue
        end
        # For true directories, we check that siteinfo.js file is present, which is a pretty
        # good indicator that it's a proper Documenter build.
        if !isfile(joinpath(path, "siteinfo.js"))
            # We want to warn if we run across any directories that are not Documenter builds.
            # But previews/ is one valid case which may be present and so we shouldn't warn
            # for this one.
            if filename != "previews"
                @warn "update_canonical_links: skipping directory that does not look like a Documenter build" filename docs_directory
            end
            continue
        end
        # Finally, we can run update_canonical_links_for_version on the directory.
        @info "Updating canonical URLs for" docs_directory filename canonical_full_root
        update_canonical_links_for_version(path; canonical = canonical_full_root)
    end
end
177+
178+
"""
    extract_versions_list(versions_js::AbstractString)

Read the `versions.js` file at `versions_js` and return the entries of its
`var DOC_VERSIONS = [...]` array as a vector of strings, with surrounding whitespace
and double quotes removed and empty entries dropped.

Throws an `ArgumentError` if the file does not exist, if no `DOC_VERSIONS` array can
be found in it, or if the array contains no entries.
"""
function extract_versions_list(versions_js::AbstractString)
    versions_js = abspath(versions_js)
    if !isfile(versions_js)
        throw(ArgumentError("No such file: $(versions_js)"))
    end
    versions_js_content = read(versions_js, String)

    m = match(r"var\s+DOC_VERSIONS\s*=\s*\[([0-9A-Za-z\"\s.,+-]+)\]", versions_js_content)
    if m === nothing
        throw(ArgumentError("""
        Could not find DOC_VERSIONS in $(versions_js):
        $(versions_js_content)"""))
    end

    # Each comma-separated item still carries its quotes and surrounding whitespace.
    is_padding(c) = isspace(c) || (c == '"')
    versions = [strip(is_padding, item) for item in split(m[1], ",")]
    filter!(!isempty, versions)
    if isempty(versions)
        throw(ArgumentError("""
        DOC_VERSIONS empty in $(versions_js):
        $(versions_js_content)"""))
    end
    return versions
end
197+
198+
"""
    get_meta_redirect_url(indexhtml_path::AbstractString)

Parse the HTML file at `indexhtml_path` and return the URL of the first
`<meta http-equiv="refresh" content="N; url=...">` element whose `content` attribute
can be parsed. Elements with a missing or unparseable `content` attribute are skipped
with a warning. Returns `nothing` if no usable redirect is found.
"""
function get_meta_redirect_url(indexhtml_path::AbstractString)
    html = Gumbo.parsehtml(read(indexhtml_path, String))
    for e in AbstractTrees.PreOrderDFS(html.root)
        # Only <meta http-equiv="refresh"> elements are of interest.
        is_refresh_meta =
            e isa Gumbo.HTMLElement &&
            Gumbo.tag(e) == :meta &&
            Gumbo.getattr(e, "http-equiv", nothing) == "refresh"
        is_refresh_meta || continue

        content = Gumbo.getattr(e, "content", nothing)
        if content === nothing
            @warn "<meta http-equiv=\"refresh\" ...> with no content attribute" path = indexhtml_path
            continue
        end

        # The content attribute has the form "<delay>; url=<target>".
        m = match(r"[0-9]+;\s*url=(.*)", content)
        m === nothing || return m.captures[1]
        @warn "Unable to parse content value of <meta http-equiv=\"refresh\" ...>" content path = indexhtml_path
    end
    return nothing
end

src/documentertools/walkdocs.jl

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
# This is a vendored version of code that should eventually be moved into DocumenterTools.jl
2+
# once the generic interface has crystallized, and then DocumenterTools should be added
3+
# as a dependency here.
4+
#
5+
# WIP upstream PR: https://github.com/JuliaDocs/DocumenterTools.jl/pull/75
6+
#
7+
# Note: these functions are not part of MultiDocumenter public API.
8+
9+
"""
10+
struct FileInfo
11+
12+
Objects of this type are passed as arguments to the callback of the [`walkdocs`](@ref) function.
13+
See [`walkdocs`](@ref) for information on how to interpret the fields.
14+
"""
15+
Base.@kwdef struct FileInfo
    # Root directory of the walk, as an absolute path.
    root::String
    # Name of the file.
    filename::String
    # Path to the file, relative to `root`.
    relpath::String
    # Absolute path to the file.
    fullpath::String
end
21+
22+
"""
23+
isdochtml(::FileInfo) -> Bool
24+
25+
Checks if the file is a Documenter-generated HTML file.
26+
"""
27+
function isdochtml(fileinfo::FileInfo)
    # The check is purely name-based: a file qualifies when its extension is ".html".
    extension = last(splitext(fileinfo.filename))
    return extension == ".html"
end
31+
32+
"""
33+
walkdocs(f, dir::AbstractString[, filter_cb]; collect::Bool=false)
34+
35+
Takes a directory `dir`, which is assumed to contain Documenter-generated documentation,
36+
walks over all the files and calls `f` on each of the files it finds. Optionally, a
37+
`filter_cb(::FileInfo)` function can be passed to only call `f` on files for which it returns
38+
`true`.
39+
40+
`f` and `filter_cb` will be called with a single object that has the following fields (all strings):
41+
42+
- `.root`: the root directory of the walk, i.e. `dir` (but as an absolute path)
43+
- `.filename`: file name
44+
- `.relpath`: path to the file, relative to `dir`
45+
- `.fullpath`: absolute path to the file
46+
47+
See also the [`FileInfo`](@ref) struct.
48+
49+
If `collect = true` is set, the function also "collects" all the return values from `f`
50+
from each of the function calls, essentially making `walkdocs` behave like a `map` function
51+
applied on each of the HTML files.
52+
53+
```julia
54+
walkdocs(directory_root, isdochtml) do fileinfo
55+
@show fileinfo.fullpath
56+
end
57+
```
58+
"""
59+
function walkdocs(f, dir::AbstractString, filter_cb = _ -> true; collect::Bool=false)
    # Fail early if either callback cannot be called with a FileInfo argument.
    for callback in (f, filter_cb)
        hasmethod(callback, (FileInfo,)) || throw(MethodError(callback, (FileInfo,)))
    end

    dir = abspath(dir)
    if !isdir(dir)
        error("docwalker: dir is not a directory\n dir = $(dir)")
    end

    # Return values of `f` are only gathered when `collect` is set; otherwise the
    # function returns `nothing`.
    results = collect ? Any[] : nothing
    for (root, _, files) in walkdir(dir)
        for file in files
            fullpath = joinpath(root, file)
            fileinfo = FileInfo(;
                root = dir,
                filename = file,
                relpath = Base.relpath(fullpath, dir),
                fullpath = fullpath,
            )
            # Files rejected by the filter never reach the callback f().
            filter_cb(fileinfo) || continue
            value = f(fileinfo)
            collect && push!(results, value)
        end
    end
    return results
end

test/documentertools.jl

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
using Test
2+
import MultiDocumenter: DocumenterTools
3+
4+
FIXTURES = joinpath(@__DIR__, "fixtures")
5+
6+
@testset "walkdocs" begin
    pre_fixture = joinpath(FIXTURES, "pre")

    # Path invariants that must hold for every FileInfo handed to the callback.
    function check_fileinfo(fileinfo)
        @test isabspath(fileinfo.root)
        @test isabspath(fileinfo.fullpath)
        @test !isabspath(fileinfo.relpath)
        @test joinpath(fileinfo.root, fileinfo.relpath) == fileinfo.fullpath
    end

    # Without a filter, the callback is called for every file in the fixture.
    let fileinfos = DocumenterTools.FileInfo[]
        rs = DocumenterTools.walkdocs(pre_fixture) do fileinfo
            push!(fileinfos, fileinfo)
            check_fileinfo(fileinfo)
        end
        @test rs === nothing
        @test length(fileinfos) == 5
    end

    # With the isdochtml filter, only the HTML files are visited.
    let fileinfos = []
        rs = DocumenterTools.walkdocs(pre_fixture, DocumenterTools.isdochtml) do fileinfo
            push!(fileinfos, fileinfo)
            check_fileinfo(fileinfo)
        end
        @test rs === nothing
        @test length(fileinfos) == 4
    end

    # With collect = true, the callback return values are gathered and returned.
    let rs = DocumenterTools.walkdocs(fileinfo -> fileinfo.root, pre_fixture; collect = true)
        @test length(rs) == 5
        @test all(s -> s isa String, rs)
    end
end

test/fixtures/pre/v0.5.0/asset.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
<!DOCTYPE >
2+
<HTML>
3+
<head></head>
4+
<body>
5+
nothing much in here
6+
</body>
7+
</HTML>

0 commit comments

Comments
 (0)