@@ -40,7 +40,7 @@ function update_canonical_links_for_version(
40
40
canonical_href = Gumbo. getattr (e, " href" , nothing )
41
41
if canonical_href != new_canonical_href
42
42
Gumbo. setattr! (e, " href" , new_canonical_href)
43
- @warn " canonical_href updated" canonical_href new_canonical_href fileinfo. relpath
43
+ @debug " update_canonical_links_for_version: canonical_href updated" canonical_href new_canonical_href fileinfo. relpath
44
44
dom_updated = true
45
45
end
46
46
end
@@ -55,7 +55,7 @@ function update_canonical_links_for_version(
55
55
)
56
56
)
57
57
push! (e. children, canonical_href_element)
58
- @warn " Added new canonical_href" new_canonical_href fileinfo. relpath
58
+ @debug " update_canonical_links_for_version: added new canonical_href" new_canonical_href fileinfo. relpath
59
59
dom_updated = true
60
60
break
61
61
end
72
72
# Predicate: `true` only when `e` is a Gumbo HTML element whose tag is `link`
# and whose `rel` attribute equals "canonical".
function is_canonical_element(e)
    e isa Gumbo.HTMLElement || return false
    Gumbo.tag(e) == :link || return false
    return Gumbo.getattr(e, "rel", nothing) == "canonical"
end
73
73
# Concatenates the given URL segments, placing a single '/' between consecutive segments.
# Segments are used as-is (no stripping of leading or trailing slashes is performed).
function joinurl(ps::AbstractString...)
    return join(ps, '/')
end
74
74
75
+ """
76
+ Takes the multi-versioned Documenter site in `docs_directory` and updates the HTML canonical URLs
77
+ to point to `canonical`.
78
+ """
75
79
function update_canonical_links (
76
80
docs_directory:: AbstractString ;
77
81
canonical:: AbstractString
@@ -85,67 +89,7 @@ function update_canonical_links(
85
89
redirect_url = get_meta_redirect_url (redirect_index_html_path)
86
90
splitpath (normpath (redirect_url))
87
91
else
88
- # Try to extract the list of versions from versions.js
89
- versions_js = joinpath (docs_directory, " versions.js" )
90
- isfile (versions_js) || throw (ArgumentError (" versions.js is missing in $(docs_directory) " ))
91
- versions = map (extract_versions_list (versions_js)) do version_str
92
- isversion, version_number = if occursin (Base. VERSION_REGEX, version_str)
93
- true , VersionNumber (version_str)
94
- else
95
- false , nothing
96
- end
97
- fullpath = joinpath (docs_directory, version_str)
98
- return (;
99
- path = version_str,
100
- path_exists = isdir (fullpath) || islink (fullpath),
101
- symlink = islink (fullpath),
102
- isversion,
103
- version_number,
104
- fullpath,
105
- )
106
- end
107
- # We'll filter out a couple of potential bad cases and issue warnings
108
- filter (versions) do vi
109
- if ! vi. path_exists
110
- @warn " update_canonical_links: path does not exists or is not a directory" docs_directory vi
111
- return false
112
- end
113
- return true
114
- end
115
- # We need to determine the canonical path. This would usually be something like the stable/
116
- # directory, but it can have a different name, including being a version number. So first we
117
- # try to find a non-version directory _that is a symlink_ (so that it wouldn't get confused)
118
- # previews/ or dev builds. If that fails, we try to find the directory matching `v[0-9]+`,
119
- # with the highest version number. This does not cover all possible cases, but should be good
120
- # enough for now.
121
- if isempty (versions)
122
- error (" Unable to determine the canonical path. Found no version directories" )
123
- end
124
-
125
- non_version_symlinks = filter (vi -> ! vi. isversion && vi. symlink, versions)
126
- canonical_version = if isempty (non_version_symlinks)
127
- # We didn't find any non-version symlinks, so we'll try to find the vN directory now
128
- # as a fallback.
129
- version_symlinks = map (versions) do vi
130
- if ! (vi. symlink && vi. isversion)
131
- return nothing
132
- end
133
- m = match (r" ^([0-9]+)$" , vi. path)
134
- isnothing (m) && return nothing
135
- parse (Int, m[1 ]) => vi
136
- end
137
- filter! (! isnothing, version_symlinks)
138
- if isempty (version_symlinks)
139
- error (" Unable to determine the canonical path. Found no version directories" )
140
- end
141
- _, idx = findmax (first, version_symlinks)
142
- version_symlinks[idx][2 ]
143
- elseif length (non_version_symlinks) > 1
144
- error (" Unable to determine the canonical path. Found multiple non-version symlinks.\n $(non_version_symlinks) " )
145
- else
146
- only (non_version_symlinks)
147
- end
148
- (canonical_version. path,)
92
+ canonical_version_from_versions_js (docs_directory)
149
93
end
150
94
canonical_full_root = joinurl (canonical, canonical_path... )
151
95
# If we have determined which version should be the canonical version, we can actually
@@ -170,31 +114,22 @@ function update_canonical_links(
170
114
continue
171
115
end
172
116
# Finally, we can run update_canonical_links_for_version on the directory.
173
- @info " Updating canonical URLs for" docs_directory filename canonical_full_root
117
+ @debug " Updating canonical URLs for version " docs_directory filename canonical_full_root
174
118
update_canonical_links_for_version (path; canonical = canonical_full_root)
175
119
end
176
120
end
177
121
178
- function extract_versions_list (versions_js:: AbstractString )
179
- versions_js = abspath (versions_js)
180
- isfile (versions_js) || throw (ArgumentError (" No such file: $(versions_js) " ))
181
- versions_js_content = read (versions_js, String)
182
- m = match (r" var\s +DOC_VERSIONS\s *=\s *\[ ([0-9A-Za-z\"\s .,+-]+)\] " , versions_js_content)
183
- if isnothing (m)
184
- throw (ArgumentError ("""
185
- Could not find DOC_VERSIONS in $(versions_js) :
186
- $(versions_js_content) """ ))
187
- end
188
- versions = strip .(c -> isspace (c) || (c == ' "' ), split (m[1 ], " ," ))
189
- filter! (! isempty, versions)
190
- if isempty (versions)
191
- throw (ArgumentError ("""
192
- DOC_VERSIONS empty in $(versions_js) :
193
- $(versions_js_content) """ ))
194
- end
195
- return versions
122
"""
Looks for an `index.html` file directly in `docs_directory` and, if it contains a
`<meta http-equiv="refresh" ...>` redirect, returns the redirect target split into its
path components (i.e. `splitpath(normpath(url))`).

Returns `nothing` if there is no `index.html` file, or if no redirect URL could be extracted
from it, so that the caller can fall back to another way of determining the canonical
directory.
"""
function canonical_directory_from_redirect_index_html(docs_directory::AbstractString)
    redirect_index_html_path = joinpath(docs_directory, "index.html")
    isfile(redirect_index_html_path) || return nothing
    redirect_url = get_meta_redirect_url(redirect_index_html_path)
    # get_meta_redirect_url returns `nothing` when the file has no redirect <meta> tag;
    # passing that on to normpath would throw a MethodError.
    isnothing(redirect_url) && return nothing
    return splitpath(normpath(redirect_url))
end
197
128
129
+ """
130
+ Parses the HTML file at `indexhtml_path` and tries to extract the `url=...` value
131
+ of the redirect `<meta http-equiv="refresh" ...>` tag.
132
+ """
198
133
function get_meta_redirect_url (indexhtml_path:: AbstractString )
199
134
html = Gumbo. parsehtml (read (indexhtml_path, String))
200
135
for e in AbstractTrees. PreOrderDFS (html. root)
@@ -215,3 +150,86 @@ function get_meta_redirect_url(indexhtml_path::AbstractString)
215
150
end
216
151
return nothing
217
152
end
153
+
154
"""
Determines the canonical version directory of the multi-versioned site in `docs_directory`
from the list of versions in its `versions.js` file, and returns the name of that directory
as a single string (the entry as it appears in `versions.js`).

The choice is made as follows:

1. Entries whose path does not exist in `docs_directory` are dropped (with a warning).
2. If exactly one remaining entry is a symlink whose name is not a version number, it is chosen.
3. If there are no such symlinks, the entry matching `v[0-9]+` with the largest number is chosen.

Throws an `ArgumentError` if `docs_directory` is not a directory or has no `versions.js` file,
and an `ErrorException` if no canonical version can be determined (no usable entries, no
`vN` entry in the fallback case, or more than one non-version symlink).
"""
function canonical_version_from_versions_js(docs_directory)
    isdir(docs_directory) || throw(ArgumentError("Not a directory: $(docs_directory)"))
    # Try to extract the list of versions from versions.js
    versions_js = joinpath(docs_directory, "versions.js")
    isfile(versions_js) || throw(ArgumentError("versions.js is missing in $(docs_directory)"))
    versions = map(extract_versions_list(versions_js)) do version_str
        isversion, version_number = if occursin(Base.VERSION_REGEX, version_str)
            true, VersionNumber(version_str)
        else
            false, nothing
        end
        fullpath = joinpath(docs_directory, version_str)
        return (;
            path = version_str,
            path_exists = isdir(fullpath) || islink(fullpath),
            symlink = islink(fullpath),
            isversion,
            version_number,
            fullpath,
        )
    end
    # We'll filter out a couple of potential bad cases and issue warnings. Note that `filter`
    # does not mutate its argument, so the result has to be assigned back to `versions`.
    versions = filter(versions) do vi
        if !vi.path_exists
            @warn "update_canonical_links: path does not exists or is not a directory" docs_directory vi
            return false
        end
        return true
    end
    # We need to determine the canonical path. This would usually be something like the stable/
    # directory, but it can have a different name, including being a version number. So first we
    # try to find a non-version directory _that is a symlink_ (so that it wouldn't get confused
    # with previews/ or dev builds). If that fails, we try to find the directory matching
    # `v[0-9]+`, with the highest version number. This does not cover all possible cases, but
    # should be good enough for now.
    if isempty(versions)
        error("Unable to determine the canonical path. Found no version directories")
    end

    non_version_symlinks = filter(vi -> !vi.isversion && vi.symlink, versions)
    canonical_version = if isempty(non_version_symlinks)
        # We didn't find any non-version symlinks, so we'll try to find the vN directory now
        # as a fallback. Each match is stored as a `N => vi` pair so that we can pick the
        # entry with the largest N below.
        major_version_entries = map(versions) do vi
            m = match(r"^v([0-9]+)$", vi.path)
            isnothing(m) && return nothing
            parse(Int, m[1]) => vi
        end
        filter!(!isnothing, major_version_entries)
        if isempty(major_version_entries)
            error("Unable to determine the canonical path. Found no version directories")
        end
        _, idx = findmax(first, major_version_entries)
        major_version_entries[idx][2]
    elseif length(non_version_symlinks) > 1
        error("Unable to determine the canonical path. Found multiple non-version symlinks.\n$(non_version_symlinks)")
    else
        only(non_version_symlinks)
    end

    # NOTE(review): this returns a plain string, not a tuple of path components. A caller
    # that splats the result (e.g. into `joinurl`) presumably needs to wrap it in a
    # one-element tuple first -- confirm against the call site.
    return canonical_version.path
end
216
+
217
"""
Reads the JavaScript file at `versions_js` and returns the entries of its
`var DOC_VERSIONS = [...]` array as a vector of strings, with surrounding whitespace and
double quotes removed and empty entries dropped.

Throws an `ArgumentError` if the file does not exist, if no `DOC_VERSIONS` array can be
found in it, or if the array yields no non-empty entries.
"""
function extract_versions_list(versions_js::AbstractString)
    js_path = abspath(versions_js)
    if !isfile(js_path)
        throw(ArgumentError("No such file: $(js_path)"))
    end
    js_source = read(js_path, String)
    found = match(r"var\s+DOC_VERSIONS\s*=\s*\[([0-9A-Za-z\"\s.,+-]+)\]", js_source)
    if isnothing(found)
        throw(ArgumentError("""
        Could not find DOC_VERSIONS in $(js_path):
        $(js_source)"""))
    end
    # Each comma-separated item is still wrapped in whitespace and double quotes.
    is_padding(c) = isspace(c) || (c == '"')
    entries = [strip(is_padding, item) for item in split(found[1], ",")]
    # A trailing comma (or stray separators) leaves empty items behind.
    filter!(!isempty, entries)
    if isempty(entries)
        throw(ArgumentError("""
        DOC_VERSIONS empty in $(js_path):
        $(js_source)"""))
    end
    return entries
end
0 commit comments