Skip to content

Commit ad9b58f

Browse files
authored
Merge pull request #2569 from cpmsmith/rust-improvements
Improve Rust scraper
2 parents 515906b + 8f39593 commit ad9b58f

File tree

9 files changed

+151
-22
lines changed

9 files changed

+151
-22
lines changed

assets/javascripts/lib/page.js

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -271,13 +271,25 @@ var onclick = function (event) {
271271
}
272272

273273
let link = $.eventTarget(event);
274-
while (link && link.tagName !== "A") {
274+
while (link && !(link.tagName === "A" || link.tagName === "a")) {
275275
link = link.parentNode;
276276
}
277277

278-
if (link && !link.target && isSameOrigin(link.href)) {
278+
if (!link) return;
279+
280+
// If the `<a>` is in an SVG, its attributes are `SVGAnimatedString`s
281+
// instead of strings
282+
let href = link.href instanceof SVGAnimatedString
283+
? new URL(link.href.baseVal, location.href).href
284+
: link.href;
285+
let target = link.target instanceof SVGAnimatedString
286+
? link.target.baseVal
287+
: link.target;
288+
289+
if (!target && isSameOrigin(href)) {
279290
event.preventDefault();
280-
let path = link.pathname + link.search + link.hash;
291+
let parsedHref = new URL(href);
292+
let path = parsedHref.pathname + parsedHref.search + parsedHref.hash;
281293
path = path.replace(/^\/\/+/, "/"); // IE11 bug
282294
page.show(path);
283295
}

assets/javascripts/lib/util.js

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -353,6 +353,16 @@ $.lockScroll = function (el, fn) {
353353
}
354354
};
355355

356+
// If `el` is inside any `<details>` elements, expand them.
357+
$.openDetailsAncestors = function (el) {
358+
while (el) {
359+
if (el.tagName === "DETAILS") {
360+
el.open = true;
361+
}
362+
el = el.parentElement;
363+
}
364+
}
365+
356366
let smoothScroll =
357367
(smoothStart =
358368
smoothEnd =

assets/javascripts/views/content/content.js

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ app.views.Content = class Content extends app.View {
114114
$.scrollToWithImageLock(el, this.scrollEl, "top", {
115115
margin: this.scrollEl === this.el ? 0 : $.offset(this.el).top,
116116
});
117+
$.openDetailsAncestors(el);
117118
$.highlight(el, { className: "_highlight" });
118119
} else {
119120
this.scrollTo(this.scrollMap[this.routeCtx.state.id]);

assets/stylesheets/pages/_rust.scss

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,73 @@
1818
float: right;
1919
margin-left: .5rem;
2020
}
21+
22+
.grammar-container { @extend %note, %note-gray; }
23+
24+
/* Railroad styles from:
25+
* https://github.com/rust-lang/reference/blob/f82156b8c3a784158ce609bebfa3a77b5ae8a5ed/theme/reference.css#L683-L734
26+
* Plus CSS variables inheriting from DevDocs variables
27+
*/
28+
29+
svg.railroad {
30+
--railroad-background-color: var(--boxBackground);
31+
--railroad-background-image:
32+
linear-gradient(to right, rgb(from currentColor r g b / 0.1) 1px, transparent 1px),
33+
linear-gradient(to bottom, rgb(from currentColor r g b / 0.1) 1px, transparent 1px);
34+
--railroad-path-stroke: currentColor;
35+
--railroad-rect-stroke: currentColor;
36+
--railroad-rect-fill: var(--noteBackground);
37+
--railroad-text-fill: currentColor;
38+
39+
background-color: var(--railroad-background-color);
40+
background-size: 15px 15px;
41+
background-image: var(--railroad-background-image);
42+
}
43+
44+
svg.railroad rect.railroad_canvas {
45+
stroke-width: 0px;
46+
fill: none;
47+
}
48+
49+
svg.railroad path {
50+
stroke-width: 3px;
51+
stroke: var(--railroad-path-stroke);
52+
fill: none;
53+
}
54+
55+
svg.railroad .debug {
56+
stroke-width: 1px;
57+
stroke: red;
58+
}
59+
60+
svg.railroad text {
61+
font: 14px monospace;
62+
text-anchor: middle;
63+
fill: var(--railroad-text-fill);
64+
}
65+
66+
svg.railroad .nonterminal text {
67+
font-weight: bold;
68+
}
69+
70+
svg.railroad text.comment {
71+
font: italic 12px monospace;
72+
}
73+
74+
svg.railroad rect {
75+
stroke-width: 3px;
76+
stroke: var(--railroad-rect-stroke);
77+
fill: var(--railroad-rect-fill);
78+
}
79+
80+
svg.railroad g.labeledbox>rect {
81+
stroke-width: 1px;
82+
stroke: grey;
83+
stroke-dasharray: 5px;
84+
fill: rgba(90, 90, 150, .1);
85+
}
86+
87+
svg.railroad g.exceptbox > rect {
88+
fill:rgba(245, 160, 125, .1);
89+
}
2190
}

lib/docs/filters/core/clean_text.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
module Docs
44
class CleanTextFilter < Filter
5-
EMPTY_NODES_RGX = /<(?!td|th|iframe|mspace)(\w+)[^>]*>[[:space:]]*<\/\1>/
5+
EMPTY_NODES_RGX = /<(?!td|th|iframe|mspace|rect|path|ellipse|line|polyline)(\w+)[^>]*>[[:space:]]*<\/\1>/
66

77
def call
88
return html if context[:clean_text] == false

lib/docs/filters/core/normalize_paths.rb

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,11 @@ def call
77
result[:store_path] = store_path
88

99
css('a').each do |link|
10-
next unless (href = link['href']) && relative_url_string?(href)
11-
link['href'] = normalize_href(href)
10+
href = link['href']
11+
link['href'] = normalize_href(href) if href && relative_url_string?(href)
12+
13+
xlink_href = link['xlink:href']
14+
link['xlink:href'] = normalize_href(xlink_href) if xlink_href && relative_url_string?(xlink_href)
1215
end
1316

1417
doc

lib/docs/filters/rust/clean_html.rb

Lines changed: 28 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@ module Docs
44
class Rust
55
class CleanHtmlFilter < Filter
66
def call
7-
if slug.start_with?('book') || slug.start_with?('reference')
7+
if slug.start_with?('book') || slug.start_with?('reference') || slug.start_with?('error_codes')
88
@doc = at_css('#content main')
9-
elsif slug == 'error-index'
9+
elsif slug.start_with?('error_codes')
1010
css('.error-undescribed').remove
1111

1212
css('.error-described').each do |node|
@@ -32,6 +32,8 @@ def call
3232

3333
css('.doc-anchor').remove
3434

35+
css('.rule-link').remove
36+
3537
# Fix notable trait sections
3638
css('.method, .rust.trait').each do |node|
3739
traitSection = node.at_css('.notable-traits')
@@ -55,6 +57,30 @@ def call
5557
node.before(node.children).remove
5658
end
5759

60+
css('button.grammar-toggle-railroad').remove
61+
css('.grammar-container').each do |node|
62+
next_element = node.next_element
63+
if next_element && next_element['class'] && next_element['class'].include?('grammar-railroad')
64+
next_element.remove
65+
node.add_child(next_element)
66+
end
67+
68+
node.css('[onclick="show_railroad()"]').each do |subnode|
69+
subnode.remove_attribute('onclick')
70+
end
71+
72+
# parse() turned this <div> into a <pre> to preserve whitespace; change it back here
73+
node.name = 'div'
74+
node.css('.grammar-literal').each do |literal|
75+
literal.name = 'code'
76+
end
77+
end
78+
79+
css('.grammar-railroad').each do |node|
80+
node.name = 'details'
81+
node.prepend_child("<summary>Syntax diagram</summary>")
82+
end
83+
5884
css('a.header').each do |node|
5985
unless node.first_element_child.nil?
6086
node.first_element_child['id'] = node['name'] || node['id']

lib/docs/filters/rust/entries.rb

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,22 @@ class Rust
33
class EntriesFilter < Docs::EntriesFilter
44

55
def get_name
6-
if slug.start_with?('book') || slug.start_with?('reference')
7-
name = at_css("h2", "h1")
8-
ch1 = slug[/ch(\d+)-(\d+)/, 1]
9-
ch2 = slug[/ch(\d+)-(\d+)/, 2]
6+
if slug.start_with?('book')
7+
name = at_css('main h1', 'main h2')
8+
9+
if slug.start_with?('book/appendix')
10+
return name ? name.content : 'Appendix'
11+
end
12+
13+
ch1 = slug[/ch(\d+)-(\d+)/, 1] || '00'
14+
ch2 = slug[/ch(\d+)-(\d+)/, 2] || '00'
1015
name ? "#{ch1}.#{ch2}. #{name.content}" : 'Introduction'
11-
elsif slug == 'error-index'
16+
elsif slug.start_with?('reference')
17+
at_css('main h1').content
18+
elsif slug == 'error_codes/error-index'
1219
'Compiler Errors'
20+
elsif slug.start_with?('error_codes')
21+
slug.split('/').last.upcase
1322
else
1423
at_css('main h1').at_css('button')&.remove
1524
name = at_css('main h1').content.remove(/\A.+\s/).remove('⎘')
@@ -26,7 +35,7 @@ def get_type
2635
'Guide'
2736
elsif slug.start_with?('reference')
2837
'Reference'
29-
elsif slug == 'error-index'
38+
elsif slug.start_with?('error_codes')
3039
'Compiler Errors'
3140
else
3241
path = name.split('::')
@@ -40,12 +49,8 @@ def get_type
4049
end
4150

4251
def additional_entries
43-
if slug.start_with?('book') || slug.start_with?('reference')
52+
if slug.start_with?('book') || slug.start_with?('reference') || slug.start_with?('error_codes')
4453
[]
45-
elsif slug == 'error-index'
46-
css('.error-described h2.section-header').each_with_object [] do |node, entries|
47-
entries << [node.content, node['id']] unless node.content.include?('Note:')
48-
end
4954
else
5055
css('.method')
5156
.each_with_object({}) { |node, entries|

lib/docs/scrapers/rust.rb

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,13 @@
33
module Docs
44
class Rust < UrlScraper
55
self.type = 'rust'
6-
self.release = '1.88.0'
6+
self.release = '1.90.0'
77
self.base_url = 'https://doc.rust-lang.org/'
88
self.root_path = 'book/index.html'
99
self.initial_paths = %w(
1010
reference/introduction.html
1111
std/index.html
12-
error-index.html)
12+
error_codes/error-index.html)
1313
self.links = {
1414
home: 'https://www.rust-lang.org/',
1515
code: 'https://github.com/rust-lang/rust'
@@ -21,7 +21,8 @@ class Rust < UrlScraper
2121
/\Abook\//,
2222
/\Areference\//,
2323
/\Acollections\//,
24-
/\Astd\// ]
24+
/\Astd\//,
25+
/\Aerror_codes\//, ]
2526

2627
options[:skip] = %w(book/README.html book/ffi.html)
2728
options[:skip_patterns] = [/(?<!\.html)\z/, /\/print\.html/, /\Abook\/second-edition\//]
@@ -56,6 +57,8 @@ def process_response?(response)
5657

5758
def parse(response) # Hook here because Nokogiri removes whitespace from headings
5859
response.body.gsub! %r{<h[1-6] class="code-header">}, '<pre class="code-header">'
60+
# And the reference uses whitespace for indentation in grammar definitions
61+
response.body.gsub! %r{<div class="grammar-container">([\W\w]+?)</div>}, '<pre class="grammar-container">\1</pre>'
5962
super
6063
end
6164
end

0 commit comments

Comments
 (0)