Skip to content

Commit 6614375

Browse files
committed
scala: finish scraper and filters
1 parent 74323fd commit 6614375

File tree

5 files changed

+145
-183
lines changed

5 files changed

+145
-183
lines changed

lib/docs/filters/scala/clean_html.rb

Lines changed: 62 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -2,97 +2,107 @@ module Docs
22
class Scala
33
class CleanHtmlFilter < Filter
44
def call
5+
@doc = at_css('#content')
6+
57
always
8+
add_title
69

7-
if slug == 'index'
8-
root
9-
else
10-
other
11-
end
10+
doc
1211
end
1312

1413
def always
15-
# remove deprecated sections
14+
# Remove deprecated sections
1615
css('.members').each do |members|
1716
header = members.at_css('h3')
1817
members.remove if header.text.downcase.include? 'deprecate'
1918
end
20-
# Some of this is just for 2.12
21-
# These are things that provide interactive features, which are not supported yet.
22-
css('#subpackage-spacer, #search, #mbrsel, .diagram-btn').remove
23-
css('#footer').remove
24-
css('.toggleContainer').remove
19+
20+
css('#mbrsel, #footer').remove
21+
22+
css('.diagram-container').remove
23+
css('.toggleContainer > .toggle').each do |node|
24+
title = node.at_css('span')
25+
next if title.nil?
26+
27+
content = node.at_css('.hiddenContent')
28+
next if content.nil?
29+
30+
title.name = 'dt'
31+
32+
content.remove_attribute('class')
33+
content.remove_attribute('style')
34+
content.name = 'dd'
35+
36+
attributes = at_css('.attributes')
37+
unless attributes.nil?
38+
title.parent = attributes
39+
content.parent = attributes
40+
end
41+
end
2542

2643
signature = at_css('#signature')
27-
signature.replace %Q|
28-
<h2 id="signature">#{signature.inner_html}</h2>
29-
|
44+
signature.replace "<h2 id=\"signature\">#{signature.inner_html}</h2>"
3045

3146
css('div.members > h3').each do |node|
32-
change_tag! 'h2', node
47+
node.name = 'h2'
3348
end
3449

3550
css('div.members > ol').each do |list|
3651
list.css('li').each do |li|
3752
h3 = doc.document.create_element 'h3'
53+
h3['id'] = li['name'].rpartition('#').last unless li['name'].nil?
54+
3855
li.prepend_child h3
3956
li.css('.shortcomment').remove
57+
4058
modifier = li.at_css('.modifier_kind')
41-
modifier.parent = h3 if modifier
59+
modifier.parent = h3 unless modifier.nil?
60+
61+
kind = li.at_css('.modifier_kind .kind')
62+
kind.content = kind.content + ' ' unless kind.nil?
63+
4264
symbol = li.at_css('.symbol')
43-
symbol.parent = h3 if symbol
65+
symbol.parent = h3 unless symbol.nil?
66+
4467
li.swap li.children
4568
end
69+
4670
list.swap list.children
4771
end
4872

49-
pres = css('.fullcomment pre, .fullcommenttop pre')
50-
pres.each do |pre|
73+
css('.fullcomment pre, .fullcommenttop pre').each do |pre|
5174
pre['data-language'] = 'scala'
75+
pre.content = pre.content
5276
end
53-
pres.add_class 'language-scala'
54-
55-
56-
57-
doc
58-
59-
end
60-
61-
def root
62-
css('#filter').remove # these are filters to search through the types and packages
63-
css('#library').remove # these are icons at the top
64-
doc
65-
end
6677

67-
def other
68-
# these are sections of the documentation which do not seem useful
78+
# Sections of the documentation which do not seem useful
6979
%w(#inheritedMembers #groupedMembers .permalink .hiddenContent .material-icons).each do |selector|
7080
css(selector).remove
7181
end
7282

73-
# This is the kind of thing we have, class, object, trait
74-
kind = at_css('.modifier_kind .kind').content
75-
# this image replacement doesn't do anything on 2.12 docs
76-
img = at_css('img')
77-
img.replace %Q|<span class="img_kind">#{kind}</span>| unless img.nil?
78-
class_to_add = kind == 'object' ? 'value': 'type'
83+
# Things that are not shown on the site, like deprecated members
84+
css('li[visbl=prt]').remove
85+
end
86+
87+
def add_title
88+
css('.permalink').remove
7989

80-
# for 2.10, 2.11, the kind class is associated to the body. we have to
81-
# add it somewhere, so we do that with the #definition.
82-
definition = css('#definition')
83-
definition.css('.big_circle').remove
84-
definition.add_class class_to_add
90+
definition = at_css('#definition')
91+
return if definition.nil?
8592

86-
# this is something that is not shown on the site, such as deprecated members
87-
css('li[visbl=prt]').remove
93+
type_full_name = {a: 'Annotation', c: 'Class', t: 'Trait', o: 'Object', p: 'Package'}
94+
type = type_full_name[definition.at_css('.big-circle').text.to_sym]
95+
name = CGI.escapeHTML definition.at_css('h1').text
8896

89-
doc
90-
end
97+
package = definition.at_css('#owner').text rescue ''
98+
package = package + '.' unless name.empty? || package.empty?
9199

92-
private
100+
other = definition.at_css('.morelinks').dup
101+
other_content = other ? "<h3>#{other.to_html}</h3>" : ''
93102

94-
def change_tag!(new_tag, node)
95-
node.replace %Q|<#{new_tag}>#{node.inner_html}</#{new_tag}>|
103+
title_content = root_page? ? 'Package root' : "#{type} #{package}#{name}".strip
104+
title = "<h1>#{title_content}</h1>"
105+
definition.replace title + other_content
96106
end
97107
end
98108
end

lib/docs/filters/scala/clean_html_210.rb

Lines changed: 0 additions & 32 deletions
This file was deleted.

lib/docs/filters/scala/clean_html_212.rb

Lines changed: 0 additions & 36 deletions
This file was deleted.

lib/docs/filters/scala/entries.rb

Lines changed: 44 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,30 @@
11
module Docs
22
class Scala
33
class EntriesFilter < Docs::EntriesFilter
4+
REPLACEMENTS = {
5+
'$eq' => '=',
6+
'$colon' => ':',
7+
'$less' => '<',
8+
}
9+
410
def get_name
5-
# this first condition is mainly for scala 212 docs, which
6-
# have their package listing as index.html
711
if is_package?
812
symbol = at_css('#definition h1')
913
symbol ? symbol.text.gsub(/\W+/, '') : "package"
1014
else
11-
slug.split('/').last
15+
name = slug.split('/').last
16+
17+
# Some objects have inner objects, show ParentObject$.ChildObject$ instead of ParentObject$$ChildObject$
18+
name = name.gsub('$$', '$.')
19+
20+
# If a dollar sign is used as separator between two characters, replace it with a dot
21+
name = name.gsub(/([^$.])\$([^$.])/, '\1.\2')
22+
23+
REPLACEMENTS.each do |key, value|
24+
name = name.gsub(key, value)
25+
end
26+
27+
name
1228
end
1329
end
1430

@@ -26,6 +42,31 @@ def include_default_entry?
2642
true
2743
end
2844

45+
def additional_entries
46+
entries = []
47+
48+
full_name = "#{type}.#{name}".remove('$')
49+
css(".members li[name^=\"#{full_name}\"]").each do |node|
50+
# Ignore packages
51+
kind = node.at_css('.modifier_kind > .kind')
52+
next if !kind.nil? && kind.content == 'package'
53+
54+
# Ignore deprecated members
55+
next unless node.at_css('.symbol > .name.deprecated').nil?
56+
57+
id = node['name'].rpartition('#').last
58+
member_name = node.at_css('.name')
59+
60+
# Ignore members only existing of hashtags, we can't link to that
61+
next if member_name.nil? || member_name.content.strip.remove('#').blank?
62+
63+
member = "#{name}.#{member_name.content}()"
64+
entries << [member, id]
65+
end
66+
67+
entries
68+
end
69+
2970
private
3071

3172
# For the package name, we use the slug rather than parsing the package
@@ -40,7 +81,6 @@ def package_name
4081
end
4182

4283
def parent_package
43-
name = package_name
4484
parent = package_drop_last(package_name.split('.'))
4585
parent.empty? ? '_root_' : parent
4686
end

0 commit comments

Comments
 (0)