Skip to content

Commit fe827b3

Browse files
committed
Implement Zsh scraper
1 parent afc9e53 commit fe827b3

File tree

6 files changed

+96
-0
lines changed

6 files changed

+96
-0
lines changed

lib/docs/filters/zsh/clean_html.rb

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
module Docs
  class Zsh
    # Cleans up a scraped Zsh manual page: removes the header/menu tables and
    # horizontal rules, strips the numeric index from headings, and copies
    # anchor names onto the element that follows each anchor.
    class CleanHtmlFilter < Filter
      def call
        css('table.header', 'table.menu', 'hr').remove

        # Remove indices from headers.
        css('h1', 'h2', 'h3').each do |node|
          title = node.content[/^[\d\.]* (.*)$/, 1]
          # A heading without an index prefix does not match; keep its text
          # as-is instead of overwriting it with nil.
          node.content = title if title
        end

        css('h2.section ~ a').each do |node|
          target = node.next_element
          name = node['name']
          # Skip anchors that have no following element or carry no name,
          # rather than raising or assigning a nil id.
          next unless target && name

          target['id'] = name
        end

        doc
      end
    end
  end
end

lib/docs/filters/zsh/entries.rb

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
module Docs
  class Zsh
    # Derives index entries from a Zsh manual page: the chapter heading
    # supplies the name and type, and each <h2 class="section"> contributes
    # one additional entry pointing at the anchor preceding it.
    class EntriesFilter < Docs::EntriesFilter
      # Returns the chapter heading text with its numeric index removed.
      def get_name
        chapter_title
      end

      # Returns an array of [name, anchor id, type] triples, one per section
      # heading, skipping sections titled 'Description', sections with no
      # usable title, and sections that have no linkable anchor.
      def additional_entries
        # The type is identical for every section on the page; compute it once.
        type = get_type

        css('h2.section').each_with_object([]) do |node, entries|
          # Linkable anchor sits above <h2>.
          anchor = node.xpath('preceding-sibling::a').last
          next unless anchor

          header_text = extract_header_text(node.content)
          next if header_text.nil? || header_text.empty?

          if type == 'Zsh Modules'
            # Shorten "The zsh/foo Module" to just "zsh/foo".
            module_name = header_text[%r{The (zsh/.*) Module}, 1]
            header_text = module_name if module_name
          end

          entries << [header_text, anchor['name'], type] if header_text != 'Description'
        end
      end

      # Returns the chapter heading text with its numeric index removed.
      def get_type
        chapter_title
      end

      private

      # Text of the page's <h1 class="chapter"> without its index, or nil
      # when the page has no such heading.
      def chapter_title
        extract_header_text(at_css('h1.chapter')&.content)
      end

      # Extracts text from a string, dropping indices preceding it.
      # Falls back to the stripped input when there is no index prefix, and
      # returns nil for nil input.
      def extract_header_text(str)
        return if str.nil?

        str[/^[\d\.]* (.*)$/, 1] || str.strip
      end
    end
  end
end

lib/docs/scrapers/zsh.rb

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
module Docs
  # Scraper configuration for the Zsh manual hosted on zsh.sourceforge.io.
  class Zsh < UrlScraper
    self.type = 'zsh'
    self.release = '5.9.0'
    self.base_url = 'https://zsh.sourceforge.io/Doc/Release/'
    self.root_path = 'index.html'
    self.links = {
      home: 'https://zsh.sourceforge.io/',
      code: 'https://sourceforge.net/p/zsh/web/ci/master/tree/',
    }

    options[:skip] = %w(
      zsh_toc.html
      zsh_abt.html
      The-Z-Shell-Manual.html
      Introduction.html
    )
    # Dot escaped so that only literal "-Index.html" paths are skipped.
    options[:skip_patterns] = [/-Index\.html/]

    # Entries are extracted before clean_html rewrites the headings.
    html_filters.push 'zsh/entries', 'zsh/clean_html'

    options[:attribution] = <<-HTML
      The Z Shell is copyright &copy; 1992&ndash;2017 Paul Falstad, Richard Coleman,
      Zoltán Hidvégi, Andrew Main, Peter Stephenson, Sven Wischnowsky, and others.<br />
      Licensed under the MIT License.
    HTML

    # Fetches the release documentation page and returns the version string
    # that follows ", Zsh version ".
    #
    # The capture stops at the last digit, so a sentence-ending period after
    # the version is never included and nothing has to be chopped off.
    # Raises a descriptive error when the page does not contain the marker.
    def get_latest_version(opts)
      body = fetch('https://zsh.sourceforge.io/Doc/Release', opts)
      version = body[/, Zsh version (\d+(?:\.\d+)*)/, 1]
      raise 'Could not find the Zsh version on the documentation page' unless version

      version
    end
  end
end

public/icons/docs/zsh/16.png

687 Bytes
Loading
1.18 KB
Loading

public/icons/docs/zsh/SOURCE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
https://sourceforge.net/p/zsh/web/ci/master/tree/favicon.png
2+

0 commit comments

Comments
 (0)