Skip to content

Commit bf00366

Browse files
committed
Simplify file scraper setup; scrape files in the "docs/[slug]" directory
1 parent 91753ce commit bf00366

File tree

22 files changed

+124
-104
lines changed

22 files changed

+124
-104
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,4 @@ public/fonts
88
public/docs/**/*
99
!public/docs/docs.json
1010
!public/docs/**/index.json
11+
/docs/

lib/docs.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ module Docs
2929
self.rescue_errors = false
3030

3131
class DocNotFound < NameError; end
32+
class SetupError < StandardError; end
3233

3334
def self.all
3435
Dir["#{root_path}/docs/scrapers/**/*.rb"].

lib/docs/core/doc.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,9 @@ def store_page(store, id)
9595
false
9696
end
9797
end
98+
rescue Docs::SetupError => error
99+
puts "ERROR: #{error.message}"
100+
false
98101
end
99102

100103
def store_pages(store)
@@ -118,6 +121,9 @@ def store_pages(store)
118121
false
119122
end
120123
end
124+
rescue Docs::SetupError => error
125+
puts "ERROR: #{error.message}"
126+
false
121127
end
122128

123129
private

lib/docs/core/scrapers/file_scraper.rb

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,39 @@
11
module Docs
22
class FileScraper < Scraper
3+
SOURCE_DIRECTORY = File.expand_path '../../../../../docs', __FILE__
4+
35
Response = Struct.new :body, :url
46

57
class << self
6-
attr_accessor :dir
7-
88
def inherited(subclass)
99
super
1010
subclass.base_url = base_url
11-
subclass.dir = dir
1211
end
1312
end
1413

1514
self.base_url = 'http://localhost/'
1615

1716
html_filters.push 'clean_local_urls'
1817

18+
# Path of the directory this scraper reads its source files from:
# SOURCE_DIRECTORY joined with the scraper class's `path`.
# The result is memoized in @source_directory, so it is computed once per instance.
def source_directory
19+
@source_directory ||= File.join(SOURCE_DIRECTORY, self.class.path)
20+
end
21+
1922
private
2023

24+
# Verifies that the scraper's source directory is present on disk.
#
# Uses Dir.exist? rather than Dir.exists?: the latter is a deprecated alias
# that was removed in Ruby 3.2, where calling it raises NoMethodError.
#
# Returns nil when the directory exists.
# Raises SetupError (with a message naming the expected directory) otherwise.
def assert_source_directory_exists
  return if Dir.exist?(source_directory)

  raise SetupError, "The #{self.class.name} scraper requires the original documentation files to be stored in the \"#{source_directory}\" directory."
end
29+
2130
def request_one(url)
22-
Response.new read_file(file_path_for(url)), URL.parse(url)
31+
assert_source_directory_exists
32+
Response.new read_file(url_to_path(url)), URL.parse(url)
2333
end
2434

2535
def request_all(urls)
36+
assert_source_directory_exists
2637
queue = [urls].flatten
2738
until queue.empty?
2839
result = yield request_one(queue.shift)
@@ -34,12 +45,12 @@ def process_response?(response)
3445
response.body.present?
3546
end
3647

37-
def file_path_for(url)
38-
File.join self.class.dir, url.remove(base_url.to_s)
48+
def url_to_path(url)
49+
url.remove(base_url.to_s)
3950
end
4051

4152
def read_file(path)
42-
File.read(path)
53+
File.read(File.join(source_directory, path))
4354
rescue
4455
instrument 'warn.doc', msg: "Failed to open file: #{path}"
4556
nil

lib/docs/scrapers/c.rb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
module Docs
22
class C < FileScraper
33
self.type = 'c'
4-
self.dir = '/Users/Thibaut/DevDocs/Docs/c'
54
self.base_url = 'http://en.cppreference.com/w/c/'
65
self.root_path = 'header.html'
76

lib/docs/scrapers/cpp.rb

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@ class Cpp < FileScraper
33
self.name = 'C++'
44
self.slug = 'cpp'
55
self.type = 'c'
6-
self.dir = '/Users/Thibaut/DevDocs/Docs/cpp'
76
self.base_url = 'http://en.cppreference.com/w/cpp/'
87
self.root_path = 'header.html'
98

lib/docs/scrapers/dart.rb

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,11 @@ class Dart < FileScraper
2424

2525
version '2' do
2626
self.release = '2.0.0'
27-
self.dir = '/Users/Thibaut/DevDocs/Docs/Dart2'
2827
self.base_url = "https://api.dartlang.org/stable/#{release}/"
2928
end
3029

3130
version '1' do
3231
self.release = '1.24.3'
33-
self.dir = '/Users/Thibaut/DevDocs/Docs/Dart1'
3432
self.base_url = "https://api.dartlang.org/stable/#{release}/"
3533
end
3634
end

lib/docs/scrapers/django.rb

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,37 +36,31 @@ class Django < FileScraper
3636

3737
version '2.1' do
3838
self.release = '2.1.0'
39-
self.dir = '/Users/Thibaut/DevDocs/Docs/Django21'
4039
self.base_url = 'https://docs.djangoproject.com/en/2.1/'
4140
end
4241

4342
version '2.0' do
4443
self.release = '2.0.7'
45-
self.dir = '/Users/Thibaut/DevDocs/Docs/Django20'
4644
self.base_url = 'https://docs.djangoproject.com/en/2.0/'
4745
end
4846

4947
version '1.11' do
5048
self.release = '1.11.9'
51-
self.dir = '/Users/Thibaut/DevDocs/Docs/Django111'
5249
self.base_url = 'https://docs.djangoproject.com/en/1.11/'
5350
end
5451

5552
version '1.10' do
5653
self.release = '1.10.8'
57-
self.dir = '/Users/Thibaut/DevDocs/Docs/Django110'
5854
self.base_url = 'https://docs.djangoproject.com/en/1.10/'
5955
end
6056

6157
version '1.9' do
6258
self.release = '1.9.13'
63-
self.dir = '/Users/Thibaut/DevDocs/Docs/Django19'
6459
self.base_url = 'https://docs.djangoproject.com/en/1.9/'
6560
end
6661

6762
version '1.8' do
6863
self.release = '1.8.18'
69-
self.dir = '/Users/Thibaut/DevDocs/Docs/Django18'
7064
self.base_url = 'https://docs.djangoproject.com/en/1.8/'
7165
end
7266
end

lib/docs/scrapers/erlang.rb

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -42,22 +42,18 @@ class Erlang < FileScraper
4242

4343
version '21' do
4444
self.release = '21.0'
45-
self.dir = '/Users/Thibaut/DevDocs/Docs/Erlang21'
4645
end
4746

4847
version '20' do
4948
self.release = '20.3'
50-
self.dir = '/Users/Thibaut/DevDocs/Docs/Erlang20'
5149
end
5250

5351
version '19' do
5452
self.release = '19.3'
55-
self.dir = '/Users/Thibaut/DevDocs/Docs/Erlang19'
5653
end
5754

5855
version '18' do
5956
self.release = '18.3'
60-
self.dir = '/Users/Thibaut/DevDocs/Docs/Erlang18'
6157
end
6258
end
6359
end

lib/docs/scrapers/gnu/gcc.rb

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -48,61 +48,53 @@ class Gcc < Gnu
4848

4949
version '7' do
5050
self.release = '7.3.0'
51-
self.dir = '/Users/Thibaut/DevDocs/Docs/gcc7'
5251
self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/gcc/"
5352
end
5453

5554
version '7 CPP' do
5655
self.release = '7.3.0'
57-
self.dir = '/Users/Thibaut/DevDocs/Docs/gcpp7'
5856
self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/cpp/"
5957

6058
options[:replace_paths] = CPP_PATHS
6159
end
6260

6361
version '6' do
6462
self.release = '6.4.0'
65-
self.dir = '/Users/Thibaut/DevDocs/Docs/gcc6'
6663
self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/gcc/"
6764

6865
options[:root_title] = 'Using the GNU Compiler Collection (GCC)'
6966
end
7067

7168
version '6 CPP' do
7269
self.release = '6.4.0'
73-
self.dir = '/Users/Thibaut/DevDocs/Docs/gcpp6'
7470
self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/cpp/"
7571

7672
options[:replace_paths] = CPP_PATHS
7773
end
7874

7975
version '5' do
8076
self.release = '5.4.0'
81-
self.dir = '/Users/Thibaut/DevDocs/Docs/gcc5'
8277
self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/gcc/"
8378

8479
options[:root_title] = 'Using the GNU Compiler Collection (GCC)'
8580
end
8681

8782
version '5 CPP' do
8883
self.release = '5.4.0'
89-
self.dir = '/Users/Thibaut/DevDocs/Docs/gcpp5'
9084
self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/cpp/"
9185

9286
options[:replace_paths] = CPP_PATHS
9387
end
9488

9589
version '4' do
9690
self.release = '4.9.3'
97-
self.dir = '/Users/Thibaut/DevDocs/Docs/gcc4'
9891
self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/gcc/"
9992

10093
options[:root_title] = 'Using the GNU Compiler Collection (GCC)'
10194
end
10295

10396
version '4 CPP' do
10497
self.release = '4.9.3'
105-
self.dir = '/Users/Thibaut/DevDocs/Docs/gcpp4'
10698
self.base_url = "https://gcc.gnu.org/onlinedocs/gcc-#{release}/cpp/"
10799

108100
options[:replace_paths] = CPP_PATHS

0 commit comments

Comments
 (0)