Skip to content

Commit 14623be

Browse files
committed
Add Django REST Framework scraper
1 parent b45090f commit 14623be

File tree

6 files changed

+134
-0
lines changed

6 files changed

+134
-0
lines changed
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
module Docs
  class RestFramework
    # HTML filter that tidies a scraped Django REST Framework page:
    # drops decorative nodes, tags code blocks as Python, and gathers the
    # GitHub source links into a single `_links` paragraph at the end.
    class CleanHtmlFilter < Docs::Filter
      # Cleans the current document in place.
      #
      # @return the cleaned `doc`
      def call
        css('hr').remove
        css('.badges').remove

        css('pre').each { |node| node['data-language'] = 'python' }

        # Remove the attribute outright; assigning nil is assumed to be
        # stringified by Nokogiri and would leave an empty style="" behind.
        css('h1').each { |node| node.remove_attribute('style') }

        relocate_source_links

        doc
      end

      private

      # Translates source file links to DevDocs links: every `a.github`
      # anchor is moved into one `<p class="_links">` appended to the doc.
      # Nothing is appended when the page has no such anchors.
      def relocate_source_links
        anchors = css('a.github')
        return if anchors.empty?

        links = Nokogiri::XML::Node.new('p', doc)
        links['class'] = '_links'

        anchors.each do |node|
          # Keep only the text of the inner <span> when there is one.
          # Assigning content replaces the anchor's children (the span
          # included), so no separate removal is needed. Anchors without
          # a span are moved unchanged instead of raising on nil.
          span = node.at_css('span')
          node.content = span.content if span
          node['class'] = '_links-link'
          links.add_child(node)
        end

        doc.add_child(links)
      end
    end
  end
end
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
module Docs
  class RestFramework
    # Derives index entries (name, type and per-heading sub-entries) for
    # Django REST Framework pages.
    class EntriesFilter < Docs::EntriesFilter
      # H1 titles whose following H2 headings are all indexed.
      CATEGORY_HEADERS = ['API Reference', 'API Guide'].freeze

      # Heading suffixes that mark a framework class wherever it appears.
      CLASS_SUFFIXES = %w[Validator Field View Mixin Default Serializer].freeze

      # Heading after which scanning stops: the third party category contains
      # entries that could be matched (and shouldn't be).
      THIRD_PARTY_HEADER = 'Third party packages'.freeze

      # Page name taken from the first H1, with the first 'Tutorial ' removed.
      # The Quickstart page is prefixed with '0: ' (presumably so it sorts
      # ahead of the numbered tutorial pages -- confirm against the index).
      #
      # @return [String]
      def get_name
        name = css('h1').first.content.sub('Tutorial ', '')
        name.include?('Quickstart') ? "0: #{name}" : name
      end

      # Page type chosen from the leading segment of `subpath`.
      #
      # @return [String, nil] nil for pages outside the tutorial and API guide
      def get_type
        case subpath
        when /\Atutorial/ then 'Tutorial'
        when /\Aapi-guide/ then 'API Guide'
        end
      end

      # Builds one entry per framework class documented on an API guide page.
      #
      # Framework classes are provided in two different ways:
      # - as H2's following an H1 listed in CATEGORY_HEADERS;
      # - as H1/H2 headings ending with one of CLASS_SUFFIXES.
      # Both are used, to avoid writing down every suffix and to ensure all
      # entries inside the API categories are matched.
      #
      # @return [Array<Array>] `[heading text, heading id, 'Ref: <page name>']`
      #   triples; empty for tutorial pages and pages without a type
      def additional_entries
        return [] if type.nil? || type == 'Tutorial'

        local_type = "Ref: #{name}"
        in_category = false
        entries = []

        css('h1, h2').each do |node|
          title = node.content
          break if title == THIRD_PARTY_HEADER

          if in_category
            if node.name == 'h1'
              # A new H1 closes the category; that H1 itself is not indexed
              # and is not examined any further.
              in_category = false
            else
              entries << [title, node['id'], local_type]
            end
          elsif CATEGORY_HEADERS.include?(title)
            in_category = true
          elsif title.end_with?(*CLASS_SUFFIXES)
            entries << [title, node['id'], local_type]
          end
        end

        entries
      end
    end
  end
end

lib/docs/scrapers/rest_framework.rb

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
module Docs
  # Scraper definition for the Django REST Framework documentation site.
  class RestFramework < UrlScraper
    self.name = 'Django REST Framework'
    self.release = '3.9.2'
    self.slug = 'rest_framework'
    self.type = 'mkdocs'
    self.base_url = 'https://www.django-rest-framework.org/'
    self.root_path = 'index.html'
    self.links = {
      home: 'https://www.django-rest-framework.org/',
      code: 'https://github.com/encode/django-rest-framework'
    }

    html_filters.push(
      'mkdocs/clean_html',
      'rest_framework/clean_html',
      'rest_framework/entries'
    )

    # Paths under topics/ and community/ are excluded from the scrape.
    options[:skip_patterns] = [
      /\Atopics\//,
      /\Acommunity\//
    ]

    options[:attribution] = <<~HTML
      Copyright 2011&ndash;present Encode OSS Ltd<br>
      Licensed under the BSD License.
    HTML

    private

    # Some scraped URLs lack a trailing slash, which leads to page
    # duplication; append one (mutating the path in place) unless the path
    # already ends with '/' or 'index.html', then defer to the parent.
    def handle_response(response)
      path = response.url.path
      path << '/' unless path.end_with?('/', 'index.html')
      super
    end
  end
end
1.14 KB
Loading
4.15 KB
Loading
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
https://github.com/encode/django-rest-framework/blob/master/docs_theme/img/favicon.ico

0 commit comments

Comments
 (0)