Skip to content

Commit 716fc26

Browse files
dig412 authored and Thibaut committed
Add a Terraform scraper
1 parent eca9d7e commit 716fc26

File tree

7 files changed

+132
-0
lines changed

7 files changed

+132
-0
lines changed

assets/stylesheets/application.css.scss

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,7 @@
9696
'pages/support_tables',
9797
'pages/tcl_tk',
9898
'pages/tensorflow',
99+
'pages/terraform',
99100
'pages/underscore',
100101
'pages/vue',
101102
'pages/webpack',
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Terraform pages: extend the shared %simple placeholder and render
// .note / .alert boxes with the shared %note placeholder.
._terraform {
  @extend %simple;

  .note,
  .alert {
    @extend %note;
  }
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
module Docs
  class Terraform
    # Narrows a scraped Terraform page to its '#inner' element, removes
    # decorative nodes, and tags code samples with a highlight language.
    class CleanHtmlFilter < Filter
      # Runs the clean-up and returns the resulting document.
      def call
        @doc = at_css('#inner')

        css('hr', 'a.anchor').remove

        # Alerts keep their attributes; only the tag name changes.
        css('.alert').each do |node|
          node.name = 'blockquote'
        end

        css('pre').each do |node|
          # A <pre> without a class attribute yields nil here; normalise to an
          # empty string so the lookups below simply don't match.
          classes = node['class'].to_s

          language = classes[/(json|shell|ruby)/, 1]
          node['data-language'] = language if language

          # HCL isn't currently supported by Prism, Ruby syntax does an acceptable job for now
          node['data-language'] = 'ruby' if classes.include?('hcl')

          # Re-assigning the text content replaces any nested markup inside the
          # block with plain text.
          node.content = node.content
        end

        doc
      end
    end
  end
end

lib/docs/filters/terraform/entries.rb

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
module Docs
  class Terraform
    # Derives the entry name and type (the provider/section an entry is filed
    # under) for each scraped Terraform page.
    class EntriesFilter < Docs::EntriesFilter
      # Some providers have non-trivial mappings between the directory they live in and their name.
      # Anything *not* in this list will be capitalized instead.
      PROVIDER_NAME_MAP = {
        'aws'              => 'AWS',
        'azure'            => 'Azure (Legacy)',
        'azurerm'          => 'Azure',
        'centurylinkcloud' => 'CenturyLinkCloud',
        'cloudscale'       => 'CloudScale.ch',
        'cloudstack'       => 'CloudStack',
        'dme'              => 'DNSMadeEasy',
        'dns'              => 'DNS',
        'dnsimple'         => 'DNSimple',
        'do'               => 'DigitalOcean',
        'github'           => 'GitHub',
        'google'           => 'Google Cloud',
        'http'             => 'HTTP',
        'mysql'            => 'MySQL',
        'newrelic'         => 'New Relic',
        'oneandone'        => '1&1',
        'opentelekomcloud' => 'OpenTelekomCloud',
        'opsgenie'         => 'OpsGenie',
        'opc'              => 'Oracle Public Cloud',
        'oraclepaas'       => 'Oracle Cloud Platform',
        'ovh'              => 'OVH',
        'pagerduty'        => 'PagerDuty',
        'panos'            => 'Palo Alto Networks',
        'postgresql'       => 'PostgreSQL',
        'powerdns'         => 'PowerDNS',
        'profitbricks'     => 'ProfitBricks',
        'rabbitmq'         => 'RabbitMQ',
        'softlayer'        => 'SoftLayer',
        'statuscake'       => 'StatusCake',
        'tls'              => 'TLS',
        'ultradns'         => 'UltraDNS',
        'vcd'              => 'VMware vCloud Director',
        'nsxt'             => 'VMware NSX-T',
        'vsphere'          => 'VMware vSphere',
      }.freeze

      # Some providers have a lot of (> 100) entries, which makes browsing them unwieldy.
      # Any present in the list below will have an extra set of types added, breaking the pages out into the
      # different products they offer.
      LARGE_PROVIDERS = {
        'aws'     => true,
        'azurerm' => true,
        'google'  => true,
      }.freeze

      # Returns the page heading with the "» " marker and the "Data Source: "
      # prefix stripped.
      def get_name
        name = at_css('#inner h1').content
        name.remove! "» "
        name.remove! "Data Source: "
        name
      end

      # Returns the display name of the provider (or top-level section) the
      # page belongs to. For providers listed in LARGE_PROVIDERS the name is
      # suffixed with ": <text of the element preceding the active item's list>"
      # when such an element exists.
      def get_type
        # When the slug has four or more segments the second one is used as
        # the provider; otherwise the first segment is used.
        category, subcategory, _subfolder, page = *slug.split('/')
        provider = page ? subcategory : category
        nice_provider = PROVIDER_NAME_MAP[provider] || provider.capitalize

        if LARGE_PROVIDERS[provider]
          category_node = at_css('ul > li > ul > li.active')
          # previous_element is nil when the list has no preceding sibling;
          # in that case keep the plain provider name instead of raising.
          heading_node = category_node.parent.previous_element if category_node
          nice_provider = "#{nice_provider}: #{heading_node.content}" if heading_node
        end

        nice_provider
      end
    end
  end
end

lib/docs/scrapers/terraform.rb

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
module Docs
  # Scraper configuration for the Terraform documentation.
  class Terraform < UrlScraper
    self.name = 'Terraform'
    self.type = 'terraform'
    self.release = '0.11.7'
    self.base_url = 'https://www.terraform.io/docs/'
    self.root_path = 'index.html'
    self.links = {
      home: 'https://www.terraform.io/',
      code: 'https://github.com/hashicorp/terraform'
    }

    html_filters.push 'terraform/entries', 'terraform/clean_html'

    # Unanchored, so this also covers "enterprise-legacy" paths.
    options[:skip_patterns] = [/enterprise/]

    options[:attribution] = <<-HTML
      Copyright &copy; 2018 HashiCorp<br>
      Licensed under the MPL 2.0 License.
    HTML
  end
end

public/icons/docs/terraform/16.png

806 Bytes
Loading
1.08 KB
Loading

0 commit comments

Comments
 (0)