Skip to content

Commit 96a1059

Browse files
authored
Merge pull request #2607 from denilsonsa/graphviz
New scraper: graphviz (also known as DOT language)
2 parents 7bbcb5a + cbaedce commit 96a1059

File tree

8 files changed

+215
-1
lines changed

8 files changed

+215
-1
lines changed

assets/javascripts/vendor/prism.js

Lines changed: 78 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/* PrismJS 1.30.0
2-
https://prismjs.com/download.html#themes=prism&languages=markup+css+clike+javascript+bash+c+cpp+cmake+coffeescript+crystal+d+dart+diff+django+elixir+erlang+go+groovy+java+json+julia+kotlin+latex+lua+markdown+markup-templating+matlab+nginx+nim+nix+ocaml+perl+php+python+qml+r+jsx+ruby+rust+scss+scala+shell-session+sql+tcl+typescript+yaml+zig */
2+
https://prismjs.com/download.html#themes=prism&languages=markup+css+clike+javascript+bash+c+cpp+cmake+coffeescript+crystal+d+dart+diff+django+dot+elixir+erlang+go+groovy+java+json+julia+kotlin+latex+lua+markdown+markup-templating+matlab+nginx+nim+nix+ocaml+perl+php+python+qml+r+jsx+ruby+rust+scss+scala+shell-session+sql+tcl+typescript+yaml+zig */
33
/// <reference lib="WebWorker"/>
44

55
var _self = (typeof window !== 'undefined')
@@ -2929,6 +2929,83 @@ Prism.languages.insertBefore('d', 'function', {
29292929

29302930
}(Prism));
29312931

2932+
// https://www.graphviz.org/doc/info/lang.html
2933+
2934+
(function (Prism) {

	// The four alternatives that together form an "ID" in the DOT grammar.
	// Each is kept as a regex *source string* so it can be spliced into the
	// larger token patterns below.
	var identifierSource = /[a-zA-Z_\x80-\uFFFF][\w\x80-\uFFFF]*/.source;
	var numberSource = /-?(?:\.\d+|\d+(?:\.\d*)?)/.source;
	var quotedStringSource = /"[^"\\]*(?:\\[\s\S][^"\\]*)*"/.source;
	var htmlStringSource = /<(?:[^<>]|(?!<!--)<(?:[^<>"']|"[^"]*"|'[^']*')+>|<!--(?:[^-]|-(?!->))*-->)*>/.source;

	// Non-capturing group matching any one of the alternatives above.
	var ID = '(?:' + identifierSource + '|' + numberSource + '|' + quotedStringSource + '|' + htmlStringSource + ')';

	// Nested grammar shared by every ID-based token: when the matched ID is
	// wrapped in `<` ... `>`, the text between the brackets is tokenized with
	// the markup grammar.
	var htmlInside = {
		'markup': {
			pattern: /(^<)[\s\S]+(?=>$)/,
			lookbehind: true,
			alias: ['language-markup', 'language-html', 'language-xml'],
			inside: Prism.languages.markup
		}
	};

	/**
	 * Builds a greedy, lookbehind-enabled token definition from a pattern
	 * template in which every `<ID>` placeholder is replaced by the ID group.
	 *
	 * @param {string} template regex source containing `<ID>` placeholders
	 * @param {string} [flags] flags forwarded to the RegExp constructor
	 * @param {string} [alias] optional Prism alias for the token
	 * @returns {Object} token definition sharing the `htmlInside` grammar
	 */
	function idToken(template, flags, alias) {
		var token = {
			// split/join performs a literal, global substitution of the placeholder.
			pattern: new RegExp(template.split('<ID>').join(ID), flags),
			lookbehind: true,
			greedy: true
		};
		if (alias) {
			token.alias = alias;
		}
		token.inside = htmlInside;
		return token;
	}

	// Key order is significant: Prism tries the tokens in declaration order.
	var dotGrammar = {
		'comment': {
			pattern: /\/\/.*|\/\*[\s\S]*?\*\/|^#.*/m,
			greedy: true
		},
		'graph-name': idToken(/(\b(?:digraph|graph|subgraph)[ \t\r\n]+)<ID>/.source, 'i', 'class-name'),
		'attr-value': idToken(/(=[ \t\r\n]*)<ID>/.source),
		'attr-name': idToken(/([\[;, \t\r\n])<ID>(?=[ \t\r\n]*=)/.source),
		'keyword': /\b(?:digraph|edge|graph|node|strict|subgraph)\b/i,
		'compass-point': {
			pattern: /(:[ \t\r\n]*)(?:[ewc_]|[ns][ew]?)(?![\w\x80-\uFFFF])/,
			lookbehind: true,
			alias: 'builtin'
		},
		'node': idToken(/(^|[^-.\w\x80-\uFFFF\\])<ID>/.source),
		'operator': /[=:]|-[->]/,
		'punctuation': /[\[\]{};,]/
	};

	// `gv` is registered as a second name for the very same grammar object.
	Prism.languages.dot = dotGrammar;
	Prism.languages.gv = dotGrammar;

}(Prism));
3008+
29323009
Prism.languages.elixir = {
29333010
'doc': {
29343011
pattern: /@(?:doc|moduledoc)\s+(?:("""|''')[\s\S]*?\1|("|')(?:\\(?:\r\n|[\s\S])|(?!\2)[^\\\r\n])*\2)/,

lib/docs/core/requester.rb

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,12 @@ def build_and_queue_request(url, options = {}, &block)
5454
end
5555

5656
def handle_response(response)
57+
if ENV['RETRY'] == '1' && [0, 500, 501, 502, 503, 504].include?(response.code.to_i)
58+
instrument 'handle_response.retry', url: response.url do
59+
build_and_queue_request(response.url)
60+
end
61+
return
62+
end
5763
instrument 'handle_response.requester', url: response.url do
5864
on_response.each do |callback|
5965
result = callback.call(response)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
module Docs
  class Graphviz
    # Trims each page down to its `.td-content` element (when present), drops
    # a couple of non-documentation elements, and rebuilds every code sample
    # as a bare <pre> carrying a `data-language` attribute.
    class CleanHtmlFilter < Filter
      def call
        css('[tabindex]').remove_attribute('tabindex')

        # Narrow the document to the content container if the page has one.
        main_content = at_css('.td-content')
        @doc = main_content if main_content

        css('a:contains("Search the Graphviz codebase")').remove
        css('.td-page-meta__lastmod').remove

        css('pre:has(code)').each do |original|
          code = original.at_css('code')
          replacement = Nokogiri::XML::Node.new('pre', @doc)

          # An explicit `data-lang` on the <code> element wins; otherwise the
          # language is guessed from the sample's text.
          replacement['data-language'] = code['data-lang'] ||
            case code.content.strip
            when /^\$/    then 'shell-session' # a line begins with '$': shell session
            when /^cmd /  then ''              # command-line example, left unhighlighted
            when /^void / then 'c'             # C language
            else               'dot'           # nothing else matched: assume DOT
            end

          # Only the text is carried over; any markup inside <code> is dropped.
          replacement.content = code.content

          original.replace(replacement)
        end

        doc
      end
    end
  end
end
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
module Docs
  class Graphviz
    # Derives the entry name and type for each scraped Graphviz page.
    class EntriesFilter < Docs::EntriesFilter
      # Breadcrumb categories that are kept as their own entry type.
      # The original inline comment notes that these have several sub-pages.
      MULTI_PAGE_CATEGORIES = [
        'Attribute Types',
        'Attributes',
        'Command Line',
        'Layout Engines',
        'Output Formats',
      ].freeze

      # Returns the stripped text of the page's first <h1>.
      def get_name
        at_css('h1').content.strip
      end

      # Returns the text of the second breadcrumb item when it is one of
      # MULTI_PAGE_CATEGORIES; every other page (including pages without a
      # second breadcrumb item) is grouped under 'Documentation'.
      def get_type
        breadcrumbs = css('nav ol.breadcrumb li.breadcrumb-item')
        category = breadcrumbs[1]&.content&.strip

        return category if MULTI_PAGE_CATEGORIES.include?(category)

        # Several categories have only one page each, so they share one type.
        'Documentation'
      end
    end
  end
end

lib/docs/scrapers/graphviz.rb

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
module Docs
  # Scraper configuration for the Graphviz documentation site.
  class Graphviz < UrlScraper
    self.name = 'Graphviz'
    self.slug = 'graphviz'
    self.type = 'simple'
    self.release = '14.01'
    self.base_url = 'https://www.graphviz.org/'
    self.root_path = 'documentation/'

    self.links = {
      home: 'https://www.graphviz.org/',
      code: 'https://gitlab.com/graphviz/graphviz'
    }

    html_filters.push('graphviz/entries', 'graphviz/clean_html')

    options[:container] = 'main'

    # Two images exceed this limit:
    #   980KB https://www.graphviz.org/doc/info/plugins.png
    #   650KB https://www.graphviz.org/Gallery/twopi/twopi2.svg
    # Every other file is below 100KB.
    options[:max_image_size] = 100_000

    # TODO: UrlScraper is very unreliable against this website. Frequent errors:
    #   - SSL connect error
    #   - Failure when receiving data from the peer
    #   - was slow to process (30s)
    # Setting a :rate_limit doesn't help, so a more reliable solution is
    # still needed.
    #options[:rate_limit] = 100

    options[:only_patterns] = [
      /^documentation\//,
      /^doc\//,
      /^docs\//,
    ]

    # Paths that redirect on the site, mapped to their destinations.
    options[:replace_paths] = {
      'docs/outputs/cmap/' => 'docs/outputs/imap/',
      'doc/info/output.html' => 'docs/outputs/',
    }

    options[:attribution] = <<-HTML
      &copy; 2025 The Graphviz Authors<br>
      Licensed under the Eclipse Public License 1.0.
    HTML

    # Returns the name of the first tag returned by get_gitlab_tags for the
    # graphviz/graphviz project on gitlab.com.
    def get_latest_version(opts)
      tags = get_gitlab_tags('gitlab.com', 'graphviz', 'graphviz', opts)
      tags.first['name']
    end
  end
end

public/icons/docs/graphviz/16.png

395 Bytes
Loading
744 Bytes
Loading

public/icons/docs/graphviz/SOURCE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
https://gitlab.com/graphviz/graphviz.gitlab.io/-/blob/main/static/Resources/favicon.png

0 commit comments

Comments
 (0)