Skip to content

Commit b501250

Browse files
committed
Fix: crawl the new docusaurus site.
1 parent 632f481 commit b501250

File tree

7 files changed

+246
-131
lines changed

7 files changed

+246
-131
lines changed

.rubocop_todo.yml

Lines changed: 35 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,23 @@
11
# This configuration was generated by
22
# `rubocop --auto-gen-config`
3-
# on 2023-03-19 01:49:28 UTC using RuboCop version 1.36.0.
3+
# on 2025-09-06 21:08:28 UTC using RuboCop version 1.36.0.
44
# The point is for the user to remove these configuration records
55
# one by one as the offenses are removed from the code base.
66
# Note that changes in the inspected code, or installation of new
77
# versions of RuboCop, may require this file to be generated again.
88

9-
# Offense count: 7
9+
# Offense count: 1
10+
# Configuration parameters: AllowKeywordBlockArguments.
11+
Lint/UnderscorePrefixedVariableName:
12+
Exclude:
13+
- 'tasks/lib/slack_api/methods_spider.rb'
14+
15+
# Offense count: 4
1016
# Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods, CountRepeatedAttributes.
1117
Metrics/AbcSize:
12-
Max: 41
18+
Max: 45
1319

14-
# Offense count: 2
20+
# Offense count: 3
1521
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, AllowedMethods, AllowedPatterns, IgnoredMethods.
1622
# AllowedMethods: refine
1723
Metrics/BlockLength:
@@ -20,19 +26,40 @@ Metrics/BlockLength:
2026
# Offense count: 1
2127
# Configuration parameters: CountComments, CountAsOne.
2228
Metrics/ClassLength:
23-
Max: 153
29+
Max: 125
2430

2531
# Offense count: 2
2632
# Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods.
2733
Metrics/CyclomaticComplexity:
28-
Max: 11
34+
Max: 17
2935

30-
# Offense count: 9
36+
# Offense count: 5
3137
# Configuration parameters: CountComments, CountAsOne, ExcludedMethods, AllowedMethods, AllowedPatterns, IgnoredMethods.
3238
Metrics/MethodLength:
3339
Max: 33
3440

3541
# Offense count: 1
3642
# Configuration parameters: AllowedMethods, AllowedPatterns, IgnoredMethods.
3743
Metrics/PerceivedComplexity:
38-
Max: 10
44+
Max: 16
45+
46+
# Offense count: 3
47+
Security/Open:
48+
Exclude:
49+
- 'tasks/lib/docs/downloader.rb'
50+
51+
# Offense count: 1
52+
# Configuration parameters: AllowedConstants.
53+
Style/Documentation:
54+
Exclude:
55+
- 'spec/**/*'
56+
- 'test/**/*'
57+
- 'tasks/lib/docs/downloader.rb'
58+
59+
# Offense count: 1
60+
# This cop supports unsafe autocorrection (--autocorrect-all).
61+
# Configuration parameters: EnforcedStyle.
62+
# SupportedStyles: always, always_true, never
63+
Style/FrozenStringLiteralComment:
64+
Exclude:
65+
- 'tasks/lib/docs/downloader.rb'

Rakefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,5 @@ require 'fileutils'
99

1010
Bundler.setup :default, :development
1111

12+
load 'tasks/download.rake'
1213
load 'tasks/update.rake'

tasks/download.rake

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# frozen_string_literal: true
2+
3+
require_relative 'lib/docs/downloader'
4+
5+
namespace :api do
  namespace :ref do
    desc 'Download JSON reference.'
    task :download do
      # Clear the previous download (except undocumented files) before fetching.
      Rake::Task['api:ref:clean_files'].invoke('docs.slack.dev')
      downloader = SlackApi::Docs::Downloader.new
      downloader.download!
      puts "\nFinished downloading reference."
    end

    desc 'Delete all generated files except undocumented ones.'
    task :clean_files, :dirs do |_t, args|
      dirs = Array(args[:dirs]).map(&:to_s).reject(&:empty?)
      # Without any directory the pattern would be "./{}/*", which brace-expands
      # to ".//*" and would delete every entry of the working directory.
      next if dirs.empty?

      files = Dir["./{#{dirs.join(',')}}/*"].grep_v(%r{/undocumented\b})
      FileUtils.rm_rf files
    end
  end
end

tasks/lib/docs/downloader.rb

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
require 'fileutils'
require 'json'
require 'open-uri'

module SlackApi
  module Docs
    # Downloads the Slack reference JSON published on docs.slack.dev
    # (the methods index, one document per method, and the events index)
    # and stores pretty-printed copies under the local `docs.slack.dev` directory.
    class Downloader
      # @return [String] absolute path of the `docs.slack.dev` directory,
      #   resolved three directories above this file's directory
      def target_path
        @target_path ||= File.expand_path('../../../docs.slack.dev', __dir__)
      end

      # @return [String] directory that receives the events index
      def events_dir
        File.join(target_path, 'events')
      end

      # @return [String] path of the downloaded events index
      def events_path
        File.join(events_dir, 'events.json')
      end

      # @return [String] directory that receives the methods index and per-method files
      def methods_dir
        File.join(target_path, 'methods')
      end

      # @return [String] path of the downloaded methods index
      def methods_path
        File.join(methods_dir, 'methods.json')
      end

      # @return [String] URL of the events index
      def events_url
        'https://docs.slack.dev/reference/events.json'
      end

      # @return [String] URL of the methods index
      def methods_url
        'https://docs.slack.dev/reference/methods.json'
      end

      # @param method [String] method name as listed in the methods index
      # @return [String] URL of that method's JSON document
      def method_url(method)
        "https://docs.slack.dev/reference/methods/#{method}.json"
      end

      # @param method_name [String] method name as listed in the methods index
      # @return [String] local path for that method's JSON document
      def method_target_path(method_name)
        File.join(methods_dir, "#{method_name}.json")
      end

      # Downloads the methods (index and per-method documents), then the events index.
      def download!
        download_methods!
        download_events!
      end

      # Saves the methods index, then fetches and saves the document of every
      # entry in it, keyed by the entry's 'name'.
      def download_methods!
        puts "#{methods_url} => #{methods_path}"
        index = fetch_json(methods_url)
        write_json(methods_path, index)

        index.each do |entry|
          name = entry['name']
          source = method_url(name)
          destination = method_target_path(name)
          puts "#{source} => #{destination}"
          write_json(destination, fetch_json(source))
        end
      end

      # Fetches the events index and saves it to {#events_path}.
      def download_events!
        puts "#{events_url} => #{events_path}"
        write_json(events_path, fetch_json(events_url))
      end

      private

      # Fetches +url+ and parses the response body as JSON.
      # Uses URI.parse(url).open rather than URI.open so the argument can only
      # ever be treated as a URL.
      def fetch_json(url)
        URI.parse(url).open { |io| JSON.parse(io.read) }
      end

      # Writes +data+ as pretty-printed JSON to +path+, creating the parent
      # directory when it is missing.
      def write_json(path, data)
        FileUtils.mkdir_p(File.dirname(path))
        File.write(path, JSON.pretty_generate(data))
      end
    end
  end
end

tasks/lib/slack_api/events_spider.rb

Lines changed: 34 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,57 +3,55 @@
33
module SlackApi
44
# Scrapes Slack events
55
class EventsSpider < BaseSpider
6-
handle 'https://api.slack.com/events', :process_list
6+
handle 'https://docs.slack.dev/reference/events/', :process_list
7+
8+
# Returns the SlackApi::Docs::Downloader for this spider, creating it on
# first use and reusing the same instance afterwards.
def downloader
  return @downloader if @downloader

  @downloader = SlackApi::Docs::Downloader.new
end
711

812
# Loads the events index from the downloader's events_path and, for every
# entry whose 'APIs' list includes 'RTM', calls `handle` with the URL resolved
# from the event name, the :process_event handler and the event's name,
# description and required scope.
def process_list(page, _default_data = {})
  catalog = JSON.load_file(downloader.events_path)

  catalog.each do |entry|
    apis = entry['APIs']
    # Entries without an 'APIs' key are skipped as well.
    next unless apis&.include?('RTM')

    event_name = entry['name']
    handle(
      resolve_url(event_name, page),
      :process_event,
      name: event_name,
      desc: entry['description'],
      required_scope: 'RTM'
    )
  end
end
2725

28-
# Records events/<name>.json containing the event's name and description.
#
# The page itself is not read: the description comes from the data passed in.
#
# TODO: 'long_desc', 'required_scope' and 'example' used to be scraped from
# the event page (.apiDocsPage__markdownOutput / .apiReference__scope) and
# are currently not emitted; restore them once they can be extracted from the
# new site.
#
# @param _page [Object] fetched page (unused)
# @param data [Hash] expects :name and :desc
def process_event(_page, data = {})
  desc = data[:desc].to_s
  # Terminate the sentence, but do not double the period when the source
  # description already ends with one.
  desc = "#{desc}." unless desc.end_with?('.')

  json_hash = {
    'name' => data[:name],
    'desc' => desc
  }

  record(file_name: "events/#{data[:name]}.json", json: JSON.pretty_generate(json_hash))
end

0 commit comments

Comments
 (0)