Skip to content

Commit f4e2ebd

Browse files
committed
Update pytorch scraper, include various 2.x versions
1. Various 2.x versions are included separately. PyTorch versions are not backward compatible, and each has different compatibility with CUDA etc., so people may use a specific version for an extended period of time. 2. Removed the type replacement table for `get_type`. Instead, get the type from the breadcrumbs directly. IMO this produces better results that match the index on the original website (the left side menu in docs.pytorch.org). Also, the `TYPE_REPLACEMENTS` table was opinionated and hard to maintain across versions. 3. Always include the default entry (removed the `include_default_entry?` function). I don't see a downside to this. Previously some pages were missing because of this (e.g. torchrun https://docs.pytorch.org/docs/1.13/elastic/run.html)
1 parent 7579a06 commit f4e2ebd

File tree

2 files changed

+43
-83
lines changed

2 files changed

+43
-83
lines changed

lib/docs/filters/pytorch/entries.rb

Lines changed: 2 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -1,92 +1,18 @@
11
module Docs
22
class Pytorch
33
class EntriesFilter < Docs::EntriesFilter
4-
TYPE_REPLACEMENTS = {
5-
"torch.Tensor" => "Tensor",
6-
"torch.nn" => "Neuro Network",
7-
"Probability distributions - torch.distributions" => "Probability Distributions",
8-
"torch" => "Torch",
9-
"Quantization" => "Quantization",
10-
"torch.optim" => "Optimization",
11-
"torch.Storage" => "Storage",
12-
"torch.nn.functional" => "NN Functions",
13-
"torch.cuda" => "CUDA",
14-
"Torch Distributed Elastic" => "Distributed Elastic",
15-
"torch.fx" => "FX",
16-
"TorchScript" => "Torch Script",
17-
"torch.onnx" => "ONNX",
18-
"Distributed communication package - torch.distributed" => "Distributed Communication",
19-
"Automatic differentiation package - torch.autograd" => "Automatic Differentiation",
20-
"torch.linalg" => "Linear Algebra",
21-
"Distributed Checkpoint - torch.distributed.checkpoint" => "Distributed Checkpoint",
22-
"Distributed RPC Framework" => "Distributed RPC",
23-
"torch.special" => "SciPy-like Special",
24-
"torch.package" => "Package",
25-
"torch.backends" => "Backends",
26-
"FullyShardedDataParallel" => "Fully Sharded Data Parallel",
27-
"torch.sparse" => "Sparse Tensors",
28-
"torch.export" => "Traced Graph Export",
29-
"torch.fft" => "Discrete Fourier Transforms",
30-
"torch.utils.data" => "Datasets and Data Loaders",
31-
"torch.monitor" => "Monitor",
32-
"Automatic Mixed Precision package - torch.amp" => "Automatic Mixed Precision",
33-
"torch.utils.tensorboard" => "Tensorboard",
34-
"torch.profiler" => "Profiler",
35-
"torch.mps" => "MPS",
36-
"DDP Communication Hooks" => "DDP Communication Hooks",
37-
"Benchmark Utils - torch.utils.benchmark" => "Benchmark Utils",
38-
"torch.nn.init" => "Parameter Initializations",
39-
"Tensor Parallelism - torch.distributed.tensor.parallel" => "Tensor Parallelism",
40-
"torch.func" => "JAX-like Function Transforms",
41-
"Distributed Optimizers" => "Distributed Optimizers",
42-
"torch.signal" => "SciPy-like Signal",
43-
"torch.futures" => "Miscellaneous",
44-
"torch.utils.cpp_extension" => "Miscellaneous",
45-
"torch.overrides" => "Miscellaneous",
46-
"Generic Join Context Manager" => "Miscellaneous",
47-
"torch.hub" => "Miscellaneous",
48-
"torch.cpu" => "Miscellaneous",
49-
"torch.random" => "Miscellaneous",
50-
"torch.compiler" => "Miscellaneous",
51-
"Pipeline Parallelism" => "Miscellaneous",
52-
"Named Tensors" => "Miscellaneous",
53-
"Multiprocessing package - torch.multiprocessing" => "Miscellaneous",
54-
"torch.utils" => "Miscellaneous",
55-
"torch.library" => "Miscellaneous",
56-
"Tensor Attributes" => "Miscellaneous",
57-
"torch.testing" => "Miscellaneous",
58-
"torch.nested" => "Miscellaneous",
59-
"Understanding CUDA Memory Usage" => "Miscellaneous",
60-
"torch.utils.dlpack" => "Miscellaneous",
61-
"torch.utils.checkpoint" => "Miscellaneous",
62-
"torch.__config__" => "Miscellaneous",
63-
"Type Info" => "Miscellaneous",
64-
"torch.utils.model_zoo" => "Miscellaneous",
65-
"torch.utils.mobile_optimizer" => "Miscellaneous",
66-
"torch._logging" => "Miscellaneous",
67-
"torch.masked" => "Miscellaneous",
68-
"torch.utils.bottleneck" => "Miscellaneous"
69-
}
70-
714
def get_breadcrumbs
725
css('.pytorch-breadcrumbs > li').map {
736
|node| node.content.delete_suffix(' >').strip
747
}.reject { |item| item.nil? || item.empty? }
758
end
769

7710
def get_name
78-
b = get_breadcrumbs
79-
b[(b[1] == 'torch' ? 2 : 1)..].join('.')
11+
get_breadcrumbs[-1]
8012
end
8113

8214
def get_type
83-
t = get_breadcrumbs[1]
84-
TYPE_REPLACEMENTS.fetch(t, t)
85-
end
86-
87-
def include_default_entry?
88-
# Only include API entries to simplify and unify the list
89-
return name.start_with?('torch.')
15+
get_breadcrumbs[1]
9016
end
9117

9218
def additional_entries
@@ -108,8 +34,6 @@ def additional_entries
10834
entries << [id + '()', id]
10935
when 'py class', 'py attribute', 'py property'
11036
entries << [id, id]
111-
when 'footnote brackets', 'field-list simple'
112-
next
11337
end
11438
end
11539

lib/docs/scrapers/pytorch.rb

Lines changed: 41 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,20 +12,56 @@ class Pytorch < UrlScraper
1212

1313
options[:skip] = ['cpp_index.html', 'deploy.html', 'packages.html', 'py-modindex.html', 'genindex.html']
1414
options[:skip_patterns] = [/\Acommunity/, /\A_modules/, /\Anotes/, /\Aorg\/pytorch\//]
15+
options[:max_image_size] = 1_000_000
1516

1617
options[:attribution] = <<-HTML
17-
&copy; 2024, PyTorch Contributors<br>
18+
&copy; 2025, PyTorch Contributors<br>
1819
PyTorch has a BSD-style license, as found in the <a href="https://github.com/pytorch/pytorch/blob/main/LICENSE">LICENSE</a> file.
1920
HTML
2021

21-
version '2' do
22+
version '2.7' do
23+
self.release = '2.7'
24+
self.base_url = "https://docs.pytorch.org/docs/#{release}/"
25+
end
26+
27+
version '2.6' do
28+
self.release = '2.6'
29+
self.base_url = "https://docs.pytorch.org/docs/#{release}/"
30+
end
31+
32+
version '2.5' do
33+
self.release = '2.5'
34+
self.base_url = "https://docs.pytorch.org/docs/#{release}/"
35+
end
36+
37+
version '2.4' do
38+
self.release = '2.4'
39+
self.base_url = "https://docs.pytorch.org/docs/#{release}/"
40+
end
41+
42+
version '2.3' do
43+
self.release = '2.3'
44+
self.base_url = "https://docs.pytorch.org/docs/#{release}/"
45+
end
46+
47+
version '2.2' do
48+
self.release = '2.2'
49+
self.base_url = "https://docs.pytorch.org/docs/#{release}/"
50+
end
51+
52+
version '2.1' do
2253
self.release = '2.1'
23-
self.base_url = "https://pytorch.org/docs/#{release}/"
54+
self.base_url = "https://docs.pytorch.org/docs/#{release}/"
55+
end
56+
57+
version '2.0' do
58+
self.release = '2.0'
59+
self.base_url = "https://docs.pytorch.org/docs/#{release}/"
2460
end
2561

26-
version '1' do
62+
version '1.13' do
2763
self.release = '1.13'
28-
self.base_url = "https://pytorch.org/docs/#{release}/"
64+
self.base_url = "https://docs.pytorch.org/docs/#{release}/"
2965
end
3066

3167
def get_latest_version(opts)

0 commit comments

Comments
 (0)