#
# Copyright (c) 2010 Doug Hellmann. All rights reserved.
#
-"""Spelling checker extension for Sphinx.
-"""
+"""Spelling checker extension for Sphinx."""

# TODO - Words with multiple uppercase letters treated as classes and ignored

import builtins
import importlib
import subprocess
import sys
-from xmlrpc import client as xmlrpc_client

+import requests
from enchant.tokenize import Filter, get_tokenizer, tokenize, unit_tokenize
from sphinx.util import logging

@@ -22,18 +21,19 @@ class AcronymFilter(Filter):
2221 """If a word looks like an acronym (all upper case letters),
2322 ignore it.
2423 """
24+
2525 def _skip (self , word ):
2626 return (
27- word .isupper () or # all caps
27+ word .isupper () # all caps
28+ or
2829 # pluralized acronym ("URLs")
29- (word [- 1 ].lower () == 's' and word [:- 1 ].isupper ())
30+ (word [- 1 ].lower () == "s" and word [:- 1 ].isupper ())
3031 )
3132
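For illustration, the skip rule reformatted above can be exercised on its own: it accepts fully uppercase tokens and pluralized acronyms. A minimal sketch, separate from the patch:

```python
# Standalone sketch of the AcronymFilter skip rule.
def is_acronym(word: str) -> bool:
    return (
        word.isupper()  # all caps ("URL")
        or (word[-1].lower() == "s" and word[:-1].isupper())  # pluralized ("URLs")
    )

assert is_acronym("URL") and is_acronym("URLs")
assert not is_acronym("Url")
```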

class list_tokenize(tokenize):
-
    def __init__(self, words):
-        super().__init__('')
+        super().__init__("")
        self._words = words

    def next(self):
@@ -44,8 +44,8 @@ def next(self):


class ContractionFilter(Filter):
-    """Strip common contractions from words.
-    """
+    """Strip common contractions from words."""
+
    splits = {
        "aren't": ["are", "not"],
        "can't": ["can", "not"],
@@ -138,8 +138,7 @@ def _split(self, word):


class IgnoreWordsFilter(Filter):
-    """Given a set of words, ignore them all.
-    """
+    """Given a set of words, ignore them all."""

    def __init__(self, tokenizer, word_set):
        self.word_set = set(word_set)
@@ -150,7 +149,6 @@ def _skip(self, word):


class IgnoreWordsFilterFactory:
-
    def __init__(self, words):
        self.words = words

@@ -159,23 +157,31 @@ def __call__(self, tokenizer):


class PyPIFilterFactory(IgnoreWordsFilterFactory):
-    """Build an IgnoreWordsFilter for all of the names of packages on PyPI.
-    """
+    """Build an IgnoreWordsFilter for all of the names of packages on PyPI."""
+
    def __init__(self):
-        client = xmlrpc_client.ServerProxy('https://pypi.python.org/pypi')
-        super().__init__(client.list_packages())
+        r = requests.get(
+            "https://pypi.org/simple/",
+            headers={
+                "user-agent": "sphinxcontrib.spelling",
+                "accept": "application/vnd.pypi.simple.v1+json",
+            },
+        )
+        names = [i["name"] for i in r.json()["projects"]]
+        logger.debug("retrieved %d project names from pypi.org", len(names))
+        super().__init__(names)

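The replacement queries the PyPI Simple index in its JSON form (the `application/vnd.pypi.simple.v1+json` content type from PEP 691) instead of the retired XML-RPC `list_packages` call. A minimal standalone sketch of the same request; the `raise_for_status()` check is a defensive addition here, not part of the patch:

```python
# Fetch all project names from the PyPI Simple index (PEP 691 JSON form).
import requests

r = requests.get(
    "https://pypi.org/simple/",
    headers={"accept": "application/vnd.pypi.simple.v1+json"},
)
r.raise_for_status()  # not in the patch; fail loudly on HTTP errors
names = [p["name"] for p in r.json()["projects"]]
print(len(names), "project names retrieved")
```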


class PythonBuiltinsFilter(Filter):
-    """Ignore names of built-in Python symbols.
-    """
+    """Ignore names of built-in Python symbols."""
+
    def _skip(self, word):
        return hasattr(builtins, word)

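The builtins test is a plain attribute probe on the `builtins` module, for example:

```python
# Quick illustration of the hasattr() probe used by PythonBuiltinsFilter.
import builtins

assert hasattr(builtins, "len")         # "len" is skipped by the filter
assert not hasattr(builtins, "lenght")  # a misspelling is still reported
```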


class ImportableModuleFilter(Filter):
-    """Ignore names of modules that we could import.
-    """
+    """Ignore names of modules that we could import."""
+
    def __init__(self, tokenizer):
        super().__init__(tokenizer)
        self.found_modules = set(sys.builtin_module_names)
@@ -185,7 +191,7 @@ def __init__(self, tokenizer):
        # valid module, which is consistent with the behavior before
        # version 7.3.1. See
        # https://github.com/sphinx-contrib/spelling/issues/141
-        self.sought_modules.add('__main__')
+        self.sought_modules.add("__main__")

    def _skip(self, word):
        # If the word looks like a python module filename, strip the
@@ -195,13 +201,13 @@ def _skip(self, word):
        # it look like Sphinx is complaining about a commandline
        # argument. See
        # https://github.com/sphinx-contrib/spelling/issues/142
-        if word.endswith('.py'):
+        if word.endswith(".py"):
            logger.debug(
-                'removing .py extension from %r before searching for module',
-                word)
+                "removing .py extension from %r before searching for module", word
+            )
            word = word[:-3]

-        valid_module_name = all(n.isidentifier() for n in word.split('.'))
+        valid_module_name = all(n.isidentifier() for n in word.split("."))
        if not valid_module_name:
            return False

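The importability test amounts to validating the dotted name and then probing with `importlib.util.find_spec`, which locates a module without importing it. A standalone sketch, assuming the same `.py`-stripping behavior as the filter; the exact exceptions caught here are an assumption, since the patched code catches a broader set:

```python
# Sketch of the importability check used by ImportableModuleFilter.
import importlib.util

def looks_importable(word: str) -> bool:
    if word.endswith(".py"):
        word = word[:-3]
    # Every dotted component must be a valid identifier.
    if not all(n.isidentifier() for n in word.split(".")):
        return False
    try:
        spec = importlib.util.find_spec(word)
    except (ImportError, ValueError):
        # find_spec can raise for odd names or missing parent packages
        return False
    return spec is not None

assert looks_importable("subprocess")
assert not looks_importable("no-such-module")
```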
@@ -214,8 +220,7 @@ def _skip(self, word):
            # error out of distutils, or something else triggered
            # by failing to be able to import a parent package to
            # use the metadata to search for a subpackage.
-            logger.debug('find_spec(%r) failed, invalid module name: %s',
-                         word, err)
+            logger.debug("find_spec(%r) failed, invalid module name: %s", word, err)
        else:
            if mod is not None:
                self.found_modules.add(word)
@@ -230,25 +235,28 @@ class ContributorFilter(IgnoreWordsFilter):
    tokens that are in the set.
    """

-    _pretty_format = (
-        '%(trailers:key=Co-Authored-By,separator=%x0A)%x0A%an%x0A%cn'
-    )
+    _pretty_format = "%(trailers:key=Co-Authored-By,separator=%x0A)%x0A%an%x0A%cn"

    def __init__(self, tokenizer):
        contributors = self._get_contributors()
        super().__init__(tokenizer, contributors)

    def _get_contributors(self):
-        logger.info('Scanning contributors')
-        cmd = ['git', 'log', '--quiet', '--no-color',
-               f'--pretty=format:{self._pretty_format}']
+        logger.info("Scanning contributors")
+        cmd = [
+            "git",
+            "log",
+            "--quiet",
+            "--no-color",
+            f"--pretty=format:{self._pretty_format}",
+        ]

        try:
            p = subprocess.run(cmd, check=True, stdout=subprocess.PIPE)
        except (subprocess.CalledProcessError, FileNotFoundError) as err:
-            logger.warning('Called: %s', ' '.join(cmd))
-            logger.warning('Failed to scan contributors: %s', err)
+            logger.warning("Called: %s", " ".join(cmd))
+            logger.warning("Failed to scan contributors: %s", err)
            return set()
-        output = p.stdout.decode('utf-8')
-        tokenizer = get_tokenizer('en_US', filters=[])
+        output = p.stdout.decode("utf-8")
+        tokenizer = get_tokenizer("en_US", filters=[])
        return {word for word, pos in tokenizer(output)}
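In the `--pretty` format string above, `%x0A` is a literal newline, so each commit contributes its Co-Authored-By trailers plus the author (`%an`) and committer (`%cn`) names, one per line. A standalone sketch of the same collection step, run from inside a git checkout:

```python
# Collect contributor names from git history, one name per output line.
import subprocess

fmt = "%(trailers:key=Co-Authored-By,separator=%x0A)%x0A%an%x0A%cn"
out = subprocess.run(
    ["git", "log", "--quiet", "--no-color", f"--pretty=format:{fmt}"],
    check=True,
    stdout=subprocess.PIPE,
).stdout.decode("utf-8")
names = {line for line in out.splitlines() if line}
print(sorted(names)[:5])
```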