Skip to content

Commit e2b3b43

Browse files
committed
Update pylint and fix linting errors
1 parent: e89b9d0 · commit: e2b3b43

File tree

9 files changed: 40 additions and 105 deletions.

.pylintrc

Lines changed: 1 addition & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -60,16 +60,7 @@ confidence=
6060
# --enable=similarities". If you want to run only the classes checker, but have
6161
# no Warning level messages displayed, use "--disable=all --enable=classes
6262
# --disable=W".
63-
disable=print-statement,
64-
parameter-unpacking,
65-
unpacking-in-except,
66-
old-raise-syntax,
67-
backtick,
68-
long-suffix,
69-
old-ne-operator,
70-
old-octal-literal,
71-
import-star-module-level,
72-
non-ascii-bytes-literal,
63+
disable=
7364
raw-checker-failed,
7465
bad-inline-option,
7566
locally-disabled,
@@ -78,69 +69,7 @@ disable=print-statement,
7869
useless-suppression,
7970
deprecated-pragma,
8071
use-symbolic-message-instead,
81-
apply-builtin,
82-
basestring-builtin,
83-
buffer-builtin,
84-
cmp-builtin,
85-
coerce-builtin,
86-
execfile-builtin,
87-
file-builtin,
88-
long-builtin,
89-
raw_input-builtin,
90-
reduce-builtin,
91-
standarderror-builtin,
92-
unicode-builtin,
93-
xrange-builtin,
94-
coerce-method,
95-
delslice-method,
96-
getslice-method,
97-
setslice-method,
98-
no-absolute-import,
99-
old-division,
100-
dict-iter-method,
101-
dict-view-method,
102-
next-method-called,
103-
metaclass-assignment,
104-
indexing-exception,
105-
raising-string,
106-
reload-builtin,
107-
oct-method,
108-
hex-method,
109-
nonzero-method,
110-
cmp-method,
111-
input-builtin,
112-
round-builtin,
113-
intern-builtin,
114-
unichr-builtin,
115-
map-builtin-not-iterating,
116-
zip-builtin-not-iterating,
117-
range-builtin-not-iterating,
118-
filter-builtin-not-iterating,
119-
using-cmp-argument,
120-
eq-without-hash,
121-
div-method,
122-
idiv-method,
123-
rdiv-method,
124-
exception-message-attribute,
125-
invalid-str-codec,
126-
sys-max-int,
127-
bad-python3-import,
128-
deprecated-string-function,
129-
deprecated-str-translate-call,
130-
deprecated-itertools-function,
131-
deprecated-types-field,
132-
next-method-defined,
133-
dict-items-not-iterating,
134-
dict-keys-not-iterating,
135-
dict-values-not-iterating,
136-
deprecated-operator-function,
137-
deprecated-urllib-function,
138-
xreadlines-attribute,
139-
deprecated-sys-function,
140-
exception-escape,
141-
comprehension-escape,
14272
arguments-differ,
143-
bad-continuation,
14473
broad-except,
14574
consider-using-in,
14675
dangerous-default-value,
@@ -152,7 +81,6 @@ disable=print-statement,
15281
literal-comparison,
15382
missing-docstring,
15483
no-else-return,
155-
no-self-use,
15684
pointless-string-statement,
15785
protected-access,
15886
redefined-builtin,
@@ -364,13 +292,6 @@ max-line-length=120
364292
# Maximum number of lines in a module.
365293
max-module-lines=1000
366294

367-
# List of optional constructs for which whitespace checking is disabled. `dict-
368-
# separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
369-
# `trailing-comma` allows a space between comma and closing bracket: (a, ).
370-
# `empty-line` allows space-only lines.
371-
no-space-check=trailing-comma,
372-
dict-separator
373-
374295
# Allow the body of a class to be on the same line as the declaration if body
375296
# contains single statement.
376297
single-line-class-stmt=no

Pipfile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,4 @@ python-keycloak-client = "==0.2.3"
1515
pylint = "==2.15.7"
1616
tox = "==3.27.0"
1717
tox-pipenv = "==1.10.1"
18+
wrapt = "*"

Pipfile.lock

Lines changed: 25 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

scraper/src/config/browser_handler.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,14 +24,13 @@ def init(config_original_content, js_render, user_agent):
2424
chrome_options = Options()
2525
chrome_options.add_argument('--no-sandbox')
2626
chrome_options.add_argument('--headless')
27-
chrome_options.add_argument('user-agent={0}'.format(user_agent))
27+
chrome_options.add_argument(f'user-agent={user_agent}')
2828

2929
CHROMEDRIVER_PATH = os.environ.get('CHROMEDRIVER_PATH',
3030
"/usr/bin/chromedriver")
3131
if not os.path.isfile(CHROMEDRIVER_PATH):
3232
raise Exception(
33-
"Env CHROMEDRIVER_PATH='{}' is not a path to a file".format(
34-
CHROMEDRIVER_PATH))
33+
f"Env CHROMEDRIVER_PATH='{CHROMEDRIVER_PATH}' is not a path to a file")
3534
driver = webdriver.Chrome(
3635
CHROMEDRIVER_PATH,
3736
options=chrome_options)

scraper/src/config/config_loader.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class ConfigLoader:
4242
stop_content = []
4343
strategy = 'default'
4444
strict_redirect = True
45-
strip_chars = u".,;:§¶"
45+
strip_chars = ".,;:§¶"
4646
use_anchors = False
4747
user_agent = 'Meilisearch docs-scraper'
4848
only_content_level = False
@@ -90,7 +90,7 @@ def __init__(self, config):
9090
def _load_config(self, config):
9191
if os.path.isfile(config):
9292
self.config_file = config
93-
with open(self.config_file, 'r') as f:
93+
with open(self.config_file, mode='r', encoding='utf-8') as f:
9494
config = f.read()
9595

9696
try:

scraper/src/documentation_spider.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -156,8 +156,7 @@ def add_records(self, response, from_sitemap):
156156
# Arbitrary limit
157157
if self.nb_hits_max > 0 and DocumentationSpider.NB_INDEXED > self.nb_hits_max:
158158
DocumentationSpider.NB_INDEXED = 0
159-
self.reason_to_stop = "Too much hits, Docs-Scraper only handle {} records".format(
160-
int(self.nb_hits_max))
159+
self.reason_to_stop = f"Too much hits, Docs-Scraper only handle {int(self.nb_hits_max)} records"
161160
raise ValueError(self.reason_to_stop)
162161

163162
def parse_from_sitemap(self, response):

scraper/src/index.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,7 +113,7 @@ def run_config(config):
113113

114114
if DocumentationSpider.NB_INDEXED > 0:
115115
# meilisearch_helper.commit_tmp_index()
116-
print('Nb hits: {}'.format(DocumentationSpider.NB_INDEXED))
116+
print(f'Nb hits: {DocumentationSpider.NB_INDEXED}')
117117
else:
118118
print('Crawling issue: nbHits 0 for ' + config.index_uid)
119119
# meilisearch_helper.report_crawling_issue()

scraper/src/meilisearch_helper.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -123,8 +123,7 @@ def add_records(self, records, url, from_sitemap):
123123
color = "96" if from_sitemap else "94"
124124

125125
print(
126-
'\033[{}m> Docs-Scraper: \033[0m{}\033[93m {} records\033[0m)'.format(
127-
color, url, record_count))
126+
f'\033[{color}m> Docs-Scraper: \033[0m{url}\033[93m {record_count} records\033[0m)')
128127

129128
# Algolia's settings:
130129
# {"minWordSizefor1Typo"=>3,

scraper/src/strategies/default_strategy.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,9 @@ def get_records_from_response(self, response):
4848
def _update_hierarchy_with_global_content(self, hierarchy,
4949
current_level_int):
5050
for index in range(0, current_level_int + 1):
51-
if 'lvl{}'.format(index) in self.global_content:
52-
hierarchy['lvl{}'.format(index)] = self.global_content[
53-
'lvl{}'.format(index)]
51+
if f'lvl{index}' in self.global_content:
52+
hierarchy[f'lvl{index}'] = self.global_content[
53+
f'lvl{index}']
5454

5555
return hierarchy
5656

@@ -100,8 +100,8 @@ def get_records_from_dom(self, current_page_url=None):
100100
anchors[current_level] = Anchor.get_anchor(node)
101101

102102
for index in range(current_level_int + 1, 7):
103-
hierarchy['lvl{}'.format(index)] = None
104-
anchors['lvl{}'.format(index)] = None
103+
hierarchy[f'lvl{index}'] = None
104+
anchors[f'lvl{index}'] = None
105105
previous_hierarchy = hierarchy
106106

107107
if self.config.only_content_level:
@@ -229,7 +229,7 @@ def _get_text_content_for_level(self, node, current_level, selectors):
229229
def _get_closest_anchor(anchors):
230230
# Getting the element anchor as the closest one
231231
for index in list(range(6, -1, -1)):
232-
potential_anchor = anchors['lvl{}'.format(index)]
232+
potential_anchor = anchors[f'lvl{index}']
233233
if potential_anchor is None:
234234
continue
235235
return potential_anchor

0 commit comments

Comments
 (0)