Skip to content

Commit fcb6e02

Browse files
committed
chore: fix lints
1 parent ba95b5f commit fcb6e02

27 files changed

+239
-237
lines changed

pyproject.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -75,7 +75,11 @@ select = [
7575
"SIM", # flake8-simplify
7676
"I", # isort
7777
]
78-
ignore = ["E501", "B011"]
78+
ignore = [
79+
"E501",
80+
"B011",
81+
"UP038", # rewrites 'isinstance(x, (int, float))' to 'isinstance(x, int | float)', which is slower at runtime
82+
]
7983

8084
[tool.ruff.lint.flake8-type-checking]
8185
# Add quotes around type annotations, if doing so would allow

src/neosca/ns_envar.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
import logging
44
import os
55
import os.path as os_path
6-
from typing import List, Optional
76

87
from neosca.ns_platform_info import IS_LINUX, IS_MAC, IS_WINDOWS
98
from neosca.ns_print import color_print
@@ -13,7 +12,7 @@
1312
STANFORD_TREGEX_HOME = "STANFORD_TREGEX_HOME"
1413

1514

16-
def _setenv_windows(env_var: str, paths: List[str], is_refresh: bool = False) -> None:
15+
def _setenv_windows(env_var: str, paths: list[str], is_refresh: bool = False) -> None:
1716
import ctypes # Allows interface with low-level C APIs
1817
import winreg # Allows access to the Windows registry
1918

@@ -47,7 +46,7 @@ def _setenv_windows(env_var: str, paths: List[str], is_refresh: bool = False) ->
4746
)
4847

4948

50-
def _setenv_unix(env_var: str, paths: List[str], is_refresh: bool = False) -> None:
49+
def _setenv_unix(env_var: str, paths: list[str], is_refresh: bool = False) -> None:
5150
shell = os.environ.get("SHELL")
5251
if shell is None:
5352
logging.warning(
@@ -101,7 +100,7 @@ def _setenv_unix(env_var: str, paths: List[str], is_refresh: bool = False) -> No
101100
f.write("\n".join(configs))
102101

103102

104-
def setenv(envar: str, paths: List[str], is_override: bool = False, is_quiet: bool = False) -> None:
103+
def setenv(envar: str, paths: list[str], is_override: bool = False, is_quiet: bool = False) -> None:
105104
assert any((IS_WINDOWS, IS_MAC, IS_LINUX))
106105
if IS_WINDOWS:
107106
_setenv_windows(envar, paths, is_override)
@@ -116,6 +115,6 @@ def setenv(envar: str, paths: List[str], is_override: bool = False, is_quiet: bo
116115
)
117116

118117

119-
def get_dir_frm_env(envar: str) -> Optional[str]:
118+
def get_dir_frm_env(envar: str) -> str | None:
120119
directory = os.getenv(envar, "")
121120
return directory if os_path.isdir(directory) else None

src/neosca/ns_io.py

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@
99
import pickle
1010
import sys
1111
import zipfile
12+
from collections.abc import Generator, Iterable, Sequence
1213
from os import PathLike
1314
from pathlib import Path
14-
from typing import Any, Dict, Generator, Iterable, List, Sequence, Tuple, Union
15+
from typing import Any
1516
from xml.etree.ElementTree import XML, fromstring
1617

1718
from charset_normalizer import detect
@@ -32,7 +33,7 @@ def __new__(cls, name, bases, dict_):
3233
class Ns_IO(metaclass=Ns_IO_Meta):
3334
# Type checker does not detect definition in Ns_IO_Meta, so declare here to
3435
# silence the "access unknown member" warning
35-
SUPPORTED_EXTENSIONS: Tuple[str, ...] = tuple()
36+
SUPPORTED_EXTENSIONS: tuple[str, ...] = tuple()
3637

3738
DOCX_NAMESPACE = "{http://schemas.openxmlformats.org/wordprocessingml/2006/main}"
3839
DOCX_PARA = DOCX_NAMESPACE + "p"
@@ -95,7 +96,7 @@ def read_odt(cls, path: str) -> str:
9596
return "\n".join("".join(node.itertext()) for node in paragraphs)
9697

9798
@classmethod
98-
def suffix(cls, file_path: Union[str, PathLike], *, strip_dot: bool = False) -> str:
99+
def suffix(cls, file_path: str | PathLike, *, strip_dot: bool = False) -> str:
99100
"""
100101
>>> suffix('my/library/setup.py')
101102
.py
@@ -112,14 +113,14 @@ def suffix(cls, file_path: Union[str, PathLike], *, strip_dot: bool = False) ->
112113
return extension
113114

114115
@classmethod
115-
def supports(cls, file_path: Union[str, PathLike]) -> bool:
116+
def supports(cls, file_path: str | PathLike) -> bool:
116117
# Could instead use hasattr(cls, f"read_{extension}").
117118
# The SUPPORTED_EXTENSIONS is required by ns_main_cli:74 to list
118119
# supported extensions to users.
119120
return cls.suffix(file_path, strip_dot=True) in cls.SUPPORTED_EXTENSIONS
120121

121122
@classmethod
122-
def not_supports(cls, file_path: Union[str, PathLike]) -> bool:
123+
def not_supports(cls, file_path: str | PathLike) -> bool:
123124
return not cls.supports(file_path)
124125

125126
@classmethod
@@ -152,16 +153,14 @@ def is_writable(cls, filename: str) -> Ns_Procedure_Result:
152153
return True, None
153154

154155
@classmethod
155-
def _check_file_extension(
156-
cls, file_path: Union[str, PathLike], valid_extensions: Union[str, Tuple[str, ...]]
157-
):
156+
def _check_file_extension(cls, file_path: str | PathLike, valid_extensions: str | tuple[str, ...]):
158157
if not os_path.isfile(file_path):
159158
raise FileNotFoundError(f"File {file_path} does not exist")
160159
if not str(file_path).endswith(valid_extensions):
161160
raise ValueError(f"{file_path} does not have a valid extension")
162161

163162
@classmethod
164-
def load_pickle_lzma(cls, file_path: Union[str, PathLike]) -> Any:
163+
def load_pickle_lzma(cls, file_path: str | PathLike) -> Any:
165164
cls._check_file_extension(file_path, (".pickle.lzma", ".pkl.lzma"))
166165

167166
with open(file_path, "rb") as f:
@@ -171,30 +170,30 @@ def load_pickle_lzma(cls, file_path: Union[str, PathLike]) -> Any:
171170
return pickle.loads(data_pickle)
172171

173172
@classmethod
174-
def load_pickle(cls, file_path: Union[str, PathLike]) -> Any:
173+
def load_pickle(cls, file_path: str | PathLike) -> Any:
175174
cls._check_file_extension(file_path, (".pickle", ".pkl"))
176175

177176
with open(file_path, "rb") as f:
178177
data_pickle = f.read()
179178
return pickle.loads(data_pickle)
180179

181180
@classmethod
182-
def load_lzma(cls, file_path: Union[str, PathLike]) -> bytes:
181+
def load_lzma(cls, file_path: str | PathLike) -> bytes:
183182
cls._check_file_extension(file_path, ".lzma")
184183

185184
with open(file_path, "rb") as f:
186185
data_lzma = f.read()
187186
return lzma.decompress(data_lzma)
188187

189188
@classmethod
190-
def load_json(cls, file_path: Union[str, PathLike]) -> Any:
189+
def load_json(cls, file_path: str | PathLike) -> Any:
191190
cls._check_file_extension(file_path, ".json")
192191

193192
with open(file_path, "rb") as f:
194193
return json.load(f)
195194

196195
@classmethod
197-
def dump_json(cls, data: Any, path: Union[str, PathLike]) -> None:
196+
def dump_json(cls, data: Any, path: str | PathLike) -> None:
198197
try:
199198
with open(path, "w") as f:
200199
json.dump(data, f, ensure_ascii=False, indent=2)
@@ -203,7 +202,7 @@ def dump_json(cls, data: Any, path: Union[str, PathLike]) -> None:
203202
cls.dump_json(data, path)
204203

205204
@classmethod
206-
def dump_bytes(cls, data: bytes, path: Union[str, PathLike]) -> None:
205+
def dump_bytes(cls, data: bytes, path: str | PathLike) -> None:
207206
try:
208207
with open(path, "wb") as f:
209208
f.write(data)
@@ -212,7 +211,7 @@ def dump_bytes(cls, data: bytes, path: Union[str, PathLike]) -> None:
212211
cls.dump_bytes(data, path)
213212

214213
@classmethod
215-
def get_verified_ifile_list(cls, ifile_list: Iterable[str]) -> List[str]:
214+
def get_verified_ifile_list(cls, ifile_list: Iterable[str]) -> list[str]:
216215
verified_ifile_list = []
217216
for path in ifile_list:
218217
# File path
@@ -245,10 +244,10 @@ def get_verified_ifile_list(cls, ifile_list: Iterable[str]) -> List[str]:
245244
return verified_ifile_list
246245

247246
@classmethod
248-
def get_verified_subfiles_list(cls, subfiles_list: List[List[str]]) -> List[List[str]]:
247+
def get_verified_subfiles_list(cls, subfiles_list: list[list[str]]) -> list[list[str]]:
249248
verified_subfiles_list = []
250249
for subfiles in subfiles_list:
251-
verified_subfiles: List[str] = cls.get_verified_ifile_list(subfiles)
250+
verified_subfiles: list[str] = cls.get_verified_ifile_list(subfiles)
252251
if len(verified_subfiles) == 1:
253252
logging.critical(
254253
f"Only 1 subfile provided: ({verified_subfiles.pop()}). There should be 2"
@@ -271,15 +270,15 @@ def ensure_unique_filestem(cls, stem: str, existing_stems: Sequence[str]) -> str
271270
class Ns_Cache:
272271
CACHE_EXTENSION = ".pickle.lzma"
273272
# fpath_cname: { "/absolute/path/to/foo.txt": "foo.pickle.lzma", ... }
274-
fpath_cname: Dict[str, str] = (
273+
fpath_cname: dict[str, str] = (
275274
Ns_IO.load_json(CACHE_INFO_PATH)
276275
if CACHE_INFO_PATH.exists() and os_path.getsize(CACHE_INFO_PATH) > 0
277276
else {}
278277
)
279278
info_changed: bool = False
280279

281280
@classmethod
282-
def get_cache_path(cls, file_path: str) -> Tuple[str, bool]:
281+
def get_cache_path(cls, file_path: str) -> tuple[str, bool]:
283282
"""
284283
return (cache_path, available: whether the cache is usable)
285284
"""
@@ -306,7 +305,7 @@ def get_cache_path(cls, file_path: str) -> Tuple[str, bool]:
306305
return cache_path, True
307306

308307
@classmethod
309-
def _size_fmt(cls, filesize: Union[int, float], suffix: str = "B") -> str:
308+
def _size_fmt(cls, filesize: int | float, suffix: str = "B") -> str:
310309
# https://github.com/gaogaotiantian/viztracer/blob/3ecd46aa0e70df7dd78f720a2660d6da211c4a51/src/viztracer/util.py#L12
311310
for unit in ("", "Ki", "Mi", "Gi"):
312311
if abs(filesize) < 1024.0:
@@ -315,7 +314,7 @@ def _size_fmt(cls, filesize: Union[int, float], suffix: str = "B") -> str:
315314
return f"{filesize:.1f} {'Ti'}{suffix}"
316315

317316
@classmethod
318-
def yield_cname_cpath_csize_fpath(cls) -> Generator[Tuple[str, str, str, str], None, None]:
317+
def yield_cname_cpath_csize_fpath(cls) -> Generator[tuple[str, str, str, str], None, None]:
319318
for file_path, cache_name in Ns_Cache.fpath_cname.items():
320319
cache_path = Ns_Cache._name2path(cache_name)
321320
if not os_path.exists(cache_path):

src/neosca/ns_lca/ns_lca.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import os
55
import os.path as os_path
66
import sys
7-
from typing import Dict, List, Literal, Optional, Tuple, Union
7+
from typing import Literal
88

99
from neosca.ns_io import Ns_Cache, Ns_IO
1010
from neosca.ns_lca.ns_lca_counter import Ns_LCA_Counter
@@ -50,16 +50,14 @@ def __init__(
5050
self.is_save_matches = is_save_matches
5151
self.is_save_values = is_save_values
5252

53-
self.counters: List[Ns_LCA_Counter] = []
53+
self.counters: list[Ns_LCA_Counter] = []
5454

55-
def get_lempos_frm_text(
56-
self, text: str, /, cache_path: Optional[str] = None
57-
) -> Tuple[Tuple[str, str], ...]:
55+
def get_lempos_frm_text(self, text: str, /, cache_path: str | None = None) -> tuple[tuple[str, str], ...]:
5856
from neosca.ns_nlp import Ns_NLP_Stanza
5957

6058
return Ns_NLP_Stanza.get_lemma_and_pos(text, tagset=self.tagset, cache_path=cache_path)
6159

62-
def get_lempos_frm_file(self, file_path: str, /) -> Tuple[Tuple[str, str], ...]:
60+
def get_lempos_frm_file(self, file_path: str, /) -> tuple[tuple[str, str], ...]:
6361
from neosca.ns_nlp import Ns_NLP_Stanza
6462

6563
cache_path, is_cache_available = Ns_Cache.get_cache_path(file_path)
@@ -73,7 +71,7 @@ def get_lempos_frm_file(self, file_path: str, /) -> Tuple[Tuple[str, str], ...]:
7371
text = Ns_IO.load_file(file_path)
7472

7573
if not self.is_cache:
76-
cache_path: Optional[str] = None # type: ignore
74+
cache_path: str | None = None # type: ignore
7775

7876
try:
7977
return self.get_lempos_frm_text(text)
@@ -107,7 +105,7 @@ def run_on_text(self, text: str, *, file_path: str = "cli_text", clear: bool = T
107105
if self.is_save_values:
108106
self.dump_values()
109107

110-
def run_on_file_or_subfiles(self, file_or_subfiles: Union[str, List[str]]) -> Ns_LCA_Counter:
108+
def run_on_file_or_subfiles(self, file_or_subfiles: str | list[str]) -> Ns_LCA_Counter:
111109
if isinstance(file_or_subfiles, str):
112110
file_path = file_or_subfiles
113111
lempos_tuples = self.get_lempos_frm_file(file_path)
@@ -126,7 +124,7 @@ def run_on_file_or_subfiles(self, file_or_subfiles: Union[str, List[str]]) -> Ns
126124
return counter
127125

128126
def run_on_file_or_subfiles_list(
129-
self, file_or_subfiles_list: List[Union[str, List[str]]], *, clear: bool = True
127+
self, file_or_subfiles_list: list[str | list[str]], *, clear: bool = True
130128
) -> None:
131129
if clear:
132130
self.counters.clear()
@@ -146,7 +144,7 @@ def dump_values(self) -> None:
146144
if len(self.counters) == 0:
147145
raise ValueError("empty counter list")
148146

149-
value_tables: List[Dict[str, str]] = [
147+
value_tables: list[dict[str, str]] = [
150148
counter.get_all_values(self.precision) for counter in self.counters
151149
]
152150

src/neosca/ns_lca/ns_lca_counter.py

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,11 @@
66
import random
77
import shutil
88
import sys
9+
from collections import OrderedDict
10+
from collections.abc import Sequence
911
from math import log as _log
1012
from math import sqrt as _sqrt
11-
from typing import Dict, List, Literal, Optional, OrderedDict, Sequence, Tuple, Union
13+
from typing import Literal
1214

1315
from neosca import DATA_DIR
1416
from neosca.ns_io import Ns_IO
@@ -59,7 +61,7 @@ class Ns_LCA_Counter:
5961
"AdvV": "adverb variation",
6062
"ModV": "modifier variation",
6163
}
62-
DEFAULT_MEASURES: List[str] = [
64+
DEFAULT_MEASURES: list[str] = [
6365
*(item + suffix for item in COUNT_ITEMS for suffix in ("types", "tokens")),
6466
*FREQ_ITEMS,
6567
]
@@ -76,8 +78,8 @@ def __init__(
7678
) -> None:
7779
self.file_path = file_path
7880

79-
self.count_table: Dict[str, List[str]] = {item: [] for item in self.COUNT_ITEMS}
80-
self.freq_table: Dict[str, Optional[Union[int, float]]] = {item: None for item in self.FREQ_ITEMS}
81+
self.count_table: dict[str, list[str]] = {item: [] for item in self.COUNT_ITEMS}
82+
self.freq_table: dict[str, int | float | None] = {item: None for item in self.FREQ_ITEMS}
8183

8284
word_data_path = DATA_DIR / self.WORDLIST_DATAFILE_MAP[wordlist]
8385
logging.debug(f"Loading {word_data_path}...")
@@ -130,18 +132,18 @@ def get_msttr(cls, lemma_sequence: Sequence[str], *, section_size: int):
130132
Mean Segmental TTR
131133
"""
132134
sample_no: int = 0
133-
msttr: Union[int, float] = 0
135+
msttr: int | float = 0
134136
for chunk in chunks(lemma_sequence, section_size):
135137
if len(chunk) == section_size:
136138
sample_no += 1
137139
msttr += safe_div(len(set(chunk)), section_size)
138140
return safe_div(msttr, sample_no)
139141

140-
def determine_counts(self, lempos_tuples: Tuple[Tuple[str, str], ...]):
142+
def determine_counts(self, lempos_tuples: tuple[tuple[str, str], ...]):
141143
filtered_lempos_tuples = tuple(
142144
filter(lambda lempos: not self.word_classifier.is_("misc", *lempos), lempos_tuples)
143145
)
144-
self.count_table["word"] = list(next(zip(*filtered_lempos_tuples)))
146+
self.count_table["word"] = list(next(zip(*filtered_lempos_tuples, strict=False)))
145147

146148
for lemma, pos in filtered_lempos_tuples:
147149
is_sophisticated = False
@@ -198,7 +200,7 @@ def determine_counts(self, lempos_tuples: Tuple[Tuple[str, str], ...]):
198200
self.count_table["sverb"].append(lemma)
199201
logging.debug(f'Counted "{lemma}" as a sophisticated verb')
200202

201-
def determine_freqs(self, *, section_size: Optional[int] = None) -> None:
203+
def determine_freqs(self, *, section_size: int | None = None) -> None:
202204
if section_size is None:
203205
section_size = self.section_size
204206

@@ -269,11 +271,11 @@ def determine_freqs(self, *, section_size: Optional[int] = None) -> None:
269271
self.freq_table["AdvV"] = safe_div(adv_type_no, lex_token_no)
270272
self.freq_table["ModV"] = safe_div((adv_type_no + adj_type_no), lex_token_no)
271273

272-
def determine_all_values(self, lempos_tuples: Tuple[Tuple[str, str], ...]) -> None:
274+
def determine_all_values(self, lempos_tuples: tuple[tuple[str, str], ...]) -> None:
273275
self.determine_counts(lempos_tuples)
274276
self.determine_freqs()
275277

276-
def get_value(self, key: str, /, precision: int = 4) -> Union[int, float]:
278+
def get_value(self, key: str, /, precision: int = 4) -> int | float:
277279
if (trimmed_key := key.removesuffix("types").removesuffix("tokens")) in self.COUNT_ITEMS:
278280
if key.endswith("types"):
279281
return len(set(self.count_table[trimmed_key]))
@@ -287,7 +289,7 @@ def get_value(self, key: str, /, precision: int = 4) -> Union[int, float]:
287289
else:
288290
raise ValueError(f"Unknown key: {key}")
289291

290-
def get_matches(self, key: str, /) -> List[str]:
292+
def get_matches(self, key: str, /) -> list[str]:
291293
if (trimmed_key := key.removesuffix("types").removesuffix("tokens")) in self.COUNT_ITEMS:
292294
if key.endswith("types"):
293295
return list(dict.fromkeys(self.count_table[trimmed_key]))

0 commit comments

Comments
 (0)