Skip to content

Commit 36e7d8c

Browse files
authored
Dev (#35)
### Improvements + Add an option to install spaCy from a Chinese mirror site and en\_core\_web\_sm from sourceforge.net
1 parent a0d4f80 commit 36e7d8c

File tree

14 files changed

+297
-83
lines changed

14 files changed

+297
-83
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
<div align="center"><h1>Changelog</h1></div>
22

3+
## [0.0.53](https://github.com/tanloong/neosca/releases/tag/0.0.53) (25 September 2023)
4+
5+
### Improvements
6+
7+
+ Add an option to install spaCy from a Chinese mirror site and en\_core\_web\_sm from sourceforge.net
8+
39
## [0.0.52](https://github.com/tanloong/neosca/releases/tag/0.0.52) (1 September 2023)
410

511
### New features

README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -461,7 +461,7 @@ BibTeX
461461

462462
```BibTeX
463463
@misc{tan2022neosca,
464-
title = {NeoSCA: A Fork of L2 Syntactic Complexity Analyzer, version 0.0.52},
464+
title = {NeoSCA: A Fork of L2 Syntactic Complexity Analyzer, version 0.0.53},
465465
author = {Long Tan},
466466
howpublished = {\url{https://github.com/tanloong/neosca}},
467467
year = {2022}
@@ -476,7 +476,7 @@ year = {2022}
476476
APA (7th edition)
477477
</summary>
478478

479-
<pre>Tan, L. (2022). <i>NeoSCA</i> (version 0.0.52) [Computer software]. Github. https://github.com/tanloong/neosca</pre>
479+
<pre>Tan, L. (2022). <i>NeoSCA</i> (version 0.0.53) [Computer software]. GitHub. https://github.com/tanloong/neosca</pre>
480480

481481
</details>
482482

@@ -486,7 +486,7 @@ APA (7th edition)
486486
MLA (9th edition)
487487
</summary>
488488

489-
<pre>Tan, Long. <i>NeoSCA</i>. version 0.0.52, GitHub, 2022, https://github.com/tanloong/neosca.</pre>
489+
<pre>Tan, Long. <i>NeoSCA</i>. version 0.0.53, GitHub, 2022, https://github.com/tanloong/neosca.</pre>
490490

491491
</details>
492492

README_zh_cn.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ BibTeX
451451

452452
```BibTeX
453453
@misc{tan2022neosca,
454-
title = {NeoSCA: A Fork of L2 Syntactic Complexity Analyzer, version 0.0.52},
454+
title = {NeoSCA: A Fork of L2 Syntactic Complexity Analyzer, version 0.0.53},
455455
author = {Long Tan},
456456
howpublished = {\url{https://github.com/tanloong/neosca}},
457457
year = {2022}
@@ -466,7 +466,7 @@ year = {2022}
466466
APA (7th edition)
467467
</summary>
468468

469-
<pre>Tan, L. (2022). <i>NeoSCA</i> (version 0.0.52) [Computer software]. Github. https://github.com/tanloong/neosca</pre>
469+
<pre>Tan, L. (2022). <i>NeoSCA</i> (version 0.0.53) [Computer software]. GitHub. https://github.com/tanloong/neosca</pre>
470470

471471
</details>
472472

@@ -476,7 +476,7 @@ APA (7th edition)
476476
MLA (9th edition)
477477
</summary>
478478

479-
<pre>Tan, Long. <i>NeoSCA</i>. version 0.0.52, GitHub, 2022, https://github.com/tanloong/neosca.</pre>
479+
<pre>Tan, Long. <i>NeoSCA</i>. version 0.0.53, GitHub, 2022, https://github.com/tanloong/neosca.</pre>
480480

481481
</details>
482482

README_zh_tw.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -451,7 +451,7 @@ BibTeX
451451

452452
```BibTeX
453453
@misc{tan2022neosca,
454-
title = {NeoSCA: A Fork of L2 Syntactic Complexity Analyzer, version 0.0.52},
454+
title = {NeoSCA: A Fork of L2 Syntactic Complexity Analyzer, version 0.0.53},
455455
author = {Long Tan},
456456
howpublished = {\url{https://github.com/tanloong/neosca}},
457457
year = {2022}
@@ -466,7 +466,7 @@ year = {2022}
466466
APA (7th edition)
467467
</summary>
468468

469-
<pre>Tan, L. (2022). <i>NeoSCA</i> (version 0.0.52) [Computer software]. Github. https://github.com/tanloong/neosca</pre>
469+
<pre>Tan, L. (2022). <i>NeoSCA</i> (version 0.0.53) [Computer software]. GitHub. https://github.com/tanloong/neosca</pre>
470470

471471
</details>
472472

@@ -476,7 +476,7 @@ APA (7th edition)
476476
MLA (9th edition)
477477
</summary>
478478

479-
<pre>Tan, Long. <i>NeoSCA</i>. version 0.0.52, GitHub, 2022, https://github.com/tanloong/neosca.</pre>
479+
<pre>Tan, Long. <i>NeoSCA</i>. version 0.0.53, GitHub, 2022, https://github.com/tanloong/neosca.</pre>
480480

481481
</details>
482482

neosca/about.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
#!/usr/bin/env python3
22
# -*- coding=utf-8 -*-
33

4-
__version__ = "0.0.52"
4+
__version__ = "0.0.53"

neosca/depends_installer.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -245,29 +245,28 @@ def _callbackfunc(self, block_num: int, block_size: int, total_size):
245245
)
246246
same_line_print(s, width=100)
247247

248-
def _download(self, download_url: str, name: str) -> str:
249-
logging.info(f"Downloading {download_url}...")
250-
if name == JAVA:
251-
filename = self._get_java_filename(download_url)
252-
# e.g., jdk-18.0.2
253-
else:
254-
filename = urllib.parse.urlparse(download_url).path.rpartition("/")[-1]
248+
def download(self, url: str, target_basename: Optional[str] = None) -> str:
249+
logging.info(f"Downloading {url}...")
250+
if target_basename is None:
251+
target_basename = urllib.parse.urlparse(url).path.rpartition("/")[-1]
255252
# e.g. stanford-tregex-4.2.0.zip, stanford-parser-4.2.0.zip
256-
filename = os_path.join(tempfile.gettempdir(), filename) # type: ignore
253+
target_path = os_path.join(tempfile.gettempdir(), target_basename) # type: ignore
254+
257255
try:
258256
opener = urllib.request.build_opener()
259257
opener.addheaders = list(self.headers.items())
260258
urllib.request.install_opener(opener)
261-
urllib.request.urlretrieve(download_url, filename, self._callbackfunc)
259+
urllib.request.urlretrieve(url, target_path, self._callbackfunc)
262260
same_line_print("", width=100)
263261
except URLError as e:
264262
if hasattr(e, "reason"):
265-
raise URLError(f"Requesting to {download_url} failed.\nReason: {e.reason}")
263+
raise URLError(f"Requesting to {url} failed.\nReason: {e.reason}")
266264
elif hasattr(e, "code"):
267-
raise URLError(f"Requesting to {download_url} failed.\nReason: {e.code}")
265+
raise URLError(f"Requesting to {url} failed.\nReason: {e.code}")
268266
else:
269-
raise URLError(f"Requesting to {download_url} failed.")
270-
return filename
267+
raise URLError(f"Requesting to {url} failed.")
268+
269+
return target_path
271270

272271
def ask_install(self, name: str, is_assume_yes: bool = False) -> bool:
273272
reason_dict = {
@@ -312,7 +311,9 @@ def install_java(
312311
),
313312
)
314313
url = self.get_java_download_url(version, operating_system, arch, impl)
315-
jdk_archive = self._download(url, name=JAVA)
314+
target_basename = self._get_java_filename(url) # e.g., jdk-18.0.2
315+
jdk_archive = self.download(url, target_basename)
316+
316317
jdk_ext = self._get_normalized_archive_ext(jdk_archive)
317318
jdk_dir = self._decompress_archive(jdk_archive, jdk_ext, target_dir)
318319
jdk_bin = os_path.join(jdk_dir, "bin")
@@ -344,7 +345,7 @@ def install_stanford(
344345
),
345346
}
346347
return False, manual_install_prompt_dict[name]
347-
archive_file = self._download(url, name=name)
348+
archive_file = self.download(url)
348349
archive_ext = self._get_normalized_archive_ext(archive_file)
349350
unzipped_directory = self._decompress_archive(archive_file, archive_ext, target_dir)
350351
if archive_file:

neosca/lca/lca.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -520,9 +520,6 @@ def analyze(
520520

521521
handle.close()
522522

523-
if not self.is_stdout:
524-
logging.info(f"Output has been saved to {self.ofile}. Done.")
525-
526523
return True, None
527524

528525
def ensure_spacy_initialized(func: Callable): # type:ignore

neosca/lca/main.py

Lines changed: 64 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ class LCAUI:
1414
def __init__(self) -> None:
1515
self.args_parser: argparse.ArgumentParser = self.create_args_parser()
1616
self.options: argparse.Namespace = argparse.Namespace()
17+
self.scaio = SCAIO()
1718

1819
self.is_spacy_initialized: bool = False
1920

@@ -106,7 +107,7 @@ def parse_args(self, argv: List[str]) -> SCAProcedureResult:
106107
return False, "Unexpected argument(s):\n\n{}".format("\n".join(ifile_list))
107108
self.verified_ifiles = None
108109
else:
109-
self.verified_ifiles = SCAIO.get_verified_ifile_list(ifile_list)
110+
self.verified_ifiles = self.scaio.get_verified_ifile_list(ifile_list)
110111

111112
self.init_kwargs = {
112113
"wordlist": options.wordlist,
@@ -122,24 +123,50 @@ def install_spacy(self) -> SCAProcedureResult:
122123
from subprocess import CalledProcessError
123124

124125
command = [sys.executable, "-m", "pip", "install", "-U", "spacy"]
126+
if get_yes_or_no(
127+
"Do you want to download spaCy from a Chinese mirror site? If you"
128+
" are inside of China, you may want to use this for a faster network"
129+
" connection."
130+
):
131+
command.extend(["-i", "https://pypi.tuna.tsinghua.edu.cn/simple"])
132+
125133
try:
126134
subprocess.run(command, check=True, capture_output=False)
127135
except CalledProcessError as e:
128136
return False, f"Failed to install spaCy: {e}"
129137

130-
command = [sys.executable, "-m", "spacy", "download", "en_core_web_sm"]
138+
return True, None
139+
140+
def install_model(self) -> SCAProcedureResult:
141+
import subprocess
142+
from subprocess import CalledProcessError
143+
144+
if get_yes_or_no(
145+
"Do you want to download en_core_web_sm from sourceforge.net? If you"
146+
" are inside of China, you may want to use this for a faster network"
147+
" connection."
148+
):
149+
command = [
150+
sys.executable,
151+
"-m",
152+
"pip",
153+
"install",
154+
"https://master.dl.sourceforge.net/project/en-core-web-sm/en_core_web_sm-3.6.0-py3-none-any.whl?viasf=1",
155+
]
156+
else:
157+
command = [sys.executable, "-m", "spacy", "download", "en_core_web_sm"]
158+
131159
try:
132160
subprocess.run(command, check=True, capture_output=False)
133161
except CalledProcessError as e:
134162
return False, f"Failed to download en_core_web_sm: {e}"
135163

136164
return True, None
137165

138-
def check_spacy(self) -> SCAProcedureResult:
166+
def check_spacy_and_model(self) -> SCAProcedureResult:
139167
try:
140168
logging.info("Trying to load spaCy...")
141169
import spacy # type: ignore # noqa: F401 'spacy' imported but unused
142-
import en_core_web_sm # type: ignore # noqa: F401 'en_core_web_sm' imported but unused
143170
except ModuleNotFoundError:
144171
is_install = get_yes_or_no(
145172
"Running LCA requires spaCy. Do you want me to install it for you?"
@@ -148,26 +175,56 @@ def check_spacy(self) -> SCAProcedureResult:
148175
return (
149176
False,
150177
(
151-
"\nspaCy installation refused. You need to manually install it using:"
178+
"\nspaCy installation is refused. You need to manually install it using:"
152179
"\npip install spacy"
153-
"\npython -m spacy download en_core_web_sm"
154180
),
155181
)
156182
return self.install_spacy()
157183
else:
158184
color_print("OKGREEN", "ok", prefix="spaCy has already been installed. ")
185+
186+
try:
187+
logging.info("Trying to load en_core_web_sm...")
188+
import en_core_web_sm # type: ignore # noqa: F401 'en_core_web_sm' imported but unused
189+
except ModuleNotFoundError:
190+
is_install = get_yes_or_no(
191+
"Running LCA requires spaCy's en_core_web_sm model. Do you want me to install it for you?"
192+
)
193+
if not is_install:
194+
return (
195+
False,
196+
(
197+
"\nen_core_web_sm installation is refused. You need to manually install it using:"
198+
"\npython -m spacy download en_core_web_sm"
199+
),
200+
)
201+
return self.install_model()
202+
else:
203+
color_print("OKGREEN", "ok", prefix="en_core_web_sm has already been installed. ")
159204
return True, None
160205

206+
def exit_routine(self) -> None:
207+
if self.options.is_quiet or self.options.is_stdout:
208+
return
209+
210+
color_print(
211+
"OKGREEN",
212+
f"{os_path.abspath(self.options.ofile)}",
213+
prefix="Output has been saved to ",
214+
postfix=". Done.",
215+
)
216+
161217
def run_tmpl(func: Callable): # type:ignore
162218
def wrapper(self, *args, **kwargs):
163-
sucess, err_msg = self.check_spacy()
219+
sucess, err_msg = self.check_spacy_and_model()
164220
if not sucess:
165221
return sucess, err_msg
166222
if not self.options.is_stdout:
167223
sucess, err_msg = SCAIO.is_writable(self.options.ofile)
168224
if not sucess:
169225
return sucess, err_msg
170226
func(self, *args, **kwargs)
227+
self.exit_routine()
171228
return True, None
172229

173230
return wrapper

neosca/main.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,8 @@
2222

2323
class SCAUI:
2424
def __init__(self) -> None:
25-
self.supported_ifile_types = [".txt", ".docx", ".odt"]
25+
self.scaio = SCAIO()
26+
self.supported_ifile_types = list(self.scaio.ext_read_map.keys())
2627
self.cwd = os.getcwd()
2728
self.args_parser: argparse.ArgumentParser = self.create_args_parser()
2829
self.options: argparse.Namespace = argparse.Namespace()
@@ -317,7 +318,7 @@ def parse_args(self, argv: List[str]) -> SCAProcedureResult:
317318
logging.info(f"Command-line text: {options.text}")
318319
self.verified_ifiles = None
319320
else:
320-
self.verified_ifiles = SCAIO.get_verified_ifile_list(ifile_list)
321+
self.verified_ifiles = self.scaio.get_verified_ifile_list(ifile_list)
321322

322323
if options.subfiles_list is None:
323324
self.verified_subfiles_list: List[list] = []

0 commit comments

Comments
 (0)