Skip to content

Commit 93bb4f7

Browse files
qsodia and Copilot authored
Update en_core_web_sm version (#64)
* update en_core_web_sm version
* set en-core-web-sm version in pyproject.toml
* Update module_textdeid.py

  Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
* raise error if en_core_web_sm_model not found

---------

Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
1 parent 5e88eaa commit 93bb4f7

File tree

4 files changed

+23
-2
lines changed

4 files changed

+23
-2
lines changed

Makefile

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,6 @@ deps: deps-python deps-deid deps-electron
2424
deps-python:
2525
uv --version || (echo "uv is not installed. Please install uv and try again." && exit 1)
2626
uv sync
27-
uv run python -m spacy download en_core_web_sm
2827

2928
deps-deid:
3029
cd deid && npm install

module_textdeid.py

Lines changed: 11 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,17 @@
5151
def create_nlp_engine():
5252
if getattr(sys, 'frozen', False):
5353
bundle_dir = os.path.abspath(os.path.dirname(sys.executable))
54-
model_path = os.path.join(bundle_dir, '_internal', 'en_core_web_sm', 'en_core_web_sm-3.7.1')
54+
models_base_dir = os.path.join(bundle_dir, '_internal', 'en_core_web_sm')
55+
model_subdir = None
56+
if os.path.isdir(models_base_dir):
57+
for entry in os.listdir(models_base_dir):
58+
full_path = os.path.join(models_base_dir, entry)
59+
if entry.startswith('en_core_web_sm-') and os.path.isdir(full_path):
60+
model_subdir = full_path
61+
break
62+
if model_subdir is None:
63+
raise FileNotFoundError(f"en_core_web_sm model not found in {models_base_dir}")
64+
model_path = model_subdir
5565
import spacy
5666
from presidio_analyzer.nlp_engine import SpacyNlpEngine
5767
nlp_engine = SpacyNlpEngine(models=[{"lang_code": "en", "model_name": model_path}])

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -36,9 +36,11 @@ dependencies = [
3636
"ruamel-yaml-clib>=0.2.15",
3737
"six>=1.17.0",
3838
"spacy>=3.8.11",
39+
"en-core-web-sm @ https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl",
3940
"tenacity>=9.1.4",
4041
"typing-extensions>=4.15.0",
4142
"tzdata>=2025.3",
4243
"tzlocal>=5.3.1",
4344
"zipp>=3.23.0",
4445
]
46+

uv.lock

Lines changed: 10 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)