Skip to content

Commit 20b6338

Browse files
committed
Can fetch from local dir #1334
1 parent b7d5fea commit 20b6338

File tree

1 file changed: 13 additions and 8 deletions.

NLP/textsummary.py

Lines changed: 13 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
import os
2-
from sentence_transformers import SentenceTransformer
3-
from sklearn.metrics.pairwise import cosine_similarity
4-
import numpy as np
5-
from nltk.tokenize import sent_tokenize, word_tokenize
62
import nltk
3+
from nltk.tokenize import sent_tokenize, word_tokenize
74
from nltk.corpus import stopwords
85
from collections import Counter
6+
from sentence_transformers import SentenceTransformer
7+
from sklearn.metrics.pairwise import cosine_similarity
8+
import numpy as np
99

1010
MODEL_NAME = 'all-MiniLM-L6-v2'
1111
MODEL_FOLDER = 'model'
12+
NLTK_DATA_FOLDER = os.path.join(MODEL_FOLDER, 'nltk_data')
1213

1314
def load_or_download_model():
1415
model_path = os.path.join(MODEL_FOLDER, MODEL_NAME)
@@ -24,13 +25,17 @@ def load_or_download_model():
2425
return model
2526

2627
def download_nltk_resources():
27-
resources = ['punkt', 'stopwords']
28-
for resource in resources:
28+
nltk.data.path.append(NLTK_DATA_FOLDER)
29+
os.makedirs(NLTK_DATA_FOLDER, exist_ok=True)
30+
31+
resources = [('punkt', 'tokenizers'), ('stopwords', 'corpora')]
32+
for resource, folder in resources:
2933
try:
30-
nltk.data.find(f'tokenizers/{resource}')
34+
nltk.data.find(f'{folder}/{resource}')
35+
print(f"{resource} is being Loaded.")
3136
except LookupError:
3237
print(f"Downloading {resource}...")
33-
nltk.download(resource, quiet=True)
38+
nltk.download(resource, download_dir=NLTK_DATA_FOLDER, quiet=True)
3439

3540
def extract_keywords(text, model, top_n=10):
3641
# Tokenize the text

0 commit comments

Comments
 (0)