1818
1919import json
2020import logging
21+ from functools import lru_cache
2122
2223# AI library imports must be optional to allow installing Toolium without `ai` extra dependency
2324try :
3637# Configure logger
3738logger = logging .getLogger (__name__ )
3839
@lru_cache(maxsize=8)
def get_nlp(model_name):
    """
    Load a spaCy model, reusing previously loaded models.

    Loading a model is expensive, so up to 8 distinct models are kept in an LRU cache keyed by model name.
    Failed loads are not cached, so a later call with the same name retries the load.

    :param model_name: spaCy model name
    :return: spaCy model
    :raises ImportError: if spaCy is not installed
    """
    # spaCy is an optional dependency: fail with an actionable message instead of an AttributeError on None
    if spacy is None:
        raise ImportError("spaCy is not installed. Please run 'pip install toolium[ai]' to use spaCy features")
    return spacy.load(model_name)
50+
3951def is_negator (tok ):
4052 """
4153 Check if a token is a negator using Universal Dependencies guidelines
@@ -93,7 +105,8 @@ def preprocess_with_ud_negation(text, nlp):
93105
94106def get_text_similarity_with_spacy (text , expected_text , model_name = None ):
95107 """
96- Return similarity between two texts using spaCy
108+ Return similarity between two texts using spaCy.
109+ This method normalizes both texts before comparing them.
97110
98111 :param text: string to compare
99112 :param expected_text: string with the expected text
@@ -103,13 +116,12 @@ def get_text_similarity_with_spacy(text, expected_text, model_name=None):
103116 # NOTE: spaCy similarity performance can be enhanced using some strategies like:
104117 # - Normalizing texts (lowercase, extra points, etc.)
105118 # - Use only models that include word vectors (e.g., 'en_core_web_md' or 'en_core_web_lg')
106- # - Preprocessing texts. In this approach, we only preprocess negations.
119+ # - Preprocessing texts. Now we only preprocess negations.
107120 if spacy is None :
108121 raise ImportError ("spaCy is not installed. Please run 'pip install toolium[ai]' to use spaCy features" )
109122 config = DriverWrappersPool .get_default_wrapper ().config
110123 model_name = model_name or config .get_optional ('AI' , 'spacy_model' , 'es_core_news_md' )
111- # TODO: Cache loaded models to improve performance using @lru_cache(maxsize=N) as decorator
112- model = spacy .load (model_name )
124+ model = get_nlp (model_name )
113125 text = model (preprocess_with_ud_negation (text , model ))
114126 expected_text = model (preprocess_with_ud_negation (expected_text , model ))
115127 similarity = model (text ).similarity (model (expected_text ))
0 commit comments