|
23 | 23 | SQLiteDatabaseConnector, |
24 | 24 | ) |
25 | 25 | from convassist.predictor.utilities.svo_util import SVOUtil |
| 26 | +from convassist.utilities.utils import smart_readlines |
26 | 27 |
|
27 | 28 |
|
28 | 29 | class SentenceCompletionPredictor(Predictor): |
@@ -68,15 +69,13 @@ def configure(self): |
68 | 69 | # We will normalize our vectors to unit length, then is Inner Product equal to cosine similarity |
69 | 70 | self.index = hnswlib.Index(space="cosine", dim=self.embedding_size) |
70 | 71 |
|
71 | | - with open(self.retrieve_database) as f: |
72 | | - self.corpus_sentences = [s.strip() for s in f.readlines()] |
| 72 | + self.corpus_sentences = smart_readlines(self.retrieve_database) |
73 | 73 |
|
74 | | - with open(self.blacklist_file) as f: |
75 | | - self.blacklist_words = [s.strip() for s in f.readlines()] |
| 74 | + self.blacklist_words = smart_readlines(self.blacklist_file) |
76 | 75 |
|
77 | 76 | self.personalized_allowed_toxicwords = self._read_personalized_toxic_words() |
78 | 77 |
|
79 | | - self.svo_util = SVOUtil(self.stopwordsFile) |
| 78 | + self.svo_util = SVOUtil(self.stopwordsFile, nlp_path=self._personalized_resources_path) |
80 | 79 |
|
81 | 80 | if not Path.is_file(Path(self.embedding_cache_path)): |
82 | 81 | self.corpus_embeddings = self.embedder.encode( |
@@ -174,8 +173,9 @@ def _read_personalized_toxic_words(self): |
174 | 173 | with open(self.personalized_allowed_toxicwords_file, "w") as f: |
175 | 174 | pass |
176 | 175 |
|
177 | | - with open(self.personalized_allowed_toxicwords_file) as f: |
178 | | - self.personalized_allowed_toxicwords = f.readlines() |
| 176 | + # with open(self.personalized_allowed_toxicwords_file) as f: |
| 177 | + # self.personalized_allowed_toxicwords = f.readlines() |
| 178 | + self.personalized_allowed_toxicwords = smart_readlines(self.personalized_allowed_toxicwords_file) |
179 | 179 |
|
180 | 180 | self.personalized_allowed_toxicwords = [ |
181 | 181 | s.strip() for s in self.personalized_allowed_toxicwords |
@@ -222,7 +222,7 @@ def _retrieve_fromDataset(self, context): |
222 | 222 | pred = Prediction() |
223 | 223 | probs = {} |
224 | 224 |
|
225 | | - lines = open(self.retrieve_database).readlines() |
| 225 | + lines = smart_readlines(self.retrieve_database) |
226 | 226 | retrieved = [] |
227 | 227 | totalsent = len(lines) |
228 | 228 | for each in lines: |
@@ -509,12 +509,11 @@ def predict(self, max_partial_prediction_size: int, filter: Optional[str] = None |
def load_n_start_sentences(self, max_partial_prediction_size=-1):
    """Load sentence-starter suggestions from the startsents file.

    Reads every line of ``self.startsents`` via ``smart_readlines`` and
    wraps up to ``max_partial_prediction_size`` of them as ``Suggestion``
    objects, each with uniform probability ``1 / len(data)`` (computed
    over ALL loaded lines, not just the returned subset — preserved from
    the original implementation).

    Args:
        max_partial_prediction_size: maximum number of suggestions to
            return; any negative value (the default, -1) means "no limit".

    Returns:
        A ``Prediction`` holding the starter-sentence suggestions
        (empty if the startsents file has no lines).
    """
    predictions = Prediction()

    data = smart_readlines(self.startsents)
    # Guard the empty-file case so 1/len(data) below can never divide by zero.
    if not data:
        return predictions

    # BUG FIX: the previous slice `data[0:max_partial_prediction_size]`
    # silently dropped the LAST sentence whenever the default of -1 was
    # used, because Python reads -1 as "up to, but excluding, the final
    # element". Map any negative sentinel to "return everything".
    if max_partial_prediction_size < 0:
        limit = len(data)
    else:
        limit = max_partial_prediction_size

    for sentence in data[:limit]:
        predictions.add_suggestion(
            Suggestion(sentence.strip(), float(1 / len(data)), self.predictor_name)
        )
    return predictions
519 | 518 |
|
520 | 519 | # Base class method |
|
0 commit comments