Skip to content

Commit 8b0d263

Browse files
committed
the model has no get_max_length() function, so set the tokenizer's max length only when it is None
Signed-off-by: HenryL27 <[email protected]>
1 parent 538f803 commit 8b0d263

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

opensearch_py_ml/ml_models/crossencodermodel.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,7 @@ def zip_model(self, framework: str = "pt", zip_fname: str = "model.zip") -> Path
120120
# save tokenizer file
121121
tk_path = Path(f"/tmp/{mname}-tokenizer")
122122
tk.save_pretrained(tk_path)
123-
if tk.model_max_length > model.get_max_length():
123+
if tk.model_max_length is None:
124124
model_config = AutoConfig.from_pretrained(self._hf_model_id)
125125
if hasattr(model_config, "max_position_embeddings"):
126126
tk.model_max_length = model_config.max_position_embeddings
@@ -129,7 +129,7 @@ def zip_model(self, framework: str = "pt", zip_fname: str = "model.zip") -> Path
129129
else:
130130
tk.model_max_length = 2**15 # =32768. Set to something big I guess
131131
print(
132-
f"The model_max_length is not properly defined in tokenizer_config.json. Setting it to be {tk.model_max_length}"
132+
f"The model_max_length is not found in tokenizer_config.json. Setting it to be {tk.model_max_length}"
133133
)
134134
_fix_tokenizer(tk.model_max_length, tk_path)
135135

0 commit comments

Comments
 (0)