23 changes: 17 additions & 6 deletions src/cnlpt/CnlpModelForClassification.py
@@ -296,12 +296,8 @@ def __init__(
         encoder_model = AutoModel.from_config(encoder_config)
         self.encoder = encoder_model.from_pretrained(config.encoder_name)
 
-        # part of the motivation for leaving this
-        # logic alone for character level models is that
-        # at the time of writing, CANINE and Flair are the only game in town.
-        # CANINE's hashable embeddings for unicode codepoints allows for
-        # additional parameterization, which rn doesn't seem so relevant
-        if not config.character_level:
+        embeddings = self.encoder.get_input_embeddings()
+        # Meta-device weights carry no data, so the resize is deferred to
+        # the from_pretrained() override below.
+        if not embeddings.weight.is_meta:
             self.encoder.resize_token_embeddings(encoder_config.vocab_size)
 
         # This would seem to be redundant with the label list, which maps from tasks to labels,
@@ -369,6 +365,21 @@ def __init__(

         # self.init_weights()
 
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name_or_path, *model_args, **kwargs):
+        model = super().from_pretrained(
+            pretrained_model_name_or_path, *model_args, **kwargs
+        )
+        # Embeddings left on the meta device were skipped by the resize guard
+        # in __init__; resize them now that the checkpoint has been loaded.
+        embeddings = model.encoder.get_input_embeddings()
+        if embeddings.weight.is_meta:
+            tokenizer = kwargs.get("tokenizer", None)
+            if tokenizer is not None:
+                model.encoder.resize_token_embeddings(len(tokenizer))
+            elif hasattr(model, "config") and hasattr(model.config, "vocab_size"):
+                model.encoder.resize_token_embeddings(model.config.vocab_size)
+
+        return model
 
     @property
     def num_layers(self):
         if self.encoder.config.model_type == "modernbert":
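For context on the `is_meta` guard: tensors created on PyTorch's meta device (for example via `accelerate`'s `init_empty_weights()`, or `from_pretrained(..., low_cpu_mem_usage=True)`) carry shape and dtype but no storage, so their values cannot be read or copied until they are materialized. A minimal sketch, independent of this PR (the sizes are illustrative):

```python
# Minimal sketch of PyTorch meta tensors; sizes are illustrative only.
import torch

emb = torch.nn.Embedding(30522, 768, device="meta")
print(emb.weight.is_meta)  # True: shape and dtype only, no storage

# to_empty() allocates real (uninitialized) storage on a concrete device;
# only after that can the weights be filled in, copied, or resized.
emb = emb.to_empty(device="cpu")
print(emb.weight.is_meta)  # False
```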
1 change: 1 addition & 0 deletions src/cnlpt/train_system.py
@@ -715,6 +715,7 @@ def compute_metrics_fn(p: EvalPrediction):
                 )
             )
 
+        metrics["one_score"] = one_score
         return metrics
 
     return compute_metrics_fn
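A hypothetical caller's view of the new `from_pretrained` override (the checkpoint path is a placeholder, and the module/class names are taken from the file touched above):

```python
from transformers import AutoTokenizer

from cnlpt.CnlpModelForClassification import CnlpModelForClassification

# "path/to/checkpoint" is a placeholder for any saved cnlpt checkpoint.
tokenizer = AutoTokenizer.from_pretrained("path/to/checkpoint")
model = CnlpModelForClassification.from_pretrained(
    "path/to/checkpoint",
    tokenizer=tokenizer,  # used by the override to size still-meta embeddings
)
```

Note that the override reads `tokenizer` with `kwargs.get` rather than popping it, so the keyword is also forwarded to the parent `from_pretrained` call.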