@@ -387,7 +387,7 @@ def infer(self, raw_words, sent_ids) -> CorefResult:
         coref_id_to_real_id_map = {}

         for indx, (i, j) in enumerate(zip(raw_words,
-                                          ((zeros_preds[word_start] > 0.5)
+                                          ((zeros_preds[word_start].argmax(dim=-1) > 0)
                                            .squeeze(-1)
                                            .tolist()))):
             if j:
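
This hunk swaps a sigmoid threshold for an argmax over two logits. A minimal sketch of the equivalence, with hypothetical shapes (the real `zeros_preds` comes from the model above):

```python
import torch

# Before: one sigmoid probability per word. After: two raw logits per word.
old_scores = torch.rand(5, 1)    # values in [0, 1]
new_logits = torch.randn(5, 2)   # unnormalized scores for classes {0, 1}

old_decisions = (old_scores > 0.5).squeeze(-1).tolist()
# argmax(dim=-1) returns the winning class index, so "> 0" means class 1 won.
new_decisions = (new_logits.argmax(dim=-1) > 0).tolist()
```
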
@@ -575,8 +575,8 @@ def train(self, log=False):
             c_loss = self._coref_criterion(res.coref_scores, res.coref_y)

             if (res.zeros_y == 1).any():
-                zeros_preds = res.zeros_scores[res.zeros_y != -100].reshape(1, -1)
-                labels = res.zeros_y[res.zeros_y != -100].reshape(1, -1)
+                zeros_preds = res.zeros_scores[res.zeros_y != -100].reshape(-1, 2)
+                labels = res.zeros_y[res.zeros_y != -100].reshape(-1, 2)
                 # reweight such that the zeros and nonzeros count for equal weighting
                 # that is, artificially balance the "number of samples" by weighting between
                 # them equally
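
The comment above describes balancing the zero and nonzero classes. One common way to realize that with two-class logits is inverse-frequency class weights in a cross-entropy loss; this is a minimal sketch of the idea, not necessarily the criterion the repo uses, and the labels here are assumed to be integer class ids:

```python
import torch
import torch.nn as nn

logits = torch.randn(8, 2)                       # two logits per sample
labels = torch.tensor([0, 0, 0, 0, 0, 0, 1, 1])  # heavily imbalanced classes

# Weight each class by the inverse of its frequency so both classes
# contribute equally to the loss regardless of the imbalance.
counts = torch.bincount(labels, minlength=2).float()
weights = counts.sum() / (2 * counts.clamp(min=1))

criterion = nn.CrossEntropyLoss(weight=weights)
loss = criterion(logits, labels)
```
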
@@ -668,8 +668,8 @@ def train(self, log=False):
     def _bertify(self, doc: Doc, return_subwords=False) -> torch.Tensor:
         if return_subwords:
             (nonblank_batches,
-             nonblank_labels) = bert.get_subwords_batches(doc, self.config,
-                                                self.tokenizer, nonblank_only=True)
+             nonblank_labels) = bert.get_subwords_batches(doc, self.config,
+                                                          self.tokenizer, nonblank_only=True)
             all_batches = bert.get_subwords_batches(doc, self.config, self.tokenizer)

             # we index the batches n at a time to prevent oom
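
The "n at a time" comment refers to chunking the subword batches through the encoder to bound peak memory. A minimal sketch of that pattern, where `encode` is a hypothetical stand-in for the BERT forward pass:

```python
import torch

def encode_in_chunks(all_batches: torch.Tensor, encode, n: int = 4) -> torch.Tensor:
    outputs = []
    for start in range(0, all_batches.shape[0], n):
        # run only n batches through the model at once to prevent OOM
        outputs.append(encode(all_batches[start:start + n]))
    return torch.cat(outputs, dim=0)
```
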
@@ -768,8 +768,9 @@ def _build_model(self, foundation_cache):
         self.rough_scorer = RoughScorer(bert_emb, self.config).to(self.config.device)
         self.sp = SpanPredictor(bert_emb, self.config.sp_embedding_size).to(self.config.device)
         self.zeros_predictor = nn.Sequential(
-            nn.Linear(self.bert.config.hidden_size, 1),
-            nn.Sigmoid()
+            nn.Linear(self.bert.config.hidden_size, self.bert.config.hidden_size),
+            nn.ReLU(),
+            nn.Linear(self.bert.config.hidden_size, 2),
         ).to(self.config.device)

         self.trainable: Dict[str, torch.nn.Module] = {
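
This hunk replaces the single-output sigmoid head with a small two-layer MLP emitting two raw logits, which is what makes the argmax decision in `infer` and the `(-1, 2)` reshape in `train` line up. A minimal standalone sketch of old vs. new head, with a hypothetical `hidden_size` in place of `self.bert.config.hidden_size`:

```python
import torch
import torch.nn as nn

hidden_size = 768  # hypothetical; the real value comes from the BERT config

# Old head: one sigmoid probability per token, thresholded at 0.5.
old_head = nn.Sequential(nn.Linear(hidden_size, 1), nn.Sigmoid())

# New head: an MLP emitting two raw logits per token, so the decision
# becomes an argmax and the loss can be a class-weighted cross-entropy.
new_head = nn.Sequential(
    nn.Linear(hidden_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, 2),
)

x = torch.randn(3, hidden_size)
probs = old_head(x)                     # shape (3, 1), values in [0, 1]
decisions = new_head(x).argmax(dim=-1)  # shape (3,), class indices {0, 1}
```

Keeping the head's output as raw logits rather than sigmoid probabilities also avoids double-squashing if the training criterion applies its own softmax internally.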