 
 __all__ = [
     'LayoutXLMModel', "LayoutXLMPretrainedModel",
-    "LayoutXLMForTokenClassification", "LayoutXLMForPretraining",
-    "LayoutXLMForRelationExtraction"
+    "LayoutXLMForTokenClassification", "LayoutXLMForSequenceClassification",
+    "LayoutXLMForPretraining", "LayoutXLMForRelationExtraction"
 ]
 
 
@@ -63,6 +63,41 @@ def relative_position_bucket(relative_position,
     return ret
 
 
+def token_feature_to_sequence_feature(input_ids, seq_length, sequence_output):
+    """
+    Transform token features into sequence features in place, by averaging
+    all the token features that belong to the same sub-sequence.
+    """
+    batches = input_ids.shape[0]
+    for batch_id in range(batches):
+        start_idx = -1
+        for i in range(0, seq_length):
+            # Token id 6 starts a new sub-sequence: pool the block that just
+            # ended into its leading position, then remember the new start.
+            if input_ids[batch_id, i] == 6:
+                if start_idx > -1:
+                    feature_block = sequence_output[batch_id, start_idx + 1:i]
+                    sequence_output[batch_id, start_idx] = paddle.mean(
+                        feature_block, axis=0)
+                start_idx = i
+
+            # Token id 1 ends the valid input: pool the final block and stop.
+            if input_ids[batch_id, i] == 1:
+                feature_block = sequence_output[batch_id, start_idx + 1:i]
+                sequence_output[batch_id, start_idx] = paddle.mean(
+                    feature_block, axis=0)
+                break
+
+            # No end marker was found: pool the remaining tokens rather than
+            # reusing a stale (or unbound) feature_block.
+            if i == seq_length - 1:
+                feature_block = sequence_output[batch_id, start_idx + 1:
+                                                seq_length]
+                sequence_output[batch_id, start_idx] = paddle.mean(
+                    feature_block, axis=0)
+    return
+
+
 class LayoutXLMPooler(Layer):
     def __init__(self, hidden_size, with_pool):
         super(LayoutXLMPooler, self).__init__()
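A quick sanity check of the intended pooling behavior, for reviewers: the sketch below is illustrative only, with made-up token ids and features, and assumes (as the code above does) that id 6 delimits sub-sequences and id 1 ends the valid input.

```python
import paddle

# Made-up batch: one sequence of length 8. Ids 10-13 stand in for ordinary
# tokens, 6 for the sub-sequence delimiter, 1 for the end marker, 0 for pad.
input_ids = paddle.to_tensor([[6, 10, 11, 6, 12, 13, 1, 0]])
sequence_output = paddle.arange(8 * 4, dtype="float32").reshape([1, 8, 4])

token_feature_to_sequence_feature(input_ids, 8, sequence_output)

# Position 0 now holds the mean of the features at positions 1-2, and
# position 3 the mean of positions 4-5; positions past the end marker
# are left untouched.
print(sequence_output[0, 0], sequence_output[0, 3])
```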
@@ -911,6 +939,73 @@ def forward(
 
         return outputs
 
 
+class LayoutXLMForSequenceClassification(LayoutXLMPretrainedModel):
+    def __init__(self, layoutxlm, num_classes=2, dropout=None):
+        super(LayoutXLMForSequenceClassification, self).__init__()
+        self.num_classes = num_classes
+        if isinstance(layoutxlm, dict):
+            self.layoutxlm = LayoutXLMModel(**layoutxlm)
+        else:
+            self.layoutxlm = layoutxlm
+        self.dropout = nn.Dropout(dropout if dropout is not None else
+                                  self.layoutxlm.config["hidden_dropout_prob"])
+        self.classifier = nn.Linear(self.layoutxlm.config["hidden_size"],
+                                    num_classes)
+        self.classifier.apply(self.init_weights)
+
+    def get_input_embeddings(self):
+        return self.layoutxlm.embeddings.word_embeddings
+
+    def forward(
+            self,
+            input_ids=None,
+            bbox=None,
+            image=None,
+            attention_mask=None,
+            token_type_ids=None,
+            position_ids=None,
+            head_mask=None,
+            labels=None, ):
+        outputs = self.layoutxlm(
+            input_ids=input_ids,
+            bbox=bbox,
+            image=image,
+            attention_mask=attention_mask,
+            token_type_ids=token_type_ids,
+            position_ids=position_ids,
+            head_mask=head_mask, )
+        seq_length = input_ids.shape[1]
+        # sequence out and image out
+        sequence_output, image_output = outputs[0][:, :seq_length], outputs[
+            0][:, seq_length:]
+
+        # token feature to sequence feature
+        token_feature_to_sequence_feature(input_ids, seq_length, sequence_output)
+
+        sequence_output = self.dropout(sequence_output)
+        logits = self.classifier(sequence_output)
+
+        outputs = (logits, )
+
+        if labels is not None:
+            loss_fct = nn.CrossEntropyLoss()
+
+            if attention_mask is not None:
+                active_loss = attention_mask.reshape([-1, ]) == 1
+                active_logits = logits.reshape(
+                    [-1, self.num_classes])[active_loss]
+                active_labels = labels.reshape([-1, ])[active_loss]
+                loss = loss_fct(active_logits, active_labels)
+            else:
+                loss = loss_fct(
+                    logits.reshape([-1, self.num_classes]),
+                    labels.reshape([-1, ]))
+
+            outputs = (loss, ) + outputs
+
+        return outputs
+
+
 class LayoutXLMPredictionHead(Layer):
     """
     Bert Model with a `language modeling` head on top for CLM fine-tuning.
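For reviewers who want to exercise the new head end to end, here is a minimal usage sketch. The checkpoint name is illustrative, the inputs are random dummies, and it assumes the usual PaddleNLP `from_pretrained` flow; note the labels are per-token, matching the token-level reshape in the loss above.

```python
import paddle

# Illustrative checkpoint name; substitute whatever weights you actually have.
model = LayoutXLMForSequenceClassification.from_pretrained(
    "layoutxlm-base-uncased", num_classes=2)

batch_size, seq_len = 2, 512
input_ids = paddle.randint(low=0, high=100, shape=[batch_size, seq_len])
bbox = paddle.zeros([batch_size, seq_len, 4], dtype="int64")
image = paddle.rand([batch_size, 3, 224, 224])
attention_mask = paddle.ones([batch_size, seq_len], dtype="int64")
# Per-token labels, matching the reshape([-1, ...]) in the loss above.
labels = paddle.zeros([batch_size, seq_len], dtype="int64")

loss, logits = model(
    input_ids=input_ids,
    bbox=bbox,
    image=image,
    attention_mask=attention_mask,
    labels=labels)
print(loss.item(), logits.shape)  # logits: [batch_size, seq_len, num_classes]
```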
@@ -1036,12 +1131,10 @@ def build_relation(self, relations, entities):
 
         for b in range(batch_size):
             if len(entities[b]["start"]) <= 2:
                 entities[b] = {"end": [1, 1], "label": [0, 0], "start": [0, 0]}
-            all_possible_relations = set([
-                (i, j)
-                for i in range(len(entities[b]["label"]))
-                for j in range(len(entities[b]["label"]))
-                if entities[b]["label"][i] == 1 and entities[b]["label"][j] == 2
-            ])
+            all_possible_relations = set(
+                [(i, j) for i in range(len(entities[b]["label"]))
+                 for j in range(len(entities[b]["label"])) if
+                 entities[b]["label"][i] == 1 and entities[b]["label"][j] == 2])
             if len(all_possible_relations) == 0:
                 all_possible_relations = {(0, 1)}
             positive_relations = set(
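As context for the reformatted comprehension: it pairs every entity whose label is 1 with every entity whose label is 2, which in this XFUND-style relation-extraction setup appear to be the head and tail roles. A toy run with made-up data:

```python
# Made-up entities for one batch item: labels 1 and 2 mark the two roles
# that build_relation pairs up as candidate (head, tail) relations.
entities_b = {"start": [0, 4, 9], "end": [3, 8, 12], "label": [1, 2, 2]}

all_possible_relations = set(
    [(i, j) for i in range(len(entities_b["label"]))
     for j in range(len(entities_b["label"]))
     if entities_b["label"][i] == 1 and entities_b["label"][j] == 2])

print(sorted(all_possible_relations))  # [(0, 1), (0, 2)]
```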