
Commit f3010d9

Merge pull request #1411 from Steffy-zxf/upstream-develop
fix skep data type on Windows
2 parents: 208e880 + c43df37

6 files changed: +14 -14 lines changed
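The same change repeats across all six SKEP example scripts below: the lists returned by the tokenizer are wrapped in np.array(..., dtype="int64"). As the PR title suggests, the motivation is that on Windows (with NumPy 1.x) the default integer type is int32, so arrays built without an explicit dtype end up with a different integer width than on Linux/macOS and can fail dtype checks downstream. A minimal sketch of the difference; the token_ids list and the printed dtypes are illustrative, not part of the commit:

import numpy as np

# What the tokenizer hands back: a plain Python list of ids.
token_ids = [101, 2023, 2003, 102]

implicit = np.array(token_ids)                  # dtype follows the platform default
explicit = np.array(token_ids, dtype="int64")   # dtype pinned to int64 everywhere

print(implicit.dtype)  # typically int32 on Windows (NumPy 1.x), int64 on Linux/macOS
print(explicit.dtype)  # int64 on every platform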

examples/sentiment_analysis/skep/predict_aspect.py

Lines changed: 2 additions & 2 deletions
@@ -104,8 +104,8 @@ def convert_example(example,
         text_pair=example["text_pair"],
         max_seq_len=max_seq_length)

-    input_ids = encoded_inputs["input_ids"]
-    token_type_ids = encoded_inputs["token_type_ids"]
+    input_ids = np.array(encoded_inputs["input_ids"], dtype="int64")
+    token_type_ids = np.array(encoded_inputs["token_type_ids"], dtype="int64")

     if not is_test:
         label = np.array([example["label"]], dtype="int64")

examples/sentiment_analysis/skep/predict_opinion.py

Lines changed: 3 additions & 3 deletions
@@ -70,9 +70,9 @@ def convert_example(example, tokenizer, max_seq_length=512, is_test=False):
         return_length=True,
         is_split_into_words=True,
         max_seq_len=max_seq_length)
-    input_ids = encoded_inputs["input_ids"]
-    token_type_ids = encoded_inputs["token_type_ids"]
-    seq_len = encoded_inputs["seq_len"]
+    input_ids = np.array(encoded_inputs["input_ids"], dtype="int64")
+    token_type_ids = np.array(encoded_inputs["token_type_ids"], dtype="int64")
+    seq_len = np.array(encoded_inputs["seq_len"], dtype="int64")

     return input_ids, token_type_ids, seq_len

examples/sentiment_analysis/skep/predict_sentence.py

Lines changed: 2 additions & 2 deletions
@@ -71,8 +71,8 @@ def convert_example(example,
         token_type_ids(obj: `list[int]`): List of sequence pair mask.
     """
     encoded_inputs = tokenizer(text=example, max_seq_len=max_seq_length)
-    input_ids = encoded_inputs["input_ids"]
-    token_type_ids = encoded_inputs["token_type_ids"]
+    input_ids = np.array(encoded_inputs["input_ids"], dtype="int64")
+    token_type_ids = np.array(encoded_inputs["token_type_ids"], dtype="int64")

     return input_ids, token_type_ids

examples/sentiment_analysis/skep/train_aspect.py

Lines changed: 2 additions & 2 deletions
@@ -93,8 +93,8 @@ def convert_example(example,
         text_pair=example["text_pair"],
         max_seq_len=max_seq_length)

-    input_ids = encoded_inputs["input_ids"]
-    token_type_ids = encoded_inputs["token_type_ids"]
+    input_ids = np.array(encoded_inputs["input_ids"], dtype="int64")
+    token_type_ids = np.array(encoded_inputs["token_type_ids"], dtype="int64")

     if not is_test:
         label = np.array([example["label"]], dtype="int64")

examples/sentiment_analysis/skep/train_opinion.py

Lines changed: 3 additions & 3 deletions
@@ -94,9 +94,9 @@ def convert_example_to_feature(example,
         is_split_into_words=True,
         max_seq_len=max_seq_len)

-    input_ids = tokenized_input['input_ids']
-    token_type_ids = tokenized_input['token_type_ids']
-    seq_len = tokenized_input['seq_len']
+    input_ids = np.array(tokenized_input['input_ids'], dtype="int64")
+    token_type_ids = np.array(tokenized_input['token_type_ids'], dtype="int64")
+    seq_len = np.array(tokenized_input['seq_len'], dtype="int64")

     if is_test:
         return input_ids, token_type_ids, seq_len

examples/sentiment_analysis/skep/train_sentence.py

Lines changed: 2 additions & 2 deletions
@@ -122,8 +122,8 @@ def convert_example(example,
     encoded_inputs = tokenizer(
         text=example["text"], max_seq_len=max_seq_length)

-    input_ids = encoded_inputs["input_ids"]
-    token_type_ids = encoded_inputs["token_type_ids"]
+    input_ids = np.array(encoded_inputs["input_ids"], dtype="int64")
+    token_type_ids = np.array(encoded_inputs["token_type_ids"], dtype="int64")

     if not is_test:
         if dataset_name == "sst-2":
