Use tiny-random model for prompt tests to speed up CI (#4190)

sijunhe · LemonNoel · web-flow · commit 08e6b3c5d532 · 2022-12-22T10:59:18.000+08:00
* use tiny-random models in the prompt tests

* point the BERT and ERNIE test_modeling integration tests at tiny-random models

Co-authored-by: Noel <wanghuijuan03@baidu.com>
diff --git a/tests/prompt/test_prompt_model.py b/tests/prompt/test_prompt_model.py
@@ -30,8 +30,12 @@
 class PromptModelTest(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        cls.tokenizer = AutoTokenizer.from_pretrained("__internal_testing__/ernie")
-        cls.model = AutoModelForMaskedLM.from_pretrained("__internal_testing__/ernie")
+        cls.tokenizer = AutoTokenizer.from_pretrained("__internal_testing__/tiny-random-ernie")
+        cls.model = AutoModelForMaskedLM.from_pretrained("__internal_testing__/tiny-random-ernie")
+        cls.num_labels = 2
+        cls.seq_cls_model = AutoModelForSequenceClassification.from_pretrained(
+            "__internal_testing__/tiny-random-ernie", num_labels=cls.num_labels
+        )
 
         cls.template = AutoTemplate.create_from(
             prompt="{'soft'}{'text': 'text'}{'mask'}", tokenizer=cls.tokenizer, max_length=512, model=cls.model
@@ -71,36 +75,32 @@ def test_sequence_classification_with_labels(self):
         self.assertEqual(model_outputs.hidden_states.shape[0], len(examples))
 
     def test_efl_no_labels(self):
-        num_labels = 2
-        model = AutoModelForSequenceClassification.from_pretrained("__internal_testing__/ernie", num_labels=num_labels)
-        prompt_model = PromptModelForSequenceClassification(model, self.template, verbalizer=None)
+        prompt_model = PromptModelForSequenceClassification(self.seq_cls_model, self.template, verbalizer=None)
         examples = [{"text": "百度飞桨深度学习框架"}, {"text": "这是一个测试"}]
         encoded_examples = [self.template(i) for i in examples]
         logits, hidden_states = prompt_model(**self.data_collator(encoded_examples))
         self.assertEqual(logits.shape[0], len(examples))
-        self.assertEqual(logits.shape[1], num_labels)
+        self.assertEqual(logits.shape[1], self.num_labels)
         self.assertEqual(hidden_states.shape[0], len(examples))
 
         model_outputs = prompt_model(**self.data_collator(encoded_examples), return_dict=True)
         self.assertIsNone(model_outputs.loss)
         self.assertEqual(model_outputs.logits.shape[0], len(examples))
-        self.assertEqual(model_outputs.logits.shape[1], num_labels)
+        self.assertEqual(model_outputs.logits.shape[1], self.num_labels)
         self.assertEqual(model_outputs.hidden_states.shape[0], len(examples))
 
     def test_efl_with_labels(self):
-        num_labels = 2
-        model = AutoModelForSequenceClassification.from_pretrained("__internal_testing__/ernie", num_labels=num_labels)
-        prompt_model = PromptModelForSequenceClassification(model, self.template, verbalizer=None)
+        prompt_model = PromptModelForSequenceClassification(self.seq_cls_model, self.template, verbalizer=None)
         examples = [{"text": "百度飞桨深度学习框架", "labels": 0}, {"text": "这是一个测试", "labels": 1}]
         encoded_examples = [self.template(i) for i in examples]
         loss, logits, hidden_states = prompt_model(**self.data_collator(encoded_examples))
         self.assertIsNotNone(loss)
         self.assertEqual(logits.shape[0], len(examples))
-        self.assertEqual(logits.shape[1], num_labels)
+        self.assertEqual(logits.shape[1], self.num_labels)
         self.assertEqual(hidden_states.shape[0], len(examples))
 
         model_outputs = prompt_model(**self.data_collator(encoded_examples), return_dict=True)
         self.assertIsNotNone(model_outputs.loss)
         self.assertEqual(model_outputs.logits.shape[0], len(examples))
-        self.assertEqual(model_outputs.logits.shape[1], num_labels)
+        self.assertEqual(model_outputs.logits.shape[1], self.num_labels)
         self.assertEqual(model_outputs.hidden_states.shape[0], len(examples))
diff --git a/tests/prompt/test_template.py b/tests/prompt/test_template.py
@@ -25,8 +25,8 @@
 class TemplateTest(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        cls.tokenizer = AutoTokenizer.from_pretrained("ernie-3.0-nano-zh")
-        cls.model = AutoModelForMaskedLM.from_pretrained("ernie-3.0-nano-zh")
+        cls.tokenizer = AutoTokenizer.from_pretrained("__internal_testing__/tiny-random-ernie")
+        cls.model = AutoModelForMaskedLM.from_pretrained("__internal_testing__/tiny-random-ernie")
         cls.example = {"text_a": "天气晴朗", "text_b": "下雪了", "choices": ["不", "很"], "labels": 0}
         cls.max_length = 20
         cls.tokenizer.add_special_tokens({"additional_special_tokens": ["[O-MASK]"]})
diff --git a/tests/transformers/bert/test_modeling.py b/tests/transformers/bert/test_modeling.py
@@ -590,7 +590,7 @@ def test_auto_model(self):
 class BertModelIntegrationTest(ModelTesterPretrainedMixin, unittest.TestCase):
     base_model_class = BertModel
     hf_remote_test_model_path = "PaddleCI/tiny-random-bert"
-    paddlehub_remote_test_model_path = "__internal_testing__/bert"
+    paddlehub_remote_test_model_path = "__internal_testing__/tiny-random-bert"
 
     @slow
     def test_inference_no_attention(self):
diff --git a/tests/transformers/ernie/test_modeling.py b/tests/transformers/ernie/test_modeling.py
@@ -439,7 +439,7 @@ def test_model_from_pretrained(self):
 class ErnieModelIntegrationTest(unittest.TestCase, ModelTesterPretrainedMixin):
     base_model_class = ErniePretrainedModel
     hf_remote_test_model_path = "PaddleCI/tiny-random-ernie"
-    paddlehub_remote_test_model_path = "__internal_testing__/ernie"
+    paddlehub_remote_test_model_path = "__internal_testing__/tiny-random-ernie"
 
     @slow
     def test_inference_no_attention(self):