@@ -233,219 +233,6 @@ def testTP2Sharding4(self):
             np.testing.assert_allclose(res[0], res[1], self.rtol)
 
 
-@pytest.mark.xdist_group(name="UC")
-class TestUnifiedCheckpointFull(TestUnifiedCheckpointBase):
-    @skip_for_none_ce_case
-    @require_paddle_at_least_8_gpu
-    def testTP8(self):
-        remove_logs()
-        remove_ckpt(pretrain_arguments["output_dir"])
-
-        train_args = self.configs["TP8"]
-        self.runfirst(train_args)
-        self.rerun(train_args)
-
-        if self.need_allclose:
-            res = check_acc()
-            assert len(res) == 2
-            np.testing.assert_allclose(res[0], res[1], self.rtol)
-
-    @skip_for_none_ce_case
-    @require_paddle_at_least_8_gpu
-    def testTP4DP2(self):
-        remove_logs()
-        remove_ckpt(pretrain_arguments["output_dir"])
-
-        train_args = self.configs["TP4DP2"]
-        self.runfirst(train_args)
-        self.rerun(train_args)
-
-        if self.need_allclose:
-            res = check_acc()
-            assert len(res) == 2
-            np.testing.assert_allclose(res[0], res[1], self.rtol)
-
-    @skip_for_none_ce_case
-    @require_paddle_at_least_8_gpu
-    def testTP4Sharding2(self):
-        remove_logs()
-        remove_ckpt(pretrain_arguments["output_dir"])
-
-        train_args = self.configs["TP4Sharding2"]
-        self.runfirst(train_args)
-        self.rerun(train_args)
-
-        if self.need_allclose:
-            res = check_acc()
-            assert len(res) == 2
-            np.testing.assert_allclose(res[0], res[1], self.rtol)
-
-    @skip_for_none_ce_case
-    @require_paddle_at_least_8_gpu
-    def testTP2PP4(self):
-        remove_logs()
-        remove_ckpt(pretrain_arguments["output_dir"])
-
-        train_args = self.configs["TP2PP4"]
-        self.runfirst(train_args)
-        self.rerun(train_args)
-
-        if self.need_allclose:
-            res = check_acc()
-            assert len(res) == 2
-            np.testing.assert_allclose(res[0], res[1], self.rtol)
-
-    @skip_for_none_ce_case
-    @require_paddle_at_least_8_gpu
-    def testPP8(self):
-        remove_logs()
-        remove_ckpt(pretrain_arguments["output_dir"])
-
-        train_args = self.configs["PP8"]
-        self.runfirst(train_args)
-        self.rerun(train_args)
-
-        if self.need_allclose:
-            res = check_acc()
-            assert len(res) == 2
-            np.testing.assert_allclose(res[0], res[1], self.rtol)
-
-    @skip_for_none_ce_case
-    @require_paddle_at_least_8_gpu
-    def testPP4DP2(self):
-        remove_logs()
-        remove_ckpt(pretrain_arguments["output_dir"])
-
-        train_args = self.configs["PP4DP2"]
-        self.runfirst(train_args)
-        self.rerun(train_args)
-
-        if self.need_allclose:
-            res = check_acc()
-            assert len(res) == 2
-            np.testing.assert_allclose(res[0], res[1], self.rtol)
-
-    @skip_for_none_ce_case
-    @require_paddle_at_least_8_gpu
-    def testPP4Sharding2(self):
-        remove_logs()
-        remove_ckpt(pretrain_arguments["output_dir"])
-
-        train_args = self.configs["PP4Sharding2"]
-        self.runfirst(train_args)
-        self.rerun(train_args)
-
-        if self.need_allclose:
-            res = check_acc()
-            assert len(res) == 2
-            np.testing.assert_allclose(res[0], res[1], self.rtol)
-
-    @skip_for_none_ce_case
-    @require_paddle_at_least_8_gpu
-    def testSharding8S1(self):
-        remove_logs()
-        remove_ckpt(pretrain_arguments["output_dir"])
-
-        train_args = self.configs["Sharding8S1"]
-        self.runfirst(train_args)
-        self.rerun(train_args)
-
-        if self.need_allclose:
-            res = check_acc()
-            assert len(res) == 2
-            np.testing.assert_allclose(res[0], res[1], self.rtol)
-
-    @skip_for_none_ce_case
-    @require_paddle_at_least_8_gpu
-    def testSharding8S2(self):
-        remove_logs()
-        remove_ckpt(pretrain_arguments["output_dir"])
-
-        train_args = self.configs["Sharding8S2"]
-        self.runfirst(train_args)
-        self.rerun(train_args)
-
-        if self.need_allclose:
-            res = check_acc()
-            assert len(res) == 2
-            np.testing.assert_allclose(res[0], res[1], self.rtol)
-
-    @skip_for_none_ce_case
-    @require_paddle_at_least_8_gpu
-    def testSharding4S1DP2(self):
-        remove_logs()
-        remove_ckpt(pretrain_arguments["output_dir"])
-
-        train_args = self.configs["Sharding4S1DP2"]
-        self.runfirst(train_args)
-        self.rerun(train_args)
-
-        if self.need_allclose:
-            res = check_acc()
-            assert len(res) == 2
-            np.testing.assert_allclose(res[0], res[1], self.rtol)
-
-    @skip_for_none_ce_case
-    @require_paddle_at_least_8_gpu
-    def testSharding4S2DP2(self):
-        remove_logs()
-        remove_ckpt(pretrain_arguments["output_dir"])
-
-        train_args = self.configs["Sharding4S2DP2"]
-        self.runfirst(train_args)
-        self.rerun(train_args)
-
-        if self.need_allclose:
-            res = check_acc()
-            assert len(res) == 2
-            np.testing.assert_allclose(res[0], res[1], self.rtol)
-
-    @skip_for_none_ce_case
-    @require_paddle_at_least_8_gpu
-    def testSharding2S1DP4(self):
-        remove_logs()
-        remove_ckpt(pretrain_arguments["output_dir"])
-
-        train_args = self.configs["Sharding2S1DP4"]
-        self.runfirst(train_args)
-        self.rerun(train_args)
-
-        if self.need_allclose:
-            res = check_acc()
-            assert len(res) == 2
-            np.testing.assert_allclose(res[0], res[1], self.rtol)
-
-    @skip_for_none_ce_case
-    @require_paddle_at_least_8_gpu
-    def testSharding2S2DP4(self):
-        remove_logs()
-        remove_ckpt(pretrain_arguments["output_dir"])
-
-        train_args = self.configs["Sharding2S2DP4"]
-        self.runfirst(train_args)
-        self.rerun(train_args)
-
-        if self.need_allclose:
-            res = check_acc()
-            assert len(res) == 2
-            np.testing.assert_allclose(res[0], res[1], self.rtol)
-
-    @skip_for_none_ce_case
-    @require_paddle_at_least_8_gpu
-    def testDP8(self):
-        remove_logs()
-        remove_ckpt(pretrain_arguments["output_dir"])
-
-        train_args = self.configs["DP8"]
-        self.runfirst(train_args)
-        self.rerun(train_args)
-
-        if self.need_allclose:
-            res = check_acc()
-            assert len(res) == 2
-            np.testing.assert_allclose(res[0], res[1], self.rtol)
-
-
 @pytest.mark.skipif(True, reason="Skip for None CE")
 class TestUnifiedCheckpointOnN2C4(TestUnifiedCheckpointBase):
     def setUp(self):
@@ -460,28 +247,6 @@ def rerun(self, train_args):
         self.run_n2c4(self.run_pretrain_file, **train_args)
 
 
-# Test Unified Checkpoint Hybrid Parallel Strategy Convert on N1C8
-@pytest.mark.skipif(True, reason="Skip for failed")
-class TestUnifiedCheckpointOnN1C8Dynamic(TestUnifiedCheckpointFull):
-    def setUp(self):
-        super().setUp()
-        self.need_allclose = False
-        self.rtol = 1e-4
-        self.k = MAX_CONVERT_CONFIGS  # max: 16, min: 1
-
-    def runfirst(self, train_args):
-        self.run_n1c8(self.run_pretrain_file, **train_args)
-
-    def rerun(self, train_args):
-        configs = random_sample(self.configs.keys(), k=self.k)
-        for config_name in configs:
-            print(f"Rerun using {config_name}")
-            config = self.configs[config_name]
-            self.run_n1c8(self.run_pretrain_file, **config)
-            res = check_acc()
-            np.testing.assert_allclose(res[0], res[-1], rtol=self.rtol)
-
-
 # Test Unified Checkpoint Hybrid Parallel Strategy Convert on N2C4
 @pytest.mark.skipif(True, reason="Skip for failed")
 class TestUnifiedCheckpointOnN2C4Dynamic(TestUnifiedCheckpointBase):
@@ -1132,42 +897,3 @@ def rerun(self, train_args):
             self.run_n1c8(self.run_pretrain_file, **config)
             res = check_acc()
             np.testing.assert_allclose(res[0], res[-1], rtol=self.rtol)
-
-
-@pytest.mark.skipif(True, reason="Skip for None CE")
-class TestUnifiedCheckpointOnN1C8SaveLoadSpeed(TestUnifiedCheckpointFull):
-    def setUp(self):
-        super().setUp()
-        for config_key in self.configs:
-            self.configs[config_key]["skip_profile_timer"] = 0
-            self.configs[config_key]["unified_checkpoint"] = 1
-            self.configs[config_key]["save_steps"] = 6
-            self.configs[config_key]["unified_checkpoint_config"] = "skip_save_model_weight master_weight_compatible"
-
-        self.need_allclose = False
-        self.rtol = 1e-7
-
-    def runfirst(self, train_args):
-        self.run_n1c8(self.run_pretrain_file, log_dir="log_uc", **train_args)
-
-    def rerun(self, train_args):
-        self.run_n1c8(self.run_pretrain_file, log_dir="log_uc", **train_args)
-
-
-@pytest.mark.skipif(True, reason="Skip for None CE")
-class TestPaddleCheckpointOnN1C8SaveLoadSpeed(TestUnifiedCheckpointFull):
-    def setUp(self):
-        super().setUp()
-        for config_key in self.configs:
-            self.configs[config_key]["skip_profile_timer"] = 0
-            self.configs[config_key]["unified_checkpoint"] = 0
-            self.configs[config_key]["save_steps"] = 6
-
-        self.need_allclose = False
-        self.rtol = 1e-7
-
-    def runfirst(self, train_args):
-        self.run_n1c8(self.run_pretrain_file, log_dir="log_pd", **train_args)
-
-    def rerun(self, train_args):
-        self.run_n1c8(self.run_pretrain_file, log_dir="log_pd", **train_args)
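Side note on the scheduling marker seen in the first hunk (illustrative only, not part of this PR): the removed TestUnifiedCheckpointFull class carried @pytest.mark.xdist_group(name="UC"). With pytest-xdist (2.5 or newer) run under --dist loadgroup, all tests sharing a group name are dispatched to the same worker, which matters when every test in the group needs the full set of 8 GPUs. A minimal sketch of that behavior, assuming pytest and pytest-xdist are installed and the suite is launched with `pytest -n 2 --dist loadgroup`:

    import pytest


    @pytest.mark.xdist_group(name="UC")
    class TestRunsOnOneWorker:
        # Both tests share the "UC" group, so the loadgroup scheduler keeps
        # them on a single xdist worker instead of spreading them across
        # workers that would otherwise compete for the same GPUs.
        def test_first(self):
            assert True

        def test_second(self):
            assert True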