Add AutoNLP text classification save/cleanup tests; remove training checkpoints after evaluate and export (#4865)

lugimzzz · web-flow · commit 33be7e59b07b · 2023-02-17T17:08:08.000+08:00
diff --git a/paddlenlp/experimental/autonlp/text_classification.py b/paddlenlp/experimental/autonlp/text_classification.py
@@ -364,6 +364,9 @@ def evaluate(self, trial_id=None, eval_dataset=None):
         )
 
         eval_metrics = trainer.evaluate()
+        if os.path.exists(self.training_path):
+            logger.info(f"Removing {self.training_path} to conserve disk space")
+            shutil.rmtree(self.training_path)
         return eval_metrics
 
     def _compute_metrics(self, eval_preds: EvalPrediction) -> Dict[str, float]:
@@ -506,4 +509,8 @@ def export(self, export_path, trial_id=None):
         # save id2label
         shutil.copyfile(os.path.join(self.output_dir, "id2label.json"), os.path.join(export_path, "id2label.json"))
 
+        if os.path.exists(self.training_path):
+            logger.info("Removing training checkpoints to conserve disk space")
+            shutil.rmtree(self.training_path)
+
         logger.info(f"Exported {trial_id} to {export_path}")
diff --git a/tests/experimental/autonlp/test_text_classification.py b/tests/experimental/autonlp/test_text_classification.py
@@ -126,6 +126,15 @@ def test_multiclass(self, custom_model_candidate, hp_overrides):
                     result_hp_key = f"config/candidates/{hp_key}"
                     self.assertEqual(results_df[result_hp_key][0], hp_value)
 
+            # test save: id2label.json and the saved model/tokenizer (and prompt) files exist
+            self.assertTrue(os.path.exists(os.path.join(auto_trainer.output_dir, "id2label.json")))
+            save_path = os.path.join(auto_trainer._get_model_result().log_dir, auto_trainer.save_path)
+            self.assertTrue(os.path.exists(os.path.join(save_path, "model_state.pdparams")))
+            self.assertTrue(os.path.exists(os.path.join(save_path, "tokenizer_config.json")))
+            if custom_model_candidate["trainer_type"] == "PromptTrainer":
+                self.assertTrue(os.path.exists(os.path.join(save_path, "template_config.json")))
+                self.assertTrue(os.path.exists(os.path.join(save_path, "verbalizer_config.json")))
+
             # test export
             temp_export_path = os.path.join(temp_dir_path, "test_export")
             auto_trainer.export(export_path=temp_export_path)
@@ -159,6 +168,9 @@ def test_multiclass(self, custom_model_candidate, hp_overrides):
                 for prediction in test_result["predictions"]:
                     self.assertIn(prediction["label"], auto_trainer.label2id)
 
+            # test that training_path no longer exists (removed to conserve disk space)
+            self.assertFalse(os.path.exists(os.path.join(auto_trainer.training_path)))
+
     @parameterized.expand(
         [
             (finetune_model_candidate, {"TrainingArguments.max_steps": 2}),
@@ -205,6 +217,15 @@ def test_multilabel(self, custom_model_candidate, hp_overrides):
                     result_hp_key = f"config/candidates/{hp_key}"
                     self.assertEqual(results_df[result_hp_key][0], hp_value)
 
+            # test save: id2label.json and the saved model/tokenizer (and prompt) files exist
+            self.assertTrue(os.path.exists(os.path.join(auto_trainer.output_dir, "id2label.json")))
+            save_path = os.path.join(auto_trainer._get_model_result().log_dir, auto_trainer.save_path)
+            self.assertTrue(os.path.exists(os.path.join(save_path, "model_state.pdparams")))
+            self.assertTrue(os.path.exists(os.path.join(save_path, "tokenizer_config.json")))
+            if custom_model_candidate["trainer_type"] == "PromptTrainer":
+                self.assertTrue(os.path.exists(os.path.join(save_path, "template_config.json")))
+                self.assertTrue(os.path.exists(os.path.join(save_path, "verbalizer_config.json")))
+
             # test export
             temp_export_path = os.path.join(temp_dir_path, "test_export")
             auto_trainer.export(export_path=temp_export_path)
@@ -240,6 +261,9 @@ def test_multilabel(self, custom_model_candidate, hp_overrides):
                     self.assertIn(prediction["label"], auto_trainer.label2id)
                     self.assertGreater(prediction["score"], taskflow.task_instance.multilabel_threshold)
 
+            # test that training_path no longer exists (removed to conserve disk space)
+            self.assertFalse(os.path.exists(os.path.join(auto_trainer.training_path)))
+
     def test_untrained_auto_trainer(self):
         with TemporaryDirectory() as temp_dir:
             train_ds = copy.deepcopy(self.multi_class_train_ds)