diff --git a/ltsm/data_pipeline/anormly_pipeline.py b/ltsm/data_pipeline/anormly_pipeline.py index 2bdeea7..3086085 100644 --- a/ltsm/data_pipeline/anormly_pipeline.py +++ b/ltsm/data_pipeline/anormly_pipeline.py @@ -23,7 +23,8 @@ import logging from transformers import ( Trainer, - TrainingArguments + TrainingArguments, + TrainerCallback, ) logging.basicConfig( @@ -51,6 +52,7 @@ def compute_loss(self, model, inputs, return_outputs=False): loss = nn.functional.cross_entropy(outputs, labels) #loss = nn.functional.cross_entropy(outputs.reshape(B*L,-1), inputs["labels"][:,1:].long().reshape(B*L)) return (loss, outputs) if return_outputs else loss + def compute_metrics(self, p): preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions print(preds.shape, p.label_ids.shape) @@ -67,6 +69,27 @@ def compute_metrics(self, p): "recall": recall_score(label_ids, preds_class, average="micro"), "f1": f1_score(label_ids, preds_class, average="micro") } + + +class CustomTrainer(Trainer): + """ + Custom Trainer class that extends the Trainer class from the Transformers library. + This class is used to add custom logging to the Trainer. 
+ """ + def training_step(self, model, inputs): + # this func is used to get more information during training + # here it is used to check the existence of label 1 in the batch + labels = inputs["labels"] + has_label_one = (labels == 1.).any().item() if labels is not None else False + self.current_label_check = has_label_one + + return super().training_step(model, inputs) + + def log(self, logs): + # this func adds the custom log to Trainer + if hasattr(self, "current_label_check"): + logs["has_label_one"] = self.current_label_check + super().log(logs) class AnomalyTrainingPipeline(): """ @@ -113,7 +136,7 @@ def run(self): fp16=False, save_steps=100, eval_steps=25, - logging_steps=5, + logging_steps=1, learning_rate=self.args.learning_rate, gradient_accumulation_steps=self.args.gradient_accumulation_steps, save_total_limit=10, @@ -125,7 +148,7 @@ def run(self): train_dataset, eval_dataset, test_datasets, _ = get_datasets(self.args) train_dataset, eval_dataset= HF_Dataset(train_dataset), HF_Dataset(eval_dataset) - trainer = Trainer( + trainer = CustomTrainer( model=model, args=training_args, data_collator=self.model_manager.collate_fn, @@ -139,7 +162,7 @@ def run(self): # Overload the trainer API if not self.args.eval: trainer.compute_loss = self.model_manager.compute_loss - trainer.prediction_step = self.model_manager.prediction_step + trainer.prediction_step = self.model_manager.prediction_step train_results = trainer.train() trainer.save_model() trainer.log_metrics("train", train_results.metrics) diff --git a/tests/test_scripts/anomaly_config/config-1.json b/tests/test_scripts/anomaly_config/config-1.json index ea43e36..f02e2a6 100644 --- a/tests/test_scripts/anomaly_config/config-1.json +++ b/tests/test_scripts/anomaly_config/config-1.json @@ -9,14 +9,14 @@ "prompt_data_path": "../../prompt_bank/stat-prompt/prompt_data_normalize_split", "data_processing": "standard_scaler", "learning_rate": 1e-4, - "batch_size": 100, + "batch_size": 8, "num_workers": 10, - 
"train_epochs": 4, + "train_epochs": 1, "train_ratio": 0.7, "val_ratio": 0.1, "do_anomaly": true, - "seq_len": 133, - "pred_len": 133, + "seq_len": 113, + "pred_len": 113, "prompt_len": 133, "lora": false, "lora_dim": 128, @@ -36,7 +36,7 @@ "tmax": 10, "eval": 0, "itr": 1, - "output_dir_template": "output/ltsm_lr{learning_rate}_loraFalse_down{downsample_rate}_freeze{freeze}_e{train_epochs}_pred{pred_len}_creditcard_113/", + "output_dir_template": "output/ltsm_lr{learning_rate}_loraFalse_down{downsample_rate}_freeze{freeze}_e{train_epochs}_pred{pred_len}_creditcard_113_check_bsize=8/", "downsample_rate": 20, "llm_layers": 32, "decay_fac": 0.75, diff --git a/tests/test_scripts/anomaly_config/config.json b/tests/test_scripts/anomaly_config/config.json index b12a82b..cbb4516 100644 --- a/tests/test_scripts/anomaly_config/config.json +++ b/tests/test_scripts/anomaly_config/config.json @@ -9,7 +9,7 @@ "prompt_data_path": "../../prompt_bank/stat-prompt/prompt_data_normalize_split", "data_processing": "standard_scaler", "learning_rate": 2e-5, - "batch_size": 100, + "batch_size": 8, "num_workers": 10, "train_epochs": 4, "train_ratio": 0.7, @@ -20,7 +20,7 @@ "prompt_len": 133, "lora": false, "lora_dim": 128, - "gpt_layers": 3, + "gpt_layers": 1, "d_model": 1024, "n_heads": 16, "d_ff": 512, @@ -36,12 +36,12 @@ "tmax": 10, "eval": 0, "itr": 1, - "output_dir_template": "output/ltsm_lr{learning_rate}_loraFalse_down{downsample_rate}_freeze{freeze}_e{train_epochs}_pred{pred_len}_113", + "output_dir_template": "output/ltsm_lr{learning_rate}_loraFalse_down{downsample_rate}_freeze{freeze}_e{train_epochs}_pred{pred_len}_113_check_bsize=8_grad_accumulate=16_layer=1", "downsample_rate": 20, "llm_layers": 32, "decay_fac": 0.75, "lradj": "type1", "patience": 3, - "gradient_accumulation_steps": 64 + "gradient_accumulation_steps": 16 } \ No newline at end of file diff --git a/tests/test_scripts/train_anomaly_main_ltsm.sh b/tests/test_scripts/train_anomaly_main_ltsm.sh index 73c0ca1..0b2884f 
100644 --- a/tests/test_scripts/train_anomaly_main_ltsm.sh +++ b/tests/test_scripts/train_anomaly_main_ltsm.sh @@ -1,4 +1,4 @@ -CONFIG_PATH="./anomaly_config/config-1.json" +CONFIG_PATH="./anomaly_config/config.json" CUDA_VISIBLE_DEVICES=6,7 python3 anomaly_main_ltsm.py \ --config_path ${CONFIG_PATH}