create better debug report to show more detailed paths and files

liyin2015 · liyin2015 · commit 4c8f6ba369e7 · 2024-12-15T13:38:23.000-08:00
diff --git a/adalflow/adalflow/core/generator.py b/adalflow/adalflow/core/generator.py
@@ -940,10 +940,18 @@ def __init__(self, **kwargs):
         if kwargs is None:
             kwargs = {}
         kwargs["template"] = FEEDBACK_ENGINE_TEMPLATE
+
         super().__init__(**kwargs)
         self.name = "BackwardEngine"
         self.teacher_mode = False
 
+    def call(self, **kwargs) -> GeneratorOutputType:
+        r"""Run the parent ``call`` and raise ``ValueError`` if the output error mentions 429 (treated as a rate-limit error)."""
+        output = super().call(**kwargs)
+        if output and output.error is not None and "429" in output.error:
+            raise ValueError(f"Error in the backward engine: {output.error}")
+        return output
+
     @staticmethod
     def failure_message_to_optimizer(
         gradient_response: GeneratorOutput,
diff --git a/adalflow/adalflow/optim/parameter.py b/adalflow/adalflow/optim/parameter.py
@@ -453,6 +453,9 @@ def get_gradient_and_context_text(self) -> str:
                 [self.gradients_context[g] for g in self.gradients],
             )
         )
+        # set all gradients value to None
+        for g in self.gradients:
+            g.data = None
 
         gradient_context_combined_str = Prompt(
             template=COMBINED_GRADIENTS_TEMPLATE,
diff --git a/adalflow/adalflow/optim/text_grad/tgd_optimizer.py b/adalflow/adalflow/optim/text_grad/tgd_optimizer.py
@@ -386,6 +386,8 @@ def propose(self):
         if self.proposing:
             raise ValueError("Already proposing a value.")
 
+        print("Proposing a new value.")
+
         # no cache so that new proposal can be made
         no_cache = True
         # print("Proposing a new value.")
diff --git a/adalflow/adalflow/optim/trainer/adal.py b/adalflow/adalflow/optim/trainer/adal.py
@@ -249,13 +249,18 @@ def evaluate_samples(
             )
 
             for future in concurrent.futures.as_completed(futures):
-                i = futures[future]
-                acc_list[i] = (
-                    future.result()
-                )  # Place the result in the correct position
-                progress_bar.update(
-                    1
-                )  # Update progress bar after each result is collected
+                try:
+                    i = futures[future]
+                    acc_list[i] = (
+                        future.result()
+                    )  # Place the result in the correct position
+                    progress_bar.update(
+                        1
+                    )  # Update progress bar after each result is collected
+                except Exception as e:
+
+                    progress_bar.close()
+                    raise ValueError(f"Exception in task {i}: {e}")
 
         avg_score = float(np.mean(np.array(acc_list)))
         return EvaluationResult(avg_score=avg_score, per_item_scores=acc_list)
diff --git a/adalflow/adalflow/optim/trainer/trainer.py b/adalflow/adalflow/optim/trainer/trainer.py
@@ -317,8 +317,8 @@ def diagnose_report(
 
     def debug_report(
         self,
-        text_grad_debug_path: Optional[str] = None,
-        few_shot_demo_debug_path: Optional[str] = None,
+        text_grad_debug_path: Optional[Dict[str, object]] = None,
+        few_shot_demo_debug_path: Optional[Dict[str, object]] = None,
     ):
         import colorama
         from colorama import Fore
@@ -361,9 +361,12 @@ def fit(
         resume_from_ckpt: Optional[
             str
         ] = None,  # TODO: have a more comprehensive ckpt loading in the future
-    ):
+    ) -> Tuple[str, TrainerResult]:
         r"""
         train_loader: An iterable or collection of iterables specifying training samples.
+
+        Returns:
+            Tuple[str, TrainerResult]: Checkpoint file and the TrainerResult object
         """
         start_time = time.time()
 
@@ -491,7 +494,7 @@ def fit(
                     train_loader, train_dataset, val_dataset, test_dataset
                 )
             self.debug_report(text_grad_debug_path, few_shot_demo_debug_path)
-            return
+            return self.ckpt_file, trainer_results
 
         ########Run text_optimizers and demo optimizers in sequential order ########
         if (
@@ -557,6 +560,7 @@ def fit(
         end_time = time.time()
         print(f"Training time: {end_time - start_time}s")
         print(f"ckpt_file: {self.ckpt_file}")
+        return self.ckpt_file, trainer_results
 
     @staticmethod
     def _estimate_num_epochs(train_loader: Any, max_steps: int):
@@ -684,7 +688,7 @@ def _pre_fit(self, val_dataset: Any, test_dataset: Any) -> TrainerResult:
 
     def _fit_demos_one_step_for_debug(
         self, train_loader, train_dataset: Any, val_dataset: Any, test_dataset: Any
-    ) -> str:
+    ) -> Dict[str, object]:
         """Trace both the teacher and the student demos with scores and for sampling.
         For demos: we need to run both the teacher mode and the student mode."""
 
@@ -760,6 +764,8 @@ def _fit_demos_one_step_for_debug(
 
         # 2. run student mode
 
+        demo_debug_result_path = None
+
         for batch_idx, batch in enumerate(train_loader):
             print(f"Training step: {batch_idx}")
             if batch_idx > 0:
@@ -820,7 +826,9 @@ def _fit_demos_one_step_for_debug(
             self._demo_optimizers_propose()
             graph_path = os.path.join(debug_path, "student_graph")
 
-            paths = losses_student[0].draw_graph(filepath=graph_path)  # noqa F841
+            demo_debug_result_path = losses_student[0].draw_graph(
+                filepath=graph_path
+            )  # noqa F841
 
             # test step
             self._demo_optimizers_step()
@@ -851,9 +859,9 @@ def _fit_demos_one_step_for_debug(
                     if len(param._demos) == 0:
                         raise ValueError(f"No demos found, param: {param}")
 
-        return debug_path
+        return demo_debug_result_path
 
-    def _fit_text_grads_one_step_for_debug(self, train_loader: Any) -> str:
+    def _fit_text_grads_one_step_for_debug(self, train_loader: Any) -> Dict[str, str]:
         printc(
             "Debugging fitting one step with batch size 2 for text optimizer", "blue"
         )
@@ -901,8 +909,8 @@ def _fit_text_grads_one_step_for_debug(self, train_loader: Any) -> str:
         # test optimizer
         self._propose_text_optimizers()
 
-        total_loss.draw_graph(filepath=debug_path, full_trace=True)
-        return debug_path
+        debug_files = total_loss.draw_graph(filepath=debug_path, full_trace=True)
+        return debug_files
 
     def _set_demo_optimizers_dataset(self, train_dataset: Any):
         # init the dataset
@@ -1701,6 +1709,9 @@ def _text_grad_constraint_propose_step(
         all_y_preds,
         include_demo_optimizers: bool = False,
     ):
+        """Handles both the mixed training and the separate training.
+        When include_demo_optimizers is True, the demo optimizers are included in the training
+        """
         # comptute moving batch acc
         from adalflow.optim.parameter import Parameter
 
@@ -1894,6 +1905,7 @@ def _fit_text_grad_constraint(
                         trainer_results.prompts[-1],
                         total_steps,
                     )
+                    self._add_failed_proposals_text_optimizers()
                     continue
 
                 # prune the correct sample size if its too big, same with error samples
diff --git a/use_cases/config.py b/use_cases/config.py
@@ -41,7 +41,7 @@
 gpt_4o_model = {
     "model_client": OpenAIClient(),
     "model_kwargs": {
-        "model": "gpt-4o",
+        "model": "gpt-4o-mini",
         "temperature": 1,
         "top_p": 0.99,
         "max_tokens": 1000,
diff --git a/use_cases/question_answering/bbh/object_count/task.py b/use_cases/question_answering/bbh/object_count/task.py
@@ -60,6 +60,14 @@ def call(
         self, question: str, id: str = None
     ) -> Union[adal.GeneratorOutput, adal.Parameter]:
         output = self.llm_counter(prompt_kwargs={"input_str": question}, id=id)
+        print(f"output: {output}, training: {self.training}")
+        if self.training:
+            if output.full_response.error and "429" in output.full_response.error:
+                raise ValueError("Rate limit exceeded")
+        else:
+            if output.error and "429" in output.error:
+                print("rate limit exceeded:")
+                raise ValueError("Rate limit exceeded")
         return output
 
 
diff --git a/use_cases/question_answering/bbh/object_count/train_new.py b/use_cases/question_answering/bbh/object_count/train_new.py
@@ -130,22 +130,33 @@ def train(
     print(trainer)
 
     train_dataset, val_dataset, test_dataset = load_datasets()
-    trainer.fit(
+    ckpt, _ = trainer.fit(
         train_dataset=train_dataset,
         val_dataset=val_dataset,
         test_dataset=test_dataset,
         resume_from_ckpt=resume_from_ckpt,
     )
+    return ckpt
 
 
 if __name__ == "__main__":
+    import sys
+    import json
 
-    train(
+    ckpt = train(
         debug=True,
-        max_steps=12,
+        max_steps=1,
         strategy="constrained",
         exclude_input_fields_from_bootstrap_demos=True,
     )
+    print(f"ckpt: {ckpt}")
+    # Save ckpt to a file passed as an argument
+    if len(sys.argv) > 1:  # Check if a file path is provided
+        with open(sys.argv[1], "w") as f:
+            json.dump({"ckpt": ckpt}, f)
 
     # train_diagnose(**gpt_3_model)
-    # train_diagnose_teacher(**gpt_4o_model)
+    # train_diagnose_teacher(**gpt_4o_model) # 4omini works well as an optimizer too
+    # /Users/liyin/.adalflow/ckpt/ObjectCountAdalComponent/constrained_max_steps_12_49c63_run_1.json
+    # 0.72 -> 0.9 val
+    # 0.79 -> 0.92 test
diff --git a/use_cases/text_grad_2.0_train.py b/use_cases/text_grad_2.0_train.py
@@ -1,24 +1,58 @@
 import subprocess
+import tempfile
+import json
 
 # List of experiments to run
+object_count = "use_cases/question_answering/bbh/object_count/train_new.py"
+hotpot_qa_multi_hop_rag = "benchmarks/hotpot_qa/adal_exp/train_multi_hop_rag.py"
+
+ckpt_values = []
 experiments = [
-    "experiment1.py",
-    "experiment2.py",
-    "experiment3.py",
+    object_count,
+    # hotpot_qa_multi_hop_rag,
 ]
 
 # Optional: Arguments for each experiment (if needed)
 experiment_args = {
-    "experiment1.py": "",
-    "experiment2.py": "",
-    "experiment3.py": "",
+    object_count: "",
+    # hotpot_qa_multi_hop_rag: "",
 }
+ckpt_values = {}
+
 
-# Loop through experiments and run them
-for experiment in experiments:
-    args = experiment_args.get(experiment, "")
+def run_experiment(script, args):  # run one script in a subprocess; return the "ckpt" it reports, or None on failure
     try:
-        print(f"Running {experiment} with args: {args}")
-        subprocess.run(f"python {experiment} {args}", check=True, shell=True)
+        # Reserve a temp .json path that the child script receives as its first argument
+        with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as temp_file:
+            temp_path = temp_file.name  # NOTE(review): delete=False and no unlink below, so the file stays on disk
+
+        print(f"Running {script} with args: {args}")
+        subprocess.run(
+            f"python {script} {temp_path} {args}",
+            check=True,
+            shell=True,
+            text=True,
+        )
+
+        # Read back the "ckpt" key from the JSON the child script wrote to temp_path
+        with open(temp_path, "r") as f:
+            data = json.load(f)
+            ckpt = data.get("ckpt")
+            print(f"Checkpoint from {script}: {ckpt}")
+            return ckpt
+
     except subprocess.CalledProcessError as e:
-        print(f"Experiment {experiment} failed with error: {e}")
+        print(f"Experiment {script} failed with error: {e}")
+        return None
+
+
+if __name__ == "__main__":
+    for experiment in experiments:
+        args = experiment_args.get(experiment, "")  # experiments without an entry run with no extra args
+        ckpt = run_experiment(experiment, args)
+        if ckpt:  # skip runs that failed (None) or reported no ckpt
+            ckpt_values[experiment] = ckpt
+
+    print("\nAll Checkpoints:")
+    for experiment, ckpt in ckpt_values.items():
+        print(f"{experiment}: {ckpt}")

Original file line number	Diff line number	Diff line change
`@@ -453,6 +453,9 @@ def get_gradient_and_context_text(self) -> str:`
`453`	`453`	`[self.gradients_context[g] for g in self.gradients],`
`454`	`454`	`)`
`455`	`455`	`)`
	`456`	`+ # set all gradients value to None`
	`457`	`+ for g in self.gradients:`
	`458`	`+ g.data = None`
`456`	`459`
`457`	`460`	`gradient_context_combined_str = Prompt(`
`458`	`461`	`template=COMBINED_GRADIENTS_TEMPLATE,`