
Commit 96e885d

Nathan adds wandb logging (#685)
1 parent c776694 commit 96e885d

2 files changed (+28 lines, -1 line)

src/lighteval/logging/evaluation_tracker.py (4 additions & 1 deletion)
@@ -250,8 +250,11 @@ def save(self) -> None:
         )
 
     def push_to_wandb(self, results_dict: dict, details_datasets: dict) -> None:
+        # reformat the results key to replace ':' with '/'
+        results_dict = {k.replace(":", "/"): v for k, v in results_dict["results"].items()}
+
         self.wandb_run.log(
-            {**results_dict["results"]},
+            {**results_dict},
         )
         self.wandb_run.finish()
 
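The added comprehension flattens the nested "results" block and swaps ':' for '/' in metric names, since wandb groups logged keys containing '/' into sections in its dashboard. A minimal sketch of the transformation, using an invented results payload (the task names and scores below are placeholders, not actual lighteval output):

# Hypothetical payload shaped like results_dict; values are made up for illustration.
results_dict = {
    "results": {
        "leaderboard|mmlu:abstract_algebra|5": {"acc": 0.31, "acc_stderr": 0.04},
        "leaderboard|gsm8k|5": {"acc": 0.57, "acc_stderr": 0.02},
    }
}

# Same comprehension as in push_to_wandb: pull out the inner dict and replace ':' with '/'.
results_dict = {k.replace(":", "/"): v for k, v in results_dict["results"].items()}

print(results_dict)
# {'leaderboard|mmlu/abstract_algebra|5': {'acc': 0.31, 'acc_stderr': 0.04},
#  'leaderboard|gsm8k|5': {'acc': 0.57, 'acc_stderr': 0.02}}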

src/lighteval/main_endpoint.py (24 additions & 0 deletions)
@@ -200,6 +200,13 @@ def tgi(
     save_details: Annotated[
         bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
+    wandb: Annotated[
+        bool,
+        Option(
+            help="Push results to wandb. This will only work if you have wandb installed and logged in. We use env variable to configure wandb. see here: https://docs.wandb.ai/guides/track/environment-variables/",
+            rich_help_panel=HELP_PANEL_NAME_2,
+        ),
+    ] = False,
     # === debug ===
     max_samples: Annotated[
         Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3)
@@ -225,6 +232,7 @@ def tgi(
         push_to_tensorboard=push_to_tensorboard,
         public=public_run,
         hub_results_org=results_org,
+        wandb=wandb,
     )
 
     parallelism_manager = ParallelismManager.TGI
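The same wandb option is added, unchanged, to the litellm and inference_providers commands below. For reference, a self-contained sketch of the Annotated/Option pattern (the command name and help text here are invented for illustration; only typer is assumed):

# Minimal sketch of how the Annotated/Option pattern becomes a --wandb CLI flag.
# "demo" is a hypothetical command, not part of lighteval.
from typing import Annotated

import typer
from typer import Option

app = typer.Typer()


@app.command()
def demo(
    wandb: Annotated[
        bool,
        Option(help="Push results to wandb (configured via WANDB_* environment variables)."),
    ] = False,
) -> None:
    # typer exposes this parameter as --wandb / --no-wandb on the command line.
    print(f"wandb logging enabled: {wandb}")


if __name__ == "__main__":
    app()

Running "python demo.py --wandb" flips the flag to True; omitting it keeps the default of False.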
@@ -312,6 +320,13 @@ def litellm(
     save_details: Annotated[
         bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
+    wandb: Annotated[
+        bool,
+        Option(
+            help="Push results to wandb. This will only work if you have wandb installed and logged in. We use env variable to configure wandb. see here: https://docs.wandb.ai/guides/track/environment-variables/",
+            rich_help_panel=HELP_PANEL_NAME_2,
+        ),
+    ] = False,
     # === debug ===
     max_samples: Annotated[
         Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3)
@@ -337,6 +352,7 @@ def litellm(
         push_to_tensorboard=push_to_tensorboard,
         public=public_run,
         hub_results_org=results_org,
+        wandb=wandb,
     )
 
     parallelism_manager = ParallelismManager.NONE
@@ -422,6 +438,13 @@ def inference_providers(
     save_details: Annotated[
         bool, Option(help="Save detailed, sample per sample, results.", rich_help_panel=HELP_PANEL_NAME_2)
     ] = False,
+    wandb: Annotated[
+        bool,
+        Option(
+            help="Push results to wandb. This will only work if you have wandb installed and logged in. We use env variable to configure wandb. see here: https://docs.wandb.ai/guides/track/environment-variables/",
+            rich_help_panel=HELP_PANEL_NAME_2,
+        ),
+    ] = False,
     # === debug ===
     max_samples: Annotated[
         Optional[int], Option(help="Maximum number of samples to evaluate on.", rich_help_panel=HELP_PANEL_NAME_3)
@@ -447,6 +470,7 @@ def inference_providers(
         push_to_tensorboard=push_to_tensorboard,
         public=public_run,
         hub_results_org=results_org,
+        wandb=wandb,
     )
 
     # TODO (nathan): better handling of model_args
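The option's help text defers wandb configuration to environment variables. A small sketch of what that can look like in practice (the project and entity names are placeholders, not values used by lighteval):

# Sketch: configuring wandb via environment variables, as the help text suggests.
# Project/entity values are placeholders for illustration only.
import os

os.environ["WANDB_PROJECT"] = "lighteval-evals"  # target wandb project
os.environ["WANDB_ENTITY"] = "my-team"           # team or username
# WANDB_API_KEY can also be set here, or provided once via `wandb login`.

import wandb

run = wandb.init()  # picks up WANDB_PROJECT / WANDB_ENTITY from the environment
run.log({"leaderboard|gsm8k/acc": 0.57})  # '/' groups metrics into sections in the UI
run.finish()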
