Skip to content

Commit 178fa84

Browse files
Output path fix (#2993)
* fix(output_path): support direct JSON file paths
* fix linting
* turn off external LM tests for now
* Update help text for `output_path`
---------
Co-authored-by: Baber <[email protected]>
1 parent 8be417a commit 178fa84

File tree

3 files changed

+56
-43
lines changed

3 files changed

+56
-43
lines changed

.github/workflows/unit_tests.yml

Lines changed: 33 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -79,36 +79,36 @@ jobs:
7979
path: |
8080
test_logs/*
8181
82-
testmodels:
83-
name: External LM Tests
84-
runs-on: ubuntu-latest
85-
timeout-minutes: 30
86-
steps:
87-
- name: Checkout Code
88-
uses: actions/checkout@v4
89-
- name: Set up Python 3.9
90-
uses: actions/setup-python@v5
91-
with:
92-
python-version: 3.9
93-
cache: pip
94-
cache-dependency-path: pyproject.toml
95-
96-
# Cache HuggingFace cache directory for External LM tests
97-
- name: Cache HuggingFace cache (External LM tests)
98-
uses: actions/cache@v3
99-
id: cache-hf-lm
100-
with:
101-
path: ~/.cache/huggingface
102-
key: ${{ runner.os }}-hf-cache-external-lm
103-
restore-keys: |
104-
${{ runner.os }}-hf-cache-external-lm
105-
106-
- name: Install dependencies
107-
run: |
108-
python -m pip install --upgrade pip
109-
pip install -e '.[dev,optimum,deepsparse,sparseml,api]' --extra-index-url https://download.pytorch.org/whl/cpu
110-
pip install -U transformers peft accelerate
111-
112-
- name: Test with pytest
113-
run: python -m pytest tests/models --showlocals -s -vv
114-
continue-on-error: true # Continue workflow even if tests fail
82+
# testmodels:
83+
# name: External LM Tests
84+
# runs-on: ubuntu-latest
85+
# timeout-minutes: 30
86+
# steps:
87+
# - name: Checkout Code
88+
# uses: actions/checkout@v4
89+
# - name: Set up Python 3.9
90+
# uses: actions/setup-python@v5
91+
# with:
92+
# python-version: 3.9
93+
# cache: pip
94+
# cache-dependency-path: pyproject.toml
95+
#
96+
# # Cache HuggingFace cache directory for External LM tests
97+
# - name: Cache HuggingFace cache (External LM tests)
98+
# uses: actions/cache@v3
99+
# id: cache-hf-lm
100+
# with:
101+
# path: ~/.cache/huggingface
102+
# key: ${{ runner.os }}-hf-cache-external-lm
103+
# restore-keys: |
104+
# ${{ runner.os }}-hf-cache-external-lm
105+
#
106+
# - name: Install dependencies
107+
# run: |
108+
# python -m pip install --upgrade pip
109+
# pip install -e '.[dev,optimum,deepsparse,sparseml,api]' --extra-index-url https://download.pytorch.org/whl/cpu
110+
# pip install -U transformers peft accelerate
111+
#
112+
# - name: Test with pytest
113+
# run: python -m pytest tests/models --showlocals -s -vv
114+
# continue-on-error: true # Continue workflow even if tests fail

lm_eval/__main__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,7 +135,7 @@ def setup_parser() -> argparse.ArgumentParser:
135135
default=None,
136136
type=str,
137137
metavar="DIR|DIR/file.json",
138-
help="The path to the output file where the result metrics will be saved. If the path is a directory and log_samples is true, the results will be saved in the directory. Else the parent directory will be used.",
138+
help="Path where result metrics will be saved. Can be either a directory or a .json file. If the path is a directory and log_samples is true, the results will be saved in the directory. Else the parent directory will be used.",
139139
)
140140
parser.add_argument(
141141
"--limit",

lm_eval/loggers/evaluation_tracker.py

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -229,11 +229,21 @@ def save_results_aggregated(
229229
)
230230

231231
path = Path(self.output_path if self.output_path else Path.cwd())
232-
path = path.joinpath(self.general_config_tracker.model_name_sanitized)
233-
path.mkdir(parents=True, exist_ok=True)
234-
235232
self.date_id = datetime.now().isoformat().replace(":", "-")
236-
file_results_aggregated = path.joinpath(f"results_{self.date_id}.json")
233+
if path.suffix == ".json":
234+
path.parent.mkdir(parents=True, exist_ok=True)
235+
file_results_aggregated = path.with_name(
236+
f"{path.stem}_{self.date_id}.json"
237+
)
238+
else:
239+
path = path.joinpath(
240+
self.general_config_tracker.model_name_sanitized
241+
)
242+
path.mkdir(parents=True, exist_ok=True)
243+
file_results_aggregated = path.joinpath(
244+
f"results_{self.date_id}.json"
245+
)
246+
237247
file_results_aggregated.open("w", encoding="utf-8").write(dumped)
238248

239249
if self.api and self.push_results_to_hub:
@@ -250,12 +260,10 @@ def save_results_aggregated(
250260
)
251261
self.api.upload_file(
252262
repo_id=repo_id,
253-
path_or_fileobj=str(
254-
path.joinpath(f"results_{self.date_id}.json")
255-
),
263+
path_or_fileobj=str(file_results_aggregated),
256264
path_in_repo=os.path.join(
257265
self.general_config_tracker.model_name,
258-
f"results_{self.date_id}.json",
266+
file_results_aggregated.name,
259267
),
260268
repo_type="dataset",
261269
commit_message=f"Adding aggregated results for {self.general_config_tracker.model_name}",
@@ -290,7 +298,12 @@ def save_results_samples(
290298
eval_logger.info(f"Saving per-sample results for: {task_name}")
291299

292300
path = Path(self.output_path if self.output_path else Path.cwd())
293-
path = path.joinpath(self.general_config_tracker.model_name_sanitized)
301+
if path.suffix == ".json":
302+
path = path.parent
303+
else:
304+
path = path.joinpath(
305+
self.general_config_tracker.model_name_sanitized
306+
)
294307
path.mkdir(parents=True, exist_ok=True)
295308

296309
file_results_samples = path.joinpath(

0 commit comments

Comments (0)