File tree Expand file tree Collapse file tree
project/paperbench/paperbench/nano Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -28,13 +28,13 @@ jobs:
2828 steps :
2929 - uses : actions/checkout@v4
3030 with :
31- lfs : true
31+ lfs : false
3232 fetch-depth : 1
3333
3434 - name : Hydrate PaperBench data
3535 run : |
36- git lfs fetch --include "project/paperbench/data/**" --exclude ""
37- git lfs checkout project/paperbench/data
36+ git lfs fetch --include "project/paperbench/data/papers/lca-on-the-line/**" --exclude ""
37+ git lfs checkout project/paperbench/data/papers/lca-on-the-line
3838
3939 - name : Install uv
4040 uses : astral-sh/setup-uv@v5
4848
4949 - name : Set up Docker
5050 uses : docker/setup-buildx-action@v3
51-
52- - uses : docker/setup-buildx-action@v3
5351 with :
5452 driver : docker
55- install : true
5653
5754 - name : Build pb-env
5855 working-directory : ./project/paperbench
Original file line number Diff line number Diff line change @@ -339,10 +339,22 @@ def check_for_docker(self) -> None:
339339 )
340340
341341 def check_for_lfs (self ) -> None :
342- # Check dataset has been pulled from git lfs
342+ """
343+ Ensure required papers for the selected split are hydrated from LFS.
344+
345+ We only validate papers that appear in the active ``paper_split`` to allow
346+ lightweight CI runs that hydrate a minimal subset of the dataset.
347+ """
348+
343349 papers_dir = get_paperbench_data_dir () / "papers"
344- papers = list (papers_dir .glob ("**/paper.md" ))
350+ split_path = get_experiments_dir () / "splits" / f"{self.paper_split}.txt"
351+
352+ paper_ids = [line .strip () for line in split_path .read_text ().splitlines () if line .strip ()]
345353
346- for paper in papers :
347- with open (paper , "r" ) as f :
348- assert len (f .readlines ()) > 5 , f"Paper at { paper } should be pulled from git lfs"
354+ for paper_id in paper_ids :
355+ paper_path = papers_dir / paper_id / "paper.md"
356+ assert paper_path .exists (), f"Paper at { paper_path } is missing; hydrate via git lfs."
357+ with open (paper_path , "r" ) as f :
358+ assert len (f .readlines ()) > 5 , (
359+ f"Paper at { paper_path } should be pulled from git lfs"
360+ )
You can’t perform that action at this time.
0 commit comments