Skip to content

Commit e630219

Browse files
committed
Add file exclusion (e.g., From_BayesFlow_1.1_to_2.0.ipynb) to minimize BF1 context
1 parent ed0741e commit e630219

File tree

1 file changed

+36
-14
lines changed

1 file changed

+36
-14
lines changed

llm_context/build_llm_context.py

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,12 @@
2727
OUTPUT_DIR = BASE_DIR / "llm_context"
2828

2929
EXCLUDED_DIR_NAMES = ["experimental"]
30+
EXCLUDED_FILE_NAMES = ["From_BayesFlow_1.1_to_2.0.ipynb"]
3031

3132

32-
def convert_notebooks_to_md(src_dir: Path, dst_dir: Path) -> List[Path]:
33+
def convert_notebooks_to_md(
34+
src_dir: Path, dst_dir: Path, exclude_files: Sequence[str] = ()
35+
) -> List[Path]:
3336
"""
3437
Convert Jupyter notebooks (*.ipynb) to Markdown files.
3538
@@ -39,6 +42,8 @@ def convert_notebooks_to_md(src_dir: Path, dst_dir: Path) -> List[Path]:
3942
Source directory containing Jupyter notebooks.
4043
dst_dir : Path
4144
Destination directory where converted Markdown files will be written.
45+
exclude_files : Sequence[str], optional
46+
File names to exclude from conversion.
4247
4348
Returns
4449
-------
@@ -51,8 +56,12 @@ def convert_notebooks_to_md(src_dir: Path, dst_dir: Path) -> List[Path]:
5156
If no notebooks are found in `src_dir`.
5257
"""
5358
created: List[Path] = []
59+
excluded = set(exclude_files)
5460

5561
for ipynb_file in sorted(src_dir.glob("*.ipynb")):
62+
if ipynb_file.name in excluded:
63+
continue
64+
5665
notebook = json.loads(ipynb_file.read_text(encoding="utf-8"))
5766
parts: List[str] = []
5867

@@ -74,24 +83,33 @@ def convert_notebooks_to_md(src_dir: Path, dst_dir: Path) -> List[Path]:
7483
return created
7584

7685

77-
def collect_py_files(root: Path, exclude: Sequence[str] = ()) -> List[Path]:
86+
def collect_py_files(
87+
root: Path, exclude_dirs: Sequence[str] = (), exclude_files: Sequence[str] = ()
88+
) -> List[Path]:
7889
"""
79-
Collect Python source files from a directory, excluding specified folders.
90+
Collect Python source files from a directory, excluding specified folders and files.
8091
8192
Parameters
8293
----------
8394
root : Path
8495
Root directory to search for Python files.
85-
exclude : Sequence[str], optional
86-
Names of directories to exclude from the search (default is empty).
96+
exclude_dirs : Sequence[str], optional
97+
Names of directories to exclude from the search.
98+
exclude_files : Sequence[str], optional
99+
Names of files to exclude from the search.
87100
88101
Returns
89102
-------
90103
List[Path]
91104
Sorted list of resolved paths to Python files.
92105
"""
93-
excluded = set(exclude)
94-
return sorted(f.resolve() for f in root.rglob("*.py") if not any(p.name in excluded for p in f.parents))
106+
excluded_d = set(exclude_dirs)
107+
excluded_f = set(exclude_files)
108+
return sorted(
109+
f.resolve()
110+
for f in root.rglob("*.py")
111+
if f.name not in excluded_f and not any(p.name in excluded_d for p in f.parents)
112+
)
95113

96114

97115
def run_gitingest(work_dir: Path, output: Path) -> None:
@@ -134,10 +152,10 @@ def main() -> None:
134152
--------
135153
1. Validate presence of README and examples directory.
136154
2. Remove old context files from the output directory.
137-
3. Convert Jupyter notebooks in `examples/` to Markdown.
155+
3. Convert Jupyter notebooks in `examples/` to Markdown, excluding specified files.
138156
4. Build two bundles:
139-
- Compact: README + examples
140-
- Full: README + examples + source files (excluding certain directories)
157+
- Compact: README + examples
158+
- Full: README + examples + source files (excluding specified directories and files)
141159
5. Run `gitingest` to generate Markdown bundles.
142160
143161
Raises
@@ -169,8 +187,10 @@ def main() -> None:
169187
tmp_compact = Path(tmp_compact)
170188
tmp_full = Path(tmp_full)
171189

172-
# Convert notebooks
173-
example_mds = convert_notebooks_to_md(EXAMPLES_DIR, tmp_examples)
190+
# Convert notebooks, respecting file exclusions
191+
example_mds = convert_notebooks_to_md(
192+
EXAMPLES_DIR, tmp_examples, EXCLUDED_FILE_NAMES
193+
)
174194

175195
# ==== Compact bundle ====
176196
(tmp_compact / "examples").mkdir(parents=True, exist_ok=True)
@@ -186,7 +206,9 @@ def main() -> None:
186206
shutil.copy(md, tmp_full / "examples" / md.name)
187207

188208
if SRC_DIR.exists():
189-
for pyfile in collect_py_files(SRC_DIR, EXCLUDED_DIR_NAMES):
209+
for pyfile in collect_py_files(
210+
SRC_DIR, EXCLUDED_DIR_NAMES, EXCLUDED_FILE_NAMES
211+
):
190212
rel = pyfile.relative_to(SRC_DIR)
191213
dest = tmp_full / "bayesflow" / rel
192214
dest.parent.mkdir(parents=True, exist_ok=True)
@@ -198,4 +220,4 @@ def main() -> None:
198220

199221

200222
if __name__ == "__main__":
201-
main()
223+
main()

0 commit comments

Comments
 (0)