Skip to content

Commit 7020877

Browse files
committed
feature: add export-to-py utility
Recursively convert all ipynb files, excluding the utils/ and tests/ folders
1 parent 8202286 commit 7020877

File tree

1 file changed

+96
-0
lines changed

1 file changed

+96
-0
lines changed

utils/export_to_py.py

Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
'''
2+
Utility to export a Python script from a Jupyter notebook.
3+
Convert one ipynb or all recursively.
4+
'''
5+
6+
import argparse
7+
import multiprocessing as mp
8+
import pathlib
9+
import subprocess
10+
import sys
11+
12+
from typing import List
13+
14+
15+
import nbformat
16+
import nbconvert
17+
18+
19+
def parse_argv(argv: List[str]) -> argparse.Namespace:
    """Parse the command line of the notebook export utility.

    Args:
        argv: Full argument vector including the program name at index 0
            (as in ``sys.argv``); the first element is skipped.

    Returns:
        Namespace with ``notebook`` (list of ``pathlib.Path``, empty when no
        notebook was given), ``destination`` (output directory, defaulting to
        ``temp`` under the project folder) and ``recursive`` (bool flag).
    """
    parser = argparse.ArgumentParser(description='Export Jupyter notebooks to Python scripts.')
    # Notebooks are parsed as Path objects: the rest of this file calls
    # Path-only attributes (.parts, .is_file(), .suffix) on them, which would
    # raise AttributeError on plain strings.
    parser.add_argument('notebook', type=pathlib.Path, nargs='*', help='Path to the notebook(s) to convert.')
    parser.add_argument('--destination', type=pathlib.Path, default=get_proj_folder() / 'temp', help='Destination directory for the converted scripts.')
    parser.add_argument('--recursive', action='store_true', help='Recursively convert all notebooks in subdirectories.')
    return parser.parse_args(argv[1:])
25+
26+
27+
def get_proj_folder() -> pathlib.Path:
    """Return the resolved folder that contains this script's own folder."""
    script_dir = pathlib.Path(__file__).parent
    return script_dir.parent.resolve()
30+
31+
32+
def _is_excluded(notebook: pathlib.Path, root: pathlib.Path) -> bool:
    """Tell whether *notebook* sits inside a ``utils`` or ``tests`` folder."""
    try:
        # Only look at the path below the project root, so that a project
        # checked out under e.g. /home/tests/ does not exclude everything.
        parts = notebook.relative_to(root).parts
    except ValueError:
        # Notebook lies outside the project: fall back to the full path.
        parts = notebook.parts
    return 'utils' in parts or 'tests' in parts


def main(argv:List[str]):
    """Convert notebooks to Python scripts using a pool of worker processes.

    When no notebook is named on the command line, every ``*.ipynb`` directly
    in the project folder is used (or in all subfolders with ``--recursive``).
    Notebooks located in ``utils`` or ``tests`` folders are skipped.

    Args:
        argv: Full argument vector including the program name at index 0.
    """
    args = parse_argv(argv)
    root = get_proj_folder()

    # Command line values may be plain strings or relative paths; normalise
    # them to absolute Path objects so that .parts and relative_to() work.
    notebooks = [pathlib.Path(nb).resolve() for nb in args.notebook]

    # get list of notebooks if not specified
    if not notebooks:
        find = root.rglob if args.recursive else root.glob
        notebooks = list(find('*.ipynb'))

    # exclude notebooks in the utils and tests folders
    notebooks = [nb for nb in notebooks if not _is_excluded(nb, root)]

    converter = nbconvert.PythonExporter()

    print(f'Using {mp.cpu_count()} processes for conversion.')
    # The context manager terminates the workers if starmap() raises; on
    # success close()/join() lets them exit normally and flush their output.
    with mp.Pool(mp.cpu_count()) as pool:
        results = pool.starmap(
            convert,
            [(nb, args.destination, converter) for nb in notebooks],
        )
        pool.close()
        pool.join()

    print(f'Converted {len(results) - results.count(None)} / {len(results)} notebooks to Python scripts.')
70+
71+
72+
def _relative_to_project(path: pathlib.Path) -> pathlib.Path:
    """Return *path* relative to the project folder, or unchanged when it lies outside."""
    try:
        return path.relative_to(get_proj_folder())
    except ValueError:
        return path


def convert(notebook:pathlib.Path, destination:pathlib.Path, converter:nbconvert.Exporter) -> 'str | None':
    """Convert a single notebook to a Python script.

    The script is written below *destination*, mirroring the notebook's
    location relative to the project folder (or just its file name when the
    notebook lies outside the project).

    Args:
        notebook: Path of the ``.ipynb`` file to convert.
        destination: Root directory receiving the generated ``.py`` file.
        converter: Exporter handed to ``nbconvert.export``.

    Returns:
        The generated script source, or ``None`` when the file was skipped
        (not an existing ``.ipynb`` file, unreadable, or failing validation).
    """
    shown = _relative_to_project(notebook)

    if not notebook.is_file() or notebook.suffix != '.ipynb':
        print(f'Skipping non-notebook file: {shown}')
        return None

    try:
        nb = nbformat.read(notebook, as_version=nbformat.NO_CONVERT)
        nbformat.validate(nb)
    except (nbformat.ValidationError, ValueError):
        # ValueError is meant to cover files that cannot be parsed as a
        # notebook, so one broken file does not abort the whole batch.
        # NOTE(review): assumes nbformat's read errors derive from
        # ValueError -- confirm.
        print(f'Skipping invalid notebook: {shown}')
        return None

    # An absolute right-hand operand would replace `destination` entirely, so
    # notebooks outside the project are placed by file name only.
    rel_target = pathlib.Path(shown.name) if shown.is_absolute() else shown
    script_path = destination / rel_target.with_suffix('.py')
    # parents=True: neither the destination nor nested sub-folders need to exist yet.
    script_path.parent.mkdir(parents=True, exist_ok=True)

    print(f'Converting {shown} to {_relative_to_project(script_path)}')
    # NOTE(review): the `output` keyword looks unused by nbconvert.export when
    # an exporter instance is passed -- kept as in the original, confirm.
    output, _resources = nbconvert.export(converter, nb, output=script_path)
    script_path.write_text(output, encoding='utf-8')
    return output
92+
93+
94+
if __name__ == "__main__":
    # Script entry point: hand the raw argument vector to main()
    main(sys.argv)

0 commit comments

Comments
 (0)