Skip to content

Commit 4e41aeb

Browse files
Merge pull request #41 from Azure-Samples/chienyuanchang/notebook_test
Add test for notebooks
2 parents cdef17e + 8d70882 commit 4e41aeb

File tree

2 files changed

+184
-0
lines changed

2 files changed

+184
-0
lines changed

tools/README.md

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
# test_notebooks.py
2+
3+
This script is designed for **testing and validating** that all Jupyter notebooks in the `notebooks/` directory (or a specified directory) execute successfully from start to finish. It is especially useful for pre-merge checks and for contributors to verify that their changes do not break any notebook workflows.
4+
5+
## Features
6+
- **Automatic Discovery:** Recursively scans a directory for `.ipynb` files (excluding hidden files).
7+
- **Selective Skipping:** Supports a skip list to exclude specific notebooks from execution (e.g., those requiring manual input or special setup).
8+
- **Execution Reporting:** Prints a summary of successful and failed notebooks, including error messages for failures.
9+
- **Command Line Usage:** Can run all notebooks in a directory or a specified list of notebook files.
10+
11+
## Usage
12+
13+
### Run All Notebooks in a Directory
14+
15+
```bash
16+
python3 tools/test_notebooks.py
17+
```
18+
This will scan the `notebooks/` directory by default, skipping any notebooks listed in the `skip_list` variable.
19+
20+
### Run Specific Notebooks
21+
22+
```bash
23+
python3 tools/test_notebooks.py notebooks/example1.ipynb notebooks/example2.ipynb
24+
```
25+
This will execute only the specified notebooks.
26+
27+
## Setting Up Environment Variables
28+
Some notebooks require access to Azure Storage or other resources. You may need to set environment variables in the [.env](../notebooks/.env) file before running the tests. For example, to test notebooks that use training data or reference documents, follow these steps:
29+
30+
1. **Prepare Azure Storage:**
31+
- Create an Azure Storage Account and a Blob Container (you can follow the guide to [create an Azure Storage Account](https://aka.ms/create-a-storage-account)).
32+
- Use Azure Storage Explorer to generate a Shared Access Signature (SAS) URL with `Read`, `Write`, and `List` permissions for the container.
33+
2. **Set Environment Variables:**
34+
- Add the following variables to the [.env](../notebooks/.env) file in the `notebooks/` directory:
35+
36+
```env
37+
TRAINING_DATA_SAS_URL=<Blob container SAS URL>
38+
TRAINING_DATA_PATH=<Designated folder path under the blob container>
39+
REFERENCE_DOC_SAS_URL=<Blob container SAS URL>
40+
REFERENCE_DOC_PATH=<Designated folder path under the blob container>
41+
```
42+
- These variables will be used by notebooks that require access to training/reference data.
43+
- You can refer to [Set env for training data and reference doc](../docs/set_env_for_training_data_and_reference_doc.md) for setting up these variables.
44+
45+
## Skip List
46+
You can modify the `skip_list` variable in the script to add or remove notebooks that should be skipped during execution. The skip list can contain full paths or substrings.
47+
48+
## Dependencies
49+
- Python 3
50+
- `nbformat`
51+
- `nbconvert`
52+
53+
Install dependencies with:
54+
```bash
55+
pip3 install nbformat nbconvert
56+
```
57+
58+
## Exit Codes
59+
- Returns `0` if all notebooks succeed.
60+
- Returns `1` if any notebook fails or if no notebooks are found.
61+
62+
## Notes
63+
- Notebooks that require manual input, special setup, or specific environment variables should either be added to the skip list or have their requirements set up before the tests are run.

tools/test_notebooks.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
import os
2+
import sys
3+
from typing import Tuple, Optional, List
4+
5+
import nbformat
6+
from nbconvert.preprocessors import ExecutePreprocessor
7+
8+
9+
# Value handed to ExecutePreprocessor as its ``timeout`` argument.
# NOTE(review): nbconvert documents ``timeout`` as a limit in seconds applied
# to each cell, not to the notebook as a whole — confirm the name matches the
# intended behavior.
SINGLE_NOTEBOOK_TIMEOUT = 1200
10+
11+
12+
def should_skip(notebook_path: str, skip_list: List[str]) -> bool:
    """Tell whether a notebook is excluded by the skip list.

    Args:
        notebook_path: Path of the notebook being considered.
        skip_list: Entries to match; each one is tested as a plain substring
            of ``notebook_path``.

    Returns:
        True when at least one entry occurs inside ``notebook_path``,
        False otherwise (including when ``skip_list`` is empty).
    """
    for pattern in skip_list:
        if pattern in notebook_path:
            return True
    return False
14+
15+
16+
def run_notebook(notebook_path: str, root: str) -> Tuple[bool, Optional[str]]:
    """Execute a single notebook and report the outcome.

    Args:
        notebook_path: Path of the ``.ipynb`` file to read and execute.
        root: Directory supplied to the preprocessor as the ``path`` entry of
            the resources metadata.

    Returns:
        ``(True, None)`` when reading and executing raised nothing, otherwise
        ``(False, message)`` where ``message`` is ``str()`` of the exception.
    """
    resources = {"metadata": {"path": root}}
    try:
        with open(notebook_path, encoding="utf-8") as handle:
            notebook = nbformat.read(handle, as_version=4)

        executor = ExecutePreprocessor(
            timeout=SINGLE_NOTEBOOK_TIMEOUT,
            kernel_name="python3",
        )
        executor.preprocess(notebook, resources)
    except Exception as exc:
        # Any failure (I/O, parsing, cell execution) is reported to the
        # caller as data rather than propagated.
        return False, str(exc)
    else:
        return True, None
29+
30+
31+
def run_all_notebooks(path: str = ".", skip_list: Optional[List[str]] = None) -> None:
    """Execute every notebook found under ``path`` and print a summary.

    The directory tree is walked recursively. Hidden directories (names
    starting with ``.``, e.g. ``.ipynb_checkpoints``) are not entered, and
    hidden files are ignored, so editor autosave copies are never executed.
    Directories and files are visited in sorted order so runs are
    reproducible across filesystems.

    Args:
        path: Directory to scan; resolved to an absolute path.
        skip_list: Substrings of notebook paths that must not be executed.
            ``None`` means nothing is skipped.

    Raises:
        SystemExit: With code 1 when at least one notebook failed, or when
            no notebook was executed at all.
    """
    abs_path = os.path.abspath(path)
    print(f"🔍 Scanning for notebooks in: {abs_path}\n")

    skip_list = skip_list or []

    notebook_found: int = 0
    success_notebooks: List[str] = []
    failed_notebooks: List[Tuple[str, str]] = []

    for root, dirs, files in os.walk(abs_path):
        # Assigning to dirs[:] mutates the list os.walk iterates over, which
        # both prunes hidden directories and fixes the traversal order.
        dirs[:] = sorted(d for d in dirs if not d.startswith("."))

        for file in sorted(files):
            if not file.endswith(".ipynb") or file.startswith("."):
                continue

            notebook_path = os.path.join(root, file)

            if should_skip(notebook_path, skip_list):
                print(f"⏭️ Skipped: {notebook_path}")
                continue

            notebook_found += 1
            print(f"▶️ Running: {notebook_path}")
            # Each notebook runs with its own directory as the working path.
            success, error = run_notebook(notebook_path, root)

            if success:
                print(f"✅ Success: {notebook_path}\n")
                success_notebooks.append(notebook_path)
            else:
                print(f"❌ Failed: {notebook_path}\nError: {error}\n")
                failed_notebooks.append((notebook_path, error))

    # 📋 Summary
    print("🧾 Notebook Execution Summary")
    print(f"✅ {len(success_notebooks)} succeeded")
    print(f"❌ {len(failed_notebooks)} failed\n")

    if failed_notebooks:
        print("🚨 Failed notebooks:")
        for nb, error in failed_notebooks:
            # Only the final line of the error text is shown in the summary;
            # the full text was already printed when the notebook failed.
            last_line = error.strip().splitlines()[-1] if error else "Unknown error"
            print(f" - {nb}\n{last_line}")
        sys.exit(1)

    if notebook_found == 0:
        print("❌ No notebooks were found. Check the folder path or repo contents.")
        sys.exit(1)

    print("🏁 All notebooks completed successfully.")
78+
79+
80+
# Command-line entry point.
# With no arguments every notebook under "notebooks" is executed; otherwise
# only the notebook paths given on the command line are executed.
if __name__ == "__main__":
    args: List[str] = sys.argv[1:]

    # NOTE: Define skip list (can use full paths or substrings)
    skip_list = [
        "build_person_directory.ipynb",  # Skip due to "new_face_image_path" needed to be added manually
    ]

    if not args:
        run_all_notebooks("notebooks", skip_list=skip_list)
    else:
        # Successes are counted explicitly: deriving them from
        # len(args) - len(failed) would report skipped notebooks as succeeded.
        succeeded: int = 0
        failed: List[Tuple[str, str]] = []
        for notebook_path in args:
            if should_skip(notebook_path, skip_list):
                print(f"⏭️ Skipped: {notebook_path}")
                continue

            if notebook_path.endswith(".ipynb") and os.path.isfile(notebook_path):
                print(f"▶️ Running: {notebook_path}")
                # dirname() is "" for a bare filename; fall back to the
                # current directory explicitly in that case.
                notebook_dir = os.path.dirname(notebook_path) or "."
                success, error = run_notebook(notebook_path, notebook_dir)
                if success:
                    print(f"✅ Success: {notebook_path}\n")
                    succeeded += 1
                else:
                    print(f"❌ Failed: {notebook_path}\nError: {error}\n")
                    failed.append((notebook_path, error))
            else:
                print(f"⚠️ Not a valid notebook file: {notebook_path}")
                failed.append((notebook_path, "Invalid path or not a .ipynb file"))

        # Summary
        print("🧾 Execution Summary")
        print(f"✅ {succeeded} succeeded")
        print(f"❌ {len(failed)} failed")

        if failed:
            print("🚨 Failed notebooks:")
            for nb, error in failed:
                # Show only the final line of each error in the summary.
                last_line = error.strip().splitlines()[-1] if error else "Unknown error"
                print(f" - {nb}\n{last_line}")
            sys.exit(1)
        else:
            print("🏁 All selected notebooks completed successfully.")

0 commit comments

Comments
 (0)