Skip to content

Commit cc11104

Browse files
committed
simplify code
1 parent eb1d4b0 commit cc11104

File tree

1 file changed

+71
-158
lines changed

1 file changed

+71
-158
lines changed

scripts/generate_gallery.py

Lines changed: 71 additions & 158 deletions
Original file line number | Diff line number | Diff line change
@@ -56,12 +56,8 @@ def load_categories_from_index(index_path: Path) -> dict[str, list[str]]:
5656
if current_category and current_category != "Example Gallery":
5757
categories[current_category] = []
5858
# Check for notebook links under current category
59-
elif current_category and ":link:" in line:
60-
# Extract notebook name from :link: notebook_name
61-
link_match = re.search(r":link:\s+(\S+)", line)
62-
if link_match:
63-
notebook_name = link_match.group(1)
64-
categories[current_category].append(notebook_name)
59+
elif current_category and (match := re.search(r":link:\s+(\S+)", line)):
60+
categories[current_category].append(match.group(1))
6561
except Exception as e:
6662
print(f"Warning: Could not load categories from {index_path}: {e}")
6763

@@ -71,56 +67,40 @@ def load_categories_from_index(index_path: Path) -> dict[str, list[str]]:
7167
def get_notebook_category(filename: str, category_mapping: dict[str, list[str]]) -> str:
7268
"""Determine the category for a notebook from the loaded mapping."""
7369
notebook_name = filename.replace(".ipynb", "")
74-
for category, notebooks in category_mapping.items():
75-
if notebook_name in notebooks:
76-
return category
77-
return "Other"
70+
return next(
71+
(
72+
cat
73+
for cat, notebooks in category_mapping.items()
74+
if notebook_name in notebooks
75+
),
76+
"Other",
77+
)
7878

7979

80-
def extract_metadata(notebook_path: Path) -> tuple[str, str]:
81-
"""Extract title and description from notebook."""
80+
def extract_metadata(notebook_path: Path) -> str:
81+
"""Extract title from notebook."""
8282
with open(notebook_path, "r", encoding="utf-8") as f:
8383
nb = nbformat.read(f, as_version=4)
8484

85-
title = None
86-
description = ""
87-
8885
# Look for title in first markdown cell
8986
for cell in nb.cells:
9087
if cell.cell_type == "markdown":
91-
source = cell.source.strip()
92-
# Look for H1 or H2 title
93-
title_match = re.match(r"^#+\s+(.+)$", source, re.MULTILINE)
94-
if title_match:
95-
title = title_match.group(1).strip()
96-
# Get description from rest of first markdown cell
97-
lines = source.split("\n")
98-
description_lines = []
99-
found_title = False
100-
for line in lines:
101-
if re.match(r"^#+\s+", line):
102-
found_title = True
103-
continue
104-
if found_title and line.strip():
105-
# Skip MyST directives and formulas
106-
stripped = line.strip()
107-
if stripped.startswith(":::"):
108-
break # Stop at first MyST directive
109-
if stripped.startswith("$$") or stripped.startswith("$"):
110-
continue # Skip math formulas
111-
if stripped.startswith("*") and ":" in stripped:
112-
continue # Skip list items that are definitions
113-
description_lines.append(stripped)
114-
if len(description_lines) >= 2: # Take first 2 meaningful lines
115-
break
116-
description = " ".join(description_lines)
117-
break
88+
if match := re.search(r"^#+\s+(.+)$", cell.source.strip(), re.MULTILINE):
89+
return match.group(1).strip()
11890

11991
# Fallback to filename-based title
120-
if not title:
121-
title = notebook_path.stem.replace("_", " ").title()
92+
return notebook_path.stem.replace("_", " ").title()
12293

123-
return title, description
94+
95+
def _find_image_in_notebook(nb) -> str | None:
96+
"""Find first PNG image in notebook outputs."""
97+
for cell in nb.cells:
98+
if cell.cell_type == "code" and hasattr(cell, "outputs") and cell.outputs:
99+
for output in cell.outputs:
100+
if output.output_type in ("display_data", "execute_result"):
101+
if image_data := output.get("data", {}).get("image/png"):
102+
return image_data
103+
return None
124104

125105

126106
def extract_first_image(notebook_path: Path, output_dir: Path) -> str | None:
@@ -129,55 +109,27 @@ def extract_first_image(notebook_path: Path, output_dir: Path) -> str | None:
129109
return None
130110

131111
try:
132-
# Read notebook
133112
with open(notebook_path, "r", encoding="utf-8") as f:
134113
nb = nbformat.read(f, as_version=4)
135114

136-
# First, try to find images in existing outputs (no execution needed)
137-
for cell in nb.cells:
138-
if cell.cell_type == "code" and hasattr(cell, "outputs") and cell.outputs:
139-
for output in cell.outputs:
140-
if (
141-
output.output_type == "display_data"
142-
or output.output_type == "execute_result"
143-
):
144-
if "image/png" in output.get("data", {}):
145-
image_data = output["data"]["image/png"]
146-
return _save_thumbnail(
147-
notebook_path, output_dir, image_data
148-
)
149-
150-
# If no images found in existing outputs, try executing (with short timeout)
151-
# Only execute if notebook appears to have no outputs
152-
has_outputs = any(
115+
# Try to find images in existing outputs first
116+
if image_data := _find_image_in_notebook(nb):
117+
return _save_thumbnail(notebook_path, output_dir, image_data)
118+
119+
# Execute if notebook has no outputs
120+
if not any(
153121
cell.cell_type == "code" and hasattr(cell, "outputs") and cell.outputs
154122
for cell in nb.cells
155-
)
156-
157-
if not has_outputs:
123+
):
158124
print(f" Executing {notebook_path.name} to generate thumbnail...")
159-
ep = ExecutePreprocessor(
160-
timeout=120, kernel_name="python3"
161-
) # 2 min timeout
162125
try:
163-
ep.preprocess(nb, {"metadata": {"path": str(notebook_path.parent)}})
126+
ExecutePreprocessor(timeout=120, kernel_name="python3").preprocess(
127+
nb, {"metadata": {"path": str(notebook_path.parent)}}
128+
)
129+
if image_data := _find_image_in_notebook(nb):
130+
return _save_thumbnail(notebook_path, output_dir, image_data)
164131
except Exception as e:
165132
print(f" Warning: Failed to execute {notebook_path.name}: {e}")
166-
return None
167-
168-
# Find first image in outputs after execution
169-
for cell in nb.cells:
170-
if cell.cell_type == "code" and hasattr(cell, "outputs"):
171-
for output in cell.outputs:
172-
if (
173-
output.output_type == "display_data"
174-
or output.output_type == "execute_result"
175-
):
176-
if "image/png" in output.get("data", {}):
177-
image_data = output["data"]["image/png"]
178-
return _save_thumbnail(
179-
notebook_path, output_dir, image_data
180-
)
181133

182134
return None
183135
except Exception as e:
@@ -193,30 +145,22 @@ def _save_thumbnail(
193145
thumbnail_name = f"{notebook_path.stem}.png"
194146
thumbnail_path = output_dir / thumbnail_name
195147

196-
img_data = base64.b64decode(image_data)
197-
with open(thumbnail_path, "wb") as img_file:
198-
img_file.write(img_data)
199-
200-
# Resize thumbnail to uniform square-like size (crop/pad to maintain aspect ratio)
201-
try:
202-
img = Image.open(thumbnail_path)
203-
# Target size for uniform thumbnails - more square-like
204-
target_size = (400, 250)
205-
206-
# Calculate scaling to fit within target while maintaining aspect ratio
207-
img.thumbnail(target_size, Image.Resampling.LANCZOS)
208-
209-
# Create a new image with target size and paste centered
210-
new_img = Image.new("RGB", target_size, (255, 255, 255))
211-
# Calculate position to center the image
212-
x_offset = (target_size[0] - img.size[0]) // 2
213-
y_offset = (target_size[1] - img.size[1]) // 2
214-
new_img.paste(img, (x_offset, y_offset))
215-
new_img.save(thumbnail_path)
216-
except Exception as e:
217-
print(f"Warning: Could not resize thumbnail for {notebook_path.name}: {e}")
218-
219-
# Use relative path: from notebooks/ subdirectory, go up to source root, then to _static
148+
# Decode and save image
149+
thumbnail_path.write_bytes(base64.b64decode(image_data))
150+
151+
# Resize to uniform size (400x250) with padding
152+
img = Image.open(thumbnail_path)
153+
target_size = (400, 250)
154+
img.thumbnail(target_size, Image.Resampling.LANCZOS)
155+
156+
# Create padded image
157+
new_img = Image.new("RGB", target_size, (255, 255, 255))
158+
new_img.paste(
159+
img,
160+
((target_size[0] - img.size[0]) // 2, (target_size[1] - img.size[1]) // 2),
161+
)
162+
new_img.save(thumbnail_path)
163+
220164
return f"../_static/thumbnails/{thumbnail_name}"
221165
except Exception as e:
222166
print(f"Warning: Could not save thumbnail for {notebook_path.name}: {e}")
@@ -232,56 +176,35 @@ def generate_gallery_markdown(
232176
# Group notebooks by category
233177
categories: dict[str, list[dict]] = {}
234178
for nb_data in notebooks_data:
235-
category = nb_data["category"]
236-
if category not in categories:
237-
categories[category] = []
238-
categories[category].append(nb_data)
239-
240-
# Sort categories - maintain order from index.md (order of appearance)
241-
# Use the order from category_mapping to preserve the structure
242-
sorted_categories = [cat for cat in category_mapping.keys() if cat in categories]
243-
# Add any categories found in notebooks but not in mapping (shouldn't happen, but handle gracefully)
244-
for cat in categories.keys():
245-
if cat not in sorted_categories:
246-
sorted_categories.append(cat)
179+
categories.setdefault(nb_data["category"], []).append(nb_data)
180+
181+
# Sort categories maintaining order from index.md
182+
sorted_categories = [
183+
cat for cat in category_mapping.keys() if cat in categories
184+
] + [cat for cat in categories.keys() if cat not in category_mapping]
247185

248186
# Generate markdown
249187
lines = ["# Example Gallery\n"]
250188

251189
for category in sorted_categories:
252-
if category not in categories:
253-
continue
254-
255-
notebooks = categories[category]
256-
# Sort notebooks within category
257-
notebooks.sort(key=lambda x: x["filename"])
190+
notebooks = sorted(categories[category], key=lambda x: x["filename"])
258191

259-
lines.append(f"## {category}\n")
260-
lines.append("::::{grid} 1 2 3 3\n")
261-
lines.append(":gutter: 3\n\n")
192+
lines.extend([f"## {category}\n", "::::{grid} 1 2 3 3\n", ":gutter: 3\n\n"])
262193

263194
for nb in notebooks:
264-
# Title goes on the same line as grid-item-card (escape braces in f-string)
265-
card_lines = [f":::{'{grid-item-card}'} {nb['title']}\n"]
266-
# Add class to ensure uniform card height
267-
card_lines.append(":class-card: sd-card-h-100\n")
268-
195+
doc_name = nb["filename"].replace(".ipynb", "")
196+
card_lines = [
197+
f":::{'{grid-item-card}'} {nb['title']}\n",
198+
":class-card: sd-card-h-100\n",
199+
]
269200
if nb.get("thumbnail"):
270201
card_lines.append(f":img-top: {nb['thumbnail']}\n")
271-
272-
# Use document name without extension (relative to current directory)
273-
# Since index.md is in notebooks/, links are relative to that directory
274-
doc_name = nb["filename"].replace(".ipynb", "")
275-
card_lines.append(f":link: {doc_name}\n")
276-
card_lines.append(":link-type: doc\n")
277-
card_lines.append(":::\n")
202+
card_lines.extend([f":link: {doc_name}\n", ":link-type: doc\n", ":::\n"])
278203
lines.extend(card_lines)
279204

280205
lines.append("::::\n\n")
281206

282-
# Write to file
283-
with open(output_path, "w", encoding="utf-8") as f:
284-
f.write("".join(lines))
207+
output_path.write_text("".join(lines), encoding="utf-8")
285208

286209

287210
def main():
@@ -314,22 +237,12 @@ def main():
314237
for nb_path in notebook_files:
315238
print(f"Processing {nb_path.name}...")
316239

317-
# Extract metadata
318-
title, description = extract_metadata(nb_path)
319-
320-
# Determine category from index.md structure
321-
category = get_notebook_category(nb_path.name, category_mapping)
322-
323-
# Generate thumbnail
324-
thumbnail = extract_first_image(nb_path, thumbnails_dir)
325-
326240
notebooks_data.append(
327241
{
328242
"filename": nb_path.name,
329-
"title": title,
330-
"description": description,
331-
"category": category,
332-
"thumbnail": thumbnail,
243+
"title": extract_metadata(nb_path),
244+
"category": get_notebook_category(nb_path.name, category_mapping),
245+
"thumbnail": extract_first_image(nb_path, thumbnails_dir),
333246
}
334247
)
335248

0 commit comments

Comments
 (0)