add descriptions

juanitorduz · juanitorduz · commit 3bd5e8d46287 · 2025-11-12T20:42:52.000+01:00
diff --git a/docs/source/notebooks/index.md b/docs/source/notebooks/index.md
@@ -3,6 +3,9 @@
 On this page you can find a gallery of example notebooks that demonstrate the use of CausalPy.
 
 ## ANCOVA
+
+Analysis of covariance (ANCOVA) is a simple linear model, typically with one continuous predictor (the covariate) and a categorical variable (which may correspond to treatment or control group). In the context of this package, ANCOVA could be useful in pre-post treatment designs, either with or without random assignment. This is similar to the difference-in-differences approach, but is only applicable when there is a single pre-treatment and a single post-treatment measure.
+
 ::::{grid} 1 2 3 3
 :gutter: 3
 
@@ -15,6 +18,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us
 ::::
 
 ## Difference in Differences
+
+An analysis in which the treatment effect is estimated as the difference between treatment conditions in the change from pre-treatment to post-treatment observations.
+
 ::::{grid} 1 2 3 3
 :gutter: 3
 
@@ -39,6 +45,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us
 ::::
 
 ## Geographical lift testing
+
+Geolift (geographical lift testing) is a method for measuring the causal impact of interventions in geographic regions. It combines synthetic control methods with difference-in-differences approaches to estimate treatment effects when interventions are applied to specific geographic areas.
+
 ::::{grid} 1 2 3 3
 :gutter: 3
 
@@ -57,6 +66,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us
 ::::
 
 ## Instrumental Variables Regression
+
+A quasi-experimental design to estimate a treatment effect where there is a risk of confounding between the treatment and the outcome due to endogeneity. Instrumental variables help identify causal effects by using variables that affect treatment assignment but affect the outcome only through the treatment, not directly.
+
 ::::{grid} 1 2 3 3
 :gutter: 3
 
@@ -75,6 +87,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us
 ::::
 
 ## Interrupted Time Series
+
+A quasi-experimental design that uses time series methods to generate counterfactuals and estimate treatment effects. A series of observations is collected before and after a treatment, and the pre-treatment trend (or any time-series model) is used to predict what would have happened in the absence of treatment.
+
 ::::{grid} 1 2 3 3
 :gutter: 3
 
@@ -99,6 +114,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us
 ::::
 
 ## Inverse Propensity Score Weighting
+
+A method for estimating causal effects by weighting observations by the inverse of their probability of receiving treatment (propensity score). This helps adjust for confounding by creating a pseudo-population where treatment assignment is independent of observed covariates.
+
 ::::{grid} 1 2 3 3
 :gutter: 3
 
@@ -117,6 +135,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us
 ::::
 
 ## Regression Discontinuity
+
+A quasi-experimental design where treatment assignment is determined by a cutoff point along a running variable (e.g., test score, age, income). The treatment effect is estimated by comparing outcomes just above and below the cutoff, assuming units near the cutoff are similar except for treatment status.
+
 ::::{grid} 1 2 3 3
 :gutter: 3
 
@@ -147,6 +168,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us
 ::::
 
 ## Regression Kink Design
+
+A variation of regression discontinuity where treatment affects the slope (rate of change) of the outcome with respect to the running variable, rather than causing a discrete jump. The treatment effect is identified by a change in the slope at the cutoff point.
+
 ::::{grid} 1 2 3 3
 :gutter: 3
 
@@ -159,6 +183,9 @@ On this page you can find a gallery of example notebooks that demonstrate the us
 ::::
 
 ## Synthetic Control
+
+The synthetic control method is a statistical method used to evaluate the effect of an intervention in comparative case studies. It involves the construction of a weighted combination of groups used as controls, to which the treatment group is compared.
+
 ::::{grid} 1 2 3 3
 :gutter: 3
 
diff --git a/scripts/generate_gallery.py b/scripts/generate_gallery.py
@@ -1,15 +1,14 @@
 #!/usr/bin/env python3
 """
-Generate example gallery for CausalPy documentation.
+Generate thumbnails for CausalPy documentation gallery.
 
-This script scans notebooks in docs/source/notebooks/, extracts metadata,
-generates thumbnails from the first plot in each notebook, and creates
-a gallery page using sphinx-design cards.
+This script scans notebooks in docs/source/notebooks/ and generates
+thumbnails from the first plot in each notebook. The index.md file
+should be maintained manually.
 """
 
 import base64
 import io
-import re
 import sys
 from pathlib import Path
 
@@ -29,65 +28,6 @@
     Image = None  # type: ignore[assignment,misc]
 
 
-def load_categories_from_index(index_path: Path) -> dict[str, list[str]]:
-    """
-    Load category structure from existing index.md.
-
-    Reads the markdown file and extracts:
-    - Category names from ## headers
-    - Notebook names from :link: fields under each category
-
-    Returns
-    -------
-    dict[str, list[str]]
-        Mapping from category name to list of notebook names (without .ipynb)
-    """
-    if not index_path.exists():
-        return {}
-
-    try:
-        categories: dict[str, list[str]] = {}
-        current_category = None
-        for line in index_path.read_text(encoding="utf-8").splitlines():
-            if line.startswith("## "):
-                current_category = line[3:].strip()
-                if current_category and current_category != "Example Gallery":
-                    categories[current_category] = []
-            elif current_category and (match := re.search(r":link:\s+(\S+)", line)):
-                categories[current_category].append(match.group(1))
-        return categories
-    except Exception as e:
-        print(f"Warning: Could not load categories from {index_path}: {e}")
-        return {}
-
-
-def get_notebook_category(filename: str, category_mapping: dict[str, list[str]]) -> str:
-    """Determine the category for a notebook from the loaded mapping."""
-    notebook_name = filename.replace(".ipynb", "")
-    return next(
-        (
-            cat
-            for cat, notebooks in category_mapping.items()
-            if notebook_name in notebooks
-        ),
-        "Other",
-    )
-
-
-def extract_metadata(notebook_path: Path) -> str:
-    """Extract title from notebook."""
-    nb = nbformat.reads(notebook_path.read_text(encoding="utf-8"), as_version=4)
-
-    # Look for title in first markdown cell
-    for cell in nb.cells:
-        if cell.cell_type == "markdown":
-            if match := re.search(r"^#+\s+(.+)$", cell.source.strip(), re.MULTILINE):
-                return match.group(1).strip()
-
-    # Fallback to filename-based title
-    return notebook_path.stem.replace("_", " ").title()
-
-
 def _find_image_in_notebook(nb) -> str | None:
     """Find first PNG image in notebook outputs."""
     for cell in nb.cells:
@@ -161,60 +101,16 @@ def _save_thumbnail(
         return None
 
 
-def generate_gallery_markdown(
-    notebooks_data: list[dict],
-    output_path: Path,
-    category_mapping: dict[str, list[str]],
-):
-    """Generate gallery markdown file with sphinx-design cards."""
-    # Group notebooks by category
-    categories: dict[str, list[dict]] = {}
-    for nb_data in notebooks_data:
-        categories.setdefault(nb_data["category"], []).append(nb_data)
-
-    # Sort categories alphabetically
-    sorted_categories = sorted(categories.keys())
-
-    # Generate markdown
-    lines = ["# Example Gallery\n"]
-
-    for category in sorted_categories:
-        notebooks = sorted(categories[category], key=lambda x: x["filename"])
-
-        lines.extend([f"## {category}\n", "::::{grid} 1 2 3 3\n", ":gutter: 3\n\n"])
-
-        for nb in notebooks:
-            doc_name = nb["filename"].replace(".ipynb", "")
-            card_lines = [
-                f":::{'{grid-item-card}'} {nb['title']}\n",
-                ":class-card: sd-card-h-100\n",
-            ]
-            if nb.get("thumbnail"):
-                card_lines.append(f":img-top: {nb['thumbnail']}\n")
-            card_lines.extend([f":link: {doc_name}\n", ":link-type: doc\n", ":::\n"])
-            lines.extend(card_lines)
-
-        lines.append("::::\n\n")
-
-    output_path.write_text("".join(lines), encoding="utf-8")
-
-
 def main():
-    """Main function to generate gallery."""
+    """Main function to generate thumbnails only."""
     # Paths
     repo_root = Path(__file__).parent.parent
     notebooks_dir = repo_root / "docs" / "source" / "notebooks"
     thumbnails_dir = repo_root / "docs" / "source" / "_static" / "thumbnails"
-    output_file = notebooks_dir / "index.md"
 
     # Create thumbnails directory
     thumbnails_dir.mkdir(parents=True, exist_ok=True)
 
-    # Load category structure from existing index.md
-    category_mapping = load_categories_from_index(output_file)
-    if category_mapping:
-        print(f"Loaded {len(category_mapping)} categories from index.md")
-
     # Find all notebooks
     notebook_files = sorted(notebooks_dir.glob("*.ipynb"))
 
@@ -224,25 +120,11 @@ def main():
 
     print(f"Found {len(notebook_files)} notebooks")
 
-    # Process each notebook
-    notebooks_data = []
+    # Process each notebook to generate thumbnails
     for nb_path in notebook_files:
         print(f"Processing {nb_path.name}...")
+        extract_first_image(nb_path, thumbnails_dir)
 
-        notebooks_data.append(
-            {
-                "filename": nb_path.name,
-                "title": extract_metadata(nb_path),
-                "category": get_notebook_category(nb_path.name, category_mapping),
-                "thumbnail": extract_first_image(nb_path, thumbnails_dir),
-            }
-        )
-
-    # Generate gallery markdown
-    print("Generating gallery markdown...")
-    generate_gallery_markdown(notebooks_data, output_file, category_mapping)
-
-    print(f"Gallery generated successfully at {output_file}")
     print(f"Thumbnails saved to {thumbnails_dir}")