diff --git a/.github/workflows/check-colab-notebooks.yml b/.github/workflows/check-colab-notebooks.yml
new file mode 100644
index 00000000..7116b4a0
--- /dev/null
+++ b/.github/workflows/check-colab-notebooks.yml
@@ -0,0 +1,55 @@
+name: Check Colab notebooks
+
+on:
+ push:
+ branches: [ main ]
+ paths:
+ - 'docs/notebook_source/*.py'
+ pull_request:
+ branches: [ main ]
+ paths:
+ - 'docs/notebook_source/*.py'
+ workflow_dispatch:
+
+jobs:
+ check-colab-notebooks:
+ name: Check Colab Notebooks
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Install uv
+ uses: astral-sh/setup-uv@v5
+ with:
+ version: "latest"
+ python-version: "3.11"
+ enable-cache: true
+
+ - name: Install dependencies
+ run: |
+ uv sync --group notebooks --group docs
+
+ - name: Generate Colab notebooks
+ run: |
+ make generate-colab-notebooks
+
+ - name: Check for differences
+ run: |
+ # Get the diff, filtering out cell ID changes (which are randomly generated)
+ # Filter out: file markers (--- and +++), and "id" lines
+ MEANINGFUL_DIFF=$(git diff docs/colab_notebooks/ | grep -E '^[+-]' | grep -v '^---' | grep -v '^+++' | grep -vE '^[+-]\s*"id": "[0-9a-fA-F]+",?$' || true)
+
+ if [ -z "$MEANINGFUL_DIFF" ]; then
+          echo "✅ Colab notebooks are up-to-date (ignoring cell ID changes)"
+ else
+          echo "❌ Colab notebooks are out of sync with source files"
+ echo ""
+ echo "The generated notebooks differ from the committed ones."
+ echo "Please run 'make generate-colab-notebooks' locally and commit the changes."
+ echo ""
+ echo "Differences found:"
+ echo "$MEANINGFUL_DIFF"
+ exit 1
+ fi
diff --git a/Makefile b/Makefile
index 5dd1f59d..295400ce 100644
--- a/Makefile
+++ b/Makefile
@@ -36,6 +36,7 @@ help:
@echo "π οΈ Utilities:"
@echo " clean - Remove coverage reports and cache files"
@echo " convert-execute-notebooks - Convert notebooks from .py to .ipynb using jupytext"
+ @echo " generate-colab-notebooks - Generate Colab-compatible notebooks"
@echo " serve-docs-locally - Serve documentation locally"
@echo " check-license-headers - Check if all files have license headers"
@echo " update-license-headers - Add license headers to all files"
@@ -95,6 +96,11 @@ convert-execute-notebooks:
rm docs/notebook_source/*.csv
 	@echo "✅ Notebooks created in docs/notebooks/"
+generate-colab-notebooks:
+ @echo "π Generating Colab-compatible notebooks..."
+ uv run --group notebooks python docs/scripts/generate_colab_notebooks.py
+	@echo "✅ Colab notebooks created in docs/colab_notebooks/"
+
serve-docs-locally:
@echo "π Building and serving docs..."
uv sync --group docs
@@ -125,4 +131,4 @@ install-dev-notebooks:
$(call install-pre-commit-hooks)
 	@echo "✅ Dev + notebooks installation complete!"
-.PHONY: clean coverage format format-check lint lint-fix test check-license-headers update-license-headers check-all check-all-fix install install-dev install-dev-notebooks
+.PHONY: clean coverage format format-check lint lint-fix test check-license-headers update-license-headers check-all check-all-fix install install-dev install-dev-notebooks generate-colab-notebooks
diff --git a/docs/colab_notebooks/1-the-basics.ipynb b/docs/colab_notebooks/1-the-basics.ipynb
new file mode 100644
index 00000000..ee9e5ccc
--- /dev/null
+++ b/docs/colab_notebooks/1-the-basics.ipynb
@@ -0,0 +1,537 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "a4ac4d55",
+ "metadata": {},
+ "source": [
+ "# π¨ Data Designer Tutorial: The Basics\n",
+ "\n",
+ "#### π What you'll learn\n",
+ "\n",
+ "This notebook demonstrates the basics of Data Designer by generating a simple product review dataset.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9e9f3c47",
+ "metadata": {},
+ "source": [
+ "### β‘ Colab Setup\n",
+ "\n",
+ "Run the cells below to install the dependencies and set up the API key. If you don't have an API key, you can generate one from [build.nvidia.com](https://build.nvidia.com).\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "41b31194",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install -qU data-designer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "502b3aba",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import getpass\n",
+ "import os\n",
+ "\n",
+ "from google.colab import userdata\n",
+ "\n",
+ "try:\n",
+ " os.environ[\"NVIDIA_API_KEY\"] = userdata.get(\"NVIDIA_API_KEY\")\n",
+ "except userdata.SecretNotFoundError:\n",
+ " os.environ[\"NVIDIA_API_KEY\"] = getpass.getpass(\"Enter your NVIDIA API key: \")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8c512fbc",
+ "metadata": {},
+ "source": [
+ "### π¦ Import the essentials\n",
+ "\n",
+ "- The `essentials` module provides quick access to the most commonly used objects.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8fae521f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from data_designer.essentials import (\n",
+ " CategorySamplerParams,\n",
+ " DataDesigner,\n",
+ " DataDesignerConfigBuilder,\n",
+ " InferenceParameters,\n",
+ " LLMTextColumnConfig,\n",
+ " ModelConfig,\n",
+ " PersonFromFakerSamplerParams,\n",
+ " SamplerColumnConfig,\n",
+ " SamplerType,\n",
+ " SubcategorySamplerParams,\n",
+ " UniformSamplerParams,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e71d0256",
+ "metadata": {},
+ "source": [
+ "### βοΈ Initialize the Data Designer interface\n",
+ "\n",
+ "- `DataDesigner` is the main object is responsible for managing the data generation process.\n",
+ "\n",
+ "- When initialized without arguments, the [default model providers](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) are used.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "68fc7172",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_designer = DataDesigner()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9a821a27",
+ "metadata": {},
+ "source": [
+ "### ποΈ Define model configurations\n",
+ "\n",
+ "- Each `ModelConfig` defines a model that can be used during the generation process.\n",
+ "\n",
+ "- The \"model alias\" is used to reference the model in the Data Designer config (as we will see below).\n",
+ "\n",
+ "- The \"model provider\" is the external service that hosts the model (see the [model config](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) docs for more details).\n",
+ "\n",
+ "- By default, we use [build.nvidia.com](https://build.nvidia.com/models) as the model provider.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a9515141",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This name is set in the model provider configuration.\n",
+ "MODEL_PROVIDER = \"nvidia\"\n",
+ "\n",
+ "# The model ID is from build.nvidia.com.\n",
+ "MODEL_ID = \"nvidia/nvidia-nemotron-nano-9b-v2\"\n",
+ "\n",
+ "# We choose this alias to be descriptive for our use case.\n",
+ "MODEL_ALIAS = \"nemotron-nano-v2\"\n",
+ "\n",
+ "# This sets reasoning to False for the nemotron-nano-v2 model.\n",
+ "SYSTEM_PROMPT = \"/no_think\"\n",
+ "\n",
+ "model_configs = [\n",
+ " ModelConfig(\n",
+ " alias=MODEL_ALIAS,\n",
+ " model=MODEL_ID,\n",
+ " provider=MODEL_PROVIDER,\n",
+ " inference_parameters=InferenceParameters(\n",
+ " temperature=0.5,\n",
+ " top_p=1.0,\n",
+ " max_tokens=1024,\n",
+ " ),\n",
+ " )\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3b940ab9",
+ "metadata": {},
+ "source": [
+ "### ποΈ Initialize the Data Designer Config Builder\n",
+ "\n",
+ "- The Data Designer config defines the dataset schema and generation process.\n",
+ "\n",
+ "- The config builder provides an intuitive interface for building this configuration.\n",
+ "\n",
+ "- The list of model configs is provided to the builder at initialization.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ec21da7e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "config_builder = DataDesignerConfigBuilder(model_configs=model_configs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "85b2324e",
+ "metadata": {},
+ "source": [
+ "## π² Getting started with sampler columns\n",
+ "\n",
+ "- Sampler columns offer non-LLM based generation of synthetic data.\n",
+ "\n",
+ "- They are particularly useful for **steering the diversity** of the generated data, as we demonstrate below.\n",
+ "\n",
+    "<br>\n",
+ "\n",
+ "You can view available samplers using the config builder's `info` property:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f49f435e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "config_builder.info.display(\"samplers\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f582b642",
+ "metadata": {},
+ "source": [
+ "Let's start designing our product review dataset by adding product category and subcategory columns.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8cfc43b1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "config_builder.add_column(\n",
+ " SamplerColumnConfig(\n",
+ " name=\"product_category\",\n",
+ " sampler_type=SamplerType.CATEGORY,\n",
+ " params=CategorySamplerParams(\n",
+ " values=[\n",
+ " \"Electronics\",\n",
+ " \"Clothing\",\n",
+ " \"Home & Kitchen\",\n",
+ " \"Books\",\n",
+ " \"Home Office\",\n",
+ " ],\n",
+ " ),\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " SamplerColumnConfig(\n",
+ " name=\"product_subcategory\",\n",
+ " sampler_type=SamplerType.SUBCATEGORY,\n",
+ " params=SubcategorySamplerParams(\n",
+ " category=\"product_category\",\n",
+ " values={\n",
+ " \"Electronics\": [\n",
+ " \"Smartphones\",\n",
+ " \"Laptops\",\n",
+ " \"Headphones\",\n",
+ " \"Cameras\",\n",
+ " \"Accessories\",\n",
+ " ],\n",
+ " \"Clothing\": [\n",
+ " \"Men's Clothing\",\n",
+ " \"Women's Clothing\",\n",
+ " \"Winter Coats\",\n",
+ " \"Activewear\",\n",
+ " \"Accessories\",\n",
+ " ],\n",
+ " \"Home & Kitchen\": [\n",
+ " \"Appliances\",\n",
+ " \"Cookware\",\n",
+ " \"Furniture\",\n",
+ " \"Decor\",\n",
+ " \"Organization\",\n",
+ " ],\n",
+ " \"Books\": [\n",
+ " \"Fiction\",\n",
+ " \"Non-Fiction\",\n",
+ " \"Self-Help\",\n",
+ " \"Textbooks\",\n",
+ " \"Classics\",\n",
+ " ],\n",
+ " \"Home Office\": [\n",
+ " \"Desks\",\n",
+ " \"Chairs\",\n",
+ " \"Storage\",\n",
+ " \"Office Supplies\",\n",
+ " \"Lighting\",\n",
+ " ],\n",
+ " },\n",
+ " ),\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " SamplerColumnConfig(\n",
+ " name=\"target_age_range\",\n",
+ " sampler_type=SamplerType.CATEGORY,\n",
+ " params=CategorySamplerParams(values=[\"18-25\", \"25-35\", \"35-50\", \"50-65\", \"65+\"]),\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "# Optionally validate that the columns are configured correctly.\n",
+ "config_builder.validate()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2d0eea21",
+ "metadata": {},
+ "source": [
+ "Next, let's add samplers to generate data related to the customer and their review.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b5e65724",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "config_builder.add_column(\n",
+ " SamplerColumnConfig(\n",
+ " name=\"customer\",\n",
+ " sampler_type=SamplerType.PERSON_FROM_FAKER,\n",
+ " params=PersonFromFakerSamplerParams(age_range=[18, 70], locale=\"en_US\"),\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " SamplerColumnConfig(\n",
+ " name=\"number_of_stars\",\n",
+ " sampler_type=SamplerType.UNIFORM,\n",
+ " params=UniformSamplerParams(low=1, high=5),\n",
+ " convert_to=\"int\", # Convert the sampled float to an integer.\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " SamplerColumnConfig(\n",
+ " name=\"review_style\",\n",
+ " sampler_type=SamplerType.CATEGORY,\n",
+ " params=CategorySamplerParams(\n",
+ " values=[\"rambling\", \"brief\", \"detailed\", \"structured with bullet points\"],\n",
+ " weights=[1, 2, 2, 1],\n",
+ " ),\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "config_builder.validate()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e6788771",
+ "metadata": {},
+ "source": [
+ "## π¦ LLM-generated columns\n",
+ "\n",
+ "- The real power of Data Designer comes from leveraging LLMs to generate text, code, and structured data.\n",
+ "\n",
+ "- When prompting the LLM, we can use Jinja templating to reference other columns in the dataset.\n",
+ "\n",
+ "- As we see below, nested json fields can be accessed using dot notation.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a2705cd9",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "config_builder.add_column(\n",
+ " LLMTextColumnConfig(\n",
+ " name=\"product_name\",\n",
+ " prompt=(\n",
+ " \"You are a helpful assistant that generates product names. DO NOT add quotes around the product name.\\n\\n\"\n",
+ " \"Come up with a creative product name for a product in the '{{ product_category }}' category, focusing \"\n",
+ " \"on products related to '{{ product_subcategory }}'. The target age range of the ideal customer is \"\n",
+ " \"{{ target_age_range }} years old. Respond with only the product name, no other text.\"\n",
+ " ),\n",
+ " system_prompt=SYSTEM_PROMPT,\n",
+ " model_alias=MODEL_ALIAS,\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " LLMTextColumnConfig(\n",
+ " name=\"customer_review\",\n",
+ " prompt=(\n",
+ " \"You are a customer named {{ customer.first_name }} from {{ customer.city }}, {{ customer.state }}. \"\n",
+ " \"You are {{ customer.age }} years old and recently purchased a product called {{ product_name }}. \"\n",
+ " \"Write a review of this product, which you gave a rating of {{ number_of_stars }} stars. \"\n",
+ " \"The style of the review should be '{{ review_style }}'.\"\n",
+ " ),\n",
+ " system_prompt=SYSTEM_PROMPT,\n",
+ " model_alias=MODEL_ALIAS,\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "config_builder.validate()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e3dd2f69",
+ "metadata": {},
+ "source": [
+    "### π Iteration is key — preview the dataset!\n",
+ "\n",
+ "1. Use the `preview` method to generate a sample of records quickly.\n",
+ "\n",
+ "2. Inspect the results for quality and format issues.\n",
+ "\n",
+ "3. Adjust column configurations, prompts, or parameters as needed.\n",
+ "\n",
+ "4. Re-run the preview until satisfied.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c6e43147",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "preview = data_designer.preview(config_builder, num_records=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fab77d01",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Run this cell multiple times to cycle through the 2 preview records.\n",
+ "preview.display_sample_record()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "875ee6a6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The preview dataset is available as a pandas DataFrame.\n",
+ "preview.dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "87b59e4b",
+ "metadata": {},
+ "source": [
+ "### π Analyze the generated data\n",
+ "\n",
+ "- Data Designer automatically generates a basic statistical analysis of the generated data.\n",
+ "\n",
+ "- This analysis is available via the `analysis` property of generation result objects.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5d347f4c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Print the analysis as a table.\n",
+ "preview.analysis.to_report()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d2fb84f2",
+ "metadata": {},
+ "source": [
+ "### π Scale up!\n",
+ "\n",
+ "- Happy with your preview data?\n",
+ "\n",
+ "- Use the `create` method to submit larger Data Designer generation jobs.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "71a31e85",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "results = data_designer.create(config_builder, num_records=10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "501e9092",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the generated dataset as a pandas DataFrame.\n",
+ "dataset = results.load_dataset()\n",
+ "\n",
+ "dataset.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6f217b4a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the analysis results into memory.\n",
+ "analysis = results.load_analysis()\n",
+ "\n",
+ "analysis.to_report()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4da82b0f",
+ "metadata": {},
+ "source": [
+ "## βοΈ Next Steps\n",
+ "\n",
+ "Now that you've seen the basics of Data Designer, check out the following notebooks to learn more about:\n",
+ "\n",
+ "- [Structured outputs and jinja expressions](/notebooks/2-structured-outputs-and-jinja-expressions/)\n",
+ "\n",
+ "- [Seeding synthetic data generation with an external dataset](/notebooks/3-seeding-with-a-dataset/)\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb
new file mode 100644
index 00000000..10156a10
--- /dev/null
+++ b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb
@@ -0,0 +1,567 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "a70798bc",
+ "metadata": {},
+ "source": [
+ "# π¨ Data Designer Tutorial: Structured Outputs and Jinja Expressions\n",
+ "\n",
+ "#### π What you'll learn\n",
+ "\n",
+ "In this notebook, we will continue our exploration of Data Designer, demonstrating more advanced data generation using structured outputs and Jinja expressions.\n",
+ "\n",
+ "If this is your first time using Data Designer, we recommend starting with the [first notebook](/notebooks/1-the-basics/) in this tutorial series.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4ed4c65d",
+ "metadata": {},
+ "source": [
+ "### β‘ Colab Setup\n",
+ "\n",
+ "Run the cells below to install the dependencies and set up the API key. If you don't have an API key, you can generate one from [build.nvidia.com](https://build.nvidia.com).\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "73432e8e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install -qU data-designer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1a6aac78",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import getpass\n",
+ "import os\n",
+ "\n",
+ "from google.colab import userdata\n",
+ "\n",
+ "try:\n",
+ " os.environ[\"NVIDIA_API_KEY\"] = userdata.get(\"NVIDIA_API_KEY\")\n",
+ "except userdata.SecretNotFoundError:\n",
+ " os.environ[\"NVIDIA_API_KEY\"] = getpass.getpass(\"Enter your NVIDIA API key: \")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "03df6d1c",
+ "metadata": {},
+ "source": [
+ "### π¦ Import the essentials\n",
+ "\n",
+ "- The `essentials` module provides quick access to the most commonly used objects.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "befb6573",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from data_designer.essentials import (\n",
+ " CategorySamplerParams,\n",
+ " DataDesigner,\n",
+ " DataDesignerConfigBuilder,\n",
+ " ExpressionColumnConfig,\n",
+ " InferenceParameters,\n",
+ " LLMStructuredColumnConfig,\n",
+ " ModelConfig,\n",
+ " PersonFromFakerSamplerParams,\n",
+ " SamplerColumnConfig,\n",
+ " SamplerType,\n",
+ " SubcategorySamplerParams,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a022d1ae",
+ "metadata": {},
+ "source": [
+ "### βοΈ Initialize the Data Designer interface\n",
+ "\n",
+ "- `DataDesigner` is the main object that is used to interface with the library.\n",
+ "\n",
+ "- When initialized without arguments, the [default model providers](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) are used.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "568e1d91",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_designer_client = DataDesigner()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "de1ef709",
+ "metadata": {},
+ "source": [
+ "### ποΈ Define model configurations\n",
+ "\n",
+ "- Each `ModelConfig` defines a model that can be used during the generation process.\n",
+ "\n",
+ "- The \"model alias\" is used to reference the model in the Data Designer config (as we will see below).\n",
+ "\n",
+ "- The \"model provider\" is the external service that hosts the model (see the [model config](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) docs for more details).\n",
+ "\n",
+ "- By default, we use [build.nvidia.com](https://build.nvidia.com/models) as the model provider.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6757f43c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This name is set in the model provider configuration.\n",
+ "MODEL_PROVIDER = \"nvidia\"\n",
+ "\n",
+ "# The model ID is from build.nvidia.com.\n",
+ "MODEL_ID = \"nvidia/nvidia-nemotron-nano-9b-v2\"\n",
+ "\n",
+ "# We choose this alias to be descriptive for our use case.\n",
+ "MODEL_ALIAS = \"nemotron-nano-v2\"\n",
+ "\n",
+ "# This sets reasoning to False for the nemotron-nano-v2 model.\n",
+ "SYSTEM_PROMPT = \"/no_think\"\n",
+ "\n",
+ "model_configs = [\n",
+ " ModelConfig(\n",
+ " alias=MODEL_ALIAS,\n",
+ " model=MODEL_ID,\n",
+ " provider=MODEL_PROVIDER,\n",
+ " inference_parameters=InferenceParameters(\n",
+ " temperature=0.5,\n",
+ " top_p=1.0,\n",
+ " max_tokens=1024,\n",
+ " ),\n",
+ " )\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3d1ea9b6",
+ "metadata": {},
+ "source": [
+ "### ποΈ Initialize the Data Designer Config Builder\n",
+ "\n",
+ "- The Data Designer config defines the dataset schema and generation process.\n",
+ "\n",
+ "- The config builder provides an intuitive interface for building this configuration.\n",
+ "\n",
+ "- The list of model configs is provided to the builder at initialization.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f9ad410a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "config_builder = DataDesignerConfigBuilder(model_configs=model_configs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8f918afa",
+ "metadata": {},
+ "source": [
+ "### π§βπ¨ Designing our data\n",
+ "\n",
+ "- We will again create a product review dataset, but this time we will use structured outputs and Jinja expressions.\n",
+ "\n",
+ "- Structured outputs let you specify the exact schema of the data you want to generate.\n",
+ "\n",
+ "- Data Designer supports schemas specified using either json schema or Pydantic data models (recommended).\n",
+ "\n",
+    "<br>\n",
+ "\n",
+ "We'll define our structured outputs using [Pydantic](https://docs.pydantic.dev/latest/) data models\n",
+ "\n",
+ "> π‘ **Why Pydantic?**\n",
+ ">\n",
+ "> - Pydantic models provide better IDE support and type validation.\n",
+ ">\n",
+ "> - They are more Pythonic than raw JSON schemas.\n",
+ ">\n",
+ "> - They integrate seamlessly with Data Designer's structured output system.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6aafd123",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from decimal import Decimal\n",
+ "from typing import Literal\n",
+ "\n",
+ "from pydantic import BaseModel, Field\n",
+ "\n",
+ "\n",
+ "# We define a Product schema so that the name, description, and price are generated\n",
+ "# in one go, with the types and constraints specified.\n",
+ "class Product(BaseModel):\n",
+ " name: str = Field(description=\"The name of the product\")\n",
+ " description: str = Field(description=\"A description of the product\")\n",
+ " price: Decimal = Field(description=\"The price of the product\", ge=10, le=1000, decimal_places=2)\n",
+ "\n",
+ "\n",
+ "class ProductReview(BaseModel):\n",
+ " rating: int = Field(description=\"The rating of the product\", ge=1, le=5)\n",
+ " customer_mood: Literal[\"irritated\", \"mad\", \"happy\", \"neutral\", \"excited\"] = Field(\n",
+ " description=\"The mood of the customer\"\n",
+ " )\n",
+ " review: str = Field(description=\"A review of the product\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9727c5ae",
+ "metadata": {},
+ "source": [
+ "Next, let's design our product review dataset using a few more tricks compared to the previous notebook.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9f9df709",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Since we often only want a few attributes from Person objects, we can\n",
+ "# set drop=True in the column config to drop the column from the final dataset.\n",
+ "config_builder.add_column(\n",
+ " SamplerColumnConfig(\n",
+ " name=\"customer\",\n",
+ " sampler_type=SamplerType.PERSON_FROM_FAKER,\n",
+ " params=PersonFromFakerSamplerParams(),\n",
+ " drop=True,\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " SamplerColumnConfig(\n",
+ " name=\"product_category\",\n",
+ " sampler_type=SamplerType.CATEGORY,\n",
+ " params=CategorySamplerParams(\n",
+ " values=[\n",
+ " \"Electronics\",\n",
+ " \"Clothing\",\n",
+ " \"Home & Kitchen\",\n",
+ " \"Books\",\n",
+ " \"Home Office\",\n",
+ " ],\n",
+ " ),\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " SamplerColumnConfig(\n",
+ " name=\"product_subcategory\",\n",
+ " sampler_type=SamplerType.SUBCATEGORY,\n",
+ " params=SubcategorySamplerParams(\n",
+ " category=\"product_category\",\n",
+ " values={\n",
+ " \"Electronics\": [\n",
+ " \"Smartphones\",\n",
+ " \"Laptops\",\n",
+ " \"Headphones\",\n",
+ " \"Cameras\",\n",
+ " \"Accessories\",\n",
+ " ],\n",
+ " \"Clothing\": [\n",
+ " \"Men's Clothing\",\n",
+ " \"Women's Clothing\",\n",
+ " \"Winter Coats\",\n",
+ " \"Activewear\",\n",
+ " \"Accessories\",\n",
+ " ],\n",
+ " \"Home & Kitchen\": [\n",
+ " \"Appliances\",\n",
+ " \"Cookware\",\n",
+ " \"Furniture\",\n",
+ " \"Decor\",\n",
+ " \"Organization\",\n",
+ " ],\n",
+ " \"Books\": [\n",
+ " \"Fiction\",\n",
+ " \"Non-Fiction\",\n",
+ " \"Self-Help\",\n",
+ " \"Textbooks\",\n",
+ " \"Classics\",\n",
+ " ],\n",
+ " \"Home Office\": [\n",
+ " \"Desks\",\n",
+ " \"Chairs\",\n",
+ " \"Storage\",\n",
+ " \"Office Supplies\",\n",
+ " \"Lighting\",\n",
+ " ],\n",
+ " },\n",
+ " ),\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " SamplerColumnConfig(\n",
+ " name=\"target_age_range\",\n",
+ " sampler_type=SamplerType.CATEGORY,\n",
+ " params=CategorySamplerParams(values=[\"18-25\", \"25-35\", \"35-50\", \"50-65\", \"65+\"]),\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "# Sampler columns support conditional params, which are used if the condition is met.\n",
+ "# In this example, we set the review style to rambling if the target age range is 18-25.\n",
+ "# Note conditional parameters are only supported for Sampler column types.\n",
+ "config_builder.add_column(\n",
+ " SamplerColumnConfig(\n",
+ " name=\"review_style\",\n",
+ " sampler_type=SamplerType.CATEGORY,\n",
+ " params=CategorySamplerParams(\n",
+ " values=[\"rambling\", \"brief\", \"detailed\", \"structured with bullet points\"],\n",
+ " weights=[1, 2, 2, 1],\n",
+ " ),\n",
+ " conditional_params={\n",
+ " \"target_age_range == '18-25'\": CategorySamplerParams(values=[\"rambling\"]),\n",
+ " },\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "# Optionally validate that the columns are configured correctly.\n",
+ "config_builder.validate()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f42b7843",
+ "metadata": {},
+ "source": [
+ "Next, we will use more advanced Jinja expressions to create new columns.\n",
+ "\n",
+ "Jinja expressions let you:\n",
+ "\n",
+ "- Access nested attributes: `{{ customer.first_name }}`\n",
+ "\n",
+ "- Combine values: `{{ customer.first_name }} {{ customer.last_name }}`\n",
+ "\n",
+ "- Use conditional logic: `{% if condition %}...{% endif %}`\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d231f52e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# We can create new columns using Jinja expressions that reference\n",
+ "# existing columns, including attributes of nested objects.\n",
+ "config_builder.add_column(\n",
+ " ExpressionColumnConfig(name=\"customer_name\", expr=\"{{ customer.first_name }} {{ customer.last_name }}\")\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(ExpressionColumnConfig(name=\"customer_age\", expr=\"{{ customer.age }}\"))\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " LLMStructuredColumnConfig(\n",
+ " name=\"product\",\n",
+ " prompt=(\n",
+ " \"Create a product in the '{{ product_category }}' category, focusing on products \"\n",
+ " \"related to '{{ product_subcategory }}'. The target age range of the ideal customer is \"\n",
+ " \"{{ target_age_range }} years old. The product should be priced between $10 and $1000.\"\n",
+ " ),\n",
+ " system_prompt=SYSTEM_PROMPT,\n",
+ " output_format=Product,\n",
+ " model_alias=MODEL_ALIAS,\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "# We can even use if/else logic in our Jinja expressions to create more complex prompt patterns.\n",
+ "config_builder.add_column(\n",
+ " LLMStructuredColumnConfig(\n",
+ " name=\"customer_review\",\n",
+ " prompt=(\n",
+ " \"Your task is to write a review for the following product:\\n\\n\"\n",
+ " \"Product Name: {{ product.name }}\\n\"\n",
+ " \"Product Description: {{ product.description }}\\n\"\n",
+ " \"Price: {{ product.price }}\\n\\n\"\n",
+ " \"Imagine your name is {{ customer_name }} and you are from {{ customer.city }}, {{ customer.state }}. \"\n",
+ " \"Write the review in a style that is '{{ review_style }}'.\"\n",
+ " \"{% if target_age_range == '18-25' %}\"\n",
+ " \"Make sure the review is more informal and conversational.\"\n",
+ " \"{% else %}\"\n",
+ " \"Make sure the review is more formal and structured.\"\n",
+ " \"{% endif %}\"\n",
+ " ),\n",
+ " system_prompt=SYSTEM_PROMPT,\n",
+ " output_format=ProductReview,\n",
+ " model_alias=MODEL_ALIAS,\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "config_builder.validate()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "01dbf368",
+ "metadata": {},
+ "source": [
+    "### π Iteration is key — preview the dataset!\n",
+ "\n",
+ "1. Use the `preview` method to generate a sample of records quickly.\n",
+ "\n",
+ "2. Inspect the results for quality and format issues.\n",
+ "\n",
+ "3. Adjust column configurations, prompts, or parameters as needed.\n",
+ "\n",
+ "4. Re-run the preview until satisfied.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7d0389c6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "preview = data_designer_client.preview(config_builder, num_records=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b312d760",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Run this cell multiple times to cycle through the 2 preview records.\n",
+ "preview.display_sample_record()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "6252ae9a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The preview dataset is available as a pandas DataFrame.\n",
+ "preview.dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ea32a75e",
+ "metadata": {},
+ "source": [
+ "### π Analyze the generated data\n",
+ "\n",
+ "- Data Designer automatically generates a basic statistical analysis of the generated data.\n",
+ "\n",
+ "- This analysis is available via the `analysis` property of generation result objects.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c094a400",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Print the analysis as a table.\n",
+ "preview.analysis.to_report()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8565a283",
+ "metadata": {},
+ "source": [
+ "### π Scale up!\n",
+ "\n",
+ "- Happy with your preview data?\n",
+ "\n",
+ "- Use the `create` method to submit larger Data Designer generation jobs.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d48a294c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "job_results = data_designer_client.create(config_builder, num_records=10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "96268be4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the generated dataset as a pandas DataFrame.\n",
+ "dataset = job_results.load_dataset()\n",
+ "\n",
+ "dataset.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a2137aaf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the analysis results into memory.\n",
+ "analysis = job_results.load_analysis()\n",
+ "\n",
+ "analysis.to_report()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c43a66cc",
+ "metadata": {},
+ "source": [
+ "## βοΈ Next Steps\n",
+ "\n",
+ "Check out the following notebook to learn more about:\n",
+ "\n",
+ "- [Seeding synthetic data generation with an external dataset](/notebooks/3-seeding-with-a-dataset/)\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb
new file mode 100644
index 00000000..db5c9d27
--- /dev/null
+++ b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb
@@ -0,0 +1,465 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "c4c424f2",
+ "metadata": {},
+ "source": [
+ "# π¨ Data Designer Tutorial: Seeding Synthetic Data Generation with an External Dataset\n",
+ "\n",
+ "#### π What you'll learn\n",
+ "\n",
+ "In this notebook, we will demonstrate how to seed synthetic data generation in Data Designer with an external dataset.\n",
+ "\n",
+ "If this is your first time using Data Designer, we recommend starting with the [first notebook](/notebooks/1-the-basics/) in this tutorial series.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2ceb13f2",
+ "metadata": {},
+ "source": [
+ "### β‘ Colab Setup\n",
+ "\n",
+ "Run the cells below to install the dependencies and set up the API key. If you don't have an API key, you can generate one from [build.nvidia.com](https://build.nvidia.com).\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ad4096cd",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install -qU data-designer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "21336bf3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import getpass\n",
+ "import os\n",
+ "\n",
+ "from google.colab import userdata\n",
+ "\n",
+ "try:\n",
+ " os.environ[\"NVIDIA_API_KEY\"] = userdata.get(\"NVIDIA_API_KEY\")\n",
+ "except userdata.SecretNotFoundError:\n",
+ " os.environ[\"NVIDIA_API_KEY\"] = getpass.getpass(\"Enter your NVIDIA API key: \")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "db8fa976",
+ "metadata": {},
+ "source": [
+ "### π¦ Import the essentials\n",
+ "\n",
+ "- The `essentials` module provides quick access to the most commonly used objects.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c2e17b2b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from data_designer.essentials import (\n",
+ " DataDesigner,\n",
+ " DataDesignerConfigBuilder,\n",
+ " InferenceParameters,\n",
+ " ModelConfig,\n",
+ " SeedConfig,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "29c28647",
+ "metadata": {},
+ "source": [
+ "### βοΈ Initialize the Data Designer interface\n",
+ "\n",
+ "- `DataDesigner` is the main object responsible for managing the data generation process.\n",
+ "\n",
+ "- When initialized without arguments, the [default model providers](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) are used.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "cecf9242",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_designer_client = DataDesigner()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "74eab801",
+ "metadata": {},
+ "source": [
+ "### ποΈ Define model configurations\n",
+ "\n",
+ "- Each `ModelConfig` defines a model that can be used during the generation process.\n",
+ "\n",
+ "- The \"model alias\" is used to reference the model in the Data Designer config (as we will see below).\n",
+ "\n",
+ "- The \"model provider\" is the external service that hosts the model (see the [model config](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) docs for more details).\n",
+ "\n",
+ "- By default, we use [build.nvidia.com](https://build.nvidia.com/models) as the model provider.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b31991e8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This name is set in the model provider configuration.\n",
+ "MODEL_PROVIDER = \"nvidia\"\n",
+ "\n",
+ "# The model ID is from build.nvidia.com.\n",
+ "MODEL_ID = \"nvidia/nvidia-nemotron-nano-9b-v2\"\n",
+ "\n",
+ "# We choose this alias to be descriptive for our use case.\n",
+ "MODEL_ALIAS = \"nemotron-nano-v2\"\n",
+ "\n",
+ "# This sets reasoning to False for the nemotron-nano-v2 model.\n",
+ "SYSTEM_PROMPT = \"/no_think\"\n",
+ "\n",
+ "model_configs = [\n",
+ " ModelConfig(\n",
+ " alias=MODEL_ALIAS,\n",
+ " model=MODEL_ID,\n",
+ " provider=MODEL_PROVIDER,\n",
+ " inference_parameters=InferenceParameters(\n",
+ " temperature=0.5,\n",
+ " top_p=1.0,\n",
+ " max_tokens=1024,\n",
+ " ),\n",
+ " )\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "006d1625",
+ "metadata": {},
+ "source": [
+ "### ποΈ Initialize the Data Designer Config Builder\n",
+ "\n",
+ "- The Data Designer config defines the dataset schema and generation process.\n",
+ "\n",
+ "- The config builder provides an intuitive interface for building this configuration.\n",
+ "\n",
+ "- The list of model configs is provided to the builder at initialization.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "396a4ed5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "config_builder = DataDesignerConfigBuilder(model_configs=model_configs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9860369b",
+ "metadata": {},
+ "source": [
+ "## π₯ Prepare a seed dataset\n",
+ "\n",
+ "- For this notebook, we'll create a synthetic dataset of patient notes.\n",
+ "\n",
+ "- We will _seed_ the generation process with a [symptom-to-diagnosis dataset](https://huggingface.co/datasets/gretelai/symptom_to_diagnosis).\n",
+ "\n",
+ "- We already have the dataset downloaded in the [data](../data) directory of this repository.\n",
+ "\n",
+ "\n",
+ "\n",
+ "> π± **Why use a seed dataset?**\n",
+ ">\n",
+ "> - Seed datasets let you steer the generation process by providing context that is specific to your use case.\n",
+ ">\n",
+ "> - Seed datasets are also an excellent way to inject real-world diversity into your synthetic data.\n",
+ ">\n",
+ "> - During generation, prompt templates can reference any of the seed dataset fields.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "65a2fa3c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Download sample dataset from Github\n",
+ "import urllib.request\n",
+ "\n",
+ "url = \"https://raw.githubusercontent.com/NVIDIA/GenerativeAIExamples/refs/heads/main/nemo/NeMo-Data-Designer/data/gretelai_symptom_to_diagnosis.csv\"\n",
+ "local_filename, headers = urllib.request.urlretrieve(url, \"gretelai_symptom_to_diagnosis.csv\")\n",
+ "\n",
+ "seed_dataset = SeedConfig(dataset=local_filename)\n",
+ "\n",
+ "# Pass the reference to the config builder for use during generation.\n",
+ "config_builder.with_seed_dataset(seed_dataset)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ea11134a",
+ "metadata": {},
+ "source": [
+ "## π¨ Designing our synthetic patient notes dataset\n",
+ "\n",
+ "- Here we use `add_column` with keyword arguments (rather than imported config objects).\n",
+ "\n",
+ "- Generally, we recommend using concrete objects, but this is a convenient shorthand.\n",
+ "\n",
+ "- **Note**: The prompt template can reference fields from our seed dataset:\n",
+ " - `{{ diagnosis }}` - the medical diagnosis from the seed data\n",
+ " - `{{ patient_summary }}` - the symptom description from the seed data\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "209af41c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "config_builder.add_column(\n",
+ " name=\"patient_sampler\",\n",
+ " column_type=\"sampler\",\n",
+ " sampler_type=\"person_from_faker\",\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " name=\"doctor_sampler\",\n",
+ " column_type=\"sampler\",\n",
+ " sampler_type=\"person_from_faker\",\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " name=\"patient_id\",\n",
+ " column_type=\"sampler\",\n",
+ " sampler_type=\"uuid\",\n",
+ " params={\n",
+ " \"prefix\": \"PT-\",\n",
+ " \"short_form\": True,\n",
+ " \"uppercase\": True,\n",
+ " },\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " name=\"first_name\",\n",
+ " column_type=\"expression\",\n",
+ " expr=\"{{ patient_sampler.first_name}}\",\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " name=\"last_name\",\n",
+ " column_type=\"expression\",\n",
+ " expr=\"{{ patient_sampler.last_name }}\",\n",
+ ")\n",
+ "\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " name=\"dob\",\n",
+ " column_type=\"expression\",\n",
+ " expr=\"{{ patient_sampler.birth_date }}\",\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " name=\"symptom_onset_date\",\n",
+ " column_type=\"sampler\",\n",
+ " sampler_type=\"datetime\",\n",
+ " params={\"start\": \"2024-01-01\", \"end\": \"2024-12-31\"},\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " name=\"date_of_visit\",\n",
+ " column_type=\"sampler\",\n",
+ " sampler_type=\"timedelta\",\n",
+ " params={\"dt_min\": 1, \"dt_max\": 30, \"reference_column_name\": \"symptom_onset_date\"},\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " name=\"physician\",\n",
+ " column_type=\"expression\",\n",
+ " expr=\"Dr. {{ doctor_sampler.last_name }}\",\n",
+ ")\n",
+ "\n",
+ "config_builder.add_column(\n",
+ " name=\"physician_notes\",\n",
+ " column_type=\"llm-text\",\n",
+ " prompt=\"\"\"\\\n",
+ "You are a primary-care physician who just had an appointment with {{ first_name }} {{ last_name }},\n",
+ "who has been struggling with symptoms from {{ diagnosis }} since {{ symptom_onset_date }}.\n",
+ "The date of today's visit is {{ date_of_visit }}.\n",
+ "\n",
+ "{{ patient_summary }}\n",
+ "\n",
+ "Write careful notes about your visit with {{ first_name }},\n",
+ "as Dr. {{ doctor_sampler.first_name }} {{ doctor_sampler.last_name }}.\n",
+ "\n",
+ "Format the notes as a busy doctor might.\n",
+ "\"\"\",\n",
+ " model_alias=MODEL_ALIAS,\n",
+ " system_prompt=SYSTEM_PROMPT,\n",
+ ")\n",
+ "\n",
+ "config_builder.validate()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "32f43d20",
+ "metadata": {},
+ "source": [
+ "### π Iteration is key βΒ preview the dataset!\n",
+ "\n",
+ "1. Use the `preview` method to generate a sample of records quickly.\n",
+ "\n",
+ "2. Inspect the results for quality and format issues.\n",
+ "\n",
+ "3. Adjust column configurations, prompts, or parameters as needed.\n",
+ "\n",
+ "4. Re-run the preview until satisfied.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "402a1025",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "preview = data_designer_client.preview(config_builder, num_records=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "de294013",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Run this cell multiple times to cycle through the 2 preview records.\n",
+ "preview.display_sample_record()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a38c360d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The preview dataset is available as a pandas DataFrame.\n",
+ "preview.dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4a385031",
+ "metadata": {},
+ "source": [
+ "### π Analyze the generated data\n",
+ "\n",
+ "- Data Designer automatically generates a basic statistical analysis of the generated data.\n",
+ "\n",
+ "- This analysis is available via the `analysis` property of generation result objects.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3531b1e4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Print the analysis as a table.\n",
+ "preview.analysis.to_report()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7fec6251",
+ "metadata": {},
+ "source": [
+ "### π Scale up!\n",
+ "\n",
+ "- Happy with your preview data?\n",
+ "\n",
+ "- Use the `create` method to submit larger Data Designer generation jobs.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bace6acb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "job_results = data_designer_client.create(config_builder, num_records=10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3eaafc74",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the generated dataset as a pandas DataFrame.\n",
+ "dataset = job_results.load_dataset()\n",
+ "\n",
+ "dataset.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "473a0c89",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the analysis results into memory.\n",
+ "analysis = job_results.load_analysis()\n",
+ "\n",
+ "analysis.to_report()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7408eef8",
+ "metadata": {},
+ "source": [
+ "## βοΈ Next Steps\n",
+ "\n",
+ "Use Data Designer to generate synthetic data for your specific use case!\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/colab_notebooks/4-providing-images-as-context.ipynb b/docs/colab_notebooks/4-providing-images-as-context.ipynb
new file mode 100644
index 00000000..bac63375
--- /dev/null
+++ b/docs/colab_notebooks/4-providing-images-as-context.ipynb
@@ -0,0 +1,540 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "ebd062e8",
+ "metadata": {},
+ "source": [
+ "# π¨ Data Designer Tutorial: Providing Images as Context for Vision-Based Data Generation"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3ce5b7f7",
+ "metadata": {},
+ "source": [
+ "#### π What you'll learn\n",
+ "\n",
+ "This notebook demonstrates how to provide images as context to generate text descriptions using vision-language models.\n",
+ "\n",
+ "- β¨ **Visual Document Processing**: Converting images to chat-ready format for model consumption\n",
+ "- π **Vision-Language Generation**: Using vision models to generate detailed summaries from images\n",
+ "\n",
+ "If this is your first time using Data Designer, we recommend starting with the [first notebook](/notebooks/1-the-basics/) in this tutorial series.\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "04d0bfd4",
+ "metadata": {},
+ "source": [
+ "### β‘ Colab Setup\n",
+ "\n",
+ "Run the cells below to install the dependencies and set up the API key. If you don't have an API key, you can generate one from [build.nvidia.com](https://build.nvidia.com).\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "661740a3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install -qU data-designer"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b9c4188b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "!pip install -q 'pillow>=12.0.0'"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ddaeb938",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import getpass\n",
+ "import os\n",
+ "\n",
+ "from google.colab import userdata\n",
+ "\n",
+ "try:\n",
+ " os.environ[\"NVIDIA_API_KEY\"] = userdata.get(\"NVIDIA_API_KEY\")\n",
+ "except userdata.SecretNotFoundError:\n",
+ " os.environ[\"NVIDIA_API_KEY\"] = getpass.getpass(\"Enter your NVIDIA API key: \")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c3aac5e8",
+ "metadata": {},
+ "source": [
+ "### π¦ Import the essentials\n",
+ "\n",
+ "- The `essentials` module provides quick access to the most commonly used objects.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c8b905ee",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Standard library imports\n",
+ "import base64\n",
+ "import io\n",
+ "import uuid\n",
+ "\n",
+ "# Third-party imports\n",
+ "import pandas as pd\n",
+ "import rich\n",
+ "from datasets import load_dataset\n",
+ "from IPython.display import display\n",
+ "from rich.panel import Panel\n",
+ "\n",
+ "# Data Designer imports\n",
+ "from data_designer.essentials import (\n",
+ " DataDesigner,\n",
+ " DataDesignerConfigBuilder,\n",
+ " ImageContext,\n",
+ " ImageFormat,\n",
+ " InferenceParameters,\n",
+ " LLMTextColumnConfig,\n",
+ " ModalityDataType,\n",
+ " ModelConfig,\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f508d655",
+ "metadata": {},
+ "source": [
+ "### βοΈ Initialize the Data Designer interface\n",
+ "\n",
+ "- `DataDesigner` is the main object responsible for managing the data generation process.\n",
+ "\n",
+ "- When initialized without arguments, the [default model providers](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) are used.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f0607008",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "data_designer = DataDesigner()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a0467d8c",
+ "metadata": {},
+ "source": [
+ "### ποΈ Define model configurations\n",
+ "\n",
+ "- Each `ModelConfig` defines a model that can be used during the generation process.\n",
+ "\n",
+ "- The \"model alias\" is used to reference the model in the Data Designer config (as we will see below).\n",
+ "\n",
+ "- The \"model provider\" is the external service that hosts the model (see the [model config](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) docs for more details).\n",
+ "\n",
+ "- By default, we use [build.nvidia.com](https://build.nvidia.com/models) as the model provider.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1e4d9a15",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This name is set in the model provider configuration.\n",
+ "MODEL_PROVIDER = \"nvidia\"\n",
+ "\n",
+ "model_configs = [\n",
+ " ModelConfig(\n",
+ " alias=\"vision\",\n",
+ " model=\"meta/llama-4-scout-17b-16e-instruct\",\n",
+ " provider=MODEL_PROVIDER,\n",
+ " inference_parameters=InferenceParameters(\n",
+ " temperature=0.60,\n",
+ " top_p=0.95,\n",
+ " max_tokens=2048,\n",
+ " ),\n",
+ " ),\n",
+ "]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6d66a8a6",
+ "metadata": {},
+ "source": [
+ "### ποΈ Initialize the Data Designer Config Builder\n",
+ "\n",
+ "- The Data Designer config defines the dataset schema and generation process.\n",
+ "\n",
+ "- The config builder provides an intuitive interface for building this configuration.\n",
+ "\n",
+ "- The list of model configs is provided to the builder at initialization.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "80ad9b04",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "config_builder = DataDesignerConfigBuilder(model_configs=model_configs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "75935c33",
+ "metadata": {},
+ "source": [
+ "### π± Seed Dataset Creation\n",
+ "\n",
+ "In this section, we'll prepare our visual documents as a seed dataset for summarization:\n",
+ "\n",
+ "- **Loading Visual Documents**: We use the ColPali dataset containing document images\n",
+ "- **Image Processing**: Convert images to base64 format for vision model consumption\n",
+ "- **Metadata Extraction**: Preserve relevant document information (filename, page number, source, etc.)\n",
+ "\n",
+ "The seed dataset will be used to generate detailed text summaries of each document image."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "868e41af",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Dataset processing configuration\n",
+ "IMG_COUNT = 512 # Number of images to process\n",
+ "BASE64_IMAGE_HEIGHT = 512 # Standardized height for model input\n",
+ "\n",
+ "# Load ColPali dataset for visual documents\n",
+ "img_dataset_cfg = {\"path\": \"vidore/colpali_train_set\", \"split\": \"train\", \"streaming\": True}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5466e56f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def resize_image(image, height: int):\n",
+ " \"\"\"\n",
+ " Resize image while maintaining aspect ratio.\n",
+ "\n",
+ " Args:\n",
+ " image: PIL Image object\n",
+ " height: Target height in pixels\n",
+ "\n",
+ " Returns:\n",
+ " Resized PIL Image object\n",
+ " \"\"\"\n",
+ " original_width, original_height = image.size\n",
+ " width = int(original_width * (height / original_height))\n",
+ " return image.resize((width, height))\n",
+ "\n",
+ "\n",
+ "def convert_image_to_chat_format(record, height: int) -> dict:\n",
+ " \"\"\"\n",
+ " Convert PIL image to base64 format for chat template usage.\n",
+ "\n",
+ " Args:\n",
+ " record: Dataset record containing image and metadata\n",
+ " height: Target height for image resizing\n",
+ "\n",
+ " Returns:\n",
+ " Updated record with base64_image and uuid fields\n",
+ " \"\"\"\n",
+ " # Resize image for consistent processing\n",
+ " image = resize_image(record[\"image\"], height)\n",
+ "\n",
+ " # Convert to base64 string\n",
+ " img_buffer = io.BytesIO()\n",
+ " image.save(img_buffer, format=\"PNG\")\n",
+ " byte_data = img_buffer.getvalue()\n",
+ " base64_encoded_data = base64.b64encode(byte_data)\n",
+ " base64_string = base64_encoded_data.decode(\"utf-8\")\n",
+ "\n",
+ " # Return updated record\n",
+ " return record | {\"base64_image\": base64_string, \"uuid\": str(uuid.uuid4())}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bbca6568",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load and process the visual document dataset\n",
+ "print(\"π₯ Loading and processing document images...\")\n",
+ "\n",
+ "img_dataset_iter = iter(\n",
+ " load_dataset(**img_dataset_cfg).map(convert_image_to_chat_format, fn_kwargs={\"height\": BASE64_IMAGE_HEIGHT})\n",
+ ")\n",
+ "img_dataset = pd.DataFrame([next(img_dataset_iter) for _ in range(IMG_COUNT)])\n",
+ "\n",
+ "print(f\"✅ Loaded {len(img_dataset)} images with columns: {list(img_dataset.columns)}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5ce17e71",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "img_dataset.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9c0faf28",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Add the seed dataset containing our processed images\n",
+ "df_seed = pd.DataFrame(img_dataset)[[\"uuid\", \"image_filename\", \"base64_image\", \"page\", \"options\", \"source\"]]\n",
+ "config_builder.with_seed_dataset(\n",
+ " DataDesigner.make_seed_reference_from_dataframe(df_seed, file_path=\"colpali_train_set.csv\")\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "35bc8b77",
+ "metadata": {
+ "lines_to_next_cell": 2
+ },
+ "outputs": [],
+ "source": [
+ "# Add a column to generate detailed document summaries\n",
+ "config_builder.add_column(\n",
+ " LLMTextColumnConfig(\n",
+ " name=\"summary\",\n",
+ " model_alias=\"vision\",\n",
+ " prompt=(\n",
+ " \"Provide a detailed summary of the content in this image in Markdown format. \"\n",
+ " \"Start from the top of the image and then describe it from top to bottom. \"\n",
+ " \"Place a summary at the bottom.\"\n",
+ " ),\n",
+ " multi_modal_context=[\n",
+ " ImageContext(\n",
+ " column_name=\"base64_image\",\n",
+ " data_type=ModalityDataType.BASE64,\n",
+ " image_format=ImageFormat.PNG,\n",
+ " )\n",
+ " ],\n",
+ " )\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "16ac3f58",
+ "metadata": {
+ "lines_to_next_cell": 2
+ },
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1d13a0e4",
+ "metadata": {},
+ "source": [
+ "### π Iteration is key β preview the dataset!\n",
+ "\n",
+ "1. Use the `preview` method to generate a sample of records quickly.\n",
+ "\n",
+ "2. Inspect the results for quality and format issues.\n",
+ "\n",
+ "3. Adjust column configurations, prompts, or parameters as needed.\n",
+ "\n",
+ "4. Re-run the preview until satisfied.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "336af89f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "preview = data_designer.preview(config_builder, num_records=2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5a57a536",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Run this cell multiple times to cycle through the 2 preview records.\n",
+ "preview.display_sample_record()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9e05b387",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# The preview dataset is available as a pandas DataFrame.\n",
+ "preview.dataset"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "f69d543e",
+ "metadata": {},
+ "source": [
+ "### π Analyze the generated data\n",
+ "\n",
+ "- Data Designer automatically generates a basic statistical analysis of the generated data.\n",
+ "\n",
+ "- This analysis is available via the `analysis` property of generation result objects.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e3cb66a7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Print the analysis as a table.\n",
+ "preview.analysis.to_report()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "60815241",
+ "metadata": {},
+ "source": [
+ "### π Visual Inspection\n",
+ "\n",
+ "Let's compare the original document image with the generated summary to validate quality:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3c9dddf6",
+ "metadata": {
+ "lines_to_next_cell": 2
+ },
+ "outputs": [],
+ "source": [
+ "# Compare original document with generated summary\n",
+ "index = 0 # Change this to view different examples\n",
+ "\n",
+ "# Merge preview data with original images for comparison\n",
+ "comparison_dataset = preview.dataset.merge(pd.DataFrame(img_dataset)[[\"uuid\", \"image\"]], how=\"left\", on=\"uuid\")\n",
+ "\n",
+ "# Extract the record for display\n",
+ "record = comparison_dataset.iloc[index]\n",
+ "\n",
+ "print(\"π Original Document Image:\")\n",
+ "display(resize_image(record.image, BASE64_IMAGE_HEIGHT))\n",
+ "\n",
+ "print(\"\\nπ Generated Summary:\")\n",
+ "rich.print(Panel(record.summary, title=\"Document Summary\", title_align=\"left\"))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "eb6e2469",
+ "metadata": {},
+ "source": [
+ "### π Scale up!\n",
+ "\n",
+ "- Happy with your preview data?\n",
+ "\n",
+ "- Use the `create` method to submit larger Data Designer generation jobs.\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d6a0ed1c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "results = data_designer.create(config_builder, num_records=10)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "255b8f6f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the generated dataset as a pandas DataFrame.\n",
+ "dataset = results.load_dataset()\n",
+ "\n",
+ "dataset.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "43b935b3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Load the analysis results into memory.\n",
+ "analysis = results.load_analysis()\n",
+ "\n",
+ "analysis.to_report()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a74f1121",
+ "metadata": {},
+ "source": [
+ "## βοΈ Next Steps\n",
+ "\n",
+ "Now that you've learned how to use visual context for image summarization in Data Designer, explore more:\n",
+ "\n",
+ "- Experiment with different vision models for specific document types\n",
+ "- Try different prompt variations to generate specialized descriptions (e.g., technical details, key findings)\n",
+ "- Combine vision-based summaries with other column types for multi-modal workflows\n",
+ "- Apply this pattern to other vision tasks like image captioning, OCR validation, or visual question answering\n"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/docs/notebook_source/4-providing-images-as-context.py b/docs/notebook_source/4-providing-images-as-context.py
index 10afd4bd..dc2513cd 100644
--- a/docs/notebook_source/4-providing-images-as-context.py
+++ b/docs/notebook_source/4-providing-images-as-context.py
@@ -26,12 +26,6 @@
# If this is your first time using Data Designer, we recommend starting with the [first notebook](/notebooks/1-the-basics/) in this tutorial series.
#
-# %% [markdown]
-# ### β¬οΈ Install dependencies (if required)
-
-# %%
-# !uv pip install pillow
-
# %% [markdown]
# ### π¦ Import the essentials
#
diff --git a/docs/overrides/main.html b/docs/overrides/main.html
index b2d13089..be7a7c51 100644
--- a/docs/overrides/main.html
+++ b/docs/overrides/main.html
@@ -2,16 +2,29 @@
{% block outdated %}
You're not viewing the latest version.
-
+
Click here to go to latest.
{% endblock %}
{% block content %}
{% if page.nb_url %}
-
- {% include ".icons/material/download.svg" %}
+
+
+
+
+
+
+ {% include ".icons/material/download.svg" %}
+
{% endif %}
{{ super() }}
diff --git a/docs/scripts/generate_colab_notebooks.py b/docs/scripts/generate_colab_notebooks.py
new file mode 100644
index 00000000..cb375776
--- /dev/null
+++ b/docs/scripts/generate_colab_notebooks.py
@@ -0,0 +1,185 @@
+# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+"""Script to generate Colab-compatible notebooks from notebook source files.
+
+This script processes jupytext percent-format Python files and:
+1. Injects Colab-specific setup cells (pip install, API key from secrets)
+2. Injects cells before the "Import the essentials" section
+3. Saves the result as .ipynb files in docs/colab_notebooks
+"""
+
+from __future__ import annotations
+
+import argparse
+from pathlib import Path
+
+import jupytext
+from nbformat import NotebookNode
+from nbformat.v4 import new_code_cell, new_markdown_cell
+
+COLAB_SETUP_MARKDOWN = """\
+### β‘ Colab Setup
+
+Run the cells below to install the dependencies and set up the API key. If you don't have an API key, you can generate one from [build.nvidia.com](https://build.nvidia.com).
+"""
+
+ADDITIONAL_DEPENDENCIES = {
+ "4-providing-images-as-context.py": "pillow>=12.0.0",
+}
+
+COLAB_INSTALL_CELL = """\
+!pip install -qU data-designer"""
+
+COLAB_DEPENDENCIES_CELL = """\
+!pip install -q '{deps}'"""
+
+COLAB_API_KEY_CELL = """\
+import getpass
+import os
+
+from google.colab import userdata
+
+try:
+ os.environ["NVIDIA_API_KEY"] = userdata.get("NVIDIA_API_KEY")
+except userdata.SecretNotFoundError:
+ os.environ["NVIDIA_API_KEY"] = getpass.getpass("Enter your NVIDIA API key: ")"""
+
+
+def create_colab_setup_cells(additional_dependencies: str) -> list[NotebookNode]:
+ """Create the Colab-specific setup cells to inject before imports."""
+ cells = []
+ cells += [new_markdown_cell(source=COLAB_SETUP_MARKDOWN)]
+ cells += [new_code_cell(source=COLAB_INSTALL_CELL)]
+ if additional_dependencies:
+ cells += [new_code_cell(source=COLAB_DEPENDENCIES_CELL.format(deps=additional_dependencies))]
+ cells += [new_code_cell(source=COLAB_API_KEY_CELL)]
+ return cells
+
+
+def find_import_section_index(cells: list[NotebookNode]) -> int:
+ """Find the index of the 'Import the essentials' markdown cell."""
+ first_code_cell_index = -1
+ for i, cell in enumerate(cells):
+ if first_code_cell_index == -1 and cell.get("cell_type") == "code":
+ first_code_cell_index = i
+
+ if cell.get("cell_type") == "markdown":
+ source = cell.get("source", "")
+ if "import" in source.lower() and "essentials" in source.lower():
+ return i
+ return first_code_cell_index
+
+
+def process_notebook(notebook: NotebookNode, source_path: Path) -> NotebookNode:
+ """Process a notebook to make it Colab-compatible.
+
+ Args:
+        notebook: The input notebook; source_path: source file, keys ADDITIONAL_DEPENDENCIES
+
+ Returns:
+ The processed notebook with Colab setup cells injected
+ """
+ cells = notebook.cells
+
+ additional_dependencies = ADDITIONAL_DEPENDENCIES.get(source_path.name, "")
+
+ # Find where to insert Colab setup (before "Import the essentials")
+ import_idx = find_import_section_index(cells)
+
+ if import_idx == -1:
+ # If not found, insert after first cell (title)
+ import_idx = 1
+
+ # Insert Colab setup cells before the import section
+ colab_cells = create_colab_setup_cells(additional_dependencies)
+ processed_cells = cells[:import_idx] + colab_cells + cells[import_idx:]
+
+ notebook.cells = processed_cells
+ return notebook
+
+
+def generate_colab_notebook(source_path: Path, output_dir: Path) -> Path:
+ """Generate a Colab-compatible notebook from a source file.
+
+ Args:
+ source_path: Path to the jupytext percent-format Python source file
+ output_dir: Directory to save the output notebook
+
+ Returns:
+ Path to the generated notebook
+ """
+ # Read the source file using jupytext
+ notebook = jupytext.read(source_path)
+
+ # Process the notebook for Colab
+ notebook = process_notebook(notebook, source_path)
+
+ # Determine output path
+ output_path = output_dir / f"{source_path.stem}.ipynb"
+
+ # Ensure output directory exists
+ output_dir.mkdir(parents=True, exist_ok=True)
+
+ # Write the notebook
+ jupytext.write(notebook, output_path, config={"metadata": {"jupytext": {"cell_metadata_filter": "-id"}}})
+
+ return output_path
+
+
+def main() -> None:
+ """Main entry point for the script."""
+ parser = argparse.ArgumentParser(description="Generate Colab-compatible notebooks from notebook source files.")
+ parser.add_argument(
+ "--source-dir",
+ type=Path,
+ default=Path("docs/notebook_source"),
+ help="Directory containing notebook source files (default: docs/notebook_source)",
+ )
+ parser.add_argument(
+ "--output-dir",
+ type=Path,
+ default=Path("docs/colab_notebooks"),
+ help="Directory to save Colab notebooks (default: docs/colab_notebooks)",
+ )
+ parser.add_argument(
+ "--files",
+ nargs="*",
+ help="Specific files to process (if not specified, process all .py files)",
+ )
+
+ args = parser.parse_args()
+
+ # Get list of source files
+ if args.files:
+ source_files = [args.source_dir / f for f in args.files]
+ else:
+ source_files = sorted(args.source_dir.glob("*.py"))
+ # Filter out files starting with underscore (like _README.md, _pyproject.toml)
+ source_files = [f for f in source_files if not f.name.startswith("_")]
+
+ if not source_files:
+ print(f"No source files found in {args.source_dir}")
+ return
+
+ print(f"π Generating Colab notebooks from {len(source_files)} source file(s)...")
+ print(f" Source: {args.source_dir}")
+ print(f" Output: {args.output_dir}")
+ print()
+
+ for source_path in source_files:
+ if not source_path.exists():
+ print(f"β οΈ Skipping {source_path} (file not found)")
+ continue
+
+ try:
+ output_path = generate_colab_notebook(source_path, args.output_dir)
+            print(f"✅ {source_path.name} → {output_path.name}")
+ except Exception as e:
+ print(f"β {source_path.name}: {e}")
+
+ print()
+ print(f"β¨ Colab notebooks saved to {args.output_dir}/")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/pyproject.toml b/pyproject.toml
index e8a488c8..49a6de9b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -77,6 +77,7 @@ docs = [
notebooks = [
"jupyter>=1.0.0",
"ipykernel>=6.29.0",
+ "pillow>=12.0.0",
]
[build-system]
diff --git a/uv.lock b/uv.lock
index 85e2405e..5e7cb007 100644
--- a/uv.lock
+++ b/uv.lock
@@ -754,6 +754,7 @@ docs = [
notebooks = [
{ name = "ipykernel" },
{ name = "jupyter" },
+ { name = "pillow" },
]
[package.metadata]
@@ -812,6 +813,7 @@ docs = [
notebooks = [
{ name = "ipykernel", specifier = ">=6.29.0" },
{ name = "jupyter", specifier = ">=1.0.0" },
+ { name = "pillow", specifier = ">=12.0.0" },
]
[[package]]
@@ -3045,6 +3047,104 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772, upload-time = "2023-11-25T06:56:14.81Z" },
]
+[[package]]
+name = "pillow"
+version = "12.0.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/cace85a1b0c9775a9f8f5d5423c8261c858760e2466c79b2dd184638b056/pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353", size = 47008828, upload-time = "2025-10-15T18:24:14.008Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5d/08/26e68b6b5da219c2a2cb7b563af008b53bb8e6b6fcb3fa40715fcdb2523a/pillow-12.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:3adfb466bbc544b926d50fe8f4a4e6abd8c6bffd28a26177594e6e9b2b76572b", size = 5289809, upload-time = "2025-10-15T18:21:27.791Z" },
+ { url = "https://files.pythonhosted.org/packages/cb/e9/4e58fb097fb74c7b4758a680aacd558810a417d1edaa7000142976ef9d2f/pillow-12.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1ac11e8ea4f611c3c0147424eae514028b5e9077dd99ab91e1bd7bc33ff145e1", size = 4650606, upload-time = "2025-10-15T18:21:29.823Z" },
+ { url = "https://files.pythonhosted.org/packages/4b/e0/1fa492aa9f77b3bc6d471c468e62bfea1823056bf7e5e4f1914d7ab2565e/pillow-12.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d49e2314c373f4c2b39446fb1a45ed333c850e09d0c59ac79b72eb3b95397363", size = 6221023, upload-time = "2025-10-15T18:21:31.415Z" },
+ { url = "https://files.pythonhosted.org/packages/c1/09/4de7cd03e33734ccd0c876f0251401f1314e819cbfd89a0fcb6e77927cc6/pillow-12.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c7b2a63fd6d5246349f3d3f37b14430d73ee7e8173154461785e43036ffa96ca", size = 8024937, upload-time = "2025-10-15T18:21:33.453Z" },
+ { url = "https://files.pythonhosted.org/packages/2e/69/0688e7c1390666592876d9d474f5e135abb4acb39dcb583c4dc5490f1aff/pillow-12.0.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d64317d2587c70324b79861babb9c09f71fbb780bad212018874b2c013d8600e", size = 6334139, upload-time = "2025-10-15T18:21:35.395Z" },
+ { url = "https://files.pythonhosted.org/packages/ed/1c/880921e98f525b9b44ce747ad1ea8f73fd7e992bafe3ca5e5644bf433dea/pillow-12.0.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d77153e14b709fd8b8af6f66a3afbb9ed6e9fc5ccf0b6b7e1ced7b036a228782", size = 7026074, upload-time = "2025-10-15T18:21:37.219Z" },
+ { url = "https://files.pythonhosted.org/packages/28/03/96f718331b19b355610ef4ebdbbde3557c726513030665071fd025745671/pillow-12.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:32ed80ea8a90ee3e6fa08c21e2e091bba6eda8eccc83dbc34c95169507a91f10", size = 6448852, upload-time = "2025-10-15T18:21:39.168Z" },
+ { url = "https://files.pythonhosted.org/packages/3a/a0/6a193b3f0cc9437b122978d2c5cbce59510ccf9a5b48825096ed7472da2f/pillow-12.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c828a1ae702fc712978bda0320ba1b9893d99be0badf2647f693cc01cf0f04fa", size = 7117058, upload-time = "2025-10-15T18:21:40.997Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/c4/043192375eaa4463254e8e61f0e2ec9a846b983929a8d0a7122e0a6d6fff/pillow-12.0.0-cp310-cp310-win32.whl", hash = "sha256:bd87e140e45399c818fac4247880b9ce719e4783d767e030a883a970be632275", size = 6295431, upload-time = "2025-10-15T18:21:42.518Z" },
+ { url = "https://files.pythonhosted.org/packages/92/c6/c2f2fc7e56301c21827e689bb8b0b465f1b52878b57471a070678c0c33cd/pillow-12.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:455247ac8a4cfb7b9bc45b7e432d10421aea9fc2e74d285ba4072688a74c2e9d", size = 7000412, upload-time = "2025-10-15T18:21:44.404Z" },
+ { url = "https://files.pythonhosted.org/packages/b2/d2/5f675067ba82da7a1c238a73b32e3fd78d67f9d9f80fbadd33a40b9c0481/pillow-12.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:6ace95230bfb7cd79ef66caa064bbe2f2a1e63d93471c3a2e1f1348d9f22d6b7", size = 2435903, upload-time = "2025-10-15T18:21:46.29Z" },
+ { url = "https://files.pythonhosted.org/packages/0e/5a/a2f6773b64edb921a756eb0729068acad9fc5208a53f4a349396e9436721/pillow-12.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0fd00cac9c03256c8b2ff58f162ebcd2587ad3e1f2e397eab718c47e24d231cc", size = 5289798, upload-time = "2025-10-15T18:21:47.763Z" },
+ { url = "https://files.pythonhosted.org/packages/2e/05/069b1f8a2e4b5a37493da6c5868531c3f77b85e716ad7a590ef87d58730d/pillow-12.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3475b96f5908b3b16c47533daaa87380c491357d197564e0ba34ae75c0f3257", size = 4650589, upload-time = "2025-10-15T18:21:49.515Z" },
+ { url = "https://files.pythonhosted.org/packages/61/e3/2c820d6e9a36432503ead175ae294f96861b07600a7156154a086ba7111a/pillow-12.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:110486b79f2d112cf6add83b28b627e369219388f64ef2f960fef9ebaf54c642", size = 6230472, upload-time = "2025-10-15T18:21:51.052Z" },
+ { url = "https://files.pythonhosted.org/packages/4f/89/63427f51c64209c5e23d4d52071c8d0f21024d3a8a487737caaf614a5795/pillow-12.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5269cc1caeedb67e6f7269a42014f381f45e2e7cd42d834ede3c703a1d915fe3", size = 8033887, upload-time = "2025-10-15T18:21:52.604Z" },
+ { url = "https://files.pythonhosted.org/packages/f6/1b/c9711318d4901093c15840f268ad649459cd81984c9ec9887756cca049a5/pillow-12.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa5129de4e174daccbc59d0a3b6d20eaf24417d59851c07ebb37aeb02947987c", size = 6343964, upload-time = "2025-10-15T18:21:54.619Z" },
+ { url = "https://files.pythonhosted.org/packages/41/1e/db9470f2d030b4995083044cd8738cdd1bf773106819f6d8ba12597d5352/pillow-12.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bee2a6db3a7242ea309aa7ee8e2780726fed67ff4e5b40169f2c940e7eb09227", size = 7034756, upload-time = "2025-10-15T18:21:56.151Z" },
+ { url = "https://files.pythonhosted.org/packages/cc/b0/6177a8bdd5ee4ed87cba2de5a3cc1db55ffbbec6176784ce5bb75aa96798/pillow-12.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:90387104ee8400a7b4598253b4c406f8958f59fcf983a6cea2b50d59f7d63d0b", size = 6458075, upload-time = "2025-10-15T18:21:57.759Z" },
+ { url = "https://files.pythonhosted.org/packages/bc/5e/61537aa6fa977922c6a03253a0e727e6e4a72381a80d63ad8eec350684f2/pillow-12.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc91a56697869546d1b8f0a3ff35224557ae7f881050e99f615e0119bf934b4e", size = 7125955, upload-time = "2025-10-15T18:21:59.372Z" },
+ { url = "https://files.pythonhosted.org/packages/1f/3d/d5033539344ee3cbd9a4d69e12e63ca3a44a739eb2d4c8da350a3d38edd7/pillow-12.0.0-cp311-cp311-win32.whl", hash = "sha256:27f95b12453d165099c84f8a8bfdfd46b9e4bda9e0e4b65f0635430027f55739", size = 6298440, upload-time = "2025-10-15T18:22:00.982Z" },
+ { url = "https://files.pythonhosted.org/packages/4d/42/aaca386de5cc8bd8a0254516957c1f265e3521c91515b16e286c662854c4/pillow-12.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b583dc9070312190192631373c6c8ed277254aa6e6084b74bdd0a6d3b221608e", size = 6999256, upload-time = "2025-10-15T18:22:02.617Z" },
+ { url = "https://files.pythonhosted.org/packages/ba/f1/9197c9c2d5708b785f631a6dfbfa8eb3fb9672837cb92ae9af812c13b4ed/pillow-12.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:759de84a33be3b178a64c8ba28ad5c135900359e85fb662bc6e403ad4407791d", size = 2436025, upload-time = "2025-10-15T18:22:04.598Z" },
+ { url = "https://files.pythonhosted.org/packages/2c/90/4fcce2c22caf044e660a198d740e7fbc14395619e3cb1abad12192c0826c/pillow-12.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:53561a4ddc36facb432fae7a9d8afbfaf94795414f5cdc5fc52f28c1dca90371", size = 5249377, upload-time = "2025-10-15T18:22:05.993Z" },
+ { url = "https://files.pythonhosted.org/packages/fd/e0/ed960067543d080691d47d6938ebccbf3976a931c9567ab2fbfab983a5dd/pillow-12.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:71db6b4c1653045dacc1585c1b0d184004f0d7e694c7b34ac165ca70c0838082", size = 4650343, upload-time = "2025-10-15T18:22:07.718Z" },
+ { url = "https://files.pythonhosted.org/packages/e7/a1/f81fdeddcb99c044bf7d6faa47e12850f13cee0849537a7d27eeab5534d4/pillow-12.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2fa5f0b6716fc88f11380b88b31fe591a06c6315e955c096c35715788b339e3f", size = 6232981, upload-time = "2025-10-15T18:22:09.287Z" },
+ { url = "https://files.pythonhosted.org/packages/88/e1/9098d3ce341a8750b55b0e00c03f1630d6178f38ac191c81c97a3b047b44/pillow-12.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82240051c6ca513c616f7f9da06e871f61bfd7805f566275841af15015b8f98d", size = 8041399, upload-time = "2025-10-15T18:22:10.872Z" },
+ { url = "https://files.pythonhosted.org/packages/a7/62/a22e8d3b602ae8cc01446d0c57a54e982737f44b6f2e1e019a925143771d/pillow-12.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55f818bd74fe2f11d4d7cbc65880a843c4075e0ac7226bc1a23261dbea531953", size = 6347740, upload-time = "2025-10-15T18:22:12.769Z" },
+ { url = "https://files.pythonhosted.org/packages/4f/87/424511bdcd02c8d7acf9f65caa09f291a519b16bd83c3fb3374b3d4ae951/pillow-12.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b87843e225e74576437fd5b6a4c2205d422754f84a06942cfaf1dc32243e45a8", size = 7040201, upload-time = "2025-10-15T18:22:14.813Z" },
+ { url = "https://files.pythonhosted.org/packages/dc/4d/435c8ac688c54d11755aedfdd9f29c9eeddf68d150fe42d1d3dbd2365149/pillow-12.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c607c90ba67533e1b2355b821fef6764d1dd2cbe26b8c1005ae84f7aea25ff79", size = 6462334, upload-time = "2025-10-15T18:22:16.375Z" },
+ { url = "https://files.pythonhosted.org/packages/2b/f2/ad34167a8059a59b8ad10bc5c72d4d9b35acc6b7c0877af8ac885b5f2044/pillow-12.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:21f241bdd5080a15bc86d3466a9f6074a9c2c2b314100dd896ac81ee6db2f1ba", size = 7134162, upload-time = "2025-10-15T18:22:17.996Z" },
+ { url = "https://files.pythonhosted.org/packages/0c/b1/a7391df6adacf0a5c2cf6ac1cf1fcc1369e7d439d28f637a847f8803beb3/pillow-12.0.0-cp312-cp312-win32.whl", hash = "sha256:dd333073e0cacdc3089525c7df7d39b211bcdf31fc2824e49d01c6b6187b07d0", size = 6298769, upload-time = "2025-10-15T18:22:19.923Z" },
+ { url = "https://files.pythonhosted.org/packages/a2/0b/d87733741526541c909bbf159e338dcace4f982daac6e5a8d6be225ca32d/pillow-12.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe611163f6303d1619bbcb653540a4d60f9e55e622d60a3108be0d5b441017a", size = 7001107, upload-time = "2025-10-15T18:22:21.644Z" },
+ { url = "https://files.pythonhosted.org/packages/bc/96/aaa61ce33cc98421fb6088af2a03be4157b1e7e0e87087c888e2370a7f45/pillow-12.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:7dfb439562f234f7d57b1ac6bc8fe7f838a4bd49c79230e0f6a1da93e82f1fad", size = 2436012, upload-time = "2025-10-15T18:22:23.621Z" },
+ { url = "https://files.pythonhosted.org/packages/62/f2/de993bb2d21b33a98d031ecf6a978e4b61da207bef02f7b43093774c480d/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:0869154a2d0546545cde61d1789a6524319fc1897d9ee31218eae7a60ccc5643", size = 4045493, upload-time = "2025-10-15T18:22:25.758Z" },
+ { url = "https://files.pythonhosted.org/packages/0e/b6/bc8d0c4c9f6f111a783d045310945deb769b806d7574764234ffd50bc5ea/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:a7921c5a6d31b3d756ec980f2f47c0cfdbce0fc48c22a39347a895f41f4a6ea4", size = 4120461, upload-time = "2025-10-15T18:22:27.286Z" },
+ { url = "https://files.pythonhosted.org/packages/5d/57/d60d343709366a353dc56adb4ee1e7d8a2cc34e3fbc22905f4167cfec119/pillow-12.0.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:1ee80a59f6ce048ae13cda1abf7fbd2a34ab9ee7d401c46be3ca685d1999a399", size = 3576912, upload-time = "2025-10-15T18:22:28.751Z" },
+ { url = "https://files.pythonhosted.org/packages/a4/a4/a0a31467e3f83b94d37568294b01d22b43ae3c5d85f2811769b9c66389dd/pillow-12.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c50f36a62a22d350c96e49ad02d0da41dbd17ddc2e29750dbdba4323f85eb4a5", size = 5249132, upload-time = "2025-10-15T18:22:30.641Z" },
+ { url = "https://files.pythonhosted.org/packages/83/06/48eab21dd561de2914242711434c0c0eb992ed08ff3f6107a5f44527f5e9/pillow-12.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5193fde9a5f23c331ea26d0cf171fbf67e3f247585f50c08b3e205c7aeb4589b", size = 4650099, upload-time = "2025-10-15T18:22:32.73Z" },
+ { url = "https://files.pythonhosted.org/packages/fc/bd/69ed99fd46a8dba7c1887156d3572fe4484e3f031405fcc5a92e31c04035/pillow-12.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bde737cff1a975b70652b62d626f7785e0480918dece11e8fef3c0cf057351c3", size = 6230808, upload-time = "2025-10-15T18:22:34.337Z" },
+ { url = "https://files.pythonhosted.org/packages/ea/94/8fad659bcdbf86ed70099cb60ae40be6acca434bbc8c4c0d4ef356d7e0de/pillow-12.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6597ff2b61d121172f5844b53f21467f7082f5fb385a9a29c01414463f93b07", size = 8037804, upload-time = "2025-10-15T18:22:36.402Z" },
+ { url = "https://files.pythonhosted.org/packages/20/39/c685d05c06deecfd4e2d1950e9a908aa2ca8bc4e6c3b12d93b9cafbd7837/pillow-12.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b817e7035ea7f6b942c13aa03bb554fc44fea70838ea21f8eb31c638326584e", size = 6345553, upload-time = "2025-10-15T18:22:38.066Z" },
+ { url = "https://files.pythonhosted.org/packages/38/57/755dbd06530a27a5ed74f8cb0a7a44a21722ebf318edbe67ddbd7fb28f88/pillow-12.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4f1231b7dec408e8670264ce63e9c71409d9583dd21d32c163e25213ee2a344", size = 7037729, upload-time = "2025-10-15T18:22:39.769Z" },
+ { url = "https://files.pythonhosted.org/packages/ca/b6/7e94f4c41d238615674d06ed677c14883103dce1c52e4af16f000338cfd7/pillow-12.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e51b71417049ad6ab14c49608b4a24d8fb3fe605e5dfabfe523b58064dc3d27", size = 6459789, upload-time = "2025-10-15T18:22:41.437Z" },
+ { url = "https://files.pythonhosted.org/packages/9c/14/4448bb0b5e0f22dd865290536d20ec8a23b64e2d04280b89139f09a36bb6/pillow-12.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d120c38a42c234dc9a8c5de7ceaaf899cf33561956acb4941653f8bdc657aa79", size = 7130917, upload-time = "2025-10-15T18:22:43.152Z" },
+ { url = "https://files.pythonhosted.org/packages/dd/ca/16c6926cc1c015845745d5c16c9358e24282f1e588237a4c36d2b30f182f/pillow-12.0.0-cp313-cp313-win32.whl", hash = "sha256:4cc6b3b2efff105c6a1656cfe59da4fdde2cda9af1c5e0b58529b24525d0a098", size = 6302391, upload-time = "2025-10-15T18:22:44.753Z" },
+ { url = "https://files.pythonhosted.org/packages/6d/2a/dd43dcfd6dae9b6a49ee28a8eedb98c7d5ff2de94a5d834565164667b97b/pillow-12.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:4cf7fed4b4580601c4345ceb5d4cbf5a980d030fd5ad07c4d2ec589f95f09905", size = 7007477, upload-time = "2025-10-15T18:22:46.838Z" },
+ { url = "https://files.pythonhosted.org/packages/77/f0/72ea067f4b5ae5ead653053212af05ce3705807906ba3f3e8f58ddf617e6/pillow-12.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:9f0b04c6b8584c2c193babcccc908b38ed29524b29dd464bc8801bf10d746a3a", size = 2435918, upload-time = "2025-10-15T18:22:48.399Z" },
+ { url = "https://files.pythonhosted.org/packages/f5/5e/9046b423735c21f0487ea6cb5b10f89ea8f8dfbe32576fe052b5ba9d4e5b/pillow-12.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7fa22993bac7b77b78cae22bad1e2a987ddf0d9015c63358032f84a53f23cdc3", size = 5251406, upload-time = "2025-10-15T18:22:49.905Z" },
+ { url = "https://files.pythonhosted.org/packages/12/66/982ceebcdb13c97270ef7a56c3969635b4ee7cd45227fa707c94719229c5/pillow-12.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f135c702ac42262573fe9714dfe99c944b4ba307af5eb507abef1667e2cbbced", size = 4653218, upload-time = "2025-10-15T18:22:51.587Z" },
+ { url = "https://files.pythonhosted.org/packages/16/b3/81e625524688c31859450119bf12674619429cab3119eec0e30a7a1029cb/pillow-12.0.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c85de1136429c524e55cfa4e033b4a7940ac5c8ee4d9401cc2d1bf48154bbc7b", size = 6266564, upload-time = "2025-10-15T18:22:53.215Z" },
+ { url = "https://files.pythonhosted.org/packages/98/59/dfb38f2a41240d2408096e1a76c671d0a105a4a8471b1871c6902719450c/pillow-12.0.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38df9b4bfd3db902c9c2bd369bcacaf9d935b2fff73709429d95cc41554f7b3d", size = 8069260, upload-time = "2025-10-15T18:22:54.933Z" },
+ { url = "https://files.pythonhosted.org/packages/dc/3d/378dbea5cd1874b94c312425ca77b0f47776c78e0df2df751b820c8c1d6c/pillow-12.0.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d87ef5795da03d742bf49439f9ca4d027cde49c82c5371ba52464aee266699a", size = 6379248, upload-time = "2025-10-15T18:22:56.605Z" },
+ { url = "https://files.pythonhosted.org/packages/84/b0/d525ef47d71590f1621510327acec75ae58c721dc071b17d8d652ca494d8/pillow-12.0.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aff9e4d82d082ff9513bdd6acd4f5bd359f5b2c870907d2b0a9c5e10d40c88fe", size = 7066043, upload-time = "2025-10-15T18:22:58.53Z" },
+ { url = "https://files.pythonhosted.org/packages/61/2c/aced60e9cf9d0cde341d54bf7932c9ffc33ddb4a1595798b3a5150c7ec4e/pillow-12.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8d8ca2b210ada074d57fcee40c30446c9562e542fc46aedc19baf758a93532ee", size = 6490915, upload-time = "2025-10-15T18:23:00.582Z" },
+ { url = "https://files.pythonhosted.org/packages/ef/26/69dcb9b91f4e59f8f34b2332a4a0a951b44f547c4ed39d3e4dcfcff48f89/pillow-12.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:99a7f72fb6249302aa62245680754862a44179b545ded638cf1fef59befb57ef", size = 7157998, upload-time = "2025-10-15T18:23:02.627Z" },
+ { url = "https://files.pythonhosted.org/packages/61/2b/726235842220ca95fa441ddf55dd2382b52ab5b8d9c0596fe6b3f23dafe8/pillow-12.0.0-cp313-cp313t-win32.whl", hash = "sha256:4078242472387600b2ce8d93ade8899c12bf33fa89e55ec89fe126e9d6d5d9e9", size = 6306201, upload-time = "2025-10-15T18:23:04.709Z" },
+ { url = "https://files.pythonhosted.org/packages/c0/3d/2afaf4e840b2df71344ababf2f8edd75a705ce500e5dc1e7227808312ae1/pillow-12.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2c54c1a783d6d60595d3514f0efe9b37c8808746a66920315bfd34a938d7994b", size = 7013165, upload-time = "2025-10-15T18:23:06.46Z" },
+ { url = "https://files.pythonhosted.org/packages/6f/75/3fa09aa5cf6ed04bee3fa575798ddf1ce0bace8edb47249c798077a81f7f/pillow-12.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:26d9f7d2b604cd23aba3e9faf795787456ac25634d82cd060556998e39c6fa47", size = 2437834, upload-time = "2025-10-15T18:23:08.194Z" },
+ { url = "https://files.pythonhosted.org/packages/54/2a/9a8c6ba2c2c07b71bec92cf63e03370ca5e5f5c5b119b742bcc0cde3f9c5/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:beeae3f27f62308f1ddbcfb0690bf44b10732f2ef43758f169d5e9303165d3f9", size = 4045531, upload-time = "2025-10-15T18:23:10.121Z" },
+ { url = "https://files.pythonhosted.org/packages/84/54/836fdbf1bfb3d66a59f0189ff0b9f5f666cee09c6188309300df04ad71fa/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:d4827615da15cd59784ce39d3388275ec093ae3ee8d7f0c089b76fa87af756c2", size = 4120554, upload-time = "2025-10-15T18:23:12.14Z" },
+ { url = "https://files.pythonhosted.org/packages/0d/cd/16aec9f0da4793e98e6b54778a5fbce4f375c6646fe662e80600b8797379/pillow-12.0.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:3e42edad50b6909089750e65c91aa09aaf1e0a71310d383f11321b27c224ed8a", size = 3576812, upload-time = "2025-10-15T18:23:13.962Z" },
+ { url = "https://files.pythonhosted.org/packages/f6/b7/13957fda356dc46339298b351cae0d327704986337c3c69bb54628c88155/pillow-12.0.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e5d8efac84c9afcb40914ab49ba063d94f5dbdf5066db4482c66a992f47a3a3b", size = 5252689, upload-time = "2025-10-15T18:23:15.562Z" },
+ { url = "https://files.pythonhosted.org/packages/fc/f5/eae31a306341d8f331f43edb2e9122c7661b975433de5e447939ae61c5da/pillow-12.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:266cd5f2b63ff316d5a1bba46268e603c9caf5606d44f38c2873c380950576ad", size = 4650186, upload-time = "2025-10-15T18:23:17.379Z" },
+ { url = "https://files.pythonhosted.org/packages/86/62/2a88339aa40c4c77e79108facbd307d6091e2c0eb5b8d3cf4977cfca2fe6/pillow-12.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:58eea5ebe51504057dd95c5b77d21700b77615ab0243d8152793dc00eb4faf01", size = 6230308, upload-time = "2025-10-15T18:23:18.971Z" },
+ { url = "https://files.pythonhosted.org/packages/c7/33/5425a8992bcb32d1cb9fa3dd39a89e613d09a22f2c8083b7bf43c455f760/pillow-12.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f13711b1a5ba512d647a0e4ba79280d3a9a045aaf7e0cc6fbe96b91d4cdf6b0c", size = 8039222, upload-time = "2025-10-15T18:23:20.909Z" },
+ { url = "https://files.pythonhosted.org/packages/d8/61/3f5d3b35c5728f37953d3eec5b5f3e77111949523bd2dd7f31a851e50690/pillow-12.0.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6846bd2d116ff42cba6b646edf5bf61d37e5cbd256425fa089fee4ff5c07a99e", size = 6346657, upload-time = "2025-10-15T18:23:23.077Z" },
+ { url = "https://files.pythonhosted.org/packages/3a/be/ee90a3d79271227e0f0a33c453531efd6ed14b2e708596ba5dd9be948da3/pillow-12.0.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c98fa880d695de164b4135a52fd2e9cd7b7c90a9d8ac5e9e443a24a95ef9248e", size = 7038482, upload-time = "2025-10-15T18:23:25.005Z" },
+ { url = "https://files.pythonhosted.org/packages/44/34/a16b6a4d1ad727de390e9bd9f19f5f669e079e5826ec0f329010ddea492f/pillow-12.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fa3ed2a29a9e9d2d488b4da81dcb54720ac3104a20bf0bd273f1e4648aff5af9", size = 6461416, upload-time = "2025-10-15T18:23:27.009Z" },
+ { url = "https://files.pythonhosted.org/packages/b6/39/1aa5850d2ade7d7ba9f54e4e4c17077244ff7a2d9e25998c38a29749eb3f/pillow-12.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d034140032870024e6b9892c692fe2968493790dd57208b2c37e3fb35f6df3ab", size = 7131584, upload-time = "2025-10-15T18:23:29.752Z" },
+ { url = "https://files.pythonhosted.org/packages/bf/db/4fae862f8fad0167073a7733973bfa955f47e2cac3dc3e3e6257d10fab4a/pillow-12.0.0-cp314-cp314-win32.whl", hash = "sha256:1b1b133e6e16105f524a8dec491e0586d072948ce15c9b914e41cdadd209052b", size = 6400621, upload-time = "2025-10-15T18:23:32.06Z" },
+ { url = "https://files.pythonhosted.org/packages/2b/24/b350c31543fb0107ab2599464d7e28e6f856027aadda995022e695313d94/pillow-12.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:8dc232e39d409036af549c86f24aed8273a40ffa459981146829a324e0848b4b", size = 7142916, upload-time = "2025-10-15T18:23:34.71Z" },
+ { url = "https://files.pythonhosted.org/packages/0f/9b/0ba5a6fd9351793996ef7487c4fdbde8d3f5f75dbedc093bb598648fddf0/pillow-12.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:d52610d51e265a51518692045e372a4c363056130d922a7351429ac9f27e70b0", size = 2523836, upload-time = "2025-10-15T18:23:36.967Z" },
+ { url = "https://files.pythonhosted.org/packages/f5/7a/ceee0840aebc579af529b523d530840338ecf63992395842e54edc805987/pillow-12.0.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1979f4566bb96c1e50a62d9831e2ea2d1211761e5662afc545fa766f996632f6", size = 5255092, upload-time = "2025-10-15T18:23:38.573Z" },
+ { url = "https://files.pythonhosted.org/packages/44/76/20776057b4bfd1aef4eeca992ebde0f53a4dce874f3ae693d0ec90a4f79b/pillow-12.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b2e4b27a6e15b04832fe9bf292b94b5ca156016bbc1ea9c2c20098a0320d6cf6", size = 4653158, upload-time = "2025-10-15T18:23:40.238Z" },
+ { url = "https://files.pythonhosted.org/packages/82/3f/d9ff92ace07be8836b4e7e87e6a4c7a8318d47c2f1463ffcf121fc57d9cb/pillow-12.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb3096c30df99fd01c7bf8e544f392103d0795b9f98ba71a8054bcbf56b255f1", size = 6267882, upload-time = "2025-10-15T18:23:42.434Z" },
+ { url = "https://files.pythonhosted.org/packages/9f/7a/4f7ff87f00d3ad33ba21af78bfcd2f032107710baf8280e3722ceec28cda/pillow-12.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7438839e9e053ef79f7112c881cef684013855016f928b168b81ed5835f3e75e", size = 8071001, upload-time = "2025-10-15T18:23:44.29Z" },
+ { url = "https://files.pythonhosted.org/packages/75/87/fcea108944a52dad8cca0715ae6247e271eb80459364a98518f1e4f480c1/pillow-12.0.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d5c411a8eaa2299322b647cd932586b1427367fd3184ffbb8f7a219ea2041ca", size = 6380146, upload-time = "2025-10-15T18:23:46.065Z" },
+ { url = "https://files.pythonhosted.org/packages/91/52/0d31b5e571ef5fd111d2978b84603fce26aba1b6092f28e941cb46570745/pillow-12.0.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7e091d464ac59d2c7ad8e7e08105eaf9dafbc3883fd7265ffccc2baad6ac925", size = 7067344, upload-time = "2025-10-15T18:23:47.898Z" },
+ { url = "https://files.pythonhosted.org/packages/7b/f4/2dd3d721f875f928d48e83bb30a434dee75a2531bca839bb996bb0aa5a91/pillow-12.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:792a2c0be4dcc18af9d4a2dfd8a11a17d5e25274a1062b0ec1c2d79c76f3e7f8", size = 6491864, upload-time = "2025-10-15T18:23:49.607Z" },
+ { url = "https://files.pythonhosted.org/packages/30/4b/667dfcf3d61fc309ba5a15b141845cece5915e39b99c1ceab0f34bf1d124/pillow-12.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:afbefa430092f71a9593a99ab6a4e7538bc9eabbf7bf94f91510d3503943edc4", size = 7158911, upload-time = "2025-10-15T18:23:51.351Z" },
+ { url = "https://files.pythonhosted.org/packages/a2/2f/16cabcc6426c32218ace36bf0d55955e813f2958afddbf1d391849fee9d1/pillow-12.0.0-cp314-cp314t-win32.whl", hash = "sha256:3830c769decf88f1289680a59d4f4c46c72573446352e2befec9a8512104fa52", size = 6408045, upload-time = "2025-10-15T18:23:53.177Z" },
+ { url = "https://files.pythonhosted.org/packages/35/73/e29aa0c9c666cf787628d3f0dcf379f4791fba79f4936d02f8b37165bdf8/pillow-12.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:905b0365b210c73afb0ebe9101a32572152dfd1c144c7e28968a331b9217b94a", size = 7148282, upload-time = "2025-10-15T18:23:55.316Z" },
+ { url = "https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630, upload-time = "2025-10-15T18:23:57.149Z" },
+ { url = "https://files.pythonhosted.org/packages/1d/b3/582327e6c9f86d037b63beebe981425d6811104cb443e8193824ef1a2f27/pillow-12.0.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b22bd8c974942477156be55a768f7aa37c46904c175be4e158b6a86e3a6b7ca8", size = 5215068, upload-time = "2025-10-15T18:23:59.594Z" },
+ { url = "https://files.pythonhosted.org/packages/fd/d6/67748211d119f3b6540baf90f92fae73ae51d5217b171b0e8b5f7e5d558f/pillow-12.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:805ebf596939e48dbb2e4922a1d3852cfc25c38160751ce02da93058b48d252a", size = 4614994, upload-time = "2025-10-15T18:24:01.669Z" },
+ { url = "https://files.pythonhosted.org/packages/2d/e1/f8281e5d844c41872b273b9f2c34a4bf64ca08905668c8ae730eedc7c9fa/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cae81479f77420d217def5f54b5b9d279804d17e982e0f2fa19b1d1e14ab5197", size = 5246639, upload-time = "2025-10-15T18:24:03.403Z" },
+ { url = "https://files.pythonhosted.org/packages/94/5a/0d8ab8ffe8a102ff5df60d0de5af309015163bf710c7bb3e8311dd3b3ad0/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aeaefa96c768fc66818730b952a862235d68825c178f1b3ffd4efd7ad2edcb7c", size = 6986839, upload-time = "2025-10-15T18:24:05.344Z" },
+ { url = "https://files.pythonhosted.org/packages/20/2e/3434380e8110b76cd9eb00a363c484b050f949b4bbe84ba770bb8508a02c/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f2d0abef9e4e2f349305a4f8cc784a8a6c2f58a8c4892eea13b10a943bd26e", size = 5313505, upload-time = "2025-10-15T18:24:07.137Z" },
+ { url = "https://files.pythonhosted.org/packages/57/ca/5a9d38900d9d74785141d6580950fe705de68af735ff6e727cb911b64740/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdee52571a343d721fb2eb3b090a82d959ff37fc631e3f70422e0c2e029f3e76", size = 5963654, upload-time = "2025-10-15T18:24:09.579Z" },
+ { url = "https://files.pythonhosted.org/packages/95/7e/f896623c3c635a90537ac093c6a618ebe1a90d87206e42309cb5d98a1b9e/pillow-12.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b290fd8aa38422444d4b50d579de197557f182ef1068b75f5aa8558638b8d0a5", size = 6997850, upload-time = "2025-10-15T18:24:11.495Z" },
+]
+
[[package]]
name = "platformdirs"
version = "4.5.0"