diff --git a/.github/workflows/check-colab-notebooks.yml b/.github/workflows/check-colab-notebooks.yml new file mode 100644 index 00000000..7116b4a0 --- /dev/null +++ b/.github/workflows/check-colab-notebooks.yml @@ -0,0 +1,55 @@ +name: Check Colab notebooks + +on: + push: + branches: [ main ] + paths: + - 'docs/notebook_source/*.py' + pull_request: + branches: [ main ] + paths: + - 'docs/notebook_source/*.py' + workflow_dispatch: + +jobs: + check-colab-notebooks: + name: Check Colab Notebooks + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install uv + uses: astral-sh/setup-uv@v5 + with: + version: "latest" + python-version: "3.11" + enable-cache: true + + - name: Install dependencies + run: | + uv sync --group notebooks --group docs + + - name: Generate Colab notebooks + run: | + make generate-colab-notebooks + + - name: Check for differences + run: | + # Get the diff, filtering out cell ID changes (which are randomly generated) + # Filter out: file markers (--- and +++), and "id" lines + MEANINGFUL_DIFF=$(git diff docs/colab_notebooks/ | grep -E '^[+-]' | grep -v '^---' | grep -v '^+++' | grep -vE '^[+-]\s*"id": "[0-9a-fA-F]+",?$' || true) + + if [ -z "$MEANINGFUL_DIFF" ]; then + echo "βœ… Colab notebooks are up-to-date (ignoring cell ID changes)" + else + echo "❌ Colab notebooks are out of sync with source files" + echo "" + echo "The generated notebooks differ from the committed ones." + echo "Please run 'make generate-colab-notebooks' locally and commit the changes." 
+ echo "" + echo "Differences found:" + echo "$MEANINGFUL_DIFF" + exit 1 + fi diff --git a/Makefile b/Makefile index 5dd1f59d..295400ce 100644 --- a/Makefile +++ b/Makefile @@ -36,6 +36,7 @@ help: @echo "πŸ› οΈ Utilities:" @echo " clean - Remove coverage reports and cache files" @echo " convert-execute-notebooks - Convert notebooks from .py to .ipynb using jupytext" + @echo " generate-colab-notebooks - Generate Colab-compatible notebooks" @echo " serve-docs-locally - Serve documentation locally" @echo " check-license-headers - Check if all files have license headers" @echo " update-license-headers - Add license headers to all files" @@ -95,6 +96,11 @@ convert-execute-notebooks: rm docs/notebook_source/*.csv @echo "βœ… Notebooks created in docs/notebooks/" +generate-colab-notebooks: + @echo "πŸ““ Generating Colab-compatible notebooks..." + uv run --group notebooks python docs/scripts/generate_colab_notebooks.py + @echo "βœ… Colab notebooks created in docs/colab_notebooks/" + serve-docs-locally: @echo "πŸ“ Building and serving docs..." uv sync --group docs @@ -125,4 +131,4 @@ install-dev-notebooks: $(call install-pre-commit-hooks) @echo "βœ… Dev + notebooks installation complete!" 
-.PHONY: clean coverage format format-check lint lint-fix test check-license-headers update-license-headers check-all check-all-fix install install-dev install-dev-notebooks +.PHONY: clean coverage format format-check lint lint-fix test check-license-headers update-license-headers check-all check-all-fix install install-dev install-dev-notebooks generate-colab-notebooks diff --git a/docs/colab_notebooks/1-the-basics.ipynb b/docs/colab_notebooks/1-the-basics.ipynb new file mode 100644 index 00000000..ee9e5ccc --- /dev/null +++ b/docs/colab_notebooks/1-the-basics.ipynb @@ -0,0 +1,537 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a4ac4d55", + "metadata": {}, + "source": [ + "# 🎨 Data Designer Tutorial: The Basics\n", + "\n", + "#### πŸ“š What you'll learn\n", + "\n", + "This notebook demonstrates the basics of Data Designer by generating a simple product review dataset.\n" + ] + }, + { + "cell_type": "markdown", + "id": "9e9f3c47", + "metadata": {}, + "source": [ + "### ⚑ Colab Setup\n", + "\n", + "Run the cells below to install the dependencies and set up the API key. 
If you don't have an API key, you can generate one from [build.nvidia.com](https://build.nvidia.com).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "41b31194", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -qU data-designer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "502b3aba", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "from google.colab import userdata\n", + "\n", + "try:\n", + " os.environ[\"NVIDIA_API_KEY\"] = userdata.get(\"NVIDIA_API_KEY\")\n", + "except userdata.SecretNotFoundError:\n", + " os.environ[\"NVIDIA_API_KEY\"] = getpass.getpass(\"Enter your NVIDIA API key: \")" + ] + }, + { + "cell_type": "markdown", + "id": "8c512fbc", + "metadata": {}, + "source": [ + "### πŸ“¦ Import the essentials\n", + "\n", + "- The `essentials` module provides quick access to the most commonly used objects.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8fae521f", + "metadata": {}, + "outputs": [], + "source": [ + "from data_designer.essentials import (\n", + " CategorySamplerParams,\n", + " DataDesigner,\n", + " DataDesignerConfigBuilder,\n", + " InferenceParameters,\n", + " LLMTextColumnConfig,\n", + " ModelConfig,\n", + " PersonFromFakerSamplerParams,\n", + " SamplerColumnConfig,\n", + " SamplerType,\n", + " SubcategorySamplerParams,\n", + " UniformSamplerParams,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "e71d0256", + "metadata": {}, + "source": [ + "### βš™οΈ Initialize the Data Designer interface\n", + "\n", + "- `DataDesigner` is the main object is responsible for managing the data generation process.\n", + "\n", + "- When initialized without arguments, the [default model providers](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) are used.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "68fc7172", + "metadata": {}, + "outputs": [], + 
"source": [ + "data_designer = DataDesigner()" + ] + }, + { + "cell_type": "markdown", + "id": "9a821a27", + "metadata": {}, + "source": [ + "### πŸŽ›οΈ Define model configurations\n", + "\n", + "- Each `ModelConfig` defines a model that can be used during the generation process.\n", + "\n", + "- The \"model alias\" is used to reference the model in the Data Designer config (as we will see below).\n", + "\n", + "- The \"model provider\" is the external service that hosts the model (see the [model config](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) docs for more details).\n", + "\n", + "- By default, we use [build.nvidia.com](https://build.nvidia.com/models) as the model provider.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9515141", + "metadata": {}, + "outputs": [], + "source": [ + "# This name is set in the model provider configuration.\n", + "MODEL_PROVIDER = \"nvidia\"\n", + "\n", + "# The model ID is from build.nvidia.com.\n", + "MODEL_ID = \"nvidia/nvidia-nemotron-nano-9b-v2\"\n", + "\n", + "# We choose this alias to be descriptive for our use case.\n", + "MODEL_ALIAS = \"nemotron-nano-v2\"\n", + "\n", + "# This sets reasoning to False for the nemotron-nano-v2 model.\n", + "SYSTEM_PROMPT = \"/no_think\"\n", + "\n", + "model_configs = [\n", + " ModelConfig(\n", + " alias=MODEL_ALIAS,\n", + " model=MODEL_ID,\n", + " provider=MODEL_PROVIDER,\n", + " inference_parameters=InferenceParameters(\n", + " temperature=0.5,\n", + " top_p=1.0,\n", + " max_tokens=1024,\n", + " ),\n", + " )\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "3b940ab9", + "metadata": {}, + "source": [ + "### πŸ—οΈ Initialize the Data Designer Config Builder\n", + "\n", + "- The Data Designer config defines the dataset schema and generation process.\n", + "\n", + "- The config builder provides an intuitive interface for building this configuration.\n", + "\n", + "- The list of model configs is provided to the 
builder at initialization.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ec21da7e", + "metadata": {}, + "outputs": [], + "source": [ + "config_builder = DataDesignerConfigBuilder(model_configs=model_configs)" + ] + }, + { + "cell_type": "markdown", + "id": "85b2324e", + "metadata": {}, + "source": [ + "## 🎲 Getting started with sampler columns\n", + "\n", + "- Sampler columns offer non-LLM based generation of synthetic data.\n", + "\n", + "- They are particularly useful for **steering the diversity** of the generated data, as we demonstrate below.\n", + "\n", + "
\n", + "\n", + "You can view available samplers using the config builder's `info` property:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f49f435e", + "metadata": {}, + "outputs": [], + "source": [ + "config_builder.info.display(\"samplers\")" + ] + }, + { + "cell_type": "markdown", + "id": "f582b642", + "metadata": {}, + "source": [ + "Let's start designing our product review dataset by adding product category and subcategory columns.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8cfc43b1", + "metadata": {}, + "outputs": [], + "source": [ + "config_builder.add_column(\n", + " SamplerColumnConfig(\n", + " name=\"product_category\",\n", + " sampler_type=SamplerType.CATEGORY,\n", + " params=CategorySamplerParams(\n", + " values=[\n", + " \"Electronics\",\n", + " \"Clothing\",\n", + " \"Home & Kitchen\",\n", + " \"Books\",\n", + " \"Home Office\",\n", + " ],\n", + " ),\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " SamplerColumnConfig(\n", + " name=\"product_subcategory\",\n", + " sampler_type=SamplerType.SUBCATEGORY,\n", + " params=SubcategorySamplerParams(\n", + " category=\"product_category\",\n", + " values={\n", + " \"Electronics\": [\n", + " \"Smartphones\",\n", + " \"Laptops\",\n", + " \"Headphones\",\n", + " \"Cameras\",\n", + " \"Accessories\",\n", + " ],\n", + " \"Clothing\": [\n", + " \"Men's Clothing\",\n", + " \"Women's Clothing\",\n", + " \"Winter Coats\",\n", + " \"Activewear\",\n", + " \"Accessories\",\n", + " ],\n", + " \"Home & Kitchen\": [\n", + " \"Appliances\",\n", + " \"Cookware\",\n", + " \"Furniture\",\n", + " \"Decor\",\n", + " \"Organization\",\n", + " ],\n", + " \"Books\": [\n", + " \"Fiction\",\n", + " \"Non-Fiction\",\n", + " \"Self-Help\",\n", + " \"Textbooks\",\n", + " \"Classics\",\n", + " ],\n", + " \"Home Office\": [\n", + " \"Desks\",\n", + " \"Chairs\",\n", + " \"Storage\",\n", + " \"Office Supplies\",\n", + " \"Lighting\",\n", + " ],\n", + " },\n", + " 
),\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " SamplerColumnConfig(\n", + " name=\"target_age_range\",\n", + " sampler_type=SamplerType.CATEGORY,\n", + " params=CategorySamplerParams(values=[\"18-25\", \"25-35\", \"35-50\", \"50-65\", \"65+\"]),\n", + " )\n", + ")\n", + "\n", + "# Optionally validate that the columns are configured correctly.\n", + "config_builder.validate()" + ] + }, + { + "cell_type": "markdown", + "id": "2d0eea21", + "metadata": {}, + "source": [ + "Next, let's add samplers to generate data related to the customer and their review.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b5e65724", + "metadata": {}, + "outputs": [], + "source": [ + "config_builder.add_column(\n", + " SamplerColumnConfig(\n", + " name=\"customer\",\n", + " sampler_type=SamplerType.PERSON_FROM_FAKER,\n", + " params=PersonFromFakerSamplerParams(age_range=[18, 70], locale=\"en_US\"),\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " SamplerColumnConfig(\n", + " name=\"number_of_stars\",\n", + " sampler_type=SamplerType.UNIFORM,\n", + " params=UniformSamplerParams(low=1, high=5),\n", + " convert_to=\"int\", # Convert the sampled float to an integer.\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " SamplerColumnConfig(\n", + " name=\"review_style\",\n", + " sampler_type=SamplerType.CATEGORY,\n", + " params=CategorySamplerParams(\n", + " values=[\"rambling\", \"brief\", \"detailed\", \"structured with bullet points\"],\n", + " weights=[1, 2, 2, 1],\n", + " ),\n", + " )\n", + ")\n", + "\n", + "config_builder.validate()" + ] + }, + { + "cell_type": "markdown", + "id": "e6788771", + "metadata": {}, + "source": [ + "## 🦜 LLM-generated columns\n", + "\n", + "- The real power of Data Designer comes from leveraging LLMs to generate text, code, and structured data.\n", + "\n", + "- When prompting the LLM, we can use Jinja templating to reference other columns in the dataset.\n", + "\n", + "- As 
we see below, nested json fields can be accessed using dot notation.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2705cd9", + "metadata": {}, + "outputs": [], + "source": [ + "config_builder.add_column(\n", + " LLMTextColumnConfig(\n", + " name=\"product_name\",\n", + " prompt=(\n", + " \"You are a helpful assistant that generates product names. DO NOT add quotes around the product name.\\n\\n\"\n", + " \"Come up with a creative product name for a product in the '{{ product_category }}' category, focusing \"\n", + " \"on products related to '{{ product_subcategory }}'. The target age range of the ideal customer is \"\n", + " \"{{ target_age_range }} years old. Respond with only the product name, no other text.\"\n", + " ),\n", + " system_prompt=SYSTEM_PROMPT,\n", + " model_alias=MODEL_ALIAS,\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " LLMTextColumnConfig(\n", + " name=\"customer_review\",\n", + " prompt=(\n", + " \"You are a customer named {{ customer.first_name }} from {{ customer.city }}, {{ customer.state }}. \"\n", + " \"You are {{ customer.age }} years old and recently purchased a product called {{ product_name }}. \"\n", + " \"Write a review of this product, which you gave a rating of {{ number_of_stars }} stars. \"\n", + " \"The style of the review should be '{{ review_style }}'.\"\n", + " ),\n", + " system_prompt=SYSTEM_PROMPT,\n", + " model_alias=MODEL_ALIAS,\n", + " )\n", + ")\n", + "\n", + "config_builder.validate()" + ] + }, + { + "cell_type": "markdown", + "id": "e3dd2f69", + "metadata": {}, + "source": [ + "### πŸ” Iteration is key – preview the dataset!\n", + "\n", + "1. Use the `preview` method to generate a sample of records quickly.\n", + "\n", + "2. Inspect the results for quality and format issues.\n", + "\n", + "3. Adjust column configurations, prompts, or parameters as needed.\n", + "\n", + "4. 
Re-run the preview until satisfied.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6e43147", + "metadata": {}, + "outputs": [], + "source": [ + "preview = data_designer.preview(config_builder, num_records=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fab77d01", + "metadata": {}, + "outputs": [], + "source": [ + "# Run this cell multiple times to cycle through the 2 preview records.\n", + "preview.display_sample_record()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "875ee6a6", + "metadata": {}, + "outputs": [], + "source": [ + "# The preview dataset is available as a pandas DataFrame.\n", + "preview.dataset" + ] + }, + { + "cell_type": "markdown", + "id": "87b59e4b", + "metadata": {}, + "source": [ + "### πŸ“Š Analyze the generated data\n", + "\n", + "- Data Designer automatically generates a basic statistical analysis of the generated data.\n", + "\n", + "- This analysis is available via the `analysis` property of generation result objects.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5d347f4c", + "metadata": {}, + "outputs": [], + "source": [ + "# Print the analysis as a table.\n", + "preview.analysis.to_report()" + ] + }, + { + "cell_type": "markdown", + "id": "d2fb84f2", + "metadata": {}, + "source": [ + "### πŸ†™ Scale up!\n", + "\n", + "- Happy with your preview data?\n", + "\n", + "- Use the `create` method to submit larger Data Designer generation jobs.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71a31e85", + "metadata": {}, + "outputs": [], + "source": [ + "results = data_designer.create(config_builder, num_records=10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "501e9092", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the generated dataset as a pandas DataFrame.\n", + "dataset = results.load_dataset()\n", + "\n", + "dataset.head()" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "6f217b4a", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the analysis results into memory.\n", + "analysis = results.load_analysis()\n", + "\n", + "analysis.to_report()" + ] + }, + { + "cell_type": "markdown", + "id": "4da82b0f", + "metadata": {}, + "source": [ + "## ⏭️ Next Steps\n", + "\n", + "Now that you've seen the basics of Data Designer, check out the following notebooks to learn more about:\n", + "\n", + "- [Structured outputs and jinja expressions](/notebooks/2-structured-outputs-and-jinja-expressions/)\n", + "\n", + "- [Seeding synthetic data generation with an external dataset](/notebooks/3-seeding-with-a-dataset/)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb new file mode 100644 index 00000000..10156a10 --- /dev/null +++ b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb @@ -0,0 +1,567 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "a70798bc", + "metadata": {}, + "source": [ + "# 🎨 Data Designer Tutorial: Structured Outputs and Jinja Expressions\n", + "\n", + "#### πŸ“š What you'll learn\n", + "\n", + "In this notebook, we will continue our exploration of Data Designer, demonstrating more advanced data generation using structured outputs and Jinja expressions.\n", + "\n", + "If this is your first time using Data Designer, we recommend starting with the [first notebook](/notebooks/1-the-basics/) in this tutorial series.\n" + ] + }, + { + "cell_type": "markdown", + "id": "4ed4c65d", + "metadata": {}, + "source": [ + "### ⚑ Colab Setup\n", + "\n", + "Run the cells below to install the dependencies and set up the API key. 
If you don't have an API key, you can generate one from [build.nvidia.com](https://build.nvidia.com).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "73432e8e", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -qU data-designer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a6aac78", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "from google.colab import userdata\n", + "\n", + "try:\n", + " os.environ[\"NVIDIA_API_KEY\"] = userdata.get(\"NVIDIA_API_KEY\")\n", + "except userdata.SecretNotFoundError:\n", + " os.environ[\"NVIDIA_API_KEY\"] = getpass.getpass(\"Enter your NVIDIA API key: \")" + ] + }, + { + "cell_type": "markdown", + "id": "03df6d1c", + "metadata": {}, + "source": [ + "### πŸ“¦ Import the essentials\n", + "\n", + "- The `essentials` module provides quick access to the most commonly used objects.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "befb6573", + "metadata": {}, + "outputs": [], + "source": [ + "from data_designer.essentials import (\n", + " CategorySamplerParams,\n", + " DataDesigner,\n", + " DataDesignerConfigBuilder,\n", + " ExpressionColumnConfig,\n", + " InferenceParameters,\n", + " LLMStructuredColumnConfig,\n", + " ModelConfig,\n", + " PersonFromFakerSamplerParams,\n", + " SamplerColumnConfig,\n", + " SamplerType,\n", + " SubcategorySamplerParams,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "a022d1ae", + "metadata": {}, + "source": [ + "### βš™οΈ Initialize the Data Designer interface\n", + "\n", + "- `DataDesigner` is the main object that is used to interface with the library.\n", + "\n", + "- When initialized without arguments, the [default model providers](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) are used.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "568e1d91", + "metadata": {}, + "outputs": [], + 
"source": [ + "data_designer_client = DataDesigner()" + ] + }, + { + "cell_type": "markdown", + "id": "de1ef709", + "metadata": {}, + "source": [ + "### πŸŽ›οΈ Define model configurations\n", + "\n", + "- Each `ModelConfig` defines a model that can be used during the generation process.\n", + "\n", + "- The \"model alias\" is used to reference the model in the Data Designer config (as we will see below).\n", + "\n", + "- The \"model provider\" is the external service that hosts the model (see the [model config](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) docs for more details).\n", + "\n", + "- By default, we use [build.nvidia.com](https://build.nvidia.com/models) as the model provider.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6757f43c", + "metadata": {}, + "outputs": [], + "source": [ + "# This name is set in the model provider configuration.\n", + "MODEL_PROVIDER = \"nvidia\"\n", + "\n", + "# The model ID is from build.nvidia.com.\n", + "MODEL_ID = \"nvidia/nvidia-nemotron-nano-9b-v2\"\n", + "\n", + "# We choose this alias to be descriptive for our use case.\n", + "MODEL_ALIAS = \"nemotron-nano-v2\"\n", + "\n", + "# This sets reasoning to False for the nemotron-nano-v2 model.\n", + "SYSTEM_PROMPT = \"/no_think\"\n", + "\n", + "model_configs = [\n", + " ModelConfig(\n", + " alias=MODEL_ALIAS,\n", + " model=MODEL_ID,\n", + " provider=MODEL_PROVIDER,\n", + " inference_parameters=InferenceParameters(\n", + " temperature=0.5,\n", + " top_p=1.0,\n", + " max_tokens=1024,\n", + " ),\n", + " )\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "3d1ea9b6", + "metadata": {}, + "source": [ + "### πŸ—οΈ Initialize the Data Designer Config Builder\n", + "\n", + "- The Data Designer config defines the dataset schema and generation process.\n", + "\n", + "- The config builder provides an intuitive interface for building this configuration.\n", + "\n", + "- The list of model configs is provided to 
the builder at initialization.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9ad410a", + "metadata": {}, + "outputs": [], + "source": [ + "config_builder = DataDesignerConfigBuilder(model_configs=model_configs)" + ] + }, + { + "cell_type": "markdown", + "id": "8f918afa", + "metadata": {}, + "source": [ + "### πŸ§‘β€πŸŽ¨ Designing our data\n", + "\n", + "- We will again create a product review dataset, but this time we will use structured outputs and Jinja expressions.\n", + "\n", + "- Structured outputs let you specify the exact schema of the data you want to generate.\n", + "\n", + "- Data Designer supports schemas specified using either json schema or Pydantic data models (recommended).\n", + "\n", + "
\n", + "\n", + "We'll define our structured outputs using [Pydantic](https://docs.pydantic.dev/latest/) data models\n", + "\n", + "> πŸ’‘ **Why Pydantic?**\n", + ">\n", + "> - Pydantic models provide better IDE support and type validation.\n", + ">\n", + "> - They are more Pythonic than raw JSON schemas.\n", + ">\n", + "> - They integrate seamlessly with Data Designer's structured output system.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6aafd123", + "metadata": {}, + "outputs": [], + "source": [ + "from decimal import Decimal\n", + "from typing import Literal\n", + "\n", + "from pydantic import BaseModel, Field\n", + "\n", + "\n", + "# We define a Product schema so that the name, description, and price are generated\n", + "# in one go, with the types and constraints specified.\n", + "class Product(BaseModel):\n", + " name: str = Field(description=\"The name of the product\")\n", + " description: str = Field(description=\"A description of the product\")\n", + " price: Decimal = Field(description=\"The price of the product\", ge=10, le=1000, decimal_places=2)\n", + "\n", + "\n", + "class ProductReview(BaseModel):\n", + " rating: int = Field(description=\"The rating of the product\", ge=1, le=5)\n", + " customer_mood: Literal[\"irritated\", \"mad\", \"happy\", \"neutral\", \"excited\"] = Field(\n", + " description=\"The mood of the customer\"\n", + " )\n", + " review: str = Field(description=\"A review of the product\")" + ] + }, + { + "cell_type": "markdown", + "id": "9727c5ae", + "metadata": {}, + "source": [ + "Next, let's design our product review dataset using a few more tricks compared to the previous notebook.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f9df709", + "metadata": {}, + "outputs": [], + "source": [ + "# Since we often only want a few attributes from Person objects, we can\n", + "# set drop=True in the column config to drop the column from the final dataset.\n", + 
"config_builder.add_column(\n", + " SamplerColumnConfig(\n", + " name=\"customer\",\n", + " sampler_type=SamplerType.PERSON_FROM_FAKER,\n", + " params=PersonFromFakerSamplerParams(),\n", + " drop=True,\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " SamplerColumnConfig(\n", + " name=\"product_category\",\n", + " sampler_type=SamplerType.CATEGORY,\n", + " params=CategorySamplerParams(\n", + " values=[\n", + " \"Electronics\",\n", + " \"Clothing\",\n", + " \"Home & Kitchen\",\n", + " \"Books\",\n", + " \"Home Office\",\n", + " ],\n", + " ),\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " SamplerColumnConfig(\n", + " name=\"product_subcategory\",\n", + " sampler_type=SamplerType.SUBCATEGORY,\n", + " params=SubcategorySamplerParams(\n", + " category=\"product_category\",\n", + " values={\n", + " \"Electronics\": [\n", + " \"Smartphones\",\n", + " \"Laptops\",\n", + " \"Headphones\",\n", + " \"Cameras\",\n", + " \"Accessories\",\n", + " ],\n", + " \"Clothing\": [\n", + " \"Men's Clothing\",\n", + " \"Women's Clothing\",\n", + " \"Winter Coats\",\n", + " \"Activewear\",\n", + " \"Accessories\",\n", + " ],\n", + " \"Home & Kitchen\": [\n", + " \"Appliances\",\n", + " \"Cookware\",\n", + " \"Furniture\",\n", + " \"Decor\",\n", + " \"Organization\",\n", + " ],\n", + " \"Books\": [\n", + " \"Fiction\",\n", + " \"Non-Fiction\",\n", + " \"Self-Help\",\n", + " \"Textbooks\",\n", + " \"Classics\",\n", + " ],\n", + " \"Home Office\": [\n", + " \"Desks\",\n", + " \"Chairs\",\n", + " \"Storage\",\n", + " \"Office Supplies\",\n", + " \"Lighting\",\n", + " ],\n", + " },\n", + " ),\n", + " )\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " SamplerColumnConfig(\n", + " name=\"target_age_range\",\n", + " sampler_type=SamplerType.CATEGORY,\n", + " params=CategorySamplerParams(values=[\"18-25\", \"25-35\", \"35-50\", \"50-65\", \"65+\"]),\n", + " )\n", + ")\n", + "\n", + "# Sampler columns support conditional params, which are used if 
the condition is met.\n", + "# In this example, we set the review style to rambling if the target age range is 18-25.\n", + "# Note conditional parameters are only supported for Sampler column types.\n", + "config_builder.add_column(\n", + " SamplerColumnConfig(\n", + " name=\"review_style\",\n", + " sampler_type=SamplerType.CATEGORY,\n", + " params=CategorySamplerParams(\n", + " values=[\"rambling\", \"brief\", \"detailed\", \"structured with bullet points\"],\n", + " weights=[1, 2, 2, 1],\n", + " ),\n", + " conditional_params={\n", + " \"target_age_range == '18-25'\": CategorySamplerParams(values=[\"rambling\"]),\n", + " },\n", + " )\n", + ")\n", + "\n", + "# Optionally validate that the columns are configured correctly.\n", + "config_builder.validate()" + ] + }, + { + "cell_type": "markdown", + "id": "f42b7843", + "metadata": {}, + "source": [ + "Next, we will use more advanced Jinja expressions to create new columns.\n", + "\n", + "Jinja expressions let you:\n", + "\n", + "- Access nested attributes: `{{ customer.first_name }}`\n", + "\n", + "- Combine values: `{{ customer.first_name }} {{ customer.last_name }}`\n", + "\n", + "- Use conditional logic: `{% if condition %}...{% endif %}`\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d231f52e", + "metadata": {}, + "outputs": [], + "source": [ + "# We can create new columns using Jinja expressions that reference\n", + "# existing columns, including attributes of nested objects.\n", + "config_builder.add_column(\n", + " ExpressionColumnConfig(name=\"customer_name\", expr=\"{{ customer.first_name }} {{ customer.last_name }}\")\n", + ")\n", + "\n", + "config_builder.add_column(ExpressionColumnConfig(name=\"customer_age\", expr=\"{{ customer.age }}\"))\n", + "\n", + "config_builder.add_column(\n", + " LLMStructuredColumnConfig(\n", + " name=\"product\",\n", + " prompt=(\n", + " \"Create a product in the '{{ product_category }}' category, focusing on products \"\n", + " \"related to '{{ 
product_subcategory }}'. The target age range of the ideal customer is \"\n", + " \"{{ target_age_range }} years old. The product should be priced between $10 and $1000.\"\n", + " ),\n", + " system_prompt=SYSTEM_PROMPT,\n", + " output_format=Product,\n", + " model_alias=MODEL_ALIAS,\n", + " )\n", + ")\n", + "\n", + "# We can even use if/else logic in our Jinja expressions to create more complex prompt patterns.\n", + "config_builder.add_column(\n", + " LLMStructuredColumnConfig(\n", + " name=\"customer_review\",\n", + " prompt=(\n", + " \"Your task is to write a review for the following product:\\n\\n\"\n", + " \"Product Name: {{ product.name }}\\n\"\n", + " \"Product Description: {{ product.description }}\\n\"\n", + " \"Price: {{ product.price }}\\n\\n\"\n", + " \"Imagine your name is {{ customer_name }} and you are from {{ customer.city }}, {{ customer.state }}. \"\n", + " \"Write the review in a style that is '{{ review_style }}'.\"\n", + " \"{% if target_age_range == '18-25' %}\"\n", + " \"Make sure the review is more informal and conversational.\"\n", + " \"{% else %}\"\n", + " \"Make sure the review is more formal and structured.\"\n", + " \"{% endif %}\"\n", + " ),\n", + " system_prompt=SYSTEM_PROMPT,\n", + " output_format=ProductReview,\n", + " model_alias=MODEL_ALIAS,\n", + " )\n", + ")\n", + "\n", + "config_builder.validate()" + ] + }, + { + "cell_type": "markdown", + "id": "01dbf368", + "metadata": {}, + "source": [ + "### πŸ” Iteration is key – preview the dataset!\n", + "\n", + "1. Use the `preview` method to generate a sample of records quickly.\n", + "\n", + "2. Inspect the results for quality and format issues.\n", + "\n", + "3. Adjust column configurations, prompts, or parameters as needed.\n", + "\n", + "4. 
Re-run the preview until satisfied.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7d0389c6", + "metadata": {}, + "outputs": [], + "source": [ + "preview = data_designer_client.preview(config_builder, num_records=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b312d760", + "metadata": {}, + "outputs": [], + "source": [ + "# Run this cell multiple times to cycle through the 2 preview records.\n", + "preview.display_sample_record()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6252ae9a", + "metadata": {}, + "outputs": [], + "source": [ + "# The preview dataset is available as a pandas DataFrame.\n", + "preview.dataset" + ] + }, + { + "cell_type": "markdown", + "id": "ea32a75e", + "metadata": {}, + "source": [ + "### πŸ“Š Analyze the generated data\n", + "\n", + "- Data Designer automatically generates a basic statistical analysis of the generated data.\n", + "\n", + "- This analysis is available via the `analysis` property of generation result objects.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c094a400", + "metadata": {}, + "outputs": [], + "source": [ + "# Print the analysis as a table.\n", + "preview.analysis.to_report()" + ] + }, + { + "cell_type": "markdown", + "id": "8565a283", + "metadata": {}, + "source": [ + "### πŸ†™ Scale up!\n", + "\n", + "- Happy with your preview data?\n", + "\n", + "- Use the `create` method to submit larger Data Designer generation jobs.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d48a294c", + "metadata": {}, + "outputs": [], + "source": [ + "job_results = data_designer_client.create(config_builder, num_records=10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "96268be4", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the generated dataset as a pandas DataFrame.\n", + "dataset = job_results.load_dataset()\n", + "\n", + "dataset.head()" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "a2137aaf", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the analysis results into memory.\n", + "analysis = job_results.load_analysis()\n", + "\n", + "analysis.to_report()" + ] + }, + { + "cell_type": "markdown", + "id": "c43a66cc", + "metadata": {}, + "source": [ + "## ⏭️ Next Steps\n", + "\n", + "Check out the following notebook to learn more about:\n", + "\n", + "- [Seeding synthetic data generation with an external dataset](/notebooks/3-seeding-with-a-dataset/)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb new file mode 100644 index 00000000..db5c9d27 --- /dev/null +++ b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb @@ -0,0 +1,465 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c4c424f2", + "metadata": {}, + "source": [ + "# 🎨 Data Designer Tutorial: Seeding Synthetic Data Generation with an External Dataset\n", + "\n", + "#### πŸ“š What you'll learn\n", + "\n", + "In this notebook, we will demonstrate how to seed synthetic data generation in Data Designer with an external dataset.\n", + "\n", + "If this is your first time using Data Designer, we recommend starting with the [first notebook](/notebooks/1-the-basics/) in this tutorial series.\n" + ] + }, + { + "cell_type": "markdown", + "id": "2ceb13f2", + "metadata": {}, + "source": [ + "### ⚑ Colab Setup\n", + "\n", + "Run the cells below to install the dependencies and set up the API key. 
If you don't have an API key, you can generate one from [build.nvidia.com](https://build.nvidia.com).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ad4096cd", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -qU data-designer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "21336bf3", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "from google.colab import userdata\n", + "\n", + "try:\n", + " os.environ[\"NVIDIA_API_KEY\"] = userdata.get(\"NVIDIA_API_KEY\")\n", + "except userdata.SecretNotFoundError:\n", + " os.environ[\"NVIDIA_API_KEY\"] = getpass.getpass(\"Enter your NVIDIA API key: \")" + ] + }, + { + "cell_type": "markdown", + "id": "db8fa976", + "metadata": {}, + "source": [ + "### πŸ“¦ Import the essentials\n", + "\n", + "- The `essentials` module provides quick access to the most commonly used objects.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2e17b2b", + "metadata": {}, + "outputs": [], + "source": [ + "from data_designer.essentials import (\n", + " DataDesigner,\n", + " DataDesignerConfigBuilder,\n", + " InferenceParameters,\n", + " ModelConfig,\n", + " SeedConfig,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "29c28647", + "metadata": {}, + "source": [ + "### βš™οΈ Initialize the Data Designer interface\n", + "\n", + "- `DataDesigner` is the main object is responsible for managing the data generation process.\n", + "\n", + "- When initialized without arguments, the [default model providers](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) are used.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cecf9242", + "metadata": {}, + "outputs": [], + "source": [ + "data_designer_client = DataDesigner()" + ] + }, + { + "cell_type": "markdown", + "id": "74eab801", + "metadata": {}, + "source": [ + "### πŸŽ›οΈ Define model 
configurations\n", + "\n", + "- Each `ModelConfig` defines a model that can be used during the generation process.\n", + "\n", + "- The \"model alias\" is used to reference the model in the Data Designer config (as we will see below).\n", + "\n", + "- The \"model provider\" is the external service that hosts the model (see the [model config](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) docs for more details).\n", + "\n", + "- By default, we use [build.nvidia.com](https://build.nvidia.com/models) as the model provider.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b31991e8", + "metadata": {}, + "outputs": [], + "source": [ + "# This name is set in the model provider configuration.\n", + "MODEL_PROVIDER = \"nvidia\"\n", + "\n", + "# The model ID is from build.nvidia.com.\n", + "MODEL_ID = \"nvidia/nvidia-nemotron-nano-9b-v2\"\n", + "\n", + "# We choose this alias to be descriptive for our use case.\n", + "MODEL_ALIAS = \"nemotron-nano-v2\"\n", + "\n", + "# This sets reasoning to False for the nemotron-nano-v2 model.\n", + "SYSTEM_PROMPT = \"/no_think\"\n", + "\n", + "model_configs = [\n", + " ModelConfig(\n", + " alias=MODEL_ALIAS,\n", + " model=MODEL_ID,\n", + " provider=MODEL_PROVIDER,\n", + " inference_parameters=InferenceParameters(\n", + " temperature=0.5,\n", + " top_p=1.0,\n", + " max_tokens=1024,\n", + " ),\n", + " )\n", + "]" + ] + }, + { + "cell_type": "markdown", + "id": "006d1625", + "metadata": {}, + "source": [ + "### πŸ—οΈ Initialize the Data Designer Config Builder\n", + "\n", + "- The Data Designer config defines the dataset schema and generation process.\n", + "\n", + "- The config builder provides an intuitive interface for building this configuration.\n", + "\n", + "- The list of model configs is provided to the builder at initialization.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "396a4ed5", + "metadata": {}, + "outputs": [], + "source": [ + 
"config_builder = DataDesignerConfigBuilder(model_configs=model_configs)" + ] + }, + { + "cell_type": "markdown", + "id": "9860369b", + "metadata": {}, + "source": [ + "## πŸ₯ Prepare a seed dataset\n", + "\n", + "- For this notebook, we'll create a synthetic dataset of patient notes.\n", + "\n", + "- We will _seed_ the generation process with a [symptom-to-diagnosis dataset](https://huggingface.co/datasets/gretelai/symptom_to_diagnosis).\n", + "\n", + "- We already have the dataset downloaded in the [data](../data) directory of this repository.\n", + "\n", + "
\n", + "\n", + "> 🌱 **Why use a seed dataset?**\n", + ">\n", + "> - Seed datasets let you steer the generation process by providing context that is specific to your use case.\n", + ">\n", + "> - Seed datasets are also an excellent way to inject real-world diversity into your synthetic data.\n", + ">\n", + "> - During generation, prompt templates can reference any of the seed dataset fields.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "65a2fa3c", + "metadata": {}, + "outputs": [], + "source": [ + "# Download sample dataset from Github\n", + "import urllib.request\n", + "\n", + "url = \"https://raw.githubusercontent.com/NVIDIA/GenerativeAIExamples/refs/heads/main/nemo/NeMo-Data-Designer/data/gretelai_symptom_to_diagnosis.csv\"\n", + "local_filename, headers = urllib.request.urlretrieve(url, \"gretelai_symptom_to_diagnosis.csv\")\n", + "\n", + "seed_dataset = SeedConfig(dataset=local_filename)\n", + "\n", + "# Pass the reference to the config builder for use during generation.\n", + "config_builder.with_seed_dataset(seed_dataset)" + ] + }, + { + "cell_type": "markdown", + "id": "ea11134a", + "metadata": {}, + "source": [ + "## 🎨 Designing our synthetic patient notes dataset\n", + "\n", + "- Here we use `add_column` with keyword arguments (rather than imported config objects).\n", + "\n", + "- Generally, we recommend using concrete objects, but this is a convenient shorthand.\n", + "\n", + "- **Note**: The prompt template can reference fields from our seed dataset:\n", + " - `{{ diagnosis }}` - the medical diagnosis from the seed data\n", + " - `{{ patient_summary }}` - the symptom description from the seed data\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "209af41c", + "metadata": {}, + "outputs": [], + "source": [ + "config_builder.add_column(\n", + " name=\"patient_sampler\",\n", + " column_type=\"sampler\",\n", + " sampler_type=\"person_from_faker\",\n", + ")\n", + "\n", + "config_builder.add_column(\n", + 
" name=\"doctor_sampler\",\n", + " column_type=\"sampler\",\n", + " sampler_type=\"person_from_faker\",\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " name=\"patient_id\",\n", + " column_type=\"sampler\",\n", + " sampler_type=\"uuid\",\n", + " params={\n", + " \"prefix\": \"PT-\",\n", + " \"short_form\": True,\n", + " \"uppercase\": True,\n", + " },\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " name=\"first_name\",\n", + " column_type=\"expression\",\n", + " expr=\"{{ patient_sampler.first_name}}\",\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " name=\"last_name\",\n", + " column_type=\"expression\",\n", + " expr=\"{{ patient_sampler.last_name }}\",\n", + ")\n", + "\n", + "\n", + "config_builder.add_column(\n", + " name=\"dob\",\n", + " column_type=\"expression\",\n", + " expr=\"{{ patient_sampler.birth_date }}\",\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " name=\"symptom_onset_date\",\n", + " column_type=\"sampler\",\n", + " sampler_type=\"datetime\",\n", + " params={\"start\": \"2024-01-01\", \"end\": \"2024-12-31\"},\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " name=\"date_of_visit\",\n", + " column_type=\"sampler\",\n", + " sampler_type=\"timedelta\",\n", + " params={\"dt_min\": 1, \"dt_max\": 30, \"reference_column_name\": \"symptom_onset_date\"},\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " name=\"physician\",\n", + " column_type=\"expression\",\n", + " expr=\"Dr. 
{{ doctor_sampler.last_name }}\",\n", + ")\n", + "\n", + "config_builder.add_column(\n", + " name=\"physician_notes\",\n", + " column_type=\"llm-text\",\n", + " prompt=\"\"\"\\\n", + "You are a primary-care physician who just had an appointment with {{ first_name }} {{ last_name }},\n", + "who has been struggling with symptoms from {{ diagnosis }} since {{ symptom_onset_date }}.\n", + "The date of today's visit is {{ date_of_visit }}.\n", + "\n", + "{{ patient_summary }}\n", + "\n", + "Write careful notes about your visit with {{ first_name }},\n", + "as Dr. {{ doctor_sampler.first_name }} {{ doctor_sampler.last_name }}.\n", + "\n", + "Format the notes as a busy doctor might.\n", + "\"\"\",\n", + " model_alias=MODEL_ALIAS,\n", + " system_prompt=SYSTEM_PROMPT,\n", + ")\n", + "\n", + "config_builder.validate()" + ] + }, + { + "cell_type": "markdown", + "id": "32f43d20", + "metadata": {}, + "source": [ + "### πŸ” Iteration is key – preview the dataset!\n", + "\n", + "1. Use the `preview` method to generate a sample of records quickly.\n", + "\n", + "2. Inspect the results for quality and format issues.\n", + "\n", + "3. Adjust column configurations, prompts, or parameters as needed.\n", + "\n", + "4. 
Re-run the preview until satisfied.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "402a1025", + "metadata": {}, + "outputs": [], + "source": [ + "preview = data_designer_client.preview(config_builder, num_records=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "de294013", + "metadata": {}, + "outputs": [], + "source": [ + "# Run this cell multiple times to cycle through the 2 preview records.\n", + "preview.display_sample_record()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a38c360d", + "metadata": {}, + "outputs": [], + "source": [ + "# The preview dataset is available as a pandas DataFrame.\n", + "preview.dataset" + ] + }, + { + "cell_type": "markdown", + "id": "4a385031", + "metadata": {}, + "source": [ + "### πŸ“Š Analyze the generated data\n", + "\n", + "- Data Designer automatically generates a basic statistical analysis of the generated data.\n", + "\n", + "- This analysis is available via the `analysis` property of generation result objects.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3531b1e4", + "metadata": {}, + "outputs": [], + "source": [ + "# Print the analysis as a table.\n", + "preview.analysis.to_report()" + ] + }, + { + "cell_type": "markdown", + "id": "7fec6251", + "metadata": {}, + "source": [ + "### πŸ†™ Scale up!\n", + "\n", + "- Happy with your preview data?\n", + "\n", + "- Use the `create` method to submit larger Data Designer generation jobs.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bace6acb", + "metadata": {}, + "outputs": [], + "source": [ + "job_results = data_designer_client.create(config_builder, num_records=10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3eaafc74", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the generated dataset as a pandas DataFrame.\n", + "dataset = job_results.load_dataset()\n", + "\n", + "dataset.head()" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "473a0c89", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the analysis results into memory.\n", + "analysis = job_results.load_analysis()\n", + "\n", + "analysis.to_report()" + ] + }, + { + "cell_type": "markdown", + "id": "7408eef8", + "metadata": {}, + "source": [ + "## ⏭️ Next Steps\n", + "\n", + "Use Data Designer to generate synthetic data for your specific use case!\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/colab_notebooks/4-providing-images-as-context.ipynb b/docs/colab_notebooks/4-providing-images-as-context.ipynb new file mode 100644 index 00000000..bac63375 --- /dev/null +++ b/docs/colab_notebooks/4-providing-images-as-context.ipynb @@ -0,0 +1,540 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "ebd062e8", + "metadata": {}, + "source": [ + "# 🎨 Data Designer Tutorial: Providing Images as Context for Vision-Based Data Generation" + ] + }, + { + "cell_type": "markdown", + "id": "3ce5b7f7", + "metadata": {}, + "source": [ + "#### πŸ“š What you'll learn\n", + "\n", + "This notebook demonstrates how to provide images as context to generate text descriptions using vision-language models.\n", + "\n", + "- ✨ **Visual Document Processing**: Converting images to chat-ready format for model consumption\n", + "- πŸ” **Vision-Language Generation**: Using vision models to generate detailed summaries from images\n", + "\n", + "If this is your first time using Data Designer, we recommend starting with the [first notebook](/notebooks/1-the-basics/) in this tutorial series.\n" + ] + }, + { + "cell_type": "markdown", + "id": "04d0bfd4", + "metadata": {}, + "source": [ + "### ⚑ Colab Setup\n", + "\n", + "Run the cells below to install the dependencies and set up the API key. 
If you don't have an API key, you can generate one from [build.nvidia.com](https://build.nvidia.com).\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "661740a3", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -qU data-designer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b9c4188b", + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -q pillow>=12.0.0" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ddaeb938", + "metadata": {}, + "outputs": [], + "source": [ + "import getpass\n", + "import os\n", + "\n", + "from google.colab import userdata\n", + "\n", + "try:\n", + " os.environ[\"NVIDIA_API_KEY\"] = userdata.get(\"NVIDIA_API_KEY\")\n", + "except userdata.SecretNotFoundError:\n", + " os.environ[\"NVIDIA_API_KEY\"] = getpass.getpass(\"Enter your NVIDIA API key: \")" + ] + }, + { + "cell_type": "markdown", + "id": "c3aac5e8", + "metadata": {}, + "source": [ + "### πŸ“¦ Import the essentials\n", + "\n", + "- The `essentials` module provides quick access to the most commonly used objects.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c8b905ee", + "metadata": {}, + "outputs": [], + "source": [ + "# Standard library imports\n", + "import base64\n", + "import io\n", + "import uuid\n", + "\n", + "# Third-party imports\n", + "import pandas as pd\n", + "import rich\n", + "from datasets import load_dataset\n", + "from IPython.display import display\n", + "from rich.panel import Panel\n", + "\n", + "# Data Designer imports\n", + "from data_designer.essentials import (\n", + " DataDesigner,\n", + " DataDesignerConfigBuilder,\n", + " ImageContext,\n", + " ImageFormat,\n", + " InferenceParameters,\n", + " LLMTextColumnConfig,\n", + " ModalityDataType,\n", + " ModelConfig,\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "f508d655", + "metadata": {}, + "source": [ + "### βš™οΈ Initialize the Data Designer interface\n", + "\n", + "- 
`DataDesigner` is the main object responsible for managing the data generation process.\n",
+    "\n",
+    "- When initialized without arguments, the [default model providers](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) are used.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f0607008",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "data_designer = DataDesigner()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "a0467d8c",
+   "metadata": {},
+   "source": [
+    "### 🎛️ Define model configurations\n",
+    "\n",
+    "- Each `ModelConfig` defines a model that can be used during the generation process.\n",
+    "\n",
+    "- The \"model alias\" is used to reference the model in the Data Designer config (as we will see below).\n",
+    "\n",
+    "- The \"model provider\" is the external service that hosts the model (see the [model config](https://nvidia-nemo.github.io/DataDesigner/concepts/models/default-model-settings/) docs for more details).\n",
+    "\n",
+    "- By default, we use [build.nvidia.com](https://build.nvidia.com/models) as the model provider.\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1e4d9a15",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# This name is set in the model provider configuration.\n",
+    "MODEL_PROVIDER = \"nvidia\"\n",
+    "\n",
+    "model_configs = [\n",
+    "    ModelConfig(\n",
+    "        alias=\"vision\",\n",
+    "        model=\"meta/llama-4-scout-17b-16e-instruct\",\n",
+    "        provider=MODEL_PROVIDER,\n",
+    "        inference_parameters=InferenceParameters(\n",
+    "            temperature=0.60,\n",
+    "            top_p=0.95,\n",
+    "            max_tokens=2048,\n",
+    "        ),\n",
+    "    ),\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6d66a8a6",
+   "metadata": {},
+   "source": [
+    "### 🏗️ Initialize the Data Designer Config Builder\n",
+    "\n",
+    "- The Data Designer config defines the dataset schema and generation process.\n",
+    "\n",
+    "- The config builder provides an intuitive interface for building 
this configuration.\n", + "\n", + "- The list of model configs is provided to the builder at initialization.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80ad9b04", + "metadata": {}, + "outputs": [], + "source": [ + "config_builder = DataDesignerConfigBuilder(model_configs=model_configs)" + ] + }, + { + "cell_type": "markdown", + "id": "75935c33", + "metadata": {}, + "source": [ + "### 🌱 Seed Dataset Creation\n", + "\n", + "In this section, we'll prepare our visual documents as a seed dataset for summarization:\n", + "\n", + "- **Loading Visual Documents**: We use the ColPali dataset containing document images\n", + "- **Image Processing**: Convert images to base64 format for vision model consumption\n", + "- **Metadata Extraction**: Preserve relevant document information (filename, page number, source, etc.)\n", + "\n", + "The seed dataset will be used to generate detailed text summaries of each document image." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "868e41af", + "metadata": {}, + "outputs": [], + "source": [ + "# Dataset processing configuration\n", + "IMG_COUNT = 512 # Number of images to process\n", + "BASE64_IMAGE_HEIGHT = 512 # Standardized height for model input\n", + "\n", + "# Load ColPali dataset for visual documents\n", + "img_dataset_cfg = {\"path\": \"vidore/colpali_train_set\", \"split\": \"train\", \"streaming\": True}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5466e56f", + "metadata": {}, + "outputs": [], + "source": [ + "def resize_image(image, height: int):\n", + " \"\"\"\n", + " Resize image while maintaining aspect ratio.\n", + "\n", + " Args:\n", + " image: PIL Image object\n", + " height: Target height in pixels\n", + "\n", + " Returns:\n", + " Resized PIL Image object\n", + " \"\"\"\n", + " original_width, original_height = image.size\n", + " width = int(original_width * (height / original_height))\n", + " return image.resize((width, height))\n", + 
"\n", + "\n", + "def convert_image_to_chat_format(record, height: int) -> dict:\n", + " \"\"\"\n", + " Convert PIL image to base64 format for chat template usage.\n", + "\n", + " Args:\n", + " record: Dataset record containing image and metadata\n", + " height: Target height for image resizing\n", + "\n", + " Returns:\n", + " Updated record with base64_image and uuid fields\n", + " \"\"\"\n", + " # Resize image for consistent processing\n", + " image = resize_image(record[\"image\"], height)\n", + "\n", + " # Convert to base64 string\n", + " img_buffer = io.BytesIO()\n", + " image.save(img_buffer, format=\"PNG\")\n", + " byte_data = img_buffer.getvalue()\n", + " base64_encoded_data = base64.b64encode(byte_data)\n", + " base64_string = base64_encoded_data.decode(\"utf-8\")\n", + "\n", + " # Return updated record\n", + " return record | {\"base64_image\": base64_string, \"uuid\": str(uuid.uuid4())}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bbca6568", + "metadata": {}, + "outputs": [], + "source": [ + "# Load and process the visual document dataset\n", + "print(\"πŸ“₯ Loading and processing document images...\")\n", + "\n", + "img_dataset_iter = iter(\n", + " load_dataset(**img_dataset_cfg).map(convert_image_to_chat_format, fn_kwargs={\"height\": BASE64_IMAGE_HEIGHT})\n", + ")\n", + "img_dataset = pd.DataFrame([next(img_dataset_iter) for _ in range(IMG_COUNT)])\n", + "\n", + "print(f\"βœ… Loaded {len(img_dataset)} images with columns: {list(img_dataset.columns)}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5ce17e71", + "metadata": {}, + "outputs": [], + "source": [ + "img_dataset.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9c0faf28", + "metadata": {}, + "outputs": [], + "source": [ + "# Add the seed dataset containing our processed images\n", + "df_seed = pd.DataFrame(img_dataset)[[\"uuid\", \"image_filename\", \"base64_image\", \"page\", \"options\", \"source\"]]\n", + 
"config_builder.with_seed_dataset(\n", + " DataDesigner.make_seed_reference_from_dataframe(df_seed, file_path=\"colpali_train_set.csv\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35bc8b77", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "# Add a column to generate detailed document summaries\n", + "config_builder.add_column(\n", + " LLMTextColumnConfig(\n", + " name=\"summary\",\n", + " model_alias=\"vision\",\n", + " prompt=(\n", + " \"Provide a detailed summary of the content in this image in Markdown format. \"\n", + " \"Start from the top of the image and then describe it from top to bottom. \"\n", + " \"Place a summary at the bottom.\"\n", + " ),\n", + " multi_modal_context=[\n", + " ImageContext(\n", + " column_name=\"base64_image\",\n", + " data_type=ModalityDataType.BASE64,\n", + " image_format=ImageFormat.PNG,\n", + " )\n", + " ],\n", + " )\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "16ac3f58", + "metadata": { + "lines_to_next_cell": 2 + }, + "source": [] + }, + { + "cell_type": "markdown", + "id": "1d13a0e4", + "metadata": {}, + "source": [ + "### πŸ” Iteration is key – preview the dataset!\n", + "\n", + "1. Use the `preview` method to generate a sample of records quickly.\n", + "\n", + "2. Inspect the results for quality and format issues.\n", + "\n", + "3. Adjust column configurations, prompts, or parameters as needed.\n", + "\n", + "4. 
Re-run the preview until satisfied.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "336af89f", + "metadata": {}, + "outputs": [], + "source": [ + "preview = data_designer.preview(config_builder, num_records=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5a57a536", + "metadata": {}, + "outputs": [], + "source": [ + "# Run this cell multiple times to cycle through the 2 preview records.\n", + "preview.display_sample_record()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e05b387", + "metadata": {}, + "outputs": [], + "source": [ + "# The preview dataset is available as a pandas DataFrame.\n", + "preview.dataset" + ] + }, + { + "cell_type": "markdown", + "id": "f69d543e", + "metadata": {}, + "source": [ + "### πŸ“Š Analyze the generated data\n", + "\n", + "- Data Designer automatically generates a basic statistical analysis of the generated data.\n", + "\n", + "- This analysis is available via the `analysis` property of generation result objects.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3cb66a7", + "metadata": {}, + "outputs": [], + "source": [ + "# Print the analysis as a table.\n", + "preview.analysis.to_report()" + ] + }, + { + "cell_type": "markdown", + "id": "60815241", + "metadata": {}, + "source": [ + "### πŸ”Ž Visual Inspection\n", + "\n", + "Let's compare the original document image with the generated summary to validate quality:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c9dddf6", + "metadata": { + "lines_to_next_cell": 2 + }, + "outputs": [], + "source": [ + "# Compare original document with generated summary\n", + "index = 0 # Change this to view different examples\n", + "\n", + "# Merge preview data with original images for comparison\n", + "comparison_dataset = preview.dataset.merge(pd.DataFrame(img_dataset)[[\"uuid\", \"image\"]], how=\"left\", on=\"uuid\")\n", + "\n", + "# Extract the record for 
display\n", + "record = comparison_dataset.iloc[index]\n", + "\n", + "print(\"πŸ“„ Original Document Image:\")\n", + "display(resize_image(record.image, BASE64_IMAGE_HEIGHT))\n", + "\n", + "print(\"\\nπŸ“ Generated Summary:\")\n", + "rich.print(Panel(record.summary, title=\"Document Summary\", title_align=\"left\"))" + ] + }, + { + "cell_type": "markdown", + "id": "eb6e2469", + "metadata": {}, + "source": [ + "### πŸ†™ Scale up!\n", + "\n", + "- Happy with your preview data?\n", + "\n", + "- Use the `create` method to submit larger Data Designer generation jobs.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6a0ed1c", + "metadata": {}, + "outputs": [], + "source": [ + "results = data_designer.create(config_builder, num_records=10)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "255b8f6f", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the generated dataset as a pandas DataFrame.\n", + "dataset = results.load_dataset()\n", + "\n", + "dataset.head()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "43b935b3", + "metadata": {}, + "outputs": [], + "source": [ + "# Load the analysis results into memory.\n", + "analysis = results.load_analysis()\n", + "\n", + "analysis.to_report()" + ] + }, + { + "cell_type": "markdown", + "id": "a74f1121", + "metadata": {}, + "source": [ + "## ⏭️ Next Steps\n", + "\n", + "Now that you've learned how to use visual context for image summarization in Data Designer, explore more:\n", + "\n", + "- Experiment with different vision models for specific document types\n", + "- Try different prompt variations to generate specialized descriptions (e.g., technical details, key findings)\n", + "- Combine vision-based summaries with other column types for multi-modal workflows\n", + "- Apply this pattern to other vision tasks like image captioning, OCR validation, or visual question answering\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": 
".venv", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/docs/notebook_source/4-providing-images-as-context.py b/docs/notebook_source/4-providing-images-as-context.py index 10afd4bd..dc2513cd 100644 --- a/docs/notebook_source/4-providing-images-as-context.py +++ b/docs/notebook_source/4-providing-images-as-context.py @@ -26,12 +26,6 @@ # If this is your first time using Data Designer, we recommend starting with the [first notebook](/notebooks/1-the-basics/) in this tutorial series. # -# %% [markdown] -# ### ⬇️ Install dependencies (if required) - -# %% -# !uv pip install pillow - # %% [markdown] # ### πŸ“¦ Import the essentials # diff --git a/docs/overrides/main.html b/docs/overrides/main.html index b2d13089..be7a7c51 100644 --- a/docs/overrides/main.html +++ b/docs/overrides/main.html @@ -2,16 +2,29 @@ {% block outdated %} You're not viewing the latest version. - + Click here to go to latest. {% endblock %} {% block content %} {% if page.nb_url %} - - {% include ".icons/material/download.svg" %} +
+ + Open In Colab + +
+ + {% include ".icons/material/download.svg" %} + {% endif %} {{ super() }} diff --git a/docs/scripts/generate_colab_notebooks.py b/docs/scripts/generate_colab_notebooks.py new file mode 100644 index 00000000..cb375776 --- /dev/null +++ b/docs/scripts/generate_colab_notebooks.py @@ -0,0 +1,185 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 +"""Script to generate Colab-compatible notebooks from notebook source files. + +This script processes jupytext percent-format Python files and: +1. Injects Colab-specific setup cells (pip install, API key from secrets) +2. Injects cells before the "Import the essentials" section +3. Saves the result as .ipynb files in docs/colab_notebooks +""" + +from __future__ import annotations + +import argparse +from pathlib import Path + +import jupytext +from nbformat import NotebookNode +from nbformat.v4 import new_code_cell, new_markdown_cell + +COLAB_SETUP_MARKDOWN = """\ +### ⚑ Colab Setup + +Run the cells below to install the dependencies and set up the API key. If you don't have an API key, you can generate one from [build.nvidia.com](https://build.nvidia.com). 
+""" + +ADDITIONAL_DEPENDENCIES = { + "4-providing-images-as-context.py": "pillow>=12.0.0", +} + +COLAB_INSTALL_CELL = """\ +!pip install -qU data-designer""" + +COLAB_DEPENDENCIES_CELL = """\ +!pip install -q {deps}""" + +COLAB_API_KEY_CELL = """\ +import getpass +import os + +from google.colab import userdata + +try: + os.environ["NVIDIA_API_KEY"] = userdata.get("NVIDIA_API_KEY") +except userdata.SecretNotFoundError: + os.environ["NVIDIA_API_KEY"] = getpass.getpass("Enter your NVIDIA API key: ")""" + + +def create_colab_setup_cells(additional_dependencies: str) -> list[NotebookNode]: + """Create the Colab-specific setup cells to inject before imports.""" + cells = [] + cells += [new_markdown_cell(source=COLAB_SETUP_MARKDOWN)] + cells += [new_code_cell(source=COLAB_INSTALL_CELL)] + if additional_dependencies: + cells += [new_code_cell(source=COLAB_DEPENDENCIES_CELL.format(deps=additional_dependencies))] + cells += [new_code_cell(source=COLAB_API_KEY_CELL)] + return cells + + +def find_import_section_index(cells: list[NotebookNode]) -> int: + """Find the index of the 'Import the essentials' markdown cell.""" + first_code_cell_index = -1 + for i, cell in enumerate(cells): + if first_code_cell_index == -1 and cell.get("cell_type") == "code": + first_code_cell_index = i + + if cell.get("cell_type") == "markdown": + source = cell.get("source", "") + if "import" in source.lower() and "essentials" in source.lower(): + return i + return first_code_cell_index + + +def process_notebook(notebook: NotebookNode, source_path: Path) -> NotebookNode: + """Process a notebook to make it Colab-compatible. 
+
+    Args:
+        notebook: The input notebook
+        source_path: Path to the notebook's source file; its file name is
+            used to look up extra pip installs in ADDITIONAL_DEPENDENCIES
+
+    Returns:
+        The processed notebook with Colab setup cells injected
+    """
+    cells = notebook.cells
+
+    additional_dependencies = ADDITIONAL_DEPENDENCIES.get(source_path.name, "")
+
+    # Find where to insert Colab setup (before "Import the essentials")
+    import_idx = find_import_section_index(cells)
+
+    if import_idx == -1:
+        # If not found, insert after first cell (title)
+        import_idx = 1
+
+    # Insert Colab setup cells before the import section
+    colab_cells = create_colab_setup_cells(additional_dependencies)
+    processed_cells = cells[:import_idx] + colab_cells + cells[import_idx:]
+
+    notebook.cells = processed_cells
+    return notebook
+
+
+def generate_colab_notebook(source_path: Path, output_dir: Path) -> Path:
+    """Generate a Colab-compatible notebook from a source file.
+
+    Args:
+        source_path: Path to the jupytext percent-format Python source file
+        output_dir: Directory to save the output notebook
+
+    Returns:
+        Path to the generated notebook
+    """
+    # Read the source file using jupytext
+    notebook = jupytext.read(source_path)
+
+    # Process the notebook for Colab
+    notebook = process_notebook(notebook, source_path)
+
+    # Determine output path
+    output_path = output_dir / f"{source_path.stem}.ipynb"
+
+    # Ensure output directory exists
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    # Write the notebook
+    jupytext.write(notebook, output_path, config={"metadata": {"jupytext": {"cell_metadata_filter": "-id"}}})
+
+    return output_path
+
+
+def main() -> None:
+    """Main entry point for the script."""
+    parser = argparse.ArgumentParser(description="Generate Colab-compatible notebooks from notebook source files.")
+    parser.add_argument(
+        "--source-dir",
+        type=Path,
+        default=Path("docs/notebook_source"),
+        help="Directory containing notebook source files (default: docs/notebook_source)",
+    )
+    parser.add_argument(
+        "--output-dir",
+        type=Path,
+        default=Path("docs/colab_notebooks"),
+        help="Directory to save Colab 
notebooks (default: docs/colab_notebooks)",
+    )
+    parser.add_argument(
+        "--files",
+        nargs="*",
+        help="Specific files to process (if not specified, process all .py files)",
+    )
+
+    args = parser.parse_args()
+
+    # Get list of source files
+    if args.files:
+        source_files = [args.source_dir / f for f in args.files]
+    else:
+        source_files = sorted(args.source_dir.glob("*.py"))
+        # Skip underscore-prefixed .py files, which are helpers rather than notebook sources
+        source_files = [f for f in source_files if not f.name.startswith("_")]
+
+    if not source_files:
+        print(f"No source files found in {args.source_dir}")
+        return
+
+    print(f"πŸ““ Generating Colab notebooks from {len(source_files)} source file(s)...")
+    print(f" Source: {args.source_dir}")
+    print(f" Output: {args.output_dir}")
+    print()
+
+    for source_path in source_files:
+        if not source_path.exists():
+            print(f"⚠️ Skipping {source_path} (file not found)")
+            continue
+
+        try:
+            output_path = generate_colab_notebook(source_path, args.output_dir)
+            print(f"βœ… {source_path.name} β†’ {output_path.name}")
+        except Exception as e:
+            print(f"❌ {source_path.name}: {e}")
+
+    print()
+    print(f"✨ Colab notebooks saved to {args.output_dir}/")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/pyproject.toml b/pyproject.toml
index e8a488c8..49a6de9b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -77,6 +77,7 @@ docs = [
 notebooks = [
     "jupyter>=1.0.0",
     "ipykernel>=6.29.0",
+    "pillow>=12.0.0",
 ]
 
 [build-system]
diff --git a/uv.lock b/uv.lock
index 85e2405e..5e7cb007 100644
--- a/uv.lock
+++ b/uv.lock
@@ -754,6 +754,7 @@ docs = [
 notebooks = [
     { name = "ipykernel" },
     { name = "jupyter" },
+    { name = "pillow" },
 ]
 
 [package.metadata]
@@ -812,6 +813,7 @@ docs = [
 notebooks = [
     { name = "ipykernel", specifier = ">=6.29.0" },
     { name = "jupyter", specifier = ">=1.0.0" },
+    { name = "pillow", specifier = ">=12.0.0" },
 ]
 
 [[package]]
@@ -3045,6 +3047,104 @@ wheels = [
 { url = 
"https://files.pythonhosted.org/packages/9e/c3/059298687310d527a58bb01f3b1965787ee3b40dce76752eda8b44e9a2c5/pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523", size = 63772, upload-time = "2023-11-25T06:56:14.81Z" }, ] +[[package]] +name = "pillow" +version = "12.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5a/b0/cace85a1b0c9775a9f8f5d5423c8261c858760e2466c79b2dd184638b056/pillow-12.0.0.tar.gz", hash = "sha256:87d4f8125c9988bfbed67af47dd7a953e2fc7b0cc1e7800ec6d2080d490bb353", size = 47008828, upload-time = "2025-10-15T18:24:14.008Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/08/26e68b6b5da219c2a2cb7b563af008b53bb8e6b6fcb3fa40715fcdb2523a/pillow-12.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:3adfb466bbc544b926d50fe8f4a4e6abd8c6bffd28a26177594e6e9b2b76572b", size = 5289809, upload-time = "2025-10-15T18:21:27.791Z" }, + { url = "https://files.pythonhosted.org/packages/cb/e9/4e58fb097fb74c7b4758a680aacd558810a417d1edaa7000142976ef9d2f/pillow-12.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1ac11e8ea4f611c3c0147424eae514028b5e9077dd99ab91e1bd7bc33ff145e1", size = 4650606, upload-time = "2025-10-15T18:21:29.823Z" }, + { url = "https://files.pythonhosted.org/packages/4b/e0/1fa492aa9f77b3bc6d471c468e62bfea1823056bf7e5e4f1914d7ab2565e/pillow-12.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d49e2314c373f4c2b39446fb1a45ed333c850e09d0c59ac79b72eb3b95397363", size = 6221023, upload-time = "2025-10-15T18:21:31.415Z" }, + { url = "https://files.pythonhosted.org/packages/c1/09/4de7cd03e33734ccd0c876f0251401f1314e819cbfd89a0fcb6e77927cc6/pillow-12.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:c7b2a63fd6d5246349f3d3f37b14430d73ee7e8173154461785e43036ffa96ca", size = 8024937, upload-time = "2025-10-15T18:21:33.453Z" }, + { url = 
"https://files.pythonhosted.org/packages/2e/69/0688e7c1390666592876d9d474f5e135abb4acb39dcb583c4dc5490f1aff/pillow-12.0.0-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d64317d2587c70324b79861babb9c09f71fbb780bad212018874b2c013d8600e", size = 6334139, upload-time = "2025-10-15T18:21:35.395Z" }, + { url = "https://files.pythonhosted.org/packages/ed/1c/880921e98f525b9b44ce747ad1ea8f73fd7e992bafe3ca5e5644bf433dea/pillow-12.0.0-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d77153e14b709fd8b8af6f66a3afbb9ed6e9fc5ccf0b6b7e1ced7b036a228782", size = 7026074, upload-time = "2025-10-15T18:21:37.219Z" }, + { url = "https://files.pythonhosted.org/packages/28/03/96f718331b19b355610ef4ebdbbde3557c726513030665071fd025745671/pillow-12.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:32ed80ea8a90ee3e6fa08c21e2e091bba6eda8eccc83dbc34c95169507a91f10", size = 6448852, upload-time = "2025-10-15T18:21:39.168Z" }, + { url = "https://files.pythonhosted.org/packages/3a/a0/6a193b3f0cc9437b122978d2c5cbce59510ccf9a5b48825096ed7472da2f/pillow-12.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:c828a1ae702fc712978bda0320ba1b9893d99be0badf2647f693cc01cf0f04fa", size = 7117058, upload-time = "2025-10-15T18:21:40.997Z" }, + { url = "https://files.pythonhosted.org/packages/a7/c4/043192375eaa4463254e8e61f0e2ec9a846b983929a8d0a7122e0a6d6fff/pillow-12.0.0-cp310-cp310-win32.whl", hash = "sha256:bd87e140e45399c818fac4247880b9ce719e4783d767e030a883a970be632275", size = 6295431, upload-time = "2025-10-15T18:21:42.518Z" }, + { url = "https://files.pythonhosted.org/packages/92/c6/c2f2fc7e56301c21827e689bb8b0b465f1b52878b57471a070678c0c33cd/pillow-12.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:455247ac8a4cfb7b9bc45b7e432d10421aea9fc2e74d285ba4072688a74c2e9d", size = 7000412, upload-time = "2025-10-15T18:21:44.404Z" }, + { url = 
"https://files.pythonhosted.org/packages/b2/d2/5f675067ba82da7a1c238a73b32e3fd78d67f9d9f80fbadd33a40b9c0481/pillow-12.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:6ace95230bfb7cd79ef66caa064bbe2f2a1e63d93471c3a2e1f1348d9f22d6b7", size = 2435903, upload-time = "2025-10-15T18:21:46.29Z" }, + { url = "https://files.pythonhosted.org/packages/0e/5a/a2f6773b64edb921a756eb0729068acad9fc5208a53f4a349396e9436721/pillow-12.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:0fd00cac9c03256c8b2ff58f162ebcd2587ad3e1f2e397eab718c47e24d231cc", size = 5289798, upload-time = "2025-10-15T18:21:47.763Z" }, + { url = "https://files.pythonhosted.org/packages/2e/05/069b1f8a2e4b5a37493da6c5868531c3f77b85e716ad7a590ef87d58730d/pillow-12.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3475b96f5908b3b16c47533daaa87380c491357d197564e0ba34ae75c0f3257", size = 4650589, upload-time = "2025-10-15T18:21:49.515Z" }, + { url = "https://files.pythonhosted.org/packages/61/e3/2c820d6e9a36432503ead175ae294f96861b07600a7156154a086ba7111a/pillow-12.0.0-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:110486b79f2d112cf6add83b28b627e369219388f64ef2f960fef9ebaf54c642", size = 6230472, upload-time = "2025-10-15T18:21:51.052Z" }, + { url = "https://files.pythonhosted.org/packages/4f/89/63427f51c64209c5e23d4d52071c8d0f21024d3a8a487737caaf614a5795/pillow-12.0.0-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5269cc1caeedb67e6f7269a42014f381f45e2e7cd42d834ede3c703a1d915fe3", size = 8033887, upload-time = "2025-10-15T18:21:52.604Z" }, + { url = "https://files.pythonhosted.org/packages/f6/1b/c9711318d4901093c15840f268ad649459cd81984c9ec9887756cca049a5/pillow-12.0.0-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:aa5129de4e174daccbc59d0a3b6d20eaf24417d59851c07ebb37aeb02947987c", size = 6343964, upload-time = "2025-10-15T18:21:54.619Z" }, + { url = 
"https://files.pythonhosted.org/packages/41/1e/db9470f2d030b4995083044cd8738cdd1bf773106819f6d8ba12597d5352/pillow-12.0.0-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bee2a6db3a7242ea309aa7ee8e2780726fed67ff4e5b40169f2c940e7eb09227", size = 7034756, upload-time = "2025-10-15T18:21:56.151Z" }, + { url = "https://files.pythonhosted.org/packages/cc/b0/6177a8bdd5ee4ed87cba2de5a3cc1db55ffbbec6176784ce5bb75aa96798/pillow-12.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:90387104ee8400a7b4598253b4c406f8958f59fcf983a6cea2b50d59f7d63d0b", size = 6458075, upload-time = "2025-10-15T18:21:57.759Z" }, + { url = "https://files.pythonhosted.org/packages/bc/5e/61537aa6fa977922c6a03253a0e727e6e4a72381a80d63ad8eec350684f2/pillow-12.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:bc91a56697869546d1b8f0a3ff35224557ae7f881050e99f615e0119bf934b4e", size = 7125955, upload-time = "2025-10-15T18:21:59.372Z" }, + { url = "https://files.pythonhosted.org/packages/1f/3d/d5033539344ee3cbd9a4d69e12e63ca3a44a739eb2d4c8da350a3d38edd7/pillow-12.0.0-cp311-cp311-win32.whl", hash = "sha256:27f95b12453d165099c84f8a8bfdfd46b9e4bda9e0e4b65f0635430027f55739", size = 6298440, upload-time = "2025-10-15T18:22:00.982Z" }, + { url = "https://files.pythonhosted.org/packages/4d/42/aaca386de5cc8bd8a0254516957c1f265e3521c91515b16e286c662854c4/pillow-12.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:b583dc9070312190192631373c6c8ed277254aa6e6084b74bdd0a6d3b221608e", size = 6999256, upload-time = "2025-10-15T18:22:02.617Z" }, + { url = "https://files.pythonhosted.org/packages/ba/f1/9197c9c2d5708b785f631a6dfbfa8eb3fb9672837cb92ae9af812c13b4ed/pillow-12.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:759de84a33be3b178a64c8ba28ad5c135900359e85fb662bc6e403ad4407791d", size = 2436025, upload-time = "2025-10-15T18:22:04.598Z" }, + { url = 
"https://files.pythonhosted.org/packages/2c/90/4fcce2c22caf044e660a198d740e7fbc14395619e3cb1abad12192c0826c/pillow-12.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:53561a4ddc36facb432fae7a9d8afbfaf94795414f5cdc5fc52f28c1dca90371", size = 5249377, upload-time = "2025-10-15T18:22:05.993Z" }, + { url = "https://files.pythonhosted.org/packages/fd/e0/ed960067543d080691d47d6938ebccbf3976a931c9567ab2fbfab983a5dd/pillow-12.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:71db6b4c1653045dacc1585c1b0d184004f0d7e694c7b34ac165ca70c0838082", size = 4650343, upload-time = "2025-10-15T18:22:07.718Z" }, + { url = "https://files.pythonhosted.org/packages/e7/a1/f81fdeddcb99c044bf7d6faa47e12850f13cee0849537a7d27eeab5534d4/pillow-12.0.0-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:2fa5f0b6716fc88f11380b88b31fe591a06c6315e955c096c35715788b339e3f", size = 6232981, upload-time = "2025-10-15T18:22:09.287Z" }, + { url = "https://files.pythonhosted.org/packages/88/e1/9098d3ce341a8750b55b0e00c03f1630d6178f38ac191c81c97a3b047b44/pillow-12.0.0-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:82240051c6ca513c616f7f9da06e871f61bfd7805f566275841af15015b8f98d", size = 8041399, upload-time = "2025-10-15T18:22:10.872Z" }, + { url = "https://files.pythonhosted.org/packages/a7/62/a22e8d3b602ae8cc01446d0c57a54e982737f44b6f2e1e019a925143771d/pillow-12.0.0-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:55f818bd74fe2f11d4d7cbc65880a843c4075e0ac7226bc1a23261dbea531953", size = 6347740, upload-time = "2025-10-15T18:22:12.769Z" }, + { url = "https://files.pythonhosted.org/packages/4f/87/424511bdcd02c8d7acf9f65caa09f291a519b16bd83c3fb3374b3d4ae951/pillow-12.0.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:b87843e225e74576437fd5b6a4c2205d422754f84a06942cfaf1dc32243e45a8", size = 7040201, upload-time = "2025-10-15T18:22:14.813Z" }, + { url = 
"https://files.pythonhosted.org/packages/dc/4d/435c8ac688c54d11755aedfdd9f29c9eeddf68d150fe42d1d3dbd2365149/pillow-12.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c607c90ba67533e1b2355b821fef6764d1dd2cbe26b8c1005ae84f7aea25ff79", size = 6462334, upload-time = "2025-10-15T18:22:16.375Z" }, + { url = "https://files.pythonhosted.org/packages/2b/f2/ad34167a8059a59b8ad10bc5c72d4d9b35acc6b7c0877af8ac885b5f2044/pillow-12.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:21f241bdd5080a15bc86d3466a9f6074a9c2c2b314100dd896ac81ee6db2f1ba", size = 7134162, upload-time = "2025-10-15T18:22:17.996Z" }, + { url = "https://files.pythonhosted.org/packages/0c/b1/a7391df6adacf0a5c2cf6ac1cf1fcc1369e7d439d28f637a847f8803beb3/pillow-12.0.0-cp312-cp312-win32.whl", hash = "sha256:dd333073e0cacdc3089525c7df7d39b211bcdf31fc2824e49d01c6b6187b07d0", size = 6298769, upload-time = "2025-10-15T18:22:19.923Z" }, + { url = "https://files.pythonhosted.org/packages/a2/0b/d87733741526541c909bbf159e338dcace4f982daac6e5a8d6be225ca32d/pillow-12.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:9fe611163f6303d1619bbcb653540a4d60f9e55e622d60a3108be0d5b441017a", size = 7001107, upload-time = "2025-10-15T18:22:21.644Z" }, + { url = "https://files.pythonhosted.org/packages/bc/96/aaa61ce33cc98421fb6088af2a03be4157b1e7e0e87087c888e2370a7f45/pillow-12.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:7dfb439562f234f7d57b1ac6bc8fe7f838a4bd49c79230e0f6a1da93e82f1fad", size = 2436012, upload-time = "2025-10-15T18:22:23.621Z" }, + { url = "https://files.pythonhosted.org/packages/62/f2/de993bb2d21b33a98d031ecf6a978e4b61da207bef02f7b43093774c480d/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphoneos.whl", hash = "sha256:0869154a2d0546545cde61d1789a6524319fc1897d9ee31218eae7a60ccc5643", size = 4045493, upload-time = "2025-10-15T18:22:25.758Z" }, + { url = 
"https://files.pythonhosted.org/packages/0e/b6/bc8d0c4c9f6f111a783d045310945deb769b806d7574764234ffd50bc5ea/pillow-12.0.0-cp313-cp313-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:a7921c5a6d31b3d756ec980f2f47c0cfdbce0fc48c22a39347a895f41f4a6ea4", size = 4120461, upload-time = "2025-10-15T18:22:27.286Z" }, + { url = "https://files.pythonhosted.org/packages/5d/57/d60d343709366a353dc56adb4ee1e7d8a2cc34e3fbc22905f4167cfec119/pillow-12.0.0-cp313-cp313-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:1ee80a59f6ce048ae13cda1abf7fbd2a34ab9ee7d401c46be3ca685d1999a399", size = 3576912, upload-time = "2025-10-15T18:22:28.751Z" }, + { url = "https://files.pythonhosted.org/packages/a4/a4/a0a31467e3f83b94d37568294b01d22b43ae3c5d85f2811769b9c66389dd/pillow-12.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:c50f36a62a22d350c96e49ad02d0da41dbd17ddc2e29750dbdba4323f85eb4a5", size = 5249132, upload-time = "2025-10-15T18:22:30.641Z" }, + { url = "https://files.pythonhosted.org/packages/83/06/48eab21dd561de2914242711434c0c0eb992ed08ff3f6107a5f44527f5e9/pillow-12.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5193fde9a5f23c331ea26d0cf171fbf67e3f247585f50c08b3e205c7aeb4589b", size = 4650099, upload-time = "2025-10-15T18:22:32.73Z" }, + { url = "https://files.pythonhosted.org/packages/fc/bd/69ed99fd46a8dba7c1887156d3572fe4484e3f031405fcc5a92e31c04035/pillow-12.0.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:bde737cff1a975b70652b62d626f7785e0480918dece11e8fef3c0cf057351c3", size = 6230808, upload-time = "2025-10-15T18:22:34.337Z" }, + { url = "https://files.pythonhosted.org/packages/ea/94/8fad659bcdbf86ed70099cb60ae40be6acca434bbc8c4c0d4ef356d7e0de/pillow-12.0.0-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:a6597ff2b61d121172f5844b53f21467f7082f5fb385a9a29c01414463f93b07", size = 8037804, upload-time = "2025-10-15T18:22:36.402Z" }, + { url = 
"https://files.pythonhosted.org/packages/20/39/c685d05c06deecfd4e2d1950e9a908aa2ca8bc4e6c3b12d93b9cafbd7837/pillow-12.0.0-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:0b817e7035ea7f6b942c13aa03bb554fc44fea70838ea21f8eb31c638326584e", size = 6345553, upload-time = "2025-10-15T18:22:38.066Z" }, + { url = "https://files.pythonhosted.org/packages/38/57/755dbd06530a27a5ed74f8cb0a7a44a21722ebf318edbe67ddbd7fb28f88/pillow-12.0.0-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f4f1231b7dec408e8670264ce63e9c71409d9583dd21d32c163e25213ee2a344", size = 7037729, upload-time = "2025-10-15T18:22:39.769Z" }, + { url = "https://files.pythonhosted.org/packages/ca/b6/7e94f4c41d238615674d06ed677c14883103dce1c52e4af16f000338cfd7/pillow-12.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:6e51b71417049ad6ab14c49608b4a24d8fb3fe605e5dfabfe523b58064dc3d27", size = 6459789, upload-time = "2025-10-15T18:22:41.437Z" }, + { url = "https://files.pythonhosted.org/packages/9c/14/4448bb0b5e0f22dd865290536d20ec8a23b64e2d04280b89139f09a36bb6/pillow-12.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:d120c38a42c234dc9a8c5de7ceaaf899cf33561956acb4941653f8bdc657aa79", size = 7130917, upload-time = "2025-10-15T18:22:43.152Z" }, + { url = "https://files.pythonhosted.org/packages/dd/ca/16c6926cc1c015845745d5c16c9358e24282f1e588237a4c36d2b30f182f/pillow-12.0.0-cp313-cp313-win32.whl", hash = "sha256:4cc6b3b2efff105c6a1656cfe59da4fdde2cda9af1c5e0b58529b24525d0a098", size = 6302391, upload-time = "2025-10-15T18:22:44.753Z" }, + { url = "https://files.pythonhosted.org/packages/6d/2a/dd43dcfd6dae9b6a49ee28a8eedb98c7d5ff2de94a5d834565164667b97b/pillow-12.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:4cf7fed4b4580601c4345ceb5d4cbf5a980d030fd5ad07c4d2ec589f95f09905", size = 7007477, upload-time = "2025-10-15T18:22:46.838Z" }, + { url = 
"https://files.pythonhosted.org/packages/77/f0/72ea067f4b5ae5ead653053212af05ce3705807906ba3f3e8f58ddf617e6/pillow-12.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:9f0b04c6b8584c2c193babcccc908b38ed29524b29dd464bc8801bf10d746a3a", size = 2435918, upload-time = "2025-10-15T18:22:48.399Z" }, + { url = "https://files.pythonhosted.org/packages/f5/5e/9046b423735c21f0487ea6cb5b10f89ea8f8dfbe32576fe052b5ba9d4e5b/pillow-12.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:7fa22993bac7b77b78cae22bad1e2a987ddf0d9015c63358032f84a53f23cdc3", size = 5251406, upload-time = "2025-10-15T18:22:49.905Z" }, + { url = "https://files.pythonhosted.org/packages/12/66/982ceebcdb13c97270ef7a56c3969635b4ee7cd45227fa707c94719229c5/pillow-12.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:f135c702ac42262573fe9714dfe99c944b4ba307af5eb507abef1667e2cbbced", size = 4653218, upload-time = "2025-10-15T18:22:51.587Z" }, + { url = "https://files.pythonhosted.org/packages/16/b3/81e625524688c31859450119bf12674619429cab3119eec0e30a7a1029cb/pillow-12.0.0-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:c85de1136429c524e55cfa4e033b4a7940ac5c8ee4d9401cc2d1bf48154bbc7b", size = 6266564, upload-time = "2025-10-15T18:22:53.215Z" }, + { url = "https://files.pythonhosted.org/packages/98/59/dfb38f2a41240d2408096e1a76c671d0a105a4a8471b1871c6902719450c/pillow-12.0.0-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:38df9b4bfd3db902c9c2bd369bcacaf9d935b2fff73709429d95cc41554f7b3d", size = 8069260, upload-time = "2025-10-15T18:22:54.933Z" }, + { url = "https://files.pythonhosted.org/packages/dc/3d/378dbea5cd1874b94c312425ca77b0f47776c78e0df2df751b820c8c1d6c/pillow-12.0.0-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7d87ef5795da03d742bf49439f9ca4d027cde49c82c5371ba52464aee266699a", size = 6379248, upload-time = "2025-10-15T18:22:56.605Z" }, + { url = 
"https://files.pythonhosted.org/packages/84/b0/d525ef47d71590f1621510327acec75ae58c721dc071b17d8d652ca494d8/pillow-12.0.0-cp313-cp313t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:aff9e4d82d082ff9513bdd6acd4f5bd359f5b2c870907d2b0a9c5e10d40c88fe", size = 7066043, upload-time = "2025-10-15T18:22:58.53Z" }, + { url = "https://files.pythonhosted.org/packages/61/2c/aced60e9cf9d0cde341d54bf7932c9ffc33ddb4a1595798b3a5150c7ec4e/pillow-12.0.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:8d8ca2b210ada074d57fcee40c30446c9562e542fc46aedc19baf758a93532ee", size = 6490915, upload-time = "2025-10-15T18:23:00.582Z" }, + { url = "https://files.pythonhosted.org/packages/ef/26/69dcb9b91f4e59f8f34b2332a4a0a951b44f547c4ed39d3e4dcfcff48f89/pillow-12.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:99a7f72fb6249302aa62245680754862a44179b545ded638cf1fef59befb57ef", size = 7157998, upload-time = "2025-10-15T18:23:02.627Z" }, + { url = "https://files.pythonhosted.org/packages/61/2b/726235842220ca95fa441ddf55dd2382b52ab5b8d9c0596fe6b3f23dafe8/pillow-12.0.0-cp313-cp313t-win32.whl", hash = "sha256:4078242472387600b2ce8d93ade8899c12bf33fa89e55ec89fe126e9d6d5d9e9", size = 6306201, upload-time = "2025-10-15T18:23:04.709Z" }, + { url = "https://files.pythonhosted.org/packages/c0/3d/2afaf4e840b2df71344ababf2f8edd75a705ce500e5dc1e7227808312ae1/pillow-12.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:2c54c1a783d6d60595d3514f0efe9b37c8808746a66920315bfd34a938d7994b", size = 7013165, upload-time = "2025-10-15T18:23:06.46Z" }, + { url = "https://files.pythonhosted.org/packages/6f/75/3fa09aa5cf6ed04bee3fa575798ddf1ce0bace8edb47249c798077a81f7f/pillow-12.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:26d9f7d2b604cd23aba3e9faf795787456ac25634d82cd060556998e39c6fa47", size = 2437834, upload-time = "2025-10-15T18:23:08.194Z" }, + { url = 
"https://files.pythonhosted.org/packages/54/2a/9a8c6ba2c2c07b71bec92cf63e03370ca5e5f5c5b119b742bcc0cde3f9c5/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphoneos.whl", hash = "sha256:beeae3f27f62308f1ddbcfb0690bf44b10732f2ef43758f169d5e9303165d3f9", size = 4045531, upload-time = "2025-10-15T18:23:10.121Z" }, + { url = "https://files.pythonhosted.org/packages/84/54/836fdbf1bfb3d66a59f0189ff0b9f5f666cee09c6188309300df04ad71fa/pillow-12.0.0-cp314-cp314-ios_13_0_arm64_iphonesimulator.whl", hash = "sha256:d4827615da15cd59784ce39d3388275ec093ae3ee8d7f0c089b76fa87af756c2", size = 4120554, upload-time = "2025-10-15T18:23:12.14Z" }, + { url = "https://files.pythonhosted.org/packages/0d/cd/16aec9f0da4793e98e6b54778a5fbce4f375c6646fe662e80600b8797379/pillow-12.0.0-cp314-cp314-ios_13_0_x86_64_iphonesimulator.whl", hash = "sha256:3e42edad50b6909089750e65c91aa09aaf1e0a71310d383f11321b27c224ed8a", size = 3576812, upload-time = "2025-10-15T18:23:13.962Z" }, + { url = "https://files.pythonhosted.org/packages/f6/b7/13957fda356dc46339298b351cae0d327704986337c3c69bb54628c88155/pillow-12.0.0-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:e5d8efac84c9afcb40914ab49ba063d94f5dbdf5066db4482c66a992f47a3a3b", size = 5252689, upload-time = "2025-10-15T18:23:15.562Z" }, + { url = "https://files.pythonhosted.org/packages/fc/f5/eae31a306341d8f331f43edb2e9122c7661b975433de5e447939ae61c5da/pillow-12.0.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:266cd5f2b63ff316d5a1bba46268e603c9caf5606d44f38c2873c380950576ad", size = 4650186, upload-time = "2025-10-15T18:23:17.379Z" }, + { url = "https://files.pythonhosted.org/packages/86/62/2a88339aa40c4c77e79108facbd307d6091e2c0eb5b8d3cf4977cfca2fe6/pillow-12.0.0-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:58eea5ebe51504057dd95c5b77d21700b77615ab0243d8152793dc00eb4faf01", size = 6230308, upload-time = "2025-10-15T18:23:18.971Z" }, + { url = 
"https://files.pythonhosted.org/packages/c7/33/5425a8992bcb32d1cb9fa3dd39a89e613d09a22f2c8083b7bf43c455f760/pillow-12.0.0-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:f13711b1a5ba512d647a0e4ba79280d3a9a045aaf7e0cc6fbe96b91d4cdf6b0c", size = 8039222, upload-time = "2025-10-15T18:23:20.909Z" }, + { url = "https://files.pythonhosted.org/packages/d8/61/3f5d3b35c5728f37953d3eec5b5f3e77111949523bd2dd7f31a851e50690/pillow-12.0.0-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6846bd2d116ff42cba6b646edf5bf61d37e5cbd256425fa089fee4ff5c07a99e", size = 6346657, upload-time = "2025-10-15T18:23:23.077Z" }, + { url = "https://files.pythonhosted.org/packages/3a/be/ee90a3d79271227e0f0a33c453531efd6ed14b2e708596ba5dd9be948da3/pillow-12.0.0-cp314-cp314-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:c98fa880d695de164b4135a52fd2e9cd7b7c90a9d8ac5e9e443a24a95ef9248e", size = 7038482, upload-time = "2025-10-15T18:23:25.005Z" }, + { url = "https://files.pythonhosted.org/packages/44/34/a16b6a4d1ad727de390e9bd9f19f5f669e079e5826ec0f329010ddea492f/pillow-12.0.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:fa3ed2a29a9e9d2d488b4da81dcb54720ac3104a20bf0bd273f1e4648aff5af9", size = 6461416, upload-time = "2025-10-15T18:23:27.009Z" }, + { url = "https://files.pythonhosted.org/packages/b6/39/1aa5850d2ade7d7ba9f54e4e4c17077244ff7a2d9e25998c38a29749eb3f/pillow-12.0.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:d034140032870024e6b9892c692fe2968493790dd57208b2c37e3fb35f6df3ab", size = 7131584, upload-time = "2025-10-15T18:23:29.752Z" }, + { url = "https://files.pythonhosted.org/packages/bf/db/4fae862f8fad0167073a7733973bfa955f47e2cac3dc3e3e6257d10fab4a/pillow-12.0.0-cp314-cp314-win32.whl", hash = "sha256:1b1b133e6e16105f524a8dec491e0586d072948ce15c9b914e41cdadd209052b", size = 6400621, upload-time = "2025-10-15T18:23:32.06Z" }, + { url = 
"https://files.pythonhosted.org/packages/2b/24/b350c31543fb0107ab2599464d7e28e6f856027aadda995022e695313d94/pillow-12.0.0-cp314-cp314-win_amd64.whl", hash = "sha256:8dc232e39d409036af549c86f24aed8273a40ffa459981146829a324e0848b4b", size = 7142916, upload-time = "2025-10-15T18:23:34.71Z" }, + { url = "https://files.pythonhosted.org/packages/0f/9b/0ba5a6fd9351793996ef7487c4fdbde8d3f5f75dbedc093bb598648fddf0/pillow-12.0.0-cp314-cp314-win_arm64.whl", hash = "sha256:d52610d51e265a51518692045e372a4c363056130d922a7351429ac9f27e70b0", size = 2523836, upload-time = "2025-10-15T18:23:36.967Z" }, + { url = "https://files.pythonhosted.org/packages/f5/7a/ceee0840aebc579af529b523d530840338ecf63992395842e54edc805987/pillow-12.0.0-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:1979f4566bb96c1e50a62d9831e2ea2d1211761e5662afc545fa766f996632f6", size = 5255092, upload-time = "2025-10-15T18:23:38.573Z" }, + { url = "https://files.pythonhosted.org/packages/44/76/20776057b4bfd1aef4eeca992ebde0f53a4dce874f3ae693d0ec90a4f79b/pillow-12.0.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b2e4b27a6e15b04832fe9bf292b94b5ca156016bbc1ea9c2c20098a0320d6cf6", size = 4653158, upload-time = "2025-10-15T18:23:40.238Z" }, + { url = "https://files.pythonhosted.org/packages/82/3f/d9ff92ace07be8836b4e7e87e6a4c7a8318d47c2f1463ffcf121fc57d9cb/pillow-12.0.0-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:fb3096c30df99fd01c7bf8e544f392103d0795b9f98ba71a8054bcbf56b255f1", size = 6267882, upload-time = "2025-10-15T18:23:42.434Z" }, + { url = "https://files.pythonhosted.org/packages/9f/7a/4f7ff87f00d3ad33ba21af78bfcd2f032107710baf8280e3722ceec28cda/pillow-12.0.0-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7438839e9e053ef79f7112c881cef684013855016f928b168b81ed5835f3e75e", size = 8071001, upload-time = "2025-10-15T18:23:44.29Z" }, + { url = 
"https://files.pythonhosted.org/packages/75/87/fcea108944a52dad8cca0715ae6247e271eb80459364a98518f1e4f480c1/pillow-12.0.0-cp314-cp314t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d5c411a8eaa2299322b647cd932586b1427367fd3184ffbb8f7a219ea2041ca", size = 6380146, upload-time = "2025-10-15T18:23:46.065Z" }, + { url = "https://files.pythonhosted.org/packages/91/52/0d31b5e571ef5fd111d2978b84603fce26aba1b6092f28e941cb46570745/pillow-12.0.0-cp314-cp314t-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:d7e091d464ac59d2c7ad8e7e08105eaf9dafbc3883fd7265ffccc2baad6ac925", size = 7067344, upload-time = "2025-10-15T18:23:47.898Z" }, + { url = "https://files.pythonhosted.org/packages/7b/f4/2dd3d721f875f928d48e83bb30a434dee75a2531bca839bb996bb0aa5a91/pillow-12.0.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:792a2c0be4dcc18af9d4a2dfd8a11a17d5e25274a1062b0ec1c2d79c76f3e7f8", size = 6491864, upload-time = "2025-10-15T18:23:49.607Z" }, + { url = "https://files.pythonhosted.org/packages/30/4b/667dfcf3d61fc309ba5a15b141845cece5915e39b99c1ceab0f34bf1d124/pillow-12.0.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:afbefa430092f71a9593a99ab6a4e7538bc9eabbf7bf94f91510d3503943edc4", size = 7158911, upload-time = "2025-10-15T18:23:51.351Z" }, + { url = "https://files.pythonhosted.org/packages/a2/2f/16cabcc6426c32218ace36bf0d55955e813f2958afddbf1d391849fee9d1/pillow-12.0.0-cp314-cp314t-win32.whl", hash = "sha256:3830c769decf88f1289680a59d4f4c46c72573446352e2befec9a8512104fa52", size = 6408045, upload-time = "2025-10-15T18:23:53.177Z" }, + { url = "https://files.pythonhosted.org/packages/35/73/e29aa0c9c666cf787628d3f0dcf379f4791fba79f4936d02f8b37165bdf8/pillow-12.0.0-cp314-cp314t-win_amd64.whl", hash = "sha256:905b0365b210c73afb0ebe9101a32572152dfd1c144c7e28968a331b9217b94a", size = 7148282, upload-time = "2025-10-15T18:23:55.316Z" }, + { url = 
"https://files.pythonhosted.org/packages/c1/70/6b41bdcddf541b437bbb9f47f94d2db5d9ddef6c37ccab8c9107743748a4/pillow-12.0.0-cp314-cp314t-win_arm64.whl", hash = "sha256:99353a06902c2e43b43e8ff74ee65a7d90307d82370604746738a1e0661ccca7", size = 2525630, upload-time = "2025-10-15T18:23:57.149Z" }, + { url = "https://files.pythonhosted.org/packages/1d/b3/582327e6c9f86d037b63beebe981425d6811104cb443e8193824ef1a2f27/pillow-12.0.0-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:b22bd8c974942477156be55a768f7aa37c46904c175be4e158b6a86e3a6b7ca8", size = 5215068, upload-time = "2025-10-15T18:23:59.594Z" }, + { url = "https://files.pythonhosted.org/packages/fd/d6/67748211d119f3b6540baf90f92fae73ae51d5217b171b0e8b5f7e5d558f/pillow-12.0.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:805ebf596939e48dbb2e4922a1d3852cfc25c38160751ce02da93058b48d252a", size = 4614994, upload-time = "2025-10-15T18:24:01.669Z" }, + { url = "https://files.pythonhosted.org/packages/2d/e1/f8281e5d844c41872b273b9f2c34a4bf64ca08905668c8ae730eedc7c9fa/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cae81479f77420d217def5f54b5b9d279804d17e982e0f2fa19b1d1e14ab5197", size = 5246639, upload-time = "2025-10-15T18:24:03.403Z" }, + { url = "https://files.pythonhosted.org/packages/94/5a/0d8ab8ffe8a102ff5df60d0de5af309015163bf710c7bb3e8311dd3b3ad0/pillow-12.0.0-pp311-pypy311_pp73-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:aeaefa96c768fc66818730b952a862235d68825c178f1b3ffd4efd7ad2edcb7c", size = 6986839, upload-time = "2025-10-15T18:24:05.344Z" }, + { url = "https://files.pythonhosted.org/packages/20/2e/3434380e8110b76cd9eb00a363c484b050f949b4bbe84ba770bb8508a02c/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:09f2d0abef9e4e2f349305a4f8cc784a8a6c2f58a8c4892eea13b10a943bd26e", size = 5313505, upload-time = "2025-10-15T18:24:07.137Z" }, + { url = 
"https://files.pythonhosted.org/packages/57/ca/5a9d38900d9d74785141d6580950fe705de68af735ff6e727cb911b64740/pillow-12.0.0-pp311-pypy311_pp73-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bdee52571a343d721fb2eb3b090a82d959ff37fc631e3f70422e0c2e029f3e76", size = 5963654, upload-time = "2025-10-15T18:24:09.579Z" }, + { url = "https://files.pythonhosted.org/packages/95/7e/f896623c3c635a90537ac093c6a618ebe1a90d87206e42309cb5d98a1b9e/pillow-12.0.0-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:b290fd8aa38422444d4b50d579de197557f182ef1068b75f5aa8558638b8d0a5", size = 6997850, upload-time = "2025-10-15T18:24:11.495Z" }, +] + [[package]] name = "platformdirs" version = "4.5.0"