diff --git a/docs/colab_notebooks/1-the-basics.ipynb b/docs/colab_notebooks/1-the-basics.ipynb index e77c4a1b..57620b8c 100644 --- a/docs/colab_notebooks/1-the-basics.ipynb +++ b/docs/colab_notebooks/1-the-basics.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "709c75cf", + "id": "9f804f90", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: The Basics\n", @@ -14,7 +14,7 @@ }, { "cell_type": "markdown", - "id": "3cb2774e", + "id": "9cb786eb", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -25,7 +25,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b886272b", + "id": "7f45ea56", "metadata": {}, "outputs": [], "source": [ @@ -36,7 +36,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f5cf20f9", + "id": "ea86e81e", "metadata": {}, "outputs": [], "source": [ @@ -53,7 +53,7 @@ }, { "cell_type": "markdown", - "id": "e11a4288", + "id": "16611c7b", "metadata": {}, "source": [ "### πŸ“¦ Import the essentials\n", @@ -64,7 +64,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e8faecea", + "id": "875342bb", "metadata": {}, "outputs": [], "source": [ @@ -85,7 +85,7 @@ }, { "cell_type": "markdown", - "id": "314d17c1", + "id": "b58ac676", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -98,7 +98,7 @@ { "cell_type": "code", "execution_count": null, - "id": "be3b5c6f", + "id": "3ce805ad", "metadata": {}, "outputs": [], "source": [ @@ -107,7 +107,7 @@ }, { "cell_type": "markdown", - "id": "1c2852e1", + "id": "50e961ed", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -124,7 +124,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5ad52a10", + "id": "1b07a6a5", "metadata": {}, "outputs": [], "source": [ @@ -132,7 +132,7 @@ "MODEL_PROVIDER = \"nvidia\"\n", "\n", "# The model ID is from build.nvidia.com.\n", - "MODEL_ID = \"nvidia/nvidia-nemotron-nano-9b-v2\"\n", + "MODEL_ID = \"nvidia/nemotron-3-nano-30b-a3b\"\n", "\n", "# We choose this alias to be descriptive for our use case.\n", "MODEL_ALIAS = \"nemotron-nano-v2\"\n", @@ -156,7 +156,7 @@ }, { "cell_type": "markdown", - "id": "25cce9f7", + "id": "6d873251", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -171,7 +171,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8ff7190c", + "id": "d45fac13", "metadata": {}, "outputs": [], "source": [ @@ -180,7 +180,7 @@ }, { "cell_type": "markdown", - "id": "6bc3b23e", + "id": "c35b0274", "metadata": {}, "source": [ "## 🎲 Getting started with sampler columns\n", @@ -197,7 +197,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4cff01cb", + "id": "14cb9967", "metadata": {}, "outputs": [], "source": [ @@ -206,7 +206,7 @@ }, { "cell_type": "markdown", - "id": "f981ec58", + "id": "40945aea", "metadata": {}, "source": [ "Let's start designing our product review dataset by adding product category and subcategory columns.\n" @@ -215,7 +215,7 @@ { "cell_type": "code", "execution_count": null, - "id": "70ba24a6", + "id": "a7d87e00", "metadata": {}, "outputs": [], "source": [ @@ -296,7 +296,7 @@ }, { "cell_type": "markdown", - "id": "6f1a6c59", + "id": "48699878", "metadata": {}, "source": [ "Next, let's add samplers to generate data related to the customer and their review.\n" @@ -305,7 +305,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d45b925f", + "id": "df84faf3", "metadata": {}, "outputs": [], "source": [ @@ -342,7 +342,7 @@ }, { "cell_type": "markdown", - "id": "bf49c2b1", + "id": "8288352d", 
"metadata": {}, "source": [ "## 🦜 LLM-generated columns\n", @@ -357,7 +357,7 @@ { "cell_type": "code", "execution_count": null, - "id": "669fe324", + "id": "157919b4", "metadata": {}, "outputs": [], "source": [ @@ -394,7 +394,7 @@ }, { "cell_type": "markdown", - "id": "4d93ad9a", + "id": "009646e4", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -411,7 +411,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7b2466d1", + "id": "a9c90236", "metadata": {}, "outputs": [], "source": [ @@ -421,7 +421,7 @@ { "cell_type": "code", "execution_count": null, - "id": "508a2866", + "id": "3cfe180e", "metadata": {}, "outputs": [], "source": [ @@ -432,7 +432,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6fbdaf64", + "id": "65b2f595", "metadata": {}, "outputs": [], "source": [ @@ -442,7 +442,7 @@ }, { "cell_type": "markdown", - "id": "154e8e71", + "id": "2134fa0f", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -455,7 +455,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7e031c7b", + "id": "8a37dd61", "metadata": {}, "outputs": [], "source": [ @@ -465,7 +465,7 @@ }, { "cell_type": "markdown", - "id": "a60a1fab", + "id": "b715bc3a", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -478,7 +478,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e07c6718", + "id": "565f03a1", "metadata": {}, "outputs": [], "source": [ @@ -488,7 +488,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7a5406da", + "id": "9d4c91ad", "metadata": {}, "outputs": [], "source": [ @@ -501,7 +501,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f0360b0e", + "id": "93c5a082", "metadata": {}, "outputs": [], "source": [ @@ -513,7 +513,7 @@ }, { "cell_type": "markdown", - "id": "d365dda0", + "id": "13f7c942", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb index 84ece867..819183e9 100644 --- a/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +++ b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "75360052", + "id": "efccd5c5", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Structured Outputs and Jinja Expressions\n", @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "5a028f03", + "id": "34ed6e5e", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -27,7 +27,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ba8c8f3f", + "id": "d247eb12", "metadata": {}, "outputs": [], "source": [ @@ -38,7 +38,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b0825a6a", + "id": "a1468d3b", "metadata": {}, "outputs": [], "source": [ @@ -55,7 +55,7 @@ }, { "cell_type": "markdown", - "id": "e18ab9a1", + "id": "7e3bbbcd", "metadata": {}, "source": [ "### πŸ“¦ Import the essentials\n", @@ -66,7 +66,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0cc3443c", + "id": "6c7a6a94", "metadata": {}, "outputs": [], "source": [ @@ -87,7 +87,7 @@ }, { "cell_type": "markdown", - "id": "abfce2e0", + "id": "b0c74b0e", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -100,7 +100,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e2a67d69", + "id": "77fd1b9f", "metadata": {}, "outputs": [], "source": [ @@ -109,7 +109,7 @@ }, { "cell_type": "markdown", - "id": "65cc7285", + "id": 
"13b73d51", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -126,7 +126,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a78a0747", + "id": "b817e2e6", "metadata": {}, "outputs": [], "source": [ @@ -134,7 +134,7 @@ "MODEL_PROVIDER = \"nvidia\"\n", "\n", "# The model ID is from build.nvidia.com.\n", - "MODEL_ID = \"nvidia/nvidia-nemotron-nano-9b-v2\"\n", + "MODEL_ID = \"nvidia/nemotron-3-nano-30b-a3b\"\n", "\n", "# We choose this alias to be descriptive for our use case.\n", "MODEL_ALIAS = \"nemotron-nano-v2\"\n", @@ -158,7 +158,7 @@ }, { "cell_type": "markdown", - "id": "94e33a9a", + "id": "a68b6918", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -173,7 +173,7 @@ { "cell_type": "code", "execution_count": null, - "id": "840863dd", + "id": "71b57cd9", "metadata": {}, "outputs": [], "source": [ @@ -182,7 +182,7 @@ }, { "cell_type": "markdown", - "id": "2451048a", + "id": "2ceb04fb", "metadata": {}, "source": [ "### πŸ§‘β€πŸŽ¨ Designing our data\n", @@ -209,7 +209,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fda11990", + "id": "12ffaa28", "metadata": {}, "outputs": [], "source": [ @@ -237,7 +237,7 @@ }, { "cell_type": "markdown", - "id": "b5f6ced7", + "id": "c0417a6e", "metadata": {}, "source": [ "Next, let's design our product review dataset using a few more tricks compared to the previous notebook.\n" @@ -246,7 +246,7 @@ { "cell_type": "code", "execution_count": null, - "id": "eb4538fd", + "id": "ca2086a9", "metadata": {}, "outputs": [], "source": [ @@ -355,7 +355,7 @@ }, { "cell_type": "markdown", - "id": "5f003b9e", + "id": "70036c8a", "metadata": {}, "source": [ "Next, we will use more advanced Jinja expressions to create new columns.\n", @@ -372,7 +372,7 @@ { "cell_type": "code", "execution_count": null, - "id": "eb5d8fb1", + "id": "b3615ef2", "metadata": {}, "outputs": [], "source": [ @@ -426,7 +426,7 @@ }, { "cell_type": "markdown", - "id": "edd7d429", + "id": "4c3d3a68", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -443,7 +443,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ae4bf40d", + "id": "4735ba79", "metadata": {}, "outputs": [], "source": [ @@ -453,7 +453,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ff96200d", + "id": "e9f607ac", "metadata": {}, "outputs": [], "source": [ @@ -464,7 +464,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8743034c", + "id": "05e4ccf6", "metadata": {}, "outputs": [], "source": [ @@ -474,7 +474,7 @@ }, { "cell_type": "markdown", - "id": "bc4f5aa2", + "id": "e64d9f3a", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -487,7 +487,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7e276b36", + "id": "c50c3e4e", "metadata": {}, "outputs": [], "source": [ @@ -497,7 +497,7 @@ }, { "cell_type": "markdown", - "id": "af921b28", + "id": "977f2979", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -510,7 +510,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6053798d", + "id": "8617a7e7", "metadata": {}, "outputs": [], "source": [ @@ -520,7 +520,7 @@ { "cell_type": "code", "execution_count": null, - "id": "49bf6b4f", + "id": "43e9e07b", "metadata": {}, "outputs": [], "source": [ @@ -533,7 +533,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e033facc", + "id": "6f763f9c", "metadata": {}, "outputs": [], "source": [ @@ -545,7 +545,7 @@ }, { "cell_type": "markdown", - "id": "7b8641c8", + "id": "8788bda2", 
"metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb index 3b0fd70f..6ddd3c42 100644 --- a/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +++ b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "738d16f1", + "id": "93257f91", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Seeding Synthetic Data Generation with an External Dataset\n", @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "64d9526d", + "id": "f9aa6154", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -27,7 +27,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1767dc37", + "id": "ae7066f5", "metadata": {}, "outputs": [], "source": [ @@ -38,7 +38,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e04a061e", + "id": "ba8dc550", "metadata": {}, "outputs": [], "source": [ @@ -55,7 +55,7 @@ }, { "cell_type": "markdown", - "id": "bb02bd01", + "id": "d00f21f0", "metadata": {}, "source": [ "### πŸ“¦ Import the essentials\n", @@ -66,7 +66,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f030c7a3", + "id": "31c33b4f", "metadata": {}, "outputs": [], "source": [ @@ -81,7 +81,7 @@ }, { "cell_type": "markdown", - "id": "8fc24d87", + "id": "dcec3852", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -94,7 +94,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fbd4f8d1", + "id": "017ded4a", "metadata": {}, "outputs": [], "source": [ @@ -103,7 +103,7 @@ }, { "cell_type": "markdown", - "id": "2b84a1cf", + "id": "7108f61b", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -120,7 +120,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a973341a", + "id": "821d91aa", "metadata": {}, "outputs": [], "source": [ @@ -128,7 +128,7 @@ "MODEL_PROVIDER = \"nvidia\"\n", "\n", "# The model ID is from build.nvidia.com.\n", - "MODEL_ID = \"nvidia/nvidia-nemotron-nano-9b-v2\"\n", + "MODEL_ID = \"nvidia/nemotron-3-nano-30b-a3b\"\n", "\n", "# We choose this alias to be descriptive for our use case.\n", "MODEL_ALIAS = \"nemotron-nano-v2\"\n", @@ -152,7 +152,7 @@ }, { "cell_type": "markdown", - "id": "56ed7cf4", + "id": "fcb911ef", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -167,7 +167,7 @@ { "cell_type": "code", "execution_count": null, - "id": "031c5ca6", + "id": "5a0e0c57", "metadata": {}, "outputs": [], "source": [ @@ -176,7 +176,7 @@ }, { "cell_type": "markdown", - "id": "f617bc93", + "id": "1bd0982e", "metadata": {}, "source": [ "## πŸ₯ Prepare a seed dataset\n", @@ -201,7 +201,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f2ad0c58", + "id": "d8d6c161", "metadata": {}, "outputs": [], "source": [ @@ -219,7 +219,7 @@ }, { "cell_type": "markdown", - "id": "44a5f487", + "id": "b5f01e5e", "metadata": {}, "source": [ "## 🎨 Designing our synthetic patient notes dataset\n", @@ -236,7 +236,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e1825523", + "id": "1a85709d", "metadata": {}, "outputs": [], "source": [ @@ -326,7 +326,7 @@ }, { "cell_type": "markdown", - "id": "6aacdb7d", + "id": "0749aa84", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -343,7 +343,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7aa0d1fb", + "id": "ec3a075c", "metadata": {}, "outputs": [], "source": [ @@ -353,7 +353,7 @@ { "cell_type": "code", 
"execution_count": null, - "id": "1cc1dcf2", + "id": "b91c2a99", "metadata": {}, "outputs": [], "source": [ @@ -364,7 +364,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6369e0e2", + "id": "cadf0463", "metadata": {}, "outputs": [], "source": [ @@ -374,7 +374,7 @@ }, { "cell_type": "markdown", - "id": "d8af020a", + "id": "7953ccbf", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -387,7 +387,7 @@ { "cell_type": "code", "execution_count": null, - "id": "09c662e5", + "id": "f816677a", "metadata": {}, "outputs": [], "source": [ @@ -397,7 +397,7 @@ }, { "cell_type": "markdown", - "id": "35fe454e", + "id": "2dc9b781", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -410,7 +410,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c59d0395", + "id": "b610e51a", "metadata": {}, "outputs": [], "source": [ @@ -420,7 +420,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fab7e2b7", + "id": "09c98fcd", "metadata": {}, "outputs": [], "source": [ @@ -433,7 +433,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3c8e9a8e", + "id": "f925bca6", "metadata": {}, "outputs": [], "source": [ @@ -445,7 +445,7 @@ }, { "cell_type": "markdown", - "id": "6591a0e0", + "id": "bca05e18", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/4-providing-images-as-context.ipynb b/docs/colab_notebooks/4-providing-images-as-context.ipynb index 58b33d87..17de5af6 100644 --- a/docs/colab_notebooks/4-providing-images-as-context.ipynb +++ b/docs/colab_notebooks/4-providing-images-as-context.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "356b84c3", + "id": "f9de4d4a", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Providing Images as Context for Vision-Based Data Generation" @@ -10,7 +10,7 @@ }, { "cell_type": "markdown", - "id": "01f2dc2a", + "id": "73aabb5a", "metadata": {}, "source": [ "#### πŸ“š What you'll learn\n", @@ -25,7 +25,7 @@ }, { "cell_type": "markdown", - "id": "6c308378", + "id": "5822049f", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -36,7 +36,7 @@ { "cell_type": "code", "execution_count": null, - "id": "58ebaf5e", + "id": "11e240f9", "metadata": {}, "outputs": [], "source": [ @@ -47,7 +47,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f6bde12b", + "id": "4695047c", "metadata": {}, "outputs": [], "source": [ @@ -64,7 +64,7 @@ }, { "cell_type": "markdown", - "id": "96d77833", + "id": "a5a63554", "metadata": {}, "source": [ "### πŸ“¦ Import the essentials\n", @@ -75,7 +75,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e9f3ab9f", + "id": "d3f0fb88", "metadata": {}, "outputs": [], "source": [ @@ -106,7 +106,7 @@ }, { "cell_type": "markdown", - "id": "a708b480", + "id": "4de9f725", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -119,7 +119,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c5553f64", + "id": "dce78e13", "metadata": {}, "outputs": [], "source": [ @@ -128,7 +128,7 @@ }, { "cell_type": "markdown", - "id": "98375953", + "id": "69c097f8", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -145,7 +145,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f46d9767", + "id": "8901874d", "metadata": {}, "outputs": [], "source": [ @@ -168,7 +168,7 @@ }, { "cell_type": "markdown", - "id": "beda84e0", + "id": "9df8ed4d", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -183,7 +183,7 @@ { "cell_type": "code", 
"execution_count": null, - "id": "510fd21b", + "id": "8d93eca8", "metadata": {}, "outputs": [], "source": [ @@ -192,7 +192,7 @@ }, { "cell_type": "markdown", - "id": "fbfe9dc4", + "id": "ccbad23e", "metadata": {}, "source": [ "### 🌱 Seed Dataset Creation\n", @@ -209,7 +209,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2f06c086", + "id": "1b0fd4a8", "metadata": {}, "outputs": [], "source": [ @@ -224,7 +224,7 @@ { "cell_type": "code", "execution_count": null, - "id": "18f42dee", + "id": "295a3bf8", "metadata": {}, "outputs": [], "source": [ @@ -272,7 +272,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e5b0c59b", + "id": "46d83373", "metadata": {}, "outputs": [], "source": [ @@ -290,7 +290,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b76739c8", + "id": "911edb81", "metadata": {}, "outputs": [], "source": [ @@ -300,7 +300,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d57302c5", + "id": "09734a13", "metadata": {}, "outputs": [], "source": [ @@ -314,7 +314,7 @@ { "cell_type": "code", "execution_count": null, - "id": "919dce3b", + "id": "8e226f5d", "metadata": { "lines_to_next_cell": 2 }, @@ -343,7 +343,7 @@ }, { "cell_type": "markdown", - "id": "3e0039d4", + "id": "197bcb8f", "metadata": { "lines_to_next_cell": 2 }, @@ -351,7 +351,7 @@ }, { "cell_type": "markdown", - "id": "36385331", + "id": "ccde682c", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -368,7 +368,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e3612ca0", + "id": "b33dd78e", "metadata": {}, "outputs": [], "source": [ @@ -378,7 +378,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6fde5225", + "id": "f31e4a72", "metadata": {}, "outputs": [], "source": [ @@ -389,7 +389,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ca5c91b5", + "id": "91e8734f", "metadata": {}, "outputs": [], "source": [ @@ -399,7 +399,7 @@ }, { "cell_type": "markdown", - "id": "64d4a3d1", + "id": "1ac7dc24", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -412,7 +412,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a06c4a54", + "id": "ace8fddc", "metadata": {}, "outputs": [], "source": [ @@ -422,7 +422,7 @@ }, { "cell_type": "markdown", - "id": "500d2cb3", + "id": "db0c0df0", "metadata": {}, "source": [ "### πŸ”Ž Visual Inspection\n", @@ -433,7 +433,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9ec41533", + "id": "41dff4d0", "metadata": { "lines_to_next_cell": 2 }, @@ -457,7 +457,7 @@ }, { "cell_type": "markdown", - "id": "b107e9f3", + "id": "ee9e0868", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -470,7 +470,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c9fe690d", + "id": "888b29ac", "metadata": {}, "outputs": [], "source": [ @@ -480,7 +480,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1c07b691", + "id": "c331022e", "metadata": {}, "outputs": [], "source": [ @@ -493,7 +493,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c52efcc9", + "id": "697bede3", "metadata": {}, "outputs": [], "source": [ @@ -505,7 +505,7 @@ }, { "cell_type": "markdown", - "id": "ca47004d", + "id": "0d1d38ad", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/concepts/models/configure-model-settings-with-the-cli.md b/docs/concepts/models/configure-model-settings-with-the-cli.md index e7baed3f..26a49c79 100644 --- a/docs/concepts/models/configure-model-settings-with-the-cli.md +++ 
b/docs/concepts/models/configure-model-settings-with-the-cli.md
@@ -85,7 +85,7 @@ data-designer config models
 Create a new model configuration with the following fields:

 - **Alias**: A unique name for referencing this model in a column configuration.
-- **Model ID**: The model identifier (e.g., `nvidia/nvidia-nemotron-nano-9b-v2`)
+- **Model ID**: The model identifier (e.g., `nvidia/nemotron-3-nano-30b-a3b`)
 - **Provider**: Select from available providers (if multiple exist)
 - **Temperature**: Sampling temperature (0.0 to 2.0)
 - **Top P**: Nucleus sampling parameter (0.0 to 1.0)
@@ -129,8 +129,6 @@ The CLI will show which configuration files exist and ask for confirmation befor

 ## See Also

-- **[Model Providers](model-providers.md)**: Learn about the `ModelProvider` class and provider configuration
-- **[Model Configurations](model-configs.md)**: Learn about `ModelConfig`
 - **[Default Model Settings](default-model-settings.md)**: Pre-configured providers and model settings included with Data Designer
 - **[Custom Model Settings](custom-model-settings.md)**: Learn how to create custom providers and model configurations
 - **[Model Providers](model-providers.md)**: Learn about the `ModelProvider` class and provider configuration
diff --git a/docs/concepts/models/custom-model-settings.md b/docs/concepts/models/custom-model-settings.md
index 84aed5aa..a7d10a48 100644
--- a/docs/concepts/models/custom-model-settings.md
+++ b/docs/concepts/models/custom-model-settings.md
@@ -35,7 +35,7 @@ custom_models = [
     # High-temperature for more variability
     ModelConfig(
         alias="creative-writer",
-        model="nvidia/nvidia-nemotron-nano-9b-v2",
+        model="nvidia/nemotron-3-nano-30b-a3b",
         provider="nvidia",  # Uses default NVIDIA provider
         inference_parameters=ChatCompletionInferenceParams(
             temperature=1.2,
@@ -46,7 +46,7 @@ custom_models = [
     # Low-temperature for less variability
     ModelConfig(
         alias="fact-checker",
-        model="nvidia/nvidia-nemotron-nano-9b-v2",
+        model="nvidia/nemotron-3-nano-30b-a3b",
         provider="nvidia",  # Uses default NVIDIA provider
         inference_parameters=ChatCompletionInferenceParams(
             temperature=0.1,
diff --git a/docs/concepts/models/default-model-settings.md b/docs/concepts/models/default-model-settings.md
index d6c31cdd..d9bad6b9 100644
--- a/docs/concepts/models/default-model-settings.md
+++ b/docs/concepts/models/default-model-settings.md
@@ -34,7 +34,7 @@ The following model configurations are automatically available when `NVIDIA_API_

 | Alias | Model | Use Case | Temperature | Top P |
 |-------|-------|----------|-------------|-------|
-| `nvidia-text` | `nvidia/nvidia-nemotron-nano-9b-v2` | General text generation | 0.85 | 0.95 |
+| `nvidia-text` | `nvidia/nemotron-3-nano-30b-a3b` | General text generation | 0.85 | 0.95 |
 | `nvidia-reasoning` | `openai/gpt-oss-20b` | Reasoning and analysis tasks | 0.35 | 0.95 |
 | `nvidia-vision` | `nvidia/nemotron-nano-12b-v2-vl` | Vision and image understanding | 0.85 | 0.95 |

diff --git a/docs/concepts/models/model-configs.md b/docs/concepts/models/model-configs.md
index b91640b0..7b74f29f 100644
--- a/docs/concepts/models/model-configs.md
+++ b/docs/concepts/models/model-configs.md
@@ -13,7 +13,7 @@ The `ModelConfig` class has the following fields:
 | Field | Type | Required | Description |
 |-------|------|----------|-------------|
 | `alias` | `str` | Yes | Unique identifier for this model configuration (e.g., `"my-text-model"`, `"reasoning-model"`) |
-| `model` | `str` | Yes | Model identifier as recognized by the provider (e.g., `"nvidia/nvidia-nemotron-nano-9b-v2"`, `"gpt-4"`) |
+| `model` | `str` | Yes | Model identifier as recognized by the provider (e.g., `"nvidia/nemotron-3-nano-30b-a3b"`, `"gpt-4"`) |
 | `inference_parameters` | `InferenceParamsT` | No | Controls model behavior during generation. Use `ChatCompletionInferenceParams` for text/code/structured generation or `EmbeddingInferenceParams` for embeddings. Defaults to `ChatCompletionInferenceParams()` if not provided. The generation type is automatically determined by the inference parameters type. See [Inference Parameters](inference_parameters.md) for details. |
 | `provider` | `str` | No | Reference to the name of the Provider to use (e.g., `"nvidia"`, `"openai"`). If not specified, one set as the default provider, which may resolve to the first provider if there are more than one |

@@ -28,7 +28,7 @@ from data_designer.essentials import ChatCompletionInferenceParams, ModelConfig
 # Simple model configuration with fixed parameters
 model_config = ModelConfig(
     alias="my-text-model",
-    model="nvidia/nvidia-nemotron-nano-9b-v2",
+    model="nvidia/nemotron-3-nano-30b-a3b",
     provider="nvidia",
     inference_parameters=ChatCompletionInferenceParams(
         temperature=0.85,
@@ -52,7 +52,7 @@ model_configs = [
     # Creative tasks
     ModelConfig(
         alias="creative-model",
-        model="nvidia/nvidia-nemotron-nano-9b-v2",
+        model="nvidia/nemotron-3-nano-30b-a3b",
         provider="nvidia",
         inference_parameters=ChatCompletionInferenceParams(
             temperature=0.9,
@@ -63,7 +63,7 @@ model_configs = [
     # Critic tasks
     ModelConfig(
         alias="critic-model",
-        model="nvidia/nvidia-nemotron-nano-9b-v2",
+        model="nvidia/nemotron-3-nano-30b-a3b",
         provider="nvidia",
         inference_parameters=ChatCompletionInferenceParams(
             temperature=0.25,
diff --git a/docs/notebook_source/1-the-basics.py b/docs/notebook_source/1-the-basics.py
index b03c758f..bd38f2a4 100644
--- a/docs/notebook_source/1-the-basics.py
+++ b/docs/notebook_source/1-the-basics.py
@@ -69,7 +69,7 @@
 MODEL_PROVIDER = "nvidia"

 # The model ID is from build.nvidia.com.
-MODEL_ID = "nvidia/nvidia-nemotron-nano-9b-v2"
+MODEL_ID = "nvidia/nemotron-3-nano-30b-a3b"

 # We choose this alias to be descriptive for our use case.
 MODEL_ALIAS = "nemotron-nano-v2"
diff --git a/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py b/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py
index 18627a26..84be8b5e 100644
--- a/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py
+++ b/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py
@@ -71,7 +71,7 @@
 MODEL_PROVIDER = "nvidia"

 # The model ID is from build.nvidia.com.
-MODEL_ID = "nvidia/nvidia-nemotron-nano-9b-v2"
+MODEL_ID = "nvidia/nemotron-3-nano-30b-a3b"

 # We choose this alias to be descriptive for our use case.
 MODEL_ALIAS = "nemotron-nano-v2"
diff --git a/docs/notebook_source/3-seeding-with-a-dataset.py b/docs/notebook_source/3-seeding-with-a-dataset.py
index 12a972aa..c1369c12 100644
--- a/docs/notebook_source/3-seeding-with-a-dataset.py
+++ b/docs/notebook_source/3-seeding-with-a-dataset.py
@@ -65,7 +65,7 @@
 MODEL_PROVIDER = "nvidia"

 # The model ID is from build.nvidia.com.
-MODEL_ID = "nvidia/nvidia-nemotron-nano-9b-v2"
+MODEL_ID = "nvidia/nemotron-3-nano-30b-a3b"

 # We choose this alias to be descriptive for our use case.
 MODEL_ALIAS = "nemotron-nano-v2"
diff --git a/src/data_designer/config/utils/constants.py b/src/data_designer/config/utils/constants.py
index 12b0d2a3..cfefcafb 100644
--- a/src/data_designer/config/utils/constants.py
+++ b/src/data_designer/config/utils/constants.py
@@ -301,7 +301,7 @@ class NordColor(Enum):

 PREDEFINED_PROVIDERS_MODEL_MAP = {
     NVIDIA_PROVIDER_NAME: {
-        "text": "nvidia/nvidia-nemotron-nano-9b-v2",
+        "text": "nvidia/nemotron-3-nano-30b-a3b",
         "reasoning": "openai/gpt-oss-20b",
         "vision": "nvidia/nemotron-nano-12b-v2-vl",
         "embedding": "nvidia/llama-3.2-nv-embedqa-1b-v2",
diff --git a/tests/config/test_default_model_settings.py b/tests/config/test_default_model_settings.py
index 9619af02..df2d85a0 100644
--- a/tests/config/test_default_model_settings.py
+++ b/tests/config/test_default_model_settings.py
@@ -48,7 +48,7 @@ def test_get_builtin_model_configs():
     builtin_model_configs = get_builtin_model_configs()
     assert len(builtin_model_configs) == 8
     assert builtin_model_configs[0].alias == "nvidia-text"
-    assert builtin_model_configs[0].model == "nvidia/nvidia-nemotron-nano-9b-v2"
+    assert builtin_model_configs[0].model == "nvidia/nemotron-3-nano-30b-a3b"
     assert builtin_model_configs[0].provider == "nvidia"
     assert builtin_model_configs[1].alias == "nvidia-reasoning"
     assert builtin_model_configs[1].model == "openai/gpt-oss-20b"