diff --git a/docs/colab_notebooks/1-the-basics.ipynb b/docs/colab_notebooks/1-the-basics.ipynb index 57620b8c..3a667a5b 100644 --- a/docs/colab_notebooks/1-the-basics.ipynb +++ b/docs/colab_notebooks/1-the-basics.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "9f804f90", + "id": "56daa304", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: The Basics\n", @@ -14,7 +14,7 @@ }, { "cell_type": "markdown", - "id": "9cb786eb", + "id": "8734a74a", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -25,7 +25,7 @@ { "cell_type": "code", "execution_count": null, - "id": "7f45ea56", + "id": "45510d11", "metadata": {}, "outputs": [], "source": [ @@ -36,7 +36,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ea86e81e", + "id": "4bad4940", "metadata": {}, "outputs": [], "source": [ @@ -53,7 +53,7 @@ }, { "cell_type": "markdown", - "id": "16611c7b", + "id": "0543d90e", "metadata": {}, "source": [ "### πŸ“¦ Import the essentials\n", @@ -64,7 +64,7 @@ { "cell_type": "code", "execution_count": null, - "id": "875342bb", + "id": "90185344", "metadata": {}, "outputs": [], "source": [ @@ -85,7 +85,7 @@ }, { "cell_type": "markdown", - "id": "b58ac676", + "id": "e6fcf82b", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -98,7 +98,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3ce805ad", + "id": "8760c1ef", "metadata": {}, "outputs": [], "source": [ @@ -107,7 +107,7 @@ }, { "cell_type": "markdown", - "id": "50e961ed", + "id": "da9d9f06", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -124,7 +124,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1b07a6a5", + "id": "03760d56", "metadata": {}, "outputs": [], "source": [ @@ -135,10 +135,7 @@ "MODEL_ID = \"nvidia/nemotron-3-nano-30b-a3b\"\n", "\n", "# We choose this alias to be descriptive for our use case.\n", - "MODEL_ALIAS = \"nemotron-nano-v2\"\n", - "\n", - "# This sets reasoning to False for the nemotron-nano-v2 model.\n", - "SYSTEM_PROMPT = \"/no_think\"\n", + "MODEL_ALIAS = \"nemotron-nano-v3\"\n", "\n", "model_configs = [\n", " ModelConfig(\n", @@ -146,9 +143,10 @@ " model=MODEL_ID,\n", " provider=MODEL_PROVIDER,\n", " inference_parameters=ChatCompletionInferenceParams(\n", - " temperature=0.5,\n", + " temperature=1.0,\n", " top_p=1.0,\n", - " max_tokens=1024,\n", + " max_tokens=2048,\n", + " extra_body={\"chat_template_kwargs\": {\"enable_thinking\": False}},\n", " ),\n", " )\n", "]" @@ -156,7 +154,7 @@ }, { "cell_type": "markdown", - "id": "6d873251", + "id": "a968637c", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -171,7 +169,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d45fac13", + "id": "e5768870", "metadata": {}, "outputs": [], "source": [ @@ -180,7 +178,7 @@ }, { "cell_type": "markdown", - "id": "c35b0274", + "id": "d12c1559", "metadata": {}, "source": [ "## 🎲 Getting started with sampler columns\n", @@ -197,7 +195,7 @@ { "cell_type": "code", "execution_count": null, - "id": "14cb9967", + "id": "3c47fbe6", "metadata": {}, "outputs": [], "source": [ @@ -206,7 +204,7 @@ }, { "cell_type": "markdown", - "id": "40945aea", + "id": "b47862c5", "metadata": {}, "source": [ "Let's start designing our product review dataset by adding product category and subcategory columns.\n" @@ -215,7 +213,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a7d87e00", + "id": "6ff2257f", "metadata": {}, "outputs": [], "source": [ @@ -296,7 +294,7 @@ }, { "cell_type": "markdown", - "id": "48699878", + "id": "a26f889e", "metadata": {}, "source": [ "Next, let's add samplers to generate data related to the customer and their review.\n" @@ -305,7 +303,7 @@ { "cell_type": "code", "execution_count": null, - "id": "df84faf3", + "id": "e603d4cc", "metadata": {}, "outputs": [], "source": [ @@ -342,7 +340,7 @@ }, { "cell_type": "markdown", - "id": "8288352d", + "id": "cf5070af", "metadata": {}, "source": [ "## 🦜 LLM-generated columns\n", @@ -357,7 +355,7 @@ { "cell_type": "code", "execution_count": null, - "id": "157919b4", + "id": "775c6fa8", "metadata": {}, "outputs": [], "source": [ @@ -370,7 +368,6 @@ " \"on products related to '{{ product_subcategory }}'. The target age range of the ideal customer is \"\n", " \"{{ target_age_range }} years old. Respond with only the product name, no other text.\"\n", " ),\n", - " system_prompt=SYSTEM_PROMPT,\n", " model_alias=MODEL_ALIAS,\n", " )\n", ")\n", @@ -382,9 +379,9 @@ " \"You are a customer named {{ customer.first_name }} from {{ customer.city }}, {{ customer.state }}. \"\n", " \"You are {{ customer.age }} years old and recently purchased a product called {{ product_name }}. \"\n", " \"Write a review of this product, which you gave a rating of {{ number_of_stars }} stars. \"\n", - " \"The style of the review should be '{{ review_style }}'.\"\n", + " \"The style of the review should be '{{ review_style }}'. \"\n", + " \"Respond with only the review, no other text.\"\n", " ),\n", - " system_prompt=SYSTEM_PROMPT,\n", " model_alias=MODEL_ALIAS,\n", " )\n", ")\n", @@ -394,7 +391,7 @@ }, { "cell_type": "markdown", - "id": "009646e4", + "id": "25796666", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -411,7 +408,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a9c90236", + "id": "ba90ee16", "metadata": {}, "outputs": [], "source": [ @@ -421,7 +418,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3cfe180e", + "id": "db9d6f8a", "metadata": {}, "outputs": [], "source": [ @@ -432,7 +429,7 @@ { "cell_type": "code", "execution_count": null, - "id": "65b2f595", + "id": "cb555bd5", "metadata": {}, "outputs": [], "source": [ @@ -442,7 +439,7 @@ }, { "cell_type": "markdown", - "id": "2134fa0f", + "id": "b35ee52b", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -455,7 +452,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8a37dd61", + "id": "0d15fb8d", "metadata": {}, "outputs": [], "source": [ @@ -465,7 +462,7 @@ }, { "cell_type": "markdown", - "id": "b715bc3a", + "id": "4fefec9f", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -478,7 +475,7 @@ { "cell_type": "code", "execution_count": null, - "id": "565f03a1", + "id": "395faa2c", "metadata": {}, "outputs": [], "source": [ @@ -488,7 +485,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9d4c91ad", + "id": "65dcd625", "metadata": {}, "outputs": [], "source": [ @@ -501,7 +498,7 @@ { "cell_type": "code", "execution_count": null, - "id": "93c5a082", + "id": "1aef103b", "metadata": {}, "outputs": [], "source": [ @@ -513,7 +510,7 @@ }, { "cell_type": "markdown", - "id": "13f7c942", + "id": "09ec21ba", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb index 819183e9..36b394cc 100644 --- a/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb +++ b/docs/colab_notebooks/2-structured-outputs-and-jinja-expressions.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "efccd5c5", + "id": "53acbda1", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Structured Outputs and Jinja Expressions\n", @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "34ed6e5e", + "id": "cd60193e", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -27,7 +27,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d247eb12", + "id": "b5e8b612", "metadata": {}, "outputs": [], "source": [ @@ -38,7 +38,7 @@ { "cell_type": "code", "execution_count": null, - "id": "a1468d3b", + "id": "9a2a6627", "metadata": {}, "outputs": [], "source": [ @@ -55,7 +55,7 @@ }, { "cell_type": "markdown", - "id": "7e3bbbcd", + "id": "ef153b85", "metadata": {}, "source": [ "### πŸ“¦ Import the essentials\n", @@ -66,7 +66,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6c7a6a94", + "id": "e4196833", "metadata": {}, "outputs": [], "source": [ @@ -87,7 +87,7 @@ }, { "cell_type": "markdown", - "id": "b0c74b0e", + "id": "04af7bd6", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -100,7 +100,7 @@ { "cell_type": "code", "execution_count": null, - "id": "77fd1b9f", + "id": "72343921", "metadata": {}, "outputs": [], "source": [ @@ -109,7 +109,7 @@ }, { "cell_type": "markdown", - "id": "13b73d51", + "id": "31e7f7b4", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -126,7 +126,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b817e2e6", + "id": "18c66cde", "metadata": {}, "outputs": [], "source": [ @@ -137,10 +137,7 @@ "MODEL_ID = \"nvidia/nemotron-3-nano-30b-a3b\"\n", "\n", "# We choose this alias to be descriptive for our use case.\n", - "MODEL_ALIAS = \"nemotron-nano-v2\"\n", - "\n", - "# This sets reasoning to False for the nemotron-nano-v2 model.\n", - "SYSTEM_PROMPT = \"/no_think\"\n", + "MODEL_ALIAS = \"nemotron-nano-v3\"\n", "\n", "model_configs = [\n", " ModelConfig(\n", @@ -148,9 +145,10 @@ " model=MODEL_ID,\n", " provider=MODEL_PROVIDER,\n", " inference_parameters=ChatCompletionInferenceParams(\n", - " temperature=0.5,\n", + " temperature=1.0,\n", " top_p=1.0,\n", - " max_tokens=1024,\n", + " max_tokens=2048,\n", + " extra_body={\"chat_template_kwargs\": {\"enable_thinking\": False}},\n", " ),\n", " )\n", "]" @@ -158,7 +156,7 @@ }, { "cell_type": "markdown", - "id": "a68b6918", + "id": "7b7593ae", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -173,7 +171,7 @@ { "cell_type": "code", "execution_count": null, - "id": "71b57cd9", + "id": "8e6abcb9", "metadata": {}, "outputs": [], "source": [ @@ -182,7 +180,7 @@ }, { "cell_type": "markdown", - "id": "2ceb04fb", + "id": "94624f9d", "metadata": {}, "source": [ "### πŸ§‘β€πŸŽ¨ Designing our data\n", @@ -209,7 +207,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12ffaa28", + "id": "974102ec", "metadata": {}, "outputs": [], "source": [ @@ -237,7 +235,7 @@ }, { "cell_type": "markdown", - "id": "c0417a6e", + "id": "1ef74249", "metadata": {}, "source": [ "Next, let's design our product review dataset using a few more tricks compared to the previous notebook.\n" @@ -246,7 +244,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ca2086a9", + "id": "dbf4649e", "metadata": {}, "outputs": [], "source": [ @@ -355,7 +353,7 @@ }, { "cell_type": "markdown", - "id": "70036c8a", + "id": "32d6054b", "metadata": {}, "source": [ "Next, we will use more advanced Jinja expressions to create new columns.\n", @@ -372,7 +370,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b3615ef2", + "id": "fcffc816", "metadata": {}, "outputs": [], "source": [ @@ -392,7 +390,6 @@ " \"related to '{{ product_subcategory }}'. The target age range of the ideal customer is \"\n", " \"{{ target_age_range }} years old. The product should be priced between $10 and $1000.\"\n", " ),\n", - " system_prompt=SYSTEM_PROMPT,\n", " output_format=Product,\n", " model_alias=MODEL_ALIAS,\n", " )\n", @@ -410,12 +407,12 @@ " \"Imagine your name is {{ customer_name }} and you are from {{ customer.city }}, {{ customer.state }}. \"\n", " \"Write the review in a style that is '{{ review_style }}'.\"\n", " \"{% if target_age_range == '18-25' %}\"\n", - " \"Make sure the review is more informal and conversational.\"\n", + " \"Make sure the review is more informal and conversational.\\n\"\n", " \"{% else %}\"\n", - " \"Make sure the review is more formal and structured.\"\n", + " \"Make sure the review is more formal and structured.\\n\"\n", " \"{% endif %}\"\n", + " \"The review field should contain only the review, no other text.\"\n", " ),\n", - " system_prompt=SYSTEM_PROMPT,\n", " output_format=ProductReview,\n", " model_alias=MODEL_ALIAS,\n", " )\n", @@ -426,7 +423,7 @@ }, { "cell_type": "markdown", - "id": "4c3d3a68", + "id": "7605cda9", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -443,7 +440,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4735ba79", + "id": "0c99dab1", "metadata": {}, "outputs": [], "source": [ @@ -453,7 +450,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e9f607ac", + "id": "f870c338", "metadata": {}, "outputs": [], "source": [ @@ -464,7 +461,7 @@ { "cell_type": "code", "execution_count": null, - "id": "05e4ccf6", + "id": "e41c3e55", "metadata": {}, "outputs": [], "source": [ @@ -474,7 +471,7 @@ }, { "cell_type": "markdown", - "id": "e64d9f3a", + "id": "4744d3bf", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -487,7 +484,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c50c3e4e", + "id": "5402169e", "metadata": {}, "outputs": [], "source": [ @@ -497,7 +494,7 @@ }, { "cell_type": "markdown", - "id": "977f2979", + "id": "1bafe4cb", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -510,7 +507,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8617a7e7", + "id": "26a59fac", "metadata": {}, "outputs": [], "source": [ @@ -520,7 +517,7 @@ { "cell_type": "code", "execution_count": null, - "id": "43e9e07b", + "id": "c89f583e", "metadata": {}, "outputs": [], "source": [ @@ -533,7 +530,7 @@ { "cell_type": "code", "execution_count": null, - "id": "6f763f9c", + "id": "26da405a", "metadata": {}, "outputs": [], "source": [ @@ -545,7 +542,7 @@ }, { "cell_type": "markdown", - "id": "8788bda2", + "id": "03868d2a", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb index 6ddd3c42..30d6d2ea 100644 --- a/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb +++ b/docs/colab_notebooks/3-seeding-with-a-dataset.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "93257f91", + "id": "5b90f012", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Seeding Synthetic Data Generation with an External Dataset\n", @@ -16,7 +16,7 @@ }, { "cell_type": "markdown", - "id": "f9aa6154", + "id": "f40e01b1", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -27,7 +27,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ae7066f5", + "id": "5ea09601", "metadata": {}, "outputs": [], "source": [ @@ -38,7 +38,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ba8dc550", + "id": "e1c09194", "metadata": {}, "outputs": [], "source": [ @@ -55,7 +55,7 @@ }, { "cell_type": "markdown", - "id": "d00f21f0", + "id": "845122ac", "metadata": {}, "source": [ "### πŸ“¦ Import the essentials\n", @@ -66,7 +66,7 @@ { "cell_type": "code", "execution_count": null, - "id": "31c33b4f", + "id": "828f2aa1", "metadata": {}, "outputs": [], "source": [ @@ -81,7 +81,7 @@ }, { "cell_type": "markdown", - "id": "dcec3852", + "id": "e47c97c1", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -94,7 +94,7 @@ { "cell_type": "code", "execution_count": null, - "id": "017ded4a", + "id": "33d18947", "metadata": {}, "outputs": [], "source": [ @@ -103,7 +103,7 @@ }, { "cell_type": "markdown", - "id": "7108f61b", + "id": "469646e0", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -120,7 +120,7 @@ { "cell_type": "code", "execution_count": null, - "id": "821d91aa", + "id": "f2997a0a", "metadata": {}, "outputs": [], "source": [ @@ -131,10 +131,7 @@ "MODEL_ID = \"nvidia/nemotron-3-nano-30b-a3b\"\n", "\n", "# We choose this alias to be descriptive for our use case.\n", - "MODEL_ALIAS = \"nemotron-nano-v2\"\n", - "\n", - "# This sets reasoning to False for the nemotron-nano-v2 model.\n", - "SYSTEM_PROMPT = \"/no_think\"\n", + "MODEL_ALIAS = \"nemotron-nano-v3\"\n", "\n", "model_configs = [\n", " ModelConfig(\n", @@ -142,9 +139,10 @@ " model=MODEL_ID,\n", " provider=MODEL_PROVIDER,\n", " inference_parameters=ChatCompletionInferenceParams(\n", - " temperature=0.5,\n", + " temperature=1.0,\n", " top_p=1.0,\n", - " max_tokens=1024,\n", + " max_tokens=2048,\n", + " extra_body={\"chat_template_kwargs\": {\"enable_thinking\": False}},\n", " ),\n", " )\n", "]" @@ -152,7 +150,7 @@ }, { "cell_type": "markdown", - "id": "fcb911ef", + "id": "820e1c4d", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -167,7 +165,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5a0e0c57", + "id": "70a499d6", "metadata": {}, "outputs": [], "source": [ @@ -176,7 +174,7 @@ }, { "cell_type": "markdown", - "id": "1bd0982e", + "id": "e5a2a4e6", "metadata": {}, "source": [ "## πŸ₯ Prepare a seed dataset\n", @@ -201,7 +199,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d8d6c161", + "id": "395e9e66", "metadata": {}, "outputs": [], "source": [ @@ -219,7 +217,7 @@ }, { "cell_type": "markdown", - "id": "b5f01e5e", + "id": "b6771c7f", "metadata": {}, "source": [ "## 🎨 Designing our synthetic patient notes dataset\n", @@ -236,7 +234,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1a85709d", + "id": "37ea9449", "metadata": {}, "outputs": [], "source": [ @@ -316,9 +314,9 @@ "as Dr. {{ doctor_sampler.first_name }} {{ doctor_sampler.last_name }}.\n", "\n", "Format the notes as a busy doctor might.\n", + "Respond with only the notes, no other text.\n", "\"\"\",\n", " model_alias=MODEL_ALIAS,\n", - " system_prompt=SYSTEM_PROMPT,\n", ")\n", "\n", "config_builder.validate()" @@ -326,7 +324,7 @@ }, { "cell_type": "markdown", - "id": "0749aa84", + "id": "22b85f9e", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -343,7 +341,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ec3a075c", + "id": "4c6a551e", "metadata": {}, "outputs": [], "source": [ @@ -353,7 +351,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b91c2a99", + "id": "8acc5de5", "metadata": {}, "outputs": [], "source": [ @@ -364,7 +362,7 @@ { "cell_type": "code", "execution_count": null, - "id": "cadf0463", + "id": "4f964b38", "metadata": {}, "outputs": [], "source": [ @@ -374,7 +372,7 @@ }, { "cell_type": "markdown", - "id": "7953ccbf", + "id": "fb1c2610", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -387,7 +385,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f816677a", + "id": "e31b96e6", "metadata": {}, "outputs": [], "source": [ @@ -397,7 +395,7 @@ }, { "cell_type": "markdown", - "id": "2dc9b781", + "id": "4136a65d", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -410,7 +408,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b610e51a", + "id": "c6e0d765", "metadata": {}, "outputs": [], "source": [ @@ -420,7 +418,7 @@ { "cell_type": "code", "execution_count": null, - "id": "09c98fcd", + "id": "0f89e69f", "metadata": {}, "outputs": [], "source": [ @@ -433,7 +431,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f925bca6", + "id": "214507eb", "metadata": {}, "outputs": [], "source": [ @@ -445,7 +443,7 @@ }, { "cell_type": "markdown", - "id": "bca05e18", + "id": "b96daf69", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/colab_notebooks/4-providing-images-as-context.ipynb b/docs/colab_notebooks/4-providing-images-as-context.ipynb index 17de5af6..95d443d2 100644 --- a/docs/colab_notebooks/4-providing-images-as-context.ipynb +++ b/docs/colab_notebooks/4-providing-images-as-context.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "f9de4d4a", + "id": "6bf25ee2", "metadata": {}, "source": [ "# 🎨 Data Designer Tutorial: Providing Images as Context for Vision-Based Data Generation" @@ -10,7 +10,7 @@ }, { "cell_type": "markdown", - "id": "73aabb5a", + "id": "fa76e5cd", "metadata": {}, "source": [ "#### πŸ“š What you'll learn\n", @@ -25,7 +25,7 @@ }, { "cell_type": "markdown", - "id": "5822049f", + "id": "9d42978e", "metadata": {}, "source": [ "### ⚑ Colab Setup\n", @@ -36,7 +36,7 @@ { "cell_type": "code", "execution_count": null, - "id": "11e240f9", + "id": "c016a522", "metadata": {}, "outputs": [], "source": [ @@ -47,7 +47,7 @@ { "cell_type": "code", "execution_count": null, - "id": "4695047c", + "id": "5e00e775", "metadata": {}, "outputs": [], "source": [ @@ -64,7 +64,7 @@ }, { "cell_type": "markdown", - "id": "a5a63554", + "id": "1bb848d5", "metadata": {}, "source": [ "### πŸ“¦ Import the essentials\n", @@ -75,7 +75,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d3f0fb88", + "id": "1aa2bfb8", "metadata": {}, "outputs": [], "source": [ @@ -106,7 +106,7 @@ }, { "cell_type": "markdown", - "id": "4de9f725", + "id": "f5dac189", "metadata": {}, "source": [ "### βš™οΈ Initialize the Data Designer interface\n", @@ -119,7 +119,7 @@ { "cell_type": "code", "execution_count": null, - "id": "dce78e13", + "id": "0622a4a4", "metadata": {}, "outputs": [], "source": [ @@ -128,7 +128,7 @@ }, { "cell_type": "markdown", - "id": "69c097f8", + "id": "fa146e50", "metadata": {}, "source": [ "### πŸŽ›οΈ Define model configurations\n", @@ -145,7 +145,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8901874d", + "id": "f8f66a44", "metadata": {}, "outputs": [], "source": [ @@ -168,7 +168,7 @@ }, { "cell_type": "markdown", - "id": "9df8ed4d", + "id": "332729b4", "metadata": {}, "source": [ "### πŸ—οΈ Initialize the Data Designer Config Builder\n", @@ -183,7 +183,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8d93eca8", + "id": "f879937f", "metadata": {}, "outputs": [], "source": [ @@ -192,7 +192,7 @@ }, { "cell_type": "markdown", - "id": "ccbad23e", + "id": "ac6ec0e7", "metadata": {}, "source": [ "### 🌱 Seed Dataset Creation\n", @@ -209,7 +209,7 @@ { "cell_type": "code", "execution_count": null, - "id": "1b0fd4a8", + "id": "065921f2", "metadata": {}, "outputs": [], "source": [ @@ -224,7 +224,7 @@ { "cell_type": "code", "execution_count": null, - "id": "295a3bf8", + "id": "78b301dc", "metadata": {}, "outputs": [], "source": [ @@ -272,7 +272,7 @@ { "cell_type": "code", "execution_count": null, - "id": "46d83373", + "id": "e273ca99", "metadata": {}, "outputs": [], "source": [ @@ -290,7 +290,7 @@ { "cell_type": "code", "execution_count": null, - "id": "911edb81", + "id": "a91cc80d", "metadata": {}, "outputs": [], "source": [ @@ -300,7 +300,7 @@ { "cell_type": "code", "execution_count": null, - "id": "09734a13", + "id": "6c45288b", "metadata": {}, "outputs": [], "source": [ @@ -314,7 +314,7 @@ { "cell_type": "code", "execution_count": null, - "id": "8e226f5d", + "id": "b1fe8e02", "metadata": { "lines_to_next_cell": 2 }, @@ -343,7 +343,7 @@ }, { "cell_type": "markdown", - "id": "197bcb8f", + "id": "c6ce7129", "metadata": { "lines_to_next_cell": 2 }, @@ -351,7 +351,7 @@ }, { "cell_type": "markdown", - "id": "ccde682c", + "id": "f50979bc", "metadata": {}, "source": [ "### πŸ” Iteration is key – preview the dataset!\n", @@ -368,7 +368,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b33dd78e", + "id": "f2d22d21", "metadata": {}, "outputs": [], "source": [ @@ -378,7 +378,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f31e4a72", + "id": "eeab6ce8", "metadata": {}, "outputs": [], "source": [ @@ -389,7 +389,7 @@ { "cell_type": "code", "execution_count": null, - "id": "91e8734f", + "id": "b9cd2aae", "metadata": {}, "outputs": [], "source": [ @@ -399,7 +399,7 @@ }, { "cell_type": "markdown", - "id": "1ac7dc24", + "id": "1f49c639", "metadata": {}, "source": [ "### πŸ“Š Analyze the generated data\n", @@ -412,7 +412,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ace8fddc", + "id": "35273897", "metadata": {}, "outputs": [], "source": [ @@ -422,7 +422,7 @@ }, { "cell_type": "markdown", - "id": "db0c0df0", + "id": "6f23eda2", "metadata": {}, "source": [ "### πŸ”Ž Visual Inspection\n", @@ -433,7 +433,7 @@ { "cell_type": "code", "execution_count": null, - "id": "41dff4d0", + "id": "38402972", "metadata": { "lines_to_next_cell": 2 }, @@ -457,7 +457,7 @@ }, { "cell_type": "markdown", - "id": "ee9e0868", + "id": "ce76408f", "metadata": {}, "source": [ "### πŸ†™ Scale up!\n", @@ -470,7 +470,7 @@ { "cell_type": "code", "execution_count": null, - "id": "888b29ac", + "id": "5a316fe3", "metadata": {}, "outputs": [], "source": [ @@ -480,7 +480,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c331022e", + "id": "7fc390c7", "metadata": {}, "outputs": [], "source": [ @@ -493,7 +493,7 @@ { "cell_type": "code", "execution_count": null, - "id": "697bede3", + "id": "7ccc01d3", "metadata": {}, "outputs": [], "source": [ @@ -505,7 +505,7 @@ }, { "cell_type": "markdown", - "id": "0d1d38ad", + "id": "5bd96d19", "metadata": {}, "source": [ "## ⏭️ Next Steps\n", diff --git a/docs/notebook_source/1-the-basics.py b/docs/notebook_source/1-the-basics.py index bd38f2a4..2221ab7d 100644 --- a/docs/notebook_source/1-the-basics.py +++ b/docs/notebook_source/1-the-basics.py @@ -72,10 +72,7 @@ MODEL_ID = "nvidia/nemotron-3-nano-30b-a3b" # We choose this alias to be descriptive for our use case. -MODEL_ALIAS = "nemotron-nano-v2" - -# This sets reasoning to False for the nemotron-nano-v2 model. -SYSTEM_PROMPT = "/no_think" +MODEL_ALIAS = "nemotron-nano-v3" model_configs = [ ModelConfig( @@ -83,9 +80,10 @@ model=MODEL_ID, provider=MODEL_PROVIDER, inference_parameters=ChatCompletionInferenceParams( - temperature=0.5, + temperature=1.0, top_p=1.0, - max_tokens=1024, + max_tokens=2048, + extra_body={"chat_template_kwargs": {"enable_thinking": False}}, ), ) ] @@ -252,7 +250,6 @@ "on products related to '{{ product_subcategory }}'. The target age range of the ideal customer is " "{{ target_age_range }} years old. Respond with only the product name, no other text." ), - system_prompt=SYSTEM_PROMPT, model_alias=MODEL_ALIAS, ) ) @@ -264,9 +261,9 @@ "You are a customer named {{ customer.first_name }} from {{ customer.city }}, {{ customer.state }}. " "You are {{ customer.age }} years old and recently purchased a product called {{ product_name }}. " "Write a review of this product, which you gave a rating of {{ number_of_stars }} stars. " - "The style of the review should be '{{ review_style }}'." + "The style of the review should be '{{ review_style }}'. " + "Respond with only the review, no other text." ), - system_prompt=SYSTEM_PROMPT, model_alias=MODEL_ALIAS, ) ) diff --git a/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py b/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py index 84be8b5e..52e7f681 100644 --- a/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py +++ b/docs/notebook_source/2-structured-outputs-and-jinja-expressions.py @@ -74,10 +74,7 @@ MODEL_ID = "nvidia/nemotron-3-nano-30b-a3b" # We choose this alias to be descriptive for our use case. -MODEL_ALIAS = "nemotron-nano-v2" - -# This sets reasoning to False for the nemotron-nano-v2 model. -SYSTEM_PROMPT = "/no_think" +MODEL_ALIAS = "nemotron-nano-v3" model_configs = [ ModelConfig( @@ -85,9 +82,10 @@ model=MODEL_ID, provider=MODEL_PROVIDER, inference_parameters=ChatCompletionInferenceParams( - temperature=0.5, + temperature=1.0, top_p=1.0, - max_tokens=1024, + max_tokens=2048, + extra_body={"chat_template_kwargs": {"enable_thinking": False}}, ), ) ] @@ -286,7 +284,6 @@ class ProductReview(BaseModel): "related to '{{ product_subcategory }}'. The target age range of the ideal customer is " "{{ target_age_range }} years old. The product should be priced between $10 and $1000." ), - system_prompt=SYSTEM_PROMPT, output_format=Product, model_alias=MODEL_ALIAS, ) @@ -304,12 +301,12 @@ class ProductReview(BaseModel): "Imagine your name is {{ customer_name }} and you are from {{ customer.city }}, {{ customer.state }}. " "Write the review in a style that is '{{ review_style }}'." "{% if target_age_range == '18-25' %}" - "Make sure the review is more informal and conversational." + "Make sure the review is more informal and conversational.\n" "{% else %}" - "Make sure the review is more formal and structured." + "Make sure the review is more formal and structured.\n" "{% endif %}" + "The review field should contain only the review, no other text." ), - system_prompt=SYSTEM_PROMPT, output_format=ProductReview, model_alias=MODEL_ALIAS, ) diff --git a/docs/notebook_source/3-seeding-with-a-dataset.py b/docs/notebook_source/3-seeding-with-a-dataset.py index c1369c12..9da195d3 100644 --- a/docs/notebook_source/3-seeding-with-a-dataset.py +++ b/docs/notebook_source/3-seeding-with-a-dataset.py @@ -68,10 +68,7 @@ MODEL_ID = "nvidia/nemotron-3-nano-30b-a3b" # We choose this alias to be descriptive for our use case. -MODEL_ALIAS = "nemotron-nano-v2" - -# This sets reasoning to False for the nemotron-nano-v2 model. -SYSTEM_PROMPT = "/no_think" +MODEL_ALIAS = "nemotron-nano-v3" model_configs = [ ModelConfig( @@ -79,9 +76,10 @@ model=MODEL_ID, provider=MODEL_PROVIDER, inference_parameters=ChatCompletionInferenceParams( - temperature=0.5, + temperature=1.0, top_p=1.0, - max_tokens=1024, + max_tokens=2048, + extra_body={"chat_template_kwargs": {"enable_thinking": False}}, ), ) ] @@ -220,9 +218,9 @@ as Dr. {{ doctor_sampler.first_name }} {{ doctor_sampler.last_name }}. Format the notes as a busy doctor might. +Respond with only the notes, no other text. """, model_alias=MODEL_ALIAS, - system_prompt=SYSTEM_PROMPT, ) config_builder.validate()