|
2 | 2 | "cells": [ |
3 | 3 | { |
4 | 4 | "cell_type": "markdown", |
5 | | - "id": "9f804f90", |
| 5 | + "id": "56daa304", |
6 | 6 | "metadata": {}, |
7 | 7 | "source": [ |
8 | 8 | "# 🎨 Data Designer Tutorial: The Basics\n", |
|
14 | 14 | }, |
15 | 15 | { |
16 | 16 | "cell_type": "markdown", |
17 | | - "id": "9cb786eb", |
| 17 | + "id": "8734a74a", |
18 | 18 | "metadata": {}, |
19 | 19 | "source": [ |
20 | 20 | "### ⚡ Colab Setup\n", |
|
25 | 25 | { |
26 | 26 | "cell_type": "code", |
27 | 27 | "execution_count": null, |
28 | | - "id": "7f45ea56", |
| 28 | + "id": "45510d11", |
29 | 29 | "metadata": {}, |
30 | 30 | "outputs": [], |
31 | 31 | "source": [ |
|
36 | 36 | { |
37 | 37 | "cell_type": "code", |
38 | 38 | "execution_count": null, |
39 | | - "id": "ea86e81e", |
| 39 | + "id": "4bad4940", |
40 | 40 | "metadata": {}, |
41 | 41 | "outputs": [], |
42 | 42 | "source": [ |
|
53 | 53 | }, |
54 | 54 | { |
55 | 55 | "cell_type": "markdown", |
56 | | - "id": "16611c7b", |
| 56 | + "id": "0543d90e", |
57 | 57 | "metadata": {}, |
58 | 58 | "source": [ |
59 | 59 | "### 📦 Import the essentials\n", |
|
64 | 64 | { |
65 | 65 | "cell_type": "code", |
66 | 66 | "execution_count": null, |
67 | | - "id": "875342bb", |
| 67 | + "id": "90185344", |
68 | 68 | "metadata": {}, |
69 | 69 | "outputs": [], |
70 | 70 | "source": [ |
|
85 | 85 | }, |
86 | 86 | { |
87 | 87 | "cell_type": "markdown", |
88 | | - "id": "b58ac676", |
| 88 | + "id": "e6fcf82b", |
89 | 89 | "metadata": {}, |
90 | 90 | "source": [ |
91 | 91 | "### ⚙️ Initialize the Data Designer interface\n", |
|
98 | 98 | { |
99 | 99 | "cell_type": "code", |
100 | 100 | "execution_count": null, |
101 | | - "id": "3ce805ad", |
| 101 | + "id": "8760c1ef", |
102 | 102 | "metadata": {}, |
103 | 103 | "outputs": [], |
104 | 104 | "source": [ |
|
107 | 107 | }, |
108 | 108 | { |
109 | 109 | "cell_type": "markdown", |
110 | | - "id": "50e961ed", |
| 110 | + "id": "da9d9f06", |
111 | 111 | "metadata": {}, |
112 | 112 | "source": [ |
113 | 113 | "### 🎛️ Define model configurations\n", |
|
124 | 124 | { |
125 | 125 | "cell_type": "code", |
126 | 126 | "execution_count": null, |
127 | | - "id": "1b07a6a5", |
| 127 | + "id": "03760d56", |
128 | 128 | "metadata": {}, |
129 | 129 | "outputs": [], |
130 | 130 | "source": [ |
|
135 | 135 | "MODEL_ID = \"nvidia/nemotron-3-nano-30b-a3b\"\n", |
136 | 136 | "\n", |
137 | 137 | "# We choose this alias to be descriptive for our use case.\n", |
138 | | - "MODEL_ALIAS = \"nemotron-nano-v2\"\n", |
139 | | - "\n", |
140 | | - "# This sets reasoning to False for the nemotron-nano-v2 model.\n", |
141 | | - "SYSTEM_PROMPT = \"/no_think\"\n", |
| 138 | + "MODEL_ALIAS = \"nemotron-nano-v3\"\n", |
142 | 139 | "\n", |
143 | 140 | "model_configs = [\n", |
144 | 141 | " ModelConfig(\n", |
145 | 142 | " alias=MODEL_ALIAS,\n", |
146 | 143 | " model=MODEL_ID,\n", |
147 | 144 | " provider=MODEL_PROVIDER,\n", |
148 | 145 | " inference_parameters=ChatCompletionInferenceParams(\n", |
149 | | - " temperature=0.5,\n", |
| 146 | + " temperature=1.0,\n", |
150 | 147 | " top_p=1.0,\n", |
151 | | - " max_tokens=1024,\n", |
| 148 | + " max_tokens=2048,\n", |
| 149 | + " extra_body={\"chat_template_kwargs\": {\"enable_thinking\": False}},\n", |
152 | 150 | " ),\n", |
153 | 151 | " )\n", |
154 | 152 | "]" |
155 | 153 | ] |
156 | 154 | }, |
157 | 155 | { |
158 | 156 | "cell_type": "markdown", |
159 | | - "id": "6d873251", |
| 157 | + "id": "a968637c", |
160 | 158 | "metadata": {}, |
161 | 159 | "source": [ |
162 | 160 | "### 🏗️ Initialize the Data Designer Config Builder\n", |
|
171 | 169 | { |
172 | 170 | "cell_type": "code", |
173 | 171 | "execution_count": null, |
174 | | - "id": "d45fac13", |
| 172 | + "id": "e5768870", |
175 | 173 | "metadata": {}, |
176 | 174 | "outputs": [], |
177 | 175 | "source": [ |
|
180 | 178 | }, |
181 | 179 | { |
182 | 180 | "cell_type": "markdown", |
183 | | - "id": "c35b0274", |
| 181 | + "id": "d12c1559", |
184 | 182 | "metadata": {}, |
185 | 183 | "source": [ |
186 | 184 | "## 🎲 Getting started with sampler columns\n", |
|
197 | 195 | { |
198 | 196 | "cell_type": "code", |
199 | 197 | "execution_count": null, |
200 | | - "id": "14cb9967", |
| 198 | + "id": "3c47fbe6", |
201 | 199 | "metadata": {}, |
202 | 200 | "outputs": [], |
203 | 201 | "source": [ |
|
206 | 204 | }, |
207 | 205 | { |
208 | 206 | "cell_type": "markdown", |
209 | | - "id": "40945aea", |
| 207 | + "id": "b47862c5", |
210 | 208 | "metadata": {}, |
211 | 209 | "source": [ |
212 | 210 | "Let's start designing our product review dataset by adding product category and subcategory columns.\n" |
|
215 | 213 | { |
216 | 214 | "cell_type": "code", |
217 | 215 | "execution_count": null, |
218 | | - "id": "a7d87e00", |
| 216 | + "id": "6ff2257f", |
219 | 217 | "metadata": {}, |
220 | 218 | "outputs": [], |
221 | 219 | "source": [ |
|
296 | 294 | }, |
297 | 295 | { |
298 | 296 | "cell_type": "markdown", |
299 | | - "id": "48699878", |
| 297 | + "id": "a26f889e", |
300 | 298 | "metadata": {}, |
301 | 299 | "source": [ |
302 | 300 | "Next, let's add samplers to generate data related to the customer and their review.\n" |
|
305 | 303 | { |
306 | 304 | "cell_type": "code", |
307 | 305 | "execution_count": null, |
308 | | - "id": "df84faf3", |
| 306 | + "id": "e603d4cc", |
309 | 307 | "metadata": {}, |
310 | 308 | "outputs": [], |
311 | 309 | "source": [ |
|
342 | 340 | }, |
343 | 341 | { |
344 | 342 | "cell_type": "markdown", |
345 | | - "id": "8288352d", |
| 343 | + "id": "cf5070af", |
346 | 344 | "metadata": {}, |
347 | 345 | "source": [ |
348 | 346 | "## 🦜 LLM-generated columns\n", |
|
357 | 355 | { |
358 | 356 | "cell_type": "code", |
359 | 357 | "execution_count": null, |
360 | | - "id": "157919b4", |
| 358 | + "id": "775c6fa8", |
361 | 359 | "metadata": {}, |
362 | 360 | "outputs": [], |
363 | 361 | "source": [ |
|
370 | 368 | " \"on products related to '{{ product_subcategory }}'. The target age range of the ideal customer is \"\n", |
371 | 369 | " \"{{ target_age_range }} years old. Respond with only the product name, no other text.\"\n", |
372 | 370 | " ),\n", |
373 | | - " system_prompt=SYSTEM_PROMPT,\n", |
374 | 371 | " model_alias=MODEL_ALIAS,\n", |
375 | 372 | " )\n", |
376 | 373 | ")\n", |
|
382 | 379 | " \"You are a customer named {{ customer.first_name }} from {{ customer.city }}, {{ customer.state }}. \"\n", |
383 | 380 | " \"You are {{ customer.age }} years old and recently purchased a product called {{ product_name }}. \"\n", |
384 | 381 | " \"Write a review of this product, which you gave a rating of {{ number_of_stars }} stars. \"\n", |
385 | | - " \"The style of the review should be '{{ review_style }}'.\"\n", |
| 382 | + " \"The style of the review should be '{{ review_style }}'. \"\n", |
| 383 | + " \"Respond with only the review, no other text.\"\n", |
386 | 384 | " ),\n", |
387 | | - " system_prompt=SYSTEM_PROMPT,\n", |
388 | 385 | " model_alias=MODEL_ALIAS,\n", |
389 | 386 | " )\n", |
390 | 387 | ")\n", |
|
394 | 391 | }, |
395 | 392 | { |
396 | 393 | "cell_type": "markdown", |
397 | | - "id": "009646e4", |
| 394 | + "id": "25796666", |
398 | 395 | "metadata": {}, |
399 | 396 | "source": [ |
400 | 397 | "### 🔁 Iteration is key – preview the dataset!\n", |
|
411 | 408 | { |
412 | 409 | "cell_type": "code", |
413 | 410 | "execution_count": null, |
414 | | - "id": "a9c90236", |
| 411 | + "id": "ba90ee16", |
415 | 412 | "metadata": {}, |
416 | 413 | "outputs": [], |
417 | 414 | "source": [ |
|
421 | 418 | { |
422 | 419 | "cell_type": "code", |
423 | 420 | "execution_count": null, |
424 | | - "id": "3cfe180e", |
| 421 | + "id": "db9d6f8a", |
425 | 422 | "metadata": {}, |
426 | 423 | "outputs": [], |
427 | 424 | "source": [ |
|
432 | 429 | { |
433 | 430 | "cell_type": "code", |
434 | 431 | "execution_count": null, |
435 | | - "id": "65b2f595", |
| 432 | + "id": "cb555bd5", |
436 | 433 | "metadata": {}, |
437 | 434 | "outputs": [], |
438 | 435 | "source": [ |
|
442 | 439 | }, |
443 | 440 | { |
444 | 441 | "cell_type": "markdown", |
445 | | - "id": "2134fa0f", |
| 442 | + "id": "b35ee52b", |
446 | 443 | "metadata": {}, |
447 | 444 | "source": [ |
448 | 445 | "### 📊 Analyze the generated data\n", |
|
455 | 452 | { |
456 | 453 | "cell_type": "code", |
457 | 454 | "execution_count": null, |
458 | | - "id": "8a37dd61", |
| 455 | + "id": "0d15fb8d", |
459 | 456 | "metadata": {}, |
460 | 457 | "outputs": [], |
461 | 458 | "source": [ |
|
465 | 462 | }, |
466 | 463 | { |
467 | 464 | "cell_type": "markdown", |
468 | | - "id": "b715bc3a", |
| 465 | + "id": "4fefec9f", |
469 | 466 | "metadata": {}, |
470 | 467 | "source": [ |
471 | 468 | "### 🆙 Scale up!\n", |
|
478 | 475 | { |
479 | 476 | "cell_type": "code", |
480 | 477 | "execution_count": null, |
481 | | - "id": "565f03a1", |
| 478 | + "id": "395faa2c", |
482 | 479 | "metadata": {}, |
483 | 480 | "outputs": [], |
484 | 481 | "source": [ |
|
488 | 485 | { |
489 | 486 | "cell_type": "code", |
490 | 487 | "execution_count": null, |
491 | | - "id": "9d4c91ad", |
| 488 | + "id": "65dcd625", |
492 | 489 | "metadata": {}, |
493 | 490 | "outputs": [], |
494 | 491 | "source": [ |
|
501 | 498 | { |
502 | 499 | "cell_type": "code", |
503 | 500 | "execution_count": null, |
504 | | - "id": "93c5a082", |
| 501 | + "id": "1aef103b", |
505 | 502 | "metadata": {}, |
506 | 503 | "outputs": [], |
507 | 504 | "source": [ |
|
513 | 510 | }, |
514 | 511 | { |
515 | 512 | "cell_type": "markdown", |
516 | | - "id": "13f7c942", |
| 513 | + "id": "09ec21ba", |
517 | 514 | "metadata": {}, |
518 | 515 | "source": [ |
519 | 516 | "## ⏭️ Next Steps\n", |
|
0 commit comments