Skip to content

Commit ebe3503

Browse files
committed
Refactor seed datasets
1 parent 1cf5f29 commit ebe3503

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

42 files changed

+965
-1446
lines changed

Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,7 +98,7 @@ convert-execute-notebooks:
9898

9999
generate-colab-notebooks:
100100
@echo "📓 Generating Colab-compatible notebooks..."
101-
uv run --group notebooks python docs/scripts/generate_colab_notebooks.py
101+
uv run --group docs python docs/scripts/generate_colab_notebooks.py
102102
@echo "✅ Colab notebooks created in docs/colab_notebooks/"
103103

104104
serve-docs-locally:

docs/colab_notebooks/1-the-basics.ipynb

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "markdown",
5-
"id": "2e6331ad",
5+
"id": "32827bda",
66
"metadata": {},
77
"source": [
88
"# 🎨 Data Designer Tutorial: The Basics\n",
@@ -14,7 +14,7 @@
1414
},
1515
{
1616
"cell_type": "markdown",
17-
"id": "af3aad47",
17+
"id": "cbe6b81e",
1818
"metadata": {},
1919
"source": [
2020
"### ⚡ Colab Setup\n",
@@ -25,7 +25,7 @@
2525
{
2626
"cell_type": "code",
2727
"execution_count": null,
28-
"id": "4c39e2a5",
28+
"id": "5b007eed",
2929
"metadata": {},
3030
"outputs": [],
3131
"source": [
@@ -36,7 +36,7 @@
3636
{
3737
"cell_type": "code",
3838
"execution_count": null,
39-
"id": "d8652e5e",
39+
"id": "58dfdb07",
4040
"metadata": {},
4141
"outputs": [],
4242
"source": [
@@ -53,7 +53,7 @@
5353
},
5454
{
5555
"cell_type": "markdown",
56-
"id": "c51e6323",
56+
"id": "03d07dff",
5757
"metadata": {},
5858
"source": [
5959
"### 📦 Import the essentials\n",
@@ -64,7 +64,7 @@
6464
{
6565
"cell_type": "code",
6666
"execution_count": null,
67-
"id": "2de6b279",
67+
"id": "749a1536",
6868
"metadata": {},
6969
"outputs": [],
7070
"source": [
@@ -85,7 +85,7 @@
8585
},
8686
{
8787
"cell_type": "markdown",
88-
"id": "6a484a8d",
88+
"id": "5c31d723",
8989
"metadata": {},
9090
"source": [
9191
"### ⚙️ Initialize the Data Designer interface\n",
@@ -98,7 +98,7 @@
9898
{
9999
"cell_type": "code",
100100
"execution_count": null,
101-
"id": "7554bd1a",
101+
"id": "826f2421",
102102
"metadata": {},
103103
"outputs": [],
104104
"source": [
@@ -107,7 +107,7 @@
107107
},
108108
{
109109
"cell_type": "markdown",
110-
"id": "dc1d9f84",
110+
"id": "b6bfc01a",
111111
"metadata": {},
112112
"source": [
113113
"### 🎛️ Define model configurations\n",
@@ -124,7 +124,7 @@
124124
{
125125
"cell_type": "code",
126126
"execution_count": null,
127-
"id": "76d22674",
127+
"id": "11aa0146",
128128
"metadata": {},
129129
"outputs": [],
130130
"source": [
@@ -154,7 +154,7 @@
154154
},
155155
{
156156
"cell_type": "markdown",
157-
"id": "187da050",
157+
"id": "c10b93a6",
158158
"metadata": {},
159159
"source": [
160160
"### 🏗️ Initialize the Data Designer Config Builder\n",
@@ -169,7 +169,7 @@
169169
{
170170
"cell_type": "code",
171171
"execution_count": null,
172-
"id": "977497d1",
172+
"id": "02ae97ca",
173173
"metadata": {},
174174
"outputs": [],
175175
"source": [
@@ -178,7 +178,7 @@
178178
},
179179
{
180180
"cell_type": "markdown",
181-
"id": "92c51ea0",
181+
"id": "c4c780c8",
182182
"metadata": {},
183183
"source": [
184184
"## 🎲 Getting started with sampler columns\n",
@@ -195,7 +195,7 @@
195195
{
196196
"cell_type": "code",
197197
"execution_count": null,
198-
"id": "68d7a4e6",
198+
"id": "685417ab",
199199
"metadata": {},
200200
"outputs": [],
201201
"source": [
@@ -204,7 +204,7 @@
204204
},
205205
{
206206
"cell_type": "markdown",
207-
"id": "314c4719",
207+
"id": "6bae388c",
208208
"metadata": {},
209209
"source": [
210210
"Let's start designing our product review dataset by adding product category and subcategory columns.\n"
@@ -213,7 +213,7 @@
213213
{
214214
"cell_type": "code",
215215
"execution_count": null,
216-
"id": "1bcad060",
216+
"id": "6122711b",
217217
"metadata": {},
218218
"outputs": [],
219219
"source": [
@@ -289,12 +289,12 @@
289289
")\n",
290290
"\n",
291291
"# Optionally validate that the columns are configured correctly.\n",
292-
"config_builder.validate()"
292+
"data_designer.validate(config_builder)"
293293
]
294294
},
295295
{
296296
"cell_type": "markdown",
297-
"id": "aab7414d",
297+
"id": "12d8c063",
298298
"metadata": {},
299299
"source": [
300300
"Next, let's add samplers to generate data related to the customer and their review.\n"
@@ -303,7 +303,7 @@
303303
{
304304
"cell_type": "code",
305305
"execution_count": null,
306-
"id": "f191f5bf",
306+
"id": "c4c21fa0",
307307
"metadata": {},
308308
"outputs": [],
309309
"source": [
@@ -340,7 +340,7 @@
340340
},
341341
{
342342
"cell_type": "markdown",
343-
"id": "5d893b3d",
343+
"id": "be6bd3c8",
344344
"metadata": {},
345345
"source": [
346346
"## 🦜 LLM-generated columns\n",
@@ -355,7 +355,7 @@
355355
{
356356
"cell_type": "code",
357357
"execution_count": null,
358-
"id": "2abadac9",
358+
"id": "99e953d8",
359359
"metadata": {},
360360
"outputs": [],
361361
"source": [
@@ -391,7 +391,7 @@
391391
},
392392
{
393393
"cell_type": "markdown",
394-
"id": "2c9cb423",
394+
"id": "ec30007b",
395395
"metadata": {},
396396
"source": [
397397
"### 🔁 Iteration is key – preview the dataset!\n",
@@ -408,7 +408,7 @@
408408
{
409409
"cell_type": "code",
410410
"execution_count": null,
411-
"id": "71e3a022",
411+
"id": "7ac1f25b",
412412
"metadata": {},
413413
"outputs": [],
414414
"source": [
@@ -418,7 +418,7 @@
418418
{
419419
"cell_type": "code",
420420
"execution_count": null,
421-
"id": "28f7913d",
421+
"id": "67d8dff4",
422422
"metadata": {},
423423
"outputs": [],
424424
"source": [
@@ -429,7 +429,7 @@
429429
{
430430
"cell_type": "code",
431431
"execution_count": null,
432-
"id": "6621c80f",
432+
"id": "f78e4d9f",
433433
"metadata": {},
434434
"outputs": [],
435435
"source": [
@@ -439,7 +439,7 @@
439439
},
440440
{
441441
"cell_type": "markdown",
442-
"id": "2b451ded",
442+
"id": "ee838ed6",
443443
"metadata": {},
444444
"source": [
445445
"### 📊 Analyze the generated data\n",
@@ -452,7 +452,7 @@
452452
{
453453
"cell_type": "code",
454454
"execution_count": null,
455-
"id": "0f7cb6cc",
455+
"id": "4bea0fca",
456456
"metadata": {},
457457
"outputs": [],
458458
"source": [
@@ -462,7 +462,7 @@
462462
},
463463
{
464464
"cell_type": "markdown",
465-
"id": "721b3c7d",
465+
"id": "da9875d4",
466466
"metadata": {},
467467
"source": [
468468
"### 🆙 Scale up!\n",
@@ -475,7 +475,7 @@
475475
{
476476
"cell_type": "code",
477477
"execution_count": null,
478-
"id": "1ad777d1",
478+
"id": "8c7f20b4",
479479
"metadata": {},
480480
"outputs": [],
481481
"source": [
@@ -485,7 +485,7 @@
485485
{
486486
"cell_type": "code",
487487
"execution_count": null,
488-
"id": "df089509",
488+
"id": "2f5fdaa8",
489489
"metadata": {},
490490
"outputs": [],
491491
"source": [
@@ -498,7 +498,7 @@
498498
{
499499
"cell_type": "code",
500500
"execution_count": null,
501-
"id": "e37fa65b",
501+
"id": "5c2d935f",
502502
"metadata": {},
503503
"outputs": [],
504504
"source": [
@@ -510,7 +510,7 @@
510510
},
511511
{
512512
"cell_type": "markdown",
513-
"id": "84d1802b",
513+
"id": "a80515b7",
514514
"metadata": {},
515515
"source": [
516516
"## ⏭️ Next Steps\n",

0 commit comments

Comments
 (0)