Skip to content
This repository was archived by the owner on Feb 18, 2026. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions config_templates/gretel/tasks/tabular_ft__default.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@ task:
# Scale the base LLM's context length by this factor
# using RoPE scaling to handle datasets with more
# columns, or datasets containing groups with more
# than a few records. You can try increasing the
# rope_scaling_factor (you could first try the value 2)
# if you hit an error for maximum tokens. It must be
# an integer value. The default is 1 and maximum is 6.
rope_scaling_factor: 1
# than a few records. If set to 'auto', we will
# estimate a value that is large enough to cover
# your dataset. If setting it manually, it must be
# an integer between 1 and 6; try increasing it if
# you hit a maximum-tokens error.
rope_scaling_factor: auto

generate:
num_records: 1000
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,12 @@ task:
# Scale the base LLM's context length by this factor
# using RoPE scaling to handle datasets with more
# columns, or datasets containing groups with more
# than a few records. You can try increasing the
# rope_scaling_factor (you could first try the value 2)
# if you hit an error for maximum tokens. It must be
# an integer value. The default is 1 and maximum is 6.
rope_scaling_factor: 1
# than a few records. If set to 'auto', we will
# estimate a value that is large enough to cover
# your dataset. If setting it manually, it must be
# an integer between 1 and 6; try increasing it if
# you hit a maximum-tokens error.
rope_scaling_factor: auto

# You can try increasing this until you run out-of-memory.
batch_size: 4
Expand Down
11 changes: 6 additions & 5 deletions config_templates/gretel/workflows/tabular-ft-dp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -46,11 +46,12 @@ steps:
# Scale the base LLM's context length by this factor
# using RoPE scaling to handle datasets with more
# columns, or datasets containing groups with more
# than a few records. You can try increasing the
# rope_scaling_factor (you could first try the value 2)
# if you hit an error for maximum tokens. It must be
# an integer value. The default is 1 and maximum is 6.
rope_scaling_factor: 1
# than a few records. If set to 'auto', we will
# estimate a value that is large enough to cover
# your dataset. If setting it manually, it must be
# an integer between 1 and 6; try increasing it if
# you hit a maximum-tokens error.
rope_scaling_factor: auto

# You can try increasing this until you run out-of-memory.
batch_size: 4
Expand Down
11 changes: 6 additions & 5 deletions config_templates/gretel/workflows/tabular-ft.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,12 @@ steps:
# Scale the base LLM's context length by this factor
# using RoPE scaling to handle datasets with more
# columns, or datasets containing groups with more
# than a few records. You can try increasing the
# rope_scaling_factor (you could first try the value 2)
# if you hit an error for maximum tokens. It must be
# an integer value. The default is 1 and maximum is 6.
rope_scaling_factor: 1
# than a few records. If set to 'auto', we will
# estimate a value that is large enough to cover
# your dataset. If setting it manually, it must be
# an integer between 1 and 6; try increasing it if
# you hit a maximum-tokens error.
rope_scaling_factor: auto

generate:
num_records: 1000
Expand Down
103 changes: 15 additions & 88 deletions docs/notebooks/safe-synthetics/running-standalone-evaluate.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,6 @@
"## 🌐 Configure your Gretel Session"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "-MMNWeINRAZr"
},
"outputs": [],
"source": [
"# Set Gretel API key as an environment variable\n",
"import os\n",
"os.environ[\"GRETEL_API_KEY\"] = \"grtu....\""
]
},
{
"cell_type": "code",
"execution_count": null,
Expand All @@ -66,16 +53,9 @@
},
"outputs": [],
"source": [
"from gretel_client import create_or_get_unique_project\n",
"from gretel_client.config import get_session_config\n",
"from gretel_client.navigator_client import Gretel\n",
"\n",
"gretel = Gretel()\n",
"project_name = \"standalone-evaluate\"\n",
"session = get_session_config()\n",
"project = create_or_get_unique_project(name=project_name, session=session)\n",
"\n",
"project.get_console_url()"
"gretel = Gretel(api_key=\"prompt\", default_project_id=\"standalone-evaluate\")"
]
},
{
Expand Down Expand Up @@ -119,16 +99,9 @@
"outputs": [],
"source": [
"# Convert any Pandas Data Frames to Datasets\n",
"from gretel_client.files import FileClient\n",
"\n",
"file_client = FileClient()\n",
"\n",
"resp_train = file_client.upload(train_df, \"dataset\")\n",
"train_file_id = resp_train.id\n",
"resp_holdout = file_client.upload(holdout_df, \"dataset\")\n",
"holdout_file_id = resp_holdout.id\n",
"resp_synthetic = file_client.upload(synthetic_df, \"dataset\")\n",
"synthetic_file_id = resp_synthetic.id"
"training_file = gretel.files.upload(train_df, purpose=\"dataset\")\n",
"holdout_file = gretel.files.upload(holdout_df, purpose=\"dataset\")\n",
"synthetic_file = gretel.files.upload(synthetic_df, purpose=\"dataset\")"
]
},
{
Expand All @@ -142,81 +115,35 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "au2V0khbM5CJ"
"id": "rtjEVolUM-Yf"
},
"outputs": [],
"source": [
"import requests\n",
"import yaml\n",
"\n",
"def run_workflow(config: str):\n",
" \"\"\"Create a workflow, and workflow run from a given yaml config. Blocks and\n",
" prints log lines until the workflow reaches a terminal state.\n",
"\n",
" Args:\n",
" config: The workflow config to run.\n",
" \"\"\"\n",
" config_dict = yaml.safe_load(config)\n",
"workflow = gretel.workflows.builder()\n",
"\n",
" response = requests.post(\n",
" f\"{session.endpoint}/v2/workflows/exec_batch\",\n",
" json={\n",
" \"workflow_config\": config_dict,\n",
" \"project_id\": project.project_guid,\n",
" },\n",
" headers={\"Authorization\": session.api_key}\n",
" )\n",
" response_body = response.json()\n",
"\n",
" print(response_body)\n",
"\n",
" workflow_id = response_body[\"workflow_id\"]\n",
" workflow_run_id = response_body[\"workflow_run_id\"]\n",
"\n",
" workflow_run_url = (\n",
" f\"{project.get_console_url().replace(project.project_guid, '')}workflows/\"\n",
" f\"{workflow_id}/runs/{workflow_run_id}\"\n",
" )\n",
"\n",
" print(f\"workflow: {workflow_id}\")\n",
" print(f\"workflow run id: {workflow_run_id}\")\n",
" print(workflow_run_url)"
"workflow.add_step(gretel.tasks.Holdout(), [training_file.id, holdout_file.id], step_name=\"holdout\")\n",
"workflow.add_step(gretel.tasks.EvaluateSafeSyntheticsDataset(), [synthetic_file.id, \"holdout\"])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rtjEVolUM-Yf"
"id": "rWmL8_iFRlv8"
},
"outputs": [],
"source": [
"eval_config = f\"\"\"\n",
"name: evaluate\n",
"version: \"2\"\n",
"\n",
"steps:\n",
" - name: holdout\n",
" task: holdout\n",
" inputs: [{train_file_id}, {holdout_file_id}]\n",
" config: {{}}\n",
" - name: eval\n",
" task: evaluate_safe_synthetics_dataset\n",
" inputs: [{synthetic_file_id}, \"holdout\"]\n",
" config: {{}}\n",
"\"\"\"\n",
"\n",
"run_workflow(eval_config)"
"results = workflow.run(wait_until_done=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "rWmL8_iFRlv8"
},
"metadata": {},
"outputs": [],
"source": []
"source": [
"results.report.display_in_notebook()"
]
}
],
"metadata": {
Expand All @@ -238,7 +165,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.10"
"version": "3.12.9"
}
},
"nbformat": 4,
Expand Down