Skip to content

Commit 1ee68df

Browse files
committed
Update python version
1 parent 828a22a commit 1ee68df

File tree

1 file changed

+74
-13
lines changed

1 file changed

+74
-13
lines changed

examples/Fine_tuning_direct_preference_optimisation_guide.ipynb

Lines changed: 74 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@
117117
},
118118
{
119119
"cell_type": "code",
120-
"execution_count": 6,
120+
"execution_count": 14,
121121
"metadata": {},
122122
"outputs": [],
123123
"source": [
@@ -146,7 +146,7 @@
146146
},
147147
{
148148
"cell_type": "code",
149-
"execution_count": null,
149+
"execution_count": 15,
150150
"metadata": {},
151151
"outputs": [],
152152
"source": [
@@ -247,9 +247,17 @@
247247
},
248248
{
249249
"cell_type": "code",
250-
"execution_count": null,
250+
"execution_count": 16,
251251
"metadata": {},
252-
"outputs": [],
252+
"outputs": [
253+
{
254+
"name": "stdout",
255+
"output_type": "stream",
256+
"text": [
257+
"Dataset ready with 500 pairs.\n"
258+
]
259+
}
260+
],
253261
"source": [
254262
"import random\n",
255263
"import nest_asyncio\n",
@@ -305,9 +313,27 @@
305313
},
306314
{
307315
"cell_type": "code",
308-
"execution_count": null,
316+
"execution_count": 17,
309317
"metadata": {},
310-
"outputs": [],
318+
"outputs": [
319+
{
320+
"data": {
321+
"text/plain": [
322+
"{'input': {'messages': [{'role': 'system',\n",
323+
" 'content': 'You are a customer‑support assistant.'},\n",
324+
" {'role': 'user',\n",
325+
" 'content': 'Hello, I purchased a gadget last week. Could you tell me the expected delivery date?'}]},\n",
326+
" 'preferred_output': [{'role': 'assistant',\n",
327+
" 'content': 'Hey there, awesome friend! 🎉 Thanks a bunch for reaching out about your gadget purchase — I’m super excited to help you out! Could you please share your order number or the email you used to place the order? That way, I can zoom in and get the exact delivery date for you in a flash! 🚀✨'}],\n",
328+
" 'non_preferred_output': [{'role': 'assistant',\n",
329+
" 'content': 'Provide the order number or purchase details to check the expected delivery date.'}]}"
330+
]
331+
},
332+
"execution_count": 17,
333+
"metadata": {},
334+
"output_type": "execute_result"
335+
}
336+
],
311337
"source": [
312338
"# set dataset sizes\n",
313339
"random.shuffle(pairs)\n",
@@ -332,9 +358,17 @@
332358
},
333359
{
334360
"cell_type": "code",
335-
"execution_count": null,
361+
"execution_count": 18,
336362
"metadata": {},
337-
"outputs": [],
363+
"outputs": [
364+
{
365+
"name": "stdout",
366+
"output_type": "stream",
367+
"text": [
368+
"Baseline mean score: 2.02\n"
369+
]
370+
}
371+
],
338372
"source": [
339373
"import io\n",
340374
"import json\n",
@@ -439,9 +473,17 @@
439473
},
440474
{
441475
"cell_type": "code",
442-
"execution_count": null,
476+
"execution_count": 19,
443477
"metadata": {},
444-
"outputs": [],
478+
"outputs": [
479+
{
480+
"name": "stdout",
481+
"output_type": "stream",
482+
"text": [
483+
"Fine-tuning job created: job_id = ftjob-vj3iM4KRZR6j80NOXmMYACHr\n"
484+
]
485+
}
486+
],
445487
"source": [
446488
"# create training file\n",
447489
"train_buf = io.BytesIO(\"\\n\".join(json.dumps(p) for p in train_pairs).encode())\n",
@@ -483,9 +525,28 @@
483525
},
484526
{
485527
"cell_type": "code",
486-
"execution_count": null,
528+
"execution_count": 22,
487529
"metadata": {},
488-
"outputs": [],
530+
"outputs": [
531+
{
532+
"name": "stdout",
533+
"output_type": "stream",
534+
"text": [
535+
"Δ mean: 0.82\n",
536+
"\n",
537+
"=== SAMPLE COMPARISON ===\n",
538+
"Prompt:\n",
539+
" Are there any discounts available for customers who stay with you long-term?\n",
540+
"\n",
541+
"Base model reply [score 2]:\n",
542+
" Yes, we do offer discounts for customers who stay with us long-term. The specific discount rates and terms can vary depending on the length of your stay and the type of accommodation or service you choose. Could you please provide more details about your intended stay duration and preferences? This will help me give you the most accurate information.\n",
543+
"\n",
544+
"DPO-tuned model reply [score 3]:\n",
545+
" Thank you for your interest in staying with us long-term! We do offer special discounts and benefits for our long-term customers. I’d be happy to provide you with more details—could you please let me know how long you’re planning to stay or what kind of arrangement you have in mind? This will help me find the best options for you!\n",
546+
"\n"
547+
]
548+
}
549+
],
489550
"source": [
490551
"job = sync_client.fine_tuning.jobs.retrieve(ft.id)\n",
491552
"if job.status == \"succeeded\":\n",
@@ -497,7 +558,7 @@
497558
" )\n",
498559
" # print a sample comparison from the test set for illustration\n",
499560
" print(\"\\n=== SAMPLE COMPARISON ===\")\n",
500-
" idx = 0\n",
561+
" idx = 1\n",
501562
" print(f\"Prompt:\\n {test_prompts[idx]}\\n\")\n",
502563
" print(\n",
503564
" f\"Base model reply [score {base_scores[idx]['score']}]:\\n {base_scores[idx]['reply']}\\n\"\n",

0 commit comments

Comments
 (0)