From b701bc0c6fce2d907d7e2263b6aa24e6b4a1499e Mon Sep 17 00:00:00 2001
From: Robert Tinn
Date: Wed, 30 Apr 2025 09:47:30 +0100
Subject: [PATCH 1/2] Small fixes for reading the input files correctly and to the OpenAIFineTuner

---
 ...ft_retrieval_augmented_generation_qdrant.ipynb | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/examples/fine-tuned_qa/ft_retrieval_augmented_generation_qdrant.ipynb b/examples/fine-tuned_qa/ft_retrieval_augmented_generation_qdrant.ipynb
index 75f698905f..f7257d8183 100644
--- a/examples/fine-tuned_qa/ft_retrieval_augmented_generation_qdrant.ipynb
+++ b/examples/fine-tuned_qa/ft_retrieval_augmented_generation_qdrant.ipynb
@@ -559,7 +559,7 @@
     "\n",
     "    def create_openai_file(self):\n",
     "        self.file_object = client.files.create(\n",
-    "            file=open(self.training_file_path, \"r\"),\n",
+    "            file=open(self.training_file_path, \"rb\"),\n",
     "            purpose=\"fine-tune\",\n",
     "        )\n",
     "\n",
@@ -571,19 +571,22 @@
     "\n",
     "    def create_fine_tuning_job(self):\n",
     "        self.fine_tuning_job = client.fine_tuning.jobs.create(\n",
-    "            training_file=self.file_object[\"id\"],\n",
+    "            training_file=self.file_object.id,\n",
     "            model=self.model_name,\n",
     "            suffix=self.suffix,\n",
     "        )\n",
     "\n",
     "    def wait_for_fine_tuning(self, sleep_time=45):\n",
-    "        while self.fine_tuning_job.status != 'succeeded':\n",
+    "        while True:\n",
+    "            # Retrieve the latest fine-tuning job status\n",
+    "            self.fine_tuning_job = client.fine_tuning.jobs.retrieve(self.fine_tuning_job.id)\n",
+    "            print(\"Job Status:\", self.fine_tuning_job.status)\n",
+    "            if self.fine_tuning_job.status in {'succeeded', 'failed', 'cancelled'}:\n",
+    "                break\n",
     "            time.sleep(sleep_time)\n",
-    "            self.fine_tuning_job.refresh()\n",
-    "            print(\"Job Status: \", self.fine_tuning_job.status)\n",
     "\n",
     "    def retrieve_fine_tuned_model(self):\n",
-    "        self.model_id = client.fine_tuning.jobs.retrieve(self.fine_tuning_job[\"id\"]).fine_tuned_model\n",
+    "        self.model_id = client.fine_tuning.jobs.retrieve(self.fine_tuning_job.id).fine_tuned_model\n",
     "        return self.model_id\n",
     "\n",
     "    def fine_tune_model(self):\n",

From 975d6eeaf2210ec555fb547cacf4bb40d028466b Mon Sep 17 00:00:00 2001
From: Robert Tinn
Date: Wed, 7 May 2025 09:07:14 +0100
Subject: [PATCH 2/2] Add note about fine-tuning on SQuAD with newer models

---
 .../ft_retrieval_augmented_generation_qdrant.ipynb | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/examples/fine-tuned_qa/ft_retrieval_augmented_generation_qdrant.ipynb b/examples/fine-tuned_qa/ft_retrieval_augmented_generation_qdrant.ipynb
index f7257d8183..eb86b85b0a 100644
--- a/examples/fine-tuned_qa/ft_retrieval_augmented_generation_qdrant.ipynb
+++ b/examples/fine-tuned_qa/ft_retrieval_augmented_generation_qdrant.ipynb
@@ -11,6 +11,8 @@
     "\n",
     "We will also be integrating Qdrant and Few-Shot Learning to boost the model's performance and reduce hallucinations. This could serve as a practical guide for ML practitioners, data scientists, and AI Engineers interested in leveraging the power of OpenAI models for specific use-cases. 🤩\n",
     "\n",
+    "Note: This notebook uses the gpt-3.5-turbo model. Fine-tuning on the SQuAD dataset with this setup yields only minimal gains for more advanced models such as gpt-4o or gpt-4.1. As such, this notebook is primarily intended as a guide for fine-tuning workflows and retrieval-augmented generation (RAG) practices.\n",
+    "\n",
     "## Why should you read this blog?\n",
     "\n",
     "You want to learn how to \n",