diff --git a/nbs/llm/llm.ipynb b/nbs/llm/llm.ipynb new file mode 100644 index 0000000..f98037e --- /dev/null +++ b/nbs/llm/llm.ipynb @@ -0,0 +1,257 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp llm.llm" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# LLM Interface for Ragas" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "import typing as t\n", + "import asyncio\n", + "import inspect\n", + "import threading\n", + "from pydantic import BaseModel\n", + "import instructor\n", + "\n", + "T = t.TypeVar('T', bound=BaseModel)\n", + "\n", + "class RagasLLM:\n", + " def __init__(self, provider: str, model: str, client: t.Any, **model_args):\n", + " self.provider = provider.lower()\n", + " self.model = model\n", + " self.model_args = model_args or {}\n", + " self.client = self._initialize_client(provider, client)\n", + " # Check if client is async-capable at initialization\n", + " self.is_async = self._check_client_async()\n", + " \n", + " def _check_client_async(self) -> bool:\n", + " \"\"\"Determine if the client is async-capable.\"\"\"\n", + " try:\n", + " # Check if this is an async client by checking for a coroutine method\n", + " if hasattr(self.client.chat.completions, 'create'):\n", + " return inspect.iscoroutinefunction(self.client.chat.completions.create)\n", + " return False\n", + " except (AttributeError, TypeError):\n", + " return False\n", + " \n", + " def _initialize_client(self, provider: str, client: t.Any) -> t.Any:\n", + " provider = provider.lower()\n", + " \n", + " if provider == \"openai\":\n", + " return instructor.from_openai(client)\n", + " elif provider == \"anthropic\":\n", + " return instructor.from_anthropic(client)\n", + " elif provider == \"cohere\":\n", + " return instructor.from_cohere(client)\n", + " elif provider == \"gemini\":\n", + " return instructor.from_gemini(client)\n", + " elif provider == \"litellm\":\n", + " return instructor.from_litellm(client)\n", + " else:\n", + " raise ValueError(f\"Unsupported provider: {provider}\")\n", + " \n", + " def _run_async_in_current_loop(self, coro):\n", + " \"\"\"Run an async coroutine in the current event loop if possible.\n", + " \n", + " This handles Jupyter environments correctly by using a separate thread\n", + " when a running event loop is detected.\n", + " \"\"\"\n", + " try:\n", + " # Try to get the current event loop\n", + " loop = asyncio.get_event_loop()\n", + " \n", + " if loop.is_running():\n", + " # If the loop is already running (like in Jupyter notebooks),\n", + " # we run the coroutine in a separate thread with its own event loop\n", + " result_container = {'result': None, 'exception': None}\n", + " \n", + " def run_in_thread():\n", + " # Create a new event loop for this thread\n", + " new_loop = asyncio.new_event_loop()\n", + " asyncio.set_event_loop(new_loop)\n", + " try:\n", + " # Run the coroutine in this thread's event loop\n", + " result_container['result'] = new_loop.run_until_complete(coro)\n", + " except Exception as e:\n", + " # Capture any exceptions to re-raise in the main thread\n", + " result_container['exception'] = e\n", + " finally:\n", + " # Clean up the event loop\n", + " new_loop.close()\n", + " \n", + " # Start the thread and wait for it to complete\n", + " thread = threading.Thread(target=run_in_thread)\n", + " thread.start()\n", + " thread.join()\n", + " \n", + " # 
Re-raise any exceptions that occurred in the thread\n", + " if result_container['exception']:\n", + " raise result_container['exception']\n", + " \n", + " return result_container['result']\n", + " else:\n", + " # Standard case - event loop exists but isn't running\n", + " return loop.run_until_complete(coro)\n", + " \n", + " except RuntimeError:\n", + " # If we get a runtime error about no event loop, create a new one\n", + " loop = asyncio.new_event_loop()\n", + " asyncio.set_event_loop(loop)\n", + " try:\n", + " return loop.run_until_complete(coro)\n", + " finally:\n", + " # Clean up\n", + " loop.close()\n", + " asyncio.set_event_loop(None)\n", + " \n", + " def generate(self, prompt: str, response_model: t.Type[T]) -> T:\n", + " \"\"\"Generate a response using the configured LLM.\n", + " \n", + " For async clients, this will run the async method in the appropriate event loop.\n", + " \"\"\"\n", + " messages = [{\"role\": \"user\", \"content\": prompt}]\n", + " \n", + " # If client is async, use the appropriate method to run it\n", + " if self.is_async:\n", + " return self._run_async_in_current_loop(\n", + " self.agenerate(prompt, response_model)\n", + " )\n", + " else:\n", + " # Regular sync client, just call the method directly\n", + " return self.client.chat.completions.create(\n", + " model=self.model,\n", + " messages=messages,\n", + " response_model=response_model,\n", + " **self.model_args,\n", + " )\n", + " \n", + " async def agenerate(self, prompt: str, response_model: t.Type[T]) -> T:\n", + " \"\"\"Asynchronously generate a response using the configured LLM.\"\"\"\n", + " messages = [{\"role\": \"user\", \"content\": prompt}]\n", + " \n", + " # If client is not async, raise a helpful error\n", + " if not self.is_async:\n", + " raise TypeError(\n", + " \"Cannot use agenerate() with a synchronous client. 
Use generate() instead.\"\n", + " )\n", + " \n", + " # Regular async client, call the method directly\n", + " return await self.client.chat.completions.create(\n", + " model=self.model,\n", + " messages=messages,\n", + " response_model=response_model,\n", + " **self.model_args,\n", + " )\n", + "\n", + "def ragas_llm(provider: str, model: str, client: t.Any, **model_args) -> RagasLLM:\n", + " return RagasLLM(provider=provider, client=client, model=model, **model_args)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Example Usage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| eval: false\n", + "\n", + "from openai import OpenAI\n", + "class Response(BaseModel):\n", + " response: str\n", + "\n", + "llm = ragas_llm(provider=\"openai\",model=\"gpt-4o\",client=OpenAI())\n", + "llm.generate(\"What is the capital of India?\",response_model=Response) #works fine\n", + "\n", + "try:\n", + " await llm.agenerate(\"What is the capital of India?\", response_model=Response)\n", + "except TypeError as e:\n", + " assert isinstance(e, TypeError)\n", + "#gives TypeError: object Response can't be used in 'await' expression\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Response(response='The capital of India is New Delhi.')" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#| eval: false\n", + "\n", + "from openai import AsyncOpenAI\n", + "\n", + "llm = ragas_llm(provider=\"openai\",model=\"gpt-4o\",client=AsyncOpenAI())\n", + "await llm.agenerate(\"What is the capital of India?\",response_model=Response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Response(response='The capital of India is New Delhi.')" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#| eval: false\n", + "\n", + "from anthropic import Anthropic\n", + "\n", + "llm = ragas_llm(provider=\"anthropic\",model=\"claude-3-opus-20240229\",client=Anthropic(),max_tokens=1024)\n", + "llm.generate(\"What is the capital of India?\",response_model=Response)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/nbs/metric/base.ipynb b/nbs/metric/base.ipynb index c6d6e24..f34255a 100644 --- a/nbs/metric/base.ipynb +++ b/nbs/metric/base.ipynb @@ -43,14 +43,14 @@ "from pydantic import BaseModel\n", "import typing as t\n", "from ragas_annotator.metric import MetricResult\n", - "from ragas_annotator.metric import LLM\n", + "from ragas_annotator.llm import RagasLLM\n", "\n", "@dataclass\n", "class Metric(ABC):\n", " \"\"\"Base class for all metrics in the LLM evaluation library.\"\"\"\n", " name: str\n", " prompt: str\n", - " llm: LLM\n", + " llm: RagasLLM\n", " _response_models: t.Dict[bool, t.Type[BaseModel]] = field(\n", " default_factory=dict, init=False, repr=False\n", " )\n", @@ -114,6 +114,11 @@ "source": [ "#| eval: false\n", "\n", + "from ragas_annotator.llm import ragas_llm\n", + "from openai import OpenAI\n", + "\n", + "llm = ragas_llm(provider=\"openai\",model=\"gpt-4o\",client=OpenAI())\n", + "\n", "@dataclass\n", "class CustomMetric(Metric):\n", " values: t.List[str] = field(default_factory=lambda: [\"pass\", 
\"fail\"])\n", @@ -131,12 +136,18 @@ " \n", " return results[0] # Placeholder for ensemble logic\n", "\n", - "my_metric = CustomMetric(name=\"example\", prompt=\"What is the result of {input}?\", llm=LLM())\n", + "my_metric = CustomMetric(name=\"example\", prompt=\"What is the result of {input}?\", llm=llm)\n", "my_metric.score(input=\"test\")" ] } ], - "metadata": {}, + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, "nbformat": 4, "nbformat_minor": 2 } diff --git a/nbs/metric/decorator.ipynb b/nbs/metric/decorator.ipynb index 70131f0..ec03547 100644 --- a/nbs/metric/decorator.ipynb +++ b/nbs/metric/decorator.ipynb @@ -30,6 +30,7 @@ "import asyncio\n", "from dataclasses import dataclass\n", "from ragas_annotator.metric import MetricResult\n", + "from ragas_annotator.llm import RagasLLM\n", "\n", "\n", "\n", @@ -44,7 +45,7 @@ " Returns:\n", " A decorator factory function for the specified metric type\n", " \"\"\"\n", - " def decorator_factory(llm, prompt, name: t.Optional[str] = None, **metric_params):\n", + " def decorator_factory(llm:RagasLLM, prompt, name: t.Optional[str] = None, **metric_params):\n", " \"\"\"\n", " Creates a decorator that wraps a function into a metric instance.\n", " \n", @@ -168,12 +169,16 @@ "\n", "\n", "from ragas_annotator.metric import DiscreteMetric\n", - "from ragas_annotator.metric.llm import LLM\n", "from pydantic import BaseModel\n", "\n", + "from ragas_annotator.llm import ragas_llm\n", + "from openai import OpenAI\n", + "\n", + "llm = ragas_llm(provider=\"openai\",model=\"gpt-4o\",client=OpenAI())\n", + "\n", "discrete_metric = create_metric_decorator(DiscreteMetric)\n", "\n", - "@discrete_metric(llm=LLM(),\n", + "@discrete_metric(llm=llm,\n", " prompt=\"Evaluate if given answer is helpful\\n\\n{response}\",\n", " name='new_metric',values=[\"low\",\"med\",\"high\"])\n", "def my_metric(llm,prompt,**kwargs):\n", diff --git a/nbs/metric/discrete.ipynb b/nbs/metric/discrete.ipynb index c27815c..44a3ed5 100644 --- a/nbs/metric/discrete.ipynb +++ b/nbs/metric/discrete.ipynb @@ -21,7 +21,16 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/homebrew/Caskroom/miniforge/base/envs/random/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "#| export\n", "import typing as t\n", @@ -91,7 +100,7 @@ "output_type": "stream", "text": [ "low\n", - "No context or content was provided for evaluation.\n" + "The response does not provide any specific information or context that can help evaluate its helpfulness.\n" ] } ], @@ -99,10 +108,14 @@ "\n", "#| eval: false\n", "\n", - "from ragas_annotator.metric.llm import LLM\n", + "from ragas_annotator.llm import ragas_llm\n", + "from openai import OpenAI\n", + "\n", + "llm = ragas_llm(provider=\"openai\",model=\"gpt-4o\",client=OpenAI())\n", + "\n", "\n", "my_metric = DiscreteMetric(\n", - " llm=LLM(),\n", + " llm=llm,\n", " name='helpfulness',\n", " prompt=\"Evaluate if given answer is helpful\\n\\n{response}\",\n", " values=[\"low\",\"med\",\"high\"],\n", @@ -130,14 +143,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "low\n", + "high\n", "reason\n" ] } ], "source": [ "#| eval: false\n", - "@discrete_metric(llm=LLM(),\n", + "@discrete_metric(llm=llm,\n", " prompt=\"Evaluate if given answer is helpful\\n\\n{response}\",\n", " name='new_metric',values=[\"low\",\"med\",\"high\"])\n", "def my_metric(llm,prompt,**kwargs):\n", diff --git a/nbs/metric/llm.ipynb b/nbs/metric/llm.ipynb deleted file mode 100644 index 6ceca63..0000000 --- a/nbs/metric/llm.ipynb +++ /dev/null @@ -1,61 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| default_exp metric.llm" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "\n", - "import openai\n", - "import instructor\n", - "from dataclasses import dataclass\n", - "\n", - "@dataclass\n", - "class LLM:\n", - "\n", - " def __post_init__(self):\n", - " self.aclient = instructor.from_openai(openai.AsyncOpenAI())\n", - " self.client = instructor.from_openai(openai.OpenAI())\n", - "\n", - " \n", - " def generate(self,prompt,response_model):\n", - " return self.client.chat.completions.create(\n", - " model=\"gpt-4o-mini\",\n", - " messages=[\n", - " {\"role\": \"user\", \"content\": prompt},\n", - " ],\n", - " response_model=response_model,\n", - " )\n", - "\n", - " async def agenerate(self,prompt,response_model):\n", - " return await self.aclient.chat.completions.create(\n", - " model=\"gpt-4o-mini\",\n", - " messages=[\n", - " {\"role\": \"user\", \"content\": prompt},\n", - " ],\n", - " response_model=response_model,\n", - " )" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "python3", - "language": "python", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/nbs/metric/numeric.ipynb b/nbs/metric/numeric.ipynb index e3b08b0..c46a13f 100644 --- a/nbs/metric/numeric.ipynb +++ b/nbs/metric/numeric.ipynb @@ -21,7 +21,16 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/homebrew/Caskroom/miniforge/base/envs/random/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "#| export\n", "\n", @@ -81,7 +90,7 @@ { "data": { "text/plain": [ - "'The response does not provide any context or information that can be evaluated as helpful.'" + "\"The provided input lacks context or content to determine if it is helpful as it merely states 'this is my response' without any additional information.\"" ] }, "execution_count": null, @@ -90,13 +99,18 @@ } ], "source": [ + "\n", "#| eval: false\n", "\n", - "from ragas_annotator.metric.llm import LLM\n", + "from ragas_annotator.llm import ragas_llm\n", + "from openai import OpenAI\n", + "\n", + "llm = ragas_llm(provider=\"openai\",model=\"gpt-4o\",client=OpenAI())\n", + "\n", "\n", "my_metric = NumericMetric(\n", " name='helpfulness',\n", - " llm=LLM(),\n", + " llm=llm,\n", " prompt=\"Evaluate if given answer is helpful\\n\\n{response}\",\n", " range=(0,10),\n", ")\n", @@ -122,7 +136,7 @@ { "data": { "text/plain": [ - "20" + "10" ] }, "execution_count": null, @@ -134,7 +148,7 @@ "\n", "#| eval: false\n", "\n", - "@numeric_metric(llm=LLM(),\n", + "@numeric_metric(llm=llm,\n", " prompt=\"Evaluate if given answer is helpful\\n\\n{response}\",\n", " name='new_metric',range=(0,10))\n", "def my_metric(llm,prompt,**kwargs):\n", diff --git a/nbs/metric/ranking.ipynb b/nbs/metric/ranking.ipynb index 48e2aa3..efd8895 100644 --- a/nbs/metric/ranking.ipynb +++ b/nbs/metric/ranking.ipynb @@ -21,7 +21,16 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/homebrew/Caskroom/miniforge/base/envs/random/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "#| export\n", "\n", @@ -117,11 +126,11 @@ "name": "stdout", "output_type": "stream", "text": [ - "[0, 1, 2]\n", + "[2, 1, 0]\n", "Ensemble ranking based on multiple evaluations.\n", - "The ranking is based on the length and detail of the responses, with 'short answer.' being the least detailed (rank 0), 'a bit more detailed.' being moderate (rank 1), and 'the longest and most detailed answer.' being the most comprehensive (rank 2).\n", - "The ranking is based on the length and detail of the responses. The shortest response is ranked the lowest (0), the moderately detailed response is ranked higher (1), and the longest and most detailed response is ranked the highest (2).\n", - "Ranking is based on length and detail; the longest answer (2) is most detailed, followed by a bit more detailed (1), and the shortest answer (0) is the least detailed.\n" + "The ranking is based on the length and detail of each response. 'the longest and most detailed answer.' is the most comprehensive, followed by 'a bit more detailed.', and 'short answer.' is the briefest.\n", + "The ranking is based on the length and detail of each response. The response 'the longest and most detailed answer.' is ranked highest (2) because it is the most detailed, followed by 'a bit more detailed.' (1), and finally 'short answer.' (0) as it is the least detailed.\n", + "The responses are ranked based on the level of detail and length. 'short answer.' is the least detailed, 'a bit more detailed.' 
provides more information, and 'the longest and most detailed answer.' offers the most comprehensive explanation.\n" ] } ], @@ -129,11 +138,14 @@ "\n", "#| eval: false\n", "\n", - "from ragas_annotator.metric.llm import LLM\n", + "from ragas_annotator.llm import ragas_llm\n", + "from openai import OpenAI\n", + "\n", + "llm = ragas_llm(provider=\"openai\",model=\"gpt-4o\",client=OpenAI())\n", "\n", "my_ranking_metric = RankingMetric(\n", " name='response_ranking',\n", - " llm=LLM(), # Your language model instance\n", + " llm=llm, # Your language model instance\n", " prompt=\"Rank the following responses:\\n{candidates}\",\n", " num_ranks=3,\n", ")\n", @@ -175,7 +187,7 @@ "\n", "\n", "@ranking_metric(\n", - " llm=LLM(), # Your language model instance\n", + " llm=llm, # Your language model instance\n", " prompt=\"Rank the following responses:\\n{candidates}\",\n", " name='new_ranking_metric',\n", " num_ranks=3\n", diff --git a/nbs/sidebar.yml b/nbs/sidebar.yml index c6a1ec2..1ededb3 100644 --- a/nbs/sidebar.yml +++ b/nbs/sidebar.yml @@ -10,16 +10,17 @@ website: - backends/factory.ipynb - backends/mock_notion_client.ipynb - backends/notion.ipynb + - section: llm + contents: + - llm/llm.ipynb - section: metric contents: - metric/base.ipynb - metric/decorator.ipynb - metric/discrete.ipynb - - metric/llm.ipynb - metric/numeric.ipynb - metric/ranking.ipynb - metric/result.ipynb - - metric/test_base.ipynb - section: model contents: - model/notion_model.ipynb diff --git a/ragas_annotator/_modidx.py b/ragas_annotator/_modidx.py index c055929..678967a 100644 --- a/ragas_annotator/_modidx.py +++ b/ragas_annotator/_modidx.py @@ -131,6 +131,20 @@ 'ragas_annotator/experiment.py'), 'ragas_annotator.experiment.Experiment.__str__': ( 'experiment.html#experiment.__str__', 'ragas_annotator/experiment.py')}, + 'ragas_annotator.llm.llm': { 'ragas_annotator.llm.llm.RagasLLM': ('llm/llm.html#ragasllm', 'ragas_annotator/llm/llm.py'), + 'ragas_annotator.llm.llm.RagasLLM.__init__': ( 'llm/llm.html#ragasllm.__init__', + 'ragas_annotator/llm/llm.py'), + 'ragas_annotator.llm.llm.RagasLLM._check_client_async': ( 'llm/llm.html#ragasllm._check_client_async', + 'ragas_annotator/llm/llm.py'), + 'ragas_annotator.llm.llm.RagasLLM._initialize_client': ( 'llm/llm.html#ragasllm._initialize_client', + 'ragas_annotator/llm/llm.py'), + 'ragas_annotator.llm.llm.RagasLLM._run_async_in_current_loop': ( 'llm/llm.html#ragasllm._run_async_in_current_loop', + 'ragas_annotator/llm/llm.py'), + 'ragas_annotator.llm.llm.RagasLLM.agenerate': ( 'llm/llm.html#ragasllm.agenerate', + 'ragas_annotator/llm/llm.py'), + 'ragas_annotator.llm.llm.RagasLLM.generate': ( 'llm/llm.html#ragasllm.generate', + 'ragas_annotator/llm/llm.py'), + 'ragas_annotator.llm.llm.ragas_llm': ('llm/llm.html#ragas_llm', 'ragas_annotator/llm/llm.py')}, 'ragas_annotator.metric.base': { 'ragas_annotator.metric.base.Metric': ( 'metric/base.html#metric', 'ragas_annotator/metric/base.py'), 'ragas_annotator.metric.base.Metric._ensemble': ( 'metric/base.html#metric._ensemble', @@ -153,13 +167,6 @@ 'ragas_annotator/metric/discrete.py'), 'ragas_annotator.metric.discrete.DiscreteMetric._get_response_model': ( 'metric/discrete.html#discretemetric._get_response_model', 'ragas_annotator/metric/discrete.py')}, - 'ragas_annotator.metric.llm': { 'ragas_annotator.metric.llm.LLM': ('metric/llm.html#llm', 'ragas_annotator/metric/llm.py'), - 'ragas_annotator.metric.llm.LLM.__post_init__': ( 'metric/llm.html#llm.__post_init__', - 'ragas_annotator/metric/llm.py'), - 
-                                    'ragas_annotator.metric.llm.LLM.agenerate': ( 'metric/llm.html#llm.agenerate',
-                                                                                  'ragas_annotator/metric/llm.py'),
-                                    'ragas_annotator.metric.llm.LLM.generate': ( 'metric/llm.html#llm.generate',
-                                                                                 'ragas_annotator/metric/llm.py')},
     'ragas_annotator.metric.numeric': { 'ragas_annotator.metric.numeric.NumericMetric': ( 'metric/numeric.html#numericmetric',
                                                                                           'ragas_annotator/metric/numeric.py'),
                                         'ragas_annotator.metric.numeric.NumericMetric._ensemble': ( 'metric/numeric.html#numericmetric._ensemble',
diff --git a/ragas_annotator/llm/__init__.py b/ragas_annotator/llm/__init__.py
new file mode 100644
index 0000000..cea67d0
--- /dev/null
+++ b/ragas_annotator/llm/__init__.py
@@ -0,0 +1,3 @@
+from ragas_annotator.llm.llm import RagasLLM, ragas_llm
+
+__all__ = ["RagasLLM", "ragas_llm"]
\ No newline at end of file
diff --git a/ragas_annotator/llm/llm.py b/ragas_annotator/llm/llm.py
new file mode 100644
index 0000000..f4e0086
--- /dev/null
+++ b/ragas_annotator/llm/llm.py
@@ -0,0 +1,145 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/llm/llm.ipynb.
+
+# %% auto 0
+__all__ = ['T', 'RagasLLM', 'ragas_llm']
+
+# %% ../../nbs/llm/llm.ipynb 2
+import typing as t
+import asyncio
+import inspect
+import threading
+from pydantic import BaseModel
+import instructor
+
+T = t.TypeVar('T', bound=BaseModel)
+
+class RagasLLM:
+    def __init__(self, provider: str, model: str, client: t.Any, **model_args):
+        self.provider = provider.lower()
+        self.model = model
+        self.model_args = model_args or {}
+        self.client = self._initialize_client(provider, client)
+        # Check if client is async-capable at initialization
+        self.is_async = self._check_client_async()
+
+    def _check_client_async(self) -> bool:
+        """Determine if the client is async-capable."""
+        try:
+            # Check if this is an async client by checking for a coroutine method
+            if hasattr(self.client.chat.completions, 'create'):
+                return inspect.iscoroutinefunction(self.client.chat.completions.create)
+            return False
+        except (AttributeError, TypeError):
+            return False
+
+    def _initialize_client(self, provider: str, client: t.Any) -> t.Any:
+        provider = provider.lower()
+
+        if provider == "openai":
+            return instructor.from_openai(client)
+        elif provider == "anthropic":
+            return instructor.from_anthropic(client)
+        elif provider == "cohere":
+            return instructor.from_cohere(client)
+        elif provider == "gemini":
+            return instructor.from_gemini(client)
+        elif provider == "litellm":
+            return instructor.from_litellm(client)
+        else:
+            raise ValueError(f"Unsupported provider: {provider}")
+
+    def _run_async_in_current_loop(self, coro):
+        """Run an async coroutine in the current event loop if possible.
+
+        This handles Jupyter environments correctly by using a separate thread
+        when a running event loop is detected.
+        """
+        try:
+            # Try to get the current event loop
+            loop = asyncio.get_event_loop()
+
+            if loop.is_running():
+                # If the loop is already running (like in Jupyter notebooks),
+                # we run the coroutine in a separate thread with its own event loop
+                result_container = {'result': None, 'exception': None}
+
+                def run_in_thread():
+                    # Create a new event loop for this thread
+                    new_loop = asyncio.new_event_loop()
+                    asyncio.set_event_loop(new_loop)
+                    try:
+                        # Run the coroutine in this thread's event loop
+                        result_container['result'] = new_loop.run_until_complete(coro)
+                    except Exception as e:
+                        # Capture any exceptions to re-raise in the main thread
+                        result_container['exception'] = e
+                    finally:
+                        # Clean up the event loop
+                        new_loop.close()
+
+                # Start the thread and wait for it to complete
+                thread = threading.Thread(target=run_in_thread)
+                thread.start()
+                thread.join()
+
+                # Re-raise any exceptions that occurred in the thread
+                if result_container['exception']:
+                    raise result_container['exception']
+
+                return result_container['result']
+            else:
+                # Standard case - event loop exists but isn't running
+                return loop.run_until_complete(coro)
+
+        except RuntimeError:
+            # If we get a runtime error about no event loop, create a new one
+            loop = asyncio.new_event_loop()
+            asyncio.set_event_loop(loop)
+            try:
+                return loop.run_until_complete(coro)
+            finally:
+                # Clean up
+                loop.close()
+                asyncio.set_event_loop(None)
+
+    def generate(self, prompt: str, response_model: t.Type[T]) -> T:
+        """Generate a response using the configured LLM.
+
+        For async clients, this will run the async method in the appropriate event loop.
+        """
+        messages = [{"role": "user", "content": prompt}]
+
+        # If client is async, use the appropriate method to run it
+        if self.is_async:
+            return self._run_async_in_current_loop(
+                self.agenerate(prompt, response_model)
+            )
+        else:
+            # Regular sync client, just call the method directly
+            return self.client.chat.completions.create(
+                model=self.model,
+                messages=messages,
+                response_model=response_model,
+                **self.model_args,
+            )
+
+    async def agenerate(self, prompt: str, response_model: t.Type[T]) -> T:
+        """Asynchronously generate a response using the configured LLM."""
+        messages = [{"role": "user", "content": prompt}]
+
+        # If client is not async, raise a helpful error
+        if not self.is_async:
+            raise TypeError(
+                "Cannot use agenerate() with a synchronous client. Use generate() instead."
+ ) + + # Regular async client, call the method directly + return await self.client.chat.completions.create( + model=self.model, + messages=messages, + response_model=response_model, + **self.model_args, + ) + +def ragas_llm(provider: str, model: str, client: t.Any, **model_args) -> RagasLLM: + return RagasLLM(provider=provider, client=client, model=model, **model_args) diff --git a/ragas_annotator/metric/__init__.py b/ragas_annotator/metric/__init__.py index 57a31d3..4733fc4 100644 --- a/ragas_annotator/metric/__init__.py +++ b/ragas_annotator/metric/__init__.py @@ -1,12 +1,10 @@ from ragas_annotator.metric.result import MetricResult -from ragas_annotator.metric.llm import LLM from ragas_annotator.metric.base import Metric from ragas_annotator.metric.discrete import DiscreteMetric from ragas_annotator.metric.numeric import NumericMetric from ragas_annotator.metric.ranking import RankingMetric __all__ = ['MetricResult', - 'LLM', 'Metric', 'DiscreteMetric', 'NumericMetric', diff --git a/ragas_annotator/metric/base.py b/ragas_annotator/metric/base.py index d37b9c5..9f9f932 100644 --- a/ragas_annotator/metric/base.py +++ b/ragas_annotator/metric/base.py @@ -12,14 +12,14 @@ from pydantic import BaseModel import typing as t from . import MetricResult -from . import LLM +from ..llm import RagasLLM @dataclass class Metric(ABC): """Base class for all metrics in the LLM evaluation library.""" name: str prompt: str - llm: LLM + llm: RagasLLM _response_models: t.Dict[bool, t.Type[BaseModel]] = field( default_factory=dict, init=False, repr=False ) diff --git a/ragas_annotator/metric/decorator.py b/ragas_annotator/metric/decorator.py index 016773a..f43fdd8 100644 --- a/ragas_annotator/metric/decorator.py +++ b/ragas_annotator/metric/decorator.py @@ -11,6 +11,7 @@ import asyncio from dataclasses import dataclass from . import MetricResult +from ..llm import RagasLLM @@ -25,7 +26,7 @@ def create_metric_decorator(metric_class): Returns: A decorator factory function for the specified metric type """ - def decorator_factory(llm, prompt, name: t.Optional[str] = None, **metric_params): + def decorator_factory(llm:RagasLLM, prompt, name: t.Optional[str] = None, **metric_params): """ Creates a decorator that wraps a function into a metric instance. diff --git a/ragas_annotator/metric/llm.py b/ragas_annotator/metric/llm.py deleted file mode 100644 index c602e53..0000000 --- a/ragas_annotator/metric/llm.py +++ /dev/null @@ -1,35 +0,0 @@ -# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/metric/llm.ipynb. 
-
-# %% auto 0
-__all__ = ['LLM']
-
-# %% ../../nbs/metric/llm.ipynb 1
-import openai
-import instructor
-from dataclasses import dataclass
-
-@dataclass
-class LLM:
-
-    def __post_init__(self):
-        self.aclient = instructor.from_openai(openai.AsyncOpenAI())
-        self.client = instructor.from_openai(openai.OpenAI())
-
-
-    def generate(self,prompt,response_model):
-        return self.client.chat.completions.create(
-            model="gpt-4o-mini",
-            messages=[
-                {"role": "user", "content": prompt},
-            ],
-            response_model=response_model,
-        )
-
-    async def agenerate(self,prompt,response_model):
-        return await self.aclient.chat.completions.create(
-            model="gpt-4o-mini",
-            messages=[
-                {"role": "user", "content": prompt},
-            ],
-            response_model=response_model,
-        )
diff --git a/settings.ini b/settings.ini
index 0215d37..8324191 100644
--- a/settings.ini
+++ b/settings.ini
@@ -38,7 +38,7 @@ status = 3
 user = explodinggradients
 
 ### Dependencies ###
-requirements = notion-client fastcore tqdm langfuse openai instructor pydantic
+requirements = notion-client fastcore tqdm langfuse instructor pydantic
 dev_requirements = pytest
 # console_scripts =
 # conda_user =