Merge pull request #14 from explodinggradients/fix/langfuse

jjmachan · web-flow · commit a49fe0ba9328 · 2025-04-16T21:46:23.000-07:00
[merge after select PR] fix: langfuse experimental
diff --git a/nbs/project/experiments.ipynb b/nbs/project/experiments.ipynb
@@ -319,18 +319,6 @@
     "    async def run_async(self, name: str, dataset: Dataset): ..."
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# | export\n",
-    "\n",
-    "# this one we have to clean up\n",
-    "from langfuse.decorators import observe"
-   ]
-  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -477,13 +465,13 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Running experiment: 100%|██████████| 6/6 [00:01<00:00,  3.84it/s]\n"
+      "Running experiment: 100%|██████████| 6/6 [00:01<00:00,  3.23it/s]\n"
      ]
     },
     {
      "data": {
       "text/plain": [
-       "Experiment(name=gallant_torvalds, model=TextExperimentModel)"
+       "Experiment(name=dazzling_knuth, model=TextExperimentModel)"
       ]
      },
      "execution_count": null,
@@ -496,6 +484,18 @@
     "await test_experiment.run_async(test_dataset)"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# | export\n",
+    "\n",
+    "# this one we have to clean up\n",
+    "from langfuse.decorators import observe"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -518,37 +518,111 @@
     "    \"\"\"\n",
     "\n",
     "    def decorator(func: t.Callable) -> ExperimentProtocol:\n",
-    "        # First, create a base experiment wrapper\n",
-    "        base_experiment = self.experiment(experiment_model, name_prefix)(func)\n",
-    "\n",
-    "        # Override the wrapped function to add Langfuse observation\n",
     "        @wraps(func)\n",
-    "        async def wrapped_with_langfuse(*args, **kwargs):\n",
-    "            # wrap the function with langfuse observation\n",
-    "            observed_func = observe(name=f\"{name_prefix}-{func.__name__}\")(func)\n",
+    "        async def langfuse_wrapped_func(*args, **kwargs):\n",
+    "            # Apply langfuse observation directly here\n",
+    "            trace_name = f\"{name_prefix}-{func.__name__}\" if name_prefix else func.__name__\n",
+    "            observed_func = observe(name=trace_name)(func)\n",
     "            return await observed_func(*args, **kwargs)\n",
-    "\n",
-    "        # Replace the async function to use Langfuse\n",
-    "        original_run_async = base_experiment.run_async\n",
-    "\n",
-    "        # Use the original run_async but with the Langfuse-wrapped function\n",
-    "        async def run_async_with_langfuse(\n",
-    "            dataset: Dataset, name: t.Optional[str] = None\n",
-    "        ):\n",
-    "            # Override the internal wrapped_experiment with our Langfuse version\n",
-    "            base_experiment.__wrapped__ = wrapped_with_langfuse\n",
-    "\n",
-    "            # Call the original run_async which will now use our Langfuse-wrapped function\n",
-    "            return await original_run_async(dataset, name)\n",
-    "\n",
-    "        # Replace the run_async method\n",
-    "        base_experiment.__setattr__(\"run_async\", run_async_with_langfuse)\n",
-    "\n",
-    "        return t.cast(ExperimentProtocol, base_experiment)\n",
+    "        \n",
+    "        # Now create the experiment wrapper with our already-observed function\n",
+    "        experiment_wrapper = self.experiment(experiment_model, name_prefix)(langfuse_wrapped_func)\n",
+    "        \n",
+    "        return t.cast(ExperimentProtocol, experiment_wrapper)\n",
     "\n",
     "    return decorator"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import os\n",
+    "# import langfuse\n",
+    "from langfuse import Langfuse"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "langfuse = Langfuse(\n",
+    "  secret_key=os.getenv(\"LANGFUSE_SECRET_KEY\"),\n",
+    "  public_key=os.getenv(\"LANGFUSE_PUBLIC_KEY\"),\n",
+    "  host=\"https://us.cloud.langfuse.com\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "@p.langfuse_experiment(TextExperimentModel)\n",
+    "async def test_experiment(item: TestModel):\n",
+    "    return TextExperimentModel(**item.model_dump(), response=\"test response\", is_correct=\"yes\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "TextExperimentModel(name='test item 1', description='test item 1 description', price=100.0, url='https://www.google.com', tags='test', response='test response', is_correct='yes')"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "await test_experiment(test_dataset[0])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Running experiment: 100%|██████████| 6/6 [00:01<00:00,  4.01it/s]\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "Experiment(name=cool_matsumoto, model=TextExperimentModel)"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "await test_experiment.run_async(test_dataset)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Compare and Plot"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/ragas_experimental/project/experiments.py b/ragas_experimental/project/experiments.py
@@ -124,16 +124,12 @@ async def __call__(self, *args, **kwargs): ...
     async def run_async(self, name: str, dataset: Dataset): ...
 
 # %% ../../nbs/project/experiments.ipynb 16
-# this one we have to clean up
-from langfuse.decorators import observe
-
-# %% ../../nbs/project/experiments.ipynb 17
 from .naming import MemorableNames
 
-# %% ../../nbs/project/experiments.ipynb 18
+# %% ../../nbs/project/experiments.ipynb 17
 memorable_names = MemorableNames()
 
-# %% ../../nbs/project/experiments.ipynb 19
+# %% ../../nbs/project/experiments.ipynb 18
 @patch
 def experiment(
     self: Project, experiment_model, name_prefix: str = ""
@@ -214,6 +210,10 @@ async def run_async(dataset: Dataset, name: t.Optional[str] = None):
 
 
 
+# %% ../../nbs/project/experiments.ipynb 22
+# this one we have to clean up
+from langfuse.decorators import observe
+
 # %% ../../nbs/project/experiments.ipynb 23
 @patch
 def langfuse_experiment(
@@ -230,37 +230,21 @@ def langfuse_experiment(
     """
 
     def decorator(func: t.Callable) -> ExperimentProtocol:
-        # First, create a base experiment wrapper
-        base_experiment = self.experiment(experiment_model, name_prefix)(func)
-
-        # Override the wrapped function to add Langfuse observation
         @wraps(func)
-        async def wrapped_with_langfuse(*args, **kwargs):
-            # wrap the function with langfuse observation
-            observed_func = observe(name=f"{name_prefix}-{func.__name__}")(func)
+        async def langfuse_wrapped_func(*args, **kwargs):
+            # Apply langfuse observation directly here
+            trace_name = f"{name_prefix}-{func.__name__}" if name_prefix else func.__name__
+            observed_func = observe(name=trace_name)(func)
             return await observed_func(*args, **kwargs)
-
-        # Replace the async function to use Langfuse
-        original_run_async = base_experiment.run_async
-
-        # Use the original run_async but with the Langfuse-wrapped function
-        async def run_async_with_langfuse(
-            dataset: Dataset, name: t.Optional[str] = None
-        ):
-            # Override the internal wrapped_experiment with our Langfuse version
-            base_experiment.__wrapped__ = wrapped_with_langfuse
-
-            # Call the original run_async which will now use our Langfuse-wrapped function
-            return await original_run_async(dataset, name)
-
-        # Replace the run_async method
-        base_experiment.__setattr__("run_async", run_async_with_langfuse)
-
-        return t.cast(ExperimentProtocol, base_experiment)
+        
+        # Now create the experiment wrapper with our already-observed function
+        experiment_wrapper = self.experiment(experiment_model, name_prefix)(langfuse_wrapped_func)
+        
+        return t.cast(ExperimentProtocol, experiment_wrapper)
 
     return decorator
 
-# %% ../../nbs/project/experiments.ipynb 24
+# %% ../../nbs/project/experiments.ipynb 30
 import logging
 from ..utils import plot_experiments_as_subplots