
Commit d60092e

added support for MetricResult
1 parent 9c77367 commit d60092e

File tree

10 files changed: +739, -38 lines

nbs/metric/base.ipynb

Lines changed: 12 additions & 13 deletions
@@ -24,16 +24,7 @@
 "execution_count": null,
 "id": "e8ccff58",
 "metadata": {},
-"outputs": [
- {
-  "name": "stderr",
-  "output_type": "stream",
-  "text": [
-   "/opt/homebrew/Caskroom/miniforge/base/envs/random/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-   " from .autonotebook import tqdm as notebook_tqdm\n"
-  ]
- }
-],
+"outputs": [],
 "source": [
 "#| export\n",
 "\n",
@@ -53,8 +44,16 @@
 "from ragas_annotator.project.core import Project\n",
 "from ragas_annotator.model.notion_model import NotionModel\n",
 "from ragas_annotator.prompt.dynamic_few_shot import DynamicFewShotPrompt\n",
-"\n",
-"\n",
+"\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"#| export\n",
 "@dataclass\n",
 "class Metric(ABC):\n",
 " \"\"\"Base class for all metrics in the LLM evaluation library.\"\"\"\n",
@@ -174,7 +173,7 @@
 {
 "data": {
 "text/plain": [
-"100"
+"1"
 ]
 },
 "execution_count": null,

nbs/metric/discrete.ipynb

Lines changed: 7 additions & 13 deletions
@@ -90,17 +90,8 @@
 "name": "stdout",
 "output_type": "stream",
 "text": [
-"med\n",
-"The given input \"this is my response\" is too vague to provide a comprehensive evaluation.\n",
-"\n",
-"Positives:\n",
-"1. Clear Statement: It's a straightforward indication that a response has been provided.\n",
-"\n",
-"Negatives:\n",
-"1. Lack of Context: Without context or additional information, it's impossible to assess the relevance or accuracy of the response.\n",
-"2. No Specificity: The response doesn't convey any specific information or insight related to a topic or question.\n",
-"\n",
-"If this response was intended to be part of a conversation or instruction, more detail would be required to make it highly effective. At present, it serves as a neutral statement without actionable or informative content.\n"
+"low\n",
+"The response is incomplete and lacks any specific information. It cannot be evaluated for helpfulness without further context or content.\n"
 ]
 }
 ],
@@ -152,9 +143,12 @@
 "#| eval: false\n",
 "from ragas_annotator.metric.result import MetricResult\n",
 "\n",
-"@discrete_metric(llm=llm,\n",
+"@discrete_metric(\n",
+" llm=llm,\n",
 " prompt=\"Evaluate if given answer is helpful\\n\\n{response}\",\n",
-" name='new_metric',values=[\"low\",\"med\",\"high\"])\n",
+" name='new_metric',\n",
+" values=[\"low\",\"med\",\"high\"]\n",
+")\n",
 "def my_metric(llm,prompt,**kwargs):\n",
 "\n",
 " class response_model(BaseModel):\n",

nbs/metric/result.ipynb

Lines changed: 112 additions & 6 deletions
@@ -22,18 +22,25 @@
 {
 "cell_type": "code",
 "execution_count": null,
-"id": "0f1c801a-6568-4ba4-8bbe-30bf154174fe",
+"id": "dcc3080c",
 "metadata": {},
 "outputs": [],
 "source": [
 "#| export\n",
 "\n",
 "import typing as t\n",
 "\n",
-"\n",
-"\n",
-"\n",
-"\n",
+"from fastcore.utils import patch"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "0f1c801a-6568-4ba4-8bbe-30bf154174fe",
+"metadata": {},
+"outputs": [],
+"source": [
+"#| export\n",
 "class MetricResult:\n",
 " \"\"\"Class to hold the result of a metric evaluation.\n",
 " \n",
@@ -248,10 +255,109 @@
 "print(list_result[1:]) # 2\n"
 ]
 },
+{
+"cell_type": "markdown",
+"id": "06ce7a1d",
+"metadata": {},
+"source": [
+"now lets make it `Pydantic` compatible also"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "5d8fb818",
+"metadata": {},
+"outputs": [],
+"source": [
+"#| export\n",
+"from pydantic_core import core_schema\n",
+"from pydantic import GetCoreSchemaHandler, ValidationInfo"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "f4c288c0",
+"metadata": {},
+"outputs": [],
+"source": [
+"#| export\n",
+"\n",
+"@patch(cls_method=True)\n",
+"def validate(cls: MetricResult, value: t.Any, info: ValidationInfo):\n",
+" \"\"\"Provide compatibility with older Pydantic versions.\"\"\"\n",
+" if isinstance(value, MetricResult):\n",
+" return value\n",
+" return MetricResult(result=value)\n",
+"\n",
+"# Add Pydantic compatibility methods\n",
+"@patch(cls_method=True)\n",
+"def __get_pydantic_core_schema__(\n",
+" cls: MetricResult, \n",
+" _source_type: t.Any, \n",
+" _handler: GetCoreSchemaHandler\n",
+") -> core_schema.CoreSchema:\n",
+" \"\"\"Generate a Pydantic core schema for MetricResult.\"\"\"\n",
+" return core_schema.with_info_plain_validator_function(cls.validate)\n",
+"\n",
+"\n",
+"@patch\n",
+"def model_dump(self: MetricResult):\n",
+" \"\"\"Support Pydantic's model_dump method.\"\"\"\n",
+" return self.to_dict()"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "f49739a6",
+"metadata": {},
+"outputs": [],
+"source": [
+"from pydantic import BaseModel\n",
+"\n",
+"class TestModel(BaseModel):\n",
+" response: str\n",
+" grade: MetricResult\n",
+" faithfulness: MetricResult\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "6ac6b955",
+"metadata": {},
+"outputs": [],
+"source": [
+"m = TestModel(response=\"test\", grade=MetricResult(result=1, reason=\"test\"), faithfulness=MetricResult(result=1, reason=\"test\"))"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"id": "4ffe750f",
+"metadata": {},
+"outputs": [
+{
+"data": {
+"text/plain": [
+"'test'"
+]
+},
+"execution_count": null,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"m.grade.reason"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,
-"id": "a984dde9",
+"id": "9d32b10f",
 "metadata": {},
 "outputs": [],
 "source": []
nbs/project/experiments.ipynb

Lines changed: 68 additions & 0 deletions
@@ -363,6 +363,74 @@
 " return decorator"
 ]
 },
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# create experimental test dataset\n",
+"test_dataset = p.create_dataset(name=\"test dataset for experiment\", model=TestModel)\n",
+"test_dataset.append(TestModel(name=\"test item 1\", description=\"test item 1 description\", price=100))\n",
+"test_dataset.append(TestModel(name=\"test item 2\", description=\"test item 2 description\", price=200))\n",
+"test_dataset.append(TestModel(name=\"test item 3\", description=\"test item 3 description\", price=300))"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [],
+"source": [
+"# create experiment model\n",
+"class TextExperimentModel(TestModel):\n",
+" response: str\n",
+" is_correct: t.Literal[\"yes\", \"no\"]\n",
+"\n",
+"# create a test experiment function\n",
+"@p.experiment(TextExperimentModel)\n",
+"async def test_experiment(item: TestModel):\n",
+" print(item)\n",
+" return TextExperimentModel(**item.model_dump(), response=\"test response\", is_correct=\"yes\")\n"
+]
+},
+{
+"cell_type": "code",
+"execution_count": null,
+"metadata": {},
+"outputs": [
+{
+"name": "stderr",
+"output_type": "stream",
+"text": [
+"100%|██████████| 3/3 [00:00<00:00, 7752.87it/s]\n"
+]
+},
+{
+"name": "stdout",
+"output_type": "stream",
+"text": [
+"name='test item 2' description='test item 2 description' price=200.0\n",
+"name='test item 1' description='test item 1 description' price=100.0\n",
+"name='test item 3' description='test item 3 description' price=300.0\n"
+]
+},
+{
+"data": {
+"text/plain": [
+"Experiment(name=keen_backus, model=TextExperimentModel)"
+]
+},
+"execution_count": null,
+"metadata": {},
+"output_type": "execute_result"
+}
+],
+"source": [
+"# run the experiment\n",
+"await test_experiment.run_async(test_dataset)"
+]
+},
 {
 "cell_type": "code",
 "execution_count": null,