moved around utils and exceptions

jjmachan · jjmachan · commit e4968fd39e3e · 2025-04-10T17:25:57.000-07:00
diff --git a/nbs/exceptions.ipynb b/nbs/exceptions.ipynb
diff --git a/nbs/model/pydantic_mode.ipynb b/nbs/model/pydantic_mode.ipynb
@@ -0,0 +1,194 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Ragas `BaseModel`\n",
+    "\n",
+    "> An extended version of Pydantic's `BaseModel` with ragas-specific additions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| default_exp model.pydantic_model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "import typing as t\n",
+    "\n",
+    "from pydantic import BaseModel, PrivateAttr\n",
+    "\n",
+    "from ragas_annotator.typing import FieldMeta as RagasFieldMeta"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "class ExtendedPydanticBaseModel(BaseModel):\n",
+    "    \"\"\"Extended Pydantic BaseModel with database integration capabilities\"\"\"\n",
+    "    \n",
+    "    # Private attribute for storing the database row_id\n",
+    "    _row_id: t.Optional[int] = PrivateAttr(default=None)\n",
+    "    \n",
+    "    # Class-level field-name -> column-ID mapping; this one dict object is shared by every subclass that does not redefine it\n",
+    "    __column_mapping__: t.ClassVar[t.Dict[str, str]] = {}\n",
+    "    \n",
+    "    def __init__(self, **data):\n",
+    "        super().__init__(**data)\n",
+    "        # Initialize column mapping if not already defined\n",
+    "        if not self.__class__.__column_mapping__:\n",
+    "            self._initialize_column_mapping()\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def _initialize_column_mapping(cls):\n",
+    "        \"\"\"Initialize mapping from field names to column IDs.\"\"\"\n",
+    "        for field_name, field_info in cls.model_fields.items():\n",
+    "            # Check if field has Column metadata (for Pydantic v2)\n",
+    "            column_id = None\n",
+    "            for extra in field_info.metadata or []:\n",
+    "                if isinstance(extra, RagasFieldMeta) and extra.id:\n",
+    "                    column_id = extra.id\n",
+    "                    break\n",
+    "            \n",
+    "            # If no Column metadata found, use field name as column ID\n",
+    "            if not column_id:\n",
+    "                column_id = field_name\n",
+    "            \n",
+    "            cls.__column_mapping__[field_name] = column_id\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def get_column_id(cls, field_name: str) -> str:\n",
+    "        \"\"\"Get the column ID for a given field name.\"\"\"\n",
+    "        if field_name not in cls.__column_mapping__:\n",
+    "            raise ValueError(f\"No column mapping found for field {field_name}\")\n",
+    "        return cls.__column_mapping__[field_name]\n",
+    "    \n",
+    "    @classmethod\n",
+    "    def set_column_id(cls, field_name: str, column_id: str):\n",
+    "        \"\"\"Set the column ID for a given field name.\"\"\"\n",
+    "        if field_name not in cls.model_fields:\n",
+    "            raise ValueError(f\"Field {field_name} not found in model\")\n",
+    "        cls.__column_mapping__[field_name] = column_id\n",
+    "    \n",
+    "    def get_db_field_mapping(self) -> t.Dict[str, str]:\n",
+    "        \"\"\"Get a mapping from field names to column IDs for this model.\"\"\"\n",
+    "        return self.__class__.__column_mapping__\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import ragas_annotator.typing as rt"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Example usage\n",
+    "class TestDataRow(ExtendedPydanticBaseModel):\n",
+    "    id: t.Optional[int] = None\n",
+    "    query: t.Annotated[str, rt.Text(id=\"search_query\")]\n",
+    "    persona: t.List[t.Literal[\"opt1\", \"opt2\", \"opt3\"]]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{}"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "TestDataRow.__column_mapping__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "t = TestDataRow(id=1, query=\"this is a test\", persona=[\"opt1\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'id': 'id', 'query': 'search_query', 'persona': 'persona'}"
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "t.__column_mapping__"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/nbs/utils.ipynb b/nbs/utils.ipynb
@@ -0,0 +1,68 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| default_exp utils"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "import string\n",
+    "import uuid"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "def create_nano_id(size=12):\n",
+    "    # Define characters to use (alphanumeric)\n",
+    "    alphabet = string.ascii_letters + string.digits\n",
+    "    \n",
+    "    # Generate UUID and convert to int\n",
+    "    uuid_int = uuid.uuid4().int\n",
+    "    \n",
+    "    # Convert to base62\n",
+    "    result = \"\"\n",
+    "    while uuid_int:\n",
+    "        uuid_int, remainder = divmod(uuid_int, len(alphabet))\n",
+    "        result = alphabet[remainder] + result\n",
+    "    \n",
+    "    # Truncate to the desired length (no padding: the ID is shorter than `size` when fewer digits exist)\n",
+    "    return result[:size]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": ".venv",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.12.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/ragas_annotator/exceptions.py b/ragas_annotator/exceptions.py
@@ -1,11 +1,11 @@
 """All the exceptions specific to the `notion_annotator` project."""
 
-# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/utils/exceptions.ipynb.
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/exceptions.ipynb.
 
 # %% auto 0
 __all__ = ['ValidationError', 'DuplicateError', 'NotFoundError']
 
-# %% ../nbs/utils/exceptions.ipynb 2
+# %% ../nbs/exceptions.ipynb 2
 class ValidationError(Exception):
     """Raised when field validation fails."""
 
diff --git a/ragas_annotator/utils.py b/ragas_annotator/utils.py
@@ -0,0 +1,25 @@
+# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/utils.ipynb.
+
+# %% auto 0
+__all__ = ['create_nano_id']
+
+# %% ../nbs/utils.ipynb 1
+import string
+import uuid
+
+# %% ../nbs/utils.ipynb 2
+def create_nano_id(size=12):
+    """Return a random ID of exactly `size` alphanumeric characters.
+
+    Every character is drawn independently and uniformly from [a-zA-Z0-9], so
+    the result never comes back shorter than requested and no position is
+    limited to a subset of the alphabet. A `size` of zero or less yields "".
+    """
+    # Imported here because this module's top-level imports do not include it.
+    import secrets
+
+    alphabet = string.ascii_letters + string.digits
+    # secrets.choice draws from the OS CSPRNG; slicing the base62 form of a
+    # single UUID4 would cap the length at 22 characters and leave the leading
+    # character restricted to a few letters.
+    return "".join(secrets.choice(alphabet) for _ in range(size))