diff --git a/nbs/backends/.notest b/nbs/backends/.notest new file mode 100644 index 0000000..e69de29 diff --git a/nbs/backends/factory.ipynb b/nbs/backends/factory.ipynb index 3967a62..c9d5595 100644 --- a/nbs/backends/factory.ipynb +++ b/nbs/backends/factory.ipynb @@ -15,7 +15,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| default_exp backends.factory" + "# | default_exp backends.factory" ] }, { @@ -24,7 +24,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "import typing as t\n", "import os\n", "\n", @@ -39,31 +39,31 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "class NotionClientFactory:\n", " \"\"\"Factory for creating Notion client instances.\"\"\"\n", - " \n", + "\n", " @staticmethod\n", " def create(\n", " use_mock: bool = False,\n", " api_key: t.Optional[str] = None,\n", " initialize_project: bool = False,\n", - " root_page_id: t.Optional[str] = None\n", + " root_page_id: t.Optional[str] = None,\n", " ) -> t.Union[NotionClient, MockNotionClient]:\n", " \"\"\"Create a Notion client.\n", - " \n", + "\n", " Args:\n", " use_mock: If True, create a mock client\n", " api_key: Notion API key (only used for real client)\n", " initialize_project: If True and using mock, initialize project structure\n", " root_page_id: Required if initialize_project is True\n", - " \n", + "\n", " Returns:\n", " Union[NotionClient, MockNotionClient]: A real or mock client\n", " \"\"\"\n", " if use_mock:\n", " client = MockNotionClient()\n", - " \n", + "\n", " # Optionally initialize project structure\n", " if initialize_project and root_page_id:\n", " # Create root page if it doesn't exist in the mock client\n", @@ -77,18 +77,24 @@ " \"archived\": False,\n", " \"properties\": {\n", " \"title\": {\n", - " \"type\": \"title\", \n", - " \"title\": [{\"plain_text\": \"Root Page\", \"type\": \"text\", \"text\": {\"content\": \"Root Page\"}}]\n", + " \"type\": \"title\",\n", + " \"title\": [\n", + " {\n", + " \"plain_text\": \"Root Page\",\n", + " \"type\": \"text\",\n", + " \"text\": {\"content\": \"Root Page\"},\n", + " }\n", + " ],\n", " }\n", - " }\n", + " },\n", " }\n", " client.add_page(root_page)\n", - " \n", + "\n", " # Create required sub-pages\n", " for page_name in [\"Datasets\", \"Experiments\", \"Comparisons\"]:\n", " # Create page ID\n", " page_id = client._create_id()\n", - " \n", + "\n", " # Create page\n", " page = {\n", " \"id\": page_id,\n", @@ -98,14 +104,20 @@ " \"archived\": False,\n", " \"properties\": {\n", " \"title\": {\n", - " \"type\": \"title\", \n", - " \"title\": [{\"plain_text\": page_name, \"type\": \"text\", \"text\": {\"content\": page_name}}]\n", + " \"type\": \"title\",\n", + " \"title\": [\n", + " {\n", + " \"plain_text\": page_name,\n", + " \"type\": \"text\",\n", + " \"text\": {\"content\": page_name},\n", + " }\n", + " ],\n", " }\n", " },\n", - " \"parent\": {\"type\": \"page_id\", \"page_id\": root_page_id}\n", + " \"parent\": {\"type\": \"page_id\", \"page_id\": root_page_id},\n", " }\n", " client.add_page(page)\n", - " \n", + "\n", " # Add child block to root\n", " child_block = {\n", " \"id\": client._create_id(),\n", @@ -113,22 +125,22 @@ " \"type\": \"child_page\",\n", " \"created_time\": client._get_timestamp(),\n", " \"last_edited_time\": client._get_timestamp(),\n", - " \"child_page\": {\n", - " \"title\": page_name\n", - " }\n", + " \"child_page\": {\"title\": page_name},\n", " }\n", - " \n", + "\n", " client.add_children(root_page_id, [child_block])\n", - " \n", + "\n", " return client\n", 
" else:\n", " # For real client, use provided API key or environment variable\n", " if api_key is None:\n", " api_key = os.getenv(\"NOTION_API_KEY\")\n", - " \n", + "\n", " if api_key is None:\n", - " raise ValueError(\"api_key must be provided or set as NOTION_API_KEY environment variable\")\n", - " \n", + " raise ValueError(\n", + " \"api_key must be provided or set as NOTION_API_KEY environment variable\"\n", + " )\n", + "\n", " return NotionClient(auth=api_key)" ] }, @@ -179,9 +191,7 @@ ], "source": [ "mock_notion_client = NotionClientFactory.create(\n", - " use_mock=True, \n", - " initialize_project=True, \n", - " root_page_id=\"your_root_page_id\"\n", + " use_mock=True, initialize_project=True, root_page_id=\"your_root_page_id\"\n", ")\n", "mock_notion_client" ] @@ -192,27 +202,27 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "class NotionBackendFactory:\n", " \"\"\"Factory for creating NotionBackend instances.\"\"\"\n", - " \n", + "\n", " @staticmethod\n", " def create(\n", " root_page_id: str,\n", " use_mock: bool = False,\n", " api_key: t.Optional[str] = None,\n", " initialize_project: bool = False,\n", - " notion_client: t.Optional[t.Union[NotionClient, MockNotionClient]] = None\n", + " notion_client: t.Optional[t.Union[NotionClient, MockNotionClient]] = None,\n", " ) -> NotionBackend:\n", " \"\"\"Create a NotionBackend instance.\n", - " \n", + "\n", " Args:\n", " root_page_id: The ID of the root page\n", " use_mock: If True, create a backend with a mock client\n", " api_key: Notion API key (only used for real client)\n", " initialize_project: If True and using mock, initialize project structure\n", " notion_client: Optional pre-configured Notion client\n", - " \n", + "\n", " Returns:\n", " NotionBackend: A backend instance with either real or mock client\n", " \"\"\"\n", @@ -222,14 +232,11 @@ " use_mock=use_mock,\n", " api_key=api_key,\n", " initialize_project=initialize_project,\n", - " root_page_id=root_page_id\n", + " root_page_id=root_page_id,\n", " )\n", - " \n", + "\n", " # Create and return the backend\n", - " return NotionBackend(\n", - " root_page_id=root_page_id,\n", - " notion_client=notion_client\n", - " )" + " return NotionBackend(root_page_id=root_page_id, notion_client=notion_client)" ] } ], diff --git a/nbs/backends/mock_notion_client.ipynb b/nbs/backends/mock_notion_client.ipynb index 5fa0acb..1c26c2f 100644 --- a/nbs/backends/mock_notion_client.ipynb +++ b/nbs/backends/mock_notion_client.ipynb @@ -15,7 +15,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| default_exp backends.mock_notion" + "# | default_exp backends.mock_notion" ] }, { @@ -24,7 +24,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "import typing as t\n", "import uuid\n", "from copy import deepcopy\n", @@ -39,17 +39,17 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "class MockPagesAPI:\n", " \"\"\"Mock implementation of notion_client.Client.pages\"\"\"\n", - " \n", + "\n", " def __init__(self, client):\n", " self.client = client\n", - " \n", + "\n", " def create(self, parent, properties, **kwargs):\n", " \"\"\"Create a new page.\"\"\"\n", " page_id = self.client._create_id()\n", - " \n", + "\n", " # Create the page object\n", " page = {\n", " \"id\": page_id,\n", @@ -58,16 +58,16 @@ " \"last_edited_time\": self.client._get_timestamp(),\n", " \"archived\": False,\n", " \"properties\": deepcopy(properties),\n", - " \"parent\": deepcopy(parent)\n", + " \"parent\": deepcopy(parent),\n", " }\n", 
- " \n", + "\n", " # Add page to storage\n", " self.client._pages[page_id] = page\n", - " \n", + "\n", " # Add child reference to parent\n", " parent_type = parent.get(\"type\")\n", " parent_id = parent.get(f\"{parent_type}_id\")\n", - " \n", + "\n", " if parent_id:\n", " child_block = {\n", " \"id\": self.client._create_id(),\n", @@ -75,50 +75,50 @@ " \"type\": \"child_page\",\n", " \"created_time\": self.client._get_timestamp(),\n", " \"last_edited_time\": self.client._get_timestamp(),\n", - " \"child_page\": {\n", - " \"title\": self._extract_title(properties)\n", - " }\n", + " \"child_page\": {\"title\": self._extract_title(properties)},\n", " }\n", - " \n", + "\n", " if parent_id not in self.client._children:\n", " self.client._children[parent_id] = []\n", - " \n", + "\n", " self.client._children[parent_id].append(child_block)\n", - " \n", + "\n", " return deepcopy(page)\n", - " \n", + "\n", " def retrieve(self, page_id):\n", " \"\"\"Retrieve a page by ID.\"\"\"\n", " if page_id not in self.client._pages:\n", " raise NotFoundError(f\"Page {page_id} not found\")\n", - " \n", + "\n", " return deepcopy(self.client._pages[page_id])\n", - " \n", + "\n", " def update(self, page_id, properties=None, archived=None, **kwargs):\n", " \"\"\"Update a page.\"\"\"\n", " if page_id not in self.client._pages:\n", " raise NotFoundError(f\"Page {page_id} not found\")\n", - " \n", + "\n", " page = self.client._pages[page_id]\n", - " \n", + "\n", " if properties:\n", " # Update properties\n", " for key, value in properties.items():\n", " page[\"properties\"][key] = deepcopy(value)\n", - " \n", + "\n", " if archived is not None:\n", " page[\"archived\"] = archived\n", - " \n", + "\n", " page[\"last_edited_time\"] = self.client._get_timestamp()\n", - " \n", + "\n", " return deepcopy(page)\n", - " \n", + "\n", " def _extract_title(self, properties):\n", " \"\"\"Extract page title from properties.\"\"\"\n", " for prop in properties.values():\n", " if prop.get(\"type\") == \"title\" and prop.get(\"title\"):\n", " for text_obj in prop[\"title\"]:\n", - " if text_obj.get(\"type\") == \"text\" and \"content\" in text_obj.get(\"text\", {}):\n", + " if text_obj.get(\"type\") == \"text\" and \"content\" in text_obj.get(\n", + " \"text\", {}\n", + " ):\n", " return text_obj[\"text\"][\"content\"]\n", " return \"Untitled\"" ] @@ -129,17 +129,17 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "class MockDatabasesAPI:\n", " \"\"\"Mock implementation of notion_client.Client.databases\"\"\"\n", - " \n", + "\n", " def __init__(self, client):\n", " self.client = client\n", - " \n", + "\n", " def create(self, parent, title, properties, **kwargs):\n", " \"\"\"Create a new database.\"\"\"\n", " database_id = self.client._create_id()\n", - " \n", + "\n", " # Create database object\n", " database = {\n", " \"id\": database_id,\n", @@ -148,16 +148,16 @@ " \"last_edited_time\": self.client._get_timestamp(),\n", " \"title\": deepcopy(title),\n", " \"properties\": deepcopy(properties),\n", - " \"parent\": deepcopy(parent)\n", + " \"parent\": deepcopy(parent),\n", " }\n", - " \n", + "\n", " # Add database to storage\n", " self.client._databases[database_id] = database\n", - " \n", + "\n", " # Add child reference to parent\n", " parent_type = parent.get(\"type\")\n", " parent_id = parent.get(f\"{parent_type}_id\")\n", - " \n", + "\n", " if parent_id:\n", " child_block = {\n", " \"id\": self.client._create_id(),\n", @@ -165,51 +165,56 @@ " \"type\": \"child_database\",\n", " \"created_time\": 
self.client._get_timestamp(),\n", " \"last_edited_time\": self.client._get_timestamp(),\n", - " \"child_database\": {\n", - " \"title\": self._extract_title(title)\n", - " }\n", + " \"child_database\": {\"title\": self._extract_title(title)},\n", " }\n", - " \n", + "\n", " if parent_id not in self.client._children:\n", " self.client._children[parent_id] = []\n", - " \n", + "\n", " self.client._children[parent_id].append(child_block)\n", - " \n", + "\n", " return deepcopy(database)\n", - " \n", + "\n", " def retrieve(self, database_id):\n", " \"\"\"Retrieve a database by ID.\"\"\"\n", " if database_id not in self.client._databases:\n", " raise NotFoundError(f\"Database {database_id} not found\")\n", - " \n", + "\n", " return deepcopy(self.client._databases[database_id])\n", - " \n", - " def query(self, database_id, filter=None, sorts=None, start_cursor=None, page_size=100, **kwargs):\n", + "\n", + " def query(\n", + " self,\n", + " database_id,\n", + " filter=None,\n", + " sorts=None,\n", + " start_cursor=None,\n", + " page_size=100,\n", + " **kwargs,\n", + " ):\n", " \"\"\"Query a database.\"\"\"\n", " if database_id not in self.client._databases:\n", " raise NotFoundError(f\"Database {database_id} not found\")\n", - " \n", + "\n", " # Get all pages in the database\n", " results = []\n", " for page_id, page in self.client._pages.items():\n", " parent = page.get(\"parent\", {})\n", - " if parent.get(\"type\") == \"database_id\" and parent.get(\"database_id\") == database_id:\n", + " if (\n", + " parent.get(\"type\") == \"database_id\"\n", + " and parent.get(\"database_id\") == database_id\n", + " ):\n", " results.append(deepcopy(page))\n", - " \n", + "\n", " # TODO: Implement filtering, sorting, and pagination if needed\n", - " \n", - " return {\n", - " \"results\": results,\n", - " \"has_more\": False, \n", - " \"next_cursor\": None\n", - " }\n", - " \n", + "\n", + " return {\"results\": results, \"has_more\": False, \"next_cursor\": None}\n", + "\n", " def _extract_title(self, title):\n", " \"\"\"Extract database title from title array.\"\"\"\n", " for text_obj in title:\n", " if text_obj.get(\"type\") == \"text\" and \"content\" in text_obj.get(\"text\", {}):\n", " return text_obj[\"text\"][\"content\"]\n", - " return \"Untitled\"\n" + " return \"Untitled\"" ] }, { @@ -218,39 +223,35 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "class MockBlocksAPI:\n", " \"\"\"Mock implementation of notion_client.Client.blocks\"\"\"\n", - " \n", + "\n", " def __init__(self, client):\n", " self.client = client\n", " self.children = MockBlockChildrenAPI(client)\n", - " \n", + "\n", " def retrieve(self, block_id):\n", " \"\"\"Retrieve a block by ID.\"\"\"\n", " if block_id not in self.client._blocks:\n", " raise NotFoundError(f\"Block {block_id} not found\")\n", - " \n", + "\n", " return deepcopy(self.client._blocks[block_id])\n", - " \n", + "\n", "\n", "class MockBlockChildrenAPI:\n", " \"\"\"Mock implementation of notion_client.Client.blocks.children\"\"\"\n", - " \n", + "\n", " def __init__(self, client):\n", " self.client = client\n", - " \n", + "\n", " def list(self, block_id, start_cursor=None, page_size=100):\n", " \"\"\"List children of a block.\"\"\"\n", " children = self.client._children.get(block_id, [])\n", - " \n", + "\n", " # TODO: Implement pagination if needed\n", - " \n", - " return {\n", - " \"results\": deepcopy(children),\n", - " \"has_more\": False,\n", - " \"next_cursor\": None\n", - " }" + "\n", + " return {\"results\": deepcopy(children), 
\"has_more\": False, \"next_cursor\": None}" ] }, { @@ -259,13 +260,13 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "class MockNotionClient:\n", " \"\"\"Mock implementation of notion_client.Client for testing.\"\"\"\n", - " \n", + "\n", " def __init__(self, auth=None):\n", " \"\"\"Initialize the mock client with in-memory storage.\n", - " \n", + "\n", " Args:\n", " auth: Ignored in mock implementation\n", " \"\"\"\n", @@ -274,32 +275,32 @@ " self._databases = {} # database_id -> database object\n", " self._blocks = {} # block_id -> block object\n", " self._children = {} # parent_id -> list of child blocks\n", - " \n", + "\n", " # Create API namespaces to match real client\n", " self.pages = MockPagesAPI(self)\n", " self.databases = MockDatabasesAPI(self)\n", " self.blocks = MockBlocksAPI(self)\n", - " \n", + "\n", " def _get_timestamp(self):\n", " \"\"\"Generate a timestamp in Notion API format.\"\"\"\n", " return datetime.utcnow().isoformat() + \"Z\"\n", - " \n", + "\n", " def _create_id(self):\n", " \"\"\"Generate a random ID in Notion format.\"\"\"\n", " return str(uuid.uuid4()).replace(\"-\", \"\")\n", - " \n", + "\n", " def add_page(self, page_data):\n", " \"\"\"Add a page to the mock storage.\"\"\"\n", " self._pages[page_data[\"id\"]] = deepcopy(page_data)\n", - " \n", + "\n", " def add_database(self, database_data):\n", " \"\"\"Add a database to the mock storage.\"\"\"\n", " self._databases[database_data[\"id\"]] = deepcopy(database_data)\n", - " \n", + "\n", " def add_block(self, block_data):\n", " \"\"\"Add a block to the mock storage.\"\"\"\n", " self._blocks[block_data[\"id\"]] = deepcopy(block_data)\n", - " \n", + "\n", " def add_children(self, parent_id, children):\n", " \"\"\"Add children to a parent.\"\"\"\n", " if parent_id not in self._children:\n", @@ -311,7 +312,7 @@ " len(self._pages), len(self._databases), len(self._blocks)\n", " )\n", "\n", - " __repr__ = __str__\n" + " __repr__ = __str__" ] } ], diff --git a/nbs/backends/ragas_api_client.ipynb b/nbs/backends/ragas_api_client.ipynb new file mode 100644 index 0000000..9707aca --- /dev/null +++ b/nbs/backends/ragas_api_client.ipynb @@ -0,0 +1,1973 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# `Ragas API Client`\n", + "\n", + "> Python client to api.ragas.io" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| default_exp backends.ragas_api_client" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "RAGAS_APP_TOKEN = \"apt.47bd-c55e4a45b27c-02f8-8446-1441f09b-651a8\"\n", + "RAGAS_API_ENDPOINT = \"https://api.dev.app.ragas.io\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import httpx\n", + "import asyncio\n", + "import typing as t\n", + "from pydantic import BaseModel, Field\n", + "from enum import StrEnum\n", + "import uuid\n", + "from fastcore.utils import patch" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class RagasRelay:\n", + " \"\"\"Client for the Ragas Relay API.\"\"\"\n", + "\n", + " def __init__(self, base_url: str, app_token: t.Optional[str] = None):\n", + " \"\"\"Initialize the Ragas API client.\n", + " \n", + " Args:\n", + " base_url: Base URL for the API (e.g., \"http://localhost:8087\")\n", + " app_token: API token for 
authentication\n", + " \"\"\"\n", + " if not app_token:\n", + " raise ValueError(\"app_token must be provided\")\n", + "\n", + " self.base_url = f\"{base_url.rstrip('/')}/api/v1\"\n", + " self.app_token = app_token\n", + "\n", + " async def _request(\n", + " self,\n", + " method: str,\n", + " endpoint: str,\n", + " params: t.Optional[t.Dict] = None,\n", + " json_data: t.Optional[t.Dict] = None,\n", + " ) -> t.Dict:\n", + " \"\"\"Make a request to the API.\n", + " \n", + " Args:\n", + " method: HTTP method (GET, POST, PATCH, DELETE)\n", + " endpoint: API endpoint path\n", + " params: Query parameters\n", + " json_data: JSON request body\n", + " \n", + " Returns:\n", + " The response data from the API\n", + " \"\"\"\n", + " url = f\"{self.base_url}/{endpoint.lstrip('/')}\"\n", + " headers = {\"X-App-Token\": self.app_token}\n", + "\n", + " async with httpx.AsyncClient() as client:\n", + " response = await client.request(\n", + " method=method, url=url, params=params, json=json_data, headers=headers\n", + " )\n", + "\n", + " data = response.json()\n", + "\n", + " if response.status_code >= 400 or data.get(\"status\") == \"error\":\n", + " error_msg = data.get(\"message\", \"Unknown error\")\n", + " raise Exception(f\"API Error ({response.status_code}): {error_msg}\")\n", + "\n", + " return data.get(\"data\")\n", + "\n", + " #---- Resource Handlers ----\n", + " async def _create_resource(self, path, data):\n", + " \"\"\"Generic resource creation.\"\"\"\n", + " return await self._request(\"POST\", path, json_data=data)\n", + " \n", + " async def _list_resources(self, path, **params):\n", + " \"\"\"Generic resource listing.\"\"\"\n", + " return await self._request(\"GET\", path, params=params)\n", + " \n", + " async def _get_resource(self, path):\n", + " \"\"\"Generic resource retrieval.\"\"\"\n", + " return await self._request(\"GET\", path)\n", + " \n", + " async def _update_resource(self, path, data):\n", + " \"\"\"Generic resource update.\"\"\"\n", + " return await self._request(\"PATCH\", path, json_data=data)\n", + " \n", + " async def _delete_resource(self, path):\n", + " \"\"\"Generic resource deletion.\"\"\"\n", + " return await self._request(\"DELETE\", path)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "#---- Projects ----\n", + "@patch\n", + "async def list_projects(\n", + " self: RagasRelay,\n", + " ids: t.Optional[t.List[str]] = None,\n", + " limit: int = 50,\n", + " offset: int = 0,\n", + " order_by: t.Optional[str] = None,\n", + " sort_dir: t.Optional[str] = None,\n", + ") -> t.Dict:\n", + " \"\"\"List projects.\"\"\"\n", + " params = {\"limit\": limit, \"offset\": offset}\n", + "\n", + " if ids:\n", + " params[\"ids\"] = \",\".join(ids)\n", + "\n", + " if order_by:\n", + " params[\"order_by\"] = order_by\n", + "\n", + " if sort_dir:\n", + " params[\"sort_dir\"] = sort_dir\n", + "\n", + " return await self._list_resources(\"projects\", **params)\n", + "\n", + "@patch\n", + "async def get_project(self: RagasRelay, project_id: str) -> t.Dict:\n", + " \"\"\"Get a specific project by ID.\"\"\"\n", + " return await self._get_resource(f\"projects/{project_id}\")\n", + "\n", + "@patch\n", + "async def create_project(\n", + " self: RagasRelay, title: str, description: t.Optional[str] = None\n", + ") -> t.Dict:\n", + " \"\"\"Create a new project.\"\"\"\n", + " data = {\"title\": title}\n", + " if description:\n", + " data[\"description\"] = description\n", + " return await 
self._create_resource(\"projects\", data)\n", + "\n", + "@patch\n", + "async def update_project(\n", + " self: RagasRelay,\n", + " project_id: str,\n", + " title: t.Optional[str] = None,\n", + " description: t.Optional[str] = None,\n", + ") -> t.Dict:\n", + " \"\"\"Update an existing project.\"\"\"\n", + " data = {}\n", + " if title:\n", + " data[\"title\"] = title\n", + " if description:\n", + " data[\"description\"] = description\n", + " return await self._update_resource(f\"projects/{project_id}\", data)\n", + "\n", + "@patch\n", + "async def delete_project(self: RagasRelay, project_id: str) -> None:\n", + " \"\"\"Delete a project.\"\"\"\n", + " await self._delete_resource(f\"projects/{project_id}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Found 2 projects:\n", + "Error: string indices must be integers, not 'str'\n" + ] + } + ], + "source": [ + "# Initialize client with your authentication token\n", + "client = RagasRelay(base_url=RAGAS_API_ENDPOINT, app_token=RAGAS_APP_TOKEN)\n", + "\n", + "# List projects\n", + "try:\n", + " projects = await client.list_projects(limit=10)\n", + " print(f\"Found {len(projects)} projects:\")\n", + " for project in projects:\n", + " print(f\"- {project['title']} (ID: {project['id']})\")\n", + "except Exception as e:\n", + " print(f\"Error: {e}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Projects" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': '59cf2483-d2c7-4306-af87-bfac2813f27b',\n", + " 'title': 'test project',\n", + " 'description': 'test description',\n", + " 'created_at': '2025-04-09T05:57:57.991728+00:00',\n", + " 'updated_at': '2025-04-09T05:57:57.991728+00:00'}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "await client.create_project(\"test project\", \"test description\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'items': [{'id': '59cf2483-d2c7-4306-af87-bfac2813f27b',\n", + " 'title': 'test project',\n", + " 'description': 'test description',\n", + " 'created_at': '2025-04-09T05:57:57.991728+00:00',\n", + " 'updated_at': '2025-04-09T05:57:57.991728+00:00'},\n", + " {'id': 'c026b63c-d618-42c0-81c3-d7824c976eb1',\n", + " 'title': 'test project',\n", + " 'description': 'test description',\n", + " 'created_at': '2025-04-08T22:46:04.045516+00:00',\n", + " 'updated_at': '2025-04-08T22:46:04.045516+00:00'},\n", + " {'id': '3dd738de-49f7-494c-aa0a-f6531d3b603a',\n", + " 'title': 'RagasTest',\n", + " 'description': '',\n", + " 'created_at': '2025-04-08T17:45:32.759553+00:00',\n", + " 'updated_at': '2025-04-08T17:45:32.759553+00:00'},\n", + " {'id': '2f45d026-1b13-4851-a36d-c7680edb6380',\n", + " 'title': 'test project',\n", + " 'description': 'test description',\n", + " 'created_at': '2025-04-08T14:38:14.61165+00:00',\n", + " 'updated_at': '2025-04-08T14:38:14.61165+00:00'},\n", + " {'id': 'e1b3f1e4-d344-48f4-a178-84e7e32e6ab6',\n", + " 'title': 'test project',\n", + " 'description': 'test description',\n", + " 'created_at': '2025-03-30T02:33:38.751793+00:00',\n", + " 'updated_at': '2025-03-30T02:33:38.751793+00:00'}],\n", + " 'pagination': {'offset': 0,\n", + " 'limit': 50,\n", + " 'total': 5,\n", + " 'order_by': 'created_at',\n", + " 
'sort_dir': 'desc'}}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "await client.list_projects()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "TEST_PROJECT_ID = \"e1b3f1e4-d344-48f4-a178-84e7e32e6ab6\"\n", + "project = await client.get_project(TEST_PROJECT_ID)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "#---- Datasets ----\n", + "@patch\n", + "async def list_datasets(\n", + " self: RagasRelay,\n", + " project_id: str,\n", + " limit: int = 50,\n", + " offset: int = 0,\n", + " order_by: t.Optional[str] = None,\n", + " sort_dir: t.Optional[str] = None,\n", + ") -> t.Dict:\n", + " \"\"\"List datasets in a project.\"\"\"\n", + " params = {\"limit\": limit, \"offset\": offset}\n", + " if order_by:\n", + " params[\"order_by\"] = order_by\n", + " if sort_dir:\n", + " params[\"sort_dir\"] = sort_dir\n", + " return await self._list_resources(f\"projects/{project_id}/datasets\", **params)\n", + "\n", + "@patch\n", + "async def get_dataset(self: RagasRelay, project_id: str, dataset_id: str) -> t.Dict:\n", + " \"\"\"Get a specific dataset.\"\"\"\n", + " return await self._get_resource(f\"projects/{project_id}/datasets/{dataset_id}\")\n", + "\n", + "@patch\n", + "async def create_dataset(\n", + " self: RagasRelay, project_id: str, name: str, description: t.Optional[str] = None\n", + ") -> t.Dict:\n", + " \"\"\"Create a new dataset in a project.\"\"\"\n", + " data = {\"name\": name}\n", + " if description:\n", + " data[\"description\"] = description\n", + " return await self._create_resource(f\"projects/{project_id}/datasets\", data)\n", + "\n", + "@patch\n", + "async def update_dataset(\n", + " self: RagasRelay,\n", + " project_id: str,\n", + " dataset_id: str,\n", + " name: t.Optional[str] = None,\n", + " description: t.Optional[str] = None,\n", + ") -> t.Dict:\n", + " \"\"\"Update an existing dataset.\"\"\"\n", + " data = {}\n", + " if name:\n", + " data[\"name\"] = name\n", + " if description:\n", + " data[\"description\"] = description\n", + " return await self._update_resource(f\"projects/{project_id}/datasets/{dataset_id}\", data)\n", + "\n", + "@patch\n", + "async def delete_dataset(self: RagasRelay, project_id: str, dataset_id: str) -> None:\n", + " \"\"\"Delete a dataset.\"\"\"\n", + " await self._delete_resource(f\"projects/{project_id}/datasets/{dataset_id}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('59cf2483-d2c7-4306-af87-bfac2813f27b',\n", + " 'e1b3f1e4-d344-48f4-a178-84e7e32e6ab6')" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# check project ID\n", + "projects = await client.list_projects()\n", + "projects[\"items\"][0][\"id\"], TEST_PROJECT_ID" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "New dataset created: {'id': '656e632c-6f0b-4046-929e-05b98fb14eaa', 'name': 'New Dataset', 'description': 'This is a new dataset', 'updated_at': '2025-04-09T05:59:04.352084+00:00', 'created_at': '2025-04-09T05:59:04.352084+00:00', 'version_counter': 0, 'project_id': '59cf2483-d2c7-4306-af87-bfac2813f27b'}\n" 
+     ]
+    }
+   ],
+   "source": [
+    "# Create a new dataset\n",
+    "new_dataset = await client.create_dataset(\n",
+    "    projects[\"items\"][0][\"id\"], \"New Dataset\", \"This is a new dataset\"\n",
+    ")\n",
+    "print(f\"New dataset created: {new_dataset}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Found 2 datasets\n"
+     ]
+    }
+   ],
+   "source": [
+    "# List datasets in the project\n",
+    "datasets = await client.list_datasets(projects[\"items\"][0][\"id\"])\n",
+    "print(f\"Found {len(datasets)} datasets\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Updated dataset: {'id': '656e632c-6f0b-4046-929e-05b98fb14eaa', 'name': 'Updated Dataset', 'description': 'This is an updated dataset', 'created_at': '2025-04-09T05:59:04.352084+00:00', 'updated_at': '2025-04-09T05:59:10.388986+00:00', 'version_counter': 0, 'project_id': '59cf2483-d2c7-4306-af87-bfac2813f27b'}\n"
+     ]
+    }
+   ],
+   "source": [
+    "updated_dataset = await client.update_dataset(\n",
+    "    projects[\"items\"][0][\"id\"],\n",
+    "    datasets[\"items\"][0][\"id\"],\n",
+    "    \"Updated Dataset\",\n",
+    "    \"This is an updated dataset\",\n",
+    ")\n",
+    "print(f\"Updated dataset: {updated_dataset}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Dataset deleted\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Delete the dataset\n",
+    "await client.delete_dataset(projects[\"items\"][0][\"id\"], datasets[\"items\"][0][\"id\"])\n",
+    "print(\"Dataset deleted\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Experiments"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "#---- Experiments ----\n",
+    "@patch\n",
+    "async def list_experiments(\n",
+    "    self: RagasRelay,\n",
+    "    project_id: str,\n",
+    "    limit: int = 50,\n",
+    "    offset: int = 0,\n",
+    "    order_by: t.Optional[str] = None,\n",
+    "    sort_dir: t.Optional[str] = None,\n",
+    ") -> t.Dict:\n",
+    "    \"\"\"List experiments in a project.\"\"\"\n",
+    "    params = {\"limit\": limit, \"offset\": offset}\n",
+    "    if order_by:\n",
+    "        params[\"order_by\"] = order_by\n",
+    "    if sort_dir:\n",
+    "        params[\"sort_dir\"] = sort_dir\n",
+    "    return await self._list_resources(f\"projects/{project_id}/experiments\", **params)\n",
+    "\n",
+    "@patch\n",
+    "async def get_experiment(self: RagasRelay, project_id: str, experiment_id: str) -> t.Dict:\n",
+    "    \"\"\"Get a specific experiment.\"\"\"\n",
+    "    return await self._get_resource(f\"projects/{project_id}/experiments/{experiment_id}\")\n",
+    "\n",
+    "@patch\n",
+    "async def create_experiment(\n",
+    "    self: RagasRelay, project_id: str, name: str, description: t.Optional[str] = None\n",
+    ") -> t.Dict:\n",
+    "    \"\"\"Create a new experiment in a project.\"\"\"\n",
+    "    data = {\"name\": name}\n",
+    "    if description:\n",
+    "        data[\"description\"] = description\n",
+    "    return await self._create_resource(f\"projects/{project_id}/experiments\", data)\n",
+    "\n",
+    "@patch\n",
+    "async def update_experiment(\n",
+    "    self: RagasRelay,\n",
+    "    project_id: str,\n",
+    "    experiment_id: str,\n",
+    "    name: t.Optional[str] = None,\n",
+    "    description: t.Optional[str] = None,\n",
+    ") -> t.Dict:\n",
+    "    \"\"\"Update an existing experiment.\"\"\"\n",
+    "    
data = {}\n", + " if name:\n", + " data[\"name\"] = name\n", + " if description:\n", + " data[\"description\"] = description\n", + " return await self._update_resource(f\"projects/{project_id}/experiments/{experiment_id}\", data)\n", + "\n", + "@patch\n", + "async def delete_experiment(self: RagasRelay, project_id: str, experiment_id: str) -> None:\n", + " \"\"\"Delete an experiment.\"\"\"\n", + " await self._delete_resource(f\"projects/{project_id}/experiments/{experiment_id}\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "New experiment created: {'id': '783c34ec-9d92-4bf9-9a45-d64746e58ba9', 'name': 'New Experiment', 'description': 'This is a new experiment', 'updated_at': '2025-04-09T06:00:01.80579+00:00', 'created_at': '2025-04-09T06:00:01.80579+00:00', 'version_counter': 0, 'project_id': '59cf2483-d2c7-4306-af87-bfac2813f27b'}\n", + "Found 2 experiments\n", + "Experiment: {'id': '783c34ec-9d92-4bf9-9a45-d64746e58ba9', 'name': 'New Experiment', 'description': 'This is a new experiment', 'created_at': '2025-04-09T06:00:01.80579+00:00', 'updated_at': '2025-04-09T06:00:01.80579+00:00', 'version_counter': 0, 'project_id': '59cf2483-d2c7-4306-af87-bfac2813f27b'}\n", + "Updated experiment: {'id': '783c34ec-9d92-4bf9-9a45-d64746e58ba9', 'name': 'Updated Experiment', 'description': 'This is an updated experiment', 'created_at': '2025-04-09T06:00:01.80579+00:00', 'updated_at': '2025-04-09T06:00:04.394175+00:00', 'version_counter': 0, 'project_id': '59cf2483-d2c7-4306-af87-bfac2813f27b'}\n", + "Experiment deleted\n" + ] + } + ], + "source": [ + "# create a new experiment\n", + "new_experiment = await client.create_experiment(\n", + " projects[\"items\"][0][\"id\"], \"New Experiment\", \"This is a new experiment\"\n", + ")\n", + "print(f\"New experiment created: {new_experiment}\")\n", + "# list experiments\n", + "experiments = await client.list_experiments(projects[\"items\"][0][\"id\"])\n", + "print(f\"Found {len(experiments)} experiments\")\n", + "# get a specific experiment\n", + "experiment = await client.get_experiment(\n", + " projects[\"items\"][0][\"id\"], experiments[\"items\"][0][\"id\"]\n", + ")\n", + "print(f\"Experiment: {experiment}\")\n", + "# update an experiment\n", + "updated_experiment = await client.update_experiment(\n", + " projects[\"items\"][0][\"id\"],\n", + " experiments[\"items\"][0][\"id\"],\n", + " \"Updated Experiment\",\n", + " \"This is an updated experiment\",\n", + ")\n", + "print(f\"Updated experiment: {updated_experiment}\")\n", + "# delete an experiment\n", + "await client.delete_experiment(projects[\"items\"][0][\"id\"], experiments[\"items\"][0][\"id\"])\n", + "print(\"Experiment deleted\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'items': [{'id': '78fd6c58-7edf-4239-93d1-4f49185d8e49',\n", + " 'name': 'New Experiment',\n", + " 'description': 'This is a new experiment',\n", + " 'created_at': '2025-03-30T06:31:31.689269+00:00',\n", + " 'updated_at': '2025-03-30T06:31:31.689269+00:00',\n", + " 'project_id': 'e1b3f1e4-d344-48f4-a178-84e7e32e6ab6'}],\n", + " 'pagination': {'offset': 0,\n", + " 'limit': 50,\n", + " 'total': 1,\n", + " 'order_by': 'created_at',\n", + " 'sort_dir': 'asc'}}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "await client.list_experiments(TEST_PROJECT_ID)" + ] 
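+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Note that every list endpoint returns an envelope of the form `{\"items\": [...], \"pagination\": {...}}` rather than a bare list, which is why the examples above index into `items`. The next cell is a minimal sketch (not part of the exported client) of a hypothetical `fetch_all_experiments` helper that pages through results using the `offset`/`limit`/`total` fields visible in the output above."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hypothetical paging helper -- an illustrative sketch, not part of the\n",
+    "# exported client. It walks the {\"items\", \"pagination\"} envelope until\n",
+    "# pagination[\"total\"] is exhausted.\n",
+    "async def fetch_all_experiments(\n",
+    "    client: RagasRelay, project_id: str, page_size: int = 50\n",
+    ") -> t.List[t.Dict]:\n",
+    "    items = []\n",
+    "    offset = 0\n",
+    "    while True:\n",
+    "        response = await client.list_experiments(\n",
+    "            project_id, limit=page_size, offset=offset\n",
+    "        )\n",
+    "        items.extend(response[\"items\"])\n",
+    "        if offset + page_size >= response[\"pagination\"][\"total\"]:\n",
+    "            return items\n",
+    "        offset += page_size\n",
+    "\n",
+    "# all_experiments = await fetch_all_experiments(client, TEST_PROJECT_ID)"
+   ]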
+ }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Columns (for datasets)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The API supports the following column types:\n", + "\n", + "- `number`: Numeric values\n", + "- `longText`: Text content\n", + "- `select`: Single selection from predefined options\n", + "- `date`: Date values\n", + "- `multiSelect`: Multiple selections from predefined options\n", + "- `checkbox`: Boolean values\n", + "- `custom`: Custom column types with specific behavior\n", + "\n", + "Each column type has specific settings that can be configured through the `settings` object." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "class ColumnType(StrEnum):\n", + " NUMBER = \"number\"\n", + " TEXT = \"text\"\n", + " LONG_TEXT = \"longText\"\n", + " SELECT = \"select\"\n", + " DATE = \"date\"\n", + " MULTI_SELECT = \"multiSelect\"\n", + " CHECKBOX = \"checkbox\"\n", + " CUSTOM = \"custom\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "\n", + "#---- Dataset Columns ----\n", + "@patch\n", + "async def list_dataset_columns(\n", + " self: RagasRelay,\n", + " project_id: str,\n", + " dataset_id: str,\n", + " limit: int = 50,\n", + " offset: int = 0,\n", + " order_by: t.Optional[str] = None,\n", + " sort_dir: t.Optional[str] = None,\n", + ") -> t.Dict:\n", + " \"\"\"List columns in a dataset.\"\"\"\n", + " params = {\"limit\": limit, \"offset\": offset}\n", + " if order_by:\n", + " params[\"order_by\"] = order_by\n", + " if sort_dir:\n", + " params[\"sort_dir\"] = sort_dir\n", + " return await self._list_resources(\n", + " f\"projects/{project_id}/datasets/{dataset_id}/columns\", **params\n", + " )\n", + "\n", + "@patch\n", + "async def get_dataset_column(\n", + " self: RagasRelay, project_id: str, dataset_id: str, column_id: str\n", + ") -> t.Dict:\n", + " \"\"\"Get a specific column in a dataset.\"\"\"\n", + " return await self._get_resource(\n", + " f\"projects/{project_id}/datasets/{dataset_id}/columns/{column_id}\"\n", + " )\n", + "\n", + "@patch\n", + "async def create_dataset_column(\n", + " self: RagasRelay,\n", + " project_id: str,\n", + " dataset_id: str,\n", + " id: str,\n", + " name: str,\n", + " type: str,\n", + " col_order: t.Optional[int] = None,\n", + " settings: t.Optional[t.Dict] = None,\n", + ") -> t.Dict:\n", + " \"\"\"Create a new column in a dataset.\"\"\"\n", + " data = {\"id\": id, \"name\": name, \"type\": type}\n", + " if col_order is not None:\n", + " data[\"col_order\"] = col_order\n", + " if settings:\n", + " data[\"settings\"] = settings\n", + " return await self._create_resource(\n", + " f\"projects/{project_id}/datasets/{dataset_id}/columns\", data\n", + " )\n", + "\n", + "@patch\n", + "async def update_dataset_column(\n", + " self: RagasRelay, project_id: str, dataset_id: str, column_id: str, **column_data\n", + ") -> t.Dict:\n", + " \"\"\"Update an existing column in a dataset.\"\"\"\n", + " return await self._update_resource(\n", + " f\"projects/{project_id}/datasets/{dataset_id}/columns/{column_id}\",\n", + " column_data,\n", + " )\n", + "\n", + "@patch\n", + "async def delete_dataset_column(\n", + " self: RagasRelay, project_id: str, dataset_id: str, column_id: str\n", + ") -> None:\n", + " \"\"\"Delete a column from a dataset.\"\"\"\n", + " await self._delete_resource(\n", + " 
f\"projects/{project_id}/datasets/{dataset_id}/columns/{column_id}\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': '9f0f1ac6-beb6-4bda-95d4-9d2a7dc4b837',\n", + " 'name': 'New Dataset for testing columns',\n", + " 'description': 'This is a new dataset for testing columns',\n", + " 'updated_at': '2025-04-09T06:01:40.671173+00:00',\n", + " 'created_at': '2025-04-09T06:01:40.671173+00:00',\n", + " 'version_counter': 0,\n", + " 'project_id': '59cf2483-d2c7-4306-af87-bfac2813f27b'}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "datasets = await client.create_dataset(\n", + " projects[\"items\"][0][\"id\"],\n", + " \"New Dataset for testing columns\",\n", + " \"This is a new dataset for testing columns\",\n", + ")\n", + "datasets" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': 'new_column_3',\n", + " 'name': 'New Column 3',\n", + " 'type': 'longText',\n", + " 'settings': {'id': 'new_column_3',\n", + " 'name': 'New Column 3',\n", + " 'type': 'longText',\n", + " 'max_length': 255,\n", + " 'is_required': True},\n", + " 'created_at': '2025-04-09T06:01:41.978634+00:00',\n", + " 'updated_at': '2025-04-09T06:01:41.978634+00:00',\n", + " 'datatable_id': '9f0f1ac6-beb6-4bda-95d4-9d2a7dc4b837'}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# add a new column to the dataset\n", + "new_column = await client.create_dataset_column(\n", + " project_id=projects[\"items\"][0][\"id\"],\n", + " dataset_id=datasets[\"id\"],\n", + " id=\"new_column_3\",\n", + " name=\"New Column 3\",\n", + " type=ColumnType.TEXT.value,\n", + " settings={\n", + " \"max_length\": 255,\n", + " \"is_required\": True,\n", + " },\n", + ")\n", + "new_column" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'items': [{'id': 'new_column_3',\n", + " 'name': 'New Column 3',\n", + " 'type': 'longText',\n", + " 'settings': {'id': 'new_column_3',\n", + " 'name': 'New Column 3',\n", + " 'type': 'longText',\n", + " 'max_length': 255,\n", + " 'is_required': True},\n", + " 'created_at': '2025-04-09T06:01:41.978634+00:00',\n", + " 'updated_at': '2025-04-09T06:01:41.978634+00:00',\n", + " 'datatable_id': '9f0f1ac6-beb6-4bda-95d4-9d2a7dc4b837'}],\n", + " 'pagination': {'offset': 0,\n", + " 'limit': 50,\n", + " 'total': 1,\n", + " 'order_by': 'created_at',\n", + " 'sort_dir': 'asc'}}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "await client.list_dataset_columns(projects[\"items\"][0][\"id\"], datasets[\"id\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': 'new_column_3',\n", + " 'name': 'New Column 3',\n", + " 'type': 'longText',\n", + " 'settings': {'id': 'new_column_3',\n", + " 'name': 'New Column 3',\n", + " 'type': 'longText',\n", + " 'max_length': 255,\n", + " 'is_required': True},\n", + " 'created_at': '2025-04-09T06:01:41.978634+00:00',\n", + " 'updated_at': '2025-04-09T06:01:41.978634+00:00',\n", + " 'datatable_id': '9f0f1ac6-beb6-4bda-95d4-9d2a7dc4b837'}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "col3 
 = await client.get_dataset_column(\n",
+    "    projects[\"items\"][0][\"id\"], datasets[\"id\"], \"new_column_3\"\n",
+    ")\n",
+    "col3"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'id': 'new_column_3',\n",
+       " 'name': 'New Column 3 Updated',\n",
+       " 'type': 'number',\n",
+       " 'settings': {'id': 'new_column_3',\n",
+       "  'name': 'New Column 3',\n",
+       "  'type': 'longText',\n",
+       "  'max_length': 255,\n",
+       "  'is_required': True},\n",
+       " 'created_at': '2025-04-09T06:01:41.978634+00:00',\n",
+       " 'updated_at': '2025-04-09T06:01:46.946099+00:00',\n",
+       " 'datatable_id': '9f0f1ac6-beb6-4bda-95d4-9d2a7dc4b837'}"
+      ]
+     },
+     "execution_count": null,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "await client.update_dataset_column(\n",
+    "    projects[\"items\"][0][\"id\"],\n",
+    "    datasets[\"id\"],\n",
+    "    \"new_column_3\",\n",
+    "    name=\"New Column 3 Updated\",\n",
+    "    type=ColumnType.NUMBER.value,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "await client.delete_dataset_column(\n",
+    "    projects[\"items\"][0][\"id\"], datasets[\"id\"], \"new_column_3\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Rows (for datasets)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#| export\n",
+    "#---- Dataset Rows ----\n",
+    "@patch\n",
+    "async def list_dataset_rows(\n",
+    "    self: RagasRelay,\n",
+    "    project_id: str,\n",
+    "    dataset_id: str,\n",
+    "    limit: int = 50,\n",
+    "    offset: int = 0,\n",
+    "    order_by: t.Optional[str] = None,\n",
+    "    sort_dir: t.Optional[str] = None,\n",
+    ") -> t.Dict:\n",
+    "    \"\"\"List rows in a dataset.\"\"\"\n",
+    "    params = {\"limit\": limit, \"offset\": offset}\n",
+    "    if order_by:\n",
+    "        params[\"order_by\"] = order_by\n",
+    "    if sort_dir:\n",
+    "        params[\"sort_dir\"] = sort_dir\n",
+    "    return await self._list_resources(\n",
+    "        f\"projects/{project_id}/datasets/{dataset_id}/rows\", **params\n",
+    "    )\n",
+    "\n",
+    "@patch\n",
+    "async def get_dataset_row(\n",
+    "    self: RagasRelay, project_id: str, dataset_id: str, row_id: str\n",
+    ") -> t.Dict:\n",
+    "    \"\"\"Get a specific row in a dataset.\"\"\"\n",
+    "    return await self._get_resource(\n",
+    "        f\"projects/{project_id}/datasets/{dataset_id}/rows/{row_id}\"\n",
+    "    )\n",
+    "\n",
+    "@patch\n",
+    "async def create_dataset_row(\n",
+    "    self: RagasRelay, project_id: str, dataset_id: str, id: str, data: t.Dict\n",
+    ") -> t.Dict:\n",
+    "    \"\"\"Create a new row in a dataset.\"\"\"\n",
+    "    row_data = {\"id\": id, \"data\": data}\n",
+    "    return await self._create_resource(\n",
+    "        f\"projects/{project_id}/datasets/{dataset_id}/rows\", row_data\n",
+    "    )\n",
+    "\n",
+    "@patch\n",
+    "async def update_dataset_row(\n",
+    "    self: RagasRelay, project_id: str, dataset_id: str, row_id: str, data: t.Dict\n",
+    ") -> t.Dict:\n",
+    "    \"\"\"Update an existing row in a dataset.\"\"\"\n",
+    "    row_data = {\"data\": data}\n",
+    "    return await self._update_resource(\n",
+    "        f\"projects/{project_id}/datasets/{dataset_id}/rows/{row_id}\",\n",
+    "        row_data,\n",
+    "    )\n",
+    "\n",
+    "@patch\n",
+    "async def delete_dataset_row(\n",
+    "    self: RagasRelay, project_id: str, dataset_id: str, row_id: str\n",
+    ") -> None:\n",
+    "    \"\"\"Delete a row from a dataset.\"\"\"\n",
+    "    await self._delete_resource(\n",
+    "        f\"projects/{project_id}/datasets/{dataset_id}/rows/{row_id}\"\n",
+    "    
)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "ename": "Exception", + "evalue": "API Error (500): Failed to create row", + "output_type": "error", + "traceback": [ + "\u001b[31m---------------------------------------------------------------------------\u001b[39m", + "\u001b[31mException\u001b[39m Traceback (most recent call last)", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[36]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[38;5;28;01mawait\u001b[39;00m client.create_dataset_row(\n\u001b[32m 2\u001b[39m projects[\u001b[33m\"\u001b[39m\u001b[33mitems\u001b[39m\u001b[33m\"\u001b[39m][\u001b[32m0\u001b[39m][\u001b[33m\"\u001b[39m\u001b[33mid\u001b[39m\u001b[33m\"\u001b[39m],\n\u001b[32m 3\u001b[39m datasets[\u001b[33m\"\u001b[39m\u001b[33mid\u001b[39m\u001b[33m\"\u001b[39m],\n\u001b[32m 4\u001b[39m \u001b[33m\"\u001b[39m\u001b[33m1\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 5\u001b[39m {\u001b[33m\"\u001b[39m\u001b[33mname\u001b[39m\u001b[33m\"\u001b[39m: \u001b[33m\"\u001b[39m\u001b[33mNew Row 1\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mage\u001b[39m\u001b[33m\"\u001b[39m: \u001b[32m30\u001b[39m},\n\u001b[32m 6\u001b[39m )\n", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[35]\u001b[39m\u001b[32m, line 37\u001b[39m, in \u001b[36mcreate_dataset_row\u001b[39m\u001b[34m(self, project_id, dataset_id, id, data)\u001b[39m\n\u001b[32m 35\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m\"\"\"Create a new row in a dataset.\"\"\"\u001b[39;00m\n\u001b[32m 36\u001b[39m row_data = {\u001b[33m\"\u001b[39m\u001b[33mid\u001b[39m\u001b[33m\"\u001b[39m: \u001b[38;5;28mid\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mdata\u001b[39m\u001b[33m\"\u001b[39m: data}\n\u001b[32m---> \u001b[39m\u001b[32m37\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m._create_resource(\n\u001b[32m 38\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mprojects/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mproject_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m/datasets/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdataset_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m/rows\u001b[39m\u001b[33m\"\u001b[39m, row_data\n\u001b[32m 39\u001b[39m )\n", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 55\u001b[39m, in \u001b[36mRagasRelay._create_resource\u001b[39m\u001b[34m(self, path, data)\u001b[39m\n\u001b[32m 53\u001b[39m \u001b[38;5;28;01masync\u001b[39;00m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34m_create_resource\u001b[39m(\u001b[38;5;28mself\u001b[39m, path, data):\n\u001b[32m 54\u001b[39m \u001b[38;5;250m \u001b[39m\u001b[33;03m\"\"\"Generic resource creation.\"\"\"\u001b[39;00m\n\u001b[32m---> \u001b[39m\u001b[32m55\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;01mawait\u001b[39;00m \u001b[38;5;28mself\u001b[39m._request(\u001b[33m\"\u001b[39m\u001b[33mPOST\u001b[39m\u001b[33m\"\u001b[39m, path, json_data=data)\n", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 48\u001b[39m, in \u001b[36mRagasRelay._request\u001b[39m\u001b[34m(self, method, endpoint, params, json_data)\u001b[39m\n\u001b[32m 46\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m response.status_code >= \u001b[32m400\u001b[39m \u001b[38;5;129;01mor\u001b[39;00m 
data.get(\u001b[33m\"\u001b[39m\u001b[33mstatus\u001b[39m\u001b[33m\"\u001b[39m) == \u001b[33m\"\u001b[39m\u001b[33merror\u001b[39m\u001b[33m\"\u001b[39m:\n\u001b[32m 47\u001b[39m error_msg = data.get(\u001b[33m\"\u001b[39m\u001b[33mmessage\u001b[39m\u001b[33m\"\u001b[39m, \u001b[33m\"\u001b[39m\u001b[33mUnknown error\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m---> \u001b[39m\u001b[32m48\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mAPI Error (\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse.status_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m): \u001b[39m\u001b[38;5;132;01m{\u001b[39;00merror_msg\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m\"\u001b[39m)\n\u001b[32m 50\u001b[39m \u001b[38;5;28;01mreturn\u001b[39;00m data.get(\u001b[33m\"\u001b[39m\u001b[33mdata\u001b[39m\u001b[33m\"\u001b[39m)\n", + "\u001b[31mException\u001b[39m: API Error (500): Failed to create row" + ] + } + ], + "source": [ + "await client.create_dataset_row(\n", + " projects[\"items\"][0][\"id\"],\n", + " datasets[\"id\"],\n", + " \"1\",\n", + " {\"name\": \"New Row 1\", \"age\": 30},\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'items': [],\n", + " 'pagination': {'offset': 0,\n", + " 'limit': 50,\n", + " 'total': 0,\n", + " 'order_by': 'created_at',\n", + " 'sort_dir': 'asc'}}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "await client.list_dataset_rows(projects[\"items\"][0][\"id\"], datasets[\"id\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "import uuid\n", + "import string" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "def create_nano_id(size=12):\n", + " # Define characters to use (alphanumeric)\n", + " alphabet = string.ascii_letters + string.digits\n", + " \n", + " # Generate UUID and convert to int\n", + " uuid_int = uuid.uuid4().int\n", + " \n", + " # Convert to base62\n", + " result = \"\"\n", + " while uuid_int:\n", + " uuid_int, remainder = divmod(uuid_int, len(alphabet))\n", + " result = alphabet[remainder] + result\n", + " \n", + " # Pad if necessary and return desired length\n", + " return result[:size]\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'eAscOAbBMSv2'" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Usage\n", + "nano_id = create_nano_id() # e.g., \"8dK9cNw3mP5x\"\n", + "nano_id" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get a Dataset Visualized - Created From UI\n", + "Lets Create a new dataset and add columns and rows via the endpoint to see how it behaves" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://dev.app.ragas.io/dashboard/projects/e1b3f1e4-d344-48f4-a178-84e7e32e6ab6/datasets/cc239f73-6546-4d5c-84e1-8c2ed8072edc'" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# generate a dataset\n", + "dataset = await client.create_dataset(\n", + " project_id=TEST_PROJECT_ID,\n", + " name=\"Dataset Visualized 
from UI\",\n", + " description=\"This is a dataset created from the UI\",\n", + ")\n", + "\n", + "# show url\n", + "WEB_ENDPOINT = \"https://dev.app.ragas.io\"\n", + "url = f\"{WEB_ENDPOINT}/dashboard/projects/{TEST_PROJECT_ID}/datasets/{dataset['id']}\"\n", + "url" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# list columns\n", + "columns = await client.list_dataset_columns(TEST_PROJECT_ID, dataset[\"id\"])\n", + "# list rows\n", + "rows = await client.list_dataset_rows(TEST_PROJECT_ID, dataset[\"id\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'items': [],\n", + " 'pagination': {'offset': 0,\n", + " 'limit': 50,\n", + " 'total': 0,\n", + " 'order_by': 'created_at',\n", + " 'sort_dir': 'asc'}}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "columns\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'items': [],\n", + " 'pagination': {'offset': 0,\n", + " 'limit': 50,\n", + " 'total': 0,\n", + " 'order_by': 'created_at',\n", + " 'sort_dir': 'asc'}}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "rows" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Create a Dataset from data\n", + "\n", + "we want to be able to use the API with python data like this `t.List[t.Dict]`.\n", + "```py\n", + "# how we want the data to look\n", + "data = [\n", + " {\n", + " \"id\": \"1\",\n", + " \"query\": \"What is the capital of France?\",\n", + " \"persona\": \"John\",\n", + " \"ground_truth\": \"Paris\",\n", + " },\n", + " {\n", + " \"id\": \"2\",\n", + " \"query\": \"What is the capital of Germany?\",\n", + " \"persona\": \"Jane\",\n", + " \"ground_truth\": \"Berlin\",\n", + " },\n", + " {\n", + " \"id\": \"3\",\n", + " \"query\": \"What is the capital of Italy?\",\n", + " \"persona\": \"John\",\n", + " \"ground_truth\": \"Rome\",\n", + " },\n", + "]\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "['number', 'text', 'longText', 'select', 'date', 'multiSelect', 'checkbox', 'custom']\n" + ] + } + ], + "source": [ + "# print out column types\n", + "print([col.value for col in ColumnType])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# it should be able to handle simple python dicts\n", + "data = [\n", + " {\n", + " \"id\": \"1\",\n", + " \"query\": \"What is the capital of France?\",\n", + " \"persona\": \"John\",\n", + " \"ground_truth\": \"Paris\",\n", + " },\n", + " {\n", + " \"id\": \"2\",\n", + " \"query\": \"What is the capital of Germany?\",\n", + " \"persona\": \"Jane\",\n", + " \"ground_truth\": \"Berlin\",\n", + " },\n", + "]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "There can be 2 ways to pass in data\n", + "\n", + "1. Data can come as either as simple dicts\n", + "\n", + "```py\n", + "data = [\n", + " {\"column_1\": \"value\", \"column_2\": \"value\"}\n", + "]\n", + "```\n", + "\n", + "2. 
or if you want to give more settings\n", + "\n", + "```py\n", + "data = [\n", + " {\n", + " \"column_1\": {\"data\": \"value\", \"type\": ColumnType.text},\n", + " \"column_2\": {\"data\": \"value\", \"type\": ColumnType.number},\n", + " }\n", + "]\n", + "```\n", + "\n", + "3. after that you will have to pass a list `Column` and `Row` to add it." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# test data\n", + "test_data_columns = [\n", + " {\"name\": \"id\", \"type\": ColumnType.NUMBER.value},\n", + " {\"name\": \"query\", \"type\": ColumnType.TEXT.value},\n", + " {\"name\": \"persona\", \"type\": ColumnType.TEXT.value},\n", + " {\"name\": \"ground_truth\", \"type\": ColumnType.TEXT.value},\n", + "]\n", + "\n", + "test_data_rows = [{\n", + " \"id\": \"1\",\n", + " \"query\": \"What is the capital of France?\",\n", + " \"persona\": \"John\",\n", + " \"ground_truth\": \"Paris\",\n", + "}, {\n", + " \"id\": \"2\",\n", + " \"query\": \"What is the capital of Germany?\",\n", + " \"persona\": \"Jane\",\n", + " \"ground_truth\": \"Berlin\",\n", + "}, {\n", + " \"id\": \"3\",\n", + " \"query\": \"What is the capital of Italy?\",\n", + " \"persona\": \"John\",\n", + " \"ground_truth\": \"Rome\",\n", + "}]\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "# Default settings for columns\n", + "DEFAULT_SETTINGS = {\n", + " \"is_required\": False,\n", + " \"max_length\": 1000\n", + "}\n", + "\n", + "# Model definitions\n", + "class Column(BaseModel):\n", + " id: str = Field(default_factory=create_nano_id)\n", + " name: str = Field(...)\n", + " type: str = Field(...)\n", + " settings: t.Dict = Field(default_factory=lambda: DEFAULT_SETTINGS.copy())\n", + " col_order: t.Optional[int] = Field(default=None)\n", + "\n", + "class RowCell(BaseModel):\n", + " data: t.Any = Field(...)\n", + " column_id: str = Field(...)\n", + "\n", + "class Row(BaseModel):\n", + " id: str = Field(default_factory=create_nano_id)\n", + " data: t.List[RowCell] = Field(...)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "#---- Resource With Data Helper Methods ----\n", + "@patch\n", + "async def _create_with_data(\n", + " self: RagasRelay,\n", + " resource_type: str,\n", + " project_id: str,\n", + " name: str, \n", + " description: str,\n", + " columns: t.List[Column],\n", + " rows: t.List[Row],\n", + " batch_size: int = 50\n", + ") -> t.Dict:\n", + " \"\"\"Generic method to create a resource with columns and rows.\n", + " \n", + " Args:\n", + " resource_type: Type of resource (\"dataset\" or \"experiment\")\n", + " project_id: Project ID\n", + " name: Resource name\n", + " description: Resource description\n", + " columns: List of column definitions\n", + " rows: List of row data\n", + " batch_size: Number of operations to perform concurrently\n", + " \n", + " Returns:\n", + " The created resource\n", + " \"\"\"\n", + " # Select appropriate methods based on resource type\n", + " if resource_type == \"dataset\":\n", + " create_fn = self.create_dataset\n", + " create_col_fn = self.create_dataset_column\n", + " create_row_fn = self.create_dataset_row\n", + " delete_fn = self.delete_dataset\n", + " id_key = \"dataset_id\"\n", + " elif resource_type == \"experiment\":\n", + " create_fn = self.create_experiment\n", + " create_col_fn = self.create_experiment_column\n", + " create_row_fn = 
self.create_experiment_row\n", + " delete_fn = self.delete_experiment\n", + " id_key = \"experiment_id\"\n", + " else:\n", + " raise ValueError(f\"Unsupported resource type: {resource_type}\")\n", + " \n", + " try:\n", + " # Create the resource\n", + " resource = await create_fn(project_id, name, description)\n", + " \n", + " # Process columns in batches\n", + " for i in range(0, len(columns), batch_size):\n", + " batch = columns[i:i+batch_size]\n", + " col_tasks = []\n", + " \n", + " for col in batch:\n", + " params = {\n", + " \"project_id\": project_id,\n", + " id_key: resource[\"id\"], # dataset_id or experiment_id\n", + " \"id\": col.id,\n", + " \"name\": col.name,\n", + " \"type\": col.type,\n", + " \"settings\": col.settings\n", + " }\n", + " if col.col_order is not None:\n", + " params[\"col_order\"] = col.col_order\n", + " \n", + " col_tasks.append(create_col_fn(**params))\n", + " \n", + " await asyncio.gather(*col_tasks)\n", + " \n", + " # Process rows in batches\n", + " for i in range(0, len(rows), batch_size):\n", + " batch = rows[i:i+batch_size]\n", + " row_tasks = []\n", + " \n", + " for row in batch:\n", + " row_data = {cell.column_id: cell.data for cell in row.data}\n", + " row_tasks.append(\n", + " create_row_fn(\n", + " project_id=project_id,\n", + " **{id_key: resource[\"id\"]},\n", + " id=row.id,\n", + " data=row_data\n", + " )\n", + " )\n", + " \n", + " await asyncio.gather(*row_tasks)\n", + " \n", + " return resource\n", + " \n", + " except Exception as e:\n", + " # Clean up on error\n", + " if 'resource' in locals():\n", + " try:\n", + " await delete_fn(project_id, resource[\"id\"])\n", + " except Exception:\n", + " pass # Ignore cleanup errors\n", + " raise e\n", + "\n", + "@patch\n", + "async def create_dataset_with_data(\n", + " self: RagasRelay,\n", + " project_id: str,\n", + " name: str,\n", + " description: str,\n", + " columns: t.List[Column],\n", + " rows: t.List[Row],\n", + " batch_size: int = 50\n", + ") -> t.Dict:\n", + " \"\"\"Create a dataset with columns and rows.\n", + " \n", + " This method creates a dataset and populates it with columns and rows in an\n", + " optimized way using concurrent requests.\n", + " \n", + " Args:\n", + " project_id: Project ID\n", + " name: Dataset name\n", + " description: Dataset description\n", + " columns: List of column definitions\n", + " rows: List of row data\n", + " batch_size: Number of operations to perform concurrently\n", + " \n", + " Returns:\n", + " The created dataset\n", + " \"\"\"\n", + " return await self._create_with_data(\n", + " \"dataset\", project_id, name, description, columns, rows, batch_size\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Now let's test this."
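Before the test, it is worth isolating the core pattern `_create_with_data` relies on: slice the pending requests into fixed-size batches and await each batch with `asyncio.gather`, deleting the half-created resource if anything fails. A minimal, self-contained sketch of the batching part, assuming an illustrative `make_request` coroutine in place of the real column/row creation calls:

```py
import asyncio

async def make_request(item):
    # Stand-in for create_dataset_column / create_dataset_row calls.
    await asyncio.sleep(0.01)
    return item

async def create_in_batches(items, batch_size=50):
    results = []
    for i in range(0, len(items), batch_size):
        batch = items[i : i + batch_size]
        # At most `batch_size` requests are in flight at a time.
        results.extend(await asyncio.gather(*(make_request(it) for it in batch)))
    return results

# 120 items -> three batches of 50, 50, and 20 concurrent requests
# asyncio.run(create_in_batches(list(range(120))))
```

The batch size caps concurrency, so populating a large dataset does not open hundreds of simultaneous connections to the API.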
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Created dataset with ID: 5e7912f4-6a65-4d0c-bf79-0fab9ddda40c\n", + "Created 4 columns\n", + "Created 3 rows\n" + ] + } + ], + "source": [ + "# Create Column objects\n", + "column_objects = []\n", + "for col in test_data_columns:\n", + " column_objects.append(Column(\n", + " name=col[\"name\"],\n", + " type=col[\"type\"]\n", + " # id and settings will be auto-generated\n", + " ))\n", + "\n", + "# Create a mapping of column names to their IDs for creating rows\n", + "column_map = {col.name: col.id for col in column_objects}\n", + "\n", + "# Create Row objects\n", + "row_objects = []\n", + "for row in test_data_rows:\n", + " cells = []\n", + " for key, value in row.items():\n", + " if key in column_map: # Skip any extra fields not in columns\n", + " cells.append(RowCell(\n", + " data=value,\n", + " column_id=column_map[key]\n", + " ))\n", + " row_objects.append(Row(data=cells))\n", + "\n", + "# Now we can create the dataset\n", + "dataset = await client.create_dataset_with_data(\n", + " project_id=TEST_PROJECT_ID,\n", + " name=\"Capitals Dataset\",\n", + " description=\"A dataset about capital cities\",\n", + " columns=column_objects,\n", + " rows=row_objects\n", + ")\n", + "\n", + "print(f\"Created dataset with ID: {dataset['id']}\")\n", + "\n", + "# Verify the data\n", + "columns = await client.list_dataset_columns(TEST_PROJECT_ID, dataset[\"id\"])\n", + "print(f\"Created {len(columns['items'])} columns\")\n", + "\n", + "rows = await client.list_dataset_rows(TEST_PROJECT_ID, dataset[\"id\"])\n", + "print(f\"Created {len(rows['items'])} rows\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'https://dev.app.ragas.io/dashboard/projects/e1b3f1e4-d344-48f4-a178-84e7e32e6ab6/datasets/5e7912f4-6a65-4d0c-bf79-0fab9ddda40c'" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# get dataset url\n", + "url = f\"{WEB_ENDPOINT}/dashboard/projects/{TEST_PROJECT_ID}/datasets/{dataset['id']}\"\n", + "url" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# cleanup\n", + "await client.delete_dataset(TEST_PROJECT_ID, dataset[\"id\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### The same but for Experiments" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "#---- Experiment Columns ----\n", + "@patch\n", + "async def list_experiment_columns(\n", + " self: RagasRelay,\n", + " project_id: str,\n", + " experiment_id: str,\n", + " limit: int = 50,\n", + " offset: int = 0,\n", + " order_by: t.Optional[str] = None,\n", + " sort_dir: t.Optional[str] = None,\n", + ") -> t.Dict:\n", + " \"\"\"List columns in an experiment.\"\"\"\n", + " params = {\"limit\": limit, \"offset\": offset}\n", + " if order_by:\n", + " params[\"order_by\"] = order_by\n", + " if sort_dir:\n", + " params[\"sort_dir\"] = sort_dir\n", + " return await self._list_resources(\n", + " f\"projects/{project_id}/experiments/{experiment_id}/columns\", **params\n", + " )\n", + "\n", + "@patch\n", + "async def get_experiment_column(\n", + " self: RagasRelay, project_id: str, experiment_id: str, column_id: str\n", + ") -> t.Dict:\n", + " \"\"\"Get a specific column in an 
experiment.\"\"\"\n", + " return await self._get_resource(\n", + " f\"projects/{project_id}/experiments/{experiment_id}/columns/{column_id}\"\n", + " )\n", + "\n", + "@patch\n", + "async def create_experiment_column(\n", + " self: RagasRelay,\n", + " project_id: str,\n", + " experiment_id: str,\n", + " id: str,\n", + " name: str,\n", + " type: str,\n", + " col_order: t.Optional[int] = None,\n", + " settings: t.Optional[t.Dict] = None,\n", + ") -> t.Dict:\n", + " \"\"\"Create a new column in an experiment.\"\"\"\n", + " data = {\"id\": id, \"name\": name, \"type\": type}\n", + " if col_order is not None:\n", + " data[\"col_order\"] = col_order\n", + " if settings:\n", + " data[\"settings\"] = settings\n", + " return await self._create_resource(\n", + " f\"projects/{project_id}/experiments/{experiment_id}/columns\", data\n", + " )\n", + "\n", + "@patch\n", + "async def update_experiment_column(\n", + " self: RagasRelay, project_id: str, experiment_id: str, column_id: str, **column_data\n", + ") -> t.Dict:\n", + " \"\"\"Update an existing column in an experiment.\"\"\"\n", + " return await self._update_resource(\n", + " f\"projects/{project_id}/experiments/{experiment_id}/columns/{column_id}\",\n", + " column_data,\n", + " )\n", + "\n", + "@patch\n", + "async def delete_experiment_column(\n", + " self: RagasRelay, project_id: str, experiment_id: str, column_id: str\n", + ") -> None:\n", + " \"\"\"Delete a column from an experiment.\"\"\"\n", + " await self._delete_resource(\n", + " f\"projects/{project_id}/experiments/{experiment_id}/columns/{column_id}\"\n", + " )\n", + "\n", + "#---- Experiment Rows ----\n", + "@patch\n", + "async def list_experiment_rows(\n", + " self: RagasRelay,\n", + " project_id: str,\n", + " experiment_id: str,\n", + " limit: int = 50,\n", + " offset: int = 0,\n", + " order_by: t.Optional[str] = None,\n", + " sort_dir: t.Optional[str] = None,\n", + ") -> t.Dict:\n", + " \"\"\"List rows in an experiment.\"\"\"\n", + " params = {\"limit\": limit, \"offset\": offset}\n", + " if order_by:\n", + " params[\"order_by\"] = order_by\n", + " if sort_dir:\n", + " params[\"sort_dir\"] = sort_dir\n", + " return await self._list_resources(\n", + " f\"projects/{project_id}/experiments/{experiment_id}/rows\", **params\n", + " )\n", + "\n", + "@patch\n", + "async def get_experiment_row(\n", + " self: RagasRelay, project_id: str, experiment_id: str, row_id: str\n", + ") -> t.Dict:\n", + " \"\"\"Get a specific row in an experiment.\"\"\"\n", + " return await self._get_resource(\n", + " f\"projects/{project_id}/experiments/{experiment_id}/rows/{row_id}\"\n", + " )\n", + "\n", + "@patch\n", + "async def create_experiment_row(\n", + " self: RagasRelay, project_id: str, experiment_id: str, id: str, data: t.Dict\n", + ") -> t.Dict:\n", + " \"\"\"Create a new row in an experiment.\"\"\"\n", + " row_data = {\"id\": id, \"data\": data}\n", + " return await self._create_resource(\n", + " f\"projects/{project_id}/experiments/{experiment_id}/rows\", row_data\n", + " )\n", + "\n", + "@patch\n", + "async def update_experiment_row(\n", + " self: RagasRelay, project_id: str, experiment_id: str, row_id: str, data: t.Dict\n", + ") -> t.Dict:\n", + " \"\"\"Update an existing row in an experiment.\"\"\"\n", + " row_data = {\"data\": data}\n", + " return await self._update_resource(\n", + " f\"projects/{project_id}/experiments/{experiment_id}/rows/{row_id}\",\n", + " row_data,\n", + " )\n", + "\n", + "@patch\n", + "async def delete_experiment_row(\n", + " self: RagasRelay, project_id: str, experiment_id: 
str, row_id: str\n", + ") -> None:\n", + " \"\"\"Delete a row from an experiment.\"\"\"\n", + " await self._delete_resource(\n", + " f\"projects/{project_id}/experiments/{experiment_id}/rows/{row_id}\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'id': '7c695b58-7fc3-464c-a18b-a96e35f9684d',\n", + " 'name': 'New Experiment',\n", + " 'description': 'This is a new experiment',\n", + " 'updated_at': '2025-04-09T17:03:44.340782+00:00',\n", + " 'created_at': '2025-04-09T17:03:44.340782+00:00',\n", + " 'version_counter': 0,\n", + " 'project_id': 'e1b3f1e4-d344-48f4-a178-84e7e32e6ab6'}" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "await client.create_experiment(TEST_PROJECT_ID, \"New Experiment\", \"This is a new experiment\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'78fd6c58-7edf-4239-93d1-4f49185d8e49'" + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "experiments = await client.list_experiments(TEST_PROJECT_ID)\n", + "EXPERIMENT_ID = experiments[\"items\"][0][\"id\"]\n", + "EXPERIMENT_ID" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "@patch\n", + "async def create_experiment_with_data(\n", + " self: RagasRelay,\n", + " project_id: str,\n", + " name: str,\n", + " description: str,\n", + " columns: t.List[Column],\n", + " rows: t.List[Row],\n", + " batch_size: int = 50\n", + ") -> t.Dict:\n", + " \"\"\"Create an experiment with columns and rows.\n", + " \n", + " This method creates an experiment and populates it with columns and rows in an\n", + " optimized way using concurrent requests.\n", + " \n", + " Args:\n", + " project_id: Project ID\n", + " name: Experiment name\n", + " description: Experiment description\n", + " columns: List of column definitions\n", + " rows: List of row data\n", + " batch_size: Number of operations to perform concurrently\n", + " \n", + " Returns:\n", + " The created experiment\n", + " \"\"\"\n", + " return await self._create_with_data(\n", + " \"experiment\", project_id, name, description, columns, rows, batch_size\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#| export\n", + "#---- Utility Methods ----\n", + "@patch\n", + "def create_column(\n", + " self: RagasRelay, \n", + " name: str, \n", + " type: str, \n", + " settings: t.Optional[t.Dict] = None, \n", + " col_order: t.Optional[int] = None,\n", + " id: t.Optional[str] = None\n", + ") -> Column:\n", + " \"\"\"Create a Column object.\n", + " \n", + " Args:\n", + " name: Column name\n", + " type: Column type (use ColumnType enum)\n", + " settings: Column settings\n", + " col_order: Column order\n", + " id: Custom ID (generates one if not provided)\n", + " \n", + " Returns:\n", + " Column object\n", + " \"\"\"\n", + " params = {\"name\": name, \"type\": type}\n", + " if settings:\n", + " params[\"settings\"] = settings\n", + " if col_order is not None:\n", + " params[\"col_order\"] = col_order\n", + " if id:\n", + " params[\"id\"] = id\n", + " \n", + " return Column(**params)\n", + " \n", + "@patch\n", + "def create_row(\n", + " self: RagasRelay, \n", + " data: t.Dict[str, t.Any], \n", + " column_map: t.Dict[str, str],\n", + " id: 
t.Optional[str] = None\n", + ") -> Row:\n", + " \"\"\"Create a Row object from a dictionary.\n", + " \n", + " Args:\n", + " data: Dictionary mapping column names to values\n", + " column_map: Dictionary mapping column names to column IDs\n", + " id: Custom ID (generates one if not provided)\n", + " \n", + " Returns:\n", + " Row object\n", + " \"\"\"\n", + " cells = []\n", + " for col_name, value in data.items():\n", + " if col_name in column_map:\n", + " cells.append(RowCell(\n", + " data=value,\n", + " column_id=column_map[col_name]\n", + " ))\n", + " \n", + " params = {\"data\": cells}\n", + " if id:\n", + " params[\"id\"] = id\n", + " \n", + " return Row(**params)\n", + " \n", + "@patch\n", + "def create_column_map(self: RagasRelay, columns: t.List[Column]) -> t.Dict[str, str]:\n", + " \"\"\"Create a mapping of column names to IDs.\n", + " \n", + " Args:\n", + " columns: List of column objects\n", + " \n", + " Returns:\n", + " Dictionary mapping column names to IDs\n", + " \"\"\"\n", + " return {col.name: col.id for col in columns}\n", + " \n", + "@patch\n", + "async def convert_raw_data(\n", + " self: RagasRelay,\n", + " column_defs: t.List[t.Dict],\n", + " row_data: t.List[t.Dict]\n", + ") -> t.Tuple[t.List[Column], t.List[Row]]:\n", + " \"\"\"Convert raw data to column and row objects.\n", + " \n", + " Args:\n", + " column_defs: List of column definitions (dicts with name, type)\n", + " row_data: List of dictionaries with row data\n", + " \n", + " Returns:\n", + " Tuple of (columns, rows)\n", + " \"\"\"\n", + " # Create columns\n", + " columns = []\n", + " for col in column_defs:\n", + " columns.append(self.create_column(**col))\n", + " \n", + " # Create column map\n", + " column_map = self.create_column_map(columns)\n", + " \n", + " # Create rows\n", + " rows = []\n", + " for data in row_data:\n", + " rows.append(self.create_row(data, column_map))\n", + " \n", + " return columns, rows" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "python3", + "language": "python", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/nbs/dataset.ipynb b/nbs/dataset.ipynb index 47d5cfe..545c331 100644 --- a/nbs/dataset.ipynb +++ b/nbs/dataset.ipynb @@ -109,7 +109,9 @@ " self._entries[index] = self.model.from_notion(response)\n", "\n", " def __repr__(self) -> str:\n", - " return f\"Dataset(name={self.name}, model={self.model.__name__}, len={len(self)})\"\n", + " return (\n", + " f\"Dataset(name={self.name}, model={self.model.__name__}, len={len(self)})\"\n", + " )\n", "\n", " def __len__(self) -> int:\n", " return len(self._entries)\n", @@ -124,7 +126,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "import ragas_annotator.model.notion_typing as nmt\n", "from ragas_annotator.backends.mock_notion import MockNotionClient\n", "from ragas_annotator.backends.factory import NotionClientFactory\n", @@ -165,12 +167,14 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "# Set up a test environment with mock Notion client and a test database.\n", "# root page id\n", "root_page_id = \"test-root-id\"\n", "# Create a mock client\n", - "mock_client = NotionClientFactory.create(use_mock=True, initialize_project=True, root_page_id=root_page_id)\n", + "mock_client = NotionClientFactory.create(\n", + " use_mock=True, initialize_project=True, root_page_id=root_page_id\n", + ")\n", "\n", "# Create NotionBackend with mock client\n", "backend = NotionBackend(root_page_id=root_page_id, notion_client=mock_client)\n", @@ 
-194,10 +198,7 @@ "outputs": [], "source": [ "dataset = Dataset(\n", - " name=\"TestModel\",\n", - " model=TestModel,\n", - " database_id=datasets_id,\n", - " notion_backend=backend\n", + " name=\"TestModel\", model=TestModel, database_id=datasets_id, notion_backend=backend\n", ")" ] }, @@ -251,7 +252,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "test_eq(len(dataset), 1)" ] }, @@ -304,7 +305,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "test_eq(len(dataset), 0)" ] }, @@ -452,7 +453,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "test_eq(test_model.description, \"test description\")" ] }, @@ -521,7 +522,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "test_eq(dataset.get(0).description, \"updated description\")" ] } diff --git a/nbs/model/notion_model.ipynb b/nbs/model/notion_model.ipynb index 5c8bf28..24281e5 100644 --- a/nbs/model/notion_model.ipynb +++ b/nbs/model/notion_model.ipynb @@ -24,7 +24,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "from fastcore.test import *" ] }, diff --git a/nbs/model/notion_types.ipynb b/nbs/model/notion_types.ipynb index 398ab88..8ac4f7b 100644 --- a/nbs/model/notion_types.ipynb +++ b/nbs/model/notion_types.ipynb @@ -203,7 +203,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "import pytest\n", "from fastcore.test import *" ] @@ -251,7 +251,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "class Text(Field[str], str):\n", " \"\"\"Rich text property type.\"\"\"\n", "\n", @@ -269,10 +269,13 @@ " # Split the text into chunks of CHUNK_SIZE characters\n", " if not value:\n", " return {self.name: {self.NOTION_FIELD_TYPE: []}}\n", - " \n", - " chunks = [value[i:i+self.CHUNK_SIZE] for i in range(0, len(value), self.CHUNK_SIZE)]\n", + "\n", + " chunks = [\n", + " value[i : i + self.CHUNK_SIZE]\n", + " for i in range(0, len(value), self.CHUNK_SIZE)\n", + " ]\n", " rich_text_array = [{\"text\": {\"content\": chunk}} for chunk in chunks]\n", - " \n", + "\n", " return {self.name: {self.NOTION_FIELD_TYPE: rich_text_array}}\n", "\n", " def _from_notion(self, data: dict) -> t.Optional[str]:\n", @@ -281,10 +284,10 @@ " rich_text = data[\"properties\"][self.name][self.NOTION_FIELD_TYPE]\n", " else:\n", " rich_text = data[self.name][self.NOTION_FIELD_TYPE]\n", - " \n", + "\n", " if not rich_text:\n", " return None\n", - " \n", + "\n", " # Combine all text chunks into a single string\n", " return \"\".join(item[\"text\"][\"content\"] for item in rich_text if \"text\" in item)" ] @@ -313,7 +316,7 @@ "field = Text()\n", "field.name = \"text\"\n", "result = field._to_notion(\"test\")\n", - "test_eq(result, {\"text\": {\"rich_text\": [{'text': {'content': \"test\"}}]}})\n", + "test_eq(result, {\"text\": {\"rich_text\": [{\"text\": {\"content\": \"test\"}}]}})\n", "\n", "# test from_notion\n", "result = field._from_notion({\"text\": SAMPLE_NOTION_DATA[\"text_field\"]})\n", @@ -321,8 +324,8 @@ "\n", "# test validate\n", "test_eq(field.validate(\"test\"), \"test\")\n", - "#test_eq(field.validate(42), \"42\")\n", - "#test_eq(field.validate(None), None)" + "# test_eq(field.validate(42), \"42\")\n", + "# test_eq(field.validate(None), None)" ] }, { @@ -339,7 +342,7 @@ "field.name = \"text\"\n", "short_text = \"This is a short text\"\n", "result = field._to_notion(short_text)\n", - "test_eq(result, {\"text\": {\"rich_text\": [{'text': {'content': 
short_text}}]}})\n", + "test_eq(result, {\"text\": {\"rich_text\": [{\"text\": {\"content\": short_text}}]}})\n", "test_eq(len(result[\"text\"][\"rich_text\"]), 1)\n", "\n", "# 2. Test with exactly 2000 chars (boundary case)\n", @@ -362,7 +365,7 @@ " \"rich_text\": [\n", " {\"text\": {\"content\": \"First chunk. \"}},\n", " {\"text\": {\"content\": \"Second chunk. \"}},\n", - " {\"text\": {\"content\": \"Last chunk.\"}}\n", + " {\"text\": {\"content\": \"Last chunk.\"}},\n", " ]\n", " }\n", "}\n", @@ -558,6 +561,104 @@ " return url" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## New Types" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# | export\n", + "T = t.TypeVar(\"T\")\n", + "\n", + "\n", + "class NotionFieldMeta:\n", + " \"\"\"Base metadata class for Notion field types.\"\"\"\n", + "\n", + " NOTION_FIELD_TYPE: t.ClassVar[str] = \"\"\n", + "\n", + " def __init__(self, required: bool = True):\n", + " self.required = required\n", + " self.name: str = \"\" # Will be set during model initialization\n", + "\n", + " def __set_name__(self, owner, name: str):\n", + " \"\"\"Set field name when used directly as class attribute.\"\"\"\n", + " self.name = name\n", + "\n", + " def validate(self, value: t.Any) -> t.Any:\n", + " \"\"\"Validate field value.\"\"\"\n", + " if value is None and self.required:\n", + " raise ValueError(f\"Field {self.name} is required\")\n", + " return value\n", + "\n", + " def to_notion(self, value: t.Any) -> dict:\n", + " \"\"\"Convert Python value to Notion format.\"\"\"\n", + " raise NotImplementedError()\n", + "\n", + " def from_notion(self, data: dict) -> t.Any:\n", + " \"\"\"Convert Notion format to Python value.\"\"\"\n", + " raise NotImplementedError()\n", + "\n", + " def to_notion_property(self) -> dict:\n", + " \"\"\"Convert field to Notion property definition.\"\"\"\n", + " return {self.name: {\"type\": self.NOTION_FIELD_TYPE, self.NOTION_FIELD_TYPE: {}}}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# | export\n", + "class TextNew(NotionFieldMeta):\n", + " \"\"\"Rich text property type for Notion.\"\"\"\n", + "\n", + " NOTION_FIELD_TYPE = \"rich_text\"\n", + " CHUNK_SIZE = 2000 # Notion's character limit per rich text block\n", + "\n", + " def __init__(self, required: bool = True):\n", + " super().__init__(required=required)\n", + "\n", + " def to_notion(self, value: str) -> dict:\n", + " # Split text into chunks of CHUNK_SIZE characters\n", + " if not value:\n", + " return {self.name: {self.NOTION_FIELD_TYPE: []}}\n", + "\n", + " chunks = [\n", + " value[i : i + self.CHUNK_SIZE]\n", + " for i in range(0, len(value), self.CHUNK_SIZE)\n", + " ]\n", + " rich_text_array = [{\"text\": {\"content\": chunk}} for chunk in chunks]\n", + "\n", + " return {self.name: {self.NOTION_FIELD_TYPE: rich_text_array}}\n", + "\n", + " def from_notion(self, data: dict) -> t.Optional[str]:\n", + " # Handle both direct and properties-wrapped format\n", + " if \"properties\" in data:\n", + " if self.name in data[\"properties\"]:\n", + " rich_text = data[\"properties\"][self.name][self.NOTION_FIELD_TYPE]\n", + " else:\n", + " return None\n", + " else:\n", + " if self.name in data:\n", + " rich_text = data[self.name][self.NOTION_FIELD_TYPE]\n", + " else:\n", + " return None\n", + "\n", + " if not rich_text:\n", + " return None\n", + "\n", + " # Combine all text chunks into a single string\n", + " return 
\"\".join(item[\"text\"][\"content\"] for item in rich_text if \"text\" in item)" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/nbs/project/comparison.ipynb b/nbs/project/comparison.ipynb index 7ec0249..e864516 100644 --- a/nbs/project/comparison.ipynb +++ b/nbs/project/comparison.ipynb @@ -15,7 +15,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| default_exp project.comparison" + "# | default_exp project.comparison" ] }, { @@ -24,7 +24,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "import pytest\n", "from unittest.mock import MagicMock\n", "from fastcore.test import *\n", @@ -58,7 +58,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "logger = logging.getLogger(__name__)" ] }, @@ -68,15 +68,19 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "# utility function to check if a model has a title property and get the name of the title property\n", "@t.overload\n", - "def _get_title_property(model: NotionModel|t.Type[NotionModel], raise_exception: t.Literal[True] = True) -> str:\n", - " ...\n", + "def _get_title_property(\n", + " model: NotionModel | t.Type[NotionModel], raise_exception: t.Literal[True] = True\n", + ") -> str: ...\n", "@t.overload\n", - "def _get_title_property(model: NotionModel|t.Type[NotionModel], raise_exception: t.Literal[False] = False) -> t.Optional[str]:\n", - " ...\n", - "def _get_title_property(model: NotionModel|t.Type[NotionModel], raise_exception: bool = True) -> t.Optional[str]:\n", + "def _get_title_property(\n", + " model: NotionModel | t.Type[NotionModel], raise_exception: t.Literal[False] = False\n", + ") -> t.Optional[str]: ...\n", + "def _get_title_property(\n", + " model: NotionModel | t.Type[NotionModel], raise_exception: bool = True\n", + ") -> t.Optional[str]:\n", " has_title = False\n", " for field in model._fields.keys():\n", " if isinstance(model._fields[field], nmt.Title):\n", @@ -97,13 +101,14 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "class ModelWithoutTitle(NotionModel):\n", " id: int = nmt.ID()\n", " select: str = nmt.Select()\n", "\n", + "\n", "class ModelWithTitle(ModelWithoutTitle):\n", - " some_title: str = nmt.Title()\n" + " some_title: str = nmt.Title()" ] }, { @@ -112,7 +117,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "test_eq(_get_title_property(ModelWithoutTitle, raise_exception=False), None)\n", "pytest.raises(ValueError, _get_title_property, ModelWithoutTitle)\n", "test_eq(_get_title_property(ModelWithTitle), \"some_title\")" @@ -124,7 +129,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "def _validate_experiments(experiments: t.Sequence[Experiment]):\n", " # validate we have more than 2 experiments\n", " if len(experiments) < 2:\n", @@ -137,7 +142,9 @@ " if not isinstance(exp, Experiment):\n", " raise ValueError(\"All experiments must be of type Experiment\")\n", " if top_exp != exp.model:\n", - " logger.warning(f\"Experiments have different models: {top_exp.model} and {exp.model}\")\n", + " logger.warning(\n", + " f\"Experiments have different models: {top_exp.model} and {exp.model}\"\n", + " )\n", " if title_property != _get_title_property(exp.model):\n", " raise ValueError(\"All experiments must have the same title property.\")" ] @@ -164,43 +171,106 @@ } ], "source": [ - "#| hide\n", + "# | hide\n", "\n", "example_notion_backend = MagicMock(spec=NotionBackend)\n", "\n", "# test the validation 
logics\n", "with pytest.raises(ValueError):\n", - " _validate_experiments([\n", - " Experiment(\"test_experiment_1\", ModelWithTitle, \"test_database_id\", example_notion_backend), \n", - " Experiment(\"test_experiment_1\", ModelWithoutTitle, \"test_database_id\", example_notion_backend), \n", - " Experiment(\"test_experiment_2\", ModelWithTitle, \"test_database_id\", example_notion_backend),\n", - " ])\n", + " _validate_experiments(\n", + " [\n", + " Experiment(\n", + " \"test_experiment_1\",\n", + " ModelWithTitle,\n", + " \"test_database_id\",\n", + " example_notion_backend,\n", + " ),\n", + " Experiment(\n", + " \"test_experiment_1\",\n", + " ModelWithoutTitle,\n", + " \"test_database_id\",\n", + " example_notion_backend,\n", + " ),\n", + " Experiment(\n", + " \"test_experiment_2\",\n", + " ModelWithTitle,\n", + " \"test_database_id\",\n", + " example_notion_backend,\n", + " ),\n", + " ]\n", + " )\n", "\n", "\n", "# with should pass\n", - "_validate_experiments([\n", - " Experiment(\"test_experiment_1\", ModelWithTitle, \"test_database_id\", example_notion_backend), \n", - " Experiment(\"test_experiment_2\", ModelWithTitle, \"test_database_id\", example_notion_backend)\n", - "])\n", + "_validate_experiments(\n", + " [\n", + " Experiment(\n", + " \"test_experiment_1\",\n", + " ModelWithTitle,\n", + " \"test_database_id\",\n", + " example_notion_backend,\n", + " ),\n", + " Experiment(\n", + " \"test_experiment_2\",\n", + " ModelWithTitle,\n", + " \"test_database_id\",\n", + " example_notion_backend,\n", + " ),\n", + " ]\n", + ")\n", + "\n", "\n", "# throw a warning if the models are different\n", "class DifferentTitleModel(ModelWithoutTitle):\n", " some_title: str = nmt.Title()\n", "\n", - "_validate_experiments([\n", - " Experiment(\"test_experiment_1\", ModelWithTitle, \"test_database_id\", example_notion_backend), \n", - " Experiment(\"test_experiment_2\", ModelWithTitle, \"test_database_id\", example_notion_backend),\n", - " Experiment(\"test_experiment_3\", DifferentTitleModel, \"test_database_id\", example_notion_backend)\n", - "])\n", + "\n", + "_validate_experiments(\n", + " [\n", + " Experiment(\n", + " \"test_experiment_1\",\n", + " ModelWithTitle,\n", + " \"test_database_id\",\n", + " example_notion_backend,\n", + " ),\n", + " Experiment(\n", + " \"test_experiment_2\",\n", + " ModelWithTitle,\n", + " \"test_database_id\",\n", + " example_notion_backend,\n", + " ),\n", + " Experiment(\n", + " \"test_experiment_3\",\n", + " DifferentTitleModel,\n", + " \"test_database_id\",\n", + " example_notion_backend,\n", + " ),\n", + " ]\n", + ")\n", + "\n", "\n", "# throw an error if the title properties are different\n", "class DifferentTitleNameModel(ModelWithoutTitle):\n", " some_title_other: str = nmt.Title()\n", + "\n", + "\n", "with pytest.raises(ValueError):\n", - " _validate_experiments([\n", - " Experiment(\"test_experiment_1\", ModelWithTitle, \"test_database_id\", example_notion_backend), \n", - " Experiment(\"test_experiment_2\", DifferentTitleNameModel, \"test_database_id\", example_notion_backend),\n", - " ])" + " _validate_experiments(\n", + " [\n", + " Experiment(\n", + " \"test_experiment_1\",\n", + " ModelWithTitle,\n", + " \"test_database_id\",\n", + " example_notion_backend,\n", + " ),\n", + " Experiment(\n", + " \"test_experiment_2\",\n", + " DifferentTitleNameModel,\n", + " \"test_database_id\",\n", + " example_notion_backend,\n", + " ),\n", + " ]\n", + " )" ] }, { @@ -209,7 +279,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", 
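The `_get_title_property` overloads exercised by these validation tests lean on `typing.overload` with `t.Literal` flags: a type checker can then prove that the `raise_exception=True` form always returns `str`, while the `False` form may return `None`. A minimal sketch of the same typing pattern, with a hypothetical `lookup` function standing in for `_get_title_property`:

```py
import typing as t

@t.overload
def lookup(key: str, strict: t.Literal[True] = True) -> str: ...
@t.overload
def lookup(key: str, strict: t.Literal[False] = False) -> t.Optional[str]: ...
def lookup(key: str, strict: bool = True) -> t.Optional[str]:
    value = {"name": "ragas"}.get(key)
    if value is None and strict:
        raise ValueError(f"{key} not found")
    return value

# reveal_type(lookup("name"))                 # str
# reveal_type(lookup("name", strict=False))   # str | None
```

Callers that keep the default `raise_exception=True` can use the result without a `None` check, which is exactly how the comparison code calls it.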
"test_eq(_get_title_property(ModelWithTitle), \"some_title\")\n", "test_eq(_get_title_property(DifferentTitleNameModel), \"some_title_other\")\n", "with pytest.raises(ValueError):\n", @@ -244,20 +314,32 @@ } ], "source": [ - "#| hide\n", + "# | hide\n", "# a test for grouping experiments\n", "NUM_EXPS = 3\n", "# dummy experiments\n", - "exp1 = Experiment(\"test_experiment_1\", ModelWithTitle, \"test_database_id\", example_notion_backend)\n", - "exp2 = Experiment(\"test_experiment_2\", ModelWithTitle, \"test_database_id\", example_notion_backend)\n", - "exp3 = Experiment(\"test_experiment_3\", ModelWithTitle, \"test_database_id\", example_notion_backend)\n", + "exp1 = Experiment(\n", + " \"test_experiment_1\", ModelWithTitle, \"test_database_id\", example_notion_backend\n", + ")\n", + "exp2 = Experiment(\n", + " \"test_experiment_2\", ModelWithTitle, \"test_database_id\", example_notion_backend\n", + ")\n", + "exp3 = Experiment(\n", + " \"test_experiment_3\", ModelWithTitle, \"test_database_id\", example_notion_backend\n", + ")\n", "# fill the experiments with dummy data\n", "for i in range(NUM_EXPS):\n", - " exp1._entries.append(ModelWithTitle(some_title=f\"test_{i}\", id=i, select=f\"test_exp_1_{i}\"))\n", + " exp1._entries.append(\n", + " ModelWithTitle(some_title=f\"test_{i}\", id=i, select=f\"test_exp_1_{i}\")\n", + " )\n", "for i in range(NUM_EXPS):\n", - " exp2._entries.append(ModelWithTitle(some_title=f\"test_{i}\", id=i, select=f\"test_exp_2_{i}\"))\n", + " exp2._entries.append(\n", + " ModelWithTitle(some_title=f\"test_{i}\", id=i, select=f\"test_exp_2_{i}\")\n", + " )\n", "for i in range(NUM_EXPS):\n", - " exp3._entries.append(ModelWithTitle(some_title=f\"test_{i}\", id=i, select=f\"test_exp_3_{i}\"))\n", + " exp3._entries.append(\n", + " ModelWithTitle(some_title=f\"test_{i}\", id=i, select=f\"test_exp_3_{i}\")\n", + " )\n", "\n", "\n", "# manually create the combined fields\n", @@ -279,7 +361,7 @@ " exp3_as_field[\"select\"] = f\"test_exp_3_{i}\"\n", " combined_experiments_fields.append([exp1_as_field, exp2_as_field, exp3_as_field])\n", "\n", - "combined_experiments_fields[1]\n" + "combined_experiments_fields[1]" ] }, { @@ -288,7 +370,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "def _model_to_dict(model: NotionModel) -> dict:\n", " # drop ID filed\n", " data = {}\n", @@ -296,7 +378,7 @@ " if isinstance(model._fields[field_name], nmt.ID):\n", " continue\n", " data[field_name] = model.__getattribute__(field_name)\n", - " return data\n" + " return data" ] }, { @@ -305,7 +387,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "# test it\n", "for i, grouped_row in enumerate(combined_experiments_fields):\n", " # add the missing fields to exp1\n", @@ -321,38 +403,38 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "def _combine_experiments(experiments: t.Sequence[Experiment]):\n", " \"\"\"Group experiment rows by their title property value.\"\"\"\n", " if not experiments:\n", " return []\n", - " \n", + "\n", " title_property: str = _get_title_property(experiments[0].model)\n", - " \n", + "\n", " # Create a dictionary to group rows by title value\n", " grouped_by_title = {}\n", - " \n", + "\n", " # Process each experiment\n", " for exp in experiments:\n", " for row in exp:\n", " title_value = getattr(row, title_property)\n", - " \n", + "\n", " # Create key if it doesn't exist\n", " if title_value not in grouped_by_title:\n", " grouped_by_title[title_value] = []\n", - " \n", + "\n", " # 
Add this row to the appropriate group\n", " row_dict = _model_to_dict(row)\n", " row_dict[\"experiment_name\"] = exp.name\n", " grouped_by_title[title_value].append(row_dict)\n", - " \n", + "\n", " # Convert dictionary to list and add id_str\n", " result = []\n", " for i, (_, rows) in enumerate(grouped_by_title.items()):\n", " for row in rows:\n", " row[\"id_str\"] = str(i)\n", " result.append(rows)\n", - " \n", + "\n", " return result" ] }, @@ -362,7 +444,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "# lets see if the asserts pass though\n", "test_eq(_combine_experiments([exp1, exp2, exp3]), combined_experiments_fields)" ] @@ -381,7 +463,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "@patch\n", "def compare_experiments(\n", " self: Project,\n", @@ -393,6 +475,7 @@ " class CombinedModel(NotionModel):\n", " id_str: str = nmt.Text()\n", " experiment_name: str = nmt.Text()\n", + "\n", " for exp in experiments:\n", " for field in exp.model._fields.keys():\n", " if field not in CombinedModel._fields:\n", @@ -421,12 +504,12 @@ " combined_model_instance = CombinedModel(**row)\n", " self._notion_backend.create_page_in_database(\n", " database_id=comparison_database_id,\n", - " properties=combined_model_instance.to_notion()[\"properties\"]\n", + " properties=combined_model_instance.to_notion()[\"properties\"],\n", " )\n", " # Get the URL for the created database\n", " # The format for Notion URLs is: https://www.notion.so/{database_id}\n", " notion_url = f\"https://www.notion.so/{comparison_database_id.replace('-', '')}\"\n", - " \n", + "\n", " return notion_url" ] } diff --git a/nbs/project/core.ipynb b/nbs/project/core.ipynb index 06824db..c3da57d 100644 --- a/nbs/project/core.ipynb +++ b/nbs/project/core.ipynb @@ -147,7 +147,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| hide\n", + "# | hide\n", "from ragas_annotator.backends.factory import NotionBackendFactory" ] }, @@ -159,9 +159,7 @@ "source": [ "# | hide\n", "notion_backend = NotionBackendFactory.create(\n", - " root_page_id=\"your_root_page_id\",\n", - " use_mock=True,\n", - " initialize_project=True\n", + " root_page_id=\"your_root_page_id\", use_mock=True, initialize_project=True\n", ")" ] }, diff --git a/nbs/project/experiments.ipynb b/nbs/project/experiments.ipynb index 05e3404..a51fb1c 100644 --- a/nbs/project/experiments.ipynb +++ b/nbs/project/experiments.ipynb @@ -137,7 +137,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "from ragas_annotator.project.naming import MemorableNames" ] }, @@ -147,7 +147,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "memorable_names = MemorableNames()" ] }, @@ -207,7 +207,7 @@ " wrapped_experiment.__setattr__(\"run_async\", run_async)\n", " return t.cast(ExperimentProtocol, wrapped_experiment)\n", "\n", - " return decorator\n" + " return decorator" ] }, { @@ -234,28 +234,30 @@ " def decorator(func: t.Callable) -> ExperimentProtocol:\n", " # First, create a base experiment wrapper\n", " base_experiment = self.experiment(experiment_model, name_prefix)(func)\n", - " \n", + "\n", " # Override the wrapped function to add Langfuse observation\n", " @wraps(func)\n", " async def wrapped_with_langfuse(*args, **kwargs):\n", " # wrap the function with langfuse observation\n", " observed_func = observe(name=f\"{name_prefix}-{func.__name__}\")(func)\n", " return await observed_func(*args, **kwargs)\n", - " \n", + "\n", " # Replace the async function to use Langfuse\n", 
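The Langfuse-enabled experiment decorator being assembled here boils down to wrap-and-delegate: keep the base experiment wrapper, but run the user's function through Langfuse's `observe` decorator before `run_async` executes it. Reduced to plain Python, with a stand-in `observe` (the real one comes from Langfuse and records a trace rather than printing):

```py
import typing as t
from functools import wraps

def observe(name: str):
    # Stand-in for the Langfuse observe decorator; only the shape matters here.
    def deco(fn):
        @wraps(fn)
        async def inner(*args, **kwargs):
            print(f"[trace:{name}]")
            return await fn(*args, **kwargs)
        return inner
    return deco

def traced(name_prefix: str):
    def decorator(func: t.Callable):
        @wraps(func)
        async def wrapped(*args, **kwargs):
            observed = observe(name=f"{name_prefix}-{func.__name__}")(func)
            return await observed(*args, **kwargs)
        return wrapped
    return decorator

# usage:
# @traced("exp")
# async def my_experiment(row): ...
```

Wrapping at call time inside `wrapped`, rather than at decoration time, leaves the original function untouched, which is why the surrounding code can still swap `__wrapped__` on the base experiment before delegating to the original `run_async`.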
" original_run_async = base_experiment.run_async\n", - " \n", + "\n", " # Use the original run_async but with the Langfuse-wrapped function\n", - " async def run_async_with_langfuse(dataset: Dataset, name: t.Optional[str] = None):\n", + " async def run_async_with_langfuse(\n", + " dataset: Dataset, name: t.Optional[str] = None\n", + " ):\n", " # Override the internal wrapped_experiment with our Langfuse version\n", " base_experiment.__wrapped__ = wrapped_with_langfuse\n", - " \n", + "\n", " # Call the original run_async which will now use our Langfuse-wrapped function\n", " return await original_run_async(dataset, name)\n", - " \n", + "\n", " # Replace the run_async method\n", " base_experiment.__setattr__(\"run_async\", run_async_with_langfuse)\n", - " \n", + "\n", " return t.cast(ExperimentProtocol, base_experiment)\n", "\n", " return decorator" diff --git a/nbs/project/naming.ipynb b/nbs/project/naming.ipynb index 98c6241..dac4d28 100644 --- a/nbs/project/naming.ipynb +++ b/nbs/project/naming.ipynb @@ -25,7 +25,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "import random" ] }, @@ -35,70 +35,210 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "class MemorableNames:\n", " def __init__(self):\n", " # List of adjectives (similar to what Docker uses)\n", " self.adjectives = [\n", - " \"admiring\", \"adoring\", \"affectionate\", \"agitated\", \"amazing\",\n", - " \"angry\", \"awesome\", \"blissful\", \"bold\", \"boring\", \"brave\", \"busy\",\n", - " \"charming\", \"clever\", \"cool\", \"compassionate\", \"competent\", \"condescending\",\n", - " \"confident\", \"cranky\", \"crazy\", \"dazzling\", \"determined\", \"distracted\",\n", - " \"dreamy\", \"eager\", \"ecstatic\", \"elastic\", \"elated\", \"elegant\", \"eloquent\",\n", - " \"epic\", \"fervent\", \"festive\", \"flamboyant\", \"focused\", \"friendly\",\n", - " \"frosty\", \"gallant\", \"gifted\", \"goofy\", \"gracious\", \"happy\", \"hardcore\",\n", - " \"heuristic\", \"hopeful\", \"hungry\", \"infallible\", \"inspiring\", \"jolly\",\n", - " \"jovial\", \"keen\", \"kind\", \"laughing\", \"loving\", \"lucid\", \"magical\",\n", - " \"mystifying\", \"modest\", \"musing\", \"naughty\", \"nervous\", \"nifty\", \"nostalgic\",\n", - " \"objective\", \"optimistic\", \"peaceful\", \"pedantic\", \"pensive\", \"practical\",\n", - " \"priceless\", \"quirky\", \"quizzical\", \"relaxed\", \"reverent\", \"romantic\",\n", - " \"sad\", \"serene\", \"sharp\", \"silly\", \"sleepy\", \"stoic\", \"stupefied\",\n", - " \"suspicious\", \"sweet\", \"tender\", \"thirsty\", \"trusting\", \"upbeat\", \"vibrant\",\n", - " \"vigilant\", \"vigorous\", \"wizardly\", \"wonderful\", \"xenodochial\", \"youthful\",\n", - " \"zealous\", \"zen\"\n", + " \"admiring\",\n", + " \"adoring\",\n", + " \"affectionate\",\n", + " \"agitated\",\n", + " \"amazing\",\n", + " \"angry\",\n", + " \"awesome\",\n", + " \"blissful\",\n", + " \"bold\",\n", + " \"boring\",\n", + " \"brave\",\n", + " \"busy\",\n", + " \"charming\",\n", + " \"clever\",\n", + " \"cool\",\n", + " \"compassionate\",\n", + " \"competent\",\n", + " \"condescending\",\n", + " \"confident\",\n", + " \"cranky\",\n", + " \"crazy\",\n", + " \"dazzling\",\n", + " \"determined\",\n", + " \"distracted\",\n", + " \"dreamy\",\n", + " \"eager\",\n", + " \"ecstatic\",\n", + " \"elastic\",\n", + " \"elated\",\n", + " \"elegant\",\n", + " \"eloquent\",\n", + " \"epic\",\n", + " \"fervent\",\n", + " \"festive\",\n", + " \"flamboyant\",\n", + " \"focused\",\n", + 
" \"friendly\",\n", + " \"frosty\",\n", + " \"gallant\",\n", + " \"gifted\",\n", + " \"goofy\",\n", + " \"gracious\",\n", + " \"happy\",\n", + " \"hardcore\",\n", + " \"heuristic\",\n", + " \"hopeful\",\n", + " \"hungry\",\n", + " \"infallible\",\n", + " \"inspiring\",\n", + " \"jolly\",\n", + " \"jovial\",\n", + " \"keen\",\n", + " \"kind\",\n", + " \"laughing\",\n", + " \"loving\",\n", + " \"lucid\",\n", + " \"magical\",\n", + " \"mystifying\",\n", + " \"modest\",\n", + " \"musing\",\n", + " \"naughty\",\n", + " \"nervous\",\n", + " \"nifty\",\n", + " \"nostalgic\",\n", + " \"objective\",\n", + " \"optimistic\",\n", + " \"peaceful\",\n", + " \"pedantic\",\n", + " \"pensive\",\n", + " \"practical\",\n", + " \"priceless\",\n", + " \"quirky\",\n", + " \"quizzical\",\n", + " \"relaxed\",\n", + " \"reverent\",\n", + " \"romantic\",\n", + " \"sad\",\n", + " \"serene\",\n", + " \"sharp\",\n", + " \"silly\",\n", + " \"sleepy\",\n", + " \"stoic\",\n", + " \"stupefied\",\n", + " \"suspicious\",\n", + " \"sweet\",\n", + " \"tender\",\n", + " \"thirsty\",\n", + " \"trusting\",\n", + " \"upbeat\",\n", + " \"vibrant\",\n", + " \"vigilant\",\n", + " \"vigorous\",\n", + " \"wizardly\",\n", + " \"wonderful\",\n", + " \"xenodochial\",\n", + " \"youthful\",\n", + " \"zealous\",\n", + " \"zen\",\n", " ]\n", - " \n", + "\n", " # List of influential computer scientists and tech entrepreneurs\n", " self.scientists = [\n", - " \"turing\", \"hopper\", \"knuth\", \"torvalds\", \"ritchie\", \"thompson\",\n", - " \"dijkstra\", \"kay\", \"wozniak\", \"gates\", \"jobs\", \"musk\", \"bezos\",\n", - " \"lovelace\", \"berners_lee\", \"cerf\", \"gosling\", \"kernighan\", \"lamport\",\n", - " \"mccarthy\", \"minsky\", \"rossum\", \"backus\", \"engelbart\", \"hamilton\",\n", - " \"chomsky\", \"shannon\", \"zuckerberg\", \"page\", \"brin\", \"matsumoto\",\n", - " \"stallman\", \"stroustrup\", \"cook\", \"neumann\", \"babbage\", \"tanenbaum\",\n", - " \"rivest\", \"shamir\", \"adleman\", \"carmack\", \"andreessen\", \"ullman\",\n", - " \"postel\", \"huffman\", \"boole\", \"curry\", \"liskov\", \"wing\", \"goldwasser\",\n", - " \"hoare\", \"milner\", \"perlis\", \"sutherland\", \"tarjan\", \"valiant\",\n", - " \"yao\", \"hopcroft\", \"naur\", \"wilkes\", \"codd\", \"diffie\", \"hellman\",\n", - " \"pearl\", \"thiel\", \"narayen\", \"nadella\", \"pichai\", \"dorsey\"\n", + " \"turing\",\n", + " \"hopper\",\n", + " \"knuth\",\n", + " \"torvalds\",\n", + " \"ritchie\",\n", + " \"thompson\",\n", + " \"dijkstra\",\n", + " \"kay\",\n", + " \"wozniak\",\n", + " \"gates\",\n", + " \"jobs\",\n", + " \"musk\",\n", + " \"bezos\",\n", + " \"lovelace\",\n", + " \"berners_lee\",\n", + " \"cerf\",\n", + " \"gosling\",\n", + " \"kernighan\",\n", + " \"lamport\",\n", + " \"mccarthy\",\n", + " \"minsky\",\n", + " \"rossum\",\n", + " \"backus\",\n", + " \"engelbart\",\n", + " \"hamilton\",\n", + " \"chomsky\",\n", + " \"shannon\",\n", + " \"zuckerberg\",\n", + " \"page\",\n", + " \"brin\",\n", + " \"matsumoto\",\n", + " \"stallman\",\n", + " \"stroustrup\",\n", + " \"cook\",\n", + " \"neumann\",\n", + " \"babbage\",\n", + " \"tanenbaum\",\n", + " \"rivest\",\n", + " \"shamir\",\n", + " \"adleman\",\n", + " \"carmack\",\n", + " \"andreessen\",\n", + " \"ullman\",\n", + " \"postel\",\n", + " \"huffman\",\n", + " \"boole\",\n", + " \"curry\",\n", + " \"liskov\",\n", + " \"wing\",\n", + " \"goldwasser\",\n", + " \"hoare\",\n", + " \"milner\",\n", + " \"perlis\",\n", + " \"sutherland\",\n", + " \"tarjan\",\n", + " \"valiant\",\n", + " \"yao\",\n", + 
" \"hopcroft\",\n", + " \"naur\",\n", + " \"wilkes\",\n", + " \"codd\",\n", + " \"diffie\",\n", + " \"hellman\",\n", + " \"pearl\",\n", + " \"thiel\",\n", + " \"narayen\",\n", + " \"nadella\",\n", + " \"pichai\",\n", + " \"dorsey\",\n", " ]\n", - " \n", + "\n", " self.used_names = set()\n", - " \n", + "\n", " def generate_name(self):\n", " \"\"\"Generate a single experiment name.\"\"\"\n", " adjective = random.choice(self.adjectives)\n", " scientist = random.choice(self.scientists)\n", " return f\"{adjective}_{scientist}\"\n", - " \n", + "\n", " def generate_unique_name(self):\n", " \"\"\"Generate a unique experiment name.\"\"\"\n", " attempts = 0\n", " max_attempts = 100 # Prevent infinite loops\n", - " \n", + "\n", " while attempts < max_attempts:\n", " name = self.generate_name()\n", " if name not in self.used_names:\n", " self.used_names.add(name)\n", " return name\n", " attempts += 1\n", - " \n", + "\n", " # If we exhaust our combinations, add a random suffix\n", " base_name = self.generate_name()\n", " unique_name = f\"{base_name}_{random.randint(1000, 9999)}\"\n", " self.used_names.add(unique_name)\n", " return unique_name\n", - " \n", + "\n", " def generate_unique_names(self, count):\n", " \"\"\"Generate multiple unique experiment names.\"\"\"\n", " return [self.generate_unique_name() for _ in range(count)]" diff --git a/nbs/tracing/langfuse.ipynb b/nbs/tracing/langfuse.ipynb index 444b5a7..465b113 100644 --- a/nbs/tracing/langfuse.ipynb +++ b/nbs/tracing/langfuse.ipynb @@ -15,7 +15,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| default_exp tracing.langfuse" + "# | default_exp tracing.langfuse" ] }, { @@ -24,7 +24,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "import typing as t\n", "import asyncio\n", "import logging\n", @@ -41,7 +41,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "# just adding it to the namespace\n", "observe = observe" ] @@ -52,7 +52,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "logger = logging.getLogger(__name__)" ] }, @@ -62,14 +62,14 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "class LangfuseTrace:\n", " def __init__(self, trace: TraceWithFullDetails):\n", " self.trace = trace\n", " self._langfuse_client = langfuse_context.client_instance\n", "\n", " def get_url(self):\n", - " return langfuse_context.get_current_trace_url() \n", + " return langfuse_context.get_current_trace_url()\n", "\n", " def filter(self, span_name: str) -> t.List[Observation]:\n", " trace = self._langfuse_client.fetch_trace(self.trace.id)\n", @@ -82,8 +82,10 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", - "async def sync_trace(trace_id: t.Optional[str] = None, max_retries: int = 10, delay: float = 2) -> LangfuseTrace:\n", + "# | export\n", + "async def sync_trace(\n", + " trace_id: t.Optional[str] = None, max_retries: int = 10, delay: float = 2\n", + ") -> LangfuseTrace:\n", " \"\"\"Wait for a Langfuse trace to be synced to the server.\n", "\n", " Args:\n", @@ -99,7 +101,9 @@ " trace_id = langfuse_context.get_current_trace_id()\n", "\n", " if not trace_id:\n", - " raise ValueError(\"No trace id found. Please ensure you are running this function within a function decorated with @observe().\")\n", + " raise ValueError(\n", + " \"No trace id found. 
Please ensure you are running this function within a function decorated with @observe().\"\n", + " )\n", " for _ in range(max_retries):\n", " langfuse_client = LangfuseSingleton().get()\n", " try:\n", @@ -122,21 +126,21 @@ "metadata": {}, "outputs": [], "source": [ - "#| export\n", + "# | export\n", "def add_query_param(url, param_name, param_value):\n", " \"\"\"Add a query parameter to a URL.\"\"\"\n", " # Parse the URL\n", " url_parts = list(urlparse(url))\n", - " \n", + "\n", " # Get query params as a dict and add new param\n", " query_dict = dict(parse_qsl(url_parts[4]))\n", " query_dict[param_name] = param_value\n", - " \n", + "\n", " # Replace the query part with updated params\n", " url_parts[4] = urlencode(query_dict)\n", - " \n", + "\n", " # Reconstruct the URL\n", - " return urlunparse(url_parts)\n" + " return urlunparse(url_parts)" ] }, { diff --git a/ragas_annotator/__init__.py b/ragas_annotator/__init__.py index 0d55343..cb36591 100644 --- a/ragas_annotator/__init__.py +++ b/ragas_annotator/__init__.py @@ -1,4 +1,4 @@ -__version__ = "0.0.1" +__version__ = "0.0.2" # AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/init_module.ipynb. # %% auto 0 diff --git a/ragas_annotator/_modidx.py b/ragas_annotator/_modidx.py index e9a1ff1..266a13f 100644 --- a/ragas_annotator/_modidx.py +++ b/ragas_annotator/_modidx.py @@ -97,6 +97,106 @@ 'ragas_annotator/backends/notion_backend.py'), 'ragas_annotator.backends.notion_backend.get_page_id': ( 'backends/notion.html#get_page_id', 'ragas_annotator/backends/notion_backend.py')}, + 'ragas_annotator.backends.ragas_api_client': { 'ragas_annotator.backends.ragas_api_client.Column': ( 'backends/ragas_api_client.html#column', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.ColumnType': ( 'backends/ragas_api_client.html#columntype', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay': ( 'backends/ragas_api_client.html#ragasrelay', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.__init__': ( 'backends/ragas_api_client.html#ragasrelay.__init__', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay._create_resource': ( 'backends/ragas_api_client.html#ragasrelay._create_resource', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay._create_with_data': ( 'backends/ragas_api_client.html#ragasrelay._create_with_data', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay._delete_resource': ( 'backends/ragas_api_client.html#ragasrelay._delete_resource', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay._get_resource': ( 'backends/ragas_api_client.html#ragasrelay._get_resource', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay._list_resources': ( 'backends/ragas_api_client.html#ragasrelay._list_resources', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay._request': ( 'backends/ragas_api_client.html#ragasrelay._request', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay._update_resource': ( 'backends/ragas_api_client.html#ragasrelay._update_resource', + 'ragas_annotator/backends/ragas_api_client.py'), + 
'ragas_annotator.backends.ragas_api_client.RagasRelay.convert_raw_data': ( 'backends/ragas_api_client.html#ragasrelay.convert_raw_data', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.create_column': ( 'backends/ragas_api_client.html#ragasrelay.create_column', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.create_column_map': ( 'backends/ragas_api_client.html#ragasrelay.create_column_map', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.create_dataset': ( 'backends/ragas_api_client.html#ragasrelay.create_dataset', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.create_dataset_column': ( 'backends/ragas_api_client.html#ragasrelay.create_dataset_column', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.create_dataset_with_data': ( 'backends/ragas_api_client.html#ragasrelay.create_dataset_with_data', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.create_experiment': ( 'backends/ragas_api_client.html#ragasrelay.create_experiment', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.create_experiment_column': ( 'backends/ragas_api_client.html#ragasrelay.create_experiment_column', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.create_experiment_row': ( 'backends/ragas_api_client.html#ragasrelay.create_experiment_row', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.create_experiment_with_data': ( 'backends/ragas_api_client.html#ragasrelay.create_experiment_with_data', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.create_project': ( 'backends/ragas_api_client.html#ragasrelay.create_project', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.create_row': ( 'backends/ragas_api_client.html#ragasrelay.create_row', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.delete_dataset': ( 'backends/ragas_api_client.html#ragasrelay.delete_dataset', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.delete_dataset_column': ( 'backends/ragas_api_client.html#ragasrelay.delete_dataset_column', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.delete_experiment': ( 'backends/ragas_api_client.html#ragasrelay.delete_experiment', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.delete_experiment_column': ( 'backends/ragas_api_client.html#ragasrelay.delete_experiment_column', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.delete_experiment_row': ( 'backends/ragas_api_client.html#ragasrelay.delete_experiment_row', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.delete_project': ( 'backends/ragas_api_client.html#ragasrelay.delete_project', + 'ragas_annotator/backends/ragas_api_client.py'), + 
'ragas_annotator.backends.ragas_api_client.RagasRelay.get_dataset': ( 'backends/ragas_api_client.html#ragasrelay.get_dataset', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.get_dataset_column': ( 'backends/ragas_api_client.html#ragasrelay.get_dataset_column', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.get_experiment': ( 'backends/ragas_api_client.html#ragasrelay.get_experiment', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.get_experiment_column': ( 'backends/ragas_api_client.html#ragasrelay.get_experiment_column', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.get_experiment_row': ( 'backends/ragas_api_client.html#ragasrelay.get_experiment_row', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.get_project': ( 'backends/ragas_api_client.html#ragasrelay.get_project', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.list_dataset_columns': ( 'backends/ragas_api_client.html#ragasrelay.list_dataset_columns', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.list_datasets': ( 'backends/ragas_api_client.html#ragasrelay.list_datasets', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.list_experiment_columns': ( 'backends/ragas_api_client.html#ragasrelay.list_experiment_columns', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.list_experiment_rows': ( 'backends/ragas_api_client.html#ragasrelay.list_experiment_rows', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.list_experiments': ( 'backends/ragas_api_client.html#ragasrelay.list_experiments', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.list_projects': ( 'backends/ragas_api_client.html#ragasrelay.list_projects', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.update_dataset': ( 'backends/ragas_api_client.html#ragasrelay.update_dataset', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.update_dataset_column': ( 'backends/ragas_api_client.html#ragasrelay.update_dataset_column', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.update_experiment': ( 'backends/ragas_api_client.html#ragasrelay.update_experiment', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.update_experiment_column': ( 'backends/ragas_api_client.html#ragasrelay.update_experiment_column', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.update_experiment_row': ( 'backends/ragas_api_client.html#ragasrelay.update_experiment_row', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RagasRelay.update_project': ( 'backends/ragas_api_client.html#ragasrelay.update_project', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.Row': ( 'backends/ragas_api_client.html#row', + 
'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.RowCell': ( 'backends/ragas_api_client.html#rowcell', + 'ragas_annotator/backends/ragas_api_client.py'), + 'ragas_annotator.backends.ragas_api_client.create_nano_id': ( 'backends/ragas_api_client.html#create_nano_id', + 'ragas_annotator/backends/ragas_api_client.py')}, 'ragas_annotator.core': {'ragas_annotator.core.foo': ('core.html#foo', 'ragas_annotator/core.py')}, 'ragas_annotator.dataset': { 'ragas_annotator.dataset.Dataset': ('dataset.html#dataset', 'ragas_annotator/dataset.py'), 'ragas_annotator.dataset.Dataset.__getitem__': ( 'dataset.html#dataset.__getitem__', @@ -321,6 +421,20 @@ 'ragas_annotator/model/notion_typing.py'), 'ragas_annotator.model.notion_typing.MultiSelect.validate': ( 'model/notion_types.html#multiselect.validate', 'ragas_annotator/model/notion_typing.py'), + 'ragas_annotator.model.notion_typing.NotionFieldMeta': ( 'model/notion_types.html#notionfieldmeta', + 'ragas_annotator/model/notion_typing.py'), + 'ragas_annotator.model.notion_typing.NotionFieldMeta.__init__': ( 'model/notion_types.html#notionfieldmeta.__init__', + 'ragas_annotator/model/notion_typing.py'), + 'ragas_annotator.model.notion_typing.NotionFieldMeta.__set_name__': ( 'model/notion_types.html#notionfieldmeta.__set_name__', + 'ragas_annotator/model/notion_typing.py'), + 'ragas_annotator.model.notion_typing.NotionFieldMeta.from_notion': ( 'model/notion_types.html#notionfieldmeta.from_notion', + 'ragas_annotator/model/notion_typing.py'), + 'ragas_annotator.model.notion_typing.NotionFieldMeta.to_notion': ( 'model/notion_types.html#notionfieldmeta.to_notion', + 'ragas_annotator/model/notion_typing.py'), + 'ragas_annotator.model.notion_typing.NotionFieldMeta.to_notion_property': ( 'model/notion_types.html#notionfieldmeta.to_notion_property', + 'ragas_annotator/model/notion_typing.py'), + 'ragas_annotator.model.notion_typing.NotionFieldMeta.validate': ( 'model/notion_types.html#notionfieldmeta.validate', + 'ragas_annotator/model/notion_typing.py'), 'ragas_annotator.model.notion_typing.Select': ( 'model/notion_types.html#select', 'ragas_annotator/model/notion_typing.py'), 'ragas_annotator.model.notion_typing.Select.__init__': ( 'model/notion_types.html#select.__init__', @@ -345,6 +459,14 @@ 'ragas_annotator/model/notion_typing.py'), 'ragas_annotator.model.notion_typing.Text._to_notion': ( 'model/notion_types.html#text._to_notion', 'ragas_annotator/model/notion_typing.py'), + 'ragas_annotator.model.notion_typing.TextNew': ( 'model/notion_types.html#textnew', + 'ragas_annotator/model/notion_typing.py'), + 'ragas_annotator.model.notion_typing.TextNew.__init__': ( 'model/notion_types.html#textnew.__init__', + 'ragas_annotator/model/notion_typing.py'), + 'ragas_annotator.model.notion_typing.TextNew.from_notion': ( 'model/notion_types.html#textnew.from_notion', + 'ragas_annotator/model/notion_typing.py'), + 'ragas_annotator.model.notion_typing.TextNew.to_notion': ( 'model/notion_types.html#textnew.to_notion', + 'ragas_annotator/model/notion_typing.py'), 'ragas_annotator.model.notion_typing.Title': ( 'model/notion_types.html#title', 'ragas_annotator/model/notion_typing.py'), 'ragas_annotator.model.notion_typing.Title.__init__': ( 'model/notion_types.html#title.__init__', diff --git a/ragas_annotator/backends/factory.py b/ragas_annotator/backends/factory.py index 5e26e7d..715fc9e 100644 --- a/ragas_annotator/backends/factory.py +++ b/ragas_annotator/backends/factory.py @@ -16,28 +16,28 @@ # %% 
../../nbs/backends/factory.ipynb 3 class NotionClientFactory: """Factory for creating Notion client instances.""" - + @staticmethod def create( use_mock: bool = False, api_key: t.Optional[str] = None, initialize_project: bool = False, - root_page_id: t.Optional[str] = None + root_page_id: t.Optional[str] = None, ) -> t.Union[NotionClient, MockNotionClient]: """Create a Notion client. - + Args: use_mock: If True, create a mock client api_key: Notion API key (only used for real client) initialize_project: If True and using mock, initialize project structure root_page_id: Required if initialize_project is True - + Returns: Union[NotionClient, MockNotionClient]: A real or mock client """ if use_mock: client = MockNotionClient() - + # Optionally initialize project structure if initialize_project and root_page_id: # Create root page if it doesn't exist in the mock client @@ -51,18 +51,24 @@ def create( "archived": False, "properties": { "title": { - "type": "title", - "title": [{"plain_text": "Root Page", "type": "text", "text": {"content": "Root Page"}}] + "type": "title", + "title": [ + { + "plain_text": "Root Page", + "type": "text", + "text": {"content": "Root Page"}, + } + ], } - } + }, } client.add_page(root_page) - + # Create required sub-pages for page_name in ["Datasets", "Experiments", "Comparisons"]: # Create page ID page_id = client._create_id() - + # Create page page = { "id": page_id, @@ -72,14 +78,20 @@ def create( "archived": False, "properties": { "title": { - "type": "title", - "title": [{"plain_text": page_name, "type": "text", "text": {"content": page_name}}] + "type": "title", + "title": [ + { + "plain_text": page_name, + "type": "text", + "text": {"content": page_name}, + } + ], } }, - "parent": {"type": "page_id", "page_id": root_page_id} + "parent": {"type": "page_id", "page_id": root_page_id}, } client.add_page(page) - + # Add child block to root child_block = { "id": client._create_id(), @@ -87,45 +99,45 @@ def create( "type": "child_page", "created_time": client._get_timestamp(), "last_edited_time": client._get_timestamp(), - "child_page": { - "title": page_name - } + "child_page": {"title": page_name}, } - + client.add_children(root_page_id, [child_block]) - + return client else: # For real client, use provided API key or environment variable if api_key is None: api_key = os.getenv("NOTION_API_KEY") - + if api_key is None: - raise ValueError("api_key must be provided or set as NOTION_API_KEY environment variable") - + raise ValueError( + "api_key must be provided or set as NOTION_API_KEY environment variable" + ) + return NotionClient(auth=api_key) # %% ../../nbs/backends/factory.ipynb 7 class NotionBackendFactory: """Factory for creating NotionBackend instances.""" - + @staticmethod def create( root_page_id: str, use_mock: bool = False, api_key: t.Optional[str] = None, initialize_project: bool = False, - notion_client: t.Optional[t.Union[NotionClient, MockNotionClient]] = None + notion_client: t.Optional[t.Union[NotionClient, MockNotionClient]] = None, ) -> NotionBackend: """Create a NotionBackend instance. 
- + Args: root_page_id: The ID of the root page use_mock: If True, create a backend with a mock client api_key: Notion API key (only used for real client) initialize_project: If True and using mock, initialize project structure notion_client: Optional pre-configured Notion client - + Returns: NotionBackend: A backend instance with either real or mock client """ @@ -135,11 +147,8 @@ def create( use_mock=use_mock, api_key=api_key, initialize_project=initialize_project, - root_page_id=root_page_id + root_page_id=root_page_id, ) - + # Create and return the backend - return NotionBackend( - root_page_id=root_page_id, - notion_client=notion_client - ) + return NotionBackend(root_page_id=root_page_id, notion_client=notion_client) diff --git a/ragas_annotator/backends/mock_notion.py b/ragas_annotator/backends/mock_notion.py index af7d5f6..a84e8e8 100644 --- a/ragas_annotator/backends/mock_notion.py +++ b/ragas_annotator/backends/mock_notion.py @@ -16,14 +16,14 @@ # %% ../../nbs/backends/mock_notion_client.ipynb 3 class MockPagesAPI: """Mock implementation of notion_client.Client.pages""" - + def __init__(self, client): self.client = client - + def create(self, parent, properties, **kwargs): """Create a new page.""" page_id = self.client._create_id() - + # Create the page object page = { "id": page_id, @@ -32,16 +32,16 @@ def create(self, parent, properties, **kwargs): "last_edited_time": self.client._get_timestamp(), "archived": False, "properties": deepcopy(properties), - "parent": deepcopy(parent) + "parent": deepcopy(parent), } - + # Add page to storage self.client._pages[page_id] = page - + # Add child reference to parent parent_type = parent.get("type") parent_id = parent.get(f"{parent_type}_id") - + if parent_id: child_block = { "id": self.client._create_id(), @@ -49,64 +49,64 @@ def create(self, parent, properties, **kwargs): "type": "child_page", "created_time": self.client._get_timestamp(), "last_edited_time": self.client._get_timestamp(), - "child_page": { - "title": self._extract_title(properties) - } + "child_page": {"title": self._extract_title(properties)}, } - + if parent_id not in self.client._children: self.client._children[parent_id] = [] - + self.client._children[parent_id].append(child_block) - + return deepcopy(page) - + def retrieve(self, page_id): """Retrieve a page by ID.""" if page_id not in self.client._pages: raise NotFoundError(f"Page {page_id} not found") - + return deepcopy(self.client._pages[page_id]) - + def update(self, page_id, properties=None, archived=None, **kwargs): """Update a page.""" if page_id not in self.client._pages: raise NotFoundError(f"Page {page_id} not found") - + page = self.client._pages[page_id] - + if properties: # Update properties for key, value in properties.items(): page["properties"][key] = deepcopy(value) - + if archived is not None: page["archived"] = archived - + page["last_edited_time"] = self.client._get_timestamp() - + return deepcopy(page) - + def _extract_title(self, properties): """Extract page title from properties.""" for prop in properties.values(): if prop.get("type") == "title" and prop.get("title"): for text_obj in prop["title"]: - if text_obj.get("type") == "text" and "content" in text_obj.get("text", {}): + if text_obj.get("type") == "text" and "content" in text_obj.get( + "text", {} + ): return text_obj["text"]["content"] return "Untitled" # %% ../../nbs/backends/mock_notion_client.ipynb 4 class MockDatabasesAPI: """Mock implementation of notion_client.Client.databases""" - + def __init__(self, client): self.client = 
client - + def create(self, parent, title, properties, **kwargs): """Create a new database.""" database_id = self.client._create_id() - + # Create database object database = { "id": database_id, @@ -115,16 +115,16 @@ def create(self, parent, title, properties, **kwargs): "last_edited_time": self.client._get_timestamp(), "title": deepcopy(title), "properties": deepcopy(properties), - "parent": deepcopy(parent) + "parent": deepcopy(parent), } - + # Add database to storage self.client._databases[database_id] = database - + # Add child reference to parent parent_type = parent.get("type") parent_id = parent.get(f"{parent_type}_id") - + if parent_id: child_block = { "id": self.client._create_id(), @@ -132,45 +132,50 @@ def create(self, parent, title, properties, **kwargs): "type": "child_database", "created_time": self.client._get_timestamp(), "last_edited_time": self.client._get_timestamp(), - "child_database": { - "title": self._extract_title(title) - } + "child_database": {"title": self._extract_title(title)}, } - + if parent_id not in self.client._children: self.client._children[parent_id] = [] - + self.client._children[parent_id].append(child_block) - + return deepcopy(database) - + def retrieve(self, database_id): """Retrieve a database by ID.""" if database_id not in self.client._databases: raise NotFoundError(f"Database {database_id} not found") - + return deepcopy(self.client._databases[database_id]) - - def query(self, database_id, filter=None, sorts=None, start_cursor=None, page_size=100, **kwargs): + + def query( + self, + database_id, + filter=None, + sorts=None, + start_cursor=None, + page_size=100, + **kwargs, + ): """Query a database.""" if database_id not in self.client._databases: raise NotFoundError(f"Database {database_id} not found") - + # Get all pages in the database results = [] for page_id, page in self.client._pages.items(): parent = page.get("parent", {}) - if parent.get("type") == "database_id" and parent.get("database_id") == database_id: + if ( + parent.get("type") == "database_id" + and parent.get("database_id") == database_id + ): results.append(deepcopy(page)) - + # TODO: Implement filtering, sorting, and pagination if needed - - return { - "results": results, - "has_more": False, - "next_cursor": None - } - + + return {"results": results, "has_more": False, "next_cursor": None} + def _extract_title(self, title): """Extract database title from title array.""" for text_obj in title: @@ -178,48 +183,43 @@ def _extract_title(self, title): return text_obj["text"]["content"] return "Untitled" - # %% ../../nbs/backends/mock_notion_client.ipynb 5 class MockBlocksAPI: """Mock implementation of notion_client.Client.blocks""" - + def __init__(self, client): self.client = client self.children = MockBlockChildrenAPI(client) - + def retrieve(self, block_id): """Retrieve a block by ID.""" if block_id not in self.client._blocks: raise NotFoundError(f"Block {block_id} not found") - + return deepcopy(self.client._blocks[block_id]) - + class MockBlockChildrenAPI: """Mock implementation of notion_client.Client.blocks.children""" - + def __init__(self, client): self.client = client - + def list(self, block_id, start_cursor=None, page_size=100): """List children of a block.""" children = self.client._children.get(block_id, []) - + # TODO: Implement pagination if needed - - return { - "results": deepcopy(children), - "has_more": False, - "next_cursor": None - } + + return {"results": deepcopy(children), "has_more": False, "next_cursor": None} # %% 
../../nbs/backends/mock_notion_client.ipynb 6 class MockNotionClient: """Mock implementation of notion_client.Client for testing.""" - + def __init__(self, auth=None): """Initialize the mock client with in-memory storage. - + Args: auth: Ignored in mock implementation """ @@ -228,32 +228,32 @@ def __init__(self, auth=None): self._databases = {} # database_id -> database object self._blocks = {} # block_id -> block object self._children = {} # parent_id -> list of child blocks - + # Create API namespaces to match real client self.pages = MockPagesAPI(self) self.databases = MockDatabasesAPI(self) self.blocks = MockBlocksAPI(self) - + def _get_timestamp(self): """Generate a timestamp in Notion API format.""" return datetime.utcnow().isoformat() + "Z" - + def _create_id(self): """Generate a random ID in Notion format.""" return str(uuid.uuid4()).replace("-", "") - + def add_page(self, page_data): """Add a page to the mock storage.""" self._pages[page_data["id"]] = deepcopy(page_data) - + def add_database(self, database_data): """Add a database to the mock storage.""" self._databases[database_data["id"]] = deepcopy(database_data) - + def add_block(self, block_data): """Add a block to the mock storage.""" self._blocks[block_data["id"]] = deepcopy(block_data) - + def add_children(self, parent_id, children): """Add children to a parent.""" if parent_id not in self._children: @@ -266,4 +266,3 @@ def __str__(self): ) __repr__ = __str__ - diff --git a/ragas_annotator/backends/ragas_api_client.py b/ragas_annotator/backends/ragas_api_client.py new file mode 100644 index 0000000..54c6760 --- /dev/null +++ b/ragas_annotator/backends/ragas_api_client.py @@ -0,0 +1,781 @@ +"""Python client to api.ragas.io""" + +# AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/backends/ragas_api_client.ipynb. + +# %% auto 0 +__all__ = ['DEFAULT_SETTINGS', 'RagasRelay', 'ColumnType', 'create_nano_id', 'Column', 'RowCell', 'Row'] + +# %% ../../nbs/backends/ragas_api_client.ipynb 3 +import httpx +import asyncio +import typing as t +from pydantic import BaseModel, Field +from enum import StrEnum +import uuid +from fastcore.utils import patch + +# %% ../../nbs/backends/ragas_api_client.ipynb 4 +class RagasRelay: + """Client for the Ragas Relay API.""" + + def __init__(self, base_url: str, app_token: t.Optional[str] = None): + """Initialize the Ragas API client. + + Args: + base_url: Base URL for the API (e.g., "http://localhost:8087") + app_token: API token for authentication + """ + if not app_token: + raise ValueError("app_token must be provided") + + self.base_url = f"{base_url.rstrip('/')}/api/v1" + self.app_token = app_token + + async def _request( + self, + method: str, + endpoint: str, + params: t.Optional[t.Dict] = None, + json_data: t.Optional[t.Dict] = None, + ) -> t.Dict: + """Make a request to the API. 
+ + Args: + method: HTTP method (GET, POST, PATCH, DELETE) + endpoint: API endpoint path + params: Query parameters + json_data: JSON request body + + Returns: + The response data from the API + """ + url = f"{self.base_url}/{endpoint.lstrip('/')}" + headers = {"X-App-Token": self.app_token} + + async with httpx.AsyncClient() as client: + response = await client.request( + method=method, url=url, params=params, json=json_data, headers=headers + ) + + data = response.json() + + if response.status_code >= 400 or data.get("status") == "error": + error_msg = data.get("message", "Unknown error") + raise Exception(f"API Error ({response.status_code}): {error_msg}") + + return data.get("data") + + #---- Resource Handlers ---- + async def _create_resource(self, path, data): + """Generic resource creation.""" + return await self._request("POST", path, json_data=data) + + async def _list_resources(self, path, **params): + """Generic resource listing.""" + return await self._request("GET", path, params=params) + + async def _get_resource(self, path): + """Generic resource retrieval.""" + return await self._request("GET", path) + + async def _update_resource(self, path, data): + """Generic resource update.""" + return await self._request("PATCH", path, json_data=data) + + async def _delete_resource(self, path): + """Generic resource deletion.""" + return await self._request("DELETE", path) + +# %% ../../nbs/backends/ragas_api_client.ipynb 5 +#---- Projects ---- +@patch +async def list_projects( + self: RagasRelay, + ids: t.Optional[t.List[str]] = None, + limit: int = 50, + offset: int = 0, + order_by: t.Optional[str] = None, + sort_dir: t.Optional[str] = None, +) -> t.Dict: + """List projects.""" + params = {"limit": limit, "offset": offset} + + if ids: + params["ids"] = ",".join(ids) + + if order_by: + params["order_by"] = order_by + + if sort_dir: + params["sort_dir"] = sort_dir + + return await self._list_resources("projects", **params) + +@patch +async def get_project(self: RagasRelay, project_id: str) -> t.Dict: + """Get a specific project by ID.""" + return await self._get_resource(f"projects/{project_id}") + +@patch +async def create_project( + self: RagasRelay, title: str, description: t.Optional[str] = None +) -> t.Dict: + """Create a new project.""" + data = {"title": title} + if description: + data["description"] = description + return await self._create_resource("projects", data) + +@patch +async def update_project( + self: RagasRelay, + project_id: str, + title: t.Optional[str] = None, + description: t.Optional[str] = None, +) -> t.Dict: + """Update an existing project.""" + data = {} + if title: + data["title"] = title + if description: + data["description"] = description + return await self._update_resource(f"projects/{project_id}", data) + +@patch +async def delete_project(self: RagasRelay, project_id: str) -> None: + """Delete a project.""" + await self._delete_resource(f"projects/{project_id}") + + +# %% ../../nbs/backends/ragas_api_client.ipynb 12 +#---- Datasets ---- +@patch +async def list_datasets( + self: RagasRelay, + project_id: str, + limit: int = 50, + offset: int = 0, + order_by: t.Optional[str] = None, + sort_dir: t.Optional[str] = None, +) -> t.Dict: + """List datasets in a project.""" + params = {"limit": limit, "offset": offset} + if order_by: + params["order_by"] = order_by + if sort_dir: + params["sort_dir"] = sort_dir + return await self._list_resources(f"projects/{project_id}/datasets", **params) + +@patch +async def get_dataset(self: RagasRelay, project_id: str, 
dataset_id: str) -> t.Dict: + """Get a specific dataset.""" + return await self._get_resource(f"projects/{project_id}/datasets/{dataset_id}") + +@patch +async def create_dataset( + self: RagasRelay, project_id: str, name: str, description: t.Optional[str] = None +) -> t.Dict: + """Create a new dataset in a project.""" + data = {"name": name} + if description: + data["description"] = description + return await self._create_resource(f"projects/{project_id}/datasets", data) + +@patch +async def update_dataset( + self: RagasRelay, + project_id: str, + dataset_id: str, + name: t.Optional[str] = None, + description: t.Optional[str] = None, +) -> t.Dict: + """Update an existing dataset.""" + data = {} + if name: + data["name"] = name + if description: + data["description"] = description + return await self._update_resource(f"projects/{project_id}/datasets/{dataset_id}", data) + +@patch +async def delete_dataset(self: RagasRelay, project_id: str, dataset_id: str) -> None: + """Delete a dataset.""" + await self._delete_resource(f"projects/{project_id}/datasets/{dataset_id}") + +# %% ../../nbs/backends/ragas_api_client.ipynb 19 +#---- Experiments ---- +@patch +async def list_experiments( + self: RagasRelay, + project_id: str, + limit: int = 50, + offset: int = 0, + order_by: t.Optional[str] = None, + sort_dir: t.Optional[str] = None, +) -> t.Dict: + """List experiments in a project.""" + params = {"limit": limit, "offset": offset} + if order_by: + params["order_by"] = order_by + if sort_dir: + params["sort_dir"] = sort_dir + return await self._list_resources(f"projects/{project_id}/experiments", **params) + +@patch +async def get_experiment(self: RagasRelay, project_id: str, experiment_id: str) -> t.Dict: + """Get a specific experiment.""" + return await self._get_resource(f"projects/{project_id}/experiments/{experiment_id}") + +@patch +async def create_experiment( + self: RagasRelay, project_id: str, name: str, description: t.Optional[str] = None +) -> t.Dict: + """Create a new experiment in a project.""" + data = {"name": name} + if description: + data["description"] = description + return await self._create_resource(f"projects/{project_id}/experiments", data) + +@patch +async def update_experiment( + self: RagasRelay, + project_id: str, + experiment_id: str, + name: t.Optional[str] = None, + description: t.Optional[str] = None, +) -> t.Dict: + """Update an existing experiment.""" + data = {} + if name: + data["name"] = name + if description: + data["description"] = description + return await self._update_resource(f"projects/{project_id}/experiments/{experiment_id}", data) + +@patch +async def delete_experiment(self: RagasRelay, project_id: str, experiment_id: str) -> None: + """Delete an experiment.""" + await self._delete_resource(f"projects/{project_id}/experiments/{experiment_id}") + + +# %% ../../nbs/backends/ragas_api_client.ipynb 24 +class ColumnType(StrEnum): + NUMBER = "number" + TEXT = "text" + LONG_TEXT = "longText" + SELECT = "select" + DATE = "date" + MULTI_SELECT = "multiSelect" + CHECKBOX = "checkbox" + CUSTOM = "custom" + +# %% ../../nbs/backends/ragas_api_client.ipynb 25 +#---- Dataset Columns ---- +@patch +async def list_dataset_columns( + self: RagasRelay, + project_id: str, + dataset_id: str, + limit: int = 50, + offset: int = 0, + order_by: t.Optional[str] = None, + sort_dir: t.Optional[str] = None, +) -> t.Dict: + """List columns in a dataset.""" + params = {"limit": limit, "offset": offset} + if order_by: + params["order_by"] = order_by + if sort_dir: + params["sort_dir"] = 
sort_dir + return await self._list_resources( + f"projects/{project_id}/datasets/{dataset_id}/columns", **params + ) + +@patch +async def get_dataset_column( + self: RagasRelay, project_id: str, dataset_id: str, column_id: str +) -> t.Dict: + """Get a specific column in a dataset.""" + return await self._get_resource( + f"projects/{project_id}/datasets/{dataset_id}/columns/{column_id}" + ) + +@patch +async def create_dataset_column( + self: RagasRelay, + project_id: str, + dataset_id: str, + id: str, + name: str, + type: str, + col_order: t.Optional[int] = None, + settings: t.Optional[t.Dict] = None, +) -> t.Dict: + """Create a new column in a dataset.""" + data = {"id": id, "name": name, "type": type} + if col_order is not None: + data["col_order"] = col_order + if settings: + data["settings"] = settings + return await self._create_resource( + f"projects/{project_id}/datasets/{dataset_id}/columns", data + ) + +@patch +async def update_dataset_column( + self: RagasRelay, project_id: str, dataset_id: str, column_id: str, **column_data +) -> t.Dict: + """Update an existing column in a dataset.""" + return await self._update_resource( + f"projects/{project_id}/datasets/{dataset_id}/columns/{column_id}", + column_data, + ) + +@patch +async def delete_dataset_column( + self: RagasRelay, project_id: str, dataset_id: str, column_id: str +) -> None: + """Delete a column from a dataset.""" + await self._delete_resource( + f"projects/{project_id}/datasets/{dataset_id}/columns/{column_id}" + ) + +# %% ../../nbs/backends/ragas_api_client.ipynb 36 +import uuid +import string + +# %% ../../nbs/backends/ragas_api_client.ipynb 37 +def create_nano_id(size=12): + # Define characters to use (alphanumeric) + alphabet = string.ascii_letters + string.digits + + # Generate UUID and convert to int + uuid_int = uuid.uuid4().int + + # Convert to base62 + result = "" + while uuid_int: + uuid_int, remainder = divmod(uuid_int, len(alphabet)) + result = alphabet[remainder] + result + + # Truncate to the desired length (no padding is performed; a UUID4 always yields enough base-62 digits for the default size) + return result[:size] + + +# %% ../../nbs/backends/ragas_api_client.ipynb 49 +# Default settings for columns +DEFAULT_SETTINGS = { + "is_required": False, + "max_length": 1000 +} + +# Model definitions +class Column(BaseModel): + id: str = Field(default_factory=create_nano_id) + name: str = Field(...) + type: str = Field(...) + settings: t.Dict = Field(default_factory=lambda: DEFAULT_SETTINGS.copy()) + col_order: t.Optional[int] = Field(default=None) + +class RowCell(BaseModel): + data: t.Any = Field(...) + column_id: str = Field(...) + +class Row(BaseModel): + id: str = Field(default_factory=create_nano_id) + data: t.List[RowCell] = Field(...) + +# %% ../../nbs/backends/ragas_api_client.ipynb 50 +#---- Resource With Data Helper Methods ---- +@patch +async def _create_with_data( + self: RagasRelay, + resource_type: str, + project_id: str, + name: str, + description: str, + columns: t.List[Column], + rows: t.List[Row], + batch_size: int = 50 +) -> t.Dict: + """Generic method to create a resource with columns and rows.
+ + Args: + resource_type: Type of resource ("dataset" or "experiment") + project_id: Project ID + name: Resource name + description: Resource description + columns: List of column definitions + rows: List of row data + batch_size: Number of operations to perform concurrently + + Returns: + The created resource + """ + # Select appropriate methods based on resource type + if resource_type == "dataset": + create_fn = self.create_dataset + create_col_fn = self.create_dataset_column + create_row_fn = self.create_dataset_row + delete_fn = self.delete_dataset + id_key = "dataset_id" + elif resource_type == "experiment": + create_fn = self.create_experiment + create_col_fn = self.create_experiment_column + create_row_fn = self.create_experiment_row + delete_fn = self.delete_experiment + id_key = "experiment_id" + else: + raise ValueError(f"Unsupported resource type: {resource_type}") + + try: + # Create the resource + resource = await create_fn(project_id, name, description) + + # Process columns in batches + for i in range(0, len(columns), batch_size): + batch = columns[i:i+batch_size] + col_tasks = [] + + for col in batch: + params = { + "project_id": project_id, + id_key: resource["id"], # dataset_id or experiment_id, selected above + "id": col.id, + "name": col.name, + "type": col.type, + "settings": col.settings + } + if col.col_order is not None: + params["col_order"] = col.col_order + + col_tasks.append(create_col_fn(**params)) + + await asyncio.gather(*col_tasks) + + # Process rows in batches + for i in range(0, len(rows), batch_size): + batch = rows[i:i+batch_size] + row_tasks = [] + + for row in batch: + row_data = {cell.column_id: cell.data for cell in row.data} + row_tasks.append( + create_row_fn( + project_id=project_id, + **{id_key: resource["id"]}, + id=row.id, + data=row_data + ) + ) + + await asyncio.gather(*row_tasks) + + return resource + + except Exception: + # Clean up on error + if 'resource' in locals(): + try: + await delete_fn(project_id, resource["id"]) + except Exception: + pass # Ignore cleanup errors + raise + +@patch +async def create_dataset_with_data( + self: RagasRelay, + project_id: str, + name: str, + description: str, + columns: t.List[Column], + rows: t.List[Row], + batch_size: int = 50 +) -> t.Dict: + """Create a dataset with columns and rows. + + This method creates a dataset and populates it with columns and rows in an + optimized way using concurrent requests.
+ + Args: + project_id: Project ID + name: Dataset name + description: Dataset description + columns: List of column definitions + rows: List of row data + batch_size: Number of operations to perform concurrently + + Returns: + The created dataset + """ + return await self._create_with_data( + "dataset", project_id, name, description, columns, rows, batch_size + ) + +# %% ../../nbs/backends/ragas_api_client.ipynb 56 +#---- Experiment Columns ---- +@patch +async def list_experiment_columns( + self: RagasRelay, + project_id: str, + experiment_id: str, + limit: int = 50, + offset: int = 0, + order_by: t.Optional[str] = None, + sort_dir: t.Optional[str] = None, +) -> t.Dict: + """List columns in an experiment.""" + params = {"limit": limit, "offset": offset} + if order_by: + params["order_by"] = order_by + if sort_dir: + params["sort_dir"] = sort_dir + return await self._list_resources( + f"projects/{project_id}/experiments/{experiment_id}/columns", **params + ) + +@patch +async def get_experiment_column( + self: RagasRelay, project_id: str, experiment_id: str, column_id: str +) -> t.Dict: + """Get a specific column in an experiment.""" + return await self._get_resource( + f"projects/{project_id}/experiments/{experiment_id}/columns/{column_id}" + ) + +@patch +async def create_experiment_column( + self: RagasRelay, + project_id: str, + experiment_id: str, + id: str, + name: str, + type: str, + col_order: t.Optional[int] = None, + settings: t.Optional[t.Dict] = None, +) -> t.Dict: + """Create a new column in an experiment.""" + data = {"id": id, "name": name, "type": type} + if col_order is not None: + data["col_order"] = col_order + if settings: + data["settings"] = settings + return await self._create_resource( + f"projects/{project_id}/experiments/{experiment_id}/columns", data + ) + +@patch +async def update_experiment_column( + self: RagasRelay, project_id: str, experiment_id: str, column_id: str, **column_data +) -> t.Dict: + """Update an existing column in an experiment.""" + return await self._update_resource( + f"projects/{project_id}/experiments/{experiment_id}/columns/{column_id}", + column_data, + ) + +@patch +async def delete_experiment_column( + self: RagasRelay, project_id: str, experiment_id: str, column_id: str +) -> None: + """Delete a column from an experiment.""" + await self._delete_resource( + f"projects/{project_id}/experiments/{experiment_id}/columns/{column_id}" + ) + +#---- Experiment Rows ---- +@patch +async def list_experiment_rows( + self: RagasRelay, + project_id: str, + experiment_id: str, + limit: int = 50, + offset: int = 0, + order_by: t.Optional[str] = None, + sort_dir: t.Optional[str] = None, +) -> t.Dict: + """List rows in an experiment.""" + params = {"limit": limit, "offset": offset} + if order_by: + params["order_by"] = order_by + if sort_dir: + params["sort_dir"] = sort_dir + return await self._list_resources( + f"projects/{project_id}/experiments/{experiment_id}/rows", **params + ) + +@patch +async def get_experiment_row( + self: RagasRelay, project_id: str, experiment_id: str, row_id: str +) -> t.Dict: + """Get a specific row in an experiment.""" + return await self._get_resource( + f"projects/{project_id}/experiments/{experiment_id}/rows/{row_id}" + ) + +@patch +async def create_experiment_row( + self: RagasRelay, project_id: str, experiment_id: str, id: str, data: t.Dict +) -> t.Dict: + """Create a new row in an experiment.""" + row_data = {"id": id, "data": data} + return await self._create_resource( + 
f"projects/{project_id}/experiments/{experiment_id}/rows", row_data + ) + +@patch +async def update_experiment_row( + self: RagasRelay, project_id: str, experiment_id: str, row_id: str, data: t.Dict +) -> t.Dict: + """Update an existing row in an experiment.""" + row_data = {"data": data} + return await self._update_resource( + f"projects/{project_id}/experiments/{experiment_id}/rows/{row_id}", + row_data, + ) + +@patch +async def delete_experiment_row( + self: RagasRelay, project_id: str, experiment_id: str, row_id: str +) -> None: + """Delete a row from an experiment.""" + await self._delete_resource( + f"projects/{project_id}/experiments/{experiment_id}/rows/{row_id}" + ) + +# %% ../../nbs/backends/ragas_api_client.ipynb 59 +@patch +async def create_experiment_with_data( + self: RagasRelay, + project_id: str, + name: str, + description: str, + columns: t.List[Column], + rows: t.List[Row], + batch_size: int = 50 +) -> t.Dict: + """Create an experiment with columns and rows. + + This method creates an experiment and populates it with columns and rows in an + optimized way using concurrent requests. + + Args: + project_id: Project ID + name: Experiment name + description: Experiment description + columns: List of column definitions + rows: List of row data + batch_size: Number of operations to perform concurrently + + Returns: + The created experiment + """ + return await self._create_with_data( + "experiment", project_id, name, description, columns, rows, batch_size + ) + +# %% ../../nbs/backends/ragas_api_client.ipynb 60 +#---- Utility Methods ---- +@patch +def create_column( + self: RagasRelay, + name: str, + type: str, + settings: t.Optional[t.Dict] = None, + col_order: t.Optional[int] = None, + id: t.Optional[str] = None +) -> Column: + """Create a Column object. + + Args: + name: Column name + type: Column type (use ColumnType enum) + settings: Column settings + col_order: Column order + id: Custom ID (generates one if not provided) + + Returns: + Column object + """ + params = {"name": name, "type": type} + if settings: + params["settings"] = settings + if col_order is not None: + params["col_order"] = col_order + if id: + params["id"] = id + + return Column(**params) + +@patch +def create_row( + self: RagasRelay, + data: t.Dict[str, t.Any], + column_map: t.Dict[str, str], + id: t.Optional[str] = None +) -> Row: + """Create a Row object from a dictionary. + + Args: + data: Dictionary mapping column names to values + column_map: Dictionary mapping column names to column IDs + id: Custom ID (generates one if not provided) + + Returns: + Row object + """ + cells = [] + for col_name, value in data.items(): + if col_name in column_map: + cells.append(RowCell( + data=value, + column_id=column_map[col_name] + )) + + params = {"data": cells} + if id: + params["id"] = id + + return Row(**params) + +@patch +def create_column_map(self: RagasRelay, columns: t.List[Column]) -> t.Dict[str, str]: + """Create a mapping of column names to IDs. + + Args: + columns: List of column objects + + Returns: + Dictionary mapping column names to IDs + """ + return {col.name: col.id for col in columns} + +@patch +async def convert_raw_data( + self: RagasRelay, + column_defs: t.List[t.Dict], + row_data: t.List[t.Dict] +) -> t.Tuple[t.List[Column], t.List[Row]]: + """Convert raw data to column and row objects. 
+ + Args: + column_defs: List of column definitions (dicts with name, type) + row_data: List of dictionaries with row data + + Returns: + Tuple of (columns, rows) + """ + # Create columns + columns = [] + for col in column_defs: + columns.append(self.create_column(**col)) + + # Create column map + column_map = self.create_column_map(columns) + + # Create rows + rows = [] + for data in row_data: + rows.append(self.create_row(data, column_map)) + + return columns, rows diff --git a/ragas_annotator/dataset.py b/ragas_annotator/dataset.py index b9f6aa3..2900b4d 100644 --- a/ragas_annotator/dataset.py +++ b/ragas_annotator/dataset.py @@ -71,7 +71,9 @@ def __setitem__(self, index: int, entry: NotionModelType) -> None: self._entries[index] = self.model.from_notion(response) def __repr__(self) -> str: - return f"Dataset(name={self.name}, model={self.model.__name__}, len={len(self)})" + return ( + f"Dataset(name={self.name}, model={self.model.__name__}, len={len(self)})" + ) def __len__(self) -> int: return len(self._entries) diff --git a/ragas_annotator/model/notion_typing.py b/ragas_annotator/model/notion_typing.py index 55650ae..959298e 100644 --- a/ragas_annotator/model/notion_typing.py +++ b/ragas_annotator/model/notion_typing.py @@ -3,7 +3,7 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/model/notion_types.ipynb. # %% auto 0 -__all__ = ['T', 'Field', 'ID', 'Text', 'Title', 'Select', 'MultiSelect', 'URL'] +__all__ = ['T', 'Field', 'ID', 'Text', 'Title', 'Select', 'MultiSelect', 'URL', 'NotionFieldMeta', 'TextNew'] # %% ../../nbs/model/notion_types.ipynb 2 import typing as t @@ -121,10 +121,13 @@ def _to_notion(self, value: str) -> dict: # Split the text into chunks of CHUNK_SIZE characters if not value: return {self.name: {self.NOTION_FIELD_TYPE: []}} - - chunks = [value[i:i+self.CHUNK_SIZE] for i in range(0, len(value), self.CHUNK_SIZE)] + + chunks = [ + value[i : i + self.CHUNK_SIZE] + for i in range(0, len(value), self.CHUNK_SIZE) + ] rich_text_array = [{"text": {"content": chunk}} for chunk in chunks] - + return {self.name: {self.NOTION_FIELD_TYPE: rich_text_array}} def _from_notion(self, data: dict) -> t.Optional[str]: @@ -133,10 +136,10 @@ def _from_notion(self, data: dict) -> t.Optional[str]: rich_text = data["properties"][self.name][self.NOTION_FIELD_TYPE] else: rich_text = data[self.name][self.NOTION_FIELD_TYPE] - + if not rich_text: return None - + # Combine all text chunks into a single string return "".join(item["text"]["content"] for item in rich_text if "text" in item) @@ -283,3 +286,80 @@ def _from_notion(self, data: dict) -> t.Optional[str]: else: url = data[self.name][self.NOTION_FIELD_TYPE] return url + +# %% ../../nbs/model/notion_types.ipynb 20 +T = t.TypeVar("T") + + +class NotionFieldMeta: + """Base metadata class for Notion field types.""" + + NOTION_FIELD_TYPE: t.ClassVar[str] = "" + + def __init__(self, required: bool = True): + self.required = required + self.name: str = "" # Will be set during model initialization + + def __set_name__(self, owner, name: str): + """Set field name when used directly as class attribute.""" + self.name = name + + def validate(self, value: t.Any) -> t.Any: + """Validate field value.""" + if value is None and self.required: + raise ValueError(f"Field {self.name} is required") + return value + + def to_notion(self, value: t.Any) -> dict: + """Convert Python value to Notion format.""" + raise NotImplementedError() + + def from_notion(self, data: dict) -> t.Any: + """Convert Notion format to Python value.""" + raise 
NotImplementedError() + + def to_notion_property(self) -> dict: + """Convert field to Notion property definition.""" + return {self.name: {"type": self.NOTION_FIELD_TYPE, self.NOTION_FIELD_TYPE: {}}} + +# %% ../../nbs/model/notion_types.ipynb 21 +class TextNew(NotionFieldMeta): + """Rich text property type for Notion.""" + + NOTION_FIELD_TYPE = "rich_text" + CHUNK_SIZE = 2000 # Notion's character limit per rich text block + + def __init__(self, required: bool = True): + super().__init__(required=required) + + def to_notion(self, value: str) -> dict: + # Split text into chunks of CHUNK_SIZE characters + if not value: + return {self.name: {self.NOTION_FIELD_TYPE: []}} + + chunks = [ + value[i : i + self.CHUNK_SIZE] + for i in range(0, len(value), self.CHUNK_SIZE) + ] + rich_text_array = [{"text": {"content": chunk}} for chunk in chunks] + + return {self.name: {self.NOTION_FIELD_TYPE: rich_text_array}} + + def from_notion(self, data: dict) -> t.Optional[str]: + # Handle both direct and properties-wrapped format + if "properties" in data: + if self.name in data["properties"]: + rich_text = data["properties"][self.name][self.NOTION_FIELD_TYPE] + else: + return None + else: + if self.name in data: + rich_text = data[self.name][self.NOTION_FIELD_TYPE] + else: + return None + + if not rich_text: + return None + + # Combine all text chunks into a single string + return "".join(item["text"]["content"] for item in rich_text if "text" in item) diff --git a/ragas_annotator/project/comparison.py b/ragas_annotator/project/comparison.py index 4445f9b..f7b21af 100644 --- a/ragas_annotator/project/comparison.py +++ b/ragas_annotator/project/comparison.py @@ -24,12 +24,16 @@ # %% ../../nbs/project/comparison.ipynb 5 # utility function to check if a model has a title property and get the name of the title property @t.overload -def _get_title_property(model: NotionModel|t.Type[NotionModel], raise_exception: t.Literal[True] = True) -> str: - ... +def _get_title_property( + model: NotionModel | t.Type[NotionModel], raise_exception: t.Literal[True] = True +) -> str: ... @t.overload -def _get_title_property(model: NotionModel|t.Type[NotionModel], raise_exception: t.Literal[False] = False) -> t.Optional[str]: - ... -def _get_title_property(model: NotionModel|t.Type[NotionModel], raise_exception: bool = True) -> t.Optional[str]: +def _get_title_property( + model: NotionModel | t.Type[NotionModel], raise_exception: t.Literal[False] = False +) -> t.Optional[str]: ... 
+def _get_title_property( + model: NotionModel | t.Type[NotionModel], raise_exception: bool = True +) -> t.Optional[str]: has_title = False for field in model._fields.keys(): if isinstance(model._fields[field], nmt.Title): @@ -56,7 +60,9 @@ def _validate_experiments(experiments: t.Sequence[Experiment]): if not isinstance(exp, Experiment): raise ValueError("All experiments must be of type Experiment") if top_exp != exp.model: - logger.warning(f"Experiments have different models: {top_exp.model} and {exp.model}") + logger.warning( + f"Experiments have different models: {top_exp.model} and {exp.model}" + ) if title_property != _get_title_property(exp.model): raise ValueError("All experiments must have the same title property.") @@ -70,39 +76,38 @@ def _model_to_dict(model: NotionModel) -> dict: data[field_name] = model.__getattribute__(field_name) return data - # %% ../../nbs/project/comparison.ipynb 14 def _combine_experiments(experiments: t.Sequence[Experiment]): """Group experiment rows by their title property value.""" if not experiments: return [] - + title_property: str = _get_title_property(experiments[0].model) - + # Create a dictionary to group rows by title value grouped_by_title = {} - + # Process each experiment for exp in experiments: for row in exp: title_value = getattr(row, title_property) - + # Create key if it doesn't exist if title_value not in grouped_by_title: grouped_by_title[title_value] = [] - + # Add this row to the appropriate group row_dict = _model_to_dict(row) row_dict["experiment_name"] = exp.name grouped_by_title[title_value].append(row_dict) - + # Convert dictionary to list and add id_str result = [] for i, (_, rows) in enumerate(grouped_by_title.items()): for row in rows: row["id_str"] = str(i) result.append(rows) - + return result # %% ../../nbs/project/comparison.ipynb 17 @@ -117,6 +122,7 @@ def compare_experiments( class CombinedModel(NotionModel): id_str: str = nmt.Text() experiment_name: str = nmt.Text() + for exp in experiments: for field in exp.model._fields.keys(): if field not in CombinedModel._fields: @@ -145,10 +151,10 @@ class CombinedModel(NotionModel): combined_model_instance = CombinedModel(**row) self._notion_backend.create_page_in_database( database_id=comparison_database_id, - properties=combined_model_instance.to_notion()["properties"] + properties=combined_model_instance.to_notion()["properties"], ) # Get the URL for the created database # The format for Notion URLs is: https://www.notion.so/{database_id} notion_url = f"https://www.notion.so/{comparison_database_id.replace('-', '')}" - + return notion_url diff --git a/ragas_annotator/project/experiments.py b/ragas_annotator/project/experiments.py index 9d94044..ecc2ffd 100644 --- a/ragas_annotator/project/experiments.py +++ b/ragas_annotator/project/experiments.py @@ -140,7 +140,6 @@ async def run_async(dataset: Dataset, name: t.Optional[str] = None): return decorator - # %% ../../nbs/project/experiments.ipynb 10 @patch def langfuse_experiment( @@ -159,28 +158,30 @@ def langfuse_experiment( def decorator(func: t.Callable) -> ExperimentProtocol: # First, create a base experiment wrapper base_experiment = self.experiment(experiment_model, name_prefix)(func) - + # Override the wrapped function to add Langfuse observation @wraps(func) async def wrapped_with_langfuse(*args, **kwargs): # wrap the function with langfuse observation observed_func = observe(name=f"{name_prefix}-{func.__name__}")(func) return await observed_func(*args, **kwargs) - + # Replace the async function to use Langfuse 
original_run_async = base_experiment.run_async - + # Use the original run_async but with the Langfuse-wrapped function - async def run_async_with_langfuse(dataset: Dataset, name: t.Optional[str] = None): + async def run_async_with_langfuse( + dataset: Dataset, name: t.Optional[str] = None + ): # Override the internal wrapped_experiment with our Langfuse version base_experiment.__wrapped__ = wrapped_with_langfuse - + # Call the original run_async which will now use our Langfuse-wrapped function return await original_run_async(dataset, name) - + # Replace the run_async method base_experiment.__setattr__("run_async", run_async_with_langfuse) - + return t.cast(ExperimentProtocol, base_experiment) return decorator diff --git a/ragas_annotator/project/naming.py b/ragas_annotator/project/naming.py index 7f6cc7f..d9204b3 100644 --- a/ragas_annotator/project/naming.py +++ b/ragas_annotator/project/naming.py @@ -13,65 +13,205 @@ class MemorableNames: def __init__(self): # List of adjectives (similar to what Docker uses) self.adjectives = [ - "admiring", "adoring", "affectionate", "agitated", "amazing", - "angry", "awesome", "blissful", "bold", "boring", "brave", "busy", - "charming", "clever", "cool", "compassionate", "competent", "condescending", - "confident", "cranky", "crazy", "dazzling", "determined", "distracted", - "dreamy", "eager", "ecstatic", "elastic", "elated", "elegant", "eloquent", - "epic", "fervent", "festive", "flamboyant", "focused", "friendly", - "frosty", "gallant", "gifted", "goofy", "gracious", "happy", "hardcore", - "heuristic", "hopeful", "hungry", "infallible", "inspiring", "jolly", - "jovial", "keen", "kind", "laughing", "loving", "lucid", "magical", - "mystifying", "modest", "musing", "naughty", "nervous", "nifty", "nostalgic", - "objective", "optimistic", "peaceful", "pedantic", "pensive", "practical", - "priceless", "quirky", "quizzical", "relaxed", "reverent", "romantic", - "sad", "serene", "sharp", "silly", "sleepy", "stoic", "stupefied", - "suspicious", "sweet", "tender", "thirsty", "trusting", "upbeat", "vibrant", - "vigilant", "vigorous", "wizardly", "wonderful", "xenodochial", "youthful", - "zealous", "zen" + "admiring", + "adoring", + "affectionate", + "agitated", + "amazing", + "angry", + "awesome", + "blissful", + "bold", + "boring", + "brave", + "busy", + "charming", + "clever", + "cool", + "compassionate", + "competent", + "condescending", + "confident", + "cranky", + "crazy", + "dazzling", + "determined", + "distracted", + "dreamy", + "eager", + "ecstatic", + "elastic", + "elated", + "elegant", + "eloquent", + "epic", + "fervent", + "festive", + "flamboyant", + "focused", + "friendly", + "frosty", + "gallant", + "gifted", + "goofy", + "gracious", + "happy", + "hardcore", + "heuristic", + "hopeful", + "hungry", + "infallible", + "inspiring", + "jolly", + "jovial", + "keen", + "kind", + "laughing", + "loving", + "lucid", + "magical", + "mystifying", + "modest", + "musing", + "naughty", + "nervous", + "nifty", + "nostalgic", + "objective", + "optimistic", + "peaceful", + "pedantic", + "pensive", + "practical", + "priceless", + "quirky", + "quizzical", + "relaxed", + "reverent", + "romantic", + "sad", + "serene", + "sharp", + "silly", + "sleepy", + "stoic", + "stupefied", + "suspicious", + "sweet", + "tender", + "thirsty", + "trusting", + "upbeat", + "vibrant", + "vigilant", + "vigorous", + "wizardly", + "wonderful", + "xenodochial", + "youthful", + "zealous", + "zen", ] - + # List of influential computer scientists and tech entrepreneurs self.scientists = [ - 
"turing", "hopper", "knuth", "torvalds", "ritchie", "thompson", - "dijkstra", "kay", "wozniak", "gates", "jobs", "musk", "bezos", - "lovelace", "berners_lee", "cerf", "gosling", "kernighan", "lamport", - "mccarthy", "minsky", "rossum", "backus", "engelbart", "hamilton", - "chomsky", "shannon", "zuckerberg", "page", "brin", "matsumoto", - "stallman", "stroustrup", "cook", "neumann", "babbage", "tanenbaum", - "rivest", "shamir", "adleman", "carmack", "andreessen", "ullman", - "postel", "huffman", "boole", "curry", "liskov", "wing", "goldwasser", - "hoare", "milner", "perlis", "sutherland", "tarjan", "valiant", - "yao", "hopcroft", "naur", "wilkes", "codd", "diffie", "hellman", - "pearl", "thiel", "narayen", "nadella", "pichai", "dorsey" + "turing", + "hopper", + "knuth", + "torvalds", + "ritchie", + "thompson", + "dijkstra", + "kay", + "wozniak", + "gates", + "jobs", + "musk", + "bezos", + "lovelace", + "berners_lee", + "cerf", + "gosling", + "kernighan", + "lamport", + "mccarthy", + "minsky", + "rossum", + "backus", + "engelbart", + "hamilton", + "chomsky", + "shannon", + "zuckerberg", + "page", + "brin", + "matsumoto", + "stallman", + "stroustrup", + "cook", + "neumann", + "babbage", + "tanenbaum", + "rivest", + "shamir", + "adleman", + "carmack", + "andreessen", + "ullman", + "postel", + "huffman", + "boole", + "curry", + "liskov", + "wing", + "goldwasser", + "hoare", + "milner", + "perlis", + "sutherland", + "tarjan", + "valiant", + "yao", + "hopcroft", + "naur", + "wilkes", + "codd", + "diffie", + "hellman", + "pearl", + "thiel", + "narayen", + "nadella", + "pichai", + "dorsey", ] - + self.used_names = set() - + def generate_name(self): """Generate a single experiment name.""" adjective = random.choice(self.adjectives) scientist = random.choice(self.scientists) return f"{adjective}_{scientist}" - + def generate_unique_name(self): """Generate a unique experiment name.""" attempts = 0 max_attempts = 100 # Prevent infinite loops - + while attempts < max_attempts: name = self.generate_name() if name not in self.used_names: self.used_names.add(name) return name attempts += 1 - + # If we exhaust our combinations, add a random suffix base_name = self.generate_name() unique_name = f"{base_name}_{random.randint(1000, 9999)}" self.used_names.add(unique_name) return unique_name - + def generate_unique_names(self, count): """Generate multiple unique experiment names.""" return [self.generate_unique_name() for _ in range(count)] diff --git a/ragas_annotator/tracing/langfuse.py b/ragas_annotator/tracing/langfuse.py index e0f1a7d..0bab84e 100644 --- a/ragas_annotator/tracing/langfuse.py +++ b/ragas_annotator/tracing/langfuse.py @@ -29,14 +29,16 @@ def __init__(self, trace: TraceWithFullDetails): self._langfuse_client = langfuse_context.client_instance def get_url(self): - return langfuse_context.get_current_trace_url() + return langfuse_context.get_current_trace_url() def filter(self, span_name: str) -> t.List[Observation]: trace = self._langfuse_client.fetch_trace(self.trace.id) return [span for span in trace.data.observations if span.name == span_name] # %% ../../nbs/tracing/langfuse.ipynb 6 -async def sync_trace(trace_id: t.Optional[str] = None, max_retries: int = 10, delay: float = 2) -> LangfuseTrace: +async def sync_trace( + trace_id: t.Optional[str] = None, max_retries: int = 10, delay: float = 2 +) -> LangfuseTrace: """Wait for a Langfuse trace to be synced to the server. 
Args: @@ -52,7 +54,9 @@ async def sync_trace(trace_id: t.Optional[str] = None, max_retries: int = 10, de trace_id = langfuse_context.get_current_trace_id() if not trace_id: - raise ValueError("No trace id found. Please ensure you are running this function within a function decorated with @observe().") + raise ValueError( + "No trace id found. Please ensure you are running this function within a function decorated with @observe()." + ) for _ in range(max_retries): langfuse_client = LangfuseSingleton().get() try: @@ -73,14 +77,13 @@ def add_query_param(url, param_name, param_value): """Add a query parameter to a URL.""" # Parse the URL url_parts = list(urlparse(url)) - + # Get query params as a dict and add new param query_dict = dict(parse_qsl(url_parts[4])) query_dict[param_name] = param_value - + # Replace the query part with updated params url_parts[4] = urlencode(query_dict) - + # Reconstruct the URL return urlunparse(url_parts) -
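
Note for reviewers: the new ragas_api_client.py in this patch is entirely generated code, so nothing above shows the intended call pattern. The sketch below is a minimal usage example, not part of the diff. The base URL and app token are placeholders, it assumes a reachable Relay deployment that returns created resources as dicts with an "id" field, and the dataset path additionally relies on the dataset-row endpoints (create_dataset_row) that _create_with_data dispatches to but that are not visible in this excerpt.

import asyncio

from ragas_annotator.backends.ragas_api_client import ColumnType, RagasRelay


async def main():
    # Placeholder endpoint and token -- substitute your own deployment values.
    client = RagasRelay(base_url="http://localhost:8087", app_token="my-app-token")

    project = await client.create_project(
        "demo-project", description="Scratch project for the relay client"
    )

    # Build Column/Row objects locally; ids default to create_nano_id().
    columns = [
        client.create_column(name="question", type=ColumnType.TEXT),
        client.create_column(name="score", type=ColumnType.NUMBER),
    ]
    column_map = client.create_column_map(columns)
    rows = [
        client.create_row({"question": "What is Ragas?", "score": 0.9}, column_map),
        client.create_row({"question": "What is a relay?", "score": 0.7}, column_map),
    ]

    # Creates the dataset, then fans out column and row creation concurrently
    # in batches of `batch_size`, deleting the dataset again on failure.
    dataset = await client.create_dataset_with_data(
        project_id=project["id"],
        name="demo-dataset",
        description="Populated via create_dataset_with_data",
        columns=columns,
        rows=rows,
    )
    print(dataset["id"])


asyncio.run(main())

Passing the same Column objects to both create_column_map and create_dataset_with_data matters: the nano ids generated client-side are what each RowCell references, which is what lets _create_with_data create columns and rows concurrently with asyncio.gather instead of waiting for server-assigned ids.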