feat: added basics for upload (#1741)

jjmachan · web-flow · commit 9f5cccc53987 · 2024-12-09T17:57:32.000+05:30
diff --git a/.gitignore b/.gitignore
@@ -168,4 +168,6 @@ cython_debug/
 experiments/
 **/fil-result/
 src/ragas/_version.py
-.vscode
+.vscode
+.envrc
+uv.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,5 +1,6 @@
 [project]
 name = "ragas"
+requires-python = ">=3.9"
 dependencies = [
     "numpy",
     "datasets",
@@ -40,6 +41,32 @@ docs = [
     "mkdocs-git-committers-plugin-2",
     "mkdocs-git-revision-date-localized-plugin",
 ]
+dev = [
+    "rich",
+    "ruff",
+    "isort",
+    "black[jupyter]",
+    "pyright",
+    "llama_index",
+    "notebook",
+    "sphinx-autobuild",
+    "sentence-transformers",
+    "transformers",
+    "fastembed",
+    "graphene",
+    "rouge_score",
+    "nltk",
+    "rapidfuzz",
+    "pandas",
+    "datacompy",
+]
+test = [
+    "pytest",
+    "pytest-xdist[psutil]",
+    "pytest-asyncio",
+    "llama_index",
+    "nbmake",
+]
 [tool.setuptools]
 package-dir = {"" = "src"}
 
diff --git a/src/ragas/dataset_schema.py b/src/ragas/dataset_schema.py
@@ -14,8 +14,10 @@
 
 from ragas.callbacks import ChainRunEncoder, parse_run_traces
 from ragas.cost import CostCallbackHandler
+from ragas.exceptions import UploadException
 from ragas.messages import AIMessage, HumanMessage, ToolCall, ToolMessage
-from ragas.utils import RAGAS_API_URL, safe_nanmean
+from ragas.sdk import RAGAS_API_URL, RAGAS_APP_URL, upload_packet
+from ragas.utils import safe_nanmean
 
 if t.TYPE_CHECKING:
     from pathlib import Path
@@ -509,8 +511,6 @@ def total_cost(
     def upload(self, base_url: str = RAGAS_API_URL, verbose: bool = True) -> str:
         from datetime import datetime, timezone
 
-        import requests
-
         timestamp = datetime.now(timezone.utc).isoformat()
         root_trace = [
             trace for trace in self.ragas_traces.values() if trace.parent_run_id is None
@@ -523,19 +523,28 @@ def upload(self, base_url: str = RAGAS_API_URL, verbose: bool = True) -> str:
             },
             cls=ChainRunEncoder,
         )
-
-        response = requests.post(
-            f"{base_url}/alignment/evaluation",
-            data=packet,
-            headers={"Content-Type": "application/json"},
+        response = upload_packet(
+            path="/alignment/evaluation",
+            data_json_string=packet,
+            base_url=base_url,
         )
 
-        if response.status_code != 200:
-            raise Exception(f"Failed to upload results: {response.text}")
-
+        # check status codes
         evaluation_endpoint = (
-            f"https://app.ragas.io/alignment/evaluation/{root_trace.run_id}"
+            f"{RAGAS_APP_URL}/alignment/evaluation/{root_trace.run_id}"
         )
+        if response.status_code == 409:
+            # this evaluation already exists
+            if verbose:
+                print(f"Evaluation run already exists. View at {evaluation_endpoint}")
+            return evaluation_endpoint
+        elif response.status_code != 200:
+            # any other error
+            raise UploadException(
+                status_code=response.status_code,
+                message=f"Failed to upload results: {response.text}",
+            )
+
         if verbose:
             print(f"Evaluation results uploaded! View at {evaluation_endpoint}")
         return evaluation_endpoint
@@ -563,15 +572,13 @@ def __getitem__(self, key):
 
 
 class MetricAnnotation(BaseModel):
-
     root: t.Dict[str, t.List[SampleAnnotation]]
 
     def __getitem__(self, key):
         return SingleMetricAnnotation(name=key, samples=self.root[key])
 
     @classmethod
     def from_json(cls, path, metric_name: t.Optional[str]) -> "MetricAnnotation":
-
         dataset = json.load(open(path))
         if metric_name is not None and metric_name not in dataset:
             raise ValueError(f"Split {metric_name} not found in the dataset.")
@@ -613,7 +620,6 @@ def select(self, indices: t.List[int]) -> "SingleMetricAnnotation":
 
     @classmethod
     def from_json(cls, path) -> "SingleMetricAnnotation":
-
         dataset = json.load(open(path))
 
         return cls(
@@ -622,7 +628,6 @@ def from_json(cls, path) -> "SingleMetricAnnotation":
         )
 
     def filter(self, function: t.Optional[t.Callable] = None):
-
         if function is None:
             function = lambda x: True  # noqa: E731
 
diff --git a/src/ragas/exceptions.py b/src/ragas/exceptions.py
@@ -39,3 +39,13 @@ class LLMDidNotFinishException(RagasException):
     def __init__(self):
         msg = "The LLM generation was not completed. Please increase try increasing the max_tokens and try again."
         super().__init__(msg)
+
+
+class UploadException(RagasException):
+    """
+    Exception raised when the app fails to upload the results.
+    """
+
+    def __init__(self, status_code: int, message: str):
+        self.status_code = status_code
+        super().__init__(message)
diff --git a/src/ragas/executor.py b/src/ragas/executor.py
@@ -5,13 +5,13 @@
 import typing as t
 from dataclasses import dataclass, field
 
+import nest_asyncio
 import numpy as np
 from tqdm.auto import tqdm
 
 from ragas.run_config import RunConfig
 from ragas.utils import batched
 
-import nest_asyncio
 nest_asyncio.apply()
 
 logger = logging.getLogger(__name__)
diff --git a/src/ragas/sdk.py b/src/ragas/sdk.py
@@ -0,0 +1,44 @@
+"""
+SDK module for interacting with the Ragas API service.
+"""
+
+import os
+from functools import lru_cache
+
+import requests
+
+from ragas._version import __version__
+from ragas.exceptions import UploadException
+
+# endpoint for uploading results
+RAGAS_API_URL = "https://api.ragas.io"
+RAGAS_APP_URL = "https://app.ragas.io"
+RAGAS_API_SOURCE = "ragas_py"
+
+
+@lru_cache(maxsize=1)
+def get_app_token() -> str:
+    app_token = os.environ.get("RAGAS_APP_TOKEN")
+    if app_token is None:
+        raise ValueError("RAGAS_APP_TOKEN is not set")
+    return app_token
+
+
+def upload_packet(path: str, data_json_string: str, base_url: str = RAGAS_API_URL):
+    app_token = get_app_token()
+    response = requests.post(
+        f"{base_url}/api/v1{path}",
+        data=data_json_string,
+        headers={
+            "Content-Type": "application/json",
+            "x-app-token": app_token,
+            "x-source": RAGAS_API_SOURCE,
+            "x-app-version": __version__,
+        },
+    )
+    if response.status_code == 403:
+        raise UploadException(
+            status_code=response.status_code,
+            message="AUTHENTICATION_ERROR: The app token is invalid. Please check your RAGAS_APP_TOKEN environment variable.",
+        )
+    return response
diff --git a/src/ragas/testset/synthesizers/testset_schema.py b/src/ragas/testset/synthesizers/testset_schema.py
@@ -15,7 +15,8 @@
     RagasDataset,
     SingleTurnSample,
 )
-from ragas.utils import RAGAS_API_URL
+from ragas.exceptions import UploadException
+from ragas.sdk import RAGAS_API_URL, RAGAS_APP_URL, upload_packet
 
 
 class TestsetSample(BaseSample):
@@ -136,16 +137,24 @@ def total_cost(
         )
 
     def upload(self, base_url: str = RAGAS_API_URL, verbose: bool = True) -> str:
-        import requests
-
         packet = TestsetPacket(samples_original=self.samples, run_id=self.run_id)
-        response = requests.post(
-            f"{base_url}/alignment/testset", json=packet.model_dump()
+        response = upload_packet(
+            path="/alignment/testset",
+            data_json_string=packet.model_dump_json(),
+            base_url=base_url,
         )
-        if response.status_code != 200:
-            raise Exception(f"Failed to upload results: {response.text}")
-
-        testset_endpoint = f"https://app.ragas.io/alignment/testset/{packet.run_id}"
+        testset_endpoint = f"{RAGAS_APP_URL}/alignment/testset/{self.run_id}"
+        if response.status_code == 409:
+            # this testset already exists
+            if verbose:
+                print(f"Testset already exists. View at {testset_endpoint}")
+            return testset_endpoint
+        elif response.status_code != 200:
+            # any other error
+            raise UploadException(
+                status_code=response.status_code,
+                message=f"Failed to upload results: {response.text}",
+            )
         if verbose:
             print(f"Testset uploaded! View at {testset_endpoint}")
         return testset_endpoint
diff --git a/src/ragas/utils.py b/src/ragas/utils.py
@@ -20,8 +20,6 @@
 RAGAS_SUPPORTED_LANGUAGE_CODES = {
     v.__name__.lower(): k for k, v in LANGUAGE_CODES.items()
 }
-# endpoint for uploading results
-RAGAS_API_URL = "https://api.ragas.io"
 
 
 @lru_cache(maxsize=1)

Original file line number	Diff line number	Diff line change
`@@ -20,8 +20,6 @@`
`20`	`20`	`RAGAS_SUPPORTED_LANGUAGE_CODES = {`
`21`	`21`	`v.__name__.lower(): k for k, v in LANGUAGE_CODES.items()`
`22`	`22`	`}`
`23`		`-# endpoint for uploading results`
`24`		`-RAGAS_API_URL = "https://api.ragas.io"`
`25`	`23`
`26`	`24`
`27`	`25`	`@lru_cache(maxsize=1)`