diff --git a/api/ee/docker/Dockerfile.dev b/api/ee/docker/Dockerfile.dev
index 2074141a18..814cbec3ff 100644
--- a/api/ee/docker/Dockerfile.dev
+++ b/api/ee/docker/Dockerfile.dev
@@ -34,8 +34,8 @@ RUN cat -A /etc/cron.d/meters-cron
RUN chmod +x /meters.sh \
&& chmod 0644 /etc/cron.d/meters-cron
-COPY ./oss/src/crons/queries.sh /queries.sh
-COPY ./oss/src/crons/queries.txt /etc/cron.d/queries-cron
+COPY ./ee/src/crons/queries.sh /queries.sh
+COPY ./ee/src/crons/queries.txt /etc/cron.d/queries-cron
RUN sed -i -e '$a\' /etc/cron.d/queries-cron
RUN cat -A /etc/cron.d/queries-cron
diff --git a/api/ee/docker/Dockerfile.gh b/api/ee/docker/Dockerfile.gh
index ab3a06b2ff..c3652a59df 100644
--- a/api/ee/docker/Dockerfile.gh
+++ b/api/ee/docker/Dockerfile.gh
@@ -34,8 +34,8 @@ RUN cat -A /etc/cron.d/meters-cron
RUN chmod +x /meters.sh \
&& chmod 0644 /etc/cron.d/meters-cron
-COPY ./oss/src/crons/queries.sh /queries.sh
-COPY ./oss/src/crons/queries.txt /etc/cron.d/queries-cron
+COPY ./ee/src/crons/queries.sh /queries.sh
+COPY ./ee/src/crons/queries.txt /etc/cron.d/queries-cron
RUN sed -i -e '$a\' /etc/cron.d/queries-cron
RUN cat -A /etc/cron.d/queries-cron
diff --git a/api/ee/src/apis/fastapi/billing/router.py b/api/ee/src/apis/fastapi/billing/router.py
index 08762eaa76..7ac23142c5 100644
--- a/api/ee/src/apis/fastapi/billing/router.py
+++ b/api/ee/src/apis/fastapi/billing/router.py
@@ -824,13 +824,12 @@ async def create_portal_user_route(
self,
request: Request,
):
- if is_ee():
- if not await check_action_access(
- user_uid=request.state.user_id,
- project_id=request.state.project_id,
- permission=Permission.EDIT_BILLING,
- ):
- return FORBIDDEN_RESPONSE
+ if not await check_action_access(
+ user_uid=request.state.user_id,
+ project_id=request.state.project_id,
+ permission=Permission.EDIT_BILLING,
+ ):
+ return FORBIDDEN_RESPONSE
return await self.create_portal(
organization_id=request.state.organization_id,
@@ -852,13 +851,12 @@ async def create_checkout_user_route(
plan: Plan = Query(...),
success_url: str = Query(...), # find a way to make this optional or moot
):
- if is_ee():
- if not await check_action_access(
- user_uid=request.state.user_id,
- project_id=request.state.project_id,
- permission=Permission.EDIT_BILLING,
- ):
- return FORBIDDEN_RESPONSE
+ if not await check_action_access(
+ user_uid=request.state.user_id,
+ project_id=request.state.project_id,
+ permission=Permission.EDIT_BILLING,
+ ):
+ return FORBIDDEN_RESPONSE
return await self.create_checkout(
organization_id=request.state.organization_id,
@@ -884,13 +882,12 @@ async def fetch_plan_user_route(
self,
request: Request,
):
- if is_ee():
- if not await check_action_access(
- user_uid=request.state.user_id,
- project_id=request.state.project_id,
- permission=Permission.VIEW_BILLING,
- ):
- return FORBIDDEN_RESPONSE
+ if not await check_action_access(
+ user_uid=request.state.user_id,
+ project_id=request.state.project_id,
+ permission=Permission.VIEW_BILLING,
+ ):
+ return FORBIDDEN_RESPONSE
return await self.fetch_plans(
organization_id=request.state.organization_id,
@@ -902,13 +899,12 @@ async def switch_plans_user_route(
request: Request,
plan: Plan = Query(...),
):
- if is_ee():
- if not await check_action_access(
- user_uid=request.state.user_id,
- project_id=request.state.project_id,
- permission=Permission.EDIT_BILLING,
- ):
- return FORBIDDEN_RESPONSE
+ if not await check_action_access(
+ user_uid=request.state.user_id,
+ project_id=request.state.project_id,
+ permission=Permission.EDIT_BILLING,
+ ):
+ return FORBIDDEN_RESPONSE
return await self.switch_plans(
organization_id=request.state.organization_id,
@@ -931,13 +927,12 @@ async def fetch_subscription_user_route(
self,
request: Request,
):
- if is_ee():
- if not await check_action_access(
- user_uid=request.state.user_id,
- project_id=request.state.project_id,
- permission=Permission.VIEW_BILLING,
- ):
- return FORBIDDEN_RESPONSE
+ if not await check_action_access(
+ user_uid=request.state.user_id,
+ project_id=request.state.project_id,
+ permission=Permission.VIEW_BILLING,
+ ):
+ return FORBIDDEN_RESPONSE
return await self.fetch_subscription(
organization_id=request.state.organization_id,
@@ -948,13 +943,12 @@ async def cancel_subscription_user_route(
self,
request: Request,
):
- if is_ee():
- if not await check_action_access(
- user_uid=request.state.user_id,
- project_id=request.state.project_id,
- permission=Permission.EDIT_BILLING,
- ):
- return FORBIDDEN_RESPONSE
+ if not await check_action_access(
+ user_uid=request.state.user_id,
+ project_id=request.state.project_id,
+ permission=Permission.EDIT_BILLING,
+ ):
+ return FORBIDDEN_RESPONSE
return await self.cancel_subscription(
organization_id=request.state.organization_id,
@@ -974,13 +968,12 @@ async def fetch_usage_user_route(
self,
request: Request,
):
- if is_ee():
- if not await check_action_access(
- user_uid=request.state.user_id,
- project_id=request.state.project_id,
- permission=Permission.VIEW_BILLING,
- ):
- return FORBIDDEN_RESPONSE
+ if not await check_action_access(
+ user_uid=request.state.user_id,
+ project_id=request.state.project_id,
+ permission=Permission.VIEW_BILLING,
+ ):
+ return FORBIDDEN_RESPONSE
return await self.fetch_usage(
organization_id=request.state.organization_id,
diff --git a/api/oss/src/crons/queries.sh b/api/ee/src/crons/queries.sh
similarity index 100%
rename from api/oss/src/crons/queries.sh
rename to api/ee/src/crons/queries.sh
diff --git a/api/oss/src/crons/queries.txt b/api/ee/src/crons/queries.txt
similarity index 100%
rename from api/oss/src/crons/queries.txt
rename to api/ee/src/crons/queries.txt
diff --git a/api/oss/src/tasks/__init__.py b/api/ee/src/dbs/postgres/shared/__init__.py
similarity index 100%
rename from api/oss/src/tasks/__init__.py
rename to api/ee/src/dbs/postgres/shared/__init__.py
diff --git a/api/ee/src/main.py b/api/ee/src/main.py
index 036bda6f0f..86d8ecf618 100644
--- a/api/ee/src/main.py
+++ b/api/ee/src/main.py
@@ -2,7 +2,12 @@
from oss.src.utils.logging import get_module_logger
-from ee.src.routers import workspace_router, organization_router
+from ee.src.routers import (
+ workspace_router,
+ organization_router,
+ evaluation_router,
+ human_evaluation_router,
+)
from ee.src.dbs.postgres.meters.dao import MetersDAO
from ee.src.dbs.postgres.subscriptions.dao import SubscriptionsDAO
@@ -66,11 +71,29 @@ def extend_main(app: FastAPI):
prefix="/workspaces",
)
+ app.include_router(
+ evaluation_router.router,
+ prefix="/evaluations",
+ tags=["Evaluations"],
+ )
+
+ app.include_router(
+ human_evaluation_router.router,
+ prefix="/human-evaluations",
+ tags=["Human-Evaluations"],
+ )
+
# --------------------------------------------------------------------------
return app
+def load_tasks():  # imports the three EE evaluation task modules; returns None
+    import ee.src.tasks.evaluations.live  # presumably imported for side effects (task registration) - confirm
+    import ee.src.tasks.evaluations.legacy
+    import ee.src.tasks.evaluations.batch
+
+
def extend_app_schema(app: FastAPI):
app.openapi()["info"]["title"] = "Agenta API"
app.openapi()["info"]["description"] = "Agenta API"
diff --git a/api/ee/src/models/db_models.py b/api/ee/src/models/db_models.py
index b05b633659..b5a4c194da 100644
--- a/api/ee/src/models/db_models.py
+++ b/api/ee/src/models/db_models.py
@@ -252,3 +252,267 @@ class ProjectMemberDB(Base):
class DeploymentDB(OssDeploymentDB):
pass
+
+
+class HumanEvaluationVariantDB(Base):
+    __tablename__ = "human_evaluation_variants"
+    # Row linking one human evaluation to an app variant and a variant revision.
+    id = Column(
+        UUID(as_uuid=True),
+        primary_key=True,
+        default=uuid.uuid7,  # NOTE(review): stdlib uuid only gained uuid7 in Python 3.14; presumably a project uuid alias - confirm
+        unique=True,
+        nullable=False,
+    )
+    human_evaluation_id = Column(  # row is removed together with its human evaluation
+        UUID(as_uuid=True), ForeignKey("human_evaluations.id", ondelete="CASCADE")
+    )
+    variant_id = Column(  # set to NULL when the referenced variant is deleted
+        UUID(as_uuid=True), ForeignKey("app_variants.id", ondelete="SET NULL")
+    )
+    variant_revision_id = Column(  # set to NULL when the referenced revision is deleted
+        UUID(as_uuid=True), ForeignKey("app_variant_revisions.id", ondelete="SET NULL")
+    )
+    # ORM relationships; each one also declares a backref on the target model.
+    variant = relationship("AppVariantDB", backref="evaluation_variant")
+    variant_revision = relationship(
+        "AppVariantRevisionsDB", backref="evaluation_variant_revision"
+    )
+
+
+class HumanEvaluationDB(Base):
+    __tablename__ = "human_evaluations"
+    # Human evaluation record tied to an app, a project and a testset.
+    id = Column(
+        UUID(as_uuid=True),
+        primary_key=True,
+        default=uuid.uuid7,
+        unique=True,
+        nullable=False,
+    )
+    app_id = Column(UUID(as_uuid=True), ForeignKey("app_db.id", ondelete="CASCADE"))
+    project_id = Column(
+        UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE")
+    )
+    status = Column(String)
+    evaluation_type = Column(String)
+    testset_id = Column(UUID(as_uuid=True), ForeignKey("testsets.id"))  # no ondelete rule declared
+    created_at = Column(
+        DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
+    )
+    updated_at = Column(  # only an insert-time default is declared here (no onupdate)
+        DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
+    )
+    # Relationships; the two child collections use the CASCADE_ALL_DELETE setting.
+    testset = relationship("TestsetDB")
+    evaluation_variant = relationship(
+        "HumanEvaluationVariantDB",
+        cascade=CASCADE_ALL_DELETE,
+        backref="human_evaluation",
+    )
+    evaluation_scenario = relationship(
+        "HumanEvaluationScenarioDB",
+        cascade=CASCADE_ALL_DELETE,
+        backref="evaluation_scenario",  # NOTE(review): backref repeats the attribute name rather than naming the parent - confirm intended
+    )
+
+
+class HumanEvaluationScenarioDB(Base):
+    __tablename__ = "human_evaluations_scenarios"
+    # One scenario of a human evaluation: JSONB inputs/outputs plus the verdict fields.
+    id = Column(
+        UUID(as_uuid=True),
+        primary_key=True,
+        default=uuid.uuid7,
+        unique=True,
+        nullable=False,
+    )
+    project_id = Column(
+        UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE")
+    )
+    evaluation_id = Column(  # row is removed together with its human evaluation
+        UUID(as_uuid=True), ForeignKey("human_evaluations.id", ondelete="CASCADE")
+    )
+    inputs = Column(
+        mutable_json_type(dbtype=JSONB, nested=True)
+    )  # List of HumanEvaluationScenarioInput
+    outputs = Column(
+        mutable_json_type(dbtype=JSONB, nested=True)
+    )  # List of HumanEvaluationScenarioOutput
+    vote = Column(String)
+    score = Column(String)  # stored as text, not as a numeric column
+    correct_answer = Column(String)
+    created_at = Column(
+        DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
+    )
+    updated_at = Column(  # only an insert-time default is declared here (no onupdate)
+        DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
+    )
+    is_pinned = Column(Boolean)
+    note = Column(String)
+
+
+class EvaluationAggregatedResultDB(Base):
+    __tablename__ = "auto_evaluation_aggregated_results"
+    # Aggregated result of one evaluator config for one auto evaluation.
+    id = Column(
+        UUID(as_uuid=True),
+        primary_key=True,
+        default=uuid.uuid7,
+        unique=True,
+        nullable=False,
+    )
+    evaluation_id = Column(  # row is removed together with its evaluation
+        UUID(as_uuid=True), ForeignKey("auto_evaluations.id", ondelete="CASCADE")
+    )
+    evaluator_config_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("auto_evaluator_configs.id", ondelete="SET NULL"),
+    )
+    result = Column(mutable_json_type(dbtype=JSONB, nested=True))  # Result
+    # NOTE(review): the backref on EvaluatorConfigDB is itself named "evaluator_config" - confirm intended
+    evaluator_config = relationship("EvaluatorConfigDB", backref="evaluator_config")
+
+
+class EvaluationScenarioResultDB(Base):
+    __tablename__ = "auto_evaluation_scenario_results"
+    # Result of one evaluator config for one auto-evaluation scenario.
+    id = Column(
+        UUID(as_uuid=True),
+        primary_key=True,
+        default=uuid.uuid7,
+        unique=True,
+        nullable=False,
+    )
+    evaluation_scenario_id = Column(  # row is removed together with its scenario
+        UUID(as_uuid=True),
+        ForeignKey("auto_evaluation_scenarios.id", ondelete="CASCADE"),
+    )
+    evaluator_config_id = Column(  # set to NULL when the evaluator config is deleted
+        UUID(as_uuid=True),
+        ForeignKey("auto_evaluator_configs.id", ondelete="SET NULL"),
+    )
+    result = Column(mutable_json_type(dbtype=JSONB, nested=True))  # Result
+
+
+class EvaluationDB(Base):
+    __tablename__ = "auto_evaluations"
+    # Auto evaluation of one app variant revision against a testset, within a project.
+    id = Column(
+        UUID(as_uuid=True),
+        primary_key=True,
+        default=uuid.uuid7,
+        unique=True,
+        nullable=False,
+    )
+    app_id = Column(UUID(as_uuid=True), ForeignKey("app_db.id", ondelete="CASCADE"))
+    project_id = Column(
+        UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE")
+    )
+    status = Column(mutable_json_type(dbtype=JSONB, nested=True))  # Result
+    testset_id = Column(  # SET NULL: the testset relationship can be None after deletion
+        UUID(as_uuid=True), ForeignKey("testsets.id", ondelete="SET NULL")
+    )
+    variant_id = Column(  # SET NULL: the variant relationship can be None after deletion
+        UUID(as_uuid=True), ForeignKey("app_variants.id", ondelete="SET NULL")
+    )
+    variant_revision_id = Column(  # SET NULL: variant_revision can be None after deletion
+        UUID(as_uuid=True), ForeignKey("app_variant_revisions.id", ondelete="SET NULL")
+    )
+    average_cost = Column(mutable_json_type(dbtype=JSONB, nested=True))  # Result
+    total_cost = Column(mutable_json_type(dbtype=JSONB, nested=True))  # Result
+    average_latency = Column(mutable_json_type(dbtype=JSONB, nested=True))  # Result
+    created_at = Column(
+        DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
+    )
+    updated_at = Column(  # only an insert-time default is declared here (no onupdate)
+        DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
+    )
+    # Relationships; the three child collections use the CASCADE_ALL_DELETE setting.
+    project = relationship("ee.src.models.db_models.ProjectDB")  # fully qualified class path
+    testset = relationship("TestsetDB")
+    variant = relationship("AppVariantDB")
+    variant_revision = relationship("AppVariantRevisionsDB")
+    aggregated_results = relationship(
+        "EvaluationAggregatedResultDB",
+        cascade=CASCADE_ALL_DELETE,
+        backref="evaluation",
+    )
+    evaluation_scenarios = relationship(
+        "EvaluationScenarioDB", cascade=CASCADE_ALL_DELETE, backref="evaluation"
+    )
+    evaluator_configs = relationship(
+        "EvaluationEvaluatorConfigDB",
+        cascade=CASCADE_ALL_DELETE,
+        backref="evaluation",
+    )
+
+
+class EvaluationEvaluatorConfigDB(Base):
+    __tablename__ = "auto_evaluation_evaluator_configs"
+    # Association row between an auto evaluation and an evaluator config (three-column primary key).
+    id = Column(
+        UUID(as_uuid=True),
+        primary_key=True,
+        default=uuid.uuid7,
+        unique=True,
+        nullable=False,
+    )
+    evaluation_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("auto_evaluations.id", ondelete="CASCADE"),
+        primary_key=True,
+    )
+    evaluator_config_id = Column(
+        UUID(as_uuid=True),
+        ForeignKey("auto_evaluator_configs.id", ondelete="SET NULL"),  # NOTE(review): SET NULL on a primary-key column looks contradictory - confirm against the migration
+        primary_key=True,
+    )
+
+
+class EvaluationScenarioDB(Base):
+    __tablename__ = "auto_evaluation_scenarios"
+    # One scenario of an auto evaluation: JSONB inputs, outputs and correct answers.
+    id = Column(
+        UUID(as_uuid=True),
+        primary_key=True,
+        default=uuid.uuid7,
+        unique=True,
+        nullable=False,
+    )
+    project_id = Column(
+        UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE")
+    )
+    evaluation_id = Column(  # row is removed together with its evaluation
+        UUID(as_uuid=True), ForeignKey("auto_evaluations.id", ondelete="CASCADE")
+    )
+    variant_id = Column(  # SET NULL: the variant relationship can be None after deletion
+        UUID(as_uuid=True), ForeignKey("app_variants.id", ondelete="SET NULL")
+    )
+    inputs = Column(
+        mutable_json_type(dbtype=JSONB, nested=True)
+    )  # List of EvaluationScenarioInput
+    outputs = Column(
+        mutable_json_type(dbtype=JSONB, nested=True)
+    )  # List of EvaluationScenarioOutput
+    correct_answers = Column(
+        mutable_json_type(dbtype=JSONB, nested=True)
+    )  # List of CorrectAnswer
+    is_pinned = Column(Boolean)
+    note = Column(String)
+    latency = Column(Integer)  # NOTE(review): integer column; units are not visible here - confirm
+    cost = Column(Integer)  # NOTE(review): integer column; fractional costs would not fit - confirm
+    created_at = Column(
+        DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
+    )
+    updated_at = Column(  # only an insert-time default is declared here (no onupdate)
+        DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
+    )
+    # Relationships; scenario results use the CASCADE_ALL_DELETE setting.
+    project = relationship("ee.src.models.db_models.ProjectDB")
+    variant = relationship("AppVariantDB")
+    results = relationship(
+        "EvaluationScenarioResultDB",
+        cascade=CASCADE_ALL_DELETE,
+        backref="evaluation_scenario",
+    )
diff --git a/api/oss/src/routers/evaluation_router.py b/api/ee/src/routers/evaluation_router.py
similarity index 96%
rename from api/oss/src/routers/evaluation_router.py
rename to api/ee/src/routers/evaluation_router.py
index cac2b06523..a01679d161 100644
--- a/api/oss/src/routers/evaluation_router.py
+++ b/api/ee/src/routers/evaluation_router.py
@@ -7,10 +7,10 @@
from oss.src.utils.logging import get_module_logger
from oss.src.utils.caching import get_cache, set_cache
-from oss.src.services import converters
-from oss.src.services import evaluation_service
+from ee.src.services import converters
+from ee.src.services import evaluation_service
-from oss.src.tasks.evaluations.legacy import (
+from ee.src.tasks.evaluations.legacy import (
setup_evaluation,
annotate,
)
@@ -21,6 +21,7 @@
NewEvaluation,
DeleteEvaluation,
)
+from ee.src.services import db_manager_ee
from oss.src.services import app_manager, db_manager
if is_ee():
@@ -81,7 +82,7 @@ async def fetch_evaluation_ids(
{"detail": error_msg},
status_code=403,
)
- evaluations = await db_manager.fetch_evaluations_by_resource(
+ evaluations = await db_manager_ee.fetch_evaluations_by_resource(
resource_type,
request.state.project_id,
resource_ids,
@@ -135,7 +136,7 @@ async def fetch_evaluation_status(
status_code=403,
)
- evaluation_status = await db_manager.fetch_evaluation_status_by_id(
+ evaluation_status = await db_manager_ee.fetch_evaluation_status_by_id(
project_id=request.state.project_id,
evaluation_id=evaluation_id,
)
@@ -169,7 +170,7 @@ async def fetch_evaluation_results(
_type_: _description_
"""
- evaluation = await db_manager.fetch_evaluation_by_id(
+ evaluation = await db_manager_ee.fetch_evaluation_by_id(
project_id=request.state.project_id,
evaluation_id=evaluation_id,
)
@@ -214,7 +215,7 @@ async def fetch_evaluation_scenarios(
List[EvaluationScenario]: A list of evaluation scenarios.
"""
- evaluation = await db_manager.fetch_evaluation_by_id(
+ evaluation = await db_manager_ee.fetch_evaluation_by_id(
project_id=request.state.project_id,
evaluation_id=evaluation_id,
)
@@ -297,7 +298,7 @@ async def fetch_evaluation(
Evaluation: The fetched evaluation.
"""
- evaluation = await db_manager.fetch_evaluation_by_id(
+ evaluation = await db_manager_ee.fetch_evaluation_by_id(
project_id=request.state.project_id,
evaluation_id=evaluation_id,
)
@@ -342,7 +343,7 @@ async def delete_evaluations(
A list of the deleted comparison tables' IDs.
"""
- evaluation = await db_manager.fetch_evaluation_by_id(
+ evaluation = await db_manager_ee.fetch_evaluation_by_id(
project_id=request.state.project_id,
evaluation_id=payload.evaluations_ids[0],
)
@@ -394,7 +395,7 @@ async def fetch_evaluation_scenarios_comparison_results(
"""
evaluations_ids_list = evaluations_ids.split(",")
- evaluation = await db_manager.fetch_evaluation_by_id(
+ evaluation = await db_manager_ee.fetch_evaluation_by_id(
project_id=request.state.project_id,
evaluation_id=evaluations_ids_list[0],
)
diff --git a/api/oss/src/routers/human_evaluation_router.py b/api/ee/src/routers/human_evaluation_router.py
similarity index 93%
rename from api/oss/src/routers/human_evaluation_router.py
rename to api/ee/src/routers/human_evaluation_router.py
index 681f7588bf..eb8e7e27f8 100644
--- a/api/oss/src/routers/human_evaluation_router.py
+++ b/api/ee/src/routers/human_evaluation_router.py
@@ -2,10 +2,11 @@
from fastapi import HTTPException, Body, Request, status, Response
from oss.src.utils.logging import get_module_logger
-from oss.src.services import converters
+from ee.src.services import converters
from oss.src.services import db_manager
-from oss.src.services import results_service
-from oss.src.services import evaluation_service
+from ee.src.services import db_manager_ee
+from ee.src.services import results_service
+from ee.src.services import evaluation_service
from oss.src.utils.common import APIRouter, is_ee
from oss.src.models.api.evaluation_model import (
DeleteEvaluation,
@@ -18,7 +19,7 @@
NewHumanEvaluation,
SimpleEvaluationOutput,
)
-from oss.src.services.evaluation_service import (
+from ee.src.services.evaluation_service import (
update_human_evaluation_scenario,
update_human_evaluation_service,
)
@@ -129,7 +130,7 @@ async def fetch_human_evaluation(
HumanEvaluation: The fetched evaluation.
"""
- human_evaluation = await db_manager.fetch_human_evaluation_by_id(evaluation_id)
+ human_evaluation = await db_manager_ee.fetch_human_evaluation_by_id(evaluation_id)
if not human_evaluation:
raise HTTPException(status_code=404, detail="Evaluation not found")
@@ -170,7 +171,7 @@ async def fetch_human_evaluation_scenarios(
List[EvaluationScenario]: A list of evaluation scenarios.
"""
- human_evaluation = await db_manager.fetch_human_evaluation_by_id(evaluation_id)
+ human_evaluation = await db_manager_ee.fetch_human_evaluation_by_id(evaluation_id)
if human_evaluation is None:
raise HTTPException(
status_code=404,
@@ -215,7 +216,9 @@ async def update_human_evaluation(
"""
try:
- human_evaluation = await db_manager.fetch_human_evaluation_by_id(evaluation_id)
+ human_evaluation = await db_manager_ee.fetch_human_evaluation_by_id(
+ evaluation_id
+ )
if not human_evaluation:
raise HTTPException(status_code=404, detail="Evaluation not found")
@@ -261,7 +264,7 @@ async def update_evaluation_scenario_router(
None: 204 No Content status code upon successful update.
"""
- evaluation_scenario_db = await db_manager.fetch_human_evaluation_scenario_by_id(
+ evaluation_scenario_db = await db_manager_ee.fetch_human_evaluation_scenario_by_id(
evaluation_scenario_id
)
if evaluation_scenario_db is None:
@@ -306,7 +309,7 @@ async def get_evaluation_scenario_score_router(
Dictionary containing the scenario ID and its score.
"""
- evaluation_scenario = db_manager.fetch_evaluation_scenario_by_id(
+    evaluation_scenario = await db_manager_ee.fetch_evaluation_scenario_by_id(  # async DB call: without await this is a coroutine object and the None check below never fires
evaluation_scenario_id
)
if evaluation_scenario is None:
@@ -349,7 +352,7 @@ async def update_evaluation_scenario_score_router(
None: 204 No Content status code upon successful update.
"""
- evaluation_scenario = await db_manager.fetch_evaluation_scenario_by_id(
+ evaluation_scenario = await db_manager_ee.fetch_evaluation_scenario_by_id(
evaluation_scenario_id
)
if evaluation_scenario is None:
@@ -392,7 +395,7 @@ async def fetch_results(
_description_
"""
- evaluation = await db_manager.fetch_human_evaluation_by_id(evaluation_id)
+ evaluation = await db_manager_ee.fetch_human_evaluation_by_id(evaluation_id)
if evaluation is None:
raise HTTPException(
status_code=404,
@@ -437,7 +440,7 @@ async def delete_evaluations(
A list of the deleted comparison tables' IDs.
"""
- evaluation = await db_manager.fetch_human_evaluation_by_id(
+ evaluation = await db_manager_ee.fetch_human_evaluation_by_id(
payload.evaluations_ids[0]
)
if is_ee():
diff --git a/api/ee/src/services/aggregation_service.py b/api/ee/src/services/aggregation_service.py
new file mode 100644
index 0000000000..55a14e5f8f
--- /dev/null
+++ b/api/ee/src/services/aggregation_service.py
@@ -0,0 +1,135 @@
+import re
+import traceback
+from typing import List, Optional
+
+from oss.src.models.shared_models import InvokationResult, Result, Error
+
+
+def aggregate_ai_critique(results: List[Result]) -> Result:
+    """Average the first integer found in each AI-critique result value.
+
+    Args:
+        results (List[Result]): results whose ``value`` is searched for digits
+
+    Returns:
+        Result: a "number" result holding the mean of the extracted integers
+            (None when no value contains digits), or an "error" result with
+            the message and stack trace when anything raises
+    """
+
+    try:
+        scores = []
+        for item in results:
+            # Only the first run of digits in the value is considered.
+            found = re.search(r"\d+", item.value)
+            if not found:
+                continue
+            try:
+                scores.append(int(found.group()))
+            except ValueError:
+                # Skip a match that cannot be converted to an integer.
+                continue
+
+        # Mean of the collected scores; None when nothing was extracted.
+        if scores:
+            mean_score = sum(scores) / len(scores)
+        else:
+            mean_score = None
+        return Result(type="number", value=mean_score)
+    except Exception as exc:
+        # Failures are reported as an error Result rather than raised.
+        message = str(exc)
+        stacktrace = str(traceback.format_exc())
+        failure = Error(message=message, stacktrace=stacktrace)
+        return Result(type="error", value=None, error=failure)
+
+
+def aggregate_binary(results: List[Result]) -> Result:
+    """Aggregates the results for the binary (auto regex) evaluation.
+
+    Args:
+        results (List[Result]): list of result objects
+
+    Returns:
+        Result: mean of the bool values; None if empty or any non-bool value
+    """
+    # all() is True for an empty list, which used to divide by zero: guard it.
+    if results and all(isinstance(result.value, bool) for result in results):
+        average_value = sum(int(result.value) for result in results) / len(results)
+    else:
+        average_value = None
+    return Result(type="number", value=average_value)
+
+
+def aggregate_float(results: List[Result]) -> Result:
+    """Average the values of the given results.
+
+    Args:
+        results (List[Result]): results whose ``value`` fields are summed
+
+    Returns:
+        Result: a "number" result with the mean, or an "error" result with
+            the message and stack trace if summing or dividing raises
+    """
+
+    try:
+        # Sum first, then divide by the number of results.
+        total = sum(item.value for item in results)
+        return Result(type="number", value=total / len(results))
+    except Exception as exc:
+        # Failures (including an empty list) are reported, not raised.
+        failure = Error(message=str(exc), stacktrace=str(traceback.format_exc()))
+        return Result(type="error", value=None, error=failure)
+
+
+def aggregate_float_from_llm_app_response(
+    invocation_results: List[InvokationResult], key: Optional[str]
+) -> Result:
+    """Average attribute ``key`` over the invocation results that define it.
+
+    Results lacking the attribute, or holding None for it, are skipped. The
+    returned Result uses ``key`` as its type; its value is None when nothing
+    was collected. Any exception (including a falsy ``key``) yields an
+    "error" Result carrying the message and stack trace.
+    """
+    try:
+        if not key:
+            raise ValueError("Key is required to aggregate InvokationResult objects.")
+        # Keep only attribute values that are present and not None.
+        collected = []
+        for invocation in invocation_results:
+            if hasattr(invocation, key) and getattr(invocation, key) is not None:
+                collected.append(getattr(invocation, key))
+        if not collected:
+            return Result(type=key, value=None)
+        return Result(type=key, value=sum(collected) / len(collected))
+    except Exception as exc:
+        failure = Error(message=str(exc), stacktrace=str(traceback.format_exc()))
+        return Result(type="error", value=None, error=failure)
+
+
+def sum_float_from_llm_app_response(
+    invocation_results: List[InvokationResult], key: Optional[str]
+) -> Result:
+    """Sum attribute ``key`` over the invocation results that define it.
+
+    Results lacking the attribute, or holding None for it, are skipped. The
+    returned Result uses ``key`` as its type; its value is None when nothing
+    was collected. Any exception (including a falsy ``key``) yields an
+    "error" Result carrying the message and stack trace.
+    """
+    try:
+        if not key:
+            raise ValueError("Key is required to aggregate InvokationResult objects.")
+
+        # Keep only attribute values that are present and not None.
+        present = []
+        for invocation in invocation_results:
+            if hasattr(invocation, key) and getattr(invocation, key) is not None:
+                present.append(getattr(invocation, key))
+
+        aggregate = sum(present) if present else None
+        return Result(type=key, value=aggregate)
+    except Exception as exc:
+        failure = Error(message=str(exc), stacktrace=str(traceback.format_exc()))
+        return Result(type="error", value=None, error=failure)
diff --git a/api/ee/src/services/converters.py b/api/ee/src/services/converters.py
index 2bfc1d330b..5b120899fc 100644
--- a/api/ee/src/services/converters.py
+++ b/api/ee/src/services/converters.py
@@ -3,9 +3,28 @@
from datetime import datetime, timezone
from oss.src.services import db_manager
+from oss.src.models.api.evaluation_model import (
+ CorrectAnswer,
+ Evaluation,
+ HumanEvaluation,
+ EvaluationScenario,
+ SimpleEvaluationOutput,
+ EvaluationScenarioInput,
+ HumanEvaluationScenario,
+ EvaluationScenarioOutput,
+)
from ee.src.services import db_manager_ee
-from ee.src.models.api.workspace_models import WorkspaceRole, WorkspaceResponse
+from ee.src.models.api.workspace_models import (
+ WorkspaceRole,
+ WorkspaceResponse,
+)
from ee.src.models.shared_models import Permission
+from ee.src.models.db_models import (
+ EvaluationDB,
+ HumanEvaluationDB,
+ EvaluationScenarioDB,
+ HumanEvaluationScenarioDB,
+)
from oss.src.models.db_models import WorkspaceDB
@@ -130,3 +149,173 @@ def get_all_workspace_permissions_by_role(role_name: str) -> Dict[str, List[Any]
getattr(WorkspaceRole, role_name.upper())
)
return workspace_permissions
+
+
+async def human_evaluation_db_to_simple_evaluation_output(
+    human_evaluation_db: HumanEvaluationDB,
+) -> SimpleEvaluationOutput:
+    """Build a SimpleEvaluationOutput from a human evaluation and its variants."""
+    evaluation_id = str(human_evaluation_db.id)
+    linked = await db_manager_ee.fetch_human_evaluation_variants(
+        human_evaluation_id=evaluation_id
+    )
+    variant_ids = [str(link.variant_id) for link in linked]
+    return SimpleEvaluationOutput(
+        id=evaluation_id,
+        app_id=str(human_evaluation_db.app_id),
+        project_id=str(human_evaluation_db.project_id),
+        status=human_evaluation_db.status,  # type: ignore
+        evaluation_type=human_evaluation_db.evaluation_type,  # type: ignore
+        variant_ids=variant_ids,
+    )
+
+
+async def evaluation_db_to_pydantic(
+    evaluation_db: EvaluationDB,
+) -> Evaluation:
+    """Convert an EvaluationDB row into the Evaluation API model."""
+    # The variant, variant_revision and testset foreign keys are declared
+    # ON DELETE SET NULL, so these relationships may be None: use fallbacks
+    # rather than raising AttributeError on a deleted variant or testset.
+    variant, testset = evaluation_db.variant, evaluation_db.testset
+    revision = evaluation_db.variant_revision
+    variant_name = variant.variant_name if variant is not None else None
+    return Evaluation(
+        id=str(evaluation_db.id),
+        app_id=str(evaluation_db.app_id),
+        project_id=str(evaluation_db.project_id),
+        status=evaluation_db.status,
+        variant_ids=[str(evaluation_db.variant_id)],
+        variant_revision_ids=[str(evaluation_db.variant_revision_id)],
+        revisions=[str(revision.revision if revision is not None else None)],
+        variant_names=[variant_name or str(evaluation_db.variant_id)],
+        testset_id=str(evaluation_db.testset_id),
+        testset_name=testset.name if testset is not None else None,
+        aggregated_results=aggregated_result_of_evaluation_to_pydantic(
+            evaluation_db.aggregated_results
+        ),
+        created_at=str(evaluation_db.created_at),
+        updated_at=str(evaluation_db.updated_at),
+        average_cost=evaluation_db.average_cost,
+        total_cost=evaluation_db.total_cost,
+        average_latency=evaluation_db.average_latency,
+    )
+
+
+async def human_evaluation_db_to_pydantic(
+    evaluation_db: HumanEvaluationDB,
+) -> HumanEvaluation:
+    """Convert a HumanEvaluationDB row and its variants to HumanEvaluation."""
+    links = await db_manager_ee.fetch_human_evaluation_variants(
+        human_evaluation_id=str(evaluation_db.id)  # type: ignore
+    )
+
+    names, ids = [], []
+    revision_labels, revision_ids = [], []
+    for link in links:
+        # The related variant is only read when variant_id is a UUID.
+        if isinstance(link.variant_id, uuid.UUID):
+            name = link.variant.variant_name
+        else:
+            name = str(link.variant_id)
+        names.append(str(name))
+        ids.append(str(link.variant_id))
+
+        # Same guard for the revision; the fallback label is kept verbatim.
+        if isinstance(link.variant_revision_id, uuid.UUID):
+            label = str(link.variant_revision.revision)
+        else:
+            label = " None"
+        revision_labels.append(label)
+        revision_ids.append(str(link.variant_revision_id))
+
+    return HumanEvaluation(
+        id=str(evaluation_db.id),
+        app_id=str(evaluation_db.app_id),
+        project_id=str(evaluation_db.project_id),
+        status=evaluation_db.status,  # type: ignore
+        evaluation_type=evaluation_db.evaluation_type,  # type: ignore
+        variant_ids=ids,
+        variant_names=names,
+        testset_id=str(evaluation_db.testset_id),
+        testset_name=evaluation_db.testset.name,
+        variants_revision_ids=revision_ids,
+        revisions=revision_labels,
+        created_at=str(evaluation_db.created_at),  # type: ignore
+        updated_at=str(evaluation_db.updated_at),  # type: ignore
+    )
+
+
+def human_evaluation_scenario_db_to_pydantic(
+    evaluation_scenario_db: HumanEvaluationScenarioDB, evaluation_id: str
+) -> HumanEvaluationScenario:
+    """Map a human evaluation scenario row onto HumanEvaluationScenario.
+
+    A falsy ``is_pinned`` becomes False and a falsy ``note`` becomes "".
+    """
+    row = evaluation_scenario_db
+    # Collect the keyword arguments in the same order the fields are declared.
+    fields = dict(id=str(row.id), evaluation_id=evaluation_id)
+    fields.update(inputs=row.inputs, outputs=row.outputs, vote=row.vote)
+    fields.update(score=row.score, correct_answer=row.correct_answer)
+    fields.update(is_pinned=row.is_pinned or False, note=row.note or "")
+    return HumanEvaluationScenario(**fields)  # type: ignore
+
+
+def aggregated_result_of_evaluation_to_pydantic(
+    evaluation_aggregated_results: List,
+) -> List[dict]:
+    """Turn aggregated-result rows into plain dicts.
+
+    Each output dict has "evaluator_config" (a summary of the related
+    evaluator config, or {} when evaluator_config_id is not a UUID) and
+    "result" (the row's result, passed through unchanged).
+    """
+    converted = []
+    for row in evaluation_aggregated_results:
+        # Default to an empty summary; fill it only for a UUID config id.
+        config_summary = {}
+        if isinstance(row.evaluator_config_id, uuid.UUID):
+            config = row.evaluator_config
+            config_summary = {
+                "id": str(config.id),
+                "name": config.name,
+                "evaluator_key": config.evaluator_key,
+                "settings_values": config.settings_values,
+                "created_at": str(config.created_at),
+                "updated_at": str(config.updated_at),
+            }
+        entry = {"evaluator_config": config_summary, "result": row.result}
+        converted.append(entry)
+    return converted
+
+
+async def evaluation_scenario_db_to_pydantic(
+    evaluation_scenario_db: EvaluationScenarioDB, evaluation_id: str
+) -> EvaluationScenario:
+    """Convert an EvaluationScenarioDB row into EvaluationScenario.
+
+    Stored inputs, outputs and correct answers are expanded into their API
+    models; each scenario result is reduced to its evaluator config id (as
+    a string) and result payload.
+    """
+    row = evaluation_scenario_db
+    results = []
+    for scenario_result in row.results:
+        config_id = str(scenario_result.evaluator_config_id)
+        results.append({"evaluator_config": config_id, "result": scenario_result.result})
+
+    inputs = [EvaluationScenarioInput(**item) for item in row.inputs]  # type: ignore
+    outputs = [EvaluationScenarioOutput(**item) for item in row.outputs]  # type: ignore
+    answers = [CorrectAnswer(**item) for item in row.correct_answers]  # type: ignore
+
+    return EvaluationScenario(
+        id=str(row.id),
+        evaluation_id=evaluation_id,
+        inputs=inputs,
+        outputs=outputs,
+        correct_answers=answers,
+        is_pinned=row.is_pinned or False,  # type: ignore
+        note=row.note or "",  # type: ignore
+        results=results,  # type: ignore
+    )
diff --git a/api/ee/src/services/db_manager.py b/api/ee/src/services/db_manager.py
index 0d8e36c384..1091c4f736 100644
--- a/api/ee/src/services/db_manager.py
+++ b/api/ee/src/services/db_manager.py
@@ -1,7 +1,7 @@
import uuid
from oss.src.dbs.postgres.shared.engine import engine
-from ee.src.models.db_models import DeploymentDB
+from ee.src.models.db_models import DeploymentDB_ as DeploymentDB
async def create_deployment(
diff --git a/api/ee/src/services/db_manager_ee.py b/api/ee/src/services/db_manager_ee.py
index bc174918b6..b101f7b68d 100644
--- a/api/ee/src/services/db_manager_ee.py
+++ b/api/ee/src/services/db_manager_ee.py
@@ -1,12 +1,14 @@
import uuid
-from typing import List, Union, NoReturn, Optional, Tuple
+from typing import List, Dict, Union, Any, NoReturn, Optional, Tuple
import sendgrid
from fastapi import HTTPException
+from sendgrid.helpers.mail import Mail
+from sqlalchemy import func, asc
from sqlalchemy.future import select
from sqlalchemy.ext.asyncio import AsyncSession
-from sqlalchemy.orm import joinedload, load_only
+from sqlalchemy.orm import joinedload, load_only, aliased
from sqlalchemy.exc import NoResultFound, MultipleResultsFound
from oss.src.utils.logging import get_module_logger
@@ -29,14 +31,36 @@
from ee.src.models.db_models import (
ProjectDB,
WorkspaceDB,
+ EvaluationDB,
OrganizationDB,
ProjectMemberDB,
WorkspaceMemberDB,
+ HumanEvaluationDB,
OrganizationMemberDB,
+ EvaluationScenarioDB,
+ HumanEvaluationScenarioDB,
+ HumanEvaluationVariantDB,
+ EvaluationScenarioResultDB,
+ EvaluationEvaluatorConfigDB,
+ EvaluationAggregatedResultDB,
)
from oss.src.models.db_models import (
+ AppVariantDB,
UserDB,
+ AppDB,
+ TestsetDB,
InvitationDB,
+ EvaluatorConfigDB,
+ AppVariantRevisionsDB,
+)
+from oss.src.models.shared_models import (
+ Result,
+ CorrectAnswer,
+ AggregatedResult,
+ EvaluationScenarioResult,
+ EvaluationScenarioInput,
+ EvaluationScenarioOutput,
+ HumanEvaluationScenarioInput,
)
from ee.src.services.converters import get_workspace_in_format
from ee.src.services.selectors import get_org_default_workspace
@@ -1183,6 +1207,85 @@ async def get_all_workspace_roles() -> List[WorkspaceRole]:
return workspace_roles
+# async def get_project_id_from_db_entity(
+# object_id: str, type: str, project_id: str
+# ) -> dict:
+# """
+# Get the project id of the object.
+
+# Args:
+# object_id (str): The ID of the object.
+# type (str): The type of the object.
+
+# Returns:
+# dict: The project_id of the object.
+
+# Raises:
+# ValueError: If the object type is unknown.
+# Exception: If there is an error retrieving the project_id.
+# """
+# try:
+# if type == "app":
+# app = await db_manager.fetch_app_by_id(object_id)
+# project_id = app.project_id
+
+# elif type == "app_variant":
+# app_variant = await db_manager.fetch_app_variant_by_id(object_id)
+# project_id = app_variant.project_id
+
+# elif type == "base":
+# base = await db_manager.fetch_base_by_id(object_id)
+# project_id = base.project_id
+
+# elif type == "deployment":
+# deployment = await db_manager.get_deployment_by_id(object_id)
+# project_id = deployment.project_id
+
+# elif type == "testset":
+# testset = await db_manager.fetch_testset_by_id(object_id)
+# project_id = testset.project_id
+
+# elif type == "evaluation":
+# evaluation = await db_manager.fetch_evaluation_by_id(object_id)
+# project_id = evaluation.project_id
+
+# elif type == "evaluation_scenario":
+# evaluation_scenario = await db_manager.fetch_evaluation_scenario_by_id(
+# object_id
+# )
+# project_id = evaluation_scenario.project_id
+
+# elif type == "evaluator_config":
+# evaluator_config = await db_manager.fetch_evaluator_config(object_id)
+# project_id = evaluator_config.project_id
+
+# elif type == "human_evaluation":
+# human_evaluation = await db_manager.fetch_human_evaluation_by_id(object_id)
+# project_id = human_evaluation.project_id
+
+# elif type == "human_evaluation_scenario":
+# human_evaluation_scenario = (
+# await db_manager.fetch_human_evaluation_scenario_by_id(object_id)
+# )
+# project_id = human_evaluation_scenario.project_id
+
+# elif type == "human_evaluation_scenario_by_evaluation_id":
+# human_evaluation_scenario_by_evaluation = (
+# await db_manager.fetch_human_evaluation_scenario_by_evaluation_id(
+# object_id
+# )
+# )
+# project_id = human_evaluation_scenario_by_evaluation.project_id
+
+# else:
+# raise ValueError(f"Unknown object type: {type}")
+
+# return str(project_id)
+
+# except Exception as e:
+# raise e
+
+
async def add_user_to_organization(
organization_id: str,
user_id: str,
@@ -1274,3 +1377,755 @@ async def add_user_to_project(
)
await session.commit()
+
+
+async def fetch_evaluation_status_by_id(
+ project_id: str,
+ evaluation_id: str,
+) -> Optional[str]:
+ """Fetch only the status of an evaluation by its ID."""
+ assert evaluation_id is not None, "evaluation_id cannot be None"
+
+ async with engine.core_session() as session:
+ query = (
+ select(EvaluationDB)
+ .filter_by(project_id=project_id, id=uuid.UUID(evaluation_id))
+ .options(load_only(EvaluationDB.status))
+ )
+
+ result = await session.execute(query)
+ evaluation = result.scalars().first()
+ return evaluation.status if evaluation else None
+
+
+async def fetch_evaluation_by_id(
+ project_id: str,
+ evaluation_id: str,
+) -> Optional[EvaluationDB]:
+ """Fetches an evaluation by its ID.
+
+ Args:
+ evaluation_id (str): The ID of the evaluation to fetch.
+
+ Returns:
+ EvaluationDB: The fetched evaluation, or None if no evaluation was found.
+ """
+
+ assert evaluation_id is not None, "evaluation_id cannot be None"
+ async with engine.core_session() as session:
+ base_query = select(EvaluationDB).filter_by(
+ project_id=project_id,
+ id=uuid.UUID(evaluation_id),
+ )
+ query = base_query.options(
+ joinedload(EvaluationDB.testset.of_type(TestsetDB)).load_only(TestsetDB.id, TestsetDB.name), # type: ignore
+ )
+
+ result = await session.execute(
+ query.options(
+ joinedload(EvaluationDB.variant.of_type(AppVariantDB)).load_only(AppVariantDB.id, AppVariantDB.variant_name), # type: ignore
+ joinedload(EvaluationDB.variant_revision.of_type(AppVariantRevisionsDB)).load_only(AppVariantRevisionsDB.revision), # type: ignore
+ joinedload(
+ EvaluationDB.aggregated_results.of_type(
+ EvaluationAggregatedResultDB
+ )
+ ).joinedload(EvaluationAggregatedResultDB.evaluator_config),
+ )
+ )
+ evaluation = result.unique().scalars().first()
+ return evaluation
+
+
+async def list_human_evaluations(app_id: str, project_id: str):
+ """
+ Fetches human evaluations belonging to an App.
+
+ Args:
+ app_id (str): The application identifier
+ """
+
+ async with engine.core_session() as session:
+ base_query = (
+ select(HumanEvaluationDB)
+ .filter_by(app_id=uuid.UUID(app_id), project_id=uuid.UUID(project_id))
+ .filter(HumanEvaluationDB.testset_id.isnot(None))
+ )
+ query = base_query.options(
+ joinedload(HumanEvaluationDB.testset.of_type(TestsetDB)).load_only(TestsetDB.id, TestsetDB.name), # type: ignore
+ )
+
+ result = await session.execute(query)
+ human_evaluations = result.scalars().all()
+ return human_evaluations
+
+
+async def create_human_evaluation(
+ app: AppDB,
+ status: str,
+ evaluation_type: str,
+ testset_id: str,
+ variants_ids: List[str],
+):
+ """
+ Creates a human evaluation.
+
+ Args:
+ app (AppDB): The app object
+ status (str): The status of the evaluation
+ evaluation_type (str): The evaluation type
+ testset_id (str): The ID of the evaluation testset
+ variants_ids (List[str]): The IDs of the variants for the evaluation
+ """
+
+ async with engine.core_session() as session:
+ human_evaluation = HumanEvaluationDB(
+ app_id=app.id,
+ project_id=app.project_id,
+ status=status,
+ evaluation_type=evaluation_type,
+ testset_id=testset_id,
+ )
+
+ session.add(human_evaluation)
+ await session.commit()
+ await session.refresh(human_evaluation, attribute_names=["testset"])
+
+ # create variants for human evaluation
+ await create_human_evaluation_variants(
+ human_evaluation_id=str(human_evaluation.id),
+ variants_ids=variants_ids,
+ )
+ return human_evaluation
+
+
+async def fetch_human_evaluation_variants(human_evaluation_id: str):
+ """
+ Fetches human evaluation variants.
+
+ Args:
+ human_evaluation_id (str): The human evaluation ID
+
+ Returns:
+ The human evaluation variants.
+ """
+
+ async with engine.core_session() as session:
+ base_query = select(HumanEvaluationVariantDB).filter_by(
+ human_evaluation_id=uuid.UUID(human_evaluation_id)
+ )
+ query = base_query.options(
+ joinedload(HumanEvaluationVariantDB.variant.of_type(AppVariantDB)).load_only(AppVariantDB.id, AppVariantDB.variant_name), # type: ignore
+ joinedload(HumanEvaluationVariantDB.variant_revision.of_type(AppVariantRevisionsDB)).load_only(AppVariantRevisionsDB.id, AppVariantRevisionsDB.revision), # type: ignore
+ )
+
+ result = await session.execute(query)
+ evaluation_variants = result.scalars().all()
+ return evaluation_variants
+
+
+async def create_human_evaluation_variants(
+ human_evaluation_id: str, variants_ids: List[str]
+):
+ """
+ Creates human evaluation variants.
+
+ Args:
+ human_evaluation_id (str): The human evaluation identifier
+ variants_ids (List[str]): The variants identifiers
+ project_id (str): The project ID
+ """
+
+ variants_dict = {}
+ for variant_id in variants_ids:
+ variant = await db_manager.fetch_app_variant_by_id(app_variant_id=variant_id)
+ if variant:
+ variants_dict[variant_id] = variant
+
+ variants_revisions_dict = {}
+ for variant_id, variant in variants_dict.items():
+ variant_revision = await db_manager.fetch_app_variant_revision_by_variant(
+ app_variant_id=str(variant.id), project_id=str(variant.project_id), revision=variant.revision # type: ignore
+ )
+ if variant_revision:
+ variants_revisions_dict[variant_id] = variant_revision
+
+ if set(variants_dict.keys()) != set(variants_revisions_dict.keys()):
+ raise ValueError("Mismatch between variants and their revisions")
+
+ async with engine.core_session() as session:
+ for variant_id in variants_ids:
+ variant = variants_dict[variant_id]
+ variant_revision = variants_revisions_dict[variant_id]
+ human_evaluation_variant = HumanEvaluationVariantDB(
+ human_evaluation_id=uuid.UUID(human_evaluation_id),
+ variant_id=variant.id, # type: ignore
+ variant_revision_id=variant_revision.id, # type: ignore
+ )
+ session.add(human_evaluation_variant)
+
+ await session.commit()
+
+
+async def fetch_human_evaluation_by_id(
+ evaluation_id: str,
+) -> Optional[HumanEvaluationDB]:
+ """
+ Fetches a human evaluation by its ID.
+
+ Args:
+ evaluation_id (str): The ID of the human evaluation to fetch.
+
+ Returns:
+ HumanEvaluationDB: The fetched human evaluation, or None if no human evaluation was found.
+ """
+
+ assert evaluation_id is not None, "evaluation_id cannot be None"
+ async with engine.core_session() as session:
+ base_query = select(HumanEvaluationDB).filter_by(id=uuid.UUID(evaluation_id))
+ query = base_query.options(
+ joinedload(HumanEvaluationDB.testset.of_type(TestsetDB)).load_only(TestsetDB.id, TestsetDB.name), # type: ignore
+ )
+ result = await session.execute(query)
+ evaluation = result.scalars().first()
+ return evaluation
+
+
+async def update_human_evaluation(evaluation_id: str, values_to_update: dict):
+ """Updates human evaluation with the specified values.
+
+ Args:
+ evaluation_id (str): The evaluation ID
+ values_to_update (dict): The values to update
+
+ Exceptions:
+ NoResultFound: if human evaluation is not found
+ """
+
+ async with engine.core_session() as session:
+ result = await session.execute(
+ select(HumanEvaluationDB).filter_by(id=uuid.UUID(evaluation_id))
+ )
+ human_evaluation = result.scalars().first()
+ if not human_evaluation:
+ raise NoResultFound(f"Human evaluation with id {evaluation_id} not found")
+
+ for key, value in values_to_update.items():
+ if hasattr(human_evaluation, key):
+ setattr(human_evaluation, key, value)
+
+ await session.commit()
+ await session.refresh(human_evaluation)
+
+
+async def delete_human_evaluation(evaluation_id: str):
+ """Delete the evaluation by its ID.
+
+ Args:
+ evaluation_id (str): The ID of the evaluation to delete.
+ """
+
+ assert evaluation_id is not None, "evaluation_id cannot be None"
+ async with engine.core_session() as session:
+ result = await session.execute(
+ select(HumanEvaluationDB).filter_by(id=uuid.UUID(evaluation_id))
+ )
+ evaluation = result.scalars().first()
+ if not evaluation:
+ raise NoResultFound(f"Human evaluation with id {evaluation_id} not found")
+
+ await session.delete(evaluation)
+ await session.commit()
+
+
+async def create_human_evaluation_scenario(
+ inputs: List[HumanEvaluationScenarioInput],
+ project_id: str,
+ evaluation_id: str,
+ evaluation_extend: Dict[str, Any],
+):
+ """
+ Creates a human evaluation scenario.
+
+ Args:
+ inputs (List[HumanEvaluationScenarioInput]): The inputs.
+ evaluation_id (str): The evaluation identifier.
+ evaluation_extend (Dict[str, Any]): An extended required payload for the evaluation scenario. Contains score, vote, and correct_answer.
+ """
+
+ async with engine.core_session() as session:
+ evaluation_scenario = HumanEvaluationScenarioDB(
+ **evaluation_extend,
+ project_id=uuid.UUID(project_id),
+ evaluation_id=uuid.UUID(evaluation_id),
+ inputs=[input.model_dump() for input in inputs],
+ outputs=[],
+ )
+
+ session.add(evaluation_scenario)
+ await session.commit()
+
+
+async def update_human_evaluation_scenario(
+ evaluation_scenario_id: str, values_to_update: dict
+):
+ """Updates human evaluation scenario with the specified values.
+
+ Args:
+ evaluation_scenario_id (str): The evaluation scenario ID
+ values_to_update (dict): The values to update
+
+ Exceptions:
+ NoResultFound: if human evaluation scenario is not found
+ """
+
+ async with engine.core_session() as session:
+ result = await session.execute(
+ select(HumanEvaluationScenarioDB).filter_by(
+ id=uuid.UUID(evaluation_scenario_id)
+ )
+ )
+ human_evaluation_scenario = result.scalars().first()
+ if not human_evaluation_scenario:
+ raise NoResultFound(
+ f"Human evaluation scenario with id {evaluation_scenario_id} not found"
+ )
+
+ for key, value in values_to_update.items():
+ if hasattr(human_evaluation_scenario, key):
+ setattr(human_evaluation_scenario, key, value)
+
+ await session.commit()
+ await session.refresh(human_evaluation_scenario)
+
+
+async def fetch_human_evaluation_scenarios(evaluation_id: str):
+ """
+ Fetches human evaluation scenarios.
+
+ Args:
+ evaluation_id (str): The evaluation identifier
+
+ Returns:
+ The evaluation scenarios.
+ """
+
+ async with engine.core_session() as session:
+ result = await session.execute(
+ select(HumanEvaluationScenarioDB)
+ .filter_by(evaluation_id=uuid.UUID(evaluation_id))
+ .order_by(asc(HumanEvaluationScenarioDB.created_at))
+ )
+ evaluation_scenarios = result.scalars().all()
+ return evaluation_scenarios
+
+
+async def fetch_evaluation_scenarios(evaluation_id: str, project_id: str):
+ """
+ Fetches evaluation scenarios.
+
+ Args:
+ evaluation_id (str): The evaluation identifier
+ project_id (str): The ID of the project
+
+ Returns:
+ The evaluation scenarios.
+ """
+
+ async with engine.core_session() as session:
+ result = await session.execute(
+ select(EvaluationScenarioDB)
+ .filter_by(
+ evaluation_id=uuid.UUID(evaluation_id), project_id=uuid.UUID(project_id)
+ )
+ .options(joinedload(EvaluationScenarioDB.results))
+ )
+ evaluation_scenarios = result.unique().scalars().all()
+ return evaluation_scenarios
+
+
+async def fetch_evaluation_scenario_by_id(
+ evaluation_scenario_id: str,
+) -> Optional[EvaluationScenarioDB]:
+ """Fetches an evaluation scenario by its ID.
+
+ Args:
+ evaluation_scenario_id (str): The ID of the evaluation scenario to fetch.
+
+ Returns:
+ EvaluationScenarioDB: The fetched evaluation scenario, or None if no evaluation scenario was found.
+ """
+
+ assert evaluation_scenario_id is not None, "evaluation_scenario_id cannot be None"
+ async with engine.core_session() as session:
+ result = await session.execute(
+ select(EvaluationScenarioDB).filter_by(id=uuid.UUID(evaluation_scenario_id))
+ )
+ evaluation_scenario = result.scalars().first()
+ return evaluation_scenario
+
+
+async def fetch_human_evaluation_scenario_by_id(
+ evaluation_scenario_id: str,
+) -> Optional[HumanEvaluationScenarioDB]:
+ """Fetches a human evaluation scenario by its ID.
+
+ Args:
+ evaluation_scenario_id (str): The ID of the human evaluation scenario to fetch.
+
+ Returns:
+ HumanEvaluationScenarioDB: The fetched human evaluation scenario, or None if no scenario was found.
+ """
+
+ assert evaluation_scenario_id is not None, "evaluation_scenario_id cannot be None"
+ async with engine.core_session() as session:
+ result = await session.execute(
+ select(HumanEvaluationScenarioDB).filter_by(
+ id=uuid.UUID(evaluation_scenario_id)
+ )
+ )
+ evaluation_scenario = result.scalars().first()
+ return evaluation_scenario
+
+
+async def fetch_human_evaluation_scenario_by_evaluation_id(
+ evaluation_id: str,
+) -> Optional[HumanEvaluationScenarioDB]:
+ """Fetches the first human evaluation scenario belonging to a human evaluation.
+ Args:
+ evaluation_id (str): The ID of the human evaluation whose scenario is fetched.
+ Returns:
+ HumanEvaluationScenarioDB: The fetched human evaluation scenario, or None if no scenario was found.
+ """
+
+ evaluation = await fetch_human_evaluation_by_id(evaluation_id)
+ async with engine.core_session() as session:
+ result = await session.execute(
+ select(HumanEvaluationScenarioDB).filter_by(
+ evaluation_id=evaluation.id # type: ignore
+ )
+ )
+ human_eval_scenario = result.scalars().first()
+ return human_eval_scenario
+
+
+async def create_new_evaluation(
+ app: AppDB,
+ project_id: str,
+ testset: TestsetDB,
+ status: Result,
+ variant: str,
+ variant_revision: str,
+) -> EvaluationDB:
+ """Create a new evaluation.
+ Returns:
+ EvaluationDB: The created evaluation.
+ """
+
+ async with engine.core_session() as session:
+ evaluation = EvaluationDB(
+ app_id=app.id,
+ project_id=uuid.UUID(project_id),
+ testset_id=testset.id,
+ status=status.model_dump(),
+ variant_id=uuid.UUID(variant),
+ variant_revision_id=uuid.UUID(variant_revision),
+ )
+
+ session.add(evaluation)
+ await session.commit()
+ await session.refresh(
+ evaluation,
+ attribute_names=[
+ "testset",
+ "variant",
+ "variant_revision",
+ "aggregated_results",
+ ],
+ )
+
+ return evaluation
+
+
+async def list_evaluations(app_id: str, project_id: str):
+ """Retrieves evaluations of the specified app from the db.
+
+ Args:
+ app_id (str): The ID of the app
+ project_id (str): The ID of the project
+ """
+
+ async with engine.core_session() as session:
+ base_query = select(EvaluationDB).filter_by(
+ app_id=uuid.UUID(app_id), project_id=uuid.UUID(project_id)
+ )
+ query = base_query.options(
+ joinedload(EvaluationDB.testset.of_type(TestsetDB)).load_only(TestsetDB.id, TestsetDB.name), # type: ignore
+ )
+
+ result = await session.execute(
+ query.options(
+ joinedload(EvaluationDB.variant.of_type(AppVariantDB)).load_only(AppVariantDB.id, AppVariantDB.variant_name), # type: ignore
+ joinedload(EvaluationDB.variant_revision.of_type(AppVariantRevisionsDB)).load_only(AppVariantRevisionsDB.revision), # type: ignore
+ joinedload(
+ EvaluationDB.aggregated_results.of_type(
+ EvaluationAggregatedResultDB
+ )
+ ).joinedload(EvaluationAggregatedResultDB.evaluator_config),
+ )
+ )
+ evaluations = result.unique().scalars().all()
+ return evaluations
+
+
+async def fetch_evaluations_by_resource(
+ resource_type: str, project_id: str, resource_ids: List[str]
+):
+ """
+ Fetches evaluations by resource.
+
+ Args:
+ resource_type (str): The resource type
+ project_id (str): The ID of the project
+ resource_ids (List[str]): The resource identifiers
+
+ Returns:
+ The evaluations by resource.
+
+ Raises:
+ HTTPException:400 resource_type {type} is not supported
+ """
+
+ ids = list(map(uuid.UUID, resource_ids))
+
+ async with engine.core_session() as session:
+ if resource_type == "variant":
+ result_evaluations = await session.execute(
+ select(EvaluationDB)
+ .filter(
+ EvaluationDB.variant_id.in_(ids),
+ EvaluationDB.project_id == uuid.UUID(project_id),
+ )
+ .options(load_only(EvaluationDB.id)) # type: ignore
+ )
+ result_human_evaluations = await session.execute(
+ select(HumanEvaluationDB)
+ .join(HumanEvaluationVariantDB)
+ .filter(
+ HumanEvaluationVariantDB.variant_id.in_(ids),
+ HumanEvaluationDB.project_id == uuid.UUID(project_id),
+ )
+ .options(load_only(HumanEvaluationDB.id)) # type: ignore
+ )
+ res_evaluations = result_evaluations.scalars().all()
+ res_human_evaluations = result_human_evaluations.scalars().all()
+ return res_evaluations + res_human_evaluations
+
+ elif resource_type == "testset":
+ result_evaluations = await session.execute(
+ select(EvaluationDB)
+ .filter(
+ EvaluationDB.testset_id.in_(ids),
+ EvaluationDB.project_id == uuid.UUID(project_id),
+ )
+ .options(load_only(EvaluationDB.id)) # type: ignore
+ )
+ result_human_evaluations = await session.execute(
+ select(HumanEvaluationDB)
+ .filter(
+ HumanEvaluationDB.testset_id.in_(ids),
+ HumanEvaluationDB.project_id
+ == uuid.UUID(project_id), # Fixed to match HumanEvaluationDB
+ )
+ .options(load_only(HumanEvaluationDB.id)) # type: ignore
+ )
+ res_evaluations = result_evaluations.scalars().all()
+ res_human_evaluations = result_human_evaluations.scalars().all()
+ return res_evaluations + res_human_evaluations
+
+ elif resource_type == "evaluator_config":
+ query = (
+ select(EvaluationDB)
+ .join(EvaluationDB.evaluator_configs)
+ .filter(
+ EvaluationEvaluatorConfigDB.evaluator_config_id.in_(ids),
+ EvaluationDB.project_id == uuid.UUID(project_id),
+ )
+ )
+ result = await session.execute(query)
+ res = result.scalars().all()
+ return res
+
+ raise HTTPException(
+ status_code=400,
+ detail=f"resource_type {resource_type} is not supported",
+ )
+
+
+async def delete_evaluations(evaluation_ids: List[str]) -> None:
+ """Delete evaluations based on the ids provided from the db.
+
+ Args:
+ evaluation_ids (List[str]): The IDs of the evaluations to delete
+ """
+
+ async with engine.core_session() as session:
+ query = select(EvaluationDB).where(EvaluationDB.id.in_(evaluation_ids))
+ result = await session.execute(query)
+ evaluations = result.scalars().all()
+ for evaluation in evaluations:
+ await session.delete(evaluation)
+ await session.commit()
+
+
+async def create_new_evaluation_scenario(
+ project_id: str,
+ evaluation_id: str,
+ variant_id: str,
+ inputs: List[EvaluationScenarioInput],
+ outputs: List[EvaluationScenarioOutput],
+ correct_answers: Optional[List[CorrectAnswer]],
+ is_pinned: Optional[bool],
+ note: Optional[str],
+ results: List[EvaluationScenarioResult],
+) -> EvaluationScenarioDB:
+ """Create a new evaluation scenario.
+
+ Returns:
+ EvaluationScenarioDB: The created evaluation scenario.
+ """
+
+ async with engine.core_session() as session:
+ evaluation_scenario = EvaluationScenarioDB(
+ project_id=uuid.UUID(project_id),
+ evaluation_id=uuid.UUID(evaluation_id),
+ variant_id=uuid.UUID(variant_id),
+ inputs=[input.model_dump() for input in inputs],
+ outputs=[output.model_dump() for output in outputs],
+ correct_answers=(
+ [correct_answer.model_dump() for correct_answer in correct_answers]
+ if correct_answers is not None
+ else []
+ ),
+ is_pinned=is_pinned,
+ note=note,
+ )
+
+ session.add(evaluation_scenario)
+ await session.commit()
+ await session.refresh(evaluation_scenario)
+
+ # create evaluation scenario result
+ for result in results:
+ evaluation_scenario_result = EvaluationScenarioResultDB(
+ evaluation_scenario_id=evaluation_scenario.id,
+ evaluator_config_id=uuid.UUID(result.evaluator_config),
+ result=result.result.model_dump(),
+ )
+
+ session.add(evaluation_scenario_result)
+
+ await session.commit() # ensures that scenario results insertion is committed
+ await session.refresh(evaluation_scenario)
+
+ return evaluation_scenario
+
+
+async def update_evaluation_with_aggregated_results(
+ evaluation_id: str, aggregated_results: List[AggregatedResult]
+):
+ async with engine.core_session() as session:
+ for result in aggregated_results:
+ aggregated_result = EvaluationAggregatedResultDB(
+ evaluation_id=uuid.UUID(evaluation_id),
+ evaluator_config_id=uuid.UUID(result.evaluator_config),
+ result=result.result.model_dump(),
+ )
+ session.add(aggregated_result)
+
+ await session.commit()
+
+
+async def fetch_eval_aggregated_results(evaluation_id: str):
+ """
+ Fetches the aggregated results of an evaluation by its identifier.
+
+ Args:
+ evaluation_id (str): The evaluation identifier
+
+ Returns:
+ The evaluation aggregated results by evaluation identifier.
+ """
+
+ async with engine.core_session() as session:
+ base_query = select(EvaluationAggregatedResultDB).filter_by(
+ evaluation_id=uuid.UUID(evaluation_id)
+ )
+ query = base_query.options(
+ joinedload(
+ EvaluationAggregatedResultDB.evaluator_config.of_type(EvaluatorConfigDB)
+ ).load_only(
+ EvaluatorConfigDB.id, # type: ignore
+ EvaluatorConfigDB.name, # type: ignore
+ EvaluatorConfigDB.evaluator_key, # type: ignore
+ EvaluatorConfigDB.settings_values, # type: ignore
+ EvaluatorConfigDB.created_at, # type: ignore
+ EvaluatorConfigDB.updated_at, # type: ignore
+ )
+ )
+
+ result = await session.execute(query)
+ aggregated_results = result.scalars().all()
+ return aggregated_results
+
+
+async def update_evaluation(
+ evaluation_id: str, project_id: str, updates: Dict[str, Any]
+) -> EvaluationDB:
+ """
+ Update an evaluation in the database with the provided id.
+
+ Arguments:
+ evaluation_id (str): The ID of the evaluation to be updated.
+ project_id (str): The ID of the project.
+ updates (Dict[str, Any]): The updates to apply to the evaluation.
+
+ Returns:
+ EvaluationDB: The updated evaluation object.
+ """
+
+ async with engine.core_session() as session:
+ result = await session.execute(
+ select(EvaluationDB).filter_by(
+ id=uuid.UUID(evaluation_id), project_id=uuid.UUID(project_id)
+ )
+ )
+ evaluation = result.scalars().first()
+ for key, value in updates.items():
+ if hasattr(evaluation, key):
+ setattr(evaluation, key, value)
+
+ await session.commit()
+ await session.refresh(evaluation)
+
+ return evaluation
+
+
+async def check_if_evaluation_contains_failed_evaluation_scenarios(
+ evaluation_id: str,
+) -> bool:
+ async with engine.core_session() as session:
+ EvaluationResultAlias = aliased(EvaluationScenarioResultDB)
+ query = (
+ select(func.count(EvaluationScenarioDB.id))
+ .join(EvaluationResultAlias, EvaluationScenarioDB.results)
+ .where(
+ EvaluationScenarioDB.evaluation_id == uuid.UUID(evaluation_id),
+ EvaluationResultAlias.result["type"].astext == "error",
+ )
+ )
+
+ result = await session.execute(query)
+ count = result.scalar()
+ if not count:
+ return False
+ return count > 0
diff --git a/api/oss/src/services/evaluation_service.py b/api/ee/src/services/evaluation_service.py
similarity index 93%
rename from api/oss/src/services/evaluation_service.py
rename to api/ee/src/services/evaluation_service.py
index ca40a70cef..9d7b61cb3d 100644
--- a/api/oss/src/services/evaluation_service.py
+++ b/api/ee/src/services/evaluation_service.py
@@ -3,8 +3,9 @@
from fastapi import HTTPException
from oss.src.utils.logging import get_module_logger
-from oss.src.services import converters
+from ee.src.services import converters
from oss.src.services import db_manager
+from ee.src.services import db_manager_ee
from oss.src.models.api.evaluation_model import (
Evaluation,
@@ -17,7 +18,7 @@
NewHumanEvaluation,
)
from oss.src.models.db_models import AppDB
-from oss.src.models.db_models import (
+from ee.src.models.db_models import (
EvaluationDB,
HumanEvaluationDB,
HumanEvaluationScenarioDB,
@@ -65,7 +66,7 @@ async def prepare_csvdata_and_create_evaluation_scenario(
for name in payload_inputs
]
except KeyError:
- await db_manager.delete_human_evaluation(
+ await db_manager_ee.delete_human_evaluation(
evaluation_id=str(new_evaluation.id)
)
msg = f"""
@@ -91,7 +92,7 @@ async def prepare_csvdata_and_create_evaluation_scenario(
**_extend_with_evaluation(evaluation_type),
**_extend_with_correct_answer(evaluation_type, datum),
}
- await db_manager.create_human_evaluation_scenario(
+ await db_manager_ee.create_human_evaluation_scenario(
inputs=list_of_scenario_input,
project_id=project_id,
evaluation_id=str(new_evaluation.id),
@@ -111,7 +112,7 @@ async def update_human_evaluation_service(
"""
# Update the evaluation
- await db_manager.update_human_evaluation(
+ await db_manager_ee.update_human_evaluation(
evaluation_id=str(evaluation.id), values_to_update=update_payload.model_dump()
)
@@ -130,7 +131,7 @@ async def fetch_evaluation_scenarios_for_evaluation(
List[EvaluationScenario]: A list of evaluation scenarios.
"""
- evaluation_scenarios = await db_manager.fetch_evaluation_scenarios(
+ evaluation_scenarios = await db_manager_ee.fetch_evaluation_scenarios(
evaluation_id=evaluation_id, project_id=project_id
)
return [
@@ -156,7 +157,7 @@ async def fetch_human_evaluation_scenarios_for_evaluation(
Returns:
List[EvaluationScenario]: A list of evaluation scenarios.
"""
- human_evaluation_scenarios = await db_manager.fetch_human_evaluation_scenarios(
+ human_evaluation_scenarios = await db_manager_ee.fetch_human_evaluation_scenarios(
evaluation_id=str(human_evaluation.id)
)
eval_scenarios = [
@@ -224,7 +225,7 @@ async def update_human_evaluation_scenario(
if "correct_answer" in payload:
values_to_update["correct_answer"] = payload["correct_answer"]
- await db_manager.update_human_evaluation_scenario(
+ await db_manager_ee.update_human_evaluation_scenario(
evaluation_scenario_id=str(evaluation_scenario_db.id),
values_to_update=values_to_update,
)
@@ -259,7 +260,7 @@ async def fetch_list_evaluations(app: AppDB, project_id: str) -> List[Evaluation
List[Evaluation]: A list of evaluations.
"""
- evaluations_db = await db_manager.list_evaluations(
+ evaluations_db = await db_manager_ee.list_evaluations(
app_id=str(app.id), project_id=project_id
)
return [
@@ -282,7 +283,7 @@ async def fetch_list_human_evaluations(
List[Evaluation]: A list of evaluations.
"""
- evaluations_db = await db_manager.list_human_evaluations(
+ evaluations_db = await db_manager_ee.list_human_evaluations(
app_id=app_id, project_id=project_id
)
return [
@@ -318,7 +319,7 @@ async def delete_human_evaluations(evaluation_ids: List[str]) -> None:
"""
for evaluation_id in evaluation_ids:
- await db_manager.delete_human_evaluation(evaluation_id=evaluation_id)
+ await db_manager_ee.delete_human_evaluation(evaluation_id=evaluation_id)
async def delete_evaluations(evaluation_ids: List[str]) -> None:
@@ -332,7 +333,7 @@ async def delete_evaluations(evaluation_ids: List[str]) -> None:
HTTPException: If evaluation not found or access denied.
"""
- await db_manager.delete_evaluations(evaluation_ids=evaluation_ids)
+ await db_manager_ee.delete_evaluations(evaluation_ids=evaluation_ids)
async def create_new_human_evaluation(payload: NewHumanEvaluation) -> HumanEvaluationDB:
@@ -353,7 +354,7 @@ async def create_new_human_evaluation(payload: NewHumanEvaluation) -> HumanEvalu
detail=f"App with id {payload.app_id} does not exist",
)
- human_evaluation = await db_manager.create_human_evaluation(
+ human_evaluation = await db_manager_ee.create_human_evaluation(
app=app,
status=payload.status,
evaluation_type=payload.evaluation_type,
@@ -410,7 +411,7 @@ async def create_new_evaluation(
assert testset is not None, f"Testset with id {testset_id} does not exist"
- evaluation_db = await db_manager.create_new_evaluation(
+ evaluation_db = await db_manager_ee.create_new_evaluation(
app=app,
project_id=project_id,
testset=testset,
@@ -424,7 +425,7 @@ async def create_new_evaluation(
async def compare_evaluations_scenarios(evaluations_ids: List[str], project_id: str):
- evaluation = await db_manager.fetch_evaluation_by_id(
+ evaluation = await db_manager_ee.fetch_evaluation_by_id(
project_id=project_id,
evaluation_id=evaluations_ids[0],
)
diff --git a/api/oss/src/services/llm_apps_service.py b/api/ee/src/services/llm_apps_service.py
similarity index 100%
rename from api/oss/src/services/llm_apps_service.py
rename to api/ee/src/services/llm_apps_service.py
diff --git a/api/oss/src/services/results_service.py b/api/ee/src/services/results_service.py
similarity index 91%
rename from api/oss/src/services/results_service.py
rename to api/ee/src/services/results_service.py
index cccb32164d..ca52151315 100644
--- a/api/oss/src/services/results_service.py
+++ b/api/ee/src/services/results_service.py
@@ -1,16 +1,16 @@
import uuid
from typing import Sequence, Dict, Any
-from oss.src.services import db_manager
+from ee.src.services import db_manager_ee
from oss.src.models.api.evaluation_model import EvaluationType
-from oss.src.models.db_models import (
+from ee.src.models.db_models import (
HumanEvaluationDB,
EvaluationScenarioDB,
)
async def fetch_results_for_evaluation(evaluation: HumanEvaluationDB):
- evaluation_scenarios = await db_manager.fetch_human_evaluation_scenarios(
+ evaluation_scenarios = await db_manager_ee.fetch_human_evaluation_scenarios(
evaluation_id=str(evaluation.id)
)
@@ -18,7 +18,7 @@ async def fetch_results_for_evaluation(evaluation: HumanEvaluationDB):
if len(evaluation_scenarios) == 0:
return results
- evaluation_variants = await db_manager.fetch_human_evaluation_variants(
+ evaluation_variants = await db_manager_ee.fetch_human_evaluation_variants(
human_evaluation_id=str(evaluation.id)
)
results["variants"] = [
@@ -99,7 +99,7 @@ async def _compute_stats_for_human_a_b_testing_evaluation(
async def fetch_results_for_single_model_test(evaluation_id: str):
- evaluation_scenarios = await db_manager.fetch_human_evaluation_scenarios(
+ evaluation_scenarios = await db_manager_ee.fetch_human_evaluation_scenarios(
evaluation_id=str(evaluation_id)
)
scores_and_counts: Dict[str, Any] = {}
diff --git a/api/ee/src/services/utils.py b/api/ee/src/services/utils.py
new file mode 100644
index 0000000000..0eaedde4ff
--- /dev/null
+++ b/api/ee/src/services/utils.py
@@ -0,0 +1,21 @@
+# Stdlib Imports
+import asyncio
+from functools import partial
+from typing import Any, Callable, Coroutine
+
+
+async def run_in_separate_thread(func: Callable, *args, **kwargs) -> Any:
+    """
+    Run a synchronous function in the running loop's default executor.
+
+    Args:
+        func (callable): The synchronous function to be executed.
+        args (tuple): Positional arguments to be passed to `func`.
+        kwargs (dict): Keyword arguments to be passed to `func`.
+
+    Returns:
+        The value returned by `func(*args, **kwargs)`.
+    """
+
+    loop = asyncio.get_running_loop()  # a coroutine always has a running loop
+    return await loop.run_in_executor(None, partial(func, *args, **kwargs))
diff --git a/api/oss/src/tasks/evaluations/__init__.py b/api/ee/src/tasks/__init__.py
similarity index 100%
rename from api/oss/src/tasks/evaluations/__init__.py
rename to api/ee/src/tasks/__init__.py
diff --git a/web/oss/src/components/Evaluations/HumanEvaluationResult.tsx b/api/ee/src/tasks/evaluations/__init__.py
similarity index 100%
rename from web/oss/src/components/Evaluations/HumanEvaluationResult.tsx
rename to api/ee/src/tasks/evaluations/__init__.py
diff --git a/api/oss/src/tasks/evaluations/batch.py b/api/ee/src/tasks/evaluations/batch.py
similarity index 97%
rename from api/oss/src/tasks/evaluations/batch.py
rename to api/ee/src/tasks/evaluations/batch.py
index 324ed74e49..5fdef15b3c 100644
--- a/api/oss/src/tasks/evaluations/batch.py
+++ b/api/ee/src/tasks/evaluations/batch.py
@@ -10,9 +10,8 @@
from oss.src.utils.helpers import parse_url, get_slug_from_name_and_id
from oss.src.utils.logging import get_module_logger
-from oss.src.utils.common import is_ee
from oss.src.services.auth_helper import sign_secret_token
-from oss.src.services import llm_apps_service
+from ee.src.services import llm_apps_service
from oss.src.models.shared_models import InvokationResult
from oss.src.services.db_manager import (
fetch_app_by_id,
@@ -22,9 +21,7 @@
get_project_by_id,
)
from oss.src.core.secrets.utils import get_llm_providers_secrets
-
-if is_ee():
- from ee.src.utils.entitlements import check_entitlements, Counter
+from ee.src.utils.entitlements import check_entitlements, Counter
from oss.src.dbs.postgres.queries.dbes import (
QueryArtifactDBE,
diff --git a/api/oss/src/tasks/evaluations/legacy.py b/api/ee/src/tasks/evaluations/legacy.py
similarity index 99%
rename from api/oss/src/tasks/evaluations/legacy.py
rename to api/ee/src/tasks/evaluations/legacy.py
index d3bc69f9cc..579c6853b9 100644
--- a/api/oss/src/tasks/evaluations/legacy.py
+++ b/api/ee/src/tasks/evaluations/legacy.py
@@ -9,9 +9,8 @@
from oss.src.utils.helpers import parse_url, get_slug_from_name_and_id
from oss.src.utils.logging import get_module_logger
-from oss.src.utils.common import is_ee
from oss.src.services.auth_helper import sign_secret_token
-from oss.src.services import llm_apps_service
+from ee.src.services import llm_apps_service
from oss.src.models.shared_models import InvokationResult
from oss.src.services.db_manager import (
fetch_app_by_id,
@@ -22,9 +21,7 @@
get_project_by_id,
)
from oss.src.core.secrets.utils import get_llm_providers_secrets
-
-if is_ee():
- from ee.src.utils.entitlements import check_entitlements, Counter
+from ee.src.utils.entitlements import check_entitlements, Counter
from oss.src.dbs.postgres.queries.dbes import (
QueryArtifactDBE,
@@ -1544,14 +1541,13 @@ def annotate(
# edit meters to avoid conting failed evaluations --------------------------
if run_status == EvaluationStatus.FAILURE:
- if is_ee():
- loop.run_until_complete(
- check_entitlements(
- organization_id=project.organization_id,
- key=Counter.EVALUATIONS,
- delta=-1,
- )
+ loop.run_until_complete(
+ check_entitlements(
+ organization_id=project.organization_id,
+ key=Counter.EVALUATIONS,
+ delta=-1,
)
+ )
log.info("[DONE] ", run_id=run_id, project_id=project_id, user_id=user_id)
diff --git a/api/oss/src/tasks/evaluations/live.py b/api/ee/src/tasks/evaluations/live.py
similarity index 100%
rename from api/oss/src/tasks/evaluations/live.py
rename to api/ee/src/tasks/evaluations/live.py
diff --git a/api/oss/tests/manual/evaluations/live.http b/api/ee/tests/manual/evaluations/live.http
similarity index 100%
rename from api/oss/tests/manual/evaluations/live.http
rename to api/ee/tests/manual/evaluations/live.http
diff --git a/api/oss/tests/manual/evaluators/human-evaluator.http b/api/ee/tests/manual/evaluators/human-evaluator.http
similarity index 100%
rename from api/oss/tests/manual/evaluators/human-evaluator.http
rename to api/ee/tests/manual/evaluators/human-evaluator.http
diff --git a/api/entrypoint.py b/api/entrypoint.py
index 36b37b699c..aa50cf2de1 100644
--- a/api/entrypoint.py
+++ b/api/entrypoint.py
@@ -24,8 +24,6 @@
from oss.src.services.auth_helper import authentication_middleware
from oss.src.services.analytics_service import analytics_middleware
-from oss.src.routers import evaluation_router, human_evaluation_router
-
# DBEs
from oss.src.dbs.postgres.queries.dbes import (
QueryArtifactDBE,
@@ -474,18 +472,6 @@ async def lifespan(*args, **kwargs):
tags=["Evaluations"],
)
-app.include_router(
- evaluation_router.router,
- prefix="/evaluations",
- tags=["Evaluations"],
-)
-
-app.include_router(
- human_evaluation_router.router,
- prefix="/human-evaluations",
- tags=["Human-Evaluations"],
-)
-
app.include_router(
admin_router.router,
prefix="/admin",
@@ -582,11 +568,7 @@ async def lifespan(*args, **kwargs):
# ------------------------------------------------------------------------------
-
-import oss.src.tasks.evaluations.live
-import oss.src.tasks.evaluations.legacy
-import oss.src.tasks.evaluations.batch
-
-
if ee and is_ee():
app = ee.extend_app_schema(app)
+
+ ee.load_tasks()
diff --git a/api/oss/docker/Dockerfile.dev b/api/oss/docker/Dockerfile.dev
index 8b500fd96e..647b46c960 100644
--- a/api/oss/docker/Dockerfile.dev
+++ b/api/oss/docker/Dockerfile.dev
@@ -34,12 +34,12 @@ ENV PYTHONPATH=/sdk:$PYTHONPATH
#
#
-COPY ./oss/src/crons/queries.sh /queries.sh
-COPY ./oss/src/crons/queries.txt /etc/cron.d/queries-cron
-RUN sed -i -e '$a\' /etc/cron.d/queries-cron
-RUN cat -A /etc/cron.d/queries-cron
+#
+#
+#
+#
-RUN chmod +x /queries.sh \
- && chmod 0644 /etc/cron.d/queries-cron
+#
+#
EXPOSE 8000
diff --git a/api/oss/docker/Dockerfile.gh b/api/oss/docker/Dockerfile.gh
index a9bd7c8365..cf9817f0b0 100644
--- a/api/oss/docker/Dockerfile.gh
+++ b/api/oss/docker/Dockerfile.gh
@@ -18,11 +18,11 @@ RUN pip install --upgrade pip \
#
COPY ./oss /app/oss/
COPY ./entrypoint.py ./pyproject.toml /app/
-COPY ./sdk /sdk/
+#
RUN poetry config virtualenvs.create false \
- && poetry install --no-interaction --no-ansi \
- && pip install --force-reinstall --upgrade /sdk/
+ && poetry install --no-interaction --no-ansi
+#
#
@@ -34,12 +34,12 @@ RUN poetry config virtualenvs.create false \
#
#
-COPY ./oss/src/crons/queries.sh /queries.sh
-COPY ./oss/src/crons/queries.txt /etc/cron.d/queries-cron
-RUN sed -i -e '$a\' /etc/cron.d/queries-cron
-RUN cat -A /etc/cron.d/queries-cron
+#
+#
+#
+#
-RUN chmod +x /queries.sh \
- && chmod 0644 /etc/cron.d/queries-cron
+#
+#
EXPOSE 8000
diff --git a/api/oss/src/apis/fastapi/applications/router.py b/api/oss/src/apis/fastapi/applications/router.py
index e179e0f04c..2f03ea8c8b 100644
--- a/api/oss/src/apis/fastapi/applications/router.py
+++ b/api/oss/src/apis/fastapi/applications/router.py
@@ -104,14 +104,13 @@ async def retrieve_application_revision(
*,
application_revision_retrieve_request: ApplicationRevisionRetrieveRequest,
):
- if is_ee():
- if not await check_action_access( # type: ignore
- project_id=request.state.project_id,
- user_uid=request.state.user_id,
- #
- permission=Permission.VIEW_APPLICATIONS, # type: ignore
- ):
- raise FORBIDDEN_EXCEPTION # type: ignore
+ if not await check_action_access( # type: ignore
+ project_id=request.state.project_id,
+ user_uid=request.state.user_id,
+ #
+ permission=Permission.VIEW_APPLICATIONS, # type: ignore
+ ):
+ raise FORBIDDEN_EXCEPTION # type: ignore
cache_key = {
"artifact_ref": application_revision_retrieve_request.application_ref, # type: ignore
diff --git a/api/oss/src/apis/fastapi/evaluators/router.py b/api/oss/src/apis/fastapi/evaluators/router.py
index 4ee7f5cbd8..4461df9072 100644
--- a/api/oss/src/apis/fastapi/evaluators/router.py
+++ b/api/oss/src/apis/fastapi/evaluators/router.py
@@ -748,14 +748,13 @@ async def retrieve_evaluator_revision(
*,
evaluator_revision_retrieve_request: EvaluatorRevisionRetrieveRequest,
) -> EvaluatorRevisionResponse:
- if is_ee():
- if not await check_action_access( # type: ignore
- project_id=request.state.project_id,
- user_uid=request.state.user_id,
- #
- permission=Permission.VIEW_EVALUATORS, # type: ignore
- ):
- raise FORBIDDEN_EXCEPTION # type: ignore
+ if not await check_action_access( # type: ignore
+ project_id=request.state.project_id,
+ user_uid=request.state.user_id,
+ #
+ permission=Permission.VIEW_EVALUATORS, # type: ignore
+ ):
+ raise FORBIDDEN_EXCEPTION # type: ignore
cache_key = {
"artifact_ref": evaluator_revision_retrieve_request.evaluator_ref, # type: ignore
diff --git a/api/oss/src/core/evaluations/service.py b/api/oss/src/core/evaluations/service.py
index b36a9d47ef..a6fff35361 100644
--- a/api/oss/src/core/evaluations/service.py
+++ b/api/oss/src/core/evaluations/service.py
@@ -6,6 +6,7 @@
from celery import current_app as celery_dispatch
+from oss.src.utils.common import is_ee
from oss.src.utils.logging import get_module_logger
from oss.src.core.shared.dtos import Reference, Windowing, Tags, Meta, Data
@@ -178,7 +179,7 @@ async def refresh_runs(
try:
log.info(
- "[LIVE] Dispatching...",
+ "[LIVE]",
project_id=project_id,
run_id=run.id,
#
@@ -186,24 +187,19 @@ async def refresh_runs(
oldest=oldest,
)
- celery_dispatch.send_task( # type: ignore
- "src.tasks.evaluations.live.evaluate",
- kwargs=dict(
- project_id=project_id,
- user_id=user_id,
- #
- run_id=run.id,
- #
- newest=newest,
- oldest=oldest,
- ),
- )
-
- log.info(
- "[LIVE] Dispatched. ",
- project_id=project_id,
- run_id=run.id,
- )
+ if is_ee():
+ celery_dispatch.send_task( # type: ignore
+ "src.tasks.evaluations.live.evaluate",
+ kwargs=dict(
+ project_id=project_id,
+ user_id=user_id,
+ #
+ run_id=run.id,
+ #
+ newest=newest,
+ oldest=oldest,
+ ),
+ )
except Exception as e: # pylint: disable=broad-exception-caught
log.error(f"[LIVE] Error refreshing run {run.id}: {e}", exc_info=True)
@@ -1561,26 +1557,29 @@ async def start(
return None
if _evaluation.data.query_steps:
- celery_dispatch.send_task( # type: ignore
- "src.tasks.evaluations.batch.evaluate_queries",
- kwargs=dict(
- project_id=project_id,
- user_id=user_id,
- #
- run_id=run.id,
- ),
- )
+ if is_ee():
+ celery_dispatch.send_task( # type: ignore
+ "src.tasks.evaluations.batch.evaluate_queries",
+ kwargs=dict(
+ project_id=project_id,
+ user_id=user_id,
+ #
+ run_id=run.id,
+ ),
+ )
elif _evaluation.data.testset_steps:
- celery_dispatch.send_task( # type: ignore
- "src.tasks.evaluations.batch.evaluate_testsets",
- kwargs=dict(
- project_id=project_id,
- user_id=user_id,
- #
- run_id=run.id,
- ),
- )
+ if is_ee():
+ # TODO: Fix typing ?
+ celery_dispatch.send_task( # type: ignore
+ "src.tasks.evaluations.batch.evaluate_testsets",
+ kwargs=dict(
+ project_id=project_id,
+ user_id=user_id,
+ #
+ run_id=run.id,
+ ),
+ )
return _evaluation
diff --git a/api/oss/src/models/db_models.py b/api/oss/src/models/db_models.py
index 867cb30156..3afa51acdb 100644
--- a/api/oss/src/models/db_models.py
+++ b/api/oss/src/models/db_models.py
@@ -89,6 +89,7 @@ class WorkspaceDB(Base):
)
+# KEEP in oss/
class UserDB(Base):
__tablename__ = "users"
@@ -110,6 +111,7 @@ class UserDB(Base):
)
+# KEEP in oss/
class ProjectDB(Base):
__tablename__ = "projects"
@@ -153,6 +155,7 @@ class ProjectDB(Base):
testset = relationship("TestsetDB", cascade=CASCADE_ALL_DELETE, backref="project")
+# KEEP in oss/
class AppDB(Base):
__tablename__ = "app_db"
@@ -187,6 +190,7 @@ class AppDB(Base):
)
+# KEEP in oss/
class DeploymentDB(Base):
__tablename__ = "deployments"
@@ -213,6 +217,7 @@ class DeploymentDB(Base):
app = relationship("AppDB", back_populates="deployment")
+# KEEP in oss/
class VariantBaseDB(Base):
__tablename__ = "bases"
@@ -243,6 +248,7 @@ class VariantBaseDB(Base):
project = relationship("oss.src.models.db_models.ProjectDB")
+# KEEP in oss/
class AppVariantDB(Base):
__tablename__ = "app_variants"
@@ -287,6 +293,7 @@ class AppVariantDB(Base):
)
+# KEEP in oss/
class AppVariantRevisionsDB(Base):
__tablename__ = "app_variant_revisions"
@@ -329,6 +336,7 @@ def get_config(self) -> dict:
return {"config_name": self.config_name, "parameters": self.config_parameters}
+# KEEP in oss/
class AppEnvironmentDB(Base):
__tablename__ = "environments"
@@ -366,6 +374,7 @@ class AppEnvironmentDB(Base):
deployed_app_variant_revision = relationship("AppVariantRevisionsDB")
+# KEEP in oss/
class AppEnvironmentRevisionDB(Base):
__tablename__ = "environments_revisions"
@@ -399,6 +408,7 @@ class AppEnvironmentRevisionDB(Base):
modified_by = relationship("UserDB")
+# KEEP in oss/
class TestsetDB(Base):
__tablename__ = "testsets"
@@ -422,6 +432,7 @@ class TestsetDB(Base):
)
+# KEEP in oss/
class EvaluatorConfigDB(Base):
__tablename__ = "auto_evaluator_configs"
@@ -447,6 +458,7 @@ class EvaluatorConfigDB(Base):
)
+# KEEP in oss/ or KILL
class IDsMappingDB(Base):
__tablename__ = "ids_mapping"
@@ -519,267 +531,3 @@ class APIKeyDB(Base):
project = relationship(
"oss.src.models.db_models.ProjectDB", backref="api_key_project"
)
-
-
-class HumanEvaluationVariantDB(Base):
- __tablename__ = "human_evaluation_variants"
-
- id = Column(
- UUID(as_uuid=True),
- primary_key=True,
- default=uuid.uuid7,
- unique=True,
- nullable=False,
- )
- human_evaluation_id = Column(
- UUID(as_uuid=True), ForeignKey("human_evaluations.id", ondelete="CASCADE")
- )
- variant_id = Column(
- UUID(as_uuid=True), ForeignKey("app_variants.id", ondelete="SET NULL")
- )
- variant_revision_id = Column(
- UUID(as_uuid=True), ForeignKey("app_variant_revisions.id", ondelete="SET NULL")
- )
-
- variant = relationship("AppVariantDB", backref="evaluation_variant")
- variant_revision = relationship(
- "AppVariantRevisionsDB", backref="evaluation_variant_revision"
- )
-
-
-class HumanEvaluationDB(Base):
- __tablename__ = "human_evaluations"
-
- id = Column(
- UUID(as_uuid=True),
- primary_key=True,
- default=uuid.uuid7,
- unique=True,
- nullable=False,
- )
- app_id = Column(UUID(as_uuid=True), ForeignKey("app_db.id", ondelete="CASCADE"))
- project_id = Column(
- UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE")
- )
- status = Column(String)
- evaluation_type = Column(String)
- testset_id = Column(UUID(as_uuid=True), ForeignKey("testsets.id"))
- created_at = Column(
- DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
- )
- updated_at = Column(
- DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
- )
-
- testset = relationship("TestsetDB")
- evaluation_variant = relationship(
- "HumanEvaluationVariantDB",
- cascade=CASCADE_ALL_DELETE,
- backref="human_evaluation",
- )
- evaluation_scenario = relationship(
- "HumanEvaluationScenarioDB",
- cascade=CASCADE_ALL_DELETE,
- backref="evaluation_scenario",
- )
-
-
-class HumanEvaluationScenarioDB(Base):
- __tablename__ = "human_evaluations_scenarios"
-
- id = Column(
- UUID(as_uuid=True),
- primary_key=True,
- default=uuid.uuid7,
- unique=True,
- nullable=False,
- )
- project_id = Column(
- UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE")
- )
- evaluation_id = Column(
- UUID(as_uuid=True), ForeignKey("human_evaluations.id", ondelete="CASCADE")
- )
- inputs = Column(
- mutable_json_type(dbtype=JSONB, nested=True)
- ) # List of HumanEvaluationScenarioInput
- outputs = Column(
- mutable_json_type(dbtype=JSONB, nested=True)
- ) # List of HumanEvaluationScenarioOutput
- vote = Column(String)
- score = Column(String)
- correct_answer = Column(String)
- created_at = Column(
- DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
- )
- updated_at = Column(
- DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
- )
- is_pinned = Column(Boolean)
- note = Column(String)
-
-
-class EvaluationAggregatedResultDB(Base):
- __tablename__ = "auto_evaluation_aggregated_results"
-
- id = Column(
- UUID(as_uuid=True),
- primary_key=True,
- default=uuid.uuid7,
- unique=True,
- nullable=False,
- )
- evaluation_id = Column(
- UUID(as_uuid=True), ForeignKey("auto_evaluations.id", ondelete="CASCADE")
- )
- evaluator_config_id = Column(
- UUID(as_uuid=True),
- ForeignKey("auto_evaluator_configs.id", ondelete="SET NULL"),
- )
- result = Column(mutable_json_type(dbtype=JSONB, nested=True)) # Result
-
- evaluator_config = relationship("EvaluatorConfigDB", backref="evaluator_config")
-
-
-class EvaluationScenarioResultDB(Base):
- __tablename__ = "auto_evaluation_scenario_results"
-
- id = Column(
- UUID(as_uuid=True),
- primary_key=True,
- default=uuid.uuid7,
- unique=True,
- nullable=False,
- )
- evaluation_scenario_id = Column(
- UUID(as_uuid=True),
- ForeignKey("auto_evaluation_scenarios.id", ondelete="CASCADE"),
- )
- evaluator_config_id = Column(
- UUID(as_uuid=True),
- ForeignKey("auto_evaluator_configs.id", ondelete="SET NULL"),
- )
- result = Column(mutable_json_type(dbtype=JSONB, nested=True)) # Result
-
-
-class EvaluationDB(Base):
- __tablename__ = "auto_evaluations"
-
- id = Column(
- UUID(as_uuid=True),
- primary_key=True,
- default=uuid.uuid7,
- unique=True,
- nullable=False,
- )
- app_id = Column(UUID(as_uuid=True), ForeignKey("app_db.id", ondelete="CASCADE"))
- project_id = Column(
- UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE")
- )
- status = Column(mutable_json_type(dbtype=JSONB, nested=True)) # Result
- testset_id = Column(
- UUID(as_uuid=True), ForeignKey("testsets.id", ondelete="SET NULL")
- )
- variant_id = Column(
- UUID(as_uuid=True), ForeignKey("app_variants.id", ondelete="SET NULL")
- )
- variant_revision_id = Column(
- UUID(as_uuid=True), ForeignKey("app_variant_revisions.id", ondelete="SET NULL")
- )
- average_cost = Column(mutable_json_type(dbtype=JSONB, nested=True)) # Result
- total_cost = Column(mutable_json_type(dbtype=JSONB, nested=True)) # Result
- average_latency = Column(mutable_json_type(dbtype=JSONB, nested=True)) # Result
- created_at = Column(
- DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
- )
- updated_at = Column(
- DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
- )
-
- project = relationship("oss.src.models.db_models.ProjectDB")
- testset = relationship("TestsetDB")
- variant = relationship("AppVariantDB")
- variant_revision = relationship("AppVariantRevisionsDB")
- aggregated_results = relationship(
- "EvaluationAggregatedResultDB",
- cascade=CASCADE_ALL_DELETE,
- backref="evaluation",
- )
- evaluation_scenarios = relationship(
- "EvaluationScenarioDB", cascade=CASCADE_ALL_DELETE, backref="evaluation"
- )
- evaluator_configs = relationship(
- "EvaluationEvaluatorConfigDB",
- cascade=CASCADE_ALL_DELETE,
- backref="evaluation",
- )
-
-
-class EvaluationEvaluatorConfigDB(Base):
- __tablename__ = "auto_evaluation_evaluator_configs"
-
- id = Column(
- UUID(as_uuid=True),
- primary_key=True,
- default=uuid.uuid7,
- unique=True,
- nullable=False,
- )
- evaluation_id = Column(
- UUID(as_uuid=True),
- ForeignKey("auto_evaluations.id", ondelete="CASCADE"),
- primary_key=True,
- )
- evaluator_config_id = Column(
- UUID(as_uuid=True),
- ForeignKey("auto_evaluator_configs.id", ondelete="SET NULL"),
- primary_key=True,
- )
-
-
-class EvaluationScenarioDB(Base):
- __tablename__ = "auto_evaluation_scenarios"
-
- id = Column(
- UUID(as_uuid=True),
- primary_key=True,
- default=uuid.uuid7,
- unique=True,
- nullable=False,
- )
- project_id = Column(
- UUID(as_uuid=True), ForeignKey("projects.id", ondelete="CASCADE")
- )
- evaluation_id = Column(
- UUID(as_uuid=True), ForeignKey("auto_evaluations.id", ondelete="CASCADE")
- )
- variant_id = Column(
- UUID(as_uuid=True), ForeignKey("app_variants.id", ondelete="SET NULL")
- )
- inputs = Column(
- mutable_json_type(dbtype=JSONB, nested=True)
- ) # List of EvaluationScenarioInput
- outputs = Column(
- mutable_json_type(dbtype=JSONB, nested=True)
- ) # List of EvaluationScenarioOutput
- correct_answers = Column(
- mutable_json_type(dbtype=JSONB, nested=True)
- ) # List of CorrectAnswer
- is_pinned = Column(Boolean)
- note = Column(String)
- latency = Column(Integer)
- cost = Column(Integer)
- created_at = Column(
- DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
- )
- updated_at = Column(
- DateTime(timezone=True), default=lambda: datetime.now(timezone.utc)
- )
-
- project = relationship("oss.src.models.db_models.ProjectDB")
- variant = relationship("AppVariantDB")
- results = relationship(
- "EvaluationScenarioResultDB",
- cascade=CASCADE_ALL_DELETE,
- backref="evaluation_scenario",
- )
diff --git a/api/oss/src/resources/evaluators/evaluators.py b/api/oss/src/resources/evaluators/evaluators.py
index cbca48d4fc..53a2d48542 100644
--- a/api/oss/src/resources/evaluators/evaluators.py
+++ b/api/oss/src/resources/evaluators/evaluators.py
@@ -229,12 +229,12 @@
"description": "Extract information from the user's response.",
"type": "object",
"properties": {
- "score": {
+ "correctness": {
"type": "boolean",
"description": "The grade results",
}
},
- "required": ["score"],
+ "required": ["correctness"],
"strict": True,
},
},
@@ -264,12 +264,12 @@
"description": "Extract information from the user's response.",
"type": "object",
"properties": {
- "score": {
+ "correctness": {
"type": "boolean",
"description": "The hallucination detection result",
}
},
- "required": ["score"],
+ "required": ["correctness"],
"strict": True,
},
},
@@ -339,12 +339,12 @@
"description": "Extract information from the user's response.",
"type": "object",
"properties": {
- "score": {
+ "correctness": {
"type": "boolean",
"description": "The grade results",
}
},
- "required": ["score"],
+ "required": ["correctness"],
"strict": True,
},
},
diff --git a/api/oss/src/services/app_manager.py b/api/oss/src/services/app_manager.py
index f644399ebd..7e11084485 100644
--- a/api/oss/src/services/app_manager.py
+++ b/api/oss/src/services/app_manager.py
@@ -42,7 +42,7 @@ async def get_appdb_str_by_id(object_id: str, object_type: str) -> str:
raise db_manager.NoResultFound(f"Variant with id {object_id} not found")
return str(app_variant_db.app_id)
elif object_type == "evaluation":
- evaluation_db = await db_manager.fetch_evaluation_by_id(
+ evaluation_db = await db_manager_ee.fetch_evaluation_by_id(
project_id=project_id,
evaluation_id=object_id,
)
diff --git a/api/oss/src/services/converters.py b/api/oss/src/services/converters.py
deleted file mode 100644
index ad9cb64169..0000000000
--- a/api/oss/src/services/converters.py
+++ /dev/null
@@ -1,191 +0,0 @@
-import uuid
-from typing import List, Dict, Any
-from datetime import datetime, timezone
-
-from oss.src.services import db_manager
-from oss.src.models.api.evaluation_model import (
- CorrectAnswer,
- Evaluation,
- HumanEvaluation,
- EvaluationScenario,
- SimpleEvaluationOutput,
- EvaluationScenarioInput,
- HumanEvaluationScenario,
- EvaluationScenarioOutput,
-)
-from oss.src.models.db_models import (
- EvaluationDB,
- HumanEvaluationDB,
- EvaluationScenarioDB,
- HumanEvaluationScenarioDB,
-)
-
-
-async def human_evaluation_db_to_simple_evaluation_output(
- human_evaluation_db: HumanEvaluationDB,
-) -> SimpleEvaluationOutput:
- evaluation_variants = await db_manager.fetch_human_evaluation_variants(
- human_evaluation_id=str(human_evaluation_db.id)
- )
- return SimpleEvaluationOutput(
- id=str(human_evaluation_db.id),
- app_id=str(human_evaluation_db.app_id),
- project_id=str(human_evaluation_db.project_id),
- status=human_evaluation_db.status, # type: ignore
- evaluation_type=human_evaluation_db.evaluation_type, # type: ignore
- variant_ids=[
- str(evaluation_variant.variant_id)
- for evaluation_variant in evaluation_variants
- ],
- )
-
-
-async def evaluation_db_to_pydantic(
- evaluation_db: EvaluationDB,
-) -> Evaluation:
- variant_name = (
- evaluation_db.variant.variant_name
- if evaluation_db.variant.variant_name
- else str(evaluation_db.variant_id)
- )
- aggregated_results = aggregated_result_of_evaluation_to_pydantic(
- evaluation_db.aggregated_results
- )
-
- return Evaluation(
- id=str(evaluation_db.id),
- app_id=str(evaluation_db.app_id),
- project_id=str(evaluation_db.project_id),
- status=evaluation_db.status,
- variant_ids=[str(evaluation_db.variant_id)],
- variant_revision_ids=[str(evaluation_db.variant_revision_id)],
- revisions=[str(evaluation_db.variant_revision.revision)],
- variant_names=[variant_name],
- testset_id=str(evaluation_db.testset_id),
- testset_name=evaluation_db.testset.name,
- aggregated_results=aggregated_results,
- created_at=str(evaluation_db.created_at),
- updated_at=str(evaluation_db.updated_at),
- average_cost=evaluation_db.average_cost,
- total_cost=evaluation_db.total_cost,
- average_latency=evaluation_db.average_latency,
- )
-
-
-async def human_evaluation_db_to_pydantic(
- evaluation_db: HumanEvaluationDB,
-) -> HumanEvaluation:
- evaluation_variants = await db_manager.fetch_human_evaluation_variants(
- human_evaluation_id=str(evaluation_db.id) # type: ignore
- )
-
- revisions = []
- variants_ids = []
- variants_names = []
- variants_revision_ids = []
- for evaluation_variant in evaluation_variants:
- variant_name = (
- evaluation_variant.variant.variant_name
- if isinstance(evaluation_variant.variant_id, uuid.UUID)
- else str(evaluation_variant.variant_id)
- )
- variants_names.append(str(variant_name))
- variants_ids.append(str(evaluation_variant.variant_id))
- variant_revision = (
- str(evaluation_variant.variant_revision.revision)
- if isinstance(evaluation_variant.variant_revision_id, uuid.UUID)
- else " None"
- )
- revisions.append(variant_revision)
- variants_revision_ids.append(str(evaluation_variant.variant_revision_id))
-
- return HumanEvaluation(
- id=str(evaluation_db.id),
- app_id=str(evaluation_db.app_id),
- project_id=str(evaluation_db.project_id),
- status=evaluation_db.status, # type: ignore
- evaluation_type=evaluation_db.evaluation_type, # type: ignore
- variant_ids=variants_ids,
- variant_names=variants_names,
- testset_id=str(evaluation_db.testset_id),
- testset_name=evaluation_db.testset.name,
- variants_revision_ids=variants_revision_ids,
- revisions=revisions,
- created_at=str(evaluation_db.created_at), # type: ignore
- updated_at=str(evaluation_db.updated_at), # type: ignore
- )
-
-
-def human_evaluation_scenario_db_to_pydantic(
- evaluation_scenario_db: HumanEvaluationScenarioDB, evaluation_id: str
-) -> HumanEvaluationScenario:
- return HumanEvaluationScenario(
- id=str(evaluation_scenario_db.id),
- evaluation_id=evaluation_id,
- inputs=evaluation_scenario_db.inputs, # type: ignore
- outputs=evaluation_scenario_db.outputs, # type: ignore
- vote=evaluation_scenario_db.vote, # type: ignore
- score=evaluation_scenario_db.score, # type: ignore
- correct_answer=evaluation_scenario_db.correct_answer, # type: ignore
- is_pinned=evaluation_scenario_db.is_pinned or False, # type: ignore
- note=evaluation_scenario_db.note or "", # type: ignore
- )
-
-
-def aggregated_result_of_evaluation_to_pydantic(
- evaluation_aggregated_results: List,
-) -> List[dict]:
- transformed_results = []
- for aggregated_result in evaluation_aggregated_results:
- evaluator_config_dict = (
- {
- "id": str(aggregated_result.evaluator_config.id),
- "name": aggregated_result.evaluator_config.name,
- "evaluator_key": aggregated_result.evaluator_config.evaluator_key,
- "settings_values": aggregated_result.evaluator_config.settings_values,
- "created_at": str(aggregated_result.evaluator_config.created_at),
- "updated_at": str(aggregated_result.evaluator_config.updated_at),
- }
- if isinstance(aggregated_result.evaluator_config_id, uuid.UUID)
- else None
- )
- transformed_results.append(
- {
- "evaluator_config": (
- {} if evaluator_config_dict is None else evaluator_config_dict
- ),
- "result": aggregated_result.result,
- }
- )
- return transformed_results
-
-
-async def evaluation_scenario_db_to_pydantic(
- evaluation_scenario_db: EvaluationScenarioDB, evaluation_id: str
-) -> EvaluationScenario:
- scenario_results = [
- {
- "evaluator_config": str(scenario_result.evaluator_config_id),
- "result": scenario_result.result,
- }
- for scenario_result in evaluation_scenario_db.results
- ]
- return EvaluationScenario(
- id=str(evaluation_scenario_db.id),
- evaluation_id=evaluation_id,
- inputs=[
- EvaluationScenarioInput(**scenario_input) # type: ignore
- for scenario_input in evaluation_scenario_db.inputs
- ],
- outputs=[
- EvaluationScenarioOutput(**scenario_output) # type: ignore
- for scenario_output in evaluation_scenario_db.outputs
- ],
- correct_answers=[
- CorrectAnswer(**correct_answer) # type: ignore
- for correct_answer in evaluation_scenario_db.correct_answers
- ],
- is_pinned=evaluation_scenario_db.is_pinned or False, # type: ignore
- note=evaluation_scenario_db.note or "", # type: ignore
- results=scenario_results, # type: ignore
- )
diff --git a/api/oss/src/services/db_manager.py b/api/oss/src/services/db_manager.py
index 9833c104dc..54fef759d3 100644
--- a/api/oss/src/services/db_manager.py
+++ b/api/oss/src/services/db_manager.py
@@ -10,7 +10,7 @@
from sqlalchemy import func, or_, asc
from sqlalchemy.ext.asyncio import AsyncSession
from supertokens_python.types import AccountInfo
-from sqlalchemy.orm import joinedload, load_only, aliased
+from sqlalchemy.orm import joinedload, load_only, selectinload
from sqlalchemy.exc import NoResultFound, MultipleResultsFound, SQLAlchemyError
from supertokens_python.asyncio import list_users_by_account_info
from supertokens_python.asyncio import delete_user as delete_user_from_supertokens
@@ -22,6 +22,7 @@
from oss.src.dbs.postgres.shared.engine import engine
from oss.src.services.json_importer_helper import get_json
+
if is_ee():
from ee.src.models.db_models import ProjectDB, WorkspaceDB
else:
@@ -47,25 +48,6 @@
AppType,
ConfigDB,
)
-from oss.src.models.shared_models import (
- Result,
- CorrectAnswer,
- AggregatedResult,
- EvaluationScenarioResult,
- EvaluationScenarioInput,
- EvaluationScenarioOutput,
- HumanEvaluationScenarioInput,
-)
-from oss.src.models.db_models import (
- EvaluationDB,
- HumanEvaluationDB,
- EvaluationScenarioDB,
- HumanEvaluationScenarioDB,
- HumanEvaluationVariantDB,
- EvaluationScenarioResultDB,
- EvaluationEvaluatorConfigDB,
- EvaluationAggregatedResultDB,
-)
log = get_module_logger(__name__)
@@ -2976,7 +2958,7 @@ async def find_previous_variant_from_base_id(
async def update_base(
base_id: str,
**kwargs: dict,
-) -> Optional[VariantBaseDB]:
+) -> VariantBaseDB:
"""Update the base object in the database with the provided id.
Arguments:
@@ -3084,9 +3066,7 @@ async def fetch_evaluators_configs(project_id: str):
return evaluators_configs
-async def fetch_evaluator_config(
- evaluator_config_id: str,
-) -> Optional[EvaluatorConfigDB]:
+async def fetch_evaluator_config(evaluator_config_id: str) -> EvaluatorConfigDB:
"""Fetch evaluator configurations from the database.
Args:
@@ -3139,7 +3119,7 @@ async def check_if_evaluators_exist_in_list_of_evaluators_configs(
async def fetch_evaluator_config_by_appId(
app_id: str, evaluator_name: str
-) -> Optional[EvaluatorConfigDB]:
+) -> EvaluatorConfigDB:
"""Fetch the evaluator config from the database using the app Id and evaluator name.
Args:
@@ -3290,7 +3270,7 @@ async def fetch_corresponding_object_uuid(table_name: str, object_id: str) -> st
return str(object_mapping.uuid)
-async def fetch_default_project() -> Optional[ProjectDB]:
+async def fetch_default_project() -> ProjectDB:
"""
Fetch the default project from the database.
Returns:
@@ -3303,9 +3283,7 @@ async def fetch_default_project() -> Optional[ProjectDB]:
return default_project
-async def get_user_api_key_by_prefix(
- api_key_prefix: str, user_id: str
-) -> Optional[APIKeyDB]:
+async def get_user_api_key_by_prefix(api_key_prefix: str, user_id: str) -> APIKeyDB:
"""
Gets the user api key by prefix.
@@ -3351,755 +3329,3 @@ async def update_api_key_timestamp(api_key_id: str) -> None:
await session.commit()
await session.refresh(api_key)
-
-
-async def fetch_evaluation_status_by_id(
- project_id: str,
- evaluation_id: str,
-) -> Optional[str]:
- """Fetch only the status of an evaluation by its ID."""
- assert evaluation_id is not None, "evaluation_id cannot be None"
-
- async with engine.core_session() as session:
- query = (
- select(EvaluationDB)
- .filter_by(project_id=project_id, id=uuid.UUID(evaluation_id))
- .options(load_only(EvaluationDB.status))
- )
-
- result = await session.execute(query)
- evaluation = result.scalars().first()
- return evaluation.status if evaluation else None
-
-
-async def fetch_evaluation_by_id(
- project_id: str,
- evaluation_id: str,
-) -> Optional[EvaluationDB]:
- """Fetches a evaluation by its ID.
-
- Args:
- evaluation_id (str): The ID of the evaluation to fetch.
-
- Returns:
- EvaluationDB: The fetched evaluation, or None if no evaluation was found.
- """
-
- assert evaluation_id is not None, "evaluation_id cannot be None"
- async with engine.core_session() as session:
- base_query = select(EvaluationDB).filter_by(
- project_id=project_id,
- id=uuid.UUID(evaluation_id),
- )
- query = base_query.options(
- joinedload(EvaluationDB.testset.of_type(TestsetDB)).load_only(TestsetDB.id, TestsetDB.name), # type: ignore
- )
-
- result = await session.execute(
- query.options(
- joinedload(EvaluationDB.variant.of_type(AppVariantDB)).load_only(AppVariantDB.id, AppVariantDB.variant_name), # type: ignore
- joinedload(EvaluationDB.variant_revision.of_type(AppVariantRevisionsDB)).load_only(AppVariantRevisionsDB.revision), # type: ignore
- joinedload(
- EvaluationDB.aggregated_results.of_type(
- EvaluationAggregatedResultDB
- )
- ).joinedload(EvaluationAggregatedResultDB.evaluator_config),
- )
- )
- evaluation = result.unique().scalars().first()
- return evaluation
-
-
-async def list_human_evaluations(app_id: str, project_id: str):
- """
- Fetches human evaluations belonging to an App.
-
- Args:
- app_id (str): The application identifier
- """
-
- async with engine.core_session() as session:
- base_query = (
- select(HumanEvaluationDB)
- .filter_by(app_id=uuid.UUID(app_id), project_id=uuid.UUID(project_id))
- .filter(HumanEvaluationDB.testset_id.isnot(None))
- )
- query = base_query.options(
- joinedload(HumanEvaluationDB.testset.of_type(TestsetDB)).load_only(TestsetDB.id, TestsetDB.name), # type: ignore
- )
-
- result = await session.execute(query)
- human_evaluations = result.scalars().all()
- return human_evaluations
-
-
-async def create_human_evaluation(
- app: AppDB,
- status: str,
- evaluation_type: str,
- testset_id: str,
- variants_ids: List[str],
-):
- """
- Creates a human evaluation.
-
- Args:
- app (AppDB: The app object
- status (str): The status of the evaluation
- evaluation_type (str): The evaluation type
- testset_id (str): The ID of the evaluation testset
- variants_ids (List[str]): The IDs of the variants for the evaluation
- """
-
- async with engine.core_session() as session:
- human_evaluation = HumanEvaluationDB(
- app_id=app.id,
- project_id=app.project_id,
- status=status,
- evaluation_type=evaluation_type,
- testset_id=testset_id,
- )
-
- session.add(human_evaluation)
- await session.commit()
- await session.refresh(human_evaluation, attribute_names=["testset"])
-
- # create variants for human evaluation
- await create_human_evaluation_variants(
- human_evaluation_id=str(human_evaluation.id),
- variants_ids=variants_ids,
- )
- return human_evaluation
-
-
-async def fetch_human_evaluation_variants(human_evaluation_id: str):
- """
- Fetches human evaluation variants.
-
- Args:
- human_evaluation_id (str): The human evaluation ID
-
- Returns:
- The human evaluation variants.
- """
-
- async with engine.core_session() as session:
- base_query = select(HumanEvaluationVariantDB).filter_by(
- human_evaluation_id=uuid.UUID(human_evaluation_id)
- )
- query = base_query.options(
- joinedload(HumanEvaluationVariantDB.variant.of_type(AppVariantDB)).load_only(AppVariantDB.id, AppVariantDB.variant_name), # type: ignore
- joinedload(HumanEvaluationVariantDB.variant_revision.of_type(AppVariantRevisionsDB)).load_only(AppVariantRevisionsDB.id, AppVariantRevisionsDB.revision), # type: ignore
- )
-
- result = await session.execute(query)
- evaluation_variants = result.scalars().all()
- return evaluation_variants
-
-
-async def create_human_evaluation_variants(
- human_evaluation_id: str, variants_ids: List[str]
-):
- """
- Creates human evaluation variants.
-
- Args:
- human_evaluation_id (str): The human evaluation identifier
- variants_ids (List[str]): The variants identifiers
- project_id (str): The project ID
- """
-
- variants_dict = {}
- for variant_id in variants_ids:
- variant = await fetch_app_variant_by_id(app_variant_id=variant_id)
- if variant:
- variants_dict[variant_id] = variant
-
- variants_revisions_dict = {}
- for variant_id, variant in variants_dict.items():
- variant_revision = await fetch_app_variant_revision_by_variant(
- app_variant_id=str(variant.id), project_id=str(variant.project_id), revision=variant.revision # type: ignore
- )
- if variant_revision:
- variants_revisions_dict[variant_id] = variant_revision
-
- if set(variants_dict.keys()) != set(variants_revisions_dict.keys()):
- raise ValueError("Mismatch between variants and their revisions")
-
- async with engine.core_session() as session:
- for variant_id in variants_ids:
- variant = variants_dict[variant_id]
- variant_revision = variants_revisions_dict[variant_id]
- human_evaluation_variant = HumanEvaluationVariantDB(
- human_evaluation_id=uuid.UUID(human_evaluation_id),
- variant_id=variant.id, # type: ignore
- variant_revision_id=variant_revision.id, # type: ignore
- )
- session.add(human_evaluation_variant)
-
- await session.commit()
-
-
-async def fetch_human_evaluation_by_id(
- evaluation_id: str,
-) -> Optional[HumanEvaluationDB]:
- """
- Fetches a evaluation by its ID.
-
- Args:
- evaluation_id (str): The ID of the evaluation to fetch.
-
- Returns:
- EvaluationDB: The fetched evaluation, or None if no evaluation was found.
- """
-
- assert evaluation_id is not None, "evaluation_id cannot be None"
- async with engine.core_session() as session:
- base_query = select(HumanEvaluationDB).filter_by(id=uuid.UUID(evaluation_id))
- query = base_query.options(
- joinedload(HumanEvaluationDB.testset.of_type(TestsetDB)).load_only(TestsetDB.id, TestsetDB.name), # type: ignore
- )
- result = await session.execute(query)
- evaluation = result.scalars().first()
- return evaluation
-
-
-async def update_human_evaluation(evaluation_id: str, values_to_update: dict):
- """Updates human evaluation with the specified values.
-
- Args:
- evaluation_id (str): The evaluation ID
- values_to_update (dict): The values to update
-
- Exceptions:
- NoResultFound: if human evaluation is not found
- """
-
- async with engine.core_session() as session:
- result = await session.execute(
- select(HumanEvaluationDB).filter_by(id=uuid.UUID(evaluation_id))
- )
- human_evaluation = result.scalars().first()
- if not human_evaluation:
- raise NoResultFound(f"Human evaluation with id {evaluation_id} not found")
-
- for key, value in values_to_update.items():
- if hasattr(human_evaluation, key):
- setattr(human_evaluation, key, value)
-
- await session.commit()
- await session.refresh(human_evaluation)
-
-
-async def delete_human_evaluation(evaluation_id: str):
- """Delete the evaluation by its ID.
-
- Args:
- evaluation_id (str): The ID of the evaluation to delete.
- """
-
- assert evaluation_id is not None, "evaluation_id cannot be None"
- async with engine.core_session() as session:
- result = await session.execute(
- select(HumanEvaluationDB).filter_by(id=uuid.UUID(evaluation_id))
- )
- evaluation = result.scalars().first()
- if not evaluation:
- raise NoResultFound(f"Human evaluation with id {evaluation_id} not found")
-
- await session.delete(evaluation)
- await session.commit()
-
-
-async def create_human_evaluation_scenario(
- inputs: List[HumanEvaluationScenarioInput],
- project_id: str,
- evaluation_id: str,
- evaluation_extend: Dict[str, Any],
-):
- """
- Creates a human evaluation scenario.
-
- Args:
- inputs (List[HumanEvaluationScenarioInput]): The inputs.
- evaluation_id (str): The evaluation identifier.
- evaluation_extend (Dict[str, any]): An extended required payload for the evaluation scenario. Contains score, vote, and correct_answer.
- """
-
- async with engine.core_session() as session:
- evaluation_scenario = HumanEvaluationScenarioDB(
- **evaluation_extend,
- project_id=uuid.UUID(project_id),
- evaluation_id=uuid.UUID(evaluation_id),
- inputs=[input.model_dump() for input in inputs],
- outputs=[],
- )
-
- session.add(evaluation_scenario)
- await session.commit()
-
-
-async def update_human_evaluation_scenario(
- evaluation_scenario_id: str, values_to_update: dict
-):
- """Updates human evaluation scenario with the specified values.
-
- Args:
- evaluation_scenario_id (str): The evaluation scenario ID
- values_to_update (dict): The values to update
-
- Exceptions:
- NoResultFound: if human evaluation scenario is not found
- """
-
- async with engine.core_session() as session:
- result = await session.execute(
- select(HumanEvaluationScenarioDB).filter_by(
- id=uuid.UUID(evaluation_scenario_id)
- )
- )
- human_evaluation_scenario = result.scalars().first()
- if not human_evaluation_scenario:
- raise NoResultFound(
- f"Human evaluation scenario with id {evaluation_scenario_id} not found"
- )
-
- for key, value in values_to_update.items():
- if hasattr(human_evaluation_scenario, key):
- setattr(human_evaluation_scenario, key, value)
-
- await session.commit()
- await session.refresh(human_evaluation_scenario)
-
-
-async def fetch_human_evaluation_scenarios(evaluation_id: str):
- """
- Fetches human evaluation scenarios.
-
- Args:
- evaluation_id (str): The evaluation identifier
-
- Returns:
- The evaluation scenarios.
- """
-
- async with engine.core_session() as session:
- result = await session.execute(
- select(HumanEvaluationScenarioDB)
- .filter_by(evaluation_id=uuid.UUID(evaluation_id))
- .order_by(asc(HumanEvaluationScenarioDB.created_at))
- )
- evaluation_scenarios = result.scalars().all()
- return evaluation_scenarios
-
-
-async def fetch_evaluation_scenarios(evaluation_id: str, project_id: str):
- """
- Fetches evaluation scenarios.
-
- Args:
- evaluation_id (str): The evaluation identifier
- project_id (str): The ID of the project
-
- Returns:
- The evaluation scenarios.
- """
-
- async with engine.core_session() as session:
- result = await session.execute(
- select(EvaluationScenarioDB)
- .filter_by(
- evaluation_id=uuid.UUID(evaluation_id), project_id=uuid.UUID(project_id)
- )
- .options(joinedload(EvaluationScenarioDB.results))
- )
- evaluation_scenarios = result.unique().scalars().all()
- return evaluation_scenarios
-
-
-async def fetch_evaluation_scenario_by_id(
- evaluation_scenario_id: str,
-) -> Optional[EvaluationScenarioDB]:
- """Fetches and evaluation scenario by its ID.
-
- Args:
- evaluation_scenario_id (str): The ID of the evaluation scenario to fetch.
-
- Returns:
- EvaluationScenarioDB: The fetched evaluation scenario, or None if no evaluation scenario was found.
- """
-
- assert evaluation_scenario_id is not None, "evaluation_scenario_id cannot be None"
- async with engine.core_session() as session:
- result = await session.execute(
- select(EvaluationScenarioDB).filter_by(id=uuid.UUID(evaluation_scenario_id))
- )
- evaluation_scenario = result.scalars().first()
- return evaluation_scenario
-
-
-async def fetch_human_evaluation_scenario_by_id(
- evaluation_scenario_id: str,
-) -> Optional[HumanEvaluationScenarioDB]:
- """Fetches and evaluation scenario by its ID.
-
- Args:
- evaluation_scenario_id (str): The ID of the evaluation scenario to fetch.
-
- Returns:
- EvaluationScenarioDB: The fetched evaluation scenario, or None if no evaluation scenario was found.
- """
-
- assert evaluation_scenario_id is not None, "evaluation_scenario_id cannot be None"
- async with engine.core_session() as session:
- result = await session.execute(
- select(HumanEvaluationScenarioDB).filter_by(
- id=uuid.UUID(evaluation_scenario_id)
- )
- )
- evaluation_scenario = result.scalars().first()
- return evaluation_scenario
-
-
-async def fetch_human_evaluation_scenario_by_evaluation_id(
- evaluation_id: str,
-) -> Optional[HumanEvaluationScenarioDB]:
- """Fetches and evaluation scenario by its ID.
- Args:
- evaluation_id (str): The ID of the evaluation object to use in fetching the human evaluation.
- Returns:
- EvaluationScenarioDB: The fetched evaluation scenario, or None if no evaluation scenario was found.
- """
-
- evaluation = await fetch_human_evaluation_by_id(evaluation_id)
- async with engine.core_session() as session:
- result = await session.execute(
- select(HumanEvaluationScenarioDB).filter_by(
- evaluation_id=evaluation.id # type: ignore
- )
- )
- human_eval_scenario = result.scalars().first()
- return human_eval_scenario
-
-
-async def create_new_evaluation(
- app: AppDB,
- project_id: str,
- testset: TestsetDB,
- status: Result,
- variant: str,
- variant_revision: str,
-) -> EvaluationDB:
- """Create a new evaluation scenario.
- Returns:
- EvaluationScenarioDB: The created evaluation scenario.
- """
-
- async with engine.core_session() as session:
- evaluation = EvaluationDB(
- app_id=app.id,
- project_id=uuid.UUID(project_id),
- testset_id=testset.id,
- status=status.model_dump(),
- variant_id=uuid.UUID(variant),
- variant_revision_id=uuid.UUID(variant_revision),
- )
-
- session.add(evaluation)
- await session.commit()
- await session.refresh(
- evaluation,
- attribute_names=[
- "testset",
- "variant",
- "variant_revision",
- "aggregated_results",
- ],
- )
-
- return evaluation
-
-
-async def list_evaluations(app_id: str, project_id: str):
- """Retrieves evaluations of the specified app from the db.
-
- Args:
- app_id (str): The ID of the app
- project_id (str): The ID of the project
- """
-
- async with engine.core_session() as session:
- base_query = select(EvaluationDB).filter_by(
- app_id=uuid.UUID(app_id), project_id=uuid.UUID(project_id)
- )
- query = base_query.options(
- joinedload(EvaluationDB.testset.of_type(TestsetDB)).load_only(TestsetDB.id, TestsetDB.name), # type: ignore
- )
-
- result = await session.execute(
- query.options(
- joinedload(EvaluationDB.variant.of_type(AppVariantDB)).load_only(AppVariantDB.id, AppVariantDB.variant_name), # type: ignore
- joinedload(EvaluationDB.variant_revision.of_type(AppVariantRevisionsDB)).load_only(AppVariantRevisionsDB.revision), # type: ignore
- joinedload(
- EvaluationDB.aggregated_results.of_type(
- EvaluationAggregatedResultDB
- )
- ).joinedload(EvaluationAggregatedResultDB.evaluator_config),
- )
- )
- evaluations = result.unique().scalars().all()
- return evaluations
-
-
-async def fetch_evaluations_by_resource(
- resource_type: str, project_id: str, resource_ids: List[str]
-):
- """
- Fetches an evaluations by resource.
-
- Args:
- resource_type (str): The resource type
- project_id (str): The ID of the project
- resource_ids (List[str]): The resource identifiers
-
- Returns:
- The evaluations by resource.
-
- Raises:
- HTTPException:400 resource_type {type} is not supported
- """
-
- ids = list(map(uuid.UUID, resource_ids))
-
- async with engine.core_session() as session:
- if resource_type == "variant":
- result_evaluations = await session.execute(
- select(EvaluationDB)
- .filter(
- EvaluationDB.variant_id.in_(ids),
- EvaluationDB.project_id == uuid.UUID(project_id),
- )
- .options(load_only(EvaluationDB.id)) # type: ignore
- )
- result_human_evaluations = await session.execute(
- select(HumanEvaluationDB)
- .join(HumanEvaluationVariantDB)
- .filter(
- HumanEvaluationVariantDB.variant_id.in_(ids),
- HumanEvaluationDB.project_id == uuid.UUID(project_id),
- )
- .options(load_only(HumanEvaluationDB.id)) # type: ignore
- )
- res_evaluations = list(result_evaluations.scalars().all())
- res_human_evaluations = list(result_human_evaluations.scalars().all())
- return res_evaluations + res_human_evaluations
-
- elif resource_type == "testset":
- result_evaluations = await session.execute(
- select(EvaluationDB)
- .filter(
- EvaluationDB.testset_id.in_(ids),
- EvaluationDB.project_id == uuid.UUID(project_id),
- )
- .options(load_only(EvaluationDB.id)) # type: ignore
- )
- result_human_evaluations = await session.execute(
- select(HumanEvaluationDB)
- .filter(
- HumanEvaluationDB.testset_id.in_(ids),
- HumanEvaluationDB.project_id
- == uuid.UUID(project_id), # Fixed to match HumanEvaluationDB
- )
- .options(load_only(HumanEvaluationDB.id)) # type: ignore
- )
- res_evaluations = list(result_evaluations.scalars().all())
- res_human_evaluations = list(result_human_evaluations.scalars().all())
- return res_evaluations + res_human_evaluations
-
- elif resource_type == "evaluator_config":
- query = (
- select(EvaluationDB)
- .join(EvaluationDB.evaluator_configs)
- .filter(
- EvaluationEvaluatorConfigDB.evaluator_config_id.in_(ids),
- EvaluationDB.project_id == uuid.UUID(project_id),
- )
- )
- result = await session.execute(query)
- res = result.scalars().all()
- return res
-
- raise HTTPException(
- status_code=400,
- detail=f"resource_type {resource_type} is not supported",
- )
-
-
-async def delete_evaluations(evaluation_ids: List[str]) -> None:
- """Delete evaluations based on the ids provided from the db.
-
- Args:
- evaluations_ids (list[str]): The IDs of the evaluation
- """
-
- async with engine.core_session() as session:
- query = select(EvaluationDB).where(EvaluationDB.id.in_(evaluation_ids))
- result = await session.execute(query)
- evaluations = result.scalars().all()
- for evaluation in evaluations:
- await session.delete(evaluation)
- await session.commit()
-
-
-async def create_new_evaluation_scenario(
- project_id: str,
- evaluation_id: str,
- variant_id: str,
- inputs: List[EvaluationScenarioInput],
- outputs: List[EvaluationScenarioOutput],
- correct_answers: Optional[List[CorrectAnswer]],
- is_pinned: Optional[bool],
- note: Optional[str],
- results: List[EvaluationScenarioResult],
-) -> EvaluationScenarioDB:
- """Create a new evaluation scenario.
-
- Returns:
- EvaluationScenarioDB: The created evaluation scenario.
- """
-
- async with engine.core_session() as session:
- evaluation_scenario = EvaluationScenarioDB(
- project_id=uuid.UUID(project_id),
- evaluation_id=uuid.UUID(evaluation_id),
- variant_id=uuid.UUID(variant_id),
- inputs=[input.model_dump() for input in inputs],
- outputs=[output.model_dump() for output in outputs],
- correct_answers=(
- [correct_answer.model_dump() for correct_answer in correct_answers]
- if correct_answers is not None
- else []
- ),
- is_pinned=is_pinned,
- note=note,
- )
-
- session.add(evaluation_scenario)
- await session.commit()
- await session.refresh(evaluation_scenario)
-
- # create evaluation scenario result
- for result in results:
- evaluation_scenario_result = EvaluationScenarioResultDB(
- evaluation_scenario_id=evaluation_scenario.id,
- evaluator_config_id=uuid.UUID(result.evaluator_config),
- result=result.result.model_dump(),
- )
-
- session.add(evaluation_scenario_result)
-
- await session.commit() # ensures that scenario results insertion is committed
- await session.refresh(evaluation_scenario)
-
- return evaluation_scenario
-
-
-async def update_evaluation_with_aggregated_results(
- evaluation_id: str, aggregated_results: List[AggregatedResult]
-):
- async with engine.core_session() as session:
- for result in aggregated_results:
- aggregated_result = EvaluationAggregatedResultDB(
- evaluation_id=uuid.UUID(evaluation_id),
- evaluator_config_id=uuid.UUID(result.evaluator_config),
- result=result.result.model_dump(),
- )
- session.add(aggregated_result)
-
- await session.commit()
-
-
-async def fetch_eval_aggregated_results(evaluation_id: str):
- """
- Fetches an evaluation aggregated results by evaluation identifier.
-
- Args:
- evaluation_id (str): The evaluation identifier
-
- Returns:
- The evaluation aggregated results by evaluation identifier.
- """
-
- async with engine.core_session() as session:
- base_query = select(EvaluationAggregatedResultDB).filter_by(
- evaluation_id=uuid.UUID(evaluation_id)
- )
- query = base_query.options(
- joinedload(
- EvaluationAggregatedResultDB.evaluator_config.of_type(EvaluatorConfigDB)
- ).load_only(
- EvaluatorConfigDB.id, # type: ignore
- EvaluatorConfigDB.name, # type: ignore
- EvaluatorConfigDB.evaluator_key, # type: ignore
- EvaluatorConfigDB.settings_values, # type: ignore
- EvaluatorConfigDB.created_at, # type: ignore
- EvaluatorConfigDB.updated_at, # type: ignore
- )
- )
-
- result = await session.execute(query)
- aggregated_results = result.scalars().all()
- return aggregated_results
-
-
-async def update_evaluation(
- evaluation_id: str, project_id: str, updates: Dict[str, Any]
-) -> Optional[EvaluationDB]:
- """
- Update an evaluator configuration in the database with the provided id.
-
- Arguments:
- evaluation_id (str): The ID of the evaluator configuration to be updated.
- project_id (str): The ID of the project.
- updates (Dict[str, Any]): The updates to apply to the evaluator configuration.
-
- Returns:
- EvaluatorConfigDB: The updated evaluator configuration object.
- """
-
- async with engine.core_session() as session:
- result = await session.execute(
- select(EvaluationDB).filter_by(
- id=uuid.UUID(evaluation_id), project_id=uuid.UUID(project_id)
- )
- )
- evaluation = result.scalars().first()
- for key, value in updates.items():
- if hasattr(evaluation, key):
- setattr(evaluation, key, value)
-
- await session.commit()
- await session.refresh(evaluation)
-
- return evaluation
-
-
-async def check_if_evaluation_contains_failed_evaluation_scenarios(
- evaluation_id: str,
-) -> bool:
- async with engine.core_session() as session:
- EvaluationResultAlias = aliased(EvaluationScenarioResultDB)
- query = (
- select(func.count(EvaluationScenarioDB.id))
- .join(EvaluationResultAlias, EvaluationScenarioDB.results)
- .where(
- EvaluationScenarioDB.evaluation_id == uuid.UUID(evaluation_id),
- EvaluationResultAlias.result["type"].astext == "error",
- )
- )
-
- result = await session.execute(query)
- count = result.scalar()
- if not count:
- return False
- return count > 0
diff --git a/api/pyproject.toml b/api/pyproject.toml
index e8c38e1e19..23123058db 100644
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "api"
-version = "0.62.1"
+version = "0.61.2"
description = "Agenta API"
authors = [
{ name = "Mahmoud Mabrouk", email = "mahmoud@agenta.ai" },
diff --git a/docs/blog/entries/customize-llm-as-a-judge-output-schemas.mdx b/docs/blog/entries/customize-llm-as-a-judge-output-schemas.mdx
deleted file mode 100644
index 033e29371b..0000000000
--- a/docs/blog/entries/customize-llm-as-a-judge-output-schemas.mdx
+++ /dev/null
@@ -1,71 +0,0 @@
----
-title: "Customize LLM-as-a-Judge Output Schemas"
-slug: customize-llm-as-a-judge-output-schemas
-date: 2025-11-10
-tags: [v0.62.0]
-description: "Learn how to customize LLM-as-a-Judge evaluator output schemas with binary, multiclass, or custom JSON formats. Enable reasoning for better evaluation quality and structure feedback to match your workflow needs."
----
-
-import Image from "@theme/IdealImage";
-
-The LLM-as-a-Judge evaluator now supports custom output schemas. You can define exactly what feedback structure you need for your evaluations.
-
-
-
-
-
-
-
-## What's New
-
-### **Flexible Output Types**
-Configure the evaluator to return different types of outputs:
-- **Binary**: Return a simple yes/no or pass/fail score
-- **Multiclass**: Choose from multiple predefined categories
-- **Custom JSON**: Define any structure that fits your use case
-
-### **Include Reasoning for Better Quality**
-Enable the reasoning option to have the LLM explain its evaluation. This improves prediction quality because the model thinks through its assessment before providing a score.
-
-When you include reasoning, the evaluator returns both the score and a detailed explanation of how it arrived at that judgment.
-
-### **Advanced: Raw JSON Schema**
-For complete control, provide a raw JSON schema. The evaluator will return responses that match your exact structure.
-
-This lets you capture multiple scores, categorical labels, confidence levels, and custom fields in a single evaluation pass. You can structure the output however your workflow requires.
-
-### **Use Custom Schemas in Evaluation**
-Once configured, your custom schemas work seamlessly in the evaluation workflow. The results display in the evaluation dashboard with all your custom fields visible.
-
-This makes it easy to analyze multiple dimensions of quality in a single evaluation run.
-
-## Example Use Cases
-
-**Binary Score with Reasoning:**
-Return a simple correct/incorrect judgment along with an explanation of why the output succeeded or failed.
-
-**Multi-dimensional Feedback:**
-Capture separate scores for accuracy, relevance, completeness, and tone in one evaluation. Include reasoning for each dimension.
-
-**Structured Classification:**
-Return categorical labels (excellent/good/fair/poor) along with specific issues found and suggestions for improvement.
-
-## Getting Started
-
-To use custom output schemas with LLM-as-a-Judge:
-
-1. Open the evaluator configuration
-2. Select your desired output type (binary, multiclass, or custom)
-3. Enable reasoning if you want explanations
-4. For advanced use, provide your JSON schema
-5. Run your evaluation
-
-Learn more in the [LLM-as-a-Judge documentation](/evaluation/configure-evaluators/llm-as-a-judge).
diff --git a/docs/blog/main.mdx b/docs/blog/main.mdx
index 66a0256cb0..e55eed8a9c 100644
--- a/docs/blog/main.mdx
+++ b/docs/blog/main.mdx
@@ -10,33 +10,6 @@ import Image from "@theme/IdealImage";
-### [Customize LLM-as-a-Judge Output Schemas](/changelog/customize-llm-as-a-judge-output-schemas)
-
-_10 November 2025_
-
-**v0.62.0**
-
-
-
-
-
-
-The LLM-as-a-Judge evaluator now supports custom output schemas. Create multiple feedback outputs per evaluator with any structure you need.
-
-You can configure output types (binary, multiclass), include reasoning to improve prediction quality, or provide a raw JSON schema with any structure you define. Use these custom schemas in your evaluations to capture exactly the feedback you need.
-
-Learn more in the [LLM-as-a-Judge documentation](/evaluation/configure-evaluators/llm-as-a-judge).
-
----
-
### [Documentation Overhaul](/changelog/documentation-architecture-overhaul)
_3 November 2025_
diff --git a/docs/docs/evaluation/configure-evaluators/05-llm-as-a-judge.mdx b/docs/docs/evaluation/configure-evaluators/05-llm-as-a-judge.mdx
index a6489b156d..399dfde99e 100644
--- a/docs/docs/evaluation/configure-evaluators/05-llm-as-a-judge.mdx
+++ b/docs/docs/evaluation/configure-evaluators/05-llm-as-a-judge.mdx
@@ -2,8 +2,6 @@
title: "LLM-as-a-Judge"
---
-import Image from "@theme/IdealImage";
-
LLM-as-a-Judge is an evaluator that uses an LLM to assess LLM outputs. It's particularly useful for evaluating text generation tasks or chatbots where there's no single correct answer.

@@ -58,28 +56,4 @@ ANSWER ONLY THE SCORE. DO NOT USE MARKDOWN. DO NOT PROVIDE ANYTHING OTHER THAN T
### The Model
-The model can be configured to select one of the supported options (`gpt-4o`, `gpt-5`, `gpt-5-mini`, `gpt-5-nano`, `claude-3-5-sonnet`, `claude-3-5-haiku`, `claude-3-5-opus`). To use LLM-as-a-Judge, you'll need to set your OpenAI or Anthropic API key in the settings. The key is saved locally and only sent to our servers for evaluation; it's not stored there.
-
-### Output Schema
-
-You can configure the output schema to control what the LLM evaluator returns. This allows you to get structured feedback tailored to your evaluation needs.
-
-#### Basic Configuration
-
-The basic configuration lets you choose from common output types:
-
-- **Binary**: Returns a simple pass/fail or yes/no judgment
-- **Multiclass**: Returns a classification from a predefined set of categories
-- **Continuous**: Returns a score between a minimum and maximum value
-
-You can also enable **Include Reasoning** to have the evaluator explain its judgment. This option significantly improves the quality of evaluations by making the LLM's decision process transparent.
-
-
-
-
-#### Advanced Configuration
-
-For complete control, you can provide a custom JSON schema. This lets you define any output structure you need. For example, you could return multiple scores, confidence levels, detailed feedback categories, or any combination of fields.
-
-
-
+The model can be configured to select one of the supported options (`gpt-3.5-turbo`, `gpt-4o`, `gpt-5`, `gpt-5-mini`, `gpt-5-nano`, `claude-3-5-sonnet`, `claude-3-5-haiku`, `claude-3-5-opus`). To use LLM-as-a-Judge, you'll need to set your OpenAI or Anthropic API key in the settings. The key is saved locally and only sent to our servers for evaluation—it's not stored there.
diff --git a/docs/static/images/changelog/changelog-llm-as-a-judge-response-1.png b/docs/static/images/changelog/changelog-llm-as-a-judge-response-1.png
deleted file mode 100644
index 452c1b718a..0000000000
Binary files a/docs/static/images/changelog/changelog-llm-as-a-judge-response-1.png and /dev/null differ
diff --git a/docs/static/images/changelog/changelog-llm-as-a-judge-response-2.png b/docs/static/images/changelog/changelog-llm-as-a-judge-response-2.png
deleted file mode 100644
index 2a7fa18e42..0000000000
Binary files a/docs/static/images/changelog/changelog-llm-as-a-judge-response-2.png and /dev/null differ
diff --git a/hosting/docker-compose/ee/docker-compose.dev.yml b/hosting/docker-compose/ee/docker-compose.dev.yml
index 09861d7b20..8cccfdc2b9 100644
--- a/hosting/docker-compose/ee/docker-compose.dev.yml
+++ b/hosting/docker-compose/ee/docker-compose.dev.yml
@@ -122,7 +122,6 @@ services:
volumes:
- ../../../api/ee/src/crons/meters.sh:/meters.sh
- - ../../../api/oss/src/crons/queries.sh:/queries.sh
env_file:
- ${ENV_FILE:-./.env.ee.dev}
diff --git a/hosting/docker-compose/oss/docker-compose.dev.yml b/hosting/docker-compose/oss/docker-compose.dev.yml
index 53c0eaef52..faf444c24a 100644
--- a/hosting/docker-compose/oss/docker-compose.dev.yml
+++ b/hosting/docker-compose/oss/docker-compose.dev.yml
@@ -103,30 +103,6 @@ services:
command: >
watchmedo auto-restart --directory=/app/ --pattern=*.py --recursive -- celery -A entrypoint.celery_app worker --concurrency=1 --max-tasks-per-child=1 --prefetch-multiplier=1
- cron:
- image: agenta-oss-dev-api:latest
-
- volumes:
- #
- - ../../../api/oss/src/crons/queries.sh:/queries.sh
-
- env_file:
- - ${ENV_FILE:-./.env.oss.dev}
-
- depends_on:
- - postgres
- - api
-
- extra_hosts:
- - "host.docker.internal:host-gateway"
-
- restart: always
-
- networks:
- - agenta-network
-
- command: cron -f
-
alembic:
build:
context: ../../../api
diff --git a/hosting/docker-compose/oss/docker-compose.gh.ssl.yml b/hosting/docker-compose/oss/docker-compose.gh.ssl.yml
index c7dbd5c730..ab0590b990 100644
--- a/hosting/docker-compose/oss/docker-compose.gh.ssl.yml
+++ b/hosting/docker-compose/oss/docker-compose.gh.ssl.yml
@@ -104,26 +104,6 @@ services:
condition: service_healthy
restart: always
- cron:
- image: agenta-oss-dev-api:latest
-
- env_file:
- - ${ENV_FILE:-./.env.oss.dev}
-
- depends_on:
- - postgres
- - api
-
- extra_hosts:
- - "host.docker.internal:host-gateway"
-
- restart: always
-
- networks:
- - agenta-network
-
- command: cron -f
-
alembic:
build:
context: ../../../api
diff --git a/hosting/docker-compose/oss/docker-compose.gh.yml b/hosting/docker-compose/oss/docker-compose.gh.yml
index e079346a68..cc2e6612a3 100644
--- a/hosting/docker-compose/oss/docker-compose.gh.yml
+++ b/hosting/docker-compose/oss/docker-compose.gh.yml
@@ -80,26 +80,6 @@ services:
- redis
restart: always
- cron:
- image: agenta-oss-dev-api:latest
-
- env_file:
- - ${ENV_FILE:-./.env.oss.dev}
-
- depends_on:
- - postgres
- - api
-
- extra_hosts:
- - "host.docker.internal:host-gateway"
-
- restart: always
-
- networks:
- - agenta-network
-
- command: cron -f
-
alembic:
build:
context: ../../../api
diff --git a/sdk/agenta/sdk/tracing/exporters.py b/sdk/agenta/sdk/tracing/exporters.py
index a121bd857a..c156d7d906 100644
--- a/sdk/agenta/sdk/tracing/exporters.py
+++ b/sdk/agenta/sdk/tracing/exporters.py
@@ -24,7 +24,7 @@
log = get_module_logger(__name__)
-_ASYNC_EXPORT = environ.get("AGENTA_OTLP_ASYNC_EXPORT", "true").lower() in TRUTHY
+_ASYNC_EXPORT = environ.get("AGENTA_OTLP_ASYNC_EXPORT", "false").lower() in TRUTHY
class InlineTraceExporter(SpanExporter):
diff --git a/sdk/agenta/sdk/workflows/handlers.py b/sdk/agenta/sdk/workflows/handlers.py
index 7216761897..738392f345 100644
--- a/sdk/agenta/sdk/workflows/handlers.py
+++ b/sdk/agenta/sdk/workflows/handlers.py
@@ -511,24 +511,20 @@ def field_match_test_v0(
correct_answer = inputs[correct_answer_key]
if not isinstance(outputs, str) and not isinstance(outputs, dict):
- # raise InvalidOutputsV0Error(expected=["dict", "str"], got=outputs)
- return {"success": False}
+ raise InvalidOutputsV0Error(expected=["dict", "str"], got=outputs)
outputs_dict = outputs
if isinstance(outputs, str):
try:
outputs_dict = loads(outputs)
except json.JSONDecodeError as e:
- # raise InvalidOutputsV0Error(expected="dict", got=outputs) from e
- return {"success": False}
+ raise InvalidOutputsV0Error(expected="dict", got=outputs) from e
if not isinstance(outputs_dict, dict):
- # raise InvalidOutputsV0Error(expected=["dict", "str"], got=outputs)
- return {"success": False}
+ raise InvalidOutputsV0Error(expected=["dict", "str"], got=outputs)
if not json_field in outputs_dict:
- # raise MissingOutputV0Error(path=json_field)
- return {"success": False}
+ raise MissingOutputV0Error(path=json_field)
# --------------------------------------------------------------------------
success = outputs_dict[json_field] == correct_answer
diff --git a/web/oss/src/components/DeleteEvaluationModal/DeleteEvaluationModal.tsx b/web/ee/src/components/DeleteEvaluationModal/DeleteEvaluationModal.tsx
similarity index 100%
rename from web/oss/src/components/DeleteEvaluationModal/DeleteEvaluationModal.tsx
rename to web/ee/src/components/DeleteEvaluationModal/DeleteEvaluationModal.tsx
diff --git a/web/oss/src/components/DeleteEvaluationModal/types.ts b/web/ee/src/components/DeleteEvaluationModal/types.ts
similarity index 100%
rename from web/oss/src/components/DeleteEvaluationModal/types.ts
rename to web/ee/src/components/DeleteEvaluationModal/types.ts
diff --git a/web/ee/src/components/DeploymentHistory/DeploymentHistory.tsx b/web/ee/src/components/DeploymentHistory/DeploymentHistory.tsx
index 3b6f9cdb32..d596e2bc42 100644
--- a/web/ee/src/components/DeploymentHistory/DeploymentHistory.tsx
+++ b/web/ee/src/components/DeploymentHistory/DeploymentHistory.tsx
@@ -15,7 +15,7 @@ import {
fetchAllDeploymentRevisions,
} from "@/oss/services/deploymentVersioning/api"
-import {DeploymentRevisionConfig, DeploymentRevisions} from "@agenta/oss/src/lib/types_ee"
+import {DeploymentRevisionConfig, DeploymentRevisions} from "../../lib/types_ee"
dayjs.extend(relativeTime)
dayjs.extend(duration)
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/assets/AutoEvalRunSkeleton.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/assets/AutoEvalRunSkeleton.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/assets/AutoEvalRunSkeleton.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/assets/AutoEvalRunSkeleton.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/assets/EvalNameTag.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/assets/EvalNameTag.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/assets/EvalNameTag.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/assets/EvalNameTag.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/assets/TagWithLink.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/assets/TagWithLink.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/assets/TagWithLink.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/assets/TagWithLink.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/assets/VariantTag.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/assets/VariantTag.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/assets/VariantTag.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/assets/VariantTag.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/assets/types.ts b/web/ee/src/components/EvalRunDetails/AutoEvalRun/assets/types.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/assets/types.ts
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/assets/types.ts
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/assets/utils.ts b/web/ee/src/components/EvalRunDetails/AutoEvalRun/assets/utils.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/assets/utils.ts
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/assets/utils.ts
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/assets/variantUtils.ts b/web/ee/src/components/EvalRunDetails/AutoEvalRun/assets/variantUtils.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/assets/variantUtils.ts
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/assets/variantUtils.ts
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunCompareMenu/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunCompareMenu/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunCompareMenu/index.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunCompareMenu/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunOutput.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunOutput.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunOutput.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunOutput.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunTraceHeader.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunTraceHeader.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunTraceHeader.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/assets/RunTraceHeader.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/index.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/lib/helpers.ts b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/lib/helpers.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/lib/helpers.ts
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerContent/lib/helpers.ts
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerHeader/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerHeader/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerHeader/index.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerHeader/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerSidePanel/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerSidePanel/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerSidePanel/index.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/FocusDrawerSidePanel/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/Skeletons/FocusDrawerContentSkeleton.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/Skeletons/FocusDrawerContentSkeleton.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/Skeletons/FocusDrawerContentSkeleton.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/Skeletons/FocusDrawerContentSkeleton.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/Skeletons/FocusDrawerHeaderSkeleton.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/Skeletons/FocusDrawerHeaderSkeleton.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/Skeletons/FocusDrawerHeaderSkeleton.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/Skeletons/FocusDrawerHeaderSkeleton.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/Skeletons/FocusDrawerSidePanelSkeleton.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/Skeletons/FocusDrawerSidePanelSkeleton.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/Skeletons/FocusDrawerSidePanelSkeleton.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/assets/Skeletons/FocusDrawerSidePanelSkeleton.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/index.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunFocusDrawer/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunHeader/assets/EvalRunHeaderSkeleton.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunHeader/assets/EvalRunHeaderSkeleton.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunHeader/assets/EvalRunHeaderSkeleton.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunHeader/assets/EvalRunHeaderSkeleton.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunHeader/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunHeader/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunHeader/index.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunHeader/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/assets/EvalRunPromptConfigViewerSkeleton.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/assets/EvalRunPromptConfigViewerSkeleton.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/assets/EvalRunPromptConfigViewerSkeleton.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/assets/EvalRunPromptConfigViewerSkeleton.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/assets/PromptConfigCard.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/assets/PromptConfigCard.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/assets/PromptConfigCard.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/assets/PromptConfigCard.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/index.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunPromptConfigViewer/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/assets/EvalRunScoreTableSkeleton.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/assets/EvalRunScoreTableSkeleton.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/assets/EvalRunScoreTableSkeleton.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/assets/EvalRunScoreTableSkeleton.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/assets/TraceMetrics.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/assets/TraceMetrics.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/assets/TraceMetrics.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/assets/TraceMetrics.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/assets/constants.ts b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/assets/constants.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/assets/constants.ts
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/assets/constants.ts
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/index.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunScoreTable/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunSelectedEvaluations/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunSelectedEvaluations/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunSelectedEvaluations/index.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunSelectedEvaluations/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunTestcaseViewUtilityOptions/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunTestcaseViewUtilityOptions/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunTestcaseViewUtilityOptions/index.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunTestcaseViewUtilityOptions/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunTestcaseViewer/assets/EvalRunTestcaseViewerSkeleton.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunTestcaseViewer/assets/EvalRunTestcaseViewerSkeleton.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunTestcaseViewer/assets/EvalRunTestcaseViewerSkeleton.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunTestcaseViewer/assets/EvalRunTestcaseViewerSkeleton.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunTestcaseViewer/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunTestcaseViewer/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunTestcaseViewer/index.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvalRunTestcaseViewer/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetircsSpiderChart/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetircsSpiderChart/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetircsSpiderChart/index.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetircsSpiderChart/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetircsSpiderChart/types.ts b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetircsSpiderChart/types.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetircsSpiderChart/types.ts
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetircsSpiderChart/types.ts
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/TimeSeriesChart.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/TimeSeriesChart.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/TimeSeriesChart.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/TimeSeriesChart.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/BarChart.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/BarChart.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/BarChart.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/BarChart.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/EvaluatorMetricsChartSkeleton.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/EvaluatorMetricsChartSkeleton.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/EvaluatorMetricsChartSkeleton.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/EvaluatorMetricsChartSkeleton.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/HistogramChart.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/HistogramChart.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/HistogramChart.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/HistogramChart.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/LowerBand.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/LowerBand.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/LowerBand.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/LowerBand.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/UpperBand.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/UpperBand.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/UpperBand.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/UpperBand.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/helpers.ts b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/helpers.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/helpers.ts
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/assets/helpers.ts
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/index.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/EvaluatorMetricsChart/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/shared/BarChartPlaceholder.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/shared/BarChartPlaceholder.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/shared/BarChartPlaceholder.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/shared/BarChartPlaceholder.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/shared/PlaceholderOverlay.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/shared/PlaceholderOverlay.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/shared/PlaceholderOverlay.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/shared/PlaceholderOverlay.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/components/shared/SpiderChartPlaceholder.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/components/shared/SpiderChartPlaceholder.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/components/shared/SpiderChartPlaceholder.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/components/shared/SpiderChartPlaceholder.tsx
diff --git a/web/oss/src/components/EvalRunDetails/AutoEvalRun/index.tsx b/web/ee/src/components/EvalRunDetails/AutoEvalRun/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/AutoEvalRun/index.tsx
rename to web/ee/src/components/EvalRunDetails/AutoEvalRun/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/assets/annotationUtils.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/assets/annotationUtils.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/assets/annotationUtils.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/assets/annotationUtils.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/assets/helpers.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/assets/helpers.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/assets/helpers.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/assets/helpers.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/assets/optimisticUtils.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/assets/optimisticUtils.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/assets/optimisticUtils.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/assets/optimisticUtils.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/assets/runnableSelectors.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/assets/runnableSelectors.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/assets/runnableSelectors.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/assets/runnableSelectors.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/assets/stepsMetricsUtils.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/assets/stepsMetricsUtils.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/assets/stepsMetricsUtils.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/assets/stepsMetricsUtils.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/assets/types.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/assets/types.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/assets/types.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/assets/types.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/AnnotateScenarioButton/index.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/AnnotateScenarioButton/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/AnnotateScenarioButton/index.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/AnnotateScenarioButton/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/AnnotateScenarioButton/types.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/AnnotateScenarioButton/types.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/AnnotateScenarioButton/types.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/AnnotateScenarioButton/types.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalResultsView/index.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalResultsView/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalResultsView/index.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalResultsView/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunBatchActions.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunBatchActions.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunBatchActions.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunBatchActions.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunName/index.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunName/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunName/index.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunName/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenario/index.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenario/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenario/index.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenario/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenario/types.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenario/types.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenario/types.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenario/types.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/EvalRunScenarioCardBody.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/EvalRunScenarioCardBody.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/EvalRunScenarioCardBody.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/EvalRunScenarioCardBody.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/InvocationInputs.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/InvocationInputs.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/InvocationInputs.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/InvocationInputs.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/InvocationResponse.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/InvocationResponse.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/InvocationResponse.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/InvocationResponse.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/InvocationRun.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/InvocationRun.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/InvocationRun.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/InvocationRun.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/assets/KeyValue.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/assets/KeyValue.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/assets/KeyValue.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/assets/KeyValue.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/assets/utils.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/assets/utils.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/assets/utils.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/assets/utils.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/index.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/index.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/types.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/types.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/types.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCard/types.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCardTitle/index.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCardTitle/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCardTitle/index.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCardTitle/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCardTitle/types.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCardTitle/types.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCardTitle/types.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCardTitle/types.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCards/EvalRunScenarioCards.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCards/EvalRunScenarioCards.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCards/EvalRunScenarioCards.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCards/EvalRunScenarioCards.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCards/assets/constants.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCards/assets/constants.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCards/assets/constants.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioCards/assets/constants.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioFilters.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioFilters.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioFilters.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/EvalRunScenarioFilters.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/Modals/InstructionModal/assets/InstructionButton.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/Modals/InstructionModal/assets/InstructionButton.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/Modals/InstructionModal/assets/InstructionButton.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/Modals/InstructionModal/assets/InstructionButton.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/Modals/InstructionModal/index.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/Modals/InstructionModal/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/Modals/InstructionModal/index.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/Modals/InstructionModal/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/Modals/RenameEvalModal/assets/RenameEvalButton.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/Modals/RenameEvalModal/assets/RenameEvalButton.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/Modals/RenameEvalModal/assets/RenameEvalButton.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/Modals/RenameEvalModal/assets/RenameEvalButton.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/Modals/RenameEvalModal/assets/RenameEvalModalContent.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/Modals/RenameEvalModal/assets/RenameEvalModalContent.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/Modals/RenameEvalModal/assets/RenameEvalModalContent.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/Modals/RenameEvalModal/assets/RenameEvalModalContent.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/Modals/RenameEvalModal/index.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/Modals/RenameEvalModal/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/Modals/RenameEvalModal/index.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/Modals/RenameEvalModal/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/Modals/types.d.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/Modals/types.d.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/Modals/types.d.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/Modals/types.d.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/RunEvalScenarioButton/index.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/RunEvalScenarioButton/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/RunEvalScenarioButton/index.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/RunEvalScenarioButton/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/RunEvalScenarioButton/types.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/RunEvalScenarioButton/types.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/RunEvalScenarioButton/types.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/RunEvalScenarioButton/types.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioAnnotationPanel/index.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioAnnotationPanel/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioAnnotationPanel/index.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioAnnotationPanel/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioAnnotationPanel/types.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioAnnotationPanel/types.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioAnnotationPanel/types.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioAnnotationPanel/types.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioLoadingIndicator/ScenarioLoadingIndicator.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioLoadingIndicator/ScenarioLoadingIndicator.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioLoadingIndicator/ScenarioLoadingIndicator.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioLoadingIndicator/ScenarioLoadingIndicator.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioLoadingIndicator/assets/constants.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioLoadingIndicator/assets/constants.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioLoadingIndicator/assets/constants.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/ScenarioLoadingIndicator/assets/constants.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/SingleScenarioViewer/index.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/SingleScenarioViewer/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/SingleScenarioViewer/index.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/SingleScenarioViewer/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/components/SingleScenarioViewer/types.ts b/web/ee/src/components/EvalRunDetails/HumanEvalRun/components/SingleScenarioViewer/types.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/components/SingleScenarioViewer/types.ts
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/components/SingleScenarioViewer/types.ts
diff --git a/web/oss/src/components/EvalRunDetails/HumanEvalRun/index.tsx b/web/ee/src/components/EvalRunDetails/HumanEvalRun/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/HumanEvalRun/index.tsx
rename to web/ee/src/components/EvalRunDetails/HumanEvalRun/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/OnlineEvalRun/OnlineUrlSync.tsx b/web/ee/src/components/EvalRunDetails/OnlineEvalRun/OnlineUrlSync.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/OnlineEvalRun/OnlineUrlSync.tsx
rename to web/ee/src/components/EvalRunDetails/OnlineEvalRun/OnlineUrlSync.tsx
diff --git a/web/oss/src/components/EvalRunDetails/OnlineEvalRun/components/ConfigurationViewer/index.tsx b/web/ee/src/components/EvalRunDetails/OnlineEvalRun/components/ConfigurationViewer/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/OnlineEvalRun/components/ConfigurationViewer/index.tsx
rename to web/ee/src/components/EvalRunDetails/OnlineEvalRun/components/ConfigurationViewer/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/OnlineEvalRun/components/TracesViewer/index.tsx b/web/ee/src/components/EvalRunDetails/OnlineEvalRun/components/TracesViewer/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/OnlineEvalRun/components/TracesViewer/index.tsx
rename to web/ee/src/components/EvalRunDetails/OnlineEvalRun/components/TracesViewer/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/OnlineEvalRun/index.tsx b/web/ee/src/components/EvalRunDetails/OnlineEvalRun/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/OnlineEvalRun/index.tsx
rename to web/ee/src/components/EvalRunDetails/OnlineEvalRun/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/UrlSync.tsx b/web/ee/src/components/EvalRunDetails/UrlSync.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/UrlSync.tsx
rename to web/ee/src/components/EvalRunDetails/UrlSync.tsx
diff --git a/web/oss/src/components/EvalRunDetails/assets/renderChatMessages.tsx b/web/ee/src/components/EvalRunDetails/assets/renderChatMessages.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/assets/renderChatMessages.tsx
rename to web/ee/src/components/EvalRunDetails/assets/renderChatMessages.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/ComparisonDataFetcher.tsx b/web/ee/src/components/EvalRunDetails/components/ComparisonDataFetcher.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/ComparisonDataFetcher.tsx
rename to web/ee/src/components/EvalRunDetails/components/ComparisonDataFetcher.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/EvalRunOverviewViewer/assets/EvalRunOverviewViewerSkeleton.tsx b/web/ee/src/components/EvalRunDetails/components/EvalRunOverviewViewer/assets/EvalRunOverviewViewerSkeleton.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/EvalRunOverviewViewer/assets/EvalRunOverviewViewerSkeleton.tsx
rename to web/ee/src/components/EvalRunDetails/components/EvalRunOverviewViewer/assets/EvalRunOverviewViewerSkeleton.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/EvalRunOverviewViewer/index.tsx b/web/ee/src/components/EvalRunDetails/components/EvalRunOverviewViewer/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/EvalRunOverviewViewer/index.tsx
rename to web/ee/src/components/EvalRunDetails/components/EvalRunOverviewViewer/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/EvalRunScenarioNavigator/index.tsx b/web/ee/src/components/EvalRunDetails/components/EvalRunScenarioNavigator/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/EvalRunScenarioNavigator/index.tsx
rename to web/ee/src/components/EvalRunDetails/components/EvalRunScenarioNavigator/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/EvalRunScenarioStatusTag/assets/index.tsx b/web/ee/src/components/EvalRunDetails/components/EvalRunScenarioStatusTag/assets/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/EvalRunScenarioStatusTag/assets/index.tsx
rename to web/ee/src/components/EvalRunDetails/components/EvalRunScenarioStatusTag/assets/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/EvalRunScenarioStatusTag/index.tsx b/web/ee/src/components/EvalRunDetails/components/EvalRunScenarioStatusTag/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/EvalRunScenarioStatusTag/index.tsx
rename to web/ee/src/components/EvalRunDetails/components/EvalRunScenarioStatusTag/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/EvalRunScenariosViewSelector/assets/constants.ts b/web/ee/src/components/EvalRunDetails/components/EvalRunScenariosViewSelector/assets/constants.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/EvalRunScenariosViewSelector/assets/constants.ts
rename to web/ee/src/components/EvalRunDetails/components/EvalRunScenariosViewSelector/assets/constants.ts
diff --git a/web/oss/src/components/EvalRunDetails/components/EvalRunScenariosViewSelector/index.tsx b/web/ee/src/components/EvalRunDetails/components/EvalRunScenariosViewSelector/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/EvalRunScenariosViewSelector/index.tsx
rename to web/ee/src/components/EvalRunDetails/components/EvalRunScenariosViewSelector/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/SaveDataModal/assets/SaveDataButton.tsx b/web/ee/src/components/EvalRunDetails/components/SaveDataModal/assets/SaveDataButton.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/SaveDataModal/assets/SaveDataButton.tsx
rename to web/ee/src/components/EvalRunDetails/components/SaveDataModal/assets/SaveDataButton.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/SaveDataModal/assets/SaveDataModalContent.tsx b/web/ee/src/components/EvalRunDetails/components/SaveDataModal/assets/SaveDataModalContent.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/SaveDataModal/assets/SaveDataModalContent.tsx
rename to web/ee/src/components/EvalRunDetails/components/SaveDataModal/assets/SaveDataModalContent.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/SaveDataModal/assets/types.ts b/web/ee/src/components/EvalRunDetails/components/SaveDataModal/assets/types.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/SaveDataModal/assets/types.ts
rename to web/ee/src/components/EvalRunDetails/components/SaveDataModal/assets/types.ts
diff --git a/web/oss/src/components/EvalRunDetails/components/SaveDataModal/index.tsx b/web/ee/src/components/EvalRunDetails/components/SaveDataModal/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/SaveDataModal/index.tsx
rename to web/ee/src/components/EvalRunDetails/components/SaveDataModal/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/ComparisonScenarioTable.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/ComparisonScenarioTable.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/ComparisonScenarioTable.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/ComparisonScenarioTable.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/ScenarioTable.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/ScenarioTable.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/ScenarioTable.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/ScenarioTable.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/ActionCell.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/ActionCell.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/ActionCell.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/ActionCell.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/CellComponents.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/CellComponents.tsx
similarity index 99%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/CellComponents.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/CellComponents.tsx
index fa0aef82a1..a5f4612b58 100644
--- a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/CellComponents.tsx
+++ b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/CellComponents.tsx
@@ -10,7 +10,7 @@ import TooltipButton from "@/oss/components/Playground/assets/EnhancedButton"
import {Expandable} from "@/oss/components/Tables/ExpandableCell"
import {useOptionalRunId, useRunId} from "@/oss/contexts/RunIdContext"
import {useInvocationResult} from "@/oss/lib/hooks/useInvocationResult"
-import {resolvePath} from "@/oss/lib/evalRunner/pureEnrichment"
+import {resolvePath} from "@/oss/lib/workers/evalRunner/pureEnrichment"
import {useAppNavigation, useAppState} from "@/oss/state/appState"
import {
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/ComparisonModeToggle.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/ComparisonModeToggle.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/ComparisonModeToggle.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/ComparisonModeToggle.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/CollapsedAnnotationValueCell.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/CollapsedAnnotationValueCell.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/CollapsedAnnotationValueCell.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/CollapsedAnnotationValueCell.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/CollapsedMetricValueCell.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/CollapsedMetricValueCell.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/CollapsedMetricValueCell.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/CollapsedMetricValueCell.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/CollapsedMetricsCell.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/CollapsedMetricsCell.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/CollapsedMetricsCell.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/CollapsedMetricsCell.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/MetricCell.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/MetricCell.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/MetricCell.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/MetricCell.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/helpers.ts b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/helpers.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/helpers.ts
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/helpers.ts
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/types.ts b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/types.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/types.ts
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/MetricCell/types.ts
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/ScenarioTraceSummary.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/ScenarioTraceSummary.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/ScenarioTraceSummary.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/ScenarioTraceSummary.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/StatusCell.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/StatusCell.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/StatusCell.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/StatusCell.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/TimestampCell.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/TimestampCell.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/TimestampCell.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/TimestampCell.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/VirtualizedScenarioTableAnnotateDrawer.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/VirtualizedScenarioTableAnnotateDrawer.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/VirtualizedScenarioTableAnnotateDrawer.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/VirtualizedScenarioTableAnnotateDrawer.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/atoms/evaluatorFailures.ts b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/atoms/evaluatorFailures.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/atoms/evaluatorFailures.ts
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/atoms/evaluatorFailures.ts
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/constants.ts b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/constants.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/constants.ts
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/constants.ts
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/dataSourceBuilder.ts b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/dataSourceBuilder.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/dataSourceBuilder.ts
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/dataSourceBuilder.ts
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/evaluatorNameUtils.ts b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/evaluatorNameUtils.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/evaluatorNameUtils.ts
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/evaluatorNameUtils.ts
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/evaluatorSchemaUtils.ts b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/evaluatorSchemaUtils.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/evaluatorSchemaUtils.ts
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/evaluatorSchemaUtils.ts
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/flatDataSourceBuilder.ts b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/flatDataSourceBuilder.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/flatDataSourceBuilder.ts
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/flatDataSourceBuilder.ts
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/types.ts b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/types.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/types.ts
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/types.ts
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/utils.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/utils.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/utils.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/assets/utils.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useExpandableComparisonDataSource.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useExpandableComparisonDataSource.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useExpandableComparisonDataSource.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useExpandableComparisonDataSource.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useScrollToScenario.ts b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useScrollToScenario.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useScrollToScenario.ts
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useScrollToScenario.ts
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useTableDataSource.ts b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useTableDataSource.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useTableDataSource.ts
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/hooks/useTableDataSource.ts
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/index.tsx b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/index.tsx
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/types.ts b/web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/types.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/components/VirtualizedScenarioTable/types.ts
rename to web/ee/src/components/EvalRunDetails/components/VirtualizedScenarioTable/types.ts
diff --git a/web/oss/src/components/EvalRunDetails/hooks/useCachedScenarioSteps.ts b/web/ee/src/components/EvalRunDetails/hooks/useCachedScenarioSteps.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/hooks/useCachedScenarioSteps.ts
rename to web/ee/src/components/EvalRunDetails/hooks/useCachedScenarioSteps.ts
diff --git a/web/oss/src/components/EvalRunDetails/hooks/useMetricStepError.ts b/web/ee/src/components/EvalRunDetails/hooks/useMetricStepError.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/hooks/useMetricStepError.ts
rename to web/ee/src/components/EvalRunDetails/hooks/useMetricStepError.ts
diff --git a/web/oss/src/components/EvalRunDetails/index.tsx b/web/ee/src/components/EvalRunDetails/index.tsx
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/index.tsx
rename to web/ee/src/components/EvalRunDetails/index.tsx
diff --git a/web/oss/src/components/EvalRunDetails/state/evalType.ts b/web/ee/src/components/EvalRunDetails/state/evalType.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/state/evalType.ts
rename to web/ee/src/components/EvalRunDetails/state/evalType.ts
diff --git a/web/oss/src/components/EvalRunDetails/state/focusScenarioAtom.ts b/web/ee/src/components/EvalRunDetails/state/focusScenarioAtom.ts
similarity index 100%
rename from web/oss/src/components/EvalRunDetails/state/focusScenarioAtom.ts
rename to web/ee/src/components/EvalRunDetails/state/focusScenarioAtom.ts
diff --git a/web/oss/src/components/EvalRunDetails/state/urlState.ts b/web/ee/src/components/EvalRunDetails/state/urlState.ts
similarity index 97%
rename from web/oss/src/components/EvalRunDetails/state/urlState.ts
rename to web/ee/src/components/EvalRunDetails/state/urlState.ts
index aa4697994a..29a988ca50 100644
--- a/web/oss/src/components/EvalRunDetails/state/urlState.ts
+++ b/web/ee/src/components/EvalRunDetails/state/urlState.ts
@@ -1,7 +1,7 @@
import {atom} from "jotai"
import {atomWithImmer} from "jotai-immer"
-import {evalTypeAtom} from "./evalType"
+import {evalTypeAtom} from "../state/evalType"
export interface EvalRunUrlState {
view?:
diff --git a/web/oss/src/components/EvaluationTable/ABTestingEvaluationTable.tsx b/web/ee/src/components/EvaluationTable/ABTestingEvaluationTable.tsx
similarity index 100%
rename from web/oss/src/components/EvaluationTable/ABTestingEvaluationTable.tsx
rename to web/ee/src/components/EvaluationTable/ABTestingEvaluationTable.tsx
diff --git a/web/oss/src/components/EvaluationTable/SingleModelEvaluationTable.tsx b/web/ee/src/components/EvaluationTable/SingleModelEvaluationTable.tsx
similarity index 99%
rename from web/oss/src/components/EvaluationTable/SingleModelEvaluationTable.tsx
rename to web/ee/src/components/EvaluationTable/SingleModelEvaluationTable.tsx
index 2bac4883a9..ad8e49c7cb 100644
--- a/web/oss/src/components/EvaluationTable/SingleModelEvaluationTable.tsx
+++ b/web/ee/src/components/EvaluationTable/SingleModelEvaluationTable.tsx
@@ -19,8 +19,8 @@ import {getDefaultStore, useAtomValue} from "jotai"
import debounce from "lodash/debounce"
import {useRouter} from "next/router"
-import SaveTestsetModal from "@/oss/components/SaveTestsetModal/SaveTestsetModal"
import SecondaryButton from "@/oss/components/SecondaryButton/SecondaryButton"
+import {useQueryParamState} from "@/oss/state/appState"
import {EvaluationFlow} from "@/oss/lib/enums"
import {exportSingleModelEvaluationData} from "@/oss/lib/helpers/evaluate"
import {isBaseResponse, isFuncResponse} from "@/oss/lib/helpers/playgroundResp"
@@ -41,7 +41,6 @@ import {transformToRequestBody} from "@/oss/lib/shared/variant/transformer/trans
import type {BaseResponse, EvaluationScenario, KeyValuePair, Variant} from "@/oss/lib/Types"
import {callVariant} from "@/oss/services/api"
import {updateEvaluation, updateEvaluationScenario} from "@/oss/services/human-evaluations/api"
-import {useQueryParamState} from "@/oss/state/appState"
import {customPropertiesByRevisionAtomFamily} from "@/oss/state/newPlayground/core/customProperties"
import {
stablePromptVariablesAtomFamily,
@@ -52,6 +51,7 @@ import {appUriInfoAtom, appSchemaAtom} from "@/oss/state/variant/atoms/fetcher"
import EvaluationCardView from "../Evaluations/EvaluationCardView"
import EvaluationVotePanel from "../Evaluations/EvaluationCardView/EvaluationVotePanel"
+import SaveTestsetModal from "../SaveTestsetModal/SaveTestsetModal"
import {useSingleModelEvaluationTableStyles} from "./assets/styles"
import ParamsFormWithRun from "./components/ParamsFormWithRun"
diff --git a/web/oss/src/components/EvaluationTable/assets/styles.ts b/web/ee/src/components/EvaluationTable/assets/styles.ts
similarity index 100%
rename from web/oss/src/components/EvaluationTable/assets/styles.ts
rename to web/ee/src/components/EvaluationTable/assets/styles.ts
diff --git a/web/oss/src/components/EvaluationTable/components/ParamsFormWithRun.tsx b/web/ee/src/components/EvaluationTable/components/ParamsFormWithRun.tsx
similarity index 100%
rename from web/oss/src/components/EvaluationTable/components/ParamsFormWithRun.tsx
rename to web/ee/src/components/EvaluationTable/components/ParamsFormWithRun.tsx
diff --git a/web/oss/src/components/EvaluationTable/types.d.ts b/web/ee/src/components/EvaluationTable/types.d.ts
similarity index 100%
rename from web/oss/src/components/EvaluationTable/types.d.ts
rename to web/ee/src/components/EvaluationTable/types.d.ts
diff --git a/web/oss/src/components/Evaluations/EvaluationCardView/EvaluationCard.tsx b/web/ee/src/components/Evaluations/EvaluationCardView/EvaluationCard.tsx
similarity index 100%
rename from web/oss/src/components/Evaluations/EvaluationCardView/EvaluationCard.tsx
rename to web/ee/src/components/Evaluations/EvaluationCardView/EvaluationCard.tsx
diff --git a/web/oss/src/components/Evaluations/EvaluationCardView/EvaluationChatResponse.tsx b/web/ee/src/components/Evaluations/EvaluationCardView/EvaluationChatResponse.tsx
similarity index 100%
rename from web/oss/src/components/Evaluations/EvaluationCardView/EvaluationChatResponse.tsx
rename to web/ee/src/components/Evaluations/EvaluationCardView/EvaluationChatResponse.tsx
diff --git a/web/oss/src/components/Evaluations/EvaluationCardView/EvaluationInputs.tsx b/web/ee/src/components/Evaluations/EvaluationCardView/EvaluationInputs.tsx
similarity index 100%
rename from web/oss/src/components/Evaluations/EvaluationCardView/EvaluationInputs.tsx
rename to web/ee/src/components/Evaluations/EvaluationCardView/EvaluationInputs.tsx
diff --git a/web/oss/src/components/Evaluations/EvaluationCardView/EvaluationVariantCard.tsx b/web/ee/src/components/Evaluations/EvaluationCardView/EvaluationVariantCard.tsx
similarity index 100%
rename from web/oss/src/components/Evaluations/EvaluationCardView/EvaluationVariantCard.tsx
rename to web/ee/src/components/Evaluations/EvaluationCardView/EvaluationVariantCard.tsx
diff --git a/web/oss/src/components/Evaluations/EvaluationCardView/EvaluationVotePanel.tsx b/web/ee/src/components/Evaluations/EvaluationCardView/EvaluationVotePanel.tsx
similarity index 100%
rename from web/oss/src/components/Evaluations/EvaluationCardView/EvaluationVotePanel.tsx
rename to web/ee/src/components/Evaluations/EvaluationCardView/EvaluationVotePanel.tsx
diff --git a/web/oss/src/components/Evaluations/EvaluationCardView/VariantAlphabet.tsx b/web/ee/src/components/Evaluations/EvaluationCardView/VariantAlphabet.tsx
similarity index 100%
rename from web/oss/src/components/Evaluations/EvaluationCardView/VariantAlphabet.tsx
rename to web/ee/src/components/Evaluations/EvaluationCardView/VariantAlphabet.tsx
diff --git a/web/oss/src/components/Evaluations/EvaluationCardView/assets/styles.ts b/web/ee/src/components/Evaluations/EvaluationCardView/assets/styles.ts
similarity index 100%
rename from web/oss/src/components/Evaluations/EvaluationCardView/assets/styles.ts
rename to web/ee/src/components/Evaluations/EvaluationCardView/assets/styles.ts
diff --git a/web/oss/src/components/Evaluations/EvaluationCardView/index.tsx b/web/ee/src/components/Evaluations/EvaluationCardView/index.tsx
similarity index 100%
rename from web/oss/src/components/Evaluations/EvaluationCardView/index.tsx
rename to web/ee/src/components/Evaluations/EvaluationCardView/index.tsx
diff --git a/web/oss/src/components/Evaluations/EvaluationCardView/types.d.ts b/web/ee/src/components/Evaluations/EvaluationCardView/types.d.ts
similarity index 100%
rename from web/oss/src/components/Evaluations/EvaluationCardView/types.d.ts
rename to web/ee/src/components/Evaluations/EvaluationCardView/types.d.ts
diff --git a/web/oss/src/components/Evaluations/EvaluationErrorModal.tsx b/web/ee/src/components/Evaluations/EvaluationErrorModal.tsx
similarity index 100%
rename from web/oss/src/components/Evaluations/EvaluationErrorModal.tsx
rename to web/ee/src/components/Evaluations/EvaluationErrorModal.tsx
diff --git a/web/oss/src/services/evaluationRuns/utils.ts b/web/ee/src/components/Evaluations/HumanEvaluationResult.tsx
similarity index 100%
rename from web/oss/src/services/evaluationRuns/utils.ts
rename to web/ee/src/components/Evaluations/HumanEvaluationResult.tsx
diff --git a/web/oss/src/components/Evaluations/ShareEvaluationModal.tsx b/web/ee/src/components/Evaluations/ShareEvaluationModal.tsx
similarity index 100%
rename from web/oss/src/components/Evaluations/ShareEvaluationModal.tsx
rename to web/ee/src/components/Evaluations/ShareEvaluationModal.tsx
diff --git a/web/oss/src/components/Evaluators/assets/cells/EvaluatorTagsCell.tsx b/web/ee/src/components/Evaluators/assets/cells/EvaluatorTagsCell.tsx
similarity index 100%
rename from web/oss/src/components/Evaluators/assets/cells/EvaluatorTagsCell.tsx
rename to web/ee/src/components/Evaluators/assets/cells/EvaluatorTagsCell.tsx
diff --git a/web/oss/src/components/Evaluators/assets/cells/EvaluatorTypePill.tsx b/web/ee/src/components/Evaluators/assets/cells/EvaluatorTypePill.tsx
similarity index 100%
rename from web/oss/src/components/Evaluators/assets/cells/EvaluatorTypePill.tsx
rename to web/ee/src/components/Evaluators/assets/cells/EvaluatorTypePill.tsx
diff --git a/web/oss/src/components/Evaluators/assets/cells/TableDropdownMenu/index.tsx b/web/ee/src/components/Evaluators/assets/cells/TableDropdownMenu/index.tsx
similarity index 100%
rename from web/oss/src/components/Evaluators/assets/cells/TableDropdownMenu/index.tsx
rename to web/ee/src/components/Evaluators/assets/cells/TableDropdownMenu/index.tsx
diff --git a/web/oss/src/components/Evaluators/assets/cells/TableDropdownMenu/types.ts b/web/ee/src/components/Evaluators/assets/cells/TableDropdownMenu/types.ts
similarity index 100%
rename from web/oss/src/components/Evaluators/assets/cells/TableDropdownMenu/types.ts
rename to web/ee/src/components/Evaluators/assets/cells/TableDropdownMenu/types.ts
diff --git a/web/oss/src/components/Evaluators/assets/constants.ts b/web/ee/src/components/Evaluators/assets/constants.ts
similarity index 100%
rename from web/oss/src/components/Evaluators/assets/constants.ts
rename to web/ee/src/components/Evaluators/assets/constants.ts
diff --git a/web/oss/src/components/Evaluators/assets/getColumns.tsx b/web/ee/src/components/Evaluators/assets/getColumns.tsx
similarity index 100%
rename from web/oss/src/components/Evaluators/assets/getColumns.tsx
rename to web/ee/src/components/Evaluators/assets/getColumns.tsx
diff --git a/web/oss/src/components/Evaluators/assets/types.ts b/web/ee/src/components/Evaluators/assets/types.ts
similarity index 100%
rename from web/oss/src/components/Evaluators/assets/types.ts
rename to web/ee/src/components/Evaluators/assets/types.ts
diff --git a/web/oss/src/components/Evaluators/assets/utils.ts b/web/ee/src/components/Evaluators/assets/utils.ts
similarity index 100%
rename from web/oss/src/components/Evaluators/assets/utils.ts
rename to web/ee/src/components/Evaluators/assets/utils.ts
diff --git a/web/oss/src/components/Evaluators/components/ConfigureEvaluator/assets/ConfigureEvaluatorSkeleton.tsx b/web/ee/src/components/Evaluators/components/ConfigureEvaluator/assets/ConfigureEvaluatorSkeleton.tsx
similarity index 100%
rename from web/oss/src/components/Evaluators/components/ConfigureEvaluator/assets/ConfigureEvaluatorSkeleton.tsx
rename to web/ee/src/components/Evaluators/components/ConfigureEvaluator/assets/ConfigureEvaluatorSkeleton.tsx
diff --git a/web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx b/web/ee/src/components/Evaluators/components/ConfigureEvaluator/index.tsx
similarity index 100%
rename from web/oss/src/components/Evaluators/components/ConfigureEvaluator/index.tsx
rename to web/ee/src/components/Evaluators/components/ConfigureEvaluator/index.tsx
diff --git a/web/oss/src/components/Evaluators/components/DeleteEvaluatorsModal/assets/DeleteEvaluatorsModalContent/index.tsx b/web/ee/src/components/Evaluators/components/DeleteEvaluatorsModal/assets/DeleteEvaluatorsModalContent/index.tsx
similarity index 100%
rename from web/oss/src/components/Evaluators/components/DeleteEvaluatorsModal/assets/DeleteEvaluatorsModalContent/index.tsx
rename to web/ee/src/components/Evaluators/components/DeleteEvaluatorsModal/assets/DeleteEvaluatorsModalContent/index.tsx
diff --git a/web/oss/src/components/Evaluators/components/DeleteEvaluatorsModal/index.tsx b/web/ee/src/components/Evaluators/components/DeleteEvaluatorsModal/index.tsx
similarity index 100%
rename from web/oss/src/components/Evaluators/components/DeleteEvaluatorsModal/index.tsx
rename to web/ee/src/components/Evaluators/components/DeleteEvaluatorsModal/index.tsx
diff --git a/web/oss/src/components/Evaluators/components/DeleteEvaluatorsModal/types.ts b/web/ee/src/components/Evaluators/components/DeleteEvaluatorsModal/types.ts
similarity index 100%
rename from web/oss/src/components/Evaluators/components/DeleteEvaluatorsModal/types.ts
rename to web/ee/src/components/Evaluators/components/DeleteEvaluatorsModal/types.ts
diff --git a/web/oss/src/components/Evaluators/components/SelectEvaluatorModal/assets/SelectEvaluatorModalContent/index.tsx b/web/ee/src/components/Evaluators/components/SelectEvaluatorModal/assets/SelectEvaluatorModalContent/index.tsx
similarity index 100%
rename from web/oss/src/components/Evaluators/components/SelectEvaluatorModal/assets/SelectEvaluatorModalContent/index.tsx
rename to web/ee/src/components/Evaluators/components/SelectEvaluatorModal/assets/SelectEvaluatorModalContent/index.tsx
diff --git a/web/oss/src/components/Evaluators/components/SelectEvaluatorModal/index.tsx b/web/ee/src/components/Evaluators/components/SelectEvaluatorModal/index.tsx
similarity index 100%
rename from web/oss/src/components/Evaluators/components/SelectEvaluatorModal/index.tsx
rename to web/ee/src/components/Evaluators/components/SelectEvaluatorModal/index.tsx
diff --git a/web/oss/src/components/Evaluators/components/SelectEvaluatorModal/types.ts b/web/ee/src/components/Evaluators/components/SelectEvaluatorModal/types.ts
similarity index 100%
rename from web/oss/src/components/Evaluators/components/SelectEvaluatorModal/types.ts
rename to web/ee/src/components/Evaluators/components/SelectEvaluatorModal/types.ts
diff --git a/web/oss/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts b/web/ee/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts
similarity index 100%
rename from web/oss/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts
rename to web/ee/src/components/Evaluators/hooks/useEvaluatorsRegistryData.ts
diff --git a/web/oss/src/components/Evaluators/index.tsx b/web/ee/src/components/Evaluators/index.tsx
similarity index 100%
rename from web/oss/src/components/Evaluators/index.tsx
rename to web/ee/src/components/Evaluators/index.tsx
diff --git a/web/oss/src/components/HumanEvaluationModal/HumanEvaluationModal.tsx b/web/ee/src/components/HumanEvaluationModal/HumanEvaluationModal.tsx
similarity index 100%
rename from web/oss/src/components/HumanEvaluationModal/HumanEvaluationModal.tsx
rename to web/ee/src/components/HumanEvaluationModal/HumanEvaluationModal.tsx
diff --git a/web/oss/src/components/HumanEvaluationModal/assets/styles.ts b/web/ee/src/components/HumanEvaluationModal/assets/styles.ts
similarity index 100%
rename from web/oss/src/components/HumanEvaluationModal/assets/styles.ts
rename to web/ee/src/components/HumanEvaluationModal/assets/styles.ts
diff --git a/web/oss/src/components/HumanEvaluationModal/types.d.ts b/web/ee/src/components/HumanEvaluationModal/types.d.ts
similarity index 100%
rename from web/oss/src/components/HumanEvaluationModal/types.d.ts
rename to web/ee/src/components/HumanEvaluationModal/types.d.ts
diff --git a/web/oss/src/components/HumanEvaluations/AbTestingEvaluation.tsx b/web/ee/src/components/HumanEvaluations/AbTestingEvaluation.tsx
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/AbTestingEvaluation.tsx
rename to web/ee/src/components/HumanEvaluations/AbTestingEvaluation.tsx
diff --git a/web/oss/src/components/HumanEvaluations/SingleModelEvaluation.tsx b/web/ee/src/components/HumanEvaluations/SingleModelEvaluation.tsx
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/SingleModelEvaluation.tsx
rename to web/ee/src/components/HumanEvaluations/SingleModelEvaluation.tsx
diff --git a/web/oss/src/components/HumanEvaluations/assets/EvaluationStatusCell.tsx b/web/ee/src/components/HumanEvaluations/assets/EvaluationStatusCell.tsx
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/assets/EvaluationStatusCell.tsx
rename to web/ee/src/components/HumanEvaluations/assets/EvaluationStatusCell.tsx
diff --git a/web/oss/src/components/HumanEvaluations/assets/LegacyEvalResultCell.tsx b/web/ee/src/components/HumanEvaluations/assets/LegacyEvalResultCell.tsx
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/assets/LegacyEvalResultCell.tsx
rename to web/ee/src/components/HumanEvaluations/assets/LegacyEvalResultCell.tsx
diff --git a/web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ChartAxis.tsx b/web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ChartAxis.tsx
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ChartAxis.tsx
rename to web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ChartAxis.tsx
diff --git a/web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ChartFrame.tsx b/web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ChartFrame.tsx
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ChartFrame.tsx
rename to web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ChartFrame.tsx
diff --git a/web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ResponsiveFrequencyChart.tsx b/web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ResponsiveFrequencyChart.tsx
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ResponsiveFrequencyChart.tsx
rename to web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ResponsiveFrequencyChart.tsx
diff --git a/web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ResponsiveMetricChart.tsx b/web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ResponsiveMetricChart.tsx
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ResponsiveMetricChart.tsx
rename to web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/ResponsiveMetricChart.tsx
diff --git a/web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/chartUtils.ts b/web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/chartUtils.ts
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/chartUtils.ts
rename to web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/chartUtils.ts
diff --git a/web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/utils.ts b/web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/utils.ts
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/utils.ts
rename to web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/assets/utils.ts
diff --git a/web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/index.tsx b/web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/index.tsx
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/index.tsx
rename to web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/index.tsx
diff --git a/web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/types.ts b/web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/types.ts
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/assets/MetricDetailsPopover/types.ts
rename to web/ee/src/components/HumanEvaluations/assets/MetricDetailsPopover/types.ts
diff --git a/web/oss/src/components/HumanEvaluations/assets/SingleModelEvaluationHeader/index.tsx b/web/ee/src/components/HumanEvaluations/assets/SingleModelEvaluationHeader/index.tsx
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/assets/SingleModelEvaluationHeader/index.tsx
rename to web/ee/src/components/HumanEvaluations/assets/SingleModelEvaluationHeader/index.tsx
diff --git a/web/oss/src/components/HumanEvaluations/assets/TableDropdownMenu/index.tsx b/web/ee/src/components/HumanEvaluations/assets/TableDropdownMenu/index.tsx
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/assets/TableDropdownMenu/index.tsx
rename to web/ee/src/components/HumanEvaluations/assets/TableDropdownMenu/index.tsx
diff --git a/web/oss/src/components/HumanEvaluations/assets/TableDropdownMenu/types.ts b/web/ee/src/components/HumanEvaluations/assets/TableDropdownMenu/types.ts
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/assets/TableDropdownMenu/types.ts
rename to web/ee/src/components/HumanEvaluations/assets/TableDropdownMenu/types.ts
diff --git a/web/oss/src/components/HumanEvaluations/assets/styles.ts b/web/ee/src/components/HumanEvaluations/assets/styles.ts
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/assets/styles.ts
rename to web/ee/src/components/HumanEvaluations/assets/styles.ts
diff --git a/web/oss/src/components/HumanEvaluations/assets/utils.tsx b/web/ee/src/components/HumanEvaluations/assets/utils.tsx
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/assets/utils.tsx
rename to web/ee/src/components/HumanEvaluations/assets/utils.tsx
diff --git a/web/oss/src/components/HumanEvaluations/types.ts b/web/ee/src/components/HumanEvaluations/types.ts
similarity index 100%
rename from web/oss/src/components/HumanEvaluations/types.ts
rename to web/ee/src/components/HumanEvaluations/types.ts
diff --git a/web/oss/src/components/SaveTestsetModal/SaveTestsetModal.tsx b/web/ee/src/components/SaveTestsetModal/SaveTestsetModal.tsx
similarity index 100%
rename from web/oss/src/components/SaveTestsetModal/SaveTestsetModal.tsx
rename to web/ee/src/components/SaveTestsetModal/SaveTestsetModal.tsx
diff --git a/web/oss/src/components/SaveTestsetModal/types.d.ts b/web/ee/src/components/SaveTestsetModal/types.d.ts
similarity index 100%
rename from web/oss/src/components/SaveTestsetModal/types.d.ts
rename to web/ee/src/components/SaveTestsetModal/types.d.ts
diff --git a/web/oss/src/components/pages/evaluations/EvaluationErrorProps/EvaluationErrorModal.tsx b/web/ee/src/components/pages/evaluations/EvaluationErrorProps/EvaluationErrorModal.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/EvaluationErrorProps/EvaluationErrorModal.tsx
rename to web/ee/src/components/pages/evaluations/EvaluationErrorProps/EvaluationErrorModal.tsx
diff --git a/web/oss/src/components/pages/evaluations/EvaluationErrorProps/EvaluationErrorPopover.tsx b/web/ee/src/components/pages/evaluations/EvaluationErrorProps/EvaluationErrorPopover.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/EvaluationErrorProps/EvaluationErrorPopover.tsx
rename to web/ee/src/components/pages/evaluations/EvaluationErrorProps/EvaluationErrorPopover.tsx
diff --git a/web/oss/src/components/pages/evaluations/EvaluationErrorProps/EvaluationErrorText.tsx b/web/ee/src/components/pages/evaluations/EvaluationErrorProps/EvaluationErrorText.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/EvaluationErrorProps/EvaluationErrorText.tsx
rename to web/ee/src/components/pages/evaluations/EvaluationErrorProps/EvaluationErrorText.tsx
diff --git a/web/oss/src/components/pages/evaluations/EvaluationsView.tsx b/web/ee/src/components/pages/evaluations/EvaluationsView.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/EvaluationsView.tsx
rename to web/ee/src/components/pages/evaluations/EvaluationsView.tsx
diff --git a/web/oss/src/components/pages/evaluations/FilterColumns/FilterColumns.tsx b/web/ee/src/components/pages/evaluations/FilterColumns/FilterColumns.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/FilterColumns/FilterColumns.tsx
rename to web/ee/src/components/pages/evaluations/FilterColumns/FilterColumns.tsx
diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/Components/AdvancedSettings.tsx b/web/ee/src/components/pages/evaluations/NewEvaluation/Components/AdvancedSettings.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/NewEvaluation/Components/AdvancedSettings.tsx
rename to web/ee/src/components/pages/evaluations/NewEvaluation/Components/AdvancedSettings.tsx
diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/Components/NewEvaluationModalContent.tsx b/web/ee/src/components/pages/evaluations/NewEvaluation/Components/NewEvaluationModalContent.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/NewEvaluation/Components/NewEvaluationModalContent.tsx
rename to web/ee/src/components/pages/evaluations/NewEvaluation/Components/NewEvaluationModalContent.tsx
diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectAppSection.tsx b/web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectAppSection.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectAppSection.tsx
rename to web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectAppSection.tsx
diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx b/web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx
rename to web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectEvaluatorSection/SelectEvaluatorSection.tsx
diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectTestsetSection.tsx b/web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectTestsetSection.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectTestsetSection.tsx
rename to web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectTestsetSection.tsx
diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectVariantSection.tsx b/web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectVariantSection.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/NewEvaluation/Components/SelectVariantSection.tsx
rename to web/ee/src/components/pages/evaluations/NewEvaluation/Components/SelectVariantSection.tsx
diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/assets/TabLabel/index.tsx b/web/ee/src/components/pages/evaluations/NewEvaluation/assets/TabLabel/index.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/NewEvaluation/assets/TabLabel/index.tsx
rename to web/ee/src/components/pages/evaluations/NewEvaluation/assets/TabLabel/index.tsx
diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/assets/TabLabel/types.ts b/web/ee/src/components/pages/evaluations/NewEvaluation/assets/TabLabel/types.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/NewEvaluation/assets/TabLabel/types.ts
rename to web/ee/src/components/pages/evaluations/NewEvaluation/assets/TabLabel/types.ts
diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/assets/constants.ts b/web/ee/src/components/pages/evaluations/NewEvaluation/assets/constants.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/NewEvaluation/assets/constants.ts
rename to web/ee/src/components/pages/evaluations/NewEvaluation/assets/constants.ts
diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/assets/styles.ts b/web/ee/src/components/pages/evaluations/NewEvaluation/assets/styles.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/NewEvaluation/assets/styles.ts
rename to web/ee/src/components/pages/evaluations/NewEvaluation/assets/styles.ts
diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/index.tsx b/web/ee/src/components/pages/evaluations/NewEvaluation/index.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/NewEvaluation/index.tsx
rename to web/ee/src/components/pages/evaluations/NewEvaluation/index.tsx
diff --git a/web/oss/src/components/pages/evaluations/NewEvaluation/types.ts b/web/ee/src/components/pages/evaluations/NewEvaluation/types.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/NewEvaluation/types.ts
rename to web/ee/src/components/pages/evaluations/NewEvaluation/types.ts
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/AutoEvaluation.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/AutoEvaluation.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/AutoEvaluation.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/AutoEvaluation.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/AdvancedSettings.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/AdvancedSettings.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/AdvancedSettings.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/AdvancedSettings.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DebugSection.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/DynamicFormField.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/EvaluatorTestcaseModal.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/EvaluatorTestcaseModal.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/EvaluatorTestcaseModal.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/EvaluatorTestcaseModal.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/EvaluatorVariantModal.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/EvaluatorVariantModal.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/EvaluatorVariantModal.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/EvaluatorVariantModal.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaEditor.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaGenerator.ts b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaGenerator.ts
similarity index 82%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaGenerator.ts
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaGenerator.ts
index a56de11836..b6acddb008 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaGenerator.ts
+++ b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/JSONSchemaGenerator.ts
@@ -23,15 +23,15 @@ export function generateJSONSchema(config: SchemaConfig): GeneratedJSONSchema {
const {responseFormat, includeReasoning, continuousConfig, categoricalOptions} = config
const properties: Record = {}
- const required: string[] = ["score"]
+ const required: string[] = ["correctness"]
// Base description is always "The grade results"
const baseDescription = "The grade results"
- // Add the main score field based on response format
+ // Add the main correctness field based on response format
switch (responseFormat) {
case "continuous":
- properties.score = {
+ properties.correctness = {
type: "number",
description: baseDescription,
minimum: continuousConfig?.minimum ?? 0,
@@ -40,7 +40,7 @@ export function generateJSONSchema(config: SchemaConfig): GeneratedJSONSchema {
break
case "boolean":
- properties.score = {
+ properties.correctness = {
type: "boolean",
description: baseDescription,
}
@@ -53,14 +53,14 @@ export function generateJSONSchema(config: SchemaConfig): GeneratedJSONSchema {
.map((opt) => `"${opt.name}": ${opt.description}`)
.join("| ")
- properties.score = {
+ properties.correctness = {
type: "string",
description: `${baseDescription}. Categories: ${categoryDescriptions}`,
enum: enumValues,
}
} else {
// Fallback if no categories defined
- properties.score = {
+ properties.correctness = {
type: "string",
description: baseDescription,
}
@@ -97,43 +97,43 @@ export function parseJSONSchema(schemaString: string): SchemaConfig | null {
// Handle both old format (direct schema) and new format (with name wrapper)
const schema = parsed.schema || parsed
- if (!schema.properties || !schema.properties.score) {
+ if (!schema.properties || !schema.properties.correctness) {
return null
}
- const score = schema.properties.score
+ const correctness = schema.properties.correctness
const hasReasoning = !!schema.properties.comment
let responseFormat: SchemaConfig["responseFormat"] = "boolean"
let continuousConfig: SchemaConfig["continuousConfig"]
let categoricalOptions: SchemaConfig["categoricalOptions"]
- if (score.type === "number") {
+ if (correctness.type === "number") {
responseFormat = "continuous"
continuousConfig = {
- minimum: score.minimum ?? 0,
- maximum: score.maximum ?? 10,
+ minimum: correctness.minimum ?? 0,
+ maximum: correctness.maximum ?? 10,
}
- } else if (score.type === "boolean") {
+ } else if (correctness.type === "boolean") {
responseFormat = "boolean"
- } else if (score.type === "string" && score.enum) {
+ } else if (correctness.type === "string" && correctness.enum) {
responseFormat = "categorical"
// Parse category descriptions from the description field
- const desc = score.description || ""
+ const desc = correctness.description || ""
const categoriesMatch = desc.match(/Categories: (.+)/)
if (categoriesMatch) {
const categoriesStr = categoriesMatch[1]
const categoryPairs = categoriesStr.split("| ")
- categoricalOptions = score.enum.map((name: string) => {
+ categoricalOptions = correctness.enum.map((name: string) => {
const pair = categoryPairs.find((p: string) => p.startsWith(`"${name}":`))
const description = pair ? pair.split(": ")[1] || "" : ""
return {name, description}
})
} else {
- categoricalOptions = score.enum.map((name: string) => ({
+ categoricalOptions = correctness.enum.map((name: string) => ({
name,
description: "",
}))
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/index.ts b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/index.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/index.ts
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/index.ts
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/types.ts b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/types.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/types.ts
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/JSONSchema/types.ts
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/Messages.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/Messages.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/Messages.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/Messages.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/assets/styles.ts b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/assets/styles.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/assets/styles.ts
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/assets/styles.ts
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/index.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/types.ts b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/types.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/types.ts
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/types.ts
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/variantUtils.ts b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/variantUtils.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/variantUtils.ts
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/ConfigureEvaluator/variantUtils.ts
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/DeleteModal.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/DeleteModal.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/DeleteModal.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/DeleteModal.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorCard.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorCard.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorCard.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorCard.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorList.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorList.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorList.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/EvaluatorList.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/index.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/index.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/index.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/Evaluators/index.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/EvaluatorsModal.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/NewEvaluatorCard.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/NewEvaluatorCard.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/NewEvaluatorCard.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/NewEvaluatorCard.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/NewEvaluatorList.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/NewEvaluatorList.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/NewEvaluatorList.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/NewEvaluatorList.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/index.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/index.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/index.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/EvaluatorsModal/NewEvaluator/index.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/Filters/SearchFilter.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/Filters/SearchFilter.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/Filters/SearchFilter.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/Filters/SearchFilter.tsx
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/assets/AutoEvaluationHeader.tsx b/web/ee/src/components/pages/evaluations/autoEvaluation/assets/AutoEvaluationHeader.tsx
similarity index 99%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/assets/AutoEvaluationHeader.tsx
rename to web/ee/src/components/pages/evaluations/autoEvaluation/assets/AutoEvaluationHeader.tsx
index 783fb6804f..e52e9227fb 100644
--- a/web/oss/src/components/pages/evaluations/autoEvaluation/assets/AutoEvaluationHeader.tsx
+++ b/web/ee/src/components/pages/evaluations/autoEvaluation/assets/AutoEvaluationHeader.tsx
@@ -25,9 +25,9 @@ import {getMetricConfig} from "@/oss/lib/metrics/utils"
import {EvaluationStatus} from "@/oss/lib/Types"
import {getAppValues} from "@/oss/state/app"
-import {statusMapper} from "../../cellRenderers/cellRenderers"
+import {statusMapper} from "../../../evaluations/cellRenderers/cellRenderers"
import {buildEvaluationNavigationUrl} from "../../utils"
-import {useStyles} from "./styles"
+import {useStyles} from "../assets/styles"
import {AutoEvaluationHeaderProps} from "./types"
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/assets/styles.ts b/web/ee/src/components/pages/evaluations/autoEvaluation/assets/styles.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/assets/styles.ts
rename to web/ee/src/components/pages/evaluations/autoEvaluation/assets/styles.ts
diff --git a/web/oss/src/components/pages/evaluations/autoEvaluation/assets/types.ts b/web/ee/src/components/pages/evaluations/autoEvaluation/assets/types.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/autoEvaluation/assets/types.ts
rename to web/ee/src/components/pages/evaluations/autoEvaluation/assets/types.ts
diff --git a/web/oss/src/components/pages/evaluations/cellRenderers/StatusRenderer.tsx b/web/ee/src/components/pages/evaluations/cellRenderers/StatusRenderer.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/cellRenderers/StatusRenderer.tsx
rename to web/ee/src/components/pages/evaluations/cellRenderers/StatusRenderer.tsx
diff --git a/web/oss/src/components/pages/evaluations/cellRenderers/cellRenderers.tsx b/web/ee/src/components/pages/evaluations/cellRenderers/cellRenderers.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/cellRenderers/cellRenderers.tsx
rename to web/ee/src/components/pages/evaluations/cellRenderers/cellRenderers.tsx
diff --git a/web/oss/src/components/pages/evaluations/customEvaluation/CustomEvaluation.tsx b/web/ee/src/components/pages/evaluations/customEvaluation/CustomEvaluation.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/customEvaluation/CustomEvaluation.tsx
rename to web/ee/src/components/pages/evaluations/customEvaluation/CustomEvaluation.tsx
diff --git a/web/oss/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx b/web/ee/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx
rename to web/ee/src/components/pages/evaluations/evaluationCompare/EvaluationCompare.tsx
diff --git a/web/oss/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx b/web/ee/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx
rename to web/ee/src/components/pages/evaluations/evaluationScenarios/EvaluationScenarios.tsx
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluation.tsx b/web/ee/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluation.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluation.tsx
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluation.tsx
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluationDrawer.tsx b/web/ee/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluationDrawer.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluationDrawer.tsx
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/OnlineEvaluationDrawer.tsx
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/assets/helpers.ts b/web/ee/src/components/pages/evaluations/onlineEvaluation/assets/helpers.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/assets/helpers.ts
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/assets/helpers.ts
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/assets/state.ts b/web/ee/src/components/pages/evaluations/onlineEvaluation/assets/state.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/assets/state.ts
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/assets/state.ts
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/assets/styles.ts b/web/ee/src/components/pages/evaluations/onlineEvaluation/assets/styles.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/assets/styles.ts
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/assets/styles.ts
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/components/EvaluatorDetailsPreview.tsx b/web/ee/src/components/pages/evaluations/onlineEvaluation/components/EvaluatorDetailsPreview.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/components/EvaluatorDetailsPreview.tsx
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/components/EvaluatorDetailsPreview.tsx
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/components/EvaluatorTypeTag.tsx b/web/ee/src/components/pages/evaluations/onlineEvaluation/components/EvaluatorTypeTag.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/components/EvaluatorTypeTag.tsx
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/components/EvaluatorTypeTag.tsx
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/components/FiltersPreview.tsx b/web/ee/src/components/pages/evaluations/onlineEvaluation/components/FiltersPreview.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/components/FiltersPreview.tsx
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/components/FiltersPreview.tsx
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/components/OnlineEvaluationRowActions.tsx b/web/ee/src/components/pages/evaluations/onlineEvaluation/components/OnlineEvaluationRowActions.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/components/OnlineEvaluationRowActions.tsx
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/components/OnlineEvaluationRowActions.tsx
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/components/PromptPreview.tsx b/web/ee/src/components/pages/evaluations/onlineEvaluation/components/PromptPreview.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/components/PromptPreview.tsx
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/components/PromptPreview.tsx
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/components/QueryFiltersCell.tsx b/web/ee/src/components/pages/evaluations/onlineEvaluation/components/QueryFiltersCell.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/components/QueryFiltersCell.tsx
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/components/QueryFiltersCell.tsx
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/components/QueryFiltersSummaryCard.tsx b/web/ee/src/components/pages/evaluations/onlineEvaluation/components/QueryFiltersSummaryCard.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/components/QueryFiltersSummaryCard.tsx
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/components/QueryFiltersSummaryCard.tsx
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/components/ReadOnlyBox.tsx b/web/ee/src/components/pages/evaluations/onlineEvaluation/components/ReadOnlyBox.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/components/ReadOnlyBox.tsx
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/components/ReadOnlyBox.tsx
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/components/SamplingRateControl.tsx b/web/ee/src/components/pages/evaluations/onlineEvaluation/components/SamplingRateControl.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/components/SamplingRateControl.tsx
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/components/SamplingRateControl.tsx
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/constants.ts b/web/ee/src/components/pages/evaluations/onlineEvaluation/constants.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/constants.ts
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/constants.ts
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorDetails.ts b/web/ee/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorDetails.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorDetails.ts
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorDetails.ts
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorSelection.tsx b/web/ee/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorSelection.tsx
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorSelection.tsx
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorSelection.tsx
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeFromConfigs.ts b/web/ee/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeFromConfigs.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeFromConfigs.ts
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeFromConfigs.ts
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeMeta.ts b/web/ee/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeMeta.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeMeta.ts
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/hooks/useEvaluatorTypeMeta.ts
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useOnlineEvaluations.ts b/web/ee/src/components/pages/evaluations/onlineEvaluation/hooks/useOnlineEvaluations.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/hooks/useOnlineEvaluations.ts
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/hooks/useOnlineEvaluations.ts
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/types.ts b/web/ee/src/components/pages/evaluations/onlineEvaluation/types.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/types.ts
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/types.ts
diff --git a/web/oss/src/components/pages/evaluations/onlineEvaluation/utils/evaluatorDetails.ts b/web/ee/src/components/pages/evaluations/onlineEvaluation/utils/evaluatorDetails.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/onlineEvaluation/utils/evaluatorDetails.ts
rename to web/ee/src/components/pages/evaluations/onlineEvaluation/utils/evaluatorDetails.ts
diff --git a/web/oss/src/components/pages/evaluations/utils.ts b/web/ee/src/components/pages/evaluations/utils.ts
similarity index 100%
rename from web/oss/src/components/pages/evaluations/utils.ts
rename to web/ee/src/components/pages/evaluations/utils.ts
diff --git a/web/oss/src/contexts/RunIdContext.tsx b/web/ee/src/contexts/RunIdContext.tsx
similarity index 100%
rename from web/oss/src/contexts/RunIdContext.tsx
rename to web/ee/src/contexts/RunIdContext.tsx
diff --git a/web/ee/src/lib/helpers/evaluate.ts b/web/ee/src/lib/helpers/evaluate.ts
new file mode 100644
index 0000000000..49c631c561
--- /dev/null
+++ b/web/ee/src/lib/helpers/evaluate.ts
@@ -0,0 +1,469 @@
+import {EvaluationType} from "@agenta/oss/src/lib/enums"
+import {convertToCsv, downloadCsv} from "@agenta/oss/src/lib/helpers/fileManipulations"
+import {formatCurrency, formatLatency} from "@agenta/oss/src/lib/helpers/formatters"
+import {isDemo} from "@agenta/oss/src/lib/helpers/utils"
+import {
+ Evaluation,
+ GenericObject,
+ TypedValue,
+ Variant,
+ _Evaluation,
+ EvaluationScenario,
+} from "@agenta/oss/src/lib/Types"
+import dayjs from "dayjs"
+import capitalize from "lodash/capitalize"
+import round from "lodash/round"
+
+import AlertPopup from "@/oss/components/AlertPopup/AlertPopup"
+import {runningStatuses} from "@/oss/components/pages/evaluations/cellRenderers/cellRenderers"
+import {
+ HumanEvaluationListTableDataType,
+ SingleModelEvaluationListTableDataType,
+} from "@/oss/lib/Types"
+import {fetchEvaluatonIdsByResource} from "@/oss/services/evaluations/api"
+
+/**
+ * Download the given evaluation rows as a CSV file with the columns
+ * "Inputs", "App Variant <name> Output", "Correct answer" and "Evaluation".
+ *
+ * The input cell is the testset's chat column for that row when it has a value,
+ * otherwise the row's first input value. The file is named
+ * `<appName>_<variantName>_<evaluationType>.csv`.
+ *
+ * @param evaluation Evaluation providing the testset, first variant and file name parts.
+ * @param rows Table rows to export; nothing is downloaded when this is empty.
+ */
+export const exportExactEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => {
+    // Column names are derived from the first exported row, so an empty table
+    // has nothing to export (Object.keys(undefined) would throw).
+    if (!rows.length) return
+
+    const {testset, variants} = evaluation
+    const exportRow = rows.map((data, ix) => ({
+        ["Inputs"]:
+            testset.csvdata[ix]?.[testset.testsetChatColumn] || data.inputs?.[0]?.input_value,
+        [`App Variant ${variants[0].variantName} Output`]: data?.columnData0
+            ? data?.columnData0
+            : data.outputs?.[0]?.variant_output,
+        ["Correct answer"]: data.correctAnswer,
+        ["Evaluation"]: data.score,
+    }))
+    const exportCol = Object.keys(exportRow[0])
+
+    const csvData = convertToCsv(exportRow, exportCol)
+    const filename = `${evaluation.appName}_${variants[0].variantName}_${evaluation.evaluationType}.csv`
+    downloadCsv(csvData, filename)
+}
+
+/**
+ * Download the given evaluation rows as a CSV file with the columns
+ * "Inputs", "App Variant <name> Output", "Correct answer", "Score" and "Evaluation".
+ *
+ * "Score" is taken from `row.score` and "Evaluation" from `row.similarity`.
+ * The input cell is the testset's chat column for that row when it has a value,
+ * otherwise the row's first input value.
+ *
+ * @param evaluation Evaluation providing the testset, first variant and file name parts.
+ * @param rows Table rows to export; nothing is downloaded when this is empty.
+ */
+export const exportSimilarityEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => {
+    // Column names are derived from the first exported row, so an empty table
+    // has nothing to export (Object.keys(undefined) would throw).
+    if (!rows.length) return
+
+    const {testset, variants} = evaluation
+    const exportRow = rows.map((data, ix) => ({
+        ["Inputs"]:
+            testset.csvdata[ix]?.[testset.testsetChatColumn] || data.inputs?.[0]?.input_value,
+        [`App Variant ${variants[0].variantName} Output`]: data?.columnData0
+            ? data?.columnData0
+            : data.outputs?.[0]?.variant_output,
+        ["Correct answer"]: data.correctAnswer,
+        ["Score"]: data.score,
+        ["Evaluation"]: data.similarity,
+    }))
+    const exportCol = Object.keys(exportRow[0])
+
+    const csvData = convertToCsv(exportRow, exportCol)
+    const filename = `${evaluation.appName}_${variants[0].variantName}_${evaluation.evaluationType}.csv`
+    downloadCsv(csvData, filename)
+}
+
+/**
+ * Download the given evaluation rows as a CSV file with the columns
+ * "Inputs", "App Variant <name> Output", "Correct answer" and "Score".
+ *
+ * The input cell is the testset's chat column for that row when it has a value,
+ * otherwise the row's first input value. The file is named
+ * `<appName>_<variantName>_<evaluationType>.csv`.
+ *
+ * @param evaluation Evaluation providing the testset, first variant and file name parts.
+ * @param rows Table rows to export; nothing is downloaded when this is empty.
+ */
+export const exportAICritiqueEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => {
+    // Column names are derived from the first exported row, so an empty table
+    // has nothing to export (Object.keys(undefined) would throw).
+    if (!rows.length) return
+
+    const {testset, variants} = evaluation
+    const exportRow = rows.map((data, ix) => ({
+        ["Inputs"]:
+            testset.csvdata[ix]?.[testset.testsetChatColumn] || data.inputs?.[0]?.input_value,
+        [`App Variant ${variants[0].variantName} Output`]: data?.columnData0
+            ? data?.columnData0
+            : data.outputs?.[0]?.variant_output,
+        ["Correct answer"]: data.correctAnswer,
+        ["Score"]: data.score,
+    }))
+    const exportCol = Object.keys(exportRow[0])
+
+    const csvData = convertToCsv(exportRow, exportCol)
+    const filename = `${evaluation.appName}_${variants[0].variantName}_${evaluation.evaluationType}.csv`
+    downloadCsv(csvData, filename)
+}
+
+/**
+ * Download the rows of a two-variant comparison as a CSV file.
+ *
+ * Input columns are either a single "Input" column (the testset chat column, when the
+ * testset defines one) or one column per named input of the row. They are followed by
+ * the outputs of the first two variants, the vote, the expected output and the notes.
+ * The vote cell shows the name of the variant whose id matches `row.vote`, falling back
+ * to the raw vote value.
+ *
+ * @param evaluation Evaluation providing the testset, the two variants and file name parts.
+ * @param scenarios Scenarios aligned by index with `rows`; supply answers and notes.
+ * @param rows Table rows to export; nothing is downloaded when this is empty.
+ */
+export const exportABTestingEvaluationData = (
+    evaluation: Evaluation,
+    scenarios: EvaluationScenario[],
+    rows: GenericObject[],
+) => {
+    // Column names are derived from the first exported row, so an empty table
+    // has nothing to export (Object.keys(undefined) would throw).
+    if (!rows.length) return
+
+    const {testset, variants} = evaluation
+    const exportRow = rows.map((data, ix) => {
+        const inputColumns = testset.testsetChatColumn
+            ? {Input: testset.csvdata[ix]?.[testset.testsetChatColumn]}
+            : (data.inputs ?? []).reduce(
+                  (columns: any, input: {input_name: string; input_value: string}) => {
+                      columns[`${input.input_name}`] = input.input_value
+                      return columns
+                  },
+                  {},
+              )
+        return {
+            ...inputColumns,
+            [`App Variant ${variants[0].variantName} Output 0`]: data?.columnData0
+                ? data?.columnData0
+                : data.outputs?.[0]?.variant_output,
+            [`App Variant ${variants[1].variantName} Output 1`]: data?.columnData1
+                ? data?.columnData1
+                : data.outputs?.[1]?.variant_output,
+            ["Vote"]:
+                variants.find((v: Variant) => v.variantId === data.vote)?.variantName ||
+                data.vote,
+            // Rows may outnumber testset entries, so guard the csvdata lookup as well.
+            ["Expected Output"]:
+                scenarios[ix]?.correctAnswer || testset.csvdata[ix]?.correct_answer,
+            ["Additional notes"]: scenarios[ix]?.note,
+        }
+    })
+    const exportCol = Object.keys(exportRow[0])
+
+    const csvData = convertToCsv(exportRow, exportCol)
+    const filename = `${evaluation.appName}_${variants[0].variantName}_${variants[1].variantName}_${evaluation.evaluationType}.csv`
+    downloadCsv(csvData, filename)
+}
+
+/**
+ * Download the rows of a single-variant evaluation as a CSV file.
+ *
+ * Input columns are either a single "Input" column (the testset chat column, when the
+ * testset defines one) or one column per named input of the row. They are followed by
+ * the first variant's output, the score, the expected output and the notes.
+ * The score is parsed as an integer; "-" is written when it is not a number.
+ *
+ * @param evaluation Evaluation providing the testset, first variant and file name parts.
+ * @param scenarios Scenarios aligned by index with `rows`; supply answers and notes.
+ * @param rows Table rows to export; nothing is downloaded when this is empty.
+ */
+export const exportSingleModelEvaluationData = (
+    evaluation: Evaluation,
+    scenarios: EvaluationScenario[],
+    rows: GenericObject[],
+) => {
+    // Column names are derived from the first exported row, so an empty table
+    // has nothing to export (Object.keys(undefined) would throw).
+    if (!rows.length) return
+
+    const {testset, variants} = evaluation
+    const exportRow = rows.map((data, ix) => {
+        const inputColumns = testset.testsetChatColumn
+            ? {Input: testset.csvdata[ix]?.[testset.testsetChatColumn]}
+            : (data.inputs ?? []).reduce(
+                  (columns: any, input: {input_name: string; input_value: string}) => {
+                      columns[`${input.input_name}`] = input.input_value
+                      return columns
+                  },
+                  {},
+              )
+        // Explicit radix so a hex-looking string is never read as base 16.
+        const numericScore = parseInt(data.score, 10)
+        return {
+            ...inputColumns,
+            [`App Variant ${variants[0].variantName} Output 0`]: data?.columnData0
+                ? data?.columnData0
+                : data.outputs?.[0]?.variant_output,
+            ["Score"]: isNaN(numericScore) ? "-" : numericScore,
+            // Rows may outnumber testset entries, so guard the csvdata lookup as well.
+            ["Expected Output"]:
+                scenarios[ix]?.correctAnswer || testset.csvdata[ix]?.correct_answer,
+            ["Additional notes"]: scenarios[ix]?.note,
+        }
+    })
+    const exportCol = Object.keys(exportRow[0])
+
+    const csvData = convertToCsv(exportRow, exportCol)
+    const filename = `${evaluation.appName}_${variants[0].variantName}_${evaluation.evaluationType}.csv`
+    downloadCsv(csvData, filename)
+}
+
+/**
+ * Download the given evaluation rows as a CSV file with the columns
+ * "Inputs", "App Variant <name> Output", "Match / Mismatch" and "Evaluation".
+ *
+ * A row counts as a "Match" when its score is "correct" and `settings.regexShouldMatch`
+ * is truthy, or when its score is anything else and `settings.regexShouldMatch` is falsy.
+ *
+ * @param evaluation Evaluation providing the testset, first variant and file name parts.
+ * @param rows Table rows to export; nothing is downloaded when this is empty.
+ * @param settings Evaluation settings; only `regexShouldMatch` is read.
+ */
+export const exportRegexEvaluationData = (
+    evaluation: Evaluation,
+    rows: GenericObject[],
+    settings: GenericObject,
+) => {
+    // Column names are derived from the first exported row, so an empty table
+    // has nothing to export (Object.keys(undefined) would throw).
+    if (!rows.length) return
+
+    const {testset, variants} = evaluation
+    const exportRow = rows.map((data, ix) => {
+        const isCorrect = data.score === "correct"
+        const isMatch = settings.regexShouldMatch ? isCorrect : !isCorrect
+
+        return {
+            ["Inputs"]:
+                testset.csvdata[ix]?.[testset.testsetChatColumn] ||
+                data.inputs?.[0]?.input_value,
+            [`App Variant ${variants[0].variantName} Output`]: data?.columnData0
+                ? data?.columnData0
+                : data.outputs?.[0]?.variant_output,
+            ["Match / Mismatch"]: isMatch ? "Match" : "Mismatch",
+            ["Evaluation"]: data.score,
+        }
+    })
+    const exportCol = Object.keys(exportRow[0])
+
+    const csvData = convertToCsv(exportRow, exportCol)
+    const filename = `${evaluation.appName}_${variants[0].variantName}_${evaluation.evaluationType}.csv`
+    downloadCsv(csvData, filename)
+}
+
+/**
+ * Download the given evaluation rows as a CSV file with the columns
+ * "Inputs", "App Variant <name> Output", "Correct answer" and "Score".
+ *
+ * The input cell is the testset's chat column for that row when it has a value,
+ * otherwise the row's first input value. The file is named
+ * `<appName>_<variantName>_<evaluationType>.csv`.
+ *
+ * @param evaluation Evaluation providing the testset, first variant and file name parts.
+ * @param rows Table rows to export; nothing is downloaded when this is empty.
+ */
+export const exportWebhookEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => {
+    // Column names are derived from the first exported row, so an empty table
+    // has nothing to export (Object.keys(undefined) would throw).
+    if (!rows.length) return
+
+    const {testset, variants} = evaluation
+    const exportRow = rows.map((data, ix) => ({
+        ["Inputs"]:
+            testset.csvdata[ix]?.[testset.testsetChatColumn] || data.inputs?.[0]?.input_value,
+        [`App Variant ${variants[0].variantName} Output`]: data?.columnData0
+            ? data?.columnData0
+            : data.outputs?.[0]?.variant_output,
+        ["Correct answer"]: data.correctAnswer,
+        ["Score"]: data.score,
+    }))
+    const exportCol = Object.keys(exportRow[0])
+
+    const csvData = convertToCsv(exportRow, exportCol)
+    const filename = `${evaluation.appName}_${variants[0].variantName}_${evaluation.evaluationType}.csv`
+    downloadCsv(csvData, filename)
+}
+
+/**
+ * Download the given evaluation rows as a CSV file with the columns
+ * "Inputs", "App Variant <name> Output", "Correct answer" and "Score".
+ *
+ * The input cell is the testset's chat column for that row when it has a value,
+ * otherwise the row's first input value. The file is named
+ * `<appName>_<variantName>_<evaluationType>.csv`.
+ *
+ * @param evaluation Evaluation providing the testset, first variant and file name parts.
+ * @param rows Table rows to export; nothing is downloaded when this is empty.
+ */
+export const exportCustomCodeEvaluationData = (evaluation: Evaluation, rows: GenericObject[]) => {
+    // Column names are derived from the first exported row, so an empty table
+    // has nothing to export (Object.keys(undefined) would throw).
+    if (!rows.length) return
+
+    const {testset, variants} = evaluation
+    const exportRow = rows.map((data, ix) => ({
+        ["Inputs"]:
+            testset.csvdata[ix]?.[testset.testsetChatColumn] || data.inputs?.[0]?.input_value,
+        [`App Variant ${variants[0].variantName} Output`]: data?.columnData0
+            ? data?.columnData0
+            : data.outputs?.[0]?.variant_output,
+        ["Correct answer"]: data.correctAnswer,
+        ["Score"]: data.score,
+    }))
+    const exportCol = Object.keys(exportRow[0])
+
+    const csvData = convertToCsv(exportRow, exportCol)
+    const filename = `${evaluation.appName}_${variants[0].variantName}_${evaluation.evaluationType}.csv`
+    downloadCsv(csvData, filename)
+}
+
+/**
+ * Computes the weighted average of a score histogram, scaled by `multiplier`.
+ *
+ * Keys that parse as numbers are treated as score values and their entries as
+ * occurrence counts; non-numeric keys are ignored. The result is NaN when the
+ * counts sum to zero.
+ */
+export const calculateResultsDataAvg = (resultsData: Record, multiplier = 10) => {
+    const numericEntries = Object.entries(resultsData).filter(([score]) => !isNaN(+score))
+
+    let count = 0
+    let sum = 0
+    for (const [score, occurrences] of numericEntries) {
+        count += +occurrences
+        sum += (parseFloat(score) || 0) * +occurrences
+    }
+
+    return (sum / count) * multiplier
+}
+
+/**
+ * Returns the vote percentage recorded for the variant at position `index`,
+ * or undefined when no vote data exists for that variant.
+ */
+export const getVotesPercentage = (record: HumanEvaluationListTableDataType, index: number) => {
+    const {variants, variants_votes_data: votesByVariant} = record.votesData
+    return votesByVariant[variants[index]]?.percentage
+}
+
+/**
+ * Checks whether the given resources may be deleted.
+ *
+ * When `isDemo()` is true, looks up the evaluations that reference the
+ * resources; if any exist, shows an alert and resolves to false. In every
+ * other case resolves to true.
+ *
+ * @param data - resource type and ids forwarded to fetchEvaluatonIdsByResource
+ * @returns true when deletion may proceed, false when the resource is in use
+ */
+export const checkIfResourceValidForDeletion = async (
+    data: Omit[0], "appId">,
+) => {
+    // The usage lookup only runs when isDemo() is true.
+    if (!isDemo()) return true
+
+    const response = await fetchEvaluatonIdsByResource(data)
+    const usageCount = response.data.length
+    if (!(usageCount > 0)) return true
+
+    const isPlural = data.resourceIds.length > 1
+    const label =
+        data.resourceType === "testset"
+            ? "Testset"
+            : data.resourceType === "evaluator_config"
+              ? "Evaluator"
+              : "Variant"
+    const name = label + (isPlural ? "s" : "")
+    // Keep subject and verb in agreement ("Testsets are" / "Testset is").
+    const verb = isPlural ? "are" : "is"
+
+    const suffix = usageCount > 1 ? "s" : ""
+    AlertPopup({
+        title: `${name} ${verb} in use`,
+        message: `The ${name} ${verb} currently in use by ${usageCount} evaluation${suffix}. Please delete the evaluation${suffix} first.`,
+        cancelText: null,
+        okText: "Ok",
+    })
+    return false
+}
+
+/**
+ * Converts a typed result into the value shown in the UI.
+ *
+ * - "error" results yield the error message.
+ * - A missing value (or a "hidden" type) yields "-".
+ * - Numbers, costs and latencies go through their dedicated formatters.
+ * - Objects, message lists and multiple-choice arrays are flattened to text.
+ * - Every other type is rendered with toString().
+ */
+export function getTypedValue(res?: TypedValue) {
+    const {value, type, error} = res || {}
+
+    if (type === "error") return error?.message
+    if (value === undefined) return "-"
+
+    // Generic rendering shared by every branch without a dedicated formatter.
+    const asText = () => value?.toString() ?? "-"
+
+    if (type === "number") return round(Number(value), 2)
+    if (type === "boolean" || type === "bool") return capitalize(value?.toString())
+    if (type === "cost") return formatCurrency(Number(value))
+    if (type === "latency") return formatLatency(Number(value))
+    if (type === "hidden") return "-"
+
+    if (type === "object") {
+        return typeof value === "object" ? JSON.stringify(value, null, 2) : asText()
+    }
+
+    if (type === "messages") {
+        if (!Array.isArray(value)) return asText()
+        return value
+            .map((msg) => (typeof msg === "string" ? msg : JSON.stringify(msg)))
+            .join("\n")
+    }
+
+    if (type === "multiple_choice") {
+        return Array.isArray(value) ? value.join(", ") : asText()
+    }
+
+    // "string", "text", "code", "regex" and any unrecognised type.
+    return asText()
+}
+
+type CellDataType = "number" | "text" | "date"
+/**
+ * Builds the sorting/filtering options for a table column of the given type.
+ *
+ * Date columns get a filter comparator that compares the cell's day (time
+ * stripped via startOf("day")) with the filter date. Number columns use the
+ * number filter but are declared with cellDataType "text".
+ */
+export function getFilterParams(type: CellDataType) {
+    const filterParams: GenericObject = {}
+
+    if (type == "date") {
+        filterParams.comparator = (filterLocalDateAtMidnight: Date, cellValue: string | null) => {
+            if (cellValue == null) return -1
+
+            const cellTime = dayjs(cellValue).startOf("day").toDate().getTime()
+            const filterTime = filterLocalDateAtMidnight.getTime()
+
+            if (filterTime === cellTime) return 0
+            if (cellTime < filterTime) return -1
+            if (cellTime > filterTime) return 1
+            // Falls through (undefined) when the dates are not comparable, e.g. an invalid date.
+        }
+    }
+
+    let filter = "agTextColumnFilter"
+    if (type === "number") {
+        filter = "agNumberColumnFilter"
+    } else if (type === "date") {
+        filter = "agDateColumnFilter"
+    }
+
+    return {
+        sortable: true,
+        floatingFilter: true,
+        filter,
+        cellDataType: type === "number" ? "text" : type,
+        filterParams,
+        comparator: getCustomComparator(type),
+    }
+}
+
+/**
+ * Returns the evaluation's duration in milliseconds, measured from created_at
+ * to "now" while the status is one of runningStatuses, otherwise to updated_at.
+ */
+export const calcEvalDuration = (evaluation: _Evaluation) => {
+    const isRunning = runningStatuses.includes(evaluation.status.value)
+    const endTime = isRunning ? Date.now() : evaluation.updated_at
+    return dayjs(endTime).diff(dayjs(evaluation.created_at), "milliseconds")
+}
+
+/**
+ * Creates a sort comparator for the given cell type. Both operands are
+ * stringified first; dates are compared with dayjs, text with localeCompare,
+ * and numbers after parseFloat (unparseable values count as 0).
+ */
+const getCustomComparator = (type: CellDataType) => (valueA: string, valueB: string) => {
+    const left = String(valueA)
+    const right = String(valueB)
+
+    const toNumber = (raw: string) => {
+        const parsed = parseFloat(raw || "0")
+        return isNaN(parsed) ? 0 : parsed
+    }
+
+    if (type === "date") return dayjs(left).diff(dayjs(right))
+    if (type === "text") return left.localeCompare(right)
+    if (type === "number") return toNumber(left) - toNumber(right)
+    return 0
+}
+
+/** Strips a leading "correctAnswer_" from `str`; other strings are returned unchanged. */
+export const removeCorrectAnswerPrefix = (str: string) => {
+    const leadingPrefix = /^correctAnswer_/
+    const stripped = str.replace(leadingPrefix, "")
+    return stripped
+}
+
+/**
+ * Splits evaluator settings into values resolved from the selected testcase
+ * and values passed through unchanged.
+ *
+ * A string setting of the form "testcase.<column>" is replaced by
+ * selectedTestcase[<column>] and placed in `testcaseObj`; everything else is
+ * copied to `evalMapObj`.
+ *
+ * @param settingsValues - evaluator settings keyed by setting name
+ * @param selectedTestcase - testcase row keyed by column name
+ * @returns the two maps as {testcaseObj, evalMapObj}
+ */
+export const mapTestcaseAndEvalValues = (
+    settingsValues: Record,
+    selectedTestcase: Record,
+) => {
+    const TESTCASE_PREFIX = "testcase."
+    const testcaseObj: Record = {}
+    const evalMapObj: Record = {}
+
+    Object.entries(settingsValues).forEach(([key, value]) => {
+        if (typeof value === "string" && value.startsWith(TESTCASE_PREFIX)) {
+            // Strip only the prefix so column names that themselves contain dots
+            // (e.g. "user.name") are looked up in full rather than truncated.
+            testcaseObj[key] = selectedTestcase[value.slice(TESTCASE_PREFIX.length)]
+        } else {
+            evalMapObj[key] = value
+        }
+    })
+
+    return {testcaseObj, evalMapObj}
+}
+
+/**
+ * Returns a copy of the settings in which string values starting with
+ * "trace." have that first "trace." removed; all other values are copied as-is.
+ */
+export const transformTraceKeysInSettings = (
+    settingsValues: Record,
+): Record => {
+    const transformed = {} as Record
+
+    for (const key of Object.keys(settingsValues)) {
+        const raw = settingsValues[key]
+        const isTracePath =
+            !transformed[key] && typeof raw === "string" && raw.startsWith("trace.")
+
+        transformed[key] = isTracePath ? raw.replace("trace.", "") : raw
+    }
+
+    return transformed
+}
+
+/**
+ * Returns the evaluator tag options as {label, value} pairs. When isDemo() is
+ * true the "RAG" tag is placed first.
+ */
+export const getEvaluatorTags = () => {
+    const baseTags = [
+        {label: "Classifiers", value: "classifiers"},
+        {label: "Similarity", value: "similarity"},
+        {label: "AI / LLM", value: "ai_llm"},
+        {label: "Functional", value: "functional"},
+    ]
+
+    if (!isDemo()) {
+        return baseTags
+    }
+
+    return [{label: "RAG", value: "rag"}, ...baseTags]
+}
+
+/**
+ * Derives the average score for a single-model evaluation row.
+ *
+ * Sources are tried in order: `scoresData` (share of correct/true rows as a
+ * percentage), then `resultsData` (weighted average via
+ * calculateResultsDataAvg with a per-evaluation-type multiplier), then
+ * `avgScore` scaled by 100. Returns 0 when none is present or when the
+ * computation does not produce a finite number.
+ */
+export const calculateAvgScore = (evaluation: SingleModelEvaluationListTableDataType) => {
+    let score = 0
+    if (evaluation.scoresData) {
+        score =
+            ((evaluation.scoresData.correct?.length || evaluation.scoresData.true?.length || 0) /
+                evaluation.scoresData.nb_of_rows) *
+            100
+    } else if (evaluation.resultsData) {
+        const multiplier = {
+            [EvaluationType.auto_webhook_test]: 100,
+            [EvaluationType.single_model_test]: 1,
+        }
+        // Types missing from the map pass undefined, which selects the callee's default multiplier.
+        score = calculateResultsDataAvg(
+            evaluation.resultsData,
+            multiplier[evaluation.evaluationType as keyof typeof multiplier],
+        )
+    } else if (evaluation.avgScore) {
+        score = evaluation.avgScore * 100
+    }
+
+    // A zero row count (or zero total count) yields NaN/Infinity; report 0 in
+    // every branch instead of only guarding the resultsData path.
+    return Number.isFinite(score) ? score : 0
+}
diff --git a/web/ee/src/lib/helpers/hashUtils.ts b/web/ee/src/lib/helpers/hashUtils.ts
new file mode 100644
index 0000000000..5c66724e5a
--- /dev/null
+++ b/web/ee/src/lib/helpers/hashUtils.ts
@@ -0,0 +1,73 @@
+// Utility to generate a hash ID for annotation/invocation steps, aligned with backend make_hash_id.
+// Uses blake2b (via blakejs). NOTE: no SHA-256 fallback is actually implemented; see the catch branch in makeHashId.
+
+import blake from "blakejs"
+// import { v4 as uuidv4 } from "uuid" // Use this for UUIDs if needed
+
+// Reference names that makeHashId includes in the hashed payload; any other
+// key found in `references` is ignored.
+const REFERENCE_KEYS = [
+ "application",
+ "application_variant",
+ "application_revision",
+ "testset",
+ "testcase",
+ "evaluator",
+]
+
+// Recursively stable, whitespace-free JSON stringifier
+/**
+ * Serialises a value to whitespace-free JSON with object keys sorted at every
+ * level, so equal payloads always produce the same string.
+ *
+ * Values that JSON cannot represent (undefined, functions, symbols) follow
+ * JSON.stringify's rules: they are dropped from objects and become null
+ * inside arrays (and at the top level), so the output is always valid JSON.
+ */
+function stableStringifyRecursive(obj: any): string {
+    if (obj === null || typeof obj !== "object") {
+        // JSON.stringify returns undefined for non-serialisable primitives;
+        // without the fallback the literal text "undefined" would be emitted.
+        return JSON.stringify(obj) ?? "null"
+    }
+    if (Array.isArray(obj)) {
+        // Array.from visits holes in sparse arrays as undefined, which serialise as null.
+        const items = Array.from(obj, (item) => stableStringifyRecursive(item))
+        return `[${items.join(",")}]`
+    }
+    const isSerialisable = (value: any) =>
+        value !== undefined && typeof value !== "function" && typeof value !== "symbol"
+    const entries = Object.keys(obj)
+        .sort()
+        .filter((key) => isSerialisable(obj[key]))
+        .map((key) => `${JSON.stringify(key)}:${stableStringifyRecursive(obj[key])}`)
+    return `{${entries.join(",")}}`
+}
+
+/**
+ * Builds a deterministic hash ID from reference ids and span links.
+ *
+ * The payload contains `{id}` for every entry of `references` whose key is in
+ * REFERENCE_KEYS and whose id is set, plus `{span_id, trace_id}` for every
+ * entry of `links`. It is serialised with stableStringifyRecursive and hashed
+ * with blake2b using a 16-byte output length.
+ *
+ * @returns the hex digest, or "" when neither `references` nor `links` is given
+ * @throws Error when hashing fails and crypto.subtle exists (no sync fallback)
+ */
+export function makeHashId({
+    references,
+    links,
+}: {
+    references?: Record
+    links?: Record
+}): string {
+    if (!references && !links) return ""
+    const payload: Record = {}
+
+    for (const k of Object.keys(references || {})) {
+        if (!REFERENCE_KEYS.includes(k)) continue
+        // Only include 'id' field, not 'slug'. Optional chaining tolerates
+        // null/undefined entries instead of throwing on them.
+        const id = references![k]?.id
+        if (id != null) {
+            payload[k] = {id}
+        }
+    }
+    for (const k of Object.keys(links || {})) {
+        const v = links![k]
+        payload[k] = {
+            span_id: v.span_id,
+            trace_id: v.trace_id,
+        }
+    }
+    // Stable, deep, whitespace-free JSON
+    const serialized = stableStringifyRecursive(payload)
+
+    // blake2b hash (digest_size=16)
+    try {
+        return blake.blake2bHex(serialized, null, 16)
+    } catch (e) {
+        // globalThis rather than window: a bare `window` throws a ReferenceError
+        // in workers/SSR and would mask the real failure.
+        if (globalThis.crypto?.subtle) {
+            throw new Error(
+                "blake2b not available and crypto.subtle is async. Provide a polyfill or use a sync fallback.",
+            )
+        }
+        // Last resort: base64 of the serialised payload (not a hash).
+        return btoa(serialized)
+    }
+}
diff --git a/web/oss/src/lib/helpers/serviceValidations.ts b/web/ee/src/lib/helpers/serviceValidations.ts
similarity index 100%
rename from web/oss/src/lib/helpers/serviceValidations.ts
rename to web/ee/src/lib/helpers/serviceValidations.ts
diff --git a/web/oss/src/lib/helpers/traceUtils.ts b/web/ee/src/lib/helpers/traceUtils.ts
similarity index 98%
rename from web/oss/src/lib/helpers/traceUtils.ts
rename to web/ee/src/lib/helpers/traceUtils.ts
index 536a7b5bc6..f232711598 100644
--- a/web/oss/src/lib/helpers/traceUtils.ts
+++ b/web/ee/src/lib/helpers/traceUtils.ts
@@ -1,6 +1,6 @@
import {uuidToTraceId} from "@/oss/lib/hooks/useAnnotations/assets/helpers"
-import {TraceData, TraceTree} from "@agenta/oss/src/lib/hooks/useEvaluationRunScenarioSteps/types"
+import {TraceData, TraceTree} from "../hooks/useEvaluationRunScenarioSteps/types"
export function findTraceForStep(traces: any[] | undefined, traceId?: string): any | undefined {
if (!traces?.length || !traceId) return undefined
diff --git a/web/oss/src/lib/hooks/useEvalScenarioQueue/index.ts b/web/ee/src/lib/hooks/useEvalScenarioQueue/index.ts
similarity index 99%
rename from web/oss/src/lib/hooks/useEvalScenarioQueue/index.ts
rename to web/ee/src/lib/hooks/useEvalScenarioQueue/index.ts
index 4250637c7d..bfeead6e7b 100644
--- a/web/oss/src/lib/hooks/useEvalScenarioQueue/index.ts
+++ b/web/ee/src/lib/hooks/useEvalScenarioQueue/index.ts
@@ -16,7 +16,7 @@ import {evaluationRunStateFamily} from "@/oss/lib/hooks/useEvaluationRunData/ass
import {useJwtRefresher} from "@/oss/lib/hooks/useJWT"
import {EvaluationStatus} from "@/oss/lib/Types"
import {slugify} from "@/oss/lib/utils/slugify"
-import type {ConfigMessage, ResultMessage, RunEvalMessage} from "@/oss/lib/evalRunner/types"
+import type {ConfigMessage, ResultMessage, RunEvalMessage} from "@/oss/lib/workers/evalRunner/types"
import {getProjectValues} from "@/oss/state/project"
// import {setOptimisticStepData} from "../../../components/EvalRunDetails/assets/optimisticUtils"
@@ -169,7 +169,7 @@ export function useEvalScenarioQueue(options?: {concurrency?: number; runId?: st
useEffect(() => {
if (!sharedWorker) {
sharedWorker = new Worker(
- new URL("@/oss/lib/evalRunner/evalRunner.worker.ts", import.meta.url),
+ new URL("@/oss/lib/workers/evalRunner/evalRunner.worker.ts", import.meta.url),
)
}
diff --git a/web/oss/src/lib/hooks/useEvalScenarioQueue/responseQueue.ts b/web/ee/src/lib/hooks/useEvalScenarioQueue/responseQueue.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvalScenarioQueue/responseQueue.ts
rename to web/ee/src/lib/hooks/useEvalScenarioQueue/responseQueue.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/bulkFetch.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/bulkFetch.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/bulkFetch.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/bulkFetch.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/cache.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/cache.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/cache.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/cache.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/index.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/index.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/index.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/index.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/migrationHelper.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/migrationHelper.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/migrationHelper.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/migrationHelper.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/progress.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/progress.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/progress.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/progress.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/runScopedAtoms.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/runScopedAtoms.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/runScopedAtoms.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/runScopedAtoms.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/runScopedMetrics.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/runScopedMetrics.ts
similarity index 99%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/runScopedMetrics.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/runScopedMetrics.ts
index 4cea5caa76..41abde06a9 100644
--- a/web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/runScopedMetrics.ts
+++ b/web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/runScopedMetrics.ts
@@ -23,7 +23,7 @@ import {evalAtomStore} from "./store"
// Re-export the atom families for external use
export {runMetricsCacheFamily, runMetricsStatsCacheFamily}
-import {fetchRunMetricsViaWorker} from "@/agenta-oss-common/lib/evalRunner/runMetricsWorker"
+import {fetchRunMetricsViaWorker} from "@/agenta-oss-common/lib/workers/evalRunner/runMetricsWorker"
// Helper: flatten acc object and nested metrics similar to legacy mergedMetricsAtom
export function flattenMetrics(raw: Record): Record {
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/runScopedScenarios.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/runScopedScenarios.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/runScopedScenarios.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/runScopedScenarios.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/store.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/store.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/store.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/store.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/types.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/types.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/types.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/types.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/utils.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/utils.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/atoms/utils.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/atoms/utils.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/constants.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/constants.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/constants.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/constants.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/helpers/buildRunIndex.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/helpers/buildRunIndex.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/helpers/buildRunIndex.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/helpers/buildRunIndex.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/helpers/fetchScenarioListViaWorker.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/helpers/fetchScenarioListViaWorker.ts
similarity index 93%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/helpers/fetchScenarioListViaWorker.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/helpers/fetchScenarioListViaWorker.ts
index b39b1c86e2..ace3e7fa8e 100644
--- a/web/oss/src/lib/hooks/useEvaluationRunData/assets/helpers/fetchScenarioListViaWorker.ts
+++ b/web/ee/src/lib/hooks/useEvaluationRunData/assets/helpers/fetchScenarioListViaWorker.ts
@@ -7,7 +7,7 @@ let _worker: Worker | null = null
function getWorker() {
if (!_worker) {
_worker = new Worker(
- new URL("@/oss/lib/evalRunner/scenarioListWorker.ts", import.meta.url),
+ new URL("@/oss/lib/workers/evalRunner/scenarioListWorker.ts", import.meta.url),
{
type: "module",
},
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/helpers/fetchScenarioViaWorker.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/helpers/fetchScenarioViaWorker.ts
similarity index 99%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/helpers/fetchScenarioViaWorker.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/helpers/fetchScenarioViaWorker.ts
index 2b81ed1826..667b7858ba 100644
--- a/web/oss/src/lib/hooks/useEvaluationRunData/assets/helpers/fetchScenarioViaWorker.ts
+++ b/web/ee/src/lib/hooks/useEvaluationRunData/assets/helpers/fetchScenarioViaWorker.ts
@@ -85,7 +85,7 @@ const performFetch = async (
const {jwt, apiUrl, projectId} = await buildAuthContext()
const {fetchStepsViaWorker} = await import(
- "@/oss/lib/evalRunner/bulkWorker"
+ "@/agenta-oss-common/lib/workers/evalRunner/bulkWorker"
)
const store = evalAtomStore()
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/helpers/scenarioFilters.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/helpers/scenarioFilters.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/helpers/scenarioFilters.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/helpers/scenarioFilters.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/helpers/workerContext/index.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/helpers/workerContext/index.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/helpers/workerContext/index.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/helpers/workerContext/index.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/assets/helpers/workerContext/types.ts b/web/ee/src/lib/hooks/useEvaluationRunData/assets/helpers/workerContext/types.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/assets/helpers/workerContext/types.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/assets/helpers/workerContext/types.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/index.ts b/web/ee/src/lib/hooks/useEvaluationRunData/index.ts
similarity index 99%
rename from web/oss/src/lib/hooks/useEvaluationRunData/index.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/index.ts
index 7be13af806..df4bef1420 100644
--- a/web/oss/src/lib/hooks/useEvaluationRunData/index.ts
+++ b/web/ee/src/lib/hooks/useEvaluationRunData/index.ts
@@ -83,7 +83,7 @@ const useEvaluationRunData = (evaluationTableId: string | null, debug = false, r
const projectId = useAtomValue(projectIdAtom)
const setProjectVariantReferences = useSetAtom(setProjectVariantReferencesAtom)
const user = useAtomValue(userAtom)
- const requireUser = true
+ const requireUser = isDemo()
const enrichRun = useEnrichEvaluationRun({debug, evalType})
const suppressLoadingRef = useRef(false)
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/refreshLiveRun.ts b/web/ee/src/lib/hooks/useEvaluationRunData/refreshLiveRun.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/refreshLiveRun.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/refreshLiveRun.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/types.ts b/web/ee/src/lib/hooks/useEvaluationRunData/types.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/types.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/types.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/useEvalRunScenarioData.tsx b/web/ee/src/lib/hooks/useEvaluationRunData/useEvalRunScenarioData.tsx
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/useEvalRunScenarioData.tsx
rename to web/ee/src/lib/hooks/useEvaluationRunData/useEvalRunScenarioData.tsx
diff --git a/web/oss/src/lib/hooks/useEvaluationRunData/useScenarioStepSnapshot.ts b/web/ee/src/lib/hooks/useEvaluationRunData/useScenarioStepSnapshot.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunData/useScenarioStepSnapshot.ts
rename to web/ee/src/lib/hooks/useEvaluationRunData/useScenarioStepSnapshot.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunMetrics/assets/utils.ts b/web/ee/src/lib/hooks/useEvaluationRunMetrics/assets/utils.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunMetrics/assets/utils.ts
rename to web/ee/src/lib/hooks/useEvaluationRunMetrics/assets/utils.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunMetrics/index.ts b/web/ee/src/lib/hooks/useEvaluationRunMetrics/index.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunMetrics/index.ts
rename to web/ee/src/lib/hooks/useEvaluationRunMetrics/index.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunMetrics/types.ts b/web/ee/src/lib/hooks/useEvaluationRunMetrics/types.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunMetrics/types.ts
rename to web/ee/src/lib/hooks/useEvaluationRunMetrics/types.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunScenarioSteps/types.ts b/web/ee/src/lib/hooks/useEvaluationRunScenarioSteps/types.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunScenarioSteps/types.ts
rename to web/ee/src/lib/hooks/useEvaluationRunScenarioSteps/types.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunScenarios/index.ts b/web/ee/src/lib/hooks/useEvaluationRunScenarios/index.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunScenarios/index.ts
rename to web/ee/src/lib/hooks/useEvaluationRunScenarios/index.ts
diff --git a/web/oss/src/lib/hooks/useEvaluationRunScenarios/types.ts b/web/ee/src/lib/hooks/useEvaluationRunScenarios/types.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluationRunScenarios/types.ts
rename to web/ee/src/lib/hooks/useEvaluationRunScenarios/types.ts
diff --git a/web/oss/src/lib/hooks/useEvaluations.ts b/web/ee/src/lib/hooks/useEvaluations.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useEvaluations.ts
rename to web/ee/src/lib/hooks/useEvaluations.ts
diff --git a/web/oss/src/lib/hooks/useInvocationResult/index.ts b/web/ee/src/lib/hooks/useInvocationResult/index.ts
similarity index 99%
rename from web/oss/src/lib/hooks/useInvocationResult/index.ts
rename to web/ee/src/lib/hooks/useInvocationResult/index.ts
index 201f709a48..a61fc59695 100644
--- a/web/oss/src/lib/hooks/useInvocationResult/index.ts
+++ b/web/ee/src/lib/hooks/useInvocationResult/index.ts
@@ -4,7 +4,7 @@ import {useAtomValue} from "jotai"
import {renderChatMessages} from "@/oss/components/EvalRunDetails/assets/renderChatMessages"
import {evalTypeAtom} from "@/oss/components/EvalRunDetails/state/evalType"
-import {useRunId} from "@agenta/oss/src/contexts/RunIdContext"
+import {useRunId} from "@/oss/contexts/RunIdContext"
import axios from "@/oss/lib/api/assets/axiosConfig"
import {snakeToCamelCaseKeys} from "@/oss/lib/helpers/casing"
import {readInvocationResponse} from "@/oss/lib/helpers/traceUtils"
diff --git a/web/oss/src/lib/hooks/useInvocationResult/types.ts b/web/ee/src/lib/hooks/useInvocationResult/types.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useInvocationResult/types.ts
rename to web/ee/src/lib/hooks/useInvocationResult/types.ts
diff --git a/web/oss/src/lib/hooks/usePreviewEvaluations/assets/utils.ts b/web/ee/src/lib/hooks/usePreviewEvaluations/assets/utils.ts
similarity index 100%
rename from web/oss/src/lib/hooks/usePreviewEvaluations/assets/utils.ts
rename to web/ee/src/lib/hooks/usePreviewEvaluations/assets/utils.ts
diff --git a/web/oss/src/lib/hooks/usePreviewEvaluations/index.ts b/web/ee/src/lib/hooks/usePreviewEvaluations/index.ts
similarity index 100%
rename from web/oss/src/lib/hooks/usePreviewEvaluations/index.ts
rename to web/ee/src/lib/hooks/usePreviewEvaluations/index.ts
diff --git a/web/oss/src/lib/hooks/usePreviewEvaluations/projectVariantConfigs.ts b/web/ee/src/lib/hooks/usePreviewEvaluations/projectVariantConfigs.ts
similarity index 100%
rename from web/oss/src/lib/hooks/usePreviewEvaluations/projectVariantConfigs.ts
rename to web/ee/src/lib/hooks/usePreviewEvaluations/projectVariantConfigs.ts
diff --git a/web/oss/src/lib/hooks/usePreviewEvaluations/states/queryFilterAtoms.ts b/web/ee/src/lib/hooks/usePreviewEvaluations/states/queryFilterAtoms.ts
similarity index 100%
rename from web/oss/src/lib/hooks/usePreviewEvaluations/states/queryFilterAtoms.ts
rename to web/ee/src/lib/hooks/usePreviewEvaluations/states/queryFilterAtoms.ts
diff --git a/web/oss/src/lib/hooks/usePreviewEvaluations/types.ts b/web/ee/src/lib/hooks/usePreviewEvaluations/types.ts
similarity index 100%
rename from web/oss/src/lib/hooks/usePreviewEvaluations/types.ts
rename to web/ee/src/lib/hooks/usePreviewEvaluations/types.ts
diff --git a/web/oss/src/lib/hooks/usePreviewRunningEvaluations/index.ts b/web/ee/src/lib/hooks/usePreviewRunningEvaluations/index.ts
similarity index 100%
rename from web/oss/src/lib/hooks/usePreviewRunningEvaluations/index.ts
rename to web/ee/src/lib/hooks/usePreviewRunningEvaluations/index.ts
diff --git a/web/oss/src/lib/hooks/usePreviewRunningEvaluations/states/runningEvalAtom.ts b/web/ee/src/lib/hooks/usePreviewRunningEvaluations/states/runningEvalAtom.ts
similarity index 100%
rename from web/oss/src/lib/hooks/usePreviewRunningEvaluations/states/runningEvalAtom.ts
rename to web/ee/src/lib/hooks/usePreviewRunningEvaluations/states/runningEvalAtom.ts
diff --git a/web/oss/src/lib/hooks/useRunMetricsMap/index.ts b/web/ee/src/lib/hooks/useRunMetricsMap/index.ts
similarity index 100%
rename from web/oss/src/lib/hooks/useRunMetricsMap/index.ts
rename to web/ee/src/lib/hooks/useRunMetricsMap/index.ts
diff --git a/web/oss/src/lib/metricColumnFactory.tsx b/web/ee/src/lib/metricColumnFactory.tsx
similarity index 100%
rename from web/oss/src/lib/metricColumnFactory.tsx
rename to web/ee/src/lib/metricColumnFactory.tsx
diff --git a/web/oss/src/lib/metricSorter.ts b/web/ee/src/lib/metricSorter.ts
similarity index 100%
rename from web/oss/src/lib/metricSorter.ts
rename to web/ee/src/lib/metricSorter.ts
diff --git a/web/oss/src/lib/metricUtils.ts b/web/ee/src/lib/metricUtils.ts
similarity index 100%
rename from web/oss/src/lib/metricUtils.ts
rename to web/ee/src/lib/metricUtils.ts
diff --git a/web/oss/src/lib/metrics/utils.ts b/web/ee/src/lib/metrics/utils.ts
similarity index 100%
rename from web/oss/src/lib/metrics/utils.ts
rename to web/ee/src/lib/metrics/utils.ts
diff --git a/web/oss/src/lib/tableUtils.ts b/web/ee/src/lib/tableUtils.ts
similarity index 100%
rename from web/oss/src/lib/tableUtils.ts
rename to web/ee/src/lib/tableUtils.ts
diff --git a/web/oss/src/lib/types_ee.ts b/web/ee/src/lib/types_ee.ts
similarity index 100%
rename from web/oss/src/lib/types_ee.ts
rename to web/ee/src/lib/types_ee.ts
diff --git a/web/oss/src/lib/evalRunner/bulkWorker.ts b/web/ee/src/lib/workers/evalRunner/bulkWorker.ts
similarity index 100%
rename from web/oss/src/lib/evalRunner/bulkWorker.ts
rename to web/ee/src/lib/workers/evalRunner/bulkWorker.ts
diff --git a/web/oss/src/lib/evalRunner/evalRunner.worker.ts b/web/ee/src/lib/workers/evalRunner/evalRunner.worker.ts
similarity index 100%
rename from web/oss/src/lib/evalRunner/evalRunner.worker.ts
rename to web/ee/src/lib/workers/evalRunner/evalRunner.worker.ts
diff --git a/web/oss/src/lib/evalRunner/fetchRunMetrics.worker.ts b/web/ee/src/lib/workers/evalRunner/fetchRunMetrics.worker.ts
similarity index 100%
rename from web/oss/src/lib/evalRunner/fetchRunMetrics.worker.ts
rename to web/ee/src/lib/workers/evalRunner/fetchRunMetrics.worker.ts
diff --git a/web/oss/src/lib/evalRunner/fetchSteps.worker.ts b/web/ee/src/lib/workers/evalRunner/fetchSteps.worker.ts
similarity index 100%
rename from web/oss/src/lib/evalRunner/fetchSteps.worker.ts
rename to web/ee/src/lib/workers/evalRunner/fetchSteps.worker.ts
diff --git a/web/oss/src/lib/evalRunner/pureEnrichment.ts b/web/ee/src/lib/workers/evalRunner/pureEnrichment.ts
similarity index 100%
rename from web/oss/src/lib/evalRunner/pureEnrichment.ts
rename to web/ee/src/lib/workers/evalRunner/pureEnrichment.ts
diff --git a/web/oss/src/lib/evalRunner/runMetricsWorker.ts b/web/ee/src/lib/workers/evalRunner/runMetricsWorker.ts
similarity index 100%
rename from web/oss/src/lib/evalRunner/runMetricsWorker.ts
rename to web/ee/src/lib/workers/evalRunner/runMetricsWorker.ts
diff --git a/web/oss/src/lib/evalRunner/scenarioListWorker.ts b/web/ee/src/lib/workers/evalRunner/scenarioListWorker.ts
similarity index 100%
rename from web/oss/src/lib/evalRunner/scenarioListWorker.ts
rename to web/ee/src/lib/workers/evalRunner/scenarioListWorker.ts
diff --git a/web/oss/src/lib/evalRunner/types.ts b/web/ee/src/lib/workers/evalRunner/types.ts
similarity index 89%
rename from web/oss/src/lib/evalRunner/types.ts
rename to web/ee/src/lib/workers/evalRunner/types.ts
index 16f1173255..1b98796efd 100644
--- a/web/oss/src/lib/evalRunner/types.ts
+++ b/web/ee/src/lib/workers/evalRunner/types.ts
@@ -1,6 +1,6 @@
import {EvaluationStatus} from "@/oss/lib/Types"
-import {IStepResponse} from "@agenta/oss/src/lib/hooks/useEvaluationRunScenarioSteps/types"
+import {IStepResponse} from "../../hooks/useEvaluationRunScenarioSteps/types"
export interface RunEvalMessage {
type: "run-invocation"
diff --git a/web/oss/src/lib/evalRunner/workerFetch.ts b/web/ee/src/lib/workers/evalRunner/workerFetch.ts
similarity index 98%
rename from web/oss/src/lib/evalRunner/workerFetch.ts
rename to web/ee/src/lib/workers/evalRunner/workerFetch.ts
index 2cf69dc70b..2e45a07575 100644
--- a/web/oss/src/lib/evalRunner/workerFetch.ts
+++ b/web/ee/src/lib/workers/evalRunner/workerFetch.ts
@@ -20,8 +20,8 @@ import {PreviewTestcase, PreviewTestset} from "@/oss/lib/Types"
import {
deserializeRunIndex,
RunIndex,
-} from "@agenta/oss/src/lib/hooks/useEvaluationRunData/assets/helpers/buildRunIndex"
-import {EvalRunDataContextType} from "@agenta/oss/src/lib/hooks/useEvaluationRunData/types"
+} from "../../hooks/useEvaluationRunData/assets/helpers/buildRunIndex"
+import {EvalRunDataContextType} from "../../hooks/useEvaluationRunData/types"
import {
buildScenarioCore,
diff --git a/web/ee/src/pages/w/[workspace_id]/p/[project_id]/evaluators/configure/[evaluator_id].tsx b/web/ee/src/pages/w/[workspace_id]/p/[project_id]/evaluators/configure/[evaluator_id].tsx
index 97047a940b..df1b8461be 100644
--- a/web/ee/src/pages/w/[workspace_id]/p/[project_id]/evaluators/configure/[evaluator_id].tsx
+++ b/web/ee/src/pages/w/[workspace_id]/p/[project_id]/evaluators/configure/[evaluator_id].tsx
@@ -1,3 +1,20 @@
-import EvaluatorConfigureRoute from "@agenta/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluators/configure/[evaluator_id]"
+import {useMemo} from "react"
+
+import {useRouter} from "next/router"
+
+import ConfigureEvaluatorPage from "@/oss/components/Evaluators/components/ConfigureEvaluator"
+
+// Page component for the evaluator configure route. Reads `evaluator_id` from
+// the router query and normalises it to a single string or null.
+// NOTE(review): the JSX after `return` is not visible in this view — presumably
+// it renders ConfigureEvaluatorPage with the evaluator id; confirm against the file.
+const EvaluatorConfigureRoute = () => {
+ const router = useRouter()
+ const evaluatorId = useMemo(() => {
+ const id = router.query.evaluator_id
+ // A query parameter given more than once arrives as an array; use the first value.
+ if (Array.isArray(id)) {
+ return id[0]
+ }
+ return id ?? null
+ }, [router.query.evaluator_id])
+
+ return
+}
export default EvaluatorConfigureRoute
diff --git a/web/ee/src/pages/w/[workspace_id]/p/[project_id]/evaluators/index.tsx b/web/ee/src/pages/w/[workspace_id]/p/[project_id]/evaluators/index.tsx
index a5cb6daf29..7996228a65 100644
--- a/web/ee/src/pages/w/[workspace_id]/p/[project_id]/evaluators/index.tsx
+++ b/web/ee/src/pages/w/[workspace_id]/p/[project_id]/evaluators/index.tsx
@@ -1,3 +1,7 @@
-import ProjectEvaluatorsPage from "@agenta/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluators/index"
+import EvaluatorsRegistry from "@/oss/components/Evaluators"
+
+const ProjectEvaluatorsPage = () => { // page component; exported as this file's default
+ return // NOTE(review): bare `return` yields undefined and the imported `EvaluatorsRegistry` is never used — the JSX element looks to be missing; confirm
+}
export default ProjectEvaluatorsPage
diff --git a/web/oss/src/services/evaluationRuns/api/index.ts b/web/ee/src/services/evaluationRuns/api/index.ts
similarity index 100%
rename from web/oss/src/services/evaluationRuns/api/index.ts
rename to web/ee/src/services/evaluationRuns/api/index.ts
diff --git a/web/oss/src/services/evaluationRuns/api/types.ts b/web/ee/src/services/evaluationRuns/api/types.ts
similarity index 100%
rename from web/oss/src/services/evaluationRuns/api/types.ts
rename to web/ee/src/services/evaluationRuns/api/types.ts
diff --git a/web/ee/src/services/evaluationRuns/utils.ts b/web/ee/src/services/evaluationRuns/utils.ts
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/web/oss/src/services/evaluations/api/index.ts b/web/ee/src/services/evaluations/api/index.ts
similarity index 100%
rename from web/oss/src/services/evaluations/api/index.ts
rename to web/ee/src/services/evaluations/api/index.ts
diff --git a/web/oss/src/services/evaluations/api_ee/index.ts b/web/ee/src/services/evaluations/api_ee/index.ts
similarity index 97%
rename from web/oss/src/services/evaluations/api_ee/index.ts
rename to web/ee/src/services/evaluations/api_ee/index.ts
index cab7fe809b..4ae4376f4d 100644
--- a/web/oss/src/services/evaluations/api_ee/index.ts
+++ b/web/ee/src/services/evaluations/api_ee/index.ts
@@ -14,7 +14,7 @@ import {
EvaluatorMappingInput,
EvaluatorMappingOutput,
EvaluatorOutputInterface,
-} from "@agenta/oss/src/lib/types_ee"
+} from "../../../lib/types_ee"
export const createEvaluatorDataMapping = async (
config: EvaluatorMappingInput,
diff --git a/web/oss/src/services/evaluations/workerUtils.ts b/web/ee/src/services/evaluations/workerUtils.ts
similarity index 100%
rename from web/oss/src/services/evaluations/workerUtils.ts
rename to web/ee/src/services/evaluations/workerUtils.ts
diff --git a/web/oss/src/services/human-evaluations/api/index.ts b/web/ee/src/services/human-evaluations/api/index.ts
similarity index 100%
rename from web/oss/src/services/human-evaluations/api/index.ts
rename to web/ee/src/services/human-evaluations/api/index.ts
diff --git a/web/oss/src/services/human-evaluations/hooks/useEvaluationResults.ts b/web/ee/src/services/human-evaluations/hooks/useEvaluationResults.ts
similarity index 100%
rename from web/oss/src/services/human-evaluations/hooks/useEvaluationResults.ts
rename to web/ee/src/services/human-evaluations/hooks/useEvaluationResults.ts
diff --git a/web/ee/src/services/onlineEvaluations/api.ts b/web/ee/src/services/onlineEvaluations/api.ts
new file mode 100644
index 0000000000..e0650b45d9
--- /dev/null
+++ b/web/ee/src/services/onlineEvaluations/api.ts
@@ -0,0 +1,188 @@
+import axios from "@/oss/lib/api/assets/axiosConfig"
+import {getAgentaApiUrl} from "@/oss/lib/helpers/api"
+import {getProjectValues} from "@/oss/state/project"
+
+type LogicalOperator = "and" | "or" | "not" | "nand" | "nor" // values accepted by QueryFilteringPayload.operator
+
+export interface QueryConditionPayload { // one non-nested entry of QueryFilteringPayload.conditions; only `field` is required
+ field: string
+ key?: string
+ value?: unknown
+ operator?: string
+ options?: Record // NOTE(review): bare `Record` — its key/value type arguments are missing; confirm intended types
+}
+
+export interface QueryFilteringPayload { // recursive filter group: `conditions` may hold plain conditions or nested groups
+ operator?: LogicalOperator
+ conditions: (QueryConditionPayload | QueryFilteringPayload)[]
+}
+
+export interface QueryWindowingPayload { // windowing options of a query; every field is optional
+ newest?: string // presumably a timestamp bound, like `oldest` — TODO confirm expected format
+ oldest?: string
+ next?: string
+ limit?: number
+ order?: "ascending" | "descending"
+ interval?: number
+ rate?: number
+}
+
+export interface QueryRevisionDataPayload { // `data` shape shared by the query and query-revision types in this file
+ filtering?: QueryFilteringPayload
+ windowing?: QueryWindowingPayload
+}
+
+export interface SimpleQueryCreatePayload { // body of SimpleQueryCreateRequest.query; only `slug` is required
+ slug: string
+ name?: string
+ description?: string
+ flags?: Record // NOTE(review): bare `Record` — type arguments missing; confirm intended types
+ tags?: Record // NOTE(review): bare `Record` — type arguments missing; confirm intended types
+ meta?: Record // NOTE(review): bare `Record` — type arguments missing; confirm intended types
+ data?: QueryRevisionDataPayload
+}
+
+export interface SimpleQueryCreateRequest { // request envelope accepted by createSimpleQuery
+ query: SimpleQueryCreatePayload
+}
+
+export interface SimpleQueryResponse { // shape createSimpleQuery casts the response body to (not validated at runtime)
+ count: number
+ query?: {
+ id: string
+ slug?: string
+ data?: QueryRevisionDataPayload
+ meta?: Record // NOTE(review): bare `Record` — type arguments missing; confirm intended types
+ } | null
+}
+
+export interface QueryRevisionRetrieveRequest { // request accepted by retrieveQueryRevision; each ref is optional and nullable
+ query_ref?: {id?: string; slug?: string} | null
+ query_variant_ref?: {id?: string; slug?: string} | null
+ query_revision_ref?: {id?: string; slug?: string} | null
+}
+
+export interface QueryRevisionResponse { // shape retrieveQueryRevision casts the response body to (not validated at runtime)
+ count: number
+ query_revision?: {
+ id?: string
+ slug?: string
+ variant_id?: string
+ version?: string | number
+ data?: QueryRevisionDataPayload
+ } | null
+}
+
+export interface SimpleEvaluationFlagsPayload { // optional boolean flags; used by the create, read and query-filter types below
+ is_live?: boolean
+ is_closed?: boolean
+ is_active?: boolean
+}
+
+export interface SimpleEvaluationDataPayload { // `data` section of an evaluation payload; every field is optional
+ status?: string
+ query_steps?: string[] | Record // NOTE(review): bare `Record` — type arguments missing; confirm intended types
+ testset_steps?: Record // NOTE(review): bare `Record` — type arguments missing; confirm intended types
+ application_steps?: Record // NOTE(review): bare `Record` — type arguments missing; confirm intended types
+ evaluator_steps?: string[] | Record // NOTE(review): bare `Record` — type arguments missing; confirm intended types
+ repeats?: number
+ // Structured references for online evaluations
+ query_ref?: {id?: string; slug?: string} | null
+ query_revision_ref?: {id?: string; slug?: string} | null
+ evaluator_ref?: {id?: string; slug?: string} | null
+ configuration?: Record // NOTE(review): bare `Record` — type arguments missing; confirm intended types
+}
+
+export interface SimpleEvaluationCreatePayload { // body of SimpleEvaluationCreateRequest.evaluation; only `data` is required
+ name?: string
+ description?: string
+ flags?: SimpleEvaluationFlagsPayload
+ tags?: Record // NOTE(review): bare `Record` — type arguments missing; confirm intended types
+ meta?: Record // NOTE(review): bare `Record` — type arguments missing; confirm intended types
+ data: SimpleEvaluationDataPayload
+}
+
+export interface SimpleEvaluationCreateRequest { // request envelope accepted by createSimpleEvaluation
+ evaluation: SimpleEvaluationCreatePayload
+}
+
+export interface SimpleEvaluationResponse { // shape createSimpleEvaluation casts the response body to (not validated at runtime)
+ count: number
+ evaluation?: SimpleEvaluationPayload | null
+}
+
+export interface SimpleEvaluationPayload { // one evaluation as it appears in the response types of this file; every field is optional
+ id?: string
+ slug?: string
+ name?: string
+ description?: string
+ created_at?: string
+ updated_at?: string
+ created_by_id?: string
+ updated_by_id?: string
+ flags?: SimpleEvaluationFlagsPayload
+ data?: SimpleEvaluationDataPayload
+ meta?: Record // NOTE(review): bare `Record` — type arguments missing; confirm intended types
+ tags?: Record // NOTE(review): bare `Record` — type arguments missing; confirm intended types
+}
+
+export interface SimpleEvaluationsResponse { // shape querySimpleEvaluations casts the response body to (not validated at runtime)
+ count: number
+ evaluations: SimpleEvaluationPayload[]
+}
+
+export interface SimpleEvaluationsQueryRequest { // optional filter body accepted by querySimpleEvaluations
+ evaluation?: {
+ flags?: SimpleEvaluationFlagsPayload
+ ids?: string[]
+ }
+ tags?: Record // NOTE(review): bare `Record` — type arguments missing; confirm intended types
+ meta?: Record // NOTE(review): bare `Record` — type arguments missing; confirm intended types
+}
+
+const getProjectUrl = (path: string) => { // absolute API URL for `path` with the current project id appended as `?project_id=`
+    const activeProjectId = getProjectValues().projectId
+    return `${getAgentaApiUrl()}${path}?project_id=${activeProjectId}`
+}
+
+/** POST `payload` to the project-scoped simple-queries endpoint; resolves to the response body. */
+export const createSimpleQuery = async (
+    payload: SimpleQueryCreateRequest,
+): Promise<SimpleQueryResponse> => {
+    return (await axios.post(getProjectUrl("/preview/simple/queries/"), payload)).data
+}
+
+/** POST the given refs to the query-revision retrieve endpoint; resolves to the response body. */
+export const retrieveQueryRevision = async (
+    payload: QueryRevisionRetrieveRequest,
+): Promise<QueryRevisionResponse> => {
+    return (await axios.post(getProjectUrl("/preview/queries/revisions/retrieve"), payload)).data
+}
+
+/** POST `payload` to the project-scoped simple-evaluations endpoint; resolves to the response body. */
+export const createSimpleEvaluation = async (
+    payload: SimpleEvaluationCreateRequest,
+): Promise<SimpleEvaluationResponse> => {
+    return (await axios.post(getProjectUrl("/preview/simple/evaluations/"), payload)).data
+}
+
+/** POST a filter (`{}` when `payload` is omitted) to the evaluations query endpoint; resolves to the body. */
+export const querySimpleEvaluations = async (
+    payload?: SimpleEvaluationsQueryRequest,
+): Promise<SimpleEvaluationsResponse> => {
+    const url = getProjectUrl("/preview/simple/evaluations/query")
+    const {data} = await axios.post(url, payload ?? {})
+    return data as SimpleEvaluationsResponse
+}
+
+/** POST (without a body) to the evaluation's `/stop` endpoint; resolves to the raw response body. */
+export const stopSimpleEvaluation = async (evaluationId: string) => {
+    const stopEndpoint = getProjectUrl(`/preview/simple/evaluations/${evaluationId}/stop`)
+    return (await axios.post(stopEndpoint)).data
+}
+
+/** POST (without a body) to the evaluation's `/start` endpoint; resolves to the raw response body. */
+export const startSimpleEvaluation = async (evaluationId: string) => {
+    const startEndpoint = getProjectUrl(`/preview/simple/evaluations/${evaluationId}/start`)
+    return (await axios.post(startEndpoint)).data
+}
diff --git a/web/ee/src/services/promptVersioning/api/index.ts b/web/ee/src/services/promptVersioning/api/index.ts
new file mode 100644
index 0000000000..d51cd8ac75
--- /dev/null
+++ b/web/ee/src/services/promptVersioning/api/index.ts
@@ -0,0 +1,41 @@
+import axios from "@/oss/lib/api/assets/axiosConfig"
+import {getAgentaApiUrl} from "@/oss/lib/helpers/api"
+import {getProjectValues} from "@/oss/state/project"
+
+//Prefix convention:
+// - fetch: GET single entity from server
+// - fetchAll: GET all entities from server
+// - create: POST data to server
+// - update: PUT data to server
+// - delete: DELETE data from server
+
+// versioning
+/**
+ * GET the revisions of variant `variantId`, scoped to the current project; resolves to the response body.
+ * `ignoreAxiosError` is forwarded in the request config as `_ignoreError`.
+ */
+export const fetchAllPromptVersioning = async (variantId: string, ignoreAxiosError = false) => {
+    const {projectId} = getProjectValues()
+    const url = `${getAgentaApiUrl()}/variants/${variantId}/revisions?project_id=${projectId}`
+    // `_ignoreError` is a non-standard config key, so the config object is cast to `any`.
+    const requestConfig = {_ignoreError: ignoreAxiosError} as any
+    const response = await axios.get(url, requestConfig)
+    return response.data
+}
+
+/**
+ * GET revision `revisionNumber` of variant `variantId`, scoped to the current project.
+ * Resolves to the response body; `ignoreAxiosError` is forwarded as the `_ignoreError` config key.
+ */
+export const fetchPromptRevision = async (
+    variantId: string,
+    revisionNumber: number,
+    ignoreAxiosError = false,
+) => {
+    const {projectId} = getProjectValues()
+    const revisionPath = `/variants/${variantId}/revisions/${revisionNumber}`
+    const url = `${getAgentaApiUrl()}${revisionPath}?project_id=${projectId}`
+    const requestConfig = {_ignoreError: ignoreAxiosError} as any
+    const response = await axios.get(url, requestConfig)
+    return response.data
+}
diff --git a/web/oss/src/services/runMetrics/api/assets/contants.ts b/web/ee/src/services/runMetrics/api/assets/contants.ts
similarity index 100%
rename from web/oss/src/services/runMetrics/api/assets/contants.ts
rename to web/ee/src/services/runMetrics/api/assets/contants.ts
diff --git a/web/oss/src/services/runMetrics/api/index.ts b/web/ee/src/services/runMetrics/api/index.ts
similarity index 100%
rename from web/oss/src/services/runMetrics/api/index.ts
rename to web/ee/src/services/runMetrics/api/index.ts
diff --git a/web/oss/src/services/runMetrics/api/types.ts b/web/ee/src/services/runMetrics/api/types.ts
similarity index 100%
rename from web/oss/src/services/runMetrics/api/types.ts
rename to web/ee/src/services/runMetrics/api/types.ts
diff --git a/web/oss/src/services/variantConfigs/api/index.ts b/web/ee/src/services/variantConfigs/api/index.ts
similarity index 100%
rename from web/oss/src/services/variantConfigs/api/index.ts
rename to web/ee/src/services/variantConfigs/api/index.ts
diff --git a/web/oss/src/state/url/focusDrawer.ts b/web/ee/src/state/url/focusDrawer.ts
similarity index 100%
rename from web/oss/src/state/url/focusDrawer.ts
rename to web/ee/src/state/url/focusDrawer.ts
diff --git a/web/oss/next.config.ts b/web/oss/next.config.ts
index fd65177200..be4f43f167 100644
--- a/web/oss/next.config.ts
+++ b/web/oss/next.config.ts
@@ -1,13 +1,7 @@
-import {createRequire} from "module"
import path from "path"
import type {NextConfig} from "next"
-const require = createRequire(import.meta.url)
-const reduxToolkitCjsEntry = path.join(
- path.dirname(require.resolve("@reduxjs/toolkit/package.json")),
- "dist/cjs/index.js",
-)
const isDevelopment = process.env.NODE_ENV === "development"
const COMMON_CONFIG: NextConfig = {
@@ -68,12 +62,6 @@ const COMMON_CONFIG: NextConfig = {
"@ant-design/icons-svg",
],
webpack: (config, {webpack, isServer}) => {
- config.resolve ??= {}
- config.resolve.alias = {
- ...(config.resolve.alias ?? {}),
- "@reduxjs/toolkit": reduxToolkitCjsEntry,
- }
-
const envs: Record = {}
config.cache = false
diff --git a/web/oss/package.json b/web/oss/package.json
index e5e89e0037..520e433bde 100644
--- a/web/oss/package.json
+++ b/web/oss/package.json
@@ -1,6 +1,6 @@
{
"name": "@agenta/oss",
- "version": "0.62.1",
+ "version": "0.61.2",
"private": true,
"engines": {
"node": ">=18"
@@ -113,7 +113,6 @@
"react-resizable": "^3.0.5",
"react-syntax-highlighter": "^15.6.0",
"react-window": "^1.8.11",
- "recharts": "^3.1.0",
"semver": "^7.7.2",
"shiki": "^3.12.2",
"stable-hash": "^0.0.6",
diff --git a/web/oss/src/components/Sidebar/hooks/useSidebarConfig/index.tsx b/web/oss/src/components/Sidebar/hooks/useSidebarConfig/index.tsx
index 2b52c69b21..dd8fa3cbbd 100644
--- a/web/oss/src/components/Sidebar/hooks/useSidebarConfig/index.tsx
+++ b/web/oss/src/components/Sidebar/hooks/useSidebarConfig/index.tsx
@@ -55,14 +55,14 @@ export const useSidebarConfig = () => {
key: "project-evaluators-link",
title: "Evaluators",
link: `${projectURL}/evaluators`,
- // isHidden: !isDemo(),
+ isHidden: !isDemo(),
icon: ,
},
{
key: "project-evaluations-link",
title: "Evaluations",
link: `${projectURL}/evaluations`,
- // isHidden: !isDemo(),
+ isHidden: !isDemo(),
icon: ,
},
{
@@ -96,7 +96,7 @@ export const useSidebarConfig = () => {
key: "app-evaluations-link",
title: "Evaluations",
link: `${appURL || recentlyVisitedAppURL}/evaluations`,
- isHidden: !currentApp && !recentlyVisitedAppId,
+ isHidden: (!currentApp && !recentlyVisitedAppId) || !isDemo(),
icon: ,
},
{
diff --git a/web/oss/src/lib/atoms/breadcrumb/index.ts b/web/oss/src/lib/atoms/breadcrumb/index.ts
index f32ee2c155..9a88b979e8 100644
--- a/web/oss/src/lib/atoms/breadcrumb/index.ts
+++ b/web/oss/src/lib/atoms/breadcrumb/index.ts
@@ -56,6 +56,25 @@ export const prependBreadcrumbAtom = atom(null, (get, set, item: BreadcrumbAtom)
set(breadcrumbOverridesAtom, {...item, ...current})
})
+// Write-only atom: removes the given keys from the breadcrumb overrides.
+// The overrides atom is written only when at least one of the keys was present.
+export const removeBreadcrumbsAtom = atom(null, (get, set, keys: string[] | undefined) => {
+    if (!keys?.length) return
+
+    const overrides = get(breadcrumbOverridesAtom) || {}
+    const remaining = {...overrides}
+
+    // Only keys that exist as own properties of the overrides count as a change.
+    const present = keys.filter((key) => Object.prototype.hasOwnProperty.call(remaining, key))
+    if (present.length === 0) return
+
+    present.forEach((key) => {
+        delete remaining[key]
+    })
+
+    set(breadcrumbOverridesAtom, remaining)
+})
+
// Helper atom to clear breadcrumbs (reset to URL-based)
export const clearBreadcrumbsAtom = atom(null, (get, set) => {
set(breadcrumbOverridesAtom, {})
diff --git a/web/oss/src/lib/helpers/buildBreadcrumbs.ts b/web/oss/src/lib/helpers/buildBreadcrumbs.ts
index 5b36db44eb..2f920c98d6 100644
--- a/web/oss/src/lib/helpers/buildBreadcrumbs.ts
+++ b/web/oss/src/lib/helpers/buildBreadcrumbs.ts
@@ -2,7 +2,7 @@ import {BreadcrumbAtom} from "@/oss/lib/atoms/breadcrumb/types"
import {isUuid} from "@/oss/lib/helpers/utils"
import {ListAppsItem} from "@/oss/lib/Types"
-const IGNORE_PATHS = new Set(["testsets", "evaluations", "settings"])
+const IGNORE_PATHS = new Set(["testsets", "evaluations", "settings", "configure"])
export interface BreadcrumbContext {
uriPath: string
diff --git a/web/oss/src/lib/helpers/evaluate.ts b/web/oss/src/lib/helpers/evaluate.ts
index c3c280f3e1..b172237700 100644
--- a/web/oss/src/lib/helpers/evaluate.ts
+++ b/web/oss/src/lib/helpers/evaluate.ts
@@ -1,341 +1,3 @@
-import {EvaluationType} from "@agenta/oss/src/lib/enums"
-import {convertToCsv, downloadCsv} from "@agenta/oss/src/lib/helpers/fileManipulations"
-import {formatCurrency, formatLatency} from "@agenta/oss/src/lib/helpers/formatters"
-import {isDemo} from "@agenta/oss/src/lib/helpers/utils"
-import {
- Evaluation,
- GenericObject,
- TypedValue,
- Variant,
- _Evaluation,
- EvaluationScenario,
-} from "@agenta/oss/src/lib/Types"
-import dayjs from "dayjs"
-import capitalize from "lodash/capitalize"
-import round from "lodash/round"
-
-import AlertPopup from "@/oss/components/AlertPopup/AlertPopup"
-import {runningStatuses} from "@/oss/components/pages/evaluations/cellRenderers/cellRenderers"
-import {
- HumanEvaluationListTableDataType,
- SingleModelEvaluationListTableDataType,
-} from "@/oss/lib/Types"
-import {fetchEvaluatonIdsByResource} from "@/oss/services/evaluations/api"
-
-export const exportABTestingEvaluationData = (
- evaluation: Evaluation,
- scenarios: EvaluationScenario[],
- rows: GenericObject[],
-) => {
- const exportRow = rows.map((data, ix) => {
- const inputColumns = evaluation.testset.testsetChatColumn
- ? {Input: evaluation.testset.csvdata[ix]?.[evaluation.testset.testsetChatColumn]}
- : data.inputs.reduce(
- (columns: any, input: {input_name: string; input_value: string}) => {
- columns[`${input.input_name}`] = input.input_value
- return columns
- },
- {},
- )
- return {
- ...inputColumns,
- [`App Variant ${evaluation.variants[0].variantName} Output 0`]: data?.columnData0
- ? data?.columnData0
- : data.outputs[0]?.variant_output,
- [`App Variant ${evaluation.variants[1].variantName} Output 1`]: data?.columnData1
- ? data?.columnData1
- : data.outputs[1]?.variant_output,
- ["Vote"]:
- evaluation.variants.find((v: Variant) => v.variantId === data.vote)?.variantName ||
- data.vote,
- ["Expected Output"]:
- scenarios[ix]?.correctAnswer || evaluation.testset.csvdata[ix].correct_answer,
- ["Additional notes"]: scenarios[ix]?.note,
- }
- })
- const exportCol = Object.keys(exportRow[0])
-
- const csvData = convertToCsv(exportRow, exportCol)
- const filename = `${evaluation.appName}_${evaluation.variants[0].variantName}_${evaluation.variants[1].variantName}_${evaluation.evaluationType}.csv`
- downloadCsv(csvData, filename)
-}
-
-export const exportSingleModelEvaluationData = (
- evaluation: Evaluation,
- scenarios: EvaluationScenario[],
- rows: GenericObject[],
-) => {
- const exportRow = rows.map((data, ix) => {
- const inputColumns = evaluation.testset.testsetChatColumn
- ? {Input: evaluation.testset.csvdata[ix]?.[evaluation.testset.testsetChatColumn]}
- : data.inputs.reduce(
- (columns: any, input: {input_name: string; input_value: string}) => {
- columns[`${input.input_name}`] = input.input_value
- return columns
- },
- {},
- )
- const numericScore = parseInt(data.score)
- return {
- ...inputColumns,
- [`App Variant ${evaluation.variants[0].variantName} Output 0`]: data?.columnData0
- ? data?.columnData0
- : data.outputs[0]?.variant_output,
- ["Score"]: isNaN(numericScore) ? "-" : numericScore,
- ["Expected Output"]:
- scenarios[ix]?.correctAnswer || evaluation.testset.csvdata[ix].correct_answer,
- ["Additional notes"]: scenarios[ix]?.note,
- }
- })
- const exportCol = Object.keys(exportRow[0])
-
- const csvData = convertToCsv(exportRow, exportCol)
- const filename = `${evaluation.appName}_${evaluation.variants[0].variantName}_${evaluation.evaluationType}.csv`
- downloadCsv(csvData, filename)
-}
-
-export const calculateResultsDataAvg = (resultsData: Record, multiplier = 10) => {
- const obj = {...resultsData}
- Object.keys(obj).forEach((key) => {
- if (isNaN(+key)) delete obj[key]
- })
-
- const count = Object.values(obj).reduce((acc, value) => acc + +value, 0)
- const sum = Object.keys(obj).reduce((acc, key) => acc + (parseFloat(key) || 0) * +obj[key], 0)
- return (sum / count) * multiplier
-}
-
-export const getVotesPercentage = (record: HumanEvaluationListTableDataType, index: number) => {
- const variant = record.votesData.variants[index]
- return record.votesData.variants_votes_data[variant]?.percentage
-}
-
-export const checkIfResourceValidForDeletion = async (
- data: Omit[0], "appId">,
-) => {
- if (isDemo()) {
- const response = await fetchEvaluatonIdsByResource(data)
- if (response.data.length > 0) {
- const name =
- (data.resourceType === "testset"
- ? "Testset"
- : data.resourceType === "evaluator_config"
- ? "Evaluator"
- : "Variant") + (data.resourceIds.length > 1 ? "s" : "")
-
- const suffix = response.data.length > 1 ? "s" : ""
- AlertPopup({
- title: `${name} is in use`,
- message: `The ${name} is currently in used by ${response.data.length} evaluation${suffix}. Please delete the evaluation${suffix} first.`,
- cancelText: null,
- okText: "Ok",
- })
- return false
- }
- }
+export const checkIfResourceValidForDeletion = async (data: any) => { // stub: `data` is not inspected; always resolves to true
return true
}
-
-export function getTypedValue(res?: TypedValue) {
- const {value, type, error} = res || {}
- if (type === "error") {
- return error?.message
- }
-
- if (value === undefined) return "-"
-
- switch (type) {
- case "number":
- return round(Number(value), 2)
- case "boolean":
- case "bool":
- return capitalize(value?.toString())
- case "cost":
- return formatCurrency(Number(value))
- case "latency":
- return formatLatency(Number(value))
- case "string":
- case "text":
- return value?.toString() ?? "-"
- case "code":
- case "regex":
- return value?.toString() ?? "-"
- case "object":
- return typeof value === "object"
- ? JSON.stringify(value, null, 2)
- : (value?.toString() ?? "-")
- case "messages":
- return Array.isArray(value)
- ? value
- .map((msg) => (typeof msg === "string" ? msg : JSON.stringify(msg)))
- .join("\n")
- : (value?.toString() ?? "-")
- case "multiple_choice":
- return Array.isArray(value) ? value.join(", ") : (value?.toString() ?? "-")
- case "hidden":
- return "-"
- default:
- return value?.toString() ?? "-"
- }
-}
-
-type CellDataType = "number" | "text" | "date"
-export function getFilterParams(type: CellDataType) {
- const filterParams: GenericObject = {}
- if (type == "date") {
- filterParams.comparator = function (
- filterLocalDateAtMidnight: Date,
- cellValue: string | null,
- ) {
- if (cellValue == null) return -1
- const cellDate = dayjs(cellValue).startOf("day").toDate()
- if (filterLocalDateAtMidnight.getTime() === cellDate.getTime()) {
- return 0
- }
- if (cellDate < filterLocalDateAtMidnight) {
- return -1
- }
- if (cellDate > filterLocalDateAtMidnight) {
- return 1
- }
- }
- }
-
- return {
- sortable: true,
- floatingFilter: true,
- filter:
- type === "number"
- ? "agNumberColumnFilter"
- : type === "date"
- ? "agDateColumnFilter"
- : "agTextColumnFilter",
- cellDataType: type === "number" ? "text" : type,
- filterParams,
- comparator: getCustomComparator(type),
- }
-}
-
-export const calcEvalDuration = (evaluation: _Evaluation) => {
- return dayjs(
- runningStatuses.includes(evaluation.status.value) ? Date.now() : evaluation.updated_at,
- ).diff(dayjs(evaluation.created_at), "milliseconds")
-}
-
-const getCustomComparator = (type: CellDataType) => (valueA: string, valueB: string) => {
- const getNumber = (val: string) => {
- const num = parseFloat(val || "0")
- return isNaN(num) ? 0 : num
- }
-
- valueA = String(valueA)
- valueB = String(valueB)
-
- switch (type) {
- case "date":
- return dayjs(valueA).diff(dayjs(valueB))
- case "text":
- return valueA.localeCompare(valueB)
- case "number":
- return getNumber(valueA) - getNumber(valueB)
- default:
- return 0
- }
-}
-
-export const removeCorrectAnswerPrefix = (str: string) => {
- return str.replace(/^correctAnswer_/, "")
-}
-
-export const mapTestcaseAndEvalValues = (
- settingsValues: Record,
- selectedTestcase: Record,
-) => {
- const testcaseObj: Record = {}
- const evalMapObj: Record = {}
-
- Object.entries(settingsValues).forEach(([key, value]) => {
- if (typeof value === "string" && value.startsWith("testcase.")) {
- testcaseObj[key] = selectedTestcase[value.split(".")[1]]
- } else {
- evalMapObj[key] = value
- }
- })
-
- return {testcaseObj, evalMapObj}
-}
-
-export const transformTraceKeysInSettings = (
- settingsValues: Record,
-): Record => {
- return Object.keys(settingsValues).reduce(
- (acc, curr) => {
- if (
- !acc[curr] &&
- typeof settingsValues[curr] === "string" &&
- settingsValues[curr].startsWith("trace.")
- ) {
- acc[curr] = settingsValues[curr].replace("trace.", "")
- } else {
- acc[curr] = settingsValues[curr]
- }
-
- return acc
- },
- {} as Record,
- )
-}
-
-export const getEvaluatorTags = () => {
- const evaluatorTags = [
- {
- label: "Classifiers",
- value: "classifiers",
- },
- {
- label: "Similarity",
- value: "similarity",
- },
- {
- label: "AI / LLM",
- value: "ai_llm",
- },
- {
- label: "Functional",
- value: "functional",
- },
- ]
-
- if (isDemo()) {
- evaluatorTags.unshift({
- label: "RAG",
- value: "rag",
- })
- }
-
- return evaluatorTags
-}
-
-export const calculateAvgScore = (evaluation: SingleModelEvaluationListTableDataType) => {
- let score = 0
- if (evaluation.scoresData) {
- score =
- ((evaluation.scoresData.correct?.length || evaluation.scoresData.true?.length || 0) /
- evaluation.scoresData.nb_of_rows) *
- 100
- } else if (evaluation.resultsData) {
- const multiplier = {
- [EvaluationType.auto_webhook_test]: 100,
- [EvaluationType.single_model_test]: 1,
- }
- score = calculateResultsDataAvg(
- evaluation.resultsData,
- multiplier[evaluation.evaluationType as keyof typeof multiplier],
- )
- score = isNaN(score) ? 0 : score
- } else if (evaluation.avgScore) {
- score = evaluation.avgScore * 100
- }
-
- return score
-}
diff --git a/web/oss/src/lib/hooks/useBreadcrumbs.ts b/web/oss/src/lib/hooks/useBreadcrumbs.ts
index ace4c83327..6d0228f5c4 100644
--- a/web/oss/src/lib/hooks/useBreadcrumbs.ts
+++ b/web/oss/src/lib/hooks/useBreadcrumbs.ts
@@ -6,6 +6,7 @@ import {
appendBreadcrumbAtom,
clearBreadcrumbsAtom,
prependBreadcrumbAtom,
+ removeBreadcrumbsAtom,
setBreadcrumbsAtom,
type BreadcrumbAtom,
} from "@/oss/lib/atoms/breadcrumb"
@@ -15,12 +16,14 @@ export const useBreadcrumbs = () => {
const appendBreadcrumb = useSetAtom(appendBreadcrumbAtom)
const prependBreadcrumb = useSetAtom(prependBreadcrumbAtom)
const clearBreadcrumbs = useSetAtom(clearBreadcrumbsAtom)
+ const removeBreadcrumbs = useSetAtom(removeBreadcrumbsAtom)
return {
setBreadcrumbs,
appendBreadcrumb,
prependBreadcrumb,
clearBreadcrumbs,
+ removeBreadcrumbs,
}
}
@@ -43,24 +46,37 @@ export const useBreadcrumbsEffect = (
}: {breadcrumbs: BreadcrumbAtom; type?: "prepend" | "append" | "new"; condition?: boolean},
deps: React.DependencyList = [],
) => {
- const {setBreadcrumbs, clearBreadcrumbs, appendBreadcrumb, prependBreadcrumb} = useBreadcrumbs()
+ const {
+ setBreadcrumbs,
+ clearBreadcrumbs,
+ appendBreadcrumb,
+ prependBreadcrumb,
+ removeBreadcrumbs,
+ } = useBreadcrumbs()
useEffect(() => {
- if (condition) {
- if (type === "prepend") {
- prependBreadcrumb(breadcrumbs)
- } else if (type === "append") {
- appendBreadcrumb(breadcrumbs)
- } else {
- setBreadcrumbs(breadcrumbs)
+ if (!condition) return
+
+ const keys = Object.keys(breadcrumbs)
+ if (!keys.length) return
+
+ if (type === "prepend") {
+ prependBreadcrumb(breadcrumbs)
+ return () => {
+ removeBreadcrumbs(keys)
}
}
- // Cleanup: reset to URL-based breadcrumbs when component unmounts
- return () => {
- if (type === "new") {
- clearBreadcrumbs()
+ if (type === "append") {
+ appendBreadcrumb(breadcrumbs)
+ return () => {
+ removeBreadcrumbs(keys)
}
}
+
+ setBreadcrumbs(breadcrumbs)
+ return () => {
+ clearBreadcrumbs()
+ }
}, deps)
}
diff --git a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/evaluations/human_a_b_testing/[evaluation_id]/index.tsx b/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/evaluations/human_a_b_testing/[evaluation_id]/index.tsx
deleted file mode 100644
index 76e6526898..0000000000
--- a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/evaluations/human_a_b_testing/[evaluation_id]/index.tsx
+++ /dev/null
@@ -1,115 +0,0 @@
-import {useEffect, useState} from "react"
-
-import {useAtom, useAtomValue} from "jotai"
-import dynamic from "next/dynamic"
-import {useRouter} from "next/router"
-
-// Avoid SSR for this heavy component to prevent server-side ReferenceErrors from client-only libs
-const ABTestingEvaluationTable = dynamic(
- () => import("@/oss/components/EvaluationTable/ABTestingEvaluationTable"),
- {ssr: false},
-)
-import useURL from "@/oss/hooks/useURL"
-import {evaluationAtom, evaluationScenariosAtom} from "@/oss/lib/atoms/evaluation"
-import {getTestsetChatColumn} from "@/oss/lib/helpers/testset"
-import {useBreadcrumbsEffect} from "@/oss/lib/hooks/useBreadcrumbs"
-import type {Evaluation} from "@/oss/lib/Types"
-import {
- fetchLoadEvaluation,
- fetchAllLoadEvaluationsScenarios,
-} from "@/oss/services/human-evaluations/api"
-import {fetchTestset} from "@/oss/services/testsets/api"
-import {projectIdAtom} from "@/oss/state/project"
-import {variantsAtom} from "@/oss/state/variant/atoms/fetcher"
-
-export default function Evaluation() {
- const router = useRouter()
- const projectId = useAtomValue(projectIdAtom)
- const evaluationTableId = router.query.evaluation_id
- ? router.query.evaluation_id.toString()
- : ""
- const [evaluationScenarios, setEvaluationScenarios] = useAtom(evaluationScenariosAtom)
- const [evaluation, setEvaluation] = useAtom(evaluationAtom)
- const [isLoading, setIsLoading] = useState(true)
- const appId = router.query.app_id as string
- const columnsCount = 2
- const {baseAppURL} = useURL()
- // variants from global store
- const variantsStore = useAtomValue(variantsAtom)
-
- useEffect(() => {
- if (!evaluation || !projectId) {
- return
- }
- const init = async () => {
- setIsLoading(true)
- try {
- const data = await fetchAllLoadEvaluationsScenarios(evaluationTableId, evaluation)
- setEvaluationScenarios(data)
- } finally {
- setTimeout(() => setIsLoading(false), 1000)
- }
- }
- init()
- }, [evaluation, projectId])
-
- useEffect(() => {
- if (!evaluationTableId) {
- return
- }
- const init = async () => {
- const evaluation: Evaluation = await fetchLoadEvaluation(evaluationTableId)
- const backendVariants = variantsStore
- const testset = await fetchTestset(evaluation.testset._id)
- // Create a map for faster access to first array elements
- const backendVariantsMap = new Map()
- backendVariants.forEach((obj) => backendVariantsMap.set(obj.variantId, obj))
-
- // Update variants in second object
- evaluation.variants = evaluation.variants.map((variant) => {
- const backendVariant = backendVariantsMap.get(variant.variantId)
- return backendVariant ? backendVariant : variant
- })
- evaluation.testset = {
- ...evaluation.testset,
- ...testset,
- testsetChatColumn: getTestsetChatColumn(testset.csvdata),
- }
- setEvaluation(evaluation)
- }
-
- init()
- }, [evaluationTableId])
-
- // breadcrumbs
- useBreadcrumbsEffect(
- {
- breadcrumbs: {
- appPage: {
- label: "human ab testing",
- href: `${baseAppURL}/${appId}/evaluations?selectedEvaluation=human_ab_testing`,
- },
- "eval-detail": {
- label: evaluationTableId,
- value: evaluationTableId,
- },
- },
- type: "append",
- condition: !!evaluationTableId,
- },
- [evaluationTableId],
- )
-
- return (
-
- {evaluationTableId && evaluationScenarios && evaluation && (
-
- )}
-
- )
-}
diff --git a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/evaluations/index.tsx b/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/evaluations/index.tsx
deleted file mode 100644
index 5f9c0ce406..0000000000
--- a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/evaluations/index.tsx
+++ /dev/null
@@ -1,7 +0,0 @@
-import EvaluationsView from "@/oss/components/pages/evaluations/EvaluationsView"
-
-const AppEvaluationsPage = () => {
- return
-}
-
-export default AppEvaluationsPage
diff --git a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/evaluations/results/[evaluation_id]/index.tsx b/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/evaluations/results/[evaluation_id]/index.tsx
deleted file mode 100644
index 8a3e7e4523..0000000000
--- a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/evaluations/results/[evaluation_id]/index.tsx
+++ /dev/null
@@ -1,23 +0,0 @@
-import {useRouter} from "next/router"
-
-import EvalRunDetailsPage from "@/oss/components/EvalRunDetails"
-
-const AppEvaluationResultsPage = () => {
- const router = useRouter()
- const rawType =
- (Array.isArray(router.query.eval_type)
- ? router.query.eval_type[0]
- : router.query.eval_type) ||
- (Array.isArray(router.query.type) ? router.query.type[0] : router.query.type)
- const normalized =
- rawType === "online"
- ? "online"
- : rawType === "human"
- ? "human"
- : rawType === "custom"
- ? "custom"
- : "auto"
- return
-}
-
-export default AppEvaluationResultsPage
diff --git a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/evaluations/results/compare/index.tsx b/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/evaluations/results/compare/index.tsx
deleted file mode 100644
index 9a24e505d7..0000000000
--- a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/evaluations/results/compare/index.tsx
+++ /dev/null
@@ -1,7 +0,0 @@
-import EvaluationCompare from "@/oss/components/pages/evaluations/evaluationCompare/EvaluationCompare"
-
-const EvaluationCompareDetails = () => {
- return
-}
-
-export default EvaluationCompareDetails
diff --git a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/evaluations/single_model_test/[evaluation_id]/index.tsx b/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/evaluations/single_model_test/[evaluation_id]/index.tsx
deleted file mode 100644
index 209e1772ec..0000000000
--- a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/evaluations/single_model_test/[evaluation_id]/index.tsx
+++ /dev/null
@@ -1,7 +0,0 @@
-import EvalRunDetailsPage from "@/oss/components/EvalRunDetails"
-
-const EvaluationPage = () => {
- return
-}
-
-export default EvaluationPage
diff --git a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/overview/index.tsx b/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/overview/index.tsx
index 9afd259da5..baa1f07530 100644
--- a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/overview/index.tsx
+++ b/web/oss/src/pages/w/[workspace_id]/p/[project_id]/apps/[app_id]/overview/index.tsx
@@ -15,6 +15,7 @@ import {openEditAppModalAtom} from "@/oss/components/pages/app-management/modals
import DeploymentOverview from "@/oss/components/pages/overview/deployments/DeploymentOverview"
import VariantsOverview from "@/oss/components/pages/overview/variants/VariantsOverview"
import useURL from "@/oss/hooks/useURL"
+import {isDemo} from "@/oss/lib/helpers/utils"
import type {JSSTheme} from "@/oss/lib/Types"
import {deleteApp} from "@/oss/services/app-selector/api"
import {useEnvironments} from "@/oss/services/deployment/hooks/useEnvironments"
@@ -125,10 +126,13 @@ const OverviewPage = () => {
-
-
-
-
+ {isDemo() && (
+ <>
+
+
+
+ >
+ )}
{
- return
-}
-
-export default ProjectEvaluationsPage
diff --git a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluations/results/[evaluation_id]/index.tsx b/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluations/results/[evaluation_id]/index.tsx
deleted file mode 100644
index 8cabe50e3a..0000000000
--- a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluations/results/[evaluation_id]/index.tsx
+++ /dev/null
@@ -1,23 +0,0 @@
-import {useRouter} from "next/router"
-
-import EvalRunDetailsPage from "@/oss/components/EvalRunDetails"
-
-const ProjectEvaluationResultsPage = () => {
- const router = useRouter()
- const rawType =
- (Array.isArray(router.query.eval_type)
- ? router.query.eval_type[0]
- : router.query.eval_type) ||
- (Array.isArray(router.query.type) ? router.query.type[0] : router.query.type)
- const normalized =
- rawType === "online"
- ? "online"
- : rawType === "human"
- ? "human"
- : rawType === "custom"
- ? "custom"
- : "auto"
- return
-}
-
-export default ProjectEvaluationResultsPage
diff --git a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluations/results/compare/index.tsx b/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluations/results/compare/index.tsx
deleted file mode 100644
index 4fc96755ce..0000000000
--- a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluations/results/compare/index.tsx
+++ /dev/null
@@ -1,7 +0,0 @@
-import EvaluationCompare from "@/oss/components/pages/evaluations/evaluationCompare/EvaluationCompare"
-
-const ProjectEvaluationCompareDetails = () => {
- return
-}
-
-export default ProjectEvaluationCompareDetails
diff --git a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluations/single_model_test/[evaluation_id]/index.tsx b/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluations/single_model_test/[evaluation_id]/index.tsx
deleted file mode 100644
index 67c0827984..0000000000
--- a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluations/single_model_test/[evaluation_id]/index.tsx
+++ /dev/null
@@ -1,7 +0,0 @@
-import EvalRunDetailsPage from "@/oss/components/EvalRunDetails"
-
-const ProjectHumanEvaluationPage = () => {
- return
-}
-
-export default ProjectHumanEvaluationPage
diff --git a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluators/configure/[evaluator_id].tsx b/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluators/configure/[evaluator_id].tsx
deleted file mode 100644
index df1b8461be..0000000000
--- a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluators/configure/[evaluator_id].tsx
+++ /dev/null
@@ -1,20 +0,0 @@
-import {useMemo} from "react"
-
-import {useRouter} from "next/router"
-
-import ConfigureEvaluatorPage from "@/oss/components/Evaluators/components/ConfigureEvaluator"
-
-const EvaluatorConfigureRoute = () => {
- const router = useRouter()
- const evaluatorId = useMemo(() => {
- const id = router.query.evaluator_id
- if (Array.isArray(id)) {
- return id[0]
- }
- return id ?? null
- }, [router.query.evaluator_id])
-
- return
-}
-
-export default EvaluatorConfigureRoute
diff --git a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluators/index.tsx b/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluators/index.tsx
deleted file mode 100644
index 7996228a65..0000000000
--- a/web/oss/src/pages/w/[workspace_id]/p/[project_id]/evaluators/index.tsx
+++ /dev/null
@@ -1,7 +0,0 @@
-import EvaluatorsRegistry from "@/oss/components/Evaluators"
-
-const ProjectEvaluatorsPage = () => {
- return
-}
-
-export default ProjectEvaluatorsPage
diff --git a/web/oss/src/services/onlineEvaluations/api.ts b/web/oss/src/services/onlineEvaluations/api.ts
index e0650b45d9..3ffa241891 100644
--- a/web/oss/src/services/onlineEvaluations/api.ts
+++ b/web/oss/src/services/onlineEvaluations/api.ts
@@ -2,179 +2,11 @@ import axios from "@/oss/lib/api/assets/axiosConfig"
import {getAgentaApiUrl} from "@/oss/lib/helpers/api"
import {getProjectValues} from "@/oss/state/project"
-type LogicalOperator = "and" | "or" | "not" | "nand" | "nor"
-
-export interface QueryConditionPayload {
- field: string
- key?: string
- value?: unknown
- operator?: string
- options?: Record
-}
-
-export interface QueryFilteringPayload {
- operator?: LogicalOperator
- conditions: (QueryConditionPayload | QueryFilteringPayload)[]
-}
-
-export interface QueryWindowingPayload {
- newest?: string
- oldest?: string
- next?: string
- limit?: number
- order?: "ascending" | "descending"
- interval?: number
- rate?: number
-}
-
-export interface QueryRevisionDataPayload {
- filtering?: QueryFilteringPayload
- windowing?: QueryWindowingPayload
-}
-
-export interface SimpleQueryCreatePayload {
- slug: string
- name?: string
- description?: string
- flags?: Record
- tags?: Record
- meta?: Record
- data?: QueryRevisionDataPayload
-}
-
-export interface SimpleQueryCreateRequest {
- query: SimpleQueryCreatePayload
-}
-
-export interface SimpleQueryResponse {
- count: number
- query?: {
- id: string
- slug?: string
- data?: QueryRevisionDataPayload
- meta?: Record
- } | null
-}
-
-export interface QueryRevisionRetrieveRequest {
- query_ref?: {id?: string; slug?: string} | null
- query_variant_ref?: {id?: string; slug?: string} | null
- query_revision_ref?: {id?: string; slug?: string} | null
-}
-
-export interface QueryRevisionResponse {
- count: number
- query_revision?: {
- id?: string
- slug?: string
- variant_id?: string
- version?: string | number
- data?: QueryRevisionDataPayload
- } | null
-}
-
-export interface SimpleEvaluationFlagsPayload {
- is_live?: boolean
- is_closed?: boolean
- is_active?: boolean
-}
-
-export interface SimpleEvaluationDataPayload {
- status?: string
- query_steps?: string[] | Record
- testset_steps?: Record
- application_steps?: Record
- evaluator_steps?: string[] | Record
- repeats?: number
- // Structured references for online evaluations
- query_ref?: {id?: string; slug?: string} | null
- query_revision_ref?: {id?: string; slug?: string} | null
- evaluator_ref?: {id?: string; slug?: string} | null
- configuration?: Record
-}
-
-export interface SimpleEvaluationCreatePayload {
- name?: string
- description?: string
- flags?: SimpleEvaluationFlagsPayload
- tags?: Record
- meta?: Record
- data: SimpleEvaluationDataPayload
-}
-
-export interface SimpleEvaluationCreateRequest {
- evaluation: SimpleEvaluationCreatePayload
-}
-
-export interface SimpleEvaluationResponse {
- count: number
- evaluation?: SimpleEvaluationPayload | null
-}
-
-export interface SimpleEvaluationPayload {
- id?: string
- slug?: string
- name?: string
- description?: string
- created_at?: string
- updated_at?: string
- created_by_id?: string
- updated_by_id?: string
- flags?: SimpleEvaluationFlagsPayload
- data?: SimpleEvaluationDataPayload
- meta?: Record
- tags?: Record
-}
-
-export interface SimpleEvaluationsResponse {
- count: number
- evaluations: SimpleEvaluationPayload[]
-}
-
-export interface SimpleEvaluationsQueryRequest {
- evaluation?: {
- flags?: SimpleEvaluationFlagsPayload
- ids?: string[]
- }
- tags?: Record
- meta?: Record
-}
-
const getProjectUrl = (path: string) => {
const {projectId} = getProjectValues()
return `${getAgentaApiUrl()}${path}?project_id=${projectId}`
}
-export const createSimpleQuery = async (
- payload: SimpleQueryCreateRequest,
-): Promise => {
- const {data} = await axios.post(getProjectUrl("/preview/simple/queries/"), payload)
- return data as SimpleQueryResponse
-}
-
-export const retrieveQueryRevision = async (
- payload: QueryRevisionRetrieveRequest,
-): Promise => {
- const {data} = await axios.post(getProjectUrl("/preview/queries/revisions/retrieve"), payload)
- return data as QueryRevisionResponse
-}
-
-export const createSimpleEvaluation = async (
- payload: SimpleEvaluationCreateRequest,
-): Promise => {
- const {data} = await axios.post(getProjectUrl("/preview/simple/evaluations/"), payload)
- return data as SimpleEvaluationResponse
-}
-
-export const querySimpleEvaluations = async (
- payload?: SimpleEvaluationsQueryRequest,
-): Promise => {
- const url = getProjectUrl("/preview/simple/evaluations/query")
- const body = payload ?? {}
- const {data} = await axios.post(url, body)
- return data as SimpleEvaluationsResponse
-}
-
export const stopSimpleEvaluation = async (evaluationId: string) => {
const url = getProjectUrl(`/preview/simple/evaluations/${evaluationId}/stop`)
const {data} = await axios.post(url)
diff --git a/web/pnpm-lock.yaml b/web/pnpm-lock.yaml
index 0057554c07..dee9870f08 100644
--- a/web/pnpm-lock.yaml
+++ b/web/pnpm-lock.yaml
@@ -84,7 +84,7 @@ importers:
version: 0.1.13(prettier@3.6.0)
ts-node:
specifier: ^10.9.2
- version: 10.9.2(@swc/core@1.11.8(@swc/helpers@0.5.17))(@types/node@20.19.19)(typescript@5.8.3)
+ version: 10.9.2(@swc/core@1.11.8(@swc/helpers@0.5.17))(@types/node@20.19.13)(typescript@5.8.3)
tsconfig-paths:
specifier: ^4.2.0
version: 4.2.0
@@ -267,7 +267,7 @@ importers:
version: 21.1.0
swc-loader:
specifier: ^0.2.6
- version: 0.2.6(@swc/core@1.11.8(@swc/helpers@0.5.17))(webpack@5.98.0(@swc/core@1.11.8(@swc/helpers@0.5.17)))
+ version: 0.2.6(@swc/core@1.11.8(@swc/helpers@0.5.17))(webpack@5.98.0(@swc/core@1.11.8(@swc/helpers@0.5.17))(esbuild@0.25.10))
swr:
specifier: ^2.3.0
version: 2.3.3(react@19.0.0)
@@ -593,9 +593,6 @@ importers:
react-window:
specifier: ^1.8.11
version: 1.8.11(react-dom@19.0.0(react@19.0.0))(react@19.0.0)
- recharts:
- specifier: ^3.1.0
- version: 3.1.0(@types/react@19.0.10)(react-dom@19.0.0(react@19.0.0))(react-is@18.3.1)(react@19.0.0)(redux@5.0.1)
semver:
specifier: ^7.7.2
version: 7.7.2
@@ -613,7 +610,7 @@ importers:
version: 21.1.0
swc-loader:
specifier: ^0.2.6
- version: 0.2.6(@swc/core@1.11.8(@swc/helpers@0.5.17))(webpack@5.98.0(@swc/core@1.11.8(@swc/helpers@0.5.17))(esbuild@0.25.10))
+ version: 0.2.6(@swc/core@1.11.8(@swc/helpers@0.5.17))(webpack@5.98.0(@swc/core@1.11.8(@swc/helpers@0.5.17)))
swr:
specifier: ^2.3.0
version: 2.3.3(react@19.0.0)
@@ -2054,6 +2051,9 @@ packages:
'@types/node@20.19.11':
resolution: {integrity: sha512-uug3FEEGv0r+jrecvUUpbY8lLisvIjg6AAic6a2bSP5OEOLeJsDSnvhCDov7ipFFMXS3orMpzlmi0ZcuGkBbow==}
+ '@types/node@20.19.13':
+ resolution: {integrity: sha512-yCAeZl7a0DxgNVteXFHt9+uyFbqXGy/ShC4BlcHkoE0AfGXYv/BUiplV72DjMYXHDBXFjhvr6DD1NiRVfB4j8g==}
+
'@types/node@20.19.19':
resolution: {integrity: sha512-pb1Uqj5WJP7wrcbLU7Ru4QtA0+3kAXrkutGiD26wUKzSMgNNaPARTUDQmElUXp64kh3cWdou3Q0C7qwwxqSFmg==}
@@ -3974,6 +3974,7 @@ packages:
resolution: {integrity: sha512-Quz3MvAwHxVYNXsOByL7xI5EB2WYOeFswqaHIA3qOK3isRWTxiplBEocmmru6XmxDB2L7jDNYtYA4FyimoAFEw==}
engines: {node: '>=8.17.0'}
hasBin: true
+ bundledDependencies: []
jsonpointer@5.0.1:
resolution: {integrity: sha512-p/nXbhSEcu3pZRdkW1OfJhpsVtW1gd4Wa1fnQc9YLiTfAjn0312eMKimbdIQzuZl9aa9xUGaRlP9T/CJE/ditQ==}
@@ -7283,6 +7284,10 @@ snapshots:
dependencies:
undici-types: 6.21.0
+ '@types/node@20.19.13':
+ dependencies:
+ undici-types: 6.21.0
+
'@types/node@20.19.19':
dependencies:
undici-types: 6.21.0
@@ -11368,14 +11373,14 @@ snapshots:
'@swc/core': 1.11.8(@swc/helpers@0.5.17)
optional: true
- ts-node@10.9.2(@swc/core@1.11.8(@swc/helpers@0.5.17))(@types/node@20.19.19)(typescript@5.8.3):
+ ts-node@10.9.2(@swc/core@1.11.8(@swc/helpers@0.5.17))(@types/node@20.19.13)(typescript@5.8.3):
dependencies:
'@cspotcode/source-map-support': 0.8.1
'@tsconfig/node10': 1.0.11
'@tsconfig/node12': 1.0.11
'@tsconfig/node14': 1.0.3
'@tsconfig/node16': 1.0.4
- '@types/node': 20.19.19
+ '@types/node': 20.19.13
acorn: 8.15.0
acorn-walk: 8.3.4
arg: 4.1.3