Skip to content

Commit e9ca3b5

Browse files
feat(api): api update
1 parent aea4c5c commit e9ca3b5

28 files changed

+321
-99
lines changed

.stats.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
 configured_endpoints: 65
-openapi_spec_hash: eeb8ebc5600523bdfad046381a929572
+openapi_spec_hash: 80696dc202de8bacc0e43506d7c210b0
 config_hash: 14b2643a0ec60cf326dfed00939644ff

src/codex/resources/projects/evals.py

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def create(
         query_identifier: Optional[str] | NotGiven = NOT_GIVEN,
         response_identifier: Optional[str] | NotGiven = NOT_GIVEN,
         should_escalate: bool | NotGiven = NOT_GIVEN,
+        should_guardrail: bool | NotGiven = NOT_GIVEN,
         threshold: float | NotGiven = NOT_GIVEN,
         threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -95,8 +96,10 @@ def create(
9596
response_identifier: The exact string used in your evaluation criteria to reference the RAG/LLM
9697
response.
9798
98-
should_escalate: If true, failing this eval means the response is considered bad and can trigger
99-
escalation to Codex/SME
99+
should_escalate: If true, failing this eval means the question should be escalated to Codex for
100+
an SME to review
101+
102+
should_guardrail: If true, failing this eval means the response should be guardrailed
100103
101104
threshold: Threshold value that determines if the evaluation fails
102105
@@ -126,6 +129,7 @@ def create(
126129
"query_identifier": query_identifier,
127130
"response_identifier": response_identifier,
128131
"should_escalate": should_escalate,
132+
"should_guardrail": should_guardrail,
129133
"threshold": threshold,
130134
"threshold_direction": threshold_direction,
131135
},
@@ -153,6 +157,7 @@ def update(
153157
query_identifier: Optional[str] | NotGiven = NOT_GIVEN,
154158
response_identifier: Optional[str] | NotGiven = NOT_GIVEN,
155159
should_escalate: bool | NotGiven = NOT_GIVEN,
160+
should_guardrail: bool | NotGiven = NOT_GIVEN,
156161
threshold: float | NotGiven = NOT_GIVEN,
157162
threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
158163
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -189,8 +194,10 @@ def update(
189194
response_identifier: The exact string used in your evaluation criteria to reference the RAG/LLM
190195
response.
191196
192-
should_escalate: If true, failing this eval means the response is considered bad and can trigger
193-
escalation to Codex/SME
197+
should_escalate: If true, failing this eval means the question should be escalated to Codex for
198+
an SME to review
199+
200+
should_guardrail: If true, failing this eval means the response should be guardrailed
194201
195202
threshold: Threshold value that determines if the evaluation fails
196203
@@ -216,6 +223,7 @@ def update(
216223
enabled: bool | NotGiven = NOT_GIVEN,
217224
priority: Optional[int] | NotGiven = NOT_GIVEN,
218225
should_escalate: bool | NotGiven = NOT_GIVEN,
226+
should_guardrail: bool | NotGiven = NOT_GIVEN,
219227
threshold: float | NotGiven = NOT_GIVEN,
220228
threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
221229
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -237,8 +245,10 @@ def update(
237245
priority: Priority order for evals (lower number = higher priority) to determine primary
238246
eval issue to surface
239247
240-
should_escalate: If true, failing this eval means the response is considered bad and can trigger
241-
escalation to Codex/SME
248+
should_escalate: If true, failing this eval means the question should be escalated to Codex for
249+
an SME to review
250+
251+
should_guardrail: If true, failing this eval means the response should be guardrailed
242252
243253
threshold: Threshold value that determines if the evaluation fails
244254
@@ -270,6 +280,7 @@ def update(
270280
query_identifier: Optional[str] | NotGiven = NOT_GIVEN,
271281
response_identifier: Optional[str] | NotGiven = NOT_GIVEN,
272282
should_escalate: bool | NotGiven = NOT_GIVEN,
283+
should_guardrail: bool | NotGiven = NOT_GIVEN,
273284
threshold: float | NotGiven = NOT_GIVEN,
274285
threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
275286
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -297,6 +308,7 @@ def update(
297308
"query_identifier": query_identifier,
298309
"response_identifier": response_identifier,
299310
"should_escalate": should_escalate,
311+
"should_guardrail": should_guardrail,
300312
"threshold": threshold,
301313
"threshold_direction": threshold_direction,
302314
},
@@ -412,6 +424,7 @@ async def create(
412424
query_identifier: Optional[str] | NotGiven = NOT_GIVEN,
413425
response_identifier: Optional[str] | NotGiven = NOT_GIVEN,
414426
should_escalate: bool | NotGiven = NOT_GIVEN,
427+
should_guardrail: bool | NotGiven = NOT_GIVEN,
415428
threshold: float | NotGiven = NOT_GIVEN,
416429
threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
417430
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -448,8 +461,10 @@ async def create(
448461
response_identifier: The exact string used in your evaluation criteria to reference the RAG/LLM
449462
response.
450463
451-
should_escalate: If true, failing this eval means the response is considered bad and can trigger
452-
escalation to Codex/SME
464+
should_escalate: If true, failing this eval means the question should be escalated to Codex for
465+
an SME to review
466+
467+
should_guardrail: If true, failing this eval means the response should be guardrailed
453468
454469
threshold: Threshold value that determines if the evaluation fails
455470
@@ -479,6 +494,7 @@ async def create(
479494
"query_identifier": query_identifier,
480495
"response_identifier": response_identifier,
481496
"should_escalate": should_escalate,
497+
"should_guardrail": should_guardrail,
482498
"threshold": threshold,
483499
"threshold_direction": threshold_direction,
484500
},
@@ -506,6 +522,7 @@ async def update(
506522
query_identifier: Optional[str] | NotGiven = NOT_GIVEN,
507523
response_identifier: Optional[str] | NotGiven = NOT_GIVEN,
508524
should_escalate: bool | NotGiven = NOT_GIVEN,
525+
should_guardrail: bool | NotGiven = NOT_GIVEN,
509526
threshold: float | NotGiven = NOT_GIVEN,
510527
threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
511528
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -542,8 +559,10 @@ async def update(
542559
response_identifier: The exact string used in your evaluation criteria to reference the RAG/LLM
543560
response.
544561
545-
should_escalate: If true, failing this eval means the response is considered bad and can trigger
546-
escalation to Codex/SME
562+
should_escalate: If true, failing this eval means the question should be escalated to Codex for
563+
an SME to review
564+
565+
should_guardrail: If true, failing this eval means the response should be guardrailed
547566
548567
threshold: Threshold value that determines if the evaluation fails
549568
@@ -569,6 +588,7 @@ async def update(
569588
enabled: bool | NotGiven = NOT_GIVEN,
570589
priority: Optional[int] | NotGiven = NOT_GIVEN,
571590
should_escalate: bool | NotGiven = NOT_GIVEN,
591+
should_guardrail: bool | NotGiven = NOT_GIVEN,
572592
threshold: float | NotGiven = NOT_GIVEN,
573593
threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
574594
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -590,8 +610,10 @@ async def update(
590610
priority: Priority order for evals (lower number = higher priority) to determine primary
591611
eval issue to surface
592612
593-
should_escalate: If true, failing this eval means the response is considered bad and can trigger
594-
escalation to Codex/SME
613+
should_escalate: If true, failing this eval means the question should be escalated to Codex for
614+
an SME to review
615+
616+
should_guardrail: If true, failing this eval means the response should be guardrailed
595617
596618
threshold: Threshold value that determines if the evaluation fails
597619
@@ -623,6 +645,7 @@ async def update(
623645
query_identifier: Optional[str] | NotGiven = NOT_GIVEN,
624646
response_identifier: Optional[str] | NotGiven = NOT_GIVEN,
625647
should_escalate: bool | NotGiven = NOT_GIVEN,
648+
should_guardrail: bool | NotGiven = NOT_GIVEN,
626649
threshold: float | NotGiven = NOT_GIVEN,
627650
threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
628651
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -650,6 +673,7 @@ async def update(
650673
"query_identifier": query_identifier,
651674
"response_identifier": response_identifier,
652675
"should_escalate": should_escalate,
676+
"should_guardrail": should_guardrail,
653677
"threshold": threshold,
654678
"threshold_direction": threshold_direction,
655679
},

src/codex/types/project_create_params.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,13 @@ class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False):

     should_escalate: bool
     """
-    If true, failing this eval means the response is considered bad and can trigger
-    escalation to Codex/SME
+    If true, failing this eval means the question should be escalated to Codex for
+    an SME to review
     """

+    should_guardrail: bool
+    """If true, failing this eval means the response should be guardrailed"""
+
     threshold: float
     """Threshold value that determines if the evaluation fails"""


@@ -115,10 +118,13 @@ class ConfigEvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False):
115118

116119
should_escalate: bool
117120
"""
118-
If true, failing this eval means the response is considered bad and can trigger
119-
escalation to Codex/SME
121+
If true, failing this eval means the question should be escalated to Codex for
122+
an SME to review
120123
"""
121124

125+
should_guardrail: bool
126+
"""If true, failing this eval means the response should be guardrailed"""
127+
122128
threshold: float
123129
"""Threshold value that determines if the evaluation fails"""
124130

@@ -147,10 +153,13 @@ class ConfigEvalConfigDefaultEvalsQueryEase(TypedDict, total=False):
147153

148154
should_escalate: bool
149155
"""
150-
If true, failing this eval means the response is considered bad and can trigger
151-
escalation to Codex/SME
156+
If true, failing this eval means the question should be escalated to Codex for
157+
an SME to review
152158
"""
153159

160+
should_guardrail: bool
161+
"""If true, failing this eval means the response should be guardrailed"""
162+
154163
threshold: float
155164
"""Threshold value that determines if the evaluation fails"""
156165

@@ -179,10 +188,13 @@ class ConfigEvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False):
179188

180189
should_escalate: bool
181190
"""
182-
If true, failing this eval means the response is considered bad and can trigger
183-
escalation to Codex/SME
191+
If true, failing this eval means the question should be escalated to Codex for
192+
an SME to review
184193
"""
185194

195+
should_guardrail: bool
196+
"""If true, failing this eval means the response should be guardrailed"""
197+
186198
threshold: float
187199
"""Threshold value that determines if the evaluation fails"""
188200

@@ -211,10 +223,13 @@ class ConfigEvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False):
211223

212224
should_escalate: bool
213225
"""
214-
If true, failing this eval means the response is considered bad and can trigger
215-
escalation to Codex/SME
226+
If true, failing this eval means the question should be escalated to Codex for
227+
an SME to review
216228
"""
217229

230+
should_guardrail: bool
231+
"""If true, failing this eval means the response should be guardrailed"""
232+
218233
threshold: float
219234
"""Threshold value that determines if the evaluation fails"""
220235

@@ -243,10 +258,13 @@ class ConfigEvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False):
243258

244259
should_escalate: bool
245260
"""
246-
If true, failing this eval means the response is considered bad and can trigger
247-
escalation to Codex/SME
261+
If true, failing this eval means the question should be escalated to Codex for
262+
an SME to review
248263
"""
249264

265+
should_guardrail: bool
266+
"""If true, failing this eval means the response should be guardrailed"""
267+
250268
threshold: float
251269
"""Threshold value that determines if the evaluation fails"""
252270

src/codex/types/project_list_response.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,13 @@ class ProjectConfigEvalConfigCustomEvalsEvals(BaseModel):

     should_escalate: Optional[bool] = None
     """
-    If true, failing this eval means the response is considered bad and can trigger
-    escalation to Codex/SME
+    If true, failing this eval means the question should be escalated to Codex for
+    an SME to review
     """

+    should_guardrail: Optional[bool] = None
+    """If true, failing this eval means the response should be guardrailed"""
+
     threshold: Optional[float] = None
     """Threshold value that determines if the evaluation fails"""


@@ -105,10 +108,13 @@ class ProjectConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel):
105108

106109
should_escalate: Optional[bool] = None
107110
"""
108-
If true, failing this eval means the response is considered bad and can trigger
109-
escalation to Codex/SME
111+
If true, failing this eval means the question should be escalated to Codex for
112+
an SME to review
110113
"""
111114

115+
should_guardrail: Optional[bool] = None
116+
"""If true, failing this eval means the response should be guardrailed"""
117+
112118
threshold: Optional[float] = None
113119
"""Threshold value that determines if the evaluation fails"""
114120

@@ -137,10 +143,13 @@ class ProjectConfigEvalConfigDefaultEvalsQueryEase(BaseModel):
137143

138144
should_escalate: Optional[bool] = None
139145
"""
140-
If true, failing this eval means the response is considered bad and can trigger
141-
escalation to Codex/SME
146+
If true, failing this eval means the question should be escalated to Codex for
147+
an SME to review
142148
"""
143149

150+
should_guardrail: Optional[bool] = None
151+
"""If true, failing this eval means the response should be guardrailed"""
152+
144153
threshold: Optional[float] = None
145154
"""Threshold value that determines if the evaluation fails"""
146155

@@ -169,10 +178,13 @@ class ProjectConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel):
169178

170179
should_escalate: Optional[bool] = None
171180
"""
172-
If true, failing this eval means the response is considered bad and can trigger
173-
escalation to Codex/SME
181+
If true, failing this eval means the question should be escalated to Codex for
182+
an SME to review
174183
"""
175184

185+
should_guardrail: Optional[bool] = None
186+
"""If true, failing this eval means the response should be guardrailed"""
187+
176188
threshold: Optional[float] = None
177189
"""Threshold value that determines if the evaluation fails"""
178190

@@ -201,10 +213,13 @@ class ProjectConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel):
201213

202214
should_escalate: Optional[bool] = None
203215
"""
204-
If true, failing this eval means the response is considered bad and can trigger
205-
escalation to Codex/SME
216+
If true, failing this eval means the question should be escalated to Codex for
217+
an SME to review
206218
"""
207219

220+
should_guardrail: Optional[bool] = None
221+
"""If true, failing this eval means the response should be guardrailed"""
222+
208223
threshold: Optional[float] = None
209224
"""Threshold value that determines if the evaluation fails"""
210225

@@ -233,10 +248,13 @@ class ProjectConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel):
233248

234249
should_escalate: Optional[bool] = None
235250
"""
236-
If true, failing this eval means the response is considered bad and can trigger
237-
escalation to Codex/SME
251+
If true, failing this eval means the question should be escalated to Codex for
252+
an SME to review
238253
"""
239254

255+
should_guardrail: Optional[bool] = None
256+
"""If true, failing this eval means the response should be guardrailed"""
257+
240258
threshold: Optional[float] = None
241259
"""Threshold value that determines if the evaluation fails"""
242260

0 commit comments

Comments (0)