Skip to content

Commit e9ca3b5

Browse files
feat(api): api update
1 parent aea4c5c commit e9ca3b5

28 files changed

+321
-99
lines changed

.stats.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
 configured_endpoints: 65
-openapi_spec_hash: eeb8ebc5600523bdfad046381a929572
+openapi_spec_hash: 80696dc202de8bacc0e43506d7c210b0
 config_hash: 14b2643a0ec60cf326dfed00939644ff

src/codex/resources/projects/evals.py

Lines changed: 36 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ def create(
         query_identifier: Optional[str] | NotGiven = NOT_GIVEN,
         response_identifier: Optional[str] | NotGiven = NOT_GIVEN,
         should_escalate: bool | NotGiven = NOT_GIVEN,
+        should_guardrail: bool | NotGiven = NOT_GIVEN,
         threshold: float | NotGiven = NOT_GIVEN,
         threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
         # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -95,8 +96,10 @@ def create(
9596
response_identifier: The exact string used in your evaluation criteria to reference the RAG/LLM
9697
response.
9798
98-
should_escalate: If true, failing this eval means the response is considered bad and can trigger
99-
escalation to Codex/SME
99+
should_escalate: If true, failing this eval means the question should be escalated to Codex for
100+
an SME to review
101+
102+
should_guardrail: If true, failing this eval means the response should be guardrailed
100103
101104
threshold: Threshold value that determines if the evaluation fails
102105
@@ -126,6 +129,7 @@ def create(
126129
"query_identifier": query_identifier,
127130
"response_identifier": response_identifier,
128131
"should_escalate": should_escalate,
132+
"should_guardrail": should_guardrail,
129133
"threshold": threshold,
130134
"threshold_direction": threshold_direction,
131135
},
@@ -153,6 +157,7 @@ def update(
153157
query_identifier: Optional[str] | NotGiven = NOT_GIVEN,
154158
response_identifier: Optional[str] | NotGiven = NOT_GIVEN,
155159
should_escalate: bool | NotGiven = NOT_GIVEN,
160+
should_guardrail: bool | NotGiven = NOT_GIVEN,
156161
threshold: float | NotGiven = NOT_GIVEN,
157162
threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
158163
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -189,8 +194,10 @@ def update(
189194
response_identifier: The exact string used in your evaluation criteria to reference the RAG/LLM
190195
response.
191196
192-
should_escalate: If true, failing this eval means the response is considered bad and can trigger
193-
escalation to Codex/SME
197+
should_escalate: If true, failing this eval means the question should be escalated to Codex for
198+
an SME to review
199+
200+
should_guardrail: If true, failing this eval means the response should be guardrailed
194201
195202
threshold: Threshold value that determines if the evaluation fails
196203
@@ -216,6 +223,7 @@ def update(
216223
enabled: bool | NotGiven = NOT_GIVEN,
217224
priority: Optional[int] | NotGiven = NOT_GIVEN,
218225
should_escalate: bool | NotGiven = NOT_GIVEN,
226+
should_guardrail: bool | NotGiven = NOT_GIVEN,
219227
threshold: float | NotGiven = NOT_GIVEN,
220228
threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
221229
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -237,8 +245,10 @@ def update(
237245
priority: Priority order for evals (lower number = higher priority) to determine primary
238246
eval issue to surface
239247
240-
should_escalate: If true, failing this eval means the response is considered bad and can trigger
241-
escalation to Codex/SME
248+
should_escalate: If true, failing this eval means the question should be escalated to Codex for
249+
an SME to review
250+
251+
should_guardrail: If true, failing this eval means the response should be guardrailed
242252
243253
threshold: Threshold value that determines if the evaluation fails
244254
@@ -270,6 +280,7 @@ def update(
270280
query_identifier: Optional[str] | NotGiven = NOT_GIVEN,
271281
response_identifier: Optional[str] | NotGiven = NOT_GIVEN,
272282
should_escalate: bool | NotGiven = NOT_GIVEN,
283+
should_guardrail: bool | NotGiven = NOT_GIVEN,
273284
threshold: float | NotGiven = NOT_GIVEN,
274285
threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
275286
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -297,6 +308,7 @@ def update(
297308
"query_identifier": query_identifier,
298309
"response_identifier": response_identifier,
299310
"should_escalate": should_escalate,
311+
"should_guardrail": should_guardrail,
300312
"threshold": threshold,
301313
"threshold_direction": threshold_direction,
302314
},
@@ -412,6 +424,7 @@ async def create(
412424
query_identifier: Optional[str] | NotGiven = NOT_GIVEN,
413425
response_identifier: Optional[str] | NotGiven = NOT_GIVEN,
414426
should_escalate: bool | NotGiven = NOT_GIVEN,
427+
should_guardrail: bool | NotGiven = NOT_GIVEN,
415428
threshold: float | NotGiven = NOT_GIVEN,
416429
threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
417430
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -448,8 +461,10 @@ async def create(
448461
response_identifier: The exact string used in your evaluation criteria to reference the RAG/LLM
449462
response.
450463
451-
should_escalate: If true, failing this eval means the response is considered bad and can trigger
452-
escalation to Codex/SME
464+
should_escalate: If true, failing this eval means the question should be escalated to Codex for
465+
an SME to review
466+
467+
should_guardrail: If true, failing this eval means the response should be guardrailed
453468
454469
threshold: Threshold value that determines if the evaluation fails
455470
@@ -479,6 +494,7 @@ async def create(
479494
"query_identifier": query_identifier,
480495
"response_identifier": response_identifier,
481496
"should_escalate": should_escalate,
497+
"should_guardrail": should_guardrail,
482498
"threshold": threshold,
483499
"threshold_direction": threshold_direction,
484500
},
@@ -506,6 +522,7 @@ async def update(
506522
query_identifier: Optional[str] | NotGiven = NOT_GIVEN,
507523
response_identifier: Optional[str] | NotGiven = NOT_GIVEN,
508524
should_escalate: bool | NotGiven = NOT_GIVEN,
525+
should_guardrail: bool | NotGiven = NOT_GIVEN,
509526
threshold: float | NotGiven = NOT_GIVEN,
510527
threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
511528
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -542,8 +559,10 @@ async def update(
542559
response_identifier: The exact string used in your evaluation criteria to reference the RAG/LLM
543560
response.
544561
545-
should_escalate: If true, failing this eval means the response is considered bad and can trigger
546-
escalation to Codex/SME
562+
should_escalate: If true, failing this eval means the question should be escalated to Codex for
563+
an SME to review
564+
565+
should_guardrail: If true, failing this eval means the response should be guardrailed
547566
548567
threshold: Threshold value that determines if the evaluation fails
549568
@@ -569,6 +588,7 @@ async def update(
569588
enabled: bool | NotGiven = NOT_GIVEN,
570589
priority: Optional[int] | NotGiven = NOT_GIVEN,
571590
should_escalate: bool | NotGiven = NOT_GIVEN,
591+
should_guardrail: bool | NotGiven = NOT_GIVEN,
572592
threshold: float | NotGiven = NOT_GIVEN,
573593
threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
574594
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -590,8 +610,10 @@ async def update(
590610
priority: Priority order for evals (lower number = higher priority) to determine primary
591611
eval issue to surface
592612
593-
should_escalate: If true, failing this eval means the response is considered bad and can trigger
594-
escalation to Codex/SME
613+
should_escalate: If true, failing this eval means the question should be escalated to Codex for
614+
an SME to review
615+
616+
should_guardrail: If true, failing this eval means the response should be guardrailed
595617
596618
threshold: Threshold value that determines if the evaluation fails
597619
@@ -623,6 +645,7 @@ async def update(
623645
query_identifier: Optional[str] | NotGiven = NOT_GIVEN,
624646
response_identifier: Optional[str] | NotGiven = NOT_GIVEN,
625647
should_escalate: bool | NotGiven = NOT_GIVEN,
648+
should_guardrail: bool | NotGiven = NOT_GIVEN,
626649
threshold: float | NotGiven = NOT_GIVEN,
627650
threshold_direction: Literal["above", "below"] | NotGiven = NOT_GIVEN,
628651
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -650,6 +673,7 @@ async def update(
650673
"query_identifier": query_identifier,
651674
"response_identifier": response_identifier,
652675
"should_escalate": should_escalate,
676+
"should_guardrail": should_guardrail,
653677
"threshold": threshold,
654678
"threshold_direction": threshold_direction,
655679
},

src/codex/types/project_create_params.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -79,10 +79,13 @@ class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False):

     should_escalate: bool
     """
-    If true, failing this eval means the response is considered bad and can trigger
-    escalation to Codex/SME
+    If true, failing this eval means the question should be escalated to Codex for
+    an SME to review
     """

+    should_guardrail: bool
+    """If true, failing this eval means the response should be guardrailed"""
+
     threshold: float
     """Threshold value that determines if the evaluation fails"""


@@ -115,10 +118,13 @@ class ConfigEvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False):
115118

116119
should_escalate: bool
117120
"""
118-
If true, failing this eval means the response is considered bad and can trigger
119-
escalation to Codex/SME
121+
If true, failing this eval means the question should be escalated to Codex for
122+
an SME to review
120123
"""
121124

125+
should_guardrail: bool
126+
"""If true, failing this eval means the response should be guardrailed"""
127+
122128
threshold: float
123129
"""Threshold value that determines if the evaluation fails"""
124130

@@ -147,10 +153,13 @@ class ConfigEvalConfigDefaultEvalsQueryEase(TypedDict, total=False):
147153

148154
should_escalate: bool
149155
"""
150-
If true, failing this eval means the response is considered bad and can trigger
151-
escalation to Codex/SME
156+
If true, failing this eval means the question should be escalated to Codex for
157+
an SME to review
152158
"""
153159

160+
should_guardrail: bool
161+
"""If true, failing this eval means the response should be guardrailed"""
162+
154163
threshold: float
155164
"""Threshold value that determines if the evaluation fails"""
156165

@@ -179,10 +188,13 @@ class ConfigEvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False):
179188

180189
should_escalate: bool
181190
"""
182-
If true, failing this eval means the response is considered bad and can trigger
183-
escalation to Codex/SME
191+
If true, failing this eval means the question should be escalated to Codex for
192+
an SME to review
184193
"""
185194

195+
should_guardrail: bool
196+
"""If true, failing this eval means the response should be guardrailed"""
197+
186198
threshold: float
187199
"""Threshold value that determines if the evaluation fails"""
188200

@@ -211,10 +223,13 @@ class ConfigEvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False):
211223

212224
should_escalate: bool
213225
"""
214-
If true, failing this eval means the response is considered bad and can trigger
215-
escalation to Codex/SME
226+
If true, failing this eval means the question should be escalated to Codex for
227+
an SME to review
216228
"""
217229

230+
should_guardrail: bool
231+
"""If true, failing this eval means the response should be guardrailed"""
232+
218233
threshold: float
219234
"""Threshold value that determines if the evaluation fails"""
220235

@@ -243,10 +258,13 @@ class ConfigEvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False):
243258

244259
should_escalate: bool
245260
"""
246-
If true, failing this eval means the response is considered bad and can trigger
247-
escalation to Codex/SME
261+
If true, failing this eval means the question should be escalated to Codex for
262+
an SME to review
248263
"""
249264

265+
should_guardrail: bool
266+
"""If true, failing this eval means the response should be guardrailed"""
267+
250268
threshold: float
251269
"""Threshold value that determines if the evaluation fails"""
252270

src/codex/types/project_list_response.py

Lines changed: 30 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,13 @@ class ProjectConfigEvalConfigCustomEvalsEvals(BaseModel):

     should_escalate: Optional[bool] = None
     """
-    If true, failing this eval means the response is considered bad and can trigger
-    escalation to Codex/SME
+    If true, failing this eval means the question should be escalated to Codex for
+    an SME to review
     """

+    should_guardrail: Optional[bool] = None
+    """If true, failing this eval means the response should be guardrailed"""
+
     threshold: Optional[float] = None
     """Threshold value that determines if the evaluation fails"""


@@ -105,10 +108,13 @@ class ProjectConfigEvalConfigDefaultEvalsContextSufficiency(BaseModel):
105108

106109
should_escalate: Optional[bool] = None
107110
"""
108-
If true, failing this eval means the response is considered bad and can trigger
109-
escalation to Codex/SME
111+
If true, failing this eval means the question should be escalated to Codex for
112+
an SME to review
110113
"""
111114

115+
should_guardrail: Optional[bool] = None
116+
"""If true, failing this eval means the response should be guardrailed"""
117+
112118
threshold: Optional[float] = None
113119
"""Threshold value that determines if the evaluation fails"""
114120

@@ -137,10 +143,13 @@ class ProjectConfigEvalConfigDefaultEvalsQueryEase(BaseModel):
137143

138144
should_escalate: Optional[bool] = None
139145
"""
140-
If true, failing this eval means the response is considered bad and can trigger
141-
escalation to Codex/SME
146+
If true, failing this eval means the question should be escalated to Codex for
147+
an SME to review
142148
"""
143149

150+
should_guardrail: Optional[bool] = None
151+
"""If true, failing this eval means the response should be guardrailed"""
152+
144153
threshold: Optional[float] = None
145154
"""Threshold value that determines if the evaluation fails"""
146155

@@ -169,10 +178,13 @@ class ProjectConfigEvalConfigDefaultEvalsResponseGroundedness(BaseModel):
169178

170179
should_escalate: Optional[bool] = None
171180
"""
172-
If true, failing this eval means the response is considered bad and can trigger
173-
escalation to Codex/SME
181+
If true, failing this eval means the question should be escalated to Codex for
182+
an SME to review
174183
"""
175184

185+
should_guardrail: Optional[bool] = None
186+
"""If true, failing this eval means the response should be guardrailed"""
187+
176188
threshold: Optional[float] = None
177189
"""Threshold value that determines if the evaluation fails"""
178190

@@ -201,10 +213,13 @@ class ProjectConfigEvalConfigDefaultEvalsResponseHelpfulness(BaseModel):
201213

202214
should_escalate: Optional[bool] = None
203215
"""
204-
If true, failing this eval means the response is considered bad and can trigger
205-
escalation to Codex/SME
216+
If true, failing this eval means the question should be escalated to Codex for
217+
an SME to review
206218
"""
207219

220+
should_guardrail: Optional[bool] = None
221+
"""If true, failing this eval means the response should be guardrailed"""
222+
208223
threshold: Optional[float] = None
209224
"""Threshold value that determines if the evaluation fails"""
210225

@@ -233,10 +248,13 @@ class ProjectConfigEvalConfigDefaultEvalsTrustworthiness(BaseModel):
233248

234249
should_escalate: Optional[bool] = None
235250
"""
236-
If true, failing this eval means the response is considered bad and can trigger
237-
escalation to Codex/SME
251+
If true, failing this eval means the question should be escalated to Codex for
252+
an SME to review
238253
"""
239254

255+
should_guardrail: Optional[bool] = None
256+
"""If true, failing this eval means the response should be guardrailed"""
257+
240258
threshold: Optional[float] = None
241259
"""Threshold value that determines if the evaluation fails"""
242260

0 commit comments

Comments (0)