 
 from __future__ import annotations
 
-from typing import Optional
-from typing_extensions import Required, TypedDict
+from typing import Dict, Optional
+from typing_extensions import Literal, Required, TypedDict
 
-__all__ = ["ProjectCreateParams", "Config"]
+__all__ = [
+    "ProjectCreateParams",
+    "Config",
+    "ConfigEvalConfig",
+    "ConfigEvalConfigCustomEvals",
+    "ConfigEvalConfigCustomEvalsEvals",
+    "ConfigEvalConfigDefaultEvals",
+    "ConfigEvalConfigDefaultEvalsContextSufficiency",
+    "ConfigEvalConfigDefaultEvalsQueryEase",
+    "ConfigEvalConfigDefaultEvalsResponseGroundedness",
+    "ConfigEvalConfigDefaultEvalsResponseHelpfulness",
+    "ConfigEvalConfigDefaultEvalsTrustworthiness",
+]
 
 
 class ProjectCreateParams(TypedDict, total=False):
@@ -18,9 +30,276 @@ class ProjectCreateParams(TypedDict, total=False):
     description: Optional[str]
 
 
+class ConfigEvalConfigCustomEvalsEvals(TypedDict, total=False):
+    criteria: Required[str]
+    """
+    The evaluation criteria text that describes what aspect is being evaluated and
+    how
+    """
+
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    context_identifier: Optional[str]
+    """
+    The exact string used in your evaluation criteria to reference the retrieved
+    context.
+    """
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    query_identifier: Optional[str]
+    """
+    The exact string used in your evaluation criteria to reference the user's query.
+    """
+
+    response_identifier: Optional[str]
+    """
+    The exact string used in your evaluation criteria to reference the RAG/LLM
+    response.
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigCustomEvals(TypedDict, total=False):
+    evals: Dict[str, ConfigEvalConfigCustomEvalsEvals]
+
+
+class ConfigEvalConfigDefaultEvalsContextSufficiency(TypedDict, total=False):
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsQueryEase(TypedDict, total=False):
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsResponseGroundedness(TypedDict, total=False):
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsResponseHelpfulness(TypedDict, total=False):
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvalsTrustworthiness(TypedDict, total=False):
+    eval_key: Required[str]
+    """
+    Unique key for eval metric - currently maps to the TrustworthyRAG name property
+    and eval_scores dictionary key to check against threshold
+    """
+
+    name: Required[str]
+    """Display name/label for the evaluation metric"""
+
+    enabled: bool
+    """Allows the evaluation to be disabled without removing it"""
+
+    priority: Optional[int]
+    """
+    Priority order for evals (lower number = higher priority) to determine primary
+    eval issue to surface
+    """
+
+    should_escalate: bool
+    """
+    If true, failing this eval means the response is considered bad and can trigger
+    escalation to Codex/SME
+    """
+
+    threshold: float
+    """Threshold value that determines if the evaluation fails"""
+
+    threshold_direction: Literal["above", "below"]
+    """Whether the evaluation fails when score is above or below the threshold"""
+
+
+class ConfigEvalConfigDefaultEvals(TypedDict, total=False):
+    context_sufficiency: ConfigEvalConfigDefaultEvalsContextSufficiency
+    """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+    The evaluation criteria and identifiers are immutable and system-managed, while
+    other properties like thresholds and priorities can be configured.
+    """
+
+    query_ease: ConfigEvalConfigDefaultEvalsQueryEase
+    """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+    The evaluation criteria and identifiers are immutable and system-managed, while
+    other properties like thresholds and priorities can be configured.
+    """
+
+    response_groundedness: ConfigEvalConfigDefaultEvalsResponseGroundedness
+    """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+    The evaluation criteria and identifiers are immutable and system-managed, while
+    other properties like thresholds and priorities can be configured.
+    """
+
+    response_helpfulness: ConfigEvalConfigDefaultEvalsResponseHelpfulness
+    """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+    The evaluation criteria and identifiers are immutable and system-managed, while
+    other properties like thresholds and priorities can be configured.
+    """
+
+    trustworthiness: ConfigEvalConfigDefaultEvalsTrustworthiness
+    """A pre-configured evaluation metric from TrustworthyRAG or built into the system.
+
+    The evaluation criteria and identifiers are immutable and system-managed, while
+    other properties like thresholds and priorities can be configured.
+    """
+
+
+class ConfigEvalConfig(TypedDict, total=False):
+    custom_evals: ConfigEvalConfigCustomEvals
+    """Configuration for custom evaluation metrics."""
+
+    default_evals: ConfigEvalConfigDefaultEvals
+    """Configuration for default evaluation metrics."""
+
+
 class Config(TypedDict, total=False):
     clustering_use_llm_matching: bool
 
+    eval_config: ConfigEvalConfig
+    """Configuration for project-specific evaluation metrics"""
+
     llm_matching_model: str
 
     llm_matching_quality_preset: str
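
For context on how these pieces fit together, here is a minimal sketch of a `Config` payload using the new `eval_config` field. The key names and `Required[...]` fields come straight from the TypedDicts in this diff; every concrete value (names, criteria text, thresholds) is an illustrative assumption, not an SDK default.

```python
# Sketch only: key names and required fields follow the TypedDicts above;
# all concrete values are made up for illustration.
config = {
    "eval_config": {
        "default_evals": {
            # Built-in metric: criteria/identifiers are system-managed, but
            # threshold, priority, and escalation behavior are configurable.
            "trustworthiness": {
                "eval_key": "trustworthiness",   # Required
                "name": "Trustworthiness",       # Required
                "enabled": True,
                "priority": 0,                   # lower number = higher priority
                "should_escalate": True,         # failing can trigger Codex/SME escalation
                "threshold": 0.7,
                "threshold_direction": "below",  # fail when score falls below 0.7
            },
        },
        "custom_evals": {
            "evals": {
                "politeness": {
                    "criteria": "Is the {response} to the {query} polite?",  # Required
                    "eval_key": "politeness",                                # Required
                    "name": "Politeness",                                    # Required
                    "query_identifier": "{query}",        # exact string used in criteria
                    "response_identifier": "{response}",  # exact string used in criteria
                    "threshold": 0.5,
                    "threshold_direction": "below",
                },
            },
        },
    },
}
```

Because every class here is declared with `total=False`, only the `Required[...]` keys must be present; omitted fields presumably fall back to server-side defaults.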
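The `threshold`/`threshold_direction` pair is worth spelling out. A hypothetical helper matching the documented semantics ("whether the evaluation fails when score is above or below the threshold") might look like this; whether the comparison is strict or inclusive is not visible in this diff:

```python
from typing import Literal

def eval_fails(score: float, threshold: float,
               direction: Literal["above", "below"]) -> bool:
    """Hypothetical check mirroring the docstrings above: the eval fails
    when the score lands on the named side of the threshold (strict
    comparison assumed)."""
    return score > threshold if direction == "above" else score < threshold
```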