Skip to content

Commit 4bbdd01

Browse files
authored
Port getEvaluationRunInfos to backend (tensorzero#5254)
- Add EvaluationRunInfoByIdRow struct and get_evaluation_run_infos method to EvaluationQueries trait
- Implement ClickHouse query for evaluation run infos
- Add route handler at POST /internal/evaluations/run-infos
- Add e2e tests for the endpoint
- Update TensorZeroClient with getEvaluationRunInfos method
- Update evaluations.server.ts to use TensorZeroClient instead of direct ClickHouse query

wip: Fix bindings

Port getEvaluationRunInfos to backend

- Add EvaluationRunInfoByIdRow struct and get_evaluation_run_infos method to EvaluationQueries trait
- Implement ClickHouse query for evaluation run infos
- Add route handler at GET /internal/evaluations/run-infos with query parameters
- Add e2e tests for the endpoint
- Update TensorZeroClient with getEvaluationRunInfos method
- Update evaluations.server.ts to use TensorZeroClient instead of direct ClickHouse query

Port getEvaluationRunInfos to backend

- Add EvaluationRunInfoByIdRow struct and get_evaluation_run_infos method to EvaluationQueries trait
- Implement ClickHouse query for evaluation run infos
- Add route handler at GET /internal/evaluations/run-infos with query parameters
- Add e2e tests for the endpoint
- Add e2e database tests for evaluation_queries
- Update TensorZeroClient with getEvaluationRunInfos method
- Update evaluations.server.ts to use TensorZeroClient instead of direct ClickHouse query
1 parent f97bd1e commit 4bbdd01

File tree

16 files changed

+687
-101
lines changed

16 files changed

+687
-101
lines changed

gateway/src/routes/internal.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,10 @@ pub fn build_internal_non_otel_enabled_routes() -> Router<AppStateData> {
112112
"/internal/evaluations/runs/search",
113113
get(endpoints::internal::evaluations::search_evaluation_runs_handler),
114114
)
115+
.route(
116+
"/internal/evaluations/run-infos",
117+
get(endpoints::internal::evaluations::get_evaluation_run_infos_handler),
118+
)
115119
// Workflow evaluation endpoints
116120
.route(
117121
"/internal/workflow-evaluations/projects",
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
2+
3+
/**
4+
* Information about a single evaluation run (returned by get_evaluation_run_infos).
5+
*/
6+
export type EvaluationRunInfoById = {
7+
evaluation_run_id: string;
8+
variant_name: string;
9+
most_recent_inference_date: string;
10+
};
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
2+
import type { EvaluationRunInfoById } from "./EvaluationRunInfoById";
3+
4+
/**
5+
* Response containing evaluation run infos.
6+
*/
7+
export type GetEvaluationRunInfosResponse = {
8+
run_infos: Array<EvaluationRunInfoById>;
9+
};

internal/tensorzero-node/lib/bindings/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ export * from "./EvaluationRunErrorEvent";
7979
export * from "./EvaluationRunEvent";
8080
export * from "./EvaluationRunFatalErrorEvent";
8181
export * from "./EvaluationRunInfo";
82+
export * from "./EvaluationRunInfoById";
8283
export * from "./EvaluationRunStartEvent";
8384
export * from "./EvaluationRunStatsResponse";
8485
export * from "./EvaluationRunSuccessEvent";
@@ -118,6 +119,7 @@ export * from "./GetDatapointParams";
118119
export * from "./GetDatapointsRequest";
119120
export * from "./GetDatapointsResponse";
120121
export * from "./GetEpisodeInferenceCountResponse";
122+
export * from "./GetEvaluationRunInfosResponse";
121123
export * from "./GetFeedbackByTargetIdResponse";
122124
export * from "./GetFeedbackByVariantParams";
123125
export * from "./GetInferencesRequest";

tensorzero-core/src/db/clickhouse/evaluation_queries.rs

Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ use async_trait::async_trait;
77
use super::ClickHouseConnectionInfo;
88
use super::select_queries::{parse_count, parse_json_rows};
99
use crate::db::evaluation_queries::EvaluationQueries;
10+
use crate::db::evaluation_queries::EvaluationRunInfoByIdRow;
1011
use crate::db::evaluation_queries::EvaluationRunInfoRow;
1112
use crate::db::evaluation_queries::EvaluationRunSearchResult;
1213
use crate::error::Error;
@@ -191,6 +192,50 @@ impl EvaluationQueries for ClickHouseConnectionInfo {
191192
let response = self.run_query_synchronous(sql_query, &params).await?;
192193
parse_json_rows(response.response.as_str())
193194
}
195+
196+
async fn get_evaluation_run_infos(
197+
&self,
198+
evaluation_run_ids: &[uuid::Uuid],
199+
function_name: &str,
200+
) -> Result<Vec<EvaluationRunInfoByIdRow>, Error> {
201+
// Format evaluation_run_ids as array for ClickHouse
202+
let eval_run_ids_str: Vec<String> = evaluation_run_ids
203+
.iter()
204+
.map(|id| format!("'{id}'"))
205+
.collect();
206+
let eval_run_ids_joined = format!("[{}]", eval_run_ids_str.join(","));
207+
208+
let sql_query = r"
209+
SELECT
210+
any(run_tag.value) as evaluation_run_id,
211+
any(run_tag.variant_name) as variant_name,
212+
formatDateTime(
213+
max(UUIDv7ToDateTime(inference_id)),
214+
'%Y-%m-%dT%H:%i:%SZ'
215+
) as most_recent_inference_date
216+
FROM
217+
TagInference AS run_tag FINAL
218+
WHERE
219+
run_tag.key = 'tensorzero::evaluation_run_id'
220+
AND run_tag.value IN ({evaluation_run_ids:Array(String)})
221+
AND run_tag.function_name = {function_name:String}
222+
GROUP BY
223+
run_tag.value
224+
ORDER BY
225+
toUInt128(toUUID(evaluation_run_id)) DESC
226+
FORMAT JSONEachRow
227+
"
228+
.to_string();
229+
230+
let function_name_str = function_name.to_string();
231+
232+
let mut params = HashMap::new();
233+
params.insert("evaluation_run_ids", eval_run_ids_joined.as_str());
234+
params.insert("function_name", function_name_str.as_str());
235+
236+
let response = self.run_query_synchronous(sql_query, &params).await?;
237+
parse_json_rows(response.response.as_str())
238+
}
194239
}
195240

196241
#[cfg(test)]
@@ -568,4 +613,132 @@ mod tests {
568613

569614
assert_eq!(result.len(), 0);
570615
}
616+
617+
#[tokio::test]
618+
async fn test_get_evaluation_run_infos() {
619+
let mut mock_clickhouse_client = MockClickHouseClient::new();
620+
621+
mock_clickhouse_client
622+
.expect_run_query_synchronous()
623+
.withf(|query, params| {
624+
assert_query_contains(query, "SELECT
625+
any(run_tag.value) as evaluation_run_id,
626+
any(run_tag.variant_name) as variant_name,
627+
formatDateTime(
628+
max(UUIDv7ToDateTime(inference_id)),
629+
'%Y-%m-%dT%H:%i:%SZ'
630+
) as most_recent_inference_date
631+
FROM
632+
TagInference AS run_tag FINAL
633+
WHERE
634+
run_tag.key = 'tensorzero::evaluation_run_id'
635+
AND run_tag.value IN ({evaluation_run_ids:Array(String)})
636+
AND run_tag.function_name = {function_name:String}
637+
GROUP BY
638+
run_tag.value
639+
ORDER BY
640+
toUInt128(toUUID(evaluation_run_id)) DESC
641+
FORMAT JSONEachRow");
642+
assert_eq!(params.get("function_name"), Some(&"test_func"));
643+
assert_eq!(
644+
params.get("evaluation_run_ids"),
645+
Some(&"['0196ee9c-d808-74f3-8000-02ec7409b95d']")
646+
);
647+
true
648+
})
649+
.returning(|_, _| {
650+
Ok(ClickHouseResponse {
651+
response: r#"{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95d","variant_name":"test_variant","most_recent_inference_date":"2025-05-20T16:52:58Z"}"#.to_string(),
652+
metadata: ClickHouseResponseMetadata {
653+
read_rows: 1,
654+
written_rows: 0,
655+
},
656+
})
657+
});
658+
659+
let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));
660+
let result = conn
661+
.get_evaluation_run_infos(
662+
&[Uuid::parse_str("0196ee9c-d808-74f3-8000-02ec7409b95d").unwrap()],
663+
"test_func",
664+
)
665+
.await
666+
.unwrap();
667+
668+
assert_eq!(result.len(), 1);
669+
assert_eq!(result[0].variant_name, "test_variant");
670+
assert_eq!(
671+
result[0].evaluation_run_id,
672+
Uuid::parse_str("0196ee9c-d808-74f3-8000-02ec7409b95d").unwrap()
673+
);
674+
}
675+
676+
#[tokio::test]
677+
async fn test_get_evaluation_run_infos_multiple() {
678+
let mut mock_clickhouse_client = MockClickHouseClient::new();
679+
680+
mock_clickhouse_client
681+
.expect_run_query_synchronous()
682+
.withf(|_query, params| {
683+
assert_eq!(
684+
params.get("evaluation_run_ids"),
685+
Some(&"['0196ee9c-d808-74f3-8000-02ec7409b95d','0196ee9c-d808-74f3-8000-02ec7409b95e']")
686+
);
687+
true
688+
})
689+
.returning(|_, _| {
690+
Ok(ClickHouseResponse {
691+
response: r#"{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95d","variant_name":"variant1","most_recent_inference_date":"2025-05-20T16:52:58Z"}
692+
{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95e","variant_name":"variant2","most_recent_inference_date":"2025-05-20T17:52:58Z"}"#.to_string(),
693+
metadata: ClickHouseResponseMetadata {
694+
read_rows: 2,
695+
written_rows: 0,
696+
},
697+
})
698+
});
699+
700+
let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));
701+
let result = conn
702+
.get_evaluation_run_infos(
703+
&[
704+
Uuid::parse_str("0196ee9c-d808-74f3-8000-02ec7409b95d").unwrap(),
705+
Uuid::parse_str("0196ee9c-d808-74f3-8000-02ec7409b95e").unwrap(),
706+
],
707+
"test_func",
708+
)
709+
.await
710+
.unwrap();
711+
712+
assert_eq!(result.len(), 2);
713+
assert_eq!(result[0].variant_name, "variant1");
714+
assert_eq!(result[1].variant_name, "variant2");
715+
}
716+
717+
#[tokio::test]
718+
async fn test_get_evaluation_run_infos_empty() {
719+
let mut mock_clickhouse_client = MockClickHouseClient::new();
720+
721+
mock_clickhouse_client
722+
.expect_run_query_synchronous()
723+
.returning(|_, _| {
724+
Ok(ClickHouseResponse {
725+
response: String::new(),
726+
metadata: ClickHouseResponseMetadata {
727+
read_rows: 0,
728+
written_rows: 0,
729+
},
730+
})
731+
});
732+
733+
let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock_clickhouse_client));
734+
let result = conn
735+
.get_evaluation_run_infos(
736+
&[Uuid::parse_str("0196ee9c-d808-74f3-8000-02ec7409b95d").unwrap()],
737+
"nonexistent_func",
738+
)
739+
.await
740+
.unwrap();
741+
742+
assert_eq!(result.len(), 0);
743+
}
571744
}

tensorzero-core/src/db/evaluation_queries.rs

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,15 @@ pub struct EvaluationRunSearchResult {
2828
pub variant_name: String,
2929
}
3030

31+
/// Database struct for deserializing evaluation run info by IDs from ClickHouse.
32+
/// This is a simpler struct than `EvaluationRunInfoRow` - used when querying by specific run IDs.
33+
#[derive(Debug, Deserialize)]
34+
pub struct EvaluationRunInfoByIdRow {
35+
pub evaluation_run_id: Uuid,
36+
pub variant_name: String,
37+
pub most_recent_inference_date: DateTime<Utc>,
38+
}
39+
3140
/// Trait for evaluation-related queries.
3241
#[async_trait]
3342
#[cfg_attr(test, automock)]
@@ -58,4 +67,11 @@ pub trait EvaluationQueries {
5867
limit: u32,
5968
offset: u32,
6069
) -> Result<Vec<EvaluationRunSearchResult>, Error>;
70+
71+
/// Gets evaluation run info for specific evaluation run IDs and function name.
72+
async fn get_evaluation_run_infos(
73+
&self,
74+
evaluation_run_ids: &[Uuid],
75+
function_name: &str,
76+
) -> Result<Vec<EvaluationRunInfoByIdRow>, Error>;
6177
}

0 commit comments

Comments
 (0)