Govcraft
diff --git a/‎gateway/src/routes/internal.rs‎
Lines changed: 4 additions & 0 deletions b/‎gateway/src/routes/internal.rs‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎internal/tensorzero-node/lib/bindings/EvaluationRunInfoById.ts‎
Lines changed: 10 additions & 0 deletions b/‎internal/tensorzero-node/lib/bindings/EvaluationRunInfoById.ts‎
Lines changed: 10 additions & 0 deletions
diff --git a/‎internal/tensorzero-node/lib/bindings/GetEvaluationRunInfosResponse.ts‎
Lines changed: 9 additions & 0 deletions b/‎internal/tensorzero-node/lib/bindings/GetEvaluationRunInfosResponse.ts‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎internal/tensorzero-node/lib/bindings/index.ts‎
Lines changed: 2 additions & 0 deletions b/‎internal/tensorzero-node/lib/bindings/index.ts‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎tensorzero-core/src/db/clickhouse/evaluation_queries.rs‎
Lines changed: 173 additions & 0 deletions b/‎tensorzero-core/src/db/clickhouse/evaluation_queries.rs‎
Lines changed: 173 additions & 0 deletions
diff --git a/‎tensorzero-core/src/db/evaluation_queries.rs‎
Lines changed: 16 additions & 0 deletions b/‎tensorzero-core/src/db/evaluation_queries.rs‎
Lines changed: 16 additions & 0 deletions
@@ -112,6 +112,10 @@ pub fn build_internal_non_otel_enabled_routes() -> Router<AppStateData> {
             "/internal/evaluations/runs/search",
             get(endpoints::internal::evaluations::search_evaluation_runs_handler),
         )
+        .route(
+            "/internal/evaluations/run-infos",
+            get(endpoints::internal::evaluations::get_evaluation_run_infos_handler),
+        )
         // Workflow evaluation endpoints
         .route(
             "/internal/workflow-evaluations/projects",
 
@@ -0,0 +1,10 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+
+/**
+ * Information about a single evaluation run (returned by get_evaluation_run_infos).
+ */
+export type EvaluationRunInfoById = {
+  // Identifier of the evaluation run, carried as a string.
+  evaluation_run_id: string;
+  // Variant name reported for the run.
+  variant_name: string;
+  // Date of the run's most recent inference, carried as a string.
+  // NOTE(review): presumably an ISO 8601 UTC timestamp — confirm against the Rust source type.
+  most_recent_inference_date: string;
+};
@@ -0,0 +1,9 @@
+// This file was generated by [ts-rs](https://github.com/Aleph-Alpha/ts-rs). Do not edit this file manually.
+import type { EvaluationRunInfoById } from "./EvaluationRunInfoById";
+
+/**
+ * Response containing evaluation run infos.
+ */
+export type GetEvaluationRunInfosResponse = {
+  // One `EvaluationRunInfoById` entry per evaluation run included in the response.
+  run_infos: Array<EvaluationRunInfoById>;
+};
@@ -79,6 +79,7 @@ export * from "./EvaluationRunErrorEvent";
 export * from "./EvaluationRunEvent";
 export * from "./EvaluationRunFatalErrorEvent";
 export * from "./EvaluationRunInfo";
+export * from "./EvaluationRunInfoById";
 export * from "./EvaluationRunStartEvent";
 export * from "./EvaluationRunStatsResponse";
 export * from "./EvaluationRunSuccessEvent";
@@ -118,6 +119,7 @@ export * from "./GetDatapointParams";
 export * from "./GetDatapointsRequest";
 export * from "./GetDatapointsResponse";
 export * from "./GetEpisodeInferenceCountResponse";
+export * from "./GetEvaluationRunInfosResponse";
 export * from "./GetFeedbackByTargetIdResponse";
 export * from "./GetFeedbackByVariantParams";
 export * from "./GetInferencesRequest";
 
@@ -7,6 +7,7 @@ use async_trait::async_trait;
 use super::ClickHouseConnectionInfo;
 use super::select_queries::{parse_count, parse_json_rows};
 use crate::db::evaluation_queries::EvaluationQueries;
+use crate::db::evaluation_queries::EvaluationRunInfoByIdRow;
 use crate::db::evaluation_queries::EvaluationRunInfoRow;
 use crate::db::evaluation_queries::EvaluationRunSearchResult;
 use crate::error::Error;
@@ -191,6 +192,50 @@ impl EvaluationQueries for ClickHouseConnectionInfo {
         let response = self.run_query_synchronous(sql_query, &params).await?;
         parse_json_rows(response.response.as_str())
     }
+
+    /// Looks up the variant name and most recent inference timestamp for each of the
+    /// given evaluation runs, restricted to inferences tagged for `function_name`.
+    ///
+    /// The query reads `TagInference` rows whose key is `tensorzero::evaluation_run_id`,
+    /// groups them by run ID, and orders the result by the run ID's 128-bit integer
+    /// value, descending. Run IDs with no matching rows are simply absent from the result.
+    ///
+    /// Returns an empty vector without contacting ClickHouse when `evaluation_run_ids`
+    /// is empty.
+    ///
+    /// # Errors
+    ///
+    /// Propagates any error from running the query or from parsing the returned rows.
+    async fn get_evaluation_run_infos(
+        &self,
+        evaluation_run_ids: &[uuid::Uuid],
+        function_name: &str,
+    ) -> Result<Vec<EvaluationRunInfoByIdRow>, Error> {
+        // Nothing can match an empty ID list, so skip the round trip to ClickHouse.
+        if evaluation_run_ids.is_empty() {
+            return Ok(Vec::new());
+        }
+
+        // Render the IDs as a ClickHouse array literal, e.g. `['id1','id2']`.
+        let quoted_ids: Vec<String> = evaluation_run_ids
+            .iter()
+            .map(|id| format!("'{id}'"))
+            .collect();
+        let eval_run_ids_joined = format!("[{}]", quoted_ids.join(","));
+
+        let sql_query = r"
+            SELECT
+                any(run_tag.value) as evaluation_run_id,
+                any(run_tag.variant_name) as variant_name,
+                formatDateTime(
+                    max(UUIDv7ToDateTime(inference_id)),
+                    '%Y-%m-%dT%H:%i:%SZ'
+                ) as most_recent_inference_date
+            FROM
+                TagInference AS run_tag FINAL
+            WHERE
+                run_tag.key = 'tensorzero::evaluation_run_id'
+                AND run_tag.value IN ({evaluation_run_ids:Array(String)})
+                AND run_tag.function_name = {function_name:String}
+            GROUP BY
+                run_tag.value
+            ORDER BY
+                toUInt128(toUUID(evaluation_run_id)) DESC
+            FORMAT JSONEachRow
+        "
+        .to_string();
+
+        let mut params = HashMap::new();
+        params.insert("evaluation_run_ids", eval_run_ids_joined.as_str());
+        // `function_name` is already a `&str`, so it can be bound without an extra allocation.
+        params.insert("function_name", function_name);
+
+        let response = self.run_query_synchronous(sql_query, &params).await?;
+        parse_json_rows(response.response.as_str())
+    }
 }
 
 #[cfg(test)]
@@ -568,4 +613,132 @@ mod tests {
 
         assert_eq!(result.len(), 0);
     }
+
+    #[tokio::test]
+    async fn test_get_evaluation_run_infos() {
+        // Single run ID: checks the generated SQL, the bound parameters, and row parsing.
+        let run_id = Uuid::parse_str("0196ee9c-d808-74f3-8000-02ec7409b95d").unwrap();
+
+        let mut mock = MockClickHouseClient::new();
+        mock.expect_run_query_synchronous()
+            .withf(|query, params| {
+                assert_query_contains(query, "SELECT
+                    any(run_tag.value) as evaluation_run_id,
+                    any(run_tag.variant_name) as variant_name,
+                    formatDateTime(
+                        max(UUIDv7ToDateTime(inference_id)),
+                        '%Y-%m-%dT%H:%i:%SZ'
+                    ) as most_recent_inference_date
+                FROM
+                    TagInference AS run_tag FINAL
+                WHERE
+                    run_tag.key = 'tensorzero::evaluation_run_id'
+                    AND run_tag.value IN ({evaluation_run_ids:Array(String)})
+                    AND run_tag.function_name = {function_name:String}
+                GROUP BY
+                    run_tag.value
+                ORDER BY
+                    toUInt128(toUUID(evaluation_run_id)) DESC
+                FORMAT JSONEachRow");
+                assert_eq!(params.get("function_name"), Some(&"test_func"));
+                assert_eq!(
+                    params.get("evaluation_run_ids"),
+                    Some(&"['0196ee9c-d808-74f3-8000-02ec7409b95d']")
+                );
+                true
+            })
+            .returning(|_, _| {
+                let metadata = ClickHouseResponseMetadata {
+                    read_rows: 1,
+                    written_rows: 0,
+                };
+                Ok(ClickHouseResponse {
+                    response: r#"{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95d","variant_name":"test_variant","most_recent_inference_date":"2025-05-20T16:52:58Z"}"#.to_string(),
+                    metadata,
+                })
+            });
+
+        let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock));
+        let rows = conn
+            .get_evaluation_run_infos(&[run_id], "test_func")
+            .await
+            .unwrap();
+
+        assert_eq!(rows.len(), 1);
+        let row = &rows[0];
+        assert_eq!(row.variant_name, "test_variant");
+        assert_eq!(row.evaluation_run_id, run_id);
+    }
+
+    #[tokio::test]
+    async fn test_get_evaluation_run_infos_multiple() {
+        // Two run IDs: checks the array parameter encoding and that both rows parse in order.
+        let first_id = Uuid::parse_str("0196ee9c-d808-74f3-8000-02ec7409b95d").unwrap();
+        let second_id = Uuid::parse_str("0196ee9c-d808-74f3-8000-02ec7409b95e").unwrap();
+
+        let mut mock = MockClickHouseClient::new();
+        mock.expect_run_query_synchronous()
+            .withf(|_query, params| {
+                assert_eq!(
+                    params.get("evaluation_run_ids"),
+                    Some(&"['0196ee9c-d808-74f3-8000-02ec7409b95d','0196ee9c-d808-74f3-8000-02ec7409b95e']")
+                );
+                true
+            })
+            .returning(|_, _| {
+                let metadata = ClickHouseResponseMetadata {
+                    read_rows: 2,
+                    written_rows: 0,
+                };
+                Ok(ClickHouseResponse {
+                    response: r#"{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95d","variant_name":"variant1","most_recent_inference_date":"2025-05-20T16:52:58Z"}
+{"evaluation_run_id":"0196ee9c-d808-74f3-8000-02ec7409b95e","variant_name":"variant2","most_recent_inference_date":"2025-05-20T17:52:58Z"}"#.to_string(),
+                    metadata,
+                })
+            });
+
+        let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock));
+        let rows = conn
+            .get_evaluation_run_infos(&[first_id, second_id], "test_func")
+            .await
+            .unwrap();
+
+        assert_eq!(rows.len(), 2);
+        let variant_names: Vec<&str> = rows.iter().map(|row| row.variant_name.as_str()).collect();
+        assert_eq!(variant_names, ["variant1", "variant2"]);
+    }
+
+    #[tokio::test]
+    async fn test_get_evaluation_run_infos_empty() {
+        // An empty ClickHouse response body must parse into an empty result set.
+        let run_id = Uuid::parse_str("0196ee9c-d808-74f3-8000-02ec7409b95d").unwrap();
+
+        let mut mock = MockClickHouseClient::new();
+        mock.expect_run_query_synchronous().returning(|_, _| {
+            let metadata = ClickHouseResponseMetadata {
+                read_rows: 0,
+                written_rows: 0,
+            };
+            Ok(ClickHouseResponse {
+                response: String::new(),
+                metadata,
+            })
+        });
+
+        let conn = ClickHouseConnectionInfo::new_mock(Arc::new(mock));
+        let rows = conn
+            .get_evaluation_run_infos(&[run_id], "nonexistent_func")
+            .await
+            .unwrap();
+
+        assert_eq!(rows.len(), 0);
+    }
 }
@@ -28,6 +28,15 @@ pub struct EvaluationRunSearchResult {
     pub variant_name: String,
 }
 
+/// Database struct for deserializing evaluation run info by IDs from ClickHouse.
+/// This is a simpler struct than `EvaluationRunInfoRow` - used when querying by specific run IDs.
+#[derive(Debug, Deserialize)]
+pub struct EvaluationRunInfoByIdRow {
+    /// ID of the evaluation run this row describes.
+    pub evaluation_run_id: Uuid,
+    /// Variant name reported for the run.
+    pub variant_name: String,
+    /// Timestamp (UTC) of the most recent inference recorded for the run.
+    pub most_recent_inference_date: DateTime<Utc>,
+}
+
 /// Trait for evaluation-related queries.
 #[async_trait]
 #[cfg_attr(test, automock)]
@@ -58,4 +67,11 @@ pub trait EvaluationQueries {
         limit: u32,
         offset: u32,
     ) -> Result<Vec<EvaluationRunSearchResult>, Error>;
+
+    /// Gets evaluation run info for specific evaluation run IDs and function name.
+    ///
+    /// * `evaluation_run_ids` - the evaluation runs to look up.
+    /// * `function_name` - the function the runs must belong to.
+    ///
+    /// Returns the matching rows as `EvaluationRunInfoByIdRow` values, or an `Error`
+    /// if the lookup fails.
+    async fn get_evaluation_run_infos(
+        &self,
+        evaluation_run_ids: &[Uuid],
+        function_name: &str,
+    ) -> Result<Vec<EvaluationRunInfoByIdRow>, Error>;
 }