Govcraft
diff --git a/.cargo/config.toml b/.cargo/config.toml
Lines changed: 11 additions & 0 deletions
diff --git a/.github/workflows/build-gateway-container.yml b/.github/workflows/build-gateway-container.yml
Lines changed: 2 additions & 0 deletions
diff --git a/.github/workflows/build-mock-inference-container.yml b/.github/workflows/build-mock-inference-container.yml
Lines changed: 2 additions & 0 deletions
diff --git a/AGENTS.md b/AGENTS.md
Lines changed: 1 addition & 0 deletions
diff --git a/Cargo.lock b/Cargo.lock
Lines changed: 2 additions & 0 deletions
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 1 addition & 0 deletions
diff --git a/clients/python/src/lib.rs b/clients/python/src/lib.rs
Lines changed: 53 additions & 5 deletions
diff --git a/clients/python/tensorzero/__init__.py b/clients/python/tensorzero/__init__.py
Lines changed: 3 additions & 0 deletions
diff --git a/clients/python/tensorzero/generated_types.py b/clients/python/tensorzero/generated_types.py
Lines changed: 1 addition & 1 deletion
diff --git a/clients/python/tensorzero/tensorzero.pyi b/clients/python/tensorzero/tensorzero.pyi
Lines changed: 22 additions & 0 deletions
@@ -75,6 +75,17 @@ test-clickhouse = [
     "--profile",
     "clickhouse",
 ]
+test-clickhouse-fast = [
+    "nextest",
+    "run",
+    "--features",
+    "e2e_tests",
+    "--profile",
+    "clickhouse",
+    "--retries",
+    "0",
+    "--no-fail-fast",
+]
 test-rate-limit-load = [
     "run",
     "--release",
 
@@ -50,6 +50,8 @@ jobs:
 
       - name: Login to Namespace registry
         run: nsc docker login
+        continue-on-error: ${{ github.event.pull_request.head.repo.full_name != github.repository || github.actor == 'dependabot[bot]' }}
 
       - name: Push `gateway` container to Namespace registry
         run: docker push nscr.io/igvf4asmf8kri/gateway:sha-${{ github.sha }}
+        continue-on-error: ${{ github.event.pull_request.head.repo.full_name != github.repository || github.actor == 'dependabot[bot]' }}
@@ -48,6 +48,8 @@ jobs:
 
       - name: Login to Namespace registry
         run: nsc docker login
+        continue-on-error: ${{ github.event.pull_request.head.repo.full_name != github.repository || github.actor == 'dependabot[bot]' }}
 
       - name: Push `mock-inference` container to Namespace registry
         run: docker push nscr.io/igvf4asmf8kri/mock-inference-provider:sha-${{ github.sha }}
+        continue-on-error: ${{ github.event.pull_request.head.repo.full_name != github.repository || github.actor == 'dependabot[bot]' }}
@@ -8,6 +8,7 @@
   - Run `cargo fmt`.
   - Run `cargo clippy --all-targets --all-features -- -D warnings` to catch warnings and errors.
   - Run unit tests with `cargo test-unit-fast` which uses `nextest` under the hood.
+- When writing tests, key assertions should include a custom message stating the expected behavior.
 
 ## For APIs
 
 
@@ -98,6 +98,7 @@ metrics-exporter-prometheus = { version = "0.18.0", features = [
     "http-listener",
 ], default-features = false }
 schemars = "1.1.0"
+blake3 = "1.8.2"
 moka = { version = "0.12.10", features = ["sync"] }
 
 [workspace.lints.rust]
 
@@ -11,7 +11,10 @@
 /// and defines methods on them.
 use std::{collections::HashMap, path::PathBuf, sync::Arc, time::Duration};
 
-use evaluations::{EvaluationCoreArgs, EvaluationVariant, run_evaluation_core_streaming};
+use evaluations::{
+    EvaluationCoreArgs, EvaluationFunctionConfig, EvaluationFunctionConfigTable, EvaluationVariant,
+    run_evaluation_core_streaming,
+};
 use futures::StreamExt;
 use pyo3::{
     IntoPyObjectExt,
@@ -43,8 +46,8 @@ use tensorzero_core::{
         OptimizationJobInfoPyClass, OptimizationJobStatus, UninitializedOptimizerInfo,
         dicl::UninitializedDiclOptimizationConfig, fireworks_sft::UninitializedFireworksSFTConfig,
         gcp_vertex_gemini_sft::UninitializedGCPVertexGeminiSFTConfig,
-        openai_rft::UninitializedOpenAIRFTConfig, openai_sft::UninitializedOpenAISFTConfig,
-        together_sft::UninitializedTogetherSFTConfig,
+        gepa::UninitializedGEPAConfig, openai_rft::UninitializedOpenAIRFTConfig,
+        openai_sft::UninitializedOpenAISFTConfig, together_sft::UninitializedTogetherSFTConfig,
     },
     tool::ProviderTool,
     variant::{
@@ -107,6 +110,7 @@ fn tensorzero(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<UninitializedFireworksSFTConfig>()?;
     m.add_class::<UninitializedDiclOptimizationConfig>()?;
     m.add_class::<UninitializedGCPVertexGeminiSFTConfig>()?;
+    m.add_class::<UninitializedGEPAConfig>()?;
     m.add_class::<UninitializedTogetherSFTConfig>()?;
     m.add_class::<Datapoint>()?;
     m.add_class::<ResolvedInput>()?;
@@ -1431,10 +1435,32 @@ impl TensorZeroGateway {
             })
             .transpose()?;
 
+        // Extract evaluation config from app_state
+        let evaluation_config = app_state
+            .config
+            .evaluations
+            .get(&evaluation_name)
+            .ok_or_else(|| {
+                pyo3::exceptions::PyValueError::new_err(format!(
+                    "evaluation '{evaluation_name}' not found"
+                ))
+            })?
+            .clone();
+
+        // Build function configs table from all functions in the config
+        let function_configs: EvaluationFunctionConfigTable = app_state
+            .config
+            .functions
+            .iter()
+            .map(|(name, func)| (name.clone(), EvaluationFunctionConfig::from(func.as_ref())))
+            .collect();
+        let function_configs = Arc::new(function_configs);
+
         let core_args = EvaluationCoreArgs {
             tensorzero_client: client.clone(),
             clickhouse_client: app_state.clickhouse_connection_info.clone(),
-            config: app_state.config.clone(),
+            evaluation_config,
+            function_configs,
             evaluation_name,
             evaluation_run_id,
             dataset_name,
@@ -2646,10 +2672,32 @@ impl AsyncTensorZeroGateway {
 
             let evaluation_run_id = uuid::Uuid::now_v7();
 
+            // Extract evaluation config from app_state
+            let evaluation_config = app_state
+                .config
+                .evaluations
+                .get(&evaluation_name)
+                .ok_or_else(|| {
+                    pyo3::exceptions::PyValueError::new_err(format!(
+                        "evaluation '{evaluation_name}' not found"
+                    ))
+                })?
+                .clone();
+
+            // Build function configs table from all functions in the config
+            let function_configs: EvaluationFunctionConfigTable = app_state
+                .config
+                .functions
+                .iter()
+                .map(|(name, func)| (name.clone(), EvaluationFunctionConfig::from(func.as_ref())))
+                .collect();
+            let function_configs = Arc::new(function_configs);
+
             let core_args = EvaluationCoreArgs {
                 tensorzero_client: client.clone(),
                 clickhouse_client: app_state.clickhouse_connection_info.clone(),
-                config: app_state.config.clone(),
+                evaluation_config,
+                function_configs,
                 evaluation_name,
                 evaluation_run_id,
                 dataset_name,
 
@@ -93,6 +93,7 @@
     FunctionConfigJson,
     FunctionsConfig,
     GCPVertexGeminiSFTConfig,
+    GEPAConfig,
     LegacyDatapoint,
     MixtureOfNConfig,
     OpenAIRFTConfig,
@@ -187,6 +188,7 @@ def __new__(cls, *args: Any, **kwargs: Any):
     TogetherSFTConfig,
     DICLOptimizationConfig,
     OpenAIRFTConfig,
+    GEPAConfig,
     t.Dict[str, Any],
 ]
 ChatInferenceOutput = t.List[ContentBlock]
@@ -240,6 +242,7 @@ def __new__(cls, *args: Any, **kwargs: Any):
     "FunctionsConfig",
     "FunctionTool",
     "GCPVertexGeminiSFTConfig",
+    "GEPAConfig",
     "GetDatapointsResponse",
     "GetInferencesRequest",
     "GetInferencesResponse",
 
@@ -1200,7 +1200,7 @@ class Thought:
     Struct that represents a model's reasoning
     """
 
-    _internal_provider_type: str | None = None
+    provider_type: str | None = None
     """
     When set, this `Thought` block will only be used for providers
     matching this type (e.g. `anthropic`). Other providers will emit
 
@@ -319,6 +319,27 @@ class GCPVertexGeminiSFTConfig:
         bucket_path_prefix: Optional[str] = None,
     ) -> None: ...
 
+@final
+class GEPAConfig:
+    def __init__(
+        self,
+        *,
+        function_name: str,
+        evaluation_name: str,
+        analysis_model: str,
+        mutation_model: str,
+        initial_variants: Optional[List[str]] = None,
+        variant_prefix: Optional[str] = None,
+        batch_size: Optional[int] = None,
+        max_iterations: Optional[int] = None,
+        max_concurrency: Optional[int] = None,
+        seed: Optional[int] = None,
+        timeout: Optional[int] = None,
+        include_inference_for_mutation: Optional[bool] = None,
+        retries: Optional[Dict[str, Any]] = None,
+        max_tokens: Optional[int] = None,
+    ) -> None: ...
+
 @final
 class TogetherSFTConfig:
     """
@@ -1589,6 +1610,7 @@ __all__ = [
     "FunctionConfigJson",
     "FunctionsConfig",
     "GCPVertexGeminiSFTConfig",
+    "GEPAConfig",
     "LocalHttpGateway",
     "MixtureOfNConfig",
     "OpenAIRFTConfig",