Kiln-AI · sfierro · Jan 27, 2026 · Jan 23, 2026 · Jan 23, 2026 · Jan 25, 2026
diff --git a/app/web_ui/src/lib/api_schema.d.ts b/app/web_ui/src/lib/api_schema.d.ts
@@ -326,6 +326,37 @@ export interface paths {
         patch: operations["update_spec_api_projects__project_id__tasks__task_id__specs__spec_id__patch"];
         trace?: never;
     };
+    "/api/projects/{project_id}/tasks/{task_id}/spec_with_copilot": {
+        parameters: {
+            query?: never;
+            header?: never;
+            path?: never;
+            cookie?: never;
+        };
+        get?: never;
+        put?: never;
+        /**
+         * Create Spec With Copilot
+         * @description Create a spec using Kiln Copilot.
+         *
+         *     This endpoint uses Kiln Copilot to create a spec with:
+         *     1. An eval for the spec with appropriate template
+         *     2. Batch examples via copilot API for eval, train, and golden datasets
+         *     3. A judge eval config (if judge_info provided)
+         *     4. The spec itself
+         *
+         *     If you don't need copilot, use POST /spec instead.
+         *
+         *     All models are validated before any saves occur. If validation fails,
+         *     no data is persisted.
+         */
+        post: operations["create_spec_with_copilot_api_projects__project_id__tasks__task_id__spec_with_copilot_post"];
+        delete?: never;
+        options?: never;
+        head?: never;
+        patch?: never;
+        trace?: never;
+    };
     "/api/projects/{project_id}/tasks/{task_id}/runs/{run_id}": {
         parameters: {
             query?: never;
@@ -2717,9 +2748,9 @@ export interface components {
         ClarifySpecApiOutput: {
             /** Examples For Feedback */
             examples_for_feedback: components["schemas"]["SubsampleBatchOutputItemApi"][];
-            judge_result: components["schemas"]["PromptGenerationResultApi"];
-            topic_generation_result: components["schemas"]["PromptGenerationResultApi"];
-            input_generation_result: components["schemas"]["PromptGenerationResultApi"];
+            judge_result: components["schemas"]["PromptGenerationResultApi-Output"];
+            topic_generation_result: components["schemas"]["PromptGenerationResultApi-Output"];
+            input_generation_result: components["schemas"]["PromptGenerationResultApi-Output"];
         };
         /** CohereCompatibleProperties */
         CohereCompatibleProperties: {
@@ -2990,6 +3021,54 @@ export interface components {
              */
             properties: components["schemas"]["CohereCompatibleProperties"];
         };
+        /**
+         * CreateSpecWithCopilotRequest
+         * @description Request model for creating a spec with Kiln Copilot.
+         *
+         *     This endpoint uses Kiln Copilot to:
+         *     - Generate batch examples for eval, train, and golden datasets
+         *     - Create a judge eval config
+         *     - Create an eval with appropriate template/output scores
+         *     - Create and save the spec
+         *
+         *     If you don't want to use copilot, use the regular POST /spec endpoint instead.
+         *
+         *     The client is responsible for building:
+         *     - definition: The spec definition string (use buildSpecDefinition on client)
+         *     - properties: The spec properties object (filtered, with spec_type included)
+         */
+        CreateSpecWithCopilotRequest: {
+            /** Name */
+            name: string;
+            /**
+             * Definition
+             * @description The spec definition string, built by client using buildSpecDefinition()
+             */
+            definition: string;
+            /**
+             * Properties
+             * @description The spec properties object, pre-built by client with spec_type included
+             */
+            properties: components["schemas"]["DesiredBehaviourProperties"] | components["schemas"]["IssueProperties"] | components["schemas"]["ToneProperties"] | components["schemas"]["FormattingProperties"] | components["schemas"]["LocalizationProperties"] | components["schemas"]["AppropriateToolUseProperties"] | components["schemas"]["ReferenceAnswerAccuracyProperties"] | components["schemas"]["FactualCorrectnessProperties"] | components["schemas"]["HallucinationsProperties"] | components["schemas"]["CompletenessProperties"] | components["schemas"]["ToxicityProperties"] | components["schemas"]["BiasProperties"] | components["schemas"]["MaliciousnessProperties"] | components["schemas"]["NsfwProperties"] | components["schemas"]["TabooProperties"] | components["schemas"]["JailbreakProperties"] | components["schemas"]["PromptLeakageProperties"];
+            /**
+             * Evaluate Full Trace
+             * @default false
+             */
+            evaluate_full_trace: boolean;
+            /** Reviewed Examples */
+            reviewed_examples?: components["schemas"]["ReviewedExample"][];
+            judge_info: components["schemas"]["PromptGenerationResultApi-Input"];
+            /**
+             * Task Description
+             * @default
+             */
+            task_description: string;
+            /**
+             * Task Prompt With Few Shot
+             * @default
+             */
+            task_prompt_with_few_shot: string;
+        };
         /** CreateTaskRunConfigRequest */
         CreateTaskRunConfigRequest: {
             /** Name */
@@ -5037,7 +5116,13 @@ export interface components {
             chain_of_thought_instructions?: string | null;
         };
         /** PromptGenerationResultApi */
-        PromptGenerationResultApi: {
+        "PromptGenerationResultApi-Input": {
+            task_metadata: components["schemas"]["TaskMetadataApi"];
+            /** Prompt */
+            prompt: string;
+        };
+        /** PromptGenerationResultApi */
+        "PromptGenerationResultApi-Output": {
             task_metadata: components["schemas"]["TaskMetadataApi"];
             /** Prompt */
             prompt: string;
@@ -5545,6 +5630,25 @@ export interface components {
             /** Models */
             models: components["schemas"]["RerankerModelDetails"][];
         };
+        /**
+         * ReviewedExample
+         * @description A reviewed example from the spec review process.
+         *
+         *     Extends SampleApi with review-specific fields for tracking
+         *     model and user judgments on spec compliance.
+         */
+        ReviewedExample: {
+            /** Input */
+            input: string;
+            /** Output */
+            output: string;
+            /** Model Says Meets Spec */
+            model_says_meets_spec: boolean;
+            /** User Says Meets Spec */
+            user_says_meets_spec: boolean;
+            /** Feedback */
+            feedback: string;
+        };
         /** RunConfigEvalResult */
         RunConfigEvalResult: {
             /** Eval Id */
@@ -7333,6 +7437,42 @@ export interface operations {
             };
         };
     };
+    create_spec_with_copilot_api_projects__project_id__tasks__task_id__spec_with_copilot_post: {
+        parameters: {
+            query?: never;
+            header?: never;
+            path: {
+                project_id: string;
+                task_id: string;
+            };
+            cookie?: never;
+        };
+        requestBody: {
+            content: {
+                "application/json": components["schemas"]["CreateSpecWithCopilotRequest"];
+            };
+        };
+        responses: {
+            /** @description Successful Response */
+            200: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["Spec"];
+                };
+            };
+            /** @description Validation Error */
+            422: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["HTTPValidationError"];
+                };
+            };
+        };
+    };
     get_run_api_projects__project_id__tasks__task_id__runs__run_id__get: {
         parameters: {
             query?: never;

diff --git a/app/web_ui/src/lib/types.ts b/app/web_ui/src/lib/types.ts
@@ -123,6 +123,15 @@ export type DocumentLibraryState = components["schemas"]["DocumentLibraryState"]
 export type Spec = components["schemas"]["Spec"]
 export type SpecStatus = components["schemas"]["SpecStatus"]
 export type Priority = components["schemas"]["Priority"]
+
+// Copilot API types
+export type PromptGenerationResultApi =
+  components["schemas"]["PromptGenerationResultApi-Input"]
+export type TaskMetadataApi = components["schemas"]["TaskMetadataApi"]
+export type ReviewedExample = components["schemas"]["ReviewedExample"]
+export type SampleApi = components["schemas"]["SampleApi"]
+export type SubsampleBatchOutputItemApi =
+  components["schemas"]["SubsampleBatchOutputItemApi"]
 export type SpecProperties =
   | components["schemas"]["AppropriateToolUseProperties"]
   | components["schemas"]["DesiredBehaviourProperties"]

diff --git a/app/web_ui/src/routes/(app)/specs/[project_id]/[task_id]/spec_builder/+page.svelte b/app/web_ui/src/routes/(app)/specs/[project_id]/[task_id]/spec_builder/+page.svelte
@@ -11,6 +11,9 @@
     QuestionSet,
     SubmitAnswersRequest,
     QuestionWithAnswer,
+    SpecProperties,
+    PromptGenerationResultApi,
+    ReviewedExample,
   } from "$lib/types"
   import { goto } from "$app/navigation"
   import { spec_field_configs } from "../select_template/spec_templates"
@@ -20,11 +23,6 @@
     buildSpecDefinition,
     type SuggestedEdit,
   } from "../spec_utils"
-  import {
-    createSpec,
-    type JudgeInfo,
-    type ReviewedExample,
-  } from "./spec_persistence"
   import { client } from "$lib/api_client"
   import {
     load_task,
@@ -38,6 +36,7 @@
   import type { FewShotExample } from "$lib/utils/few_shot_example"
   import { build_prompt_with_few_shot } from "$lib/utils/few_shot_example"
   import Questions from "./questions.svelte"
+  import type { ReviewRow } from "./spec_utils.ts"
 
   $: project_id = $page.params.project_id!
   $: task_id = $page.params.task_id!
@@ -113,12 +112,10 @@
   let question_set: QuestionSet | null = null
 
   // Review state
-  type ReviewRow = ReviewedExample & { id: string }
-
   let review_rows: ReviewRow[] = []
   let reviewed_examples: ReviewedExample[] = []
 
-  let judge_info: JudgeInfo | null = null
+  let judge_info: PromptGenerationResultApi | null = null
 
   // Refine state
   let refined_property_values: Record<string, string | null> = {}
@@ -289,14 +286,11 @@
       throw new Error("Failed to analyze spec for review. Please try again.")
     }
 
-    judge_info = {
-      prompt: data.judge_result.prompt,
-      model_name: data.judge_result.task_metadata.model_name,
-      model_provider: data.judge_result.task_metadata.model_provider_name,
-    }
+    // Assign judge_result as-is: its PromptGenerationResultApi-Output type must stay assignable to the -Input alias used by judge_info
+    judge_info = data.judge_result
 
     review_rows = data.examples_for_feedback.map((example, index) => ({
-      id: String(index + 1),
+      row_id: String(index + 1),
       input: example.input,
       output: example.output,
       model_says_meets_spec: !example.fails_specification,
@@ -349,20 +343,73 @@
     examples: ReviewedExample[],
     signal?: AbortSignal,
   ) {
-    const spec_id = await createSpec(
-      project_id,
-      task_id,
-      task?.instruction || "",
-      task_prompt_with_few_shot,
-      name,
-      spec_type,
-      values,
-      use_kiln_copilot,
-      evaluate_full_trace,
-      examples,
-      judge_info,
-      signal,
+    // Build definition and properties on the client side
+    const definition = buildSpecDefinition(spec_type, values)
+
+    // Build properties object with spec_type, filtering out null and empty values
+    const filteredValues = Object.fromEntries(
+      Object.entries(values).filter(
+        ([_, value]) => value !== null && value.trim() !== "",
+      ),
     )
+    const properties = {
+      spec_type: spec_type,
+      ...filteredValues,
+    } as SpecProperties
+
+    // Call the appropriate endpoint based on whether copilot is being used
+    let spec_id: string | null | undefined
+    if (use_kiln_copilot) {
+      if (!judge_info) {
+        throw new Error("Judge info is required for copilot spec creation")
+      }
+      const { data, error: api_error } = await client.POST(
+        "/api/projects/{project_id}/tasks/{task_id}/spec_with_copilot",
+        {
+          params: { path: { project_id, task_id } },
+          body: {
+            name,
+            definition,
+            properties,
+            evaluate_full_trace,
+            reviewed_examples: examples.map((e) => ({
+              ...e,
+              user_says_meets_spec: e.user_says_meets_spec ?? false,
+            })),
+            judge_info,
+            task_description: task?.instruction || "",
+            task_prompt_with_few_shot,
+          },
+          signal,
+        },
+      )
+      if (api_error) throw api_error
+      spec_id = data?.id
+    } else {
+      const { data, error: api_error } = await client.POST(
+        "/api/projects/{project_id}/tasks/{task_id}/spec",
+        {
+          params: { path: { project_id, task_id } },
+          body: {
+            name,
+            definition,
+            properties,
+            priority: 1,
+            status: "active",
+            tags: [],
+            eval_id: null,
+          },
+          // Forward the abort signal on this path too, matching the copilot request above
+          signal,
+        },
+      )
+      if (api_error) throw api_error
+      spec_id = data?.id
+    }
+
+    if (!spec_id) {
+      throw new Error("Failed to create spec")
+    }
 
     complete = true
     goto(`/specs/${project_id}/${task_id}/${spec_id}`)