Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 144 additions & 4 deletions app/web_ui/src/lib/api_schema.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,37 @@ export interface paths {
patch: operations["update_spec_api_projects__project_id__tasks__task_id__specs__spec_id__patch"];
trace?: never;
};
"/api/projects/{project_id}/tasks/{task_id}/spec_with_copilot": {
parameters: {
query?: never;
header?: never;
path?: never;
cookie?: never;
};
get?: never;
put?: never;
/**
* Create Spec With Copilot
* @description Create a spec using Kiln Copilot.
*
* This endpoint uses Kiln Copilot to create a spec with:
* 1. An eval for the spec with appropriate template
* 2. Batch examples via copilot API for eval, train, and golden datasets
* 3. A judge eval config (if judge_info provided)
* 4. The spec itself
*
* If you don't need copilot, use POST /spec instead.
*
* All models are validated before any saves occur. If validation fails,
* no data is persisted.
*/
post: operations["create_spec_with_copilot_api_projects__project_id__tasks__task_id__spec_with_copilot_post"];
delete?: never;
options?: never;
head?: never;
patch?: never;
trace?: never;
};
"/api/projects/{project_id}/tasks/{task_id}/runs/{run_id}": {
parameters: {
query?: never;
Expand Down Expand Up @@ -2717,9 +2748,9 @@ export interface components {
ClarifySpecApiOutput: {
/** Examples For Feedback */
examples_for_feedback: components["schemas"]["SubsampleBatchOutputItemApi"][];
judge_result: components["schemas"]["PromptGenerationResultApi"];
topic_generation_result: components["schemas"]["PromptGenerationResultApi"];
input_generation_result: components["schemas"]["PromptGenerationResultApi"];
judge_result: components["schemas"]["PromptGenerationResultApi-Output"];
topic_generation_result: components["schemas"]["PromptGenerationResultApi-Output"];
input_generation_result: components["schemas"]["PromptGenerationResultApi-Output"];
};
/** CohereCompatibleProperties */
CohereCompatibleProperties: {
Expand Down Expand Up @@ -2990,6 +3021,54 @@ export interface components {
*/
properties: components["schemas"]["CohereCompatibleProperties"];
};
/**
* CreateSpecWithCopilotRequest
* @description Request model for creating a spec with Kiln Copilot.
*
* This endpoint uses Kiln Copilot to:
* - Generate batch examples for eval, train, and golden datasets
* - Create a judge eval config
* - Create an eval with appropriate template/output scores
* - Create and save the spec
*
* If you don't want to use copilot, use the regular POST /spec endpoint instead.
*
* The client is responsible for building:
* - definition: The spec definition string (use buildSpecDefinition on client)
* - properties: The spec properties object (filtered, with spec_type included)
*
* Every field except `reviewed_examples` is typed as required, including the
* ones that are documented with a default value.
*/
CreateSpecWithCopilotRequest: {
/** Name */
name: string;
/**
* Definition
* @description The spec definition string, built by client using buildSpecDefinition()
*/
definition: string;
/**
* Properties
* @description The spec properties object, pre-built by client with spec_type included.
* Typed as a union of the per-spec-type property schemas listed below.
*/
properties: components["schemas"]["DesiredBehaviourProperties"] | components["schemas"]["IssueProperties"] | components["schemas"]["ToneProperties"] | components["schemas"]["FormattingProperties"] | components["schemas"]["LocalizationProperties"] | components["schemas"]["AppropriateToolUseProperties"] | components["schemas"]["ReferenceAnswerAccuracyProperties"] | components["schemas"]["FactualCorrectnessProperties"] | components["schemas"]["HallucinationsProperties"] | components["schemas"]["CompletenessProperties"] | components["schemas"]["ToxicityProperties"] | components["schemas"]["BiasProperties"] | components["schemas"]["MaliciousnessProperties"] | components["schemas"]["NsfwProperties"] | components["schemas"]["TabooProperties"] | components["schemas"]["JailbreakProperties"] | components["schemas"]["PromptLeakageProperties"];
/**
* Evaluate Full Trace
* NOTE(review): documented with a default, but the property is not optional in
* this type, so TypeScript callers must still supply a value.
* @default false
*/
evaluate_full_trace: boolean;
/**
* Reviewed Examples
* The only optional field of this request.
*/
reviewed_examples?: components["schemas"]["ReviewedExample"][];
/**
* Judge Info
* NOTE(review): required in this type, although the endpoint description lists
* the judge eval config as created "if judge_info provided" — confirm which
* of the two is intended.
*/
judge_info: components["schemas"]["PromptGenerationResultApi-Input"];
/**
* Task Description
* NOTE(review): the generated default below is blank — presumably an empty
* string; confirm against the server model.
* @default
*/
task_description: string;
/**
* Task Prompt With Few Shot
* NOTE(review): the generated default below is blank — presumably an empty
* string; confirm against the server model.
* @default
*/
task_prompt_with_few_shot: string;
};
/** CreateTaskRunConfigRequest */
CreateTaskRunConfigRequest: {
/** Name */
Expand Down Expand Up @@ -5037,7 +5116,13 @@ export interface components {
chain_of_thought_instructions?: string | null;
};
/** PromptGenerationResultApi */
PromptGenerationResultApi: {
"PromptGenerationResultApi-Input": {
task_metadata: components["schemas"]["TaskMetadataApi"];
/** Prompt */
prompt: string;
};
/** PromptGenerationResultApi */
"PromptGenerationResultApi-Output": {
task_metadata: components["schemas"]["TaskMetadataApi"];
/** Prompt */
prompt: string;
Expand Down Expand Up @@ -5545,6 +5630,25 @@ export interface components {
/** Models */
models: components["schemas"]["RerankerModelDetails"][];
};
/**
* ReviewedExample
* @description A reviewed example from the spec review process.
*
* Extends SampleApi with review-specific fields for tracking
* model and user judgments on spec compliance.
*
* All five fields are required in this type.
*/
ReviewedExample: {
/** Input — the example's input text. */
input: string;
/** Output — the example's output text. */
output: string;
/** Model Says Meets Spec — presumably the model's verdict on whether the output satisfies the spec; confirm against the server model. */
model_says_meets_spec: boolean;
/** User Says Meets Spec — presumably the reviewer's own verdict for the same example; confirm against the server model. */
user_says_meets_spec: boolean;
/** Feedback — free-form text; typed as a required string, so it cannot be omitted or null. */
feedback: string;
};
/** RunConfigEvalResult */
RunConfigEvalResult: {
/** Eval Id */
Expand Down Expand Up @@ -7333,6 +7437,42 @@ export interface operations {
};
};
};
/**
* Operation referenced by the `post` member of the
* "/api/projects/{project_id}/tasks/{task_id}/spec_with_copilot" path.
*
* - Path parameters: `project_id` and `task_id`, both required strings.
* - Request body: JSON typed as `CreateSpecWithCopilotRequest`.
* - 200: JSON typed as `Spec`.
* - 422: JSON typed as `HTTPValidationError`.
*
* No query, header, or cookie parameters are accepted (all typed `never`).
*/
create_spec_with_copilot_api_projects__project_id__tasks__task_id__spec_with_copilot_post: {
parameters: {
query?: never;
header?: never;
path: {
project_id: string;
task_id: string;
};
cookie?: never;
};
requestBody: {
content: {
"application/json": components["schemas"]["CreateSpecWithCopilotRequest"];
};
};
responses: {
/** @description Successful Response */
200: {
headers: {
[name: string]: unknown;
};
content: {
"application/json": components["schemas"]["Spec"];
};
};
/** @description Validation Error */
422: {
headers: {
[name: string]: unknown;
};
content: {
"application/json": components["schemas"]["HTTPValidationError"];
};
};
};
};
get_run_api_projects__project_id__tasks__task_id__runs__run_id__get: {
parameters: {
query?: never;
Expand Down
9 changes: 9 additions & 0 deletions app/web_ui/src/lib/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,15 @@ export type DocumentLibraryState = components["schemas"]["DocumentLibraryState"]
export type Spec = components["schemas"]["Spec"]
export type SpecStatus = components["schemas"]["SpecStatus"]
export type Priority = components["schemas"]["Priority"]

// Copilot API types
// Short aliases for generated schema entries so callers do not have to spell
// out components["schemas"][...] at each use site.

// Points at the "-Input" variant of the generated schema. A separate
// "-Output" variant also exists in the schema.
// NOTE(review): the two variants appear to declare the same task_metadata and
// prompt fields — confirm before relying on them being interchangeable.
export type PromptGenerationResultApi =
components["schemas"]["PromptGenerationResultApi-Input"]
export type TaskMetadataApi = components["schemas"]["TaskMetadataApi"]
// Element type of CreateSpecWithCopilotRequest.reviewed_examples.
export type ReviewedExample = components["schemas"]["ReviewedExample"]
export type SampleApi = components["schemas"]["SampleApi"]
// Element type of ClarifySpecApiOutput.examples_for_feedback.
export type SubsampleBatchOutputItemApi =
components["schemas"]["SubsampleBatchOutputItemApi"]
export type SpecProperties =
| components["schemas"]["AppropriateToolUseProperties"]
| components["schemas"]["DesiredBehaviourProperties"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@
QuestionSet,
SubmitAnswersRequest,
QuestionWithAnswer,
SpecProperties,
PromptGenerationResultApi,
ReviewedExample,
} from "$lib/types"
import { goto } from "$app/navigation"
import { spec_field_configs } from "../select_template/spec_templates"
Expand All @@ -20,11 +23,6 @@
buildSpecDefinition,
type SuggestedEdit,
} from "../spec_utils"
import {
createSpec,
type JudgeInfo,
type ReviewedExample,
} from "./spec_persistence"
import { client } from "$lib/api_client"
import {
load_task,
Expand All @@ -38,6 +36,7 @@
import type { FewShotExample } from "$lib/utils/few_shot_example"
import { build_prompt_with_few_shot } from "$lib/utils/few_shot_example"
import Questions from "./questions.svelte"
import type { ReviewRow } from "./spec_utils.ts"

$: project_id = $page.params.project_id!
$: task_id = $page.params.task_id!
Expand Down Expand Up @@ -113,12 +112,10 @@
let question_set: QuestionSet | null = null

// Review state
type ReviewRow = ReviewedExample & { id: string }

let review_rows: ReviewRow[] = []
let reviewed_examples: ReviewedExample[] = []

let judge_info: JudgeInfo | null = null
let judge_info: PromptGenerationResultApi | null = null

// Refine state
let refined_property_values: Record<string, string | null> = {}
Expand Down Expand Up @@ -289,14 +286,11 @@
throw new Error("Failed to analyze spec for review. Please try again.")
}

judge_info = {
prompt: data.judge_result.prompt,
model_name: data.judge_result.task_metadata.model_name,
model_provider: data.judge_result.task_metadata.model_provider_name,
}
// Use judge_result directly as it already matches PromptGenerationResultApi
judge_info = data.judge_result

review_rows = data.examples_for_feedback.map((example, index) => ({
id: String(index + 1),
row_id: String(index + 1),
input: example.input,
output: example.output,
model_says_meets_spec: !example.fails_specification,
Expand Down Expand Up @@ -349,20 +343,71 @@
examples: ReviewedExample[],
signal?: AbortSignal,
) {
const spec_id = await createSpec(
project_id,
task_id,
task?.instruction || "",
task_prompt_with_few_shot,
name,
spec_type,
values,
use_kiln_copilot,
evaluate_full_trace,
examples,
judge_info,
signal,
// Build definition and properties on the client side
const definition = buildSpecDefinition(spec_type, values)

// Build properties object with spec_type, filtering out null and empty values
const filteredValues = Object.fromEntries(
Object.entries(values).filter(
([_, value]) => value !== null && value.trim() !== "",
),
)
const properties = {
spec_type: spec_type,
...filteredValues,
} as SpecProperties

// Call the appropriate endpoint based on whether copilot is being used
let spec_id: string | null | undefined
if (use_kiln_copilot) {
if (!judge_info) {
throw new Error("Judge info is required for copilot spec creation")
}
const { data, error: api_error } = await client.POST(
"/api/projects/{project_id}/tasks/{task_id}/spec_with_copilot",
{
params: { path: { project_id, task_id } },
body: {
name,
definition,
properties,
evaluate_full_trace,
reviewed_examples: examples.map((e) => ({
...e,
user_says_meets_spec: e.user_says_meets_spec ?? false,
})),
judge_info,
task_description: task?.instruction || "",
task_prompt_with_few_shot,
},
signal,
},
)
if (api_error) throw api_error
spec_id = data?.id
} else {
const { data, error: api_error } = await client.POST(
"/api/projects/{project_id}/tasks/{task_id}/spec",
{
params: { path: { project_id, task_id } },
body: {
name,
definition,
properties,
priority: 1,
status: "active",
tags: [],
eval_id: null,
},
},
)
if (api_error) throw api_error
spec_id = data?.id
}

if (!spec_id) {
throw new Error("Failed to create spec")
}

complete = true
goto(`/specs/${project_id}/${task_id}/${spec_id}`)
Expand Down
Loading
Loading