Commit ae1512c

Authored by: tawnymanticore, chiang-daniel, scosman, sfierro, leonardmq

Update ML Model List (#921)

* update eval API to include finetune run-configs
* add test
* add more coverage
* handle parsers for finetune
* remove stale API
* look up model based on model_id instead of name
* coder rabbit
* Fewer builds: we don't need to be running builds on every push.
* added add subtopics button to SDG
* Fix linting errors: remove unused imports
* fix: api key values incorrectly replaced with [hidden]
* update deps
* Fixes https://linear.app/kiln-ai/issue/KIL-206/remove-task-description-in-new-task. People are confusing description and prompt. Don't need it.
* Add Leonard's suggestion to allow manual runs too.
* tessl support
* ui dependencies
* coderabbit feedback
* mcp hooks
* Fix: don't save test files into root. Use tmp
* ty typecheck WIP
* ty typecheck WIP
* ty typecheck WIP
* ty typecheck WIP
* feat: allow system message override in invoke
* Fix reactivity issue on compare view
* Mike wants a chart
* Mike wants a chart part 2
* better subtitles
* don't fill em
* highlight on hover
* refactor: custom prompt builder injection via constructor
* Fixes https://linear.app/kiln-ai/issue/KIL-340/hide-chart-if-were-missing-data
* Don't show radar chart unless we have 3 points to show
* Cleaner value lookup, which fixes type checking. Also cleaner checks.sh
* Fix type error
* Fix CI, ty requires sync
* coderabbit feedback
* suggest global instead of us-central1 in connect providers (#916)
* add gemini flash + nemotron 3 to ml_model_list (#915)
* add gemini flash + nemotron 3
* adding gemini 3 flash and pro to vertex, and adding nemotron 3 to ollama
* adding alias for nemotron 3
* Adding support for GLM 4.7 (#923)
* adding glm 4.7
* addressing CR feedback
* lock typecheck version for now
* siliconflow has a Pro prefix for the endpoint for GLM 4.7

Co-authored-by: aiyakitori <[email protected]>
Co-authored-by: scosman <[email protected]>
Co-authored-by: Sam Fierro <[email protected]>
Co-authored-by: Leonard Q. Marcq <[email protected]>
Parent: cbbdbc8 · Commit: ae1512c

52 files changed: +1,476 additions, −277 deletions

.cursor/mcp.json

Lines changed: 13 additions & 0 deletions
```diff
@@ -0,0 +1,13 @@
+{
+  "mcpServers": {
+    "tessl": {
+      "type": "stdio",
+      "command": "tessl",
+      "args": ["mcp", "start"]
+    },
+    "HooksMCP": {
+      "command": "uvx",
+      "args": ["hooks-mcp", "--working-directory", "."]
+    }
+  }
+}
```
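The new MCP config can be sanity-checked with a short Python sketch; the server names and fields below are reproduced from the diff above:

```python
import json

# The MCP config added in this commit (reproduced from the diff above)
config_text = """
{
  "mcpServers": {
    "tessl": {
      "type": "stdio",
      "command": "tessl",
      "args": ["mcp", "start"]
    },
    "HooksMCP": {
      "command": "uvx",
      "args": ["hooks-mcp", "--working-directory", "."]
    }
  }
}
"""

config = json.loads(config_text)
servers = config["mcpServers"]

# Both servers from the diff should be present, each with a launch command
assert set(servers) == {"tessl", "HooksMCP"}
assert servers["tessl"]["command"] == "tessl"
assert servers["HooksMCP"]["args"][0] == "hooks-mcp"
print("mcp.json structure OK")
```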

.cursor/rules/.gitignore

Lines changed: 1 addition & 0 deletions
```diff
@@ -0,0 +1 @@
+tessl__*.mdc
```

.github/workflows/build_and_test.yml

Lines changed: 1 addition & 1 deletion
```diff
@@ -37,7 +37,7 @@ jobs:
         run: uv run python3 -m pytest --runslow .
 
       - name: Check Python Types
-        run: uv run pyright .
+        run: uv tool install [email protected] && uvx ty check
 
       - name: Build Core
         run: uv build
```

.github/workflows/build_desktop.yml

Lines changed: 5 additions & 0 deletions
```diff
@@ -1,7 +1,12 @@
 name: Build Desktop Apps
 
 on:
+  workflow_dispatch:
+  release:
+    types: [created]
   push:
+    branches:
+      - main
 
 jobs:
   build:
```

.github/workflows/format_and_lint.yml

Lines changed: 5 additions & 1 deletion
```diff
@@ -36,7 +36,7 @@ jobs:
         run: uv python install 3.13
 
       - name: Install the project
-        run: uv tool install ruff
+        run: uv sync --all-extras --dev
 
       - name: Lint with ruff
         run: |
@@ -45,3 +45,7 @@ jobs:
       - name: Format with ruff
         run: |
           uvx ruff format --check .
+
+      - name: Typecheck with ty
+        run: |
+          uv tool install [email protected] && uvx ty check
```

.gitignore

Lines changed: 1 addition & 0 deletions
```diff
@@ -11,6 +11,7 @@ __pycache__/
 **/*.egg-info
 node_modules/
 conductor.json
+CLAUDE.md
 
 libs/core/docs
 libs/core/build
```

.tessl/.gitignore

Lines changed: 2 additions & 0 deletions
```diff
@@ -0,0 +1,2 @@
+tiles/
+RULES.md
```

AGENTS.md

Lines changed: 4 additions & 0 deletions
```diff
@@ -41,3 +41,7 @@ These prompts can be accessed from the `get_prompt` tool, and you may request se
 ### Final
 
 To show you read these, call me 'boss'
+
+# Agent Rules <!-- tessl-managed -->
+
+@.tessl/RULES.md follow the [instructions](.tessl/RULES.md)
```

CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -74,7 +74,7 @@ We suggest the following extensions for VSCode/Cursor. With them, you'll get com
 - Prettier
 - Python
 - Python Debugger
-- Type checking by pyright via one of: Cursor Python if using Cursor, Pylance if VSCode
+- Ty - language server and type checker for Python
 - Ruff
 - Svelte for VS Code
 - Vitest
```

app/desktop/studio_server/eval_api.py

Lines changed: 46 additions & 27 deletions
```diff
@@ -4,6 +4,10 @@
 from fastapi import FastAPI, HTTPException, Query
 from fastapi.responses import StreamingResponse
 from kiln_ai.adapters.eval.eval_runner import EvalRunner
+from kiln_ai.adapters.fine_tune.finetune_run_config_id import (
+    finetune_from_finetune_run_config_id,
+    finetune_run_config_id,
+)
 from kiln_ai.adapters.ml_model_list import ModelProviderName
 from kiln_ai.adapters.prompt_builders import prompt_builder_from_id
 from kiln_ai.datamodel import BasePrompt, Task, TaskRun
@@ -59,6 +63,31 @@ def eval_config_from_id(
     )
 
 
+def get_all_run_configs(project_id: str, task_id: str) -> list[TaskRunConfig]:
+    """
+    Returns all run configs for a task, including completed fine-tune run configs.
+    Only includes fine-tunes that have a fine_tune_model_id (are completed and usable).
+    """
+    task = task_from_id(project_id, task_id)
+    configs = task.run_configs()
+
+    # Get run configs from fine-tunes; only include completed fine-tunes
+    finetunes = task.finetunes()
+    for finetune in finetunes:
+        if finetune.run_config is not None and finetune.fine_tune_model_id is not None:
+            configs.append(
+                TaskRunConfig(
+                    id=finetune_run_config_id(project_id, task_id, str(finetune.id)),
+                    name=finetune.name,
+                    description=finetune.description,
+                    run_config_properties=finetune.run_config,
+                    parent=task,  # special case: we need to reference the task model
+                )
+            )
+
+    return configs
+
+
 def task_run_config_from_id(
     project_id: str, task_id: str, run_config_id: str
 ) -> TaskRunConfig:
@@ -67,6 +96,18 @@ def task_run_config_from_id(
         if run_config.id == run_config_id:
             return run_config
 
+    # Special case for finetune run configs: they live inside the finetune model
+    if run_config_id.startswith("finetune_run_config::"):
+        finetune = finetune_from_finetune_run_config_id(run_config_id)
+        if finetune.run_config is not None:
+            return TaskRunConfig(
+                id=finetune_run_config_id(project_id, task_id, str(finetune.id)),
+                name=finetune.name,
+                description=finetune.description,
+                run_config_properties=finetune.run_config,
+                parent=task,  # special case: we need to reference the task model
+            )
+
     raise HTTPException(
         status_code=404,
         detail=f"Task run config not found. ID: {run_config_id}",
@@ -315,33 +356,9 @@ async def create_evaluator(
     eval.save_to_file()
     return eval
 
-@app.get("/api/projects/{project_id}/tasks/{task_id}/task_run_configs")
-async def get_task_run_configs(
-    project_id: str, task_id: str
-) -> list[TaskRunConfig]:
-    task = task_from_id(project_id, task_id)
-    return task.run_configs()
-
 @app.get("/api/projects/{project_id}/tasks/{task_id}/run_configs/")
 async def get_run_configs(project_id: str, task_id: str) -> list[TaskRunConfig]:
-    # Returns all run configs of a given task.
-    task = task_from_id(project_id, task_id)
-    configs = task.run_configs()
-
-    # Get run configs from finetunes
-    finetunes = task.finetunes()
-    for finetune in finetunes:
-        if finetune.run_config is not None:
-            configs.append(
-                TaskRunConfig(
-                    id=f"finetune_run_config::{project_id}::{task_id}::{finetune.id}",
-                    name=finetune.name,
-                    description=finetune.description,
-                    run_config_properties=finetune.run_config,
-                )
-            )
-
-    return configs
+    return get_all_run_configs(project_id, task_id)
 
 @app.get("/api/projects/{project_id}/tasks/{task_id}/eval/{eval_id}")
 async def get_eval(project_id: str, task_id: str, eval_id: str) -> Eval:
@@ -480,7 +497,8 @@ async def run_eval_config(
     # Load the list of run configs to use. Two options:
     run_configs: list[TaskRunConfig] = []
     if all_run_configs:
-        run_configs = task_from_id(project_id, task_id).run_configs()
+        # Special case: we cannot directly load task.run_configs(); we also need the finetune run configs, which live inside the finetune model
+        run_configs = get_all_run_configs(project_id, task_id)
     else:
         if len(run_config_ids) == 0:
             raise HTTPException(
@@ -633,7 +651,8 @@ async def get_eval_config_score_summary(
     task = task_from_id(project_id, task_id)
     eval = eval_from_id(project_id, task_id, eval_id)
     eval_config = eval_config_from_id(project_id, task_id, eval_id, eval_config_id)
-    task_runs_configs = task.run_configs()
+    # Special case: we cannot directly load task.run_configs(); we also need the finetune run configs, which live inside the finetune model
+    task_runs_configs = get_all_run_configs(project_id, task_id)
 
     # Build a set of all the dataset items IDs we expect to have scores for
     expected_dataset_ids = dataset_ids_in_filter(
```
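The composite ID format for finetune run configs is visible in the removed inline f-string (`finetune_run_config::{project_id}::{task_id}::{finetune.id}`) and in the `startswith("finetune_run_config::")` check. The real helpers live in `kiln_ai.adapters.fine_tune.finetune_run_config_id`; the sketch below is a hypothetical reconstruction of how building and parsing such an ID might work, not the project's actual implementation:

```python
# Hypothetical sketch of the composite-ID helpers; the real implementations
# live in kiln_ai.adapters.fine_tune.finetune_run_config_id.
PREFIX = "finetune_run_config"


def finetune_run_config_id(project_id: str, task_id: str, finetune_id: str) -> str:
    # Matches the f-string removed from get_run_configs in this diff
    return f"{PREFIX}::{project_id}::{task_id}::{finetune_id}"


def parse_finetune_run_config_id(run_config_id: str) -> tuple[str, str, str]:
    # Inverse operation: split the composite ID back into its three parts
    prefix, project_id, task_id, finetune_id = run_config_id.split("::")
    if prefix != PREFIX:
        raise ValueError(f"Not a finetune run config ID: {run_config_id}")
    return project_id, task_id, finetune_id


rc_id = finetune_run_config_id("proj1", "task1", "ft1")
assert rc_id == "finetune_run_config::proj1::task1::ft1"
assert parse_finetune_run_config_id(rc_id) == ("proj1", "task1", "ft1")
```

This round-trip property is what lets `task_run_config_from_id` recover the finetune from an ID handed back by the API.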
