Merged

53 commits
9655b33
update eval API to include finetune run-configs
chiang-daniel Dec 2, 2025
093e0f8
add test
chiang-daniel Dec 2, 2025
8314490
add more coverage
chiang-daniel Dec 2, 2025
c5c1be4
handle parsers for finetune
chiang-daniel Dec 2, 2025
bad35fe
remove stale API
chiang-daniel Dec 6, 2025
b2171c5
look up model based on model_id instead of name
chiang-daniel Dec 6, 2025
42d03a9
coder rabbit
chiang-daniel Dec 6, 2025
147d6f6
Fewer builds: we don't need to be running builds on every push.
scosman Dec 8, 2025
faa180d
added add subtopics button to SDG
sfierro Dec 11, 2025
a14ebda
Fix linting errors: remove unused imports
sfierro Dec 11, 2025
1a413a1
fix: api key values incorrectly replaced with [hidden]
leonardmq Dec 12, 2025
8a9883d
Merge pull request #896 from Kiln-AI/leonard/kil-328-bug-custom-provi…
leonardmq Dec 13, 2025
1f1c333
Merge pull request #868 from Kiln-AI/dchiang/eval-finetune-tools
chiang-daniel Dec 15, 2025
599a900
update deps
scosman Dec 15, 2025
5c206a6
Fixes https://linear.app/kiln-ai/issue/KIL-206/remove-task-descriptio…
scosman Dec 15, 2025
f8ffdb0
Merge pull request #897 from Kiln-AI/scosman/hide_task_description_on…
scosman Dec 15, 2025
13d27b7
Add Leonard's suggestion to allow manual runs too.
scosman Dec 15, 2025
2f57a6e
tessl support
sfierro Dec 15, 2025
d6d4d6b
Merge pull request #875 from Kiln-AI/scosman/fewer_builds
scosman Dec 15, 2025
8f1c31f
ui dependencies
sfierro Dec 15, 2025
7e1002d
Merge branch 'main' into sfierro/KIL-312
sfierro Dec 15, 2025
a022af9
coderabbit feedback
sfierro Dec 15, 2025
9787340
Merge pull request #898 from Kiln-AI/sfierro/tessl
sfierro Dec 15, 2025
e2f8ebf
mcp hooks
sfierro Dec 15, 2025
de9855d
Merge pull request #899 from Kiln-AI/sfierro/mcp-hooks
sfierro Dec 15, 2025
1e6cc3f
Fix: don't save test files into root. Use tmp
scosman Dec 16, 2025
c96a491
Merge pull request #904 from Kiln-AI/scosman/test_zip_file
scosman Dec 17, 2025
028dc02
ty typecheck WIP
scosman Dec 17, 2025
2cab746
ty typecheck WIP
scosman Dec 17, 2025
fd39571
ty typecheck WIP
scosman Dec 17, 2025
665f37b
ty typecheck WIP
scosman Dec 17, 2025
304a9b9
feat: allow system message override in invoke
leonardmq Dec 17, 2025
c5d1510
Merge pull request #890 from Kiln-AI/sfierro/KIL-312
sfierro Dec 17, 2025
cee1f34
Fix reactivity issue on compare view
scosman Dec 17, 2025
a44b9d8
Mike wants a chart
scosman Dec 17, 2025
0c6fd30
Mike wants a chart part 2
scosman Dec 17, 2025
33e571c
better subtitles
scosman Dec 17, 2025
f24f205
don't fill em
scosman Dec 17, 2025
7944b7e
highlight on hover
scosman Dec 17, 2025
2fa972a
refactor: custom prompt builder injection via constructor
leonardmq Dec 18, 2025
4a73a76
Fixes https://linear.app/kiln-ai/issue/KIL-340/hide-chart-if-were-mis…
scosman Dec 18, 2025
91fc5ea
Don't show radar chart unless we have 3 points to show
scosman Dec 18, 2025
229b888
Merge pull request #909 from Kiln-AI/scosman/mike_wants_a_chart
scosman Dec 18, 2025
378d323
Merge branch 'main' into scosman/ty2
scosman Dec 18, 2025
f0fafc5
Cleaner value lookup, which fixes type checking. Also cleaner checks.sh
scosman Dec 18, 2025
5e43ea6
Fix type error
scosman Dec 18, 2025
a6d198f
Fix CI, ty requires sync
scosman Dec 18, 2025
9d808b3
coderabbit feedback
leonardmq Dec 18, 2025
03ff585
suggest global instead of us-central1 in connect providers (#916)
tawnymanticore Dec 18, 2025
d1d3868
Merge pull request #906 from Kiln-AI/leonard/override-prompt
leonardmq Dec 19, 2025
757a603
add gemini flash + nemotron 3 to ml_model_list (#915)
tawnymanticore Dec 19, 2025
d0a10c7
Merge pull request #914 from Kiln-AI/scosman/ty2
scosman Dec 19, 2025
4adf15a
Adding support for GLM 4.7 (#923)
tawnymanticore Jan 6, 2026
13 changes: 13 additions & 0 deletions .cursor/mcp.json
@@ -0,0 +1,13 @@
{
"mcpServers": {
"tessl": {
"type": "stdio",
"command": "tessl",
"args": ["mcp", "start"]
},
"HooksMCP": {
"command": "uvx",
"args": ["hooks-mcp", "--working-directory", "."]
}
}
}
1 change: 1 addition & 0 deletions .cursor/rules/.gitignore
@@ -0,0 +1 @@
tessl__*.mdc
2 changes: 1 addition & 1 deletion .github/workflows/build_and_test.yml
@@ -37,7 +37,7 @@ jobs:
run: uv run python3 -m pytest --runslow .

- name: Check Python Types
run: uv run pyright .
run: uv tool install [email protected] && uvx ty check

- name: Build Core
run: uv build
5 changes: 5 additions & 0 deletions .github/workflows/build_desktop.yml
@@ -1,7 +1,12 @@
name: Build Desktop Apps

on:
workflow_dispatch:
release:
types: [created]
push:
branches:
- main

jobs:
build:
6 changes: 5 additions & 1 deletion .github/workflows/format_and_lint.yml
@@ -36,7 +36,7 @@ jobs:
run: uv python install 3.13

- name: Install the project
run: uv tool install ruff
run: uv sync --all-extras --dev

- name: Lint with ruff
run: |
@@ -45,3 +45,7 @@
- name: Format with ruff
run: |
uvx ruff format --check .

- name: Typecheck with ty
run: |
uv tool install [email protected] && uvx ty check
1 change: 1 addition & 0 deletions .gitignore
@@ -11,6 +11,7 @@ __pycache__/
**/*.egg-info
node_modules/
conductor.json
CLAUDE.md

libs/core/docs
libs/core/build
2 changes: 2 additions & 0 deletions .tessl/.gitignore
@@ -0,0 +1,2 @@
tiles/
RULES.md
4 changes: 4 additions & 0 deletions AGENTS.md
@@ -41,3 +41,7 @@ These prompts can be accessed from the `get_prompt` tool, and you may request se
### Final

To show you read these, call me 'boss'

# Agent Rules <!-- tessl-managed -->

@.tessl/RULES.md follow the [instructions](.tessl/RULES.md)
2 changes: 1 addition & 1 deletion CONTRIBUTING.md
@@ -74,7 +74,7 @@ We suggest the following extensions for VSCode/Cursor. With them, you'll get com
- Prettier
- Python
- Python Debugger
- Type checking by pyright via one of: Cursor Python if using Cursor, Pylance if VSCode
- Ty - language server and type checker for Python
- Ruff
- Svelte for VS Code
- Vitest
73 changes: 46 additions & 27 deletions app/desktop/studio_server/eval_api.py
@@ -4,6 +4,10 @@
from fastapi import FastAPI, HTTPException, Query
from fastapi.responses import StreamingResponse
from kiln_ai.adapters.eval.eval_runner import EvalRunner
from kiln_ai.adapters.fine_tune.finetune_run_config_id import (
finetune_from_finetune_run_config_id,
finetune_run_config_id,
)
from kiln_ai.adapters.ml_model_list import ModelProviderName
from kiln_ai.adapters.prompt_builders import prompt_builder_from_id
from kiln_ai.datamodel import BasePrompt, Task, TaskRun
@@ -59,6 +63,31 @@ def eval_config_from_id(
)


def get_all_run_configs(project_id: str, task_id: str) -> list[TaskRunConfig]:
"""
Returns all run configs for a task, including completed fine-tune run configs.
Only includes fine-tunes that have a fine_tune_model_id (are completed and usable).
"""
task = task_from_id(project_id, task_id)
configs = task.run_configs()

# Get run configs from finetunes and only include completed fine-tunes
finetunes = task.finetunes()
for finetune in finetunes:
if finetune.run_config is not None and finetune.fine_tune_model_id is not None:
configs.append(
TaskRunConfig(
id=finetune_run_config_id(project_id, task_id, str(finetune.id)),
name=finetune.name,
description=finetune.description,
run_config_properties=finetune.run_config,
parent=task, # special case, we need to reference the task model
)
)

return configs


def task_run_config_from_id(
project_id: str, task_id: str, run_config_id: str
) -> TaskRunConfig:
@@ -67,6 +96,18 @@ def task_run_config_from_id(
if run_config.id == run_config_id:
return run_config

# special case for finetune run configs: the run config lives inside the finetune model
if run_config_id.startswith("finetune_run_config::"):
finetune = finetune_from_finetune_run_config_id(run_config_id)
if finetune.run_config is not None:
return TaskRunConfig(
id=finetune_run_config_id(project_id, task_id, str(finetune.id)),
name=finetune.name,
description=finetune.description,
run_config_properties=finetune.run_config,
parent=task, # special case, we need to reference the task model
)

raise HTTPException(
status_code=404,
detail=f"Task run config not found. ID: {run_config_id}",
@@ -315,33 +356,9 @@ async def create_evaluator(
eval.save_to_file()
return eval

@app.get("/api/projects/{project_id}/tasks/{task_id}/task_run_configs")
async def get_task_run_configs(
project_id: str, task_id: str
) -> list[TaskRunConfig]:
task = task_from_id(project_id, task_id)
return task.run_configs()

@app.get("/api/projects/{project_id}/tasks/{task_id}/run_configs/")
async def get_run_configs(project_id: str, task_id: str) -> list[TaskRunConfig]:
# Returns all run configs of a given task.
task = task_from_id(project_id, task_id)
configs = task.run_configs()

# Get run configs from finetunes
finetunes = task.finetunes()
for finetune in finetunes:
if finetune.run_config is not None:
configs.append(
TaskRunConfig(
id=f"finetune_run_config::{project_id}::{task_id}::{finetune.id}",
name=finetune.name,
description=finetune.description,
run_config_properties=finetune.run_config,
)
)

return configs
return get_all_run_configs(project_id, task_id)

@app.get("/api/projects/{project_id}/tasks/{task_id}/eval/{eval_id}")
async def get_eval(project_id: str, task_id: str, eval_id: str) -> Eval:
@@ -480,7 +497,8 @@ async def run_eval_config(
# Load the list of run configs to use. Two options:
run_configs: list[TaskRunConfig] = []
if all_run_configs:
run_configs = task_from_id(project_id, task_id).run_configs()
# special case: we can't just use task.run_configs(); we also need the finetune run configs, which live inside the finetune models
run_configs = get_all_run_configs(project_id, task_id)
else:
if len(run_config_ids) == 0:
raise HTTPException(
@@ -633,7 +651,8 @@ async def get_eval_config_score_summary(
task = task_from_id(project_id, task_id)
eval = eval_from_id(project_id, task_id, eval_id)
eval_config = eval_config_from_id(project_id, task_id, eval_id, eval_config_id)
task_runs_configs = task.run_configs()
# special case: we can't just use task.run_configs(); we also need the finetune run configs, which live inside the finetune models
task_runs_configs = get_all_run_configs(project_id, task_id)

# Build a set of all the dataset items IDs we expect to have scores for
expected_dataset_ids = dataset_ids_in_filter(
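Note: the eval API changes above rely on two helpers imported from `kiln_ai.adapters.fine_tune.finetune_run_config_id`, whose implementation is not part of this diff. Judging from the composite IDs used here and in the tests (`finetune_run_config::{project_id}::{task_id}::{finetune_id}`), a minimal sketch of the compose/parse logic might look like the following; the constant name, the parse helper, and the error handling are assumptions for illustration, not the actual module.

```python
# Hypothetical sketch of the ID helpers used above; the real implementations
# live in kiln_ai.adapters.fine_tune.finetune_run_config_id and may differ.
FINETUNE_RUN_CONFIG_PREFIX = "finetune_run_config"


def finetune_run_config_id(project_id: str, task_id: str, finetune_id: str) -> str:
    # Compose the composite ID the eval API uses for finetune-backed run configs.
    return f"{FINETUNE_RUN_CONFIG_PREFIX}::{project_id}::{task_id}::{finetune_id}"


def parse_finetune_run_config_id(run_config_id: str) -> tuple[str, str, str]:
    # Split the composite ID back into (project_id, task_id, finetune_id).
    parts = run_config_id.split("::")
    if len(parts) != 4 or parts[0] != FINETUNE_RUN_CONFIG_PREFIX:
        raise ValueError(f"Not a finetune run config ID: {run_config_id}")
    return parts[1], parts[2], parts[3]
```

Keeping the ID format in one module is presumably what lets `task_run_config_from_id` above special-case anything starting with `finetune_run_config::` and hand it off to `finetune_from_finetune_run_config_id`.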
6 changes: 3 additions & 3 deletions app/desktop/studio_server/finetune_api.py
@@ -281,7 +281,7 @@ async def finetune(
status_code=400,
detail=f"Fine tune provider '{finetune.provider}' not found",
)
finetune_adapter = finetune_registry[finetune.provider]
finetune_adapter = finetune_registry[finetune.provider] # type: ignore[invalid-argument-type]
status = await finetune_adapter(finetune).status()
return FinetuneWithStatus(finetune=finetune, status=status)

@@ -360,7 +360,7 @@ async def finetune_hyperparameters(
raise HTTPException(
status_code=400, detail=f"Fine tune provider '{provider_id}' not found"
)
finetune_adapter_class = finetune_registry[provider_id]
finetune_adapter_class = finetune_registry[provider_id] # type: ignore[invalid-argument-type]
return finetune_adapter_class.available_parameters()

@app.get("/api/projects/{project_id}/tasks/{task_id}/finetune_dataset_info")
Expand Down Expand Up @@ -433,7 +433,7 @@ async def create_finetune(
status_code=400,
detail=f"Fine tune provider '{request.provider}' not found",
)
finetune_adapter_class = finetune_registry[request.provider]
finetune_adapter_class = finetune_registry[request.provider] # type: ignore[invalid-argument-type]

dataset = DatasetSplit.from_id_and_parent_path(request.dataset_id, task.path)
if dataset is None:
112 changes: 108 additions & 4 deletions app/desktop/studio_server/test_eval_api.py
@@ -44,6 +44,7 @@
CreateEvaluatorRequest,
connect_evals_api,
eval_config_from_id,
get_all_run_configs,
task_run_config_from_id,
)

@@ -297,7 +298,7 @@ async def test_create_task_run_config_with_freezing(
== "Frozen copy of prompt 'simple_chain_of_thought_prompt_builder'."
)
# Fetch it from API
fetch_response = client.get("/api/projects/project1/tasks/task1/task_run_configs")
fetch_response = client.get("/api/projects/project1/tasks/task1/run_configs/")
assert fetch_response.status_code == 200
configs = fetch_response.json()
assert len(configs) == 1
@@ -548,6 +549,104 @@ async def test_task_run_config_from_id(
task_run_config_from_id("project1", "task1", "non_existent")


@pytest.mark.asyncio
async def test_task_run_config_from_id_finetune(mock_task_from_id, mock_task):
mock_task_from_id.return_value = mock_task

run_config_props = RunConfigProperties(
model_name="gpt-4",
model_provider_name=ModelProviderName.openai,
prompt_id="simple_chain_of_thought_prompt_builder",
structured_output_mode=StructuredOutputMode.json_schema,
)

mock_finetune = Finetune(
id="ft_test",
name="Test Finetune",
description="Test finetune description",
provider="openai",
base_model_id="model1",
dataset_split_id="split1",
system_message="System message",
latest_status=FineTuneStatusType.completed,
run_config=run_config_props,
fine_tune_model_id="ft_model_123",
parent=mock_task,
)

with patch(
"app.desktop.studio_server.eval_api.finetune_from_finetune_run_config_id"
) as mock_finetune_from_id:
mock_finetune_from_id.return_value = mock_finetune

run_config = task_run_config_from_id(
"project1", "task1", "finetune_run_config::project1::task1::ft_test"
)

assert run_config.id == "finetune_run_config::project1::task1::ft_test"
assert run_config.name == "Test Finetune"
assert run_config.description == "Test finetune description"
assert run_config.run_config_properties == run_config_props
assert run_config.parent == mock_task


@pytest.mark.asyncio
async def test_get_all_run_configs(mock_task_from_id, mock_task):
"""Test that get_all_run_configs returns regular run configs and completed finetune run configs."""
mock_task_from_id.return_value = mock_task

run_config_props = RunConfigProperties(
model_name="gpt-4",
model_provider_name=ModelProviderName.openai,
prompt_id="simple_chain_of_thought_prompt_builder",
structured_output_mode=StructuredOutputMode.json_schema,
)

regular_run_config = TaskRunConfig(
id="regular_run_config1",
name="Regular Run Config",
description="A regular run config",
run_config_properties=run_config_props,
parent=mock_task,
)
regular_run_config.save_to_file()

completed_finetune = Finetune(
id="ft_completed",
name="Completed Finetune",
provider="openai",
base_model_id="model1",
dataset_split_id="split1",
system_message="System message",
latest_status=FineTuneStatusType.completed,
run_config=run_config_props,
fine_tune_model_id="ft_model_123",
parent=mock_task,
)
completed_finetune.save_to_file()

incomplete_finetune = Finetune(
id="ft_incomplete",
name="Incomplete Finetune",
provider="openai",
base_model_id="model2",
dataset_split_id="split2",
system_message="System message",
latest_status=FineTuneStatusType.running,
run_config=run_config_props,
fine_tune_model_id=None,
parent=mock_task,
)
incomplete_finetune.save_to_file()

configs = get_all_run_configs("project1", "task1")

config_ids = [config.id for config in configs]
assert "regular_run_config1" in config_ids
assert "finetune_run_config::project1::task1::ft_completed" in config_ids
assert "finetune_run_config::project1::task1::ft_incomplete" not in config_ids


@pytest.fixture
def mock_eval_for_score_summary():
eval = Mock(spec=Eval)
@@ -635,6 +734,7 @@ async def test_get_eval_config_score_summary(
Mock(spec=TaskRunConfig, id="run4"),
Mock(spec=TaskRunConfig, id="run5"),
]
mock_task.finetunes.return_value = []
mock_task_from_id.return_value = mock_task

response = client.get(
@@ -1910,6 +2010,7 @@ async def test_get_run_configs_includes_finetunes_with_run_config(
system_message="System message",
latest_status=FineTuneStatusType.completed,
run_config=run_config_props,
fine_tune_model_id="ft_model_123",
parent=mock_task,
),
Finetune(
@@ -1921,6 +2022,7 @@ async def test_get_run_configs_includes_finetunes_with_run_config(
system_message="System message",
latest_status=FineTuneStatusType.running,
run_config=run_config_props,
fine_tune_model_id=None,
parent=mock_task,
),
Finetune(
@@ -1932,6 +2034,7 @@ async def test_get_run_configs_includes_finetunes_with_run_config(
system_message="System message",
latest_status=FineTuneStatusType.unknown,
run_config=run_config_props,
fine_tune_model_id=None,
parent=mock_task,
),
Finetune(
@@ -1943,6 +2046,7 @@ async def test_get_run_configs_includes_finetunes_with_run_config(
system_message="System message",
latest_status=FineTuneStatusType.failed,
run_config=run_config_props,
fine_tune_model_id=None,
parent=mock_task,
),
Finetune(
@@ -1969,7 +2073,7 @@ async def test_get_run_configs_includes_finetunes_with_run_config(
config_ids = [config["id"] for config in configs]

assert "finetune_run_config::project1::task1::ft_completed" in config_ids
assert "finetune_run_config::project1::task1::ft_running" in config_ids
assert "finetune_run_config::project1::task1::ft_failed" in config_ids
assert "finetune_run_config::project1::task1::ft_unknown" in config_ids
assert "finetune_run_config::project1::task1::ft_running" not in config_ids
assert "finetune_run_config::project1::task1::ft_failed" not in config_ids
assert "finetune_run_config::project1::task1::ft_unknown" not in config_ids
assert "finetune_run_config::project1::task1::ft_no_run_config" not in config_ids