Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions .github/workflows/developer_content_check.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Fails the build when leftover developer content (console.log calls,
# Python print calls, TODO/FIXME notes) is found in the repository.
name: Developer Content Check

on:
  push:
    branches: [main]
  pull_request:

# This workflow only reads the checked-out tree, so give the token read-only access.
permissions:
  contents: read

jobs:
  developer-check:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Search for leftover developer content
        run: |
          # grep exits non-zero when nothing matches; don't let that abort the step.
          # Findings are tracked in $found and reported once at the end.
          set +e
          found=0

          # Options shared by every search:
          #   -n  print line numbers
          #   -R  recurse into directories
          #   -I  skip binary files, so "Binary file ... matches" noise is not
          #       reported as a finding
          # The excluded directories hold dependencies, VCS data, CI config
          # (including this file), and build output.
          grep_opts=(
            -nRI
            --exclude-dir=node_modules
            --exclude-dir=.venv
            --exclude-dir=.git
            --exclude-dir=.github
            --exclude-dir=build
            --exclude-dir=dist
            --exclude-dir=.svelte-kit
          )

          echo "Checking for console.log statements"
          logs=$(grep "${grep_opts[@]}" -e 'console\.log' . || true)
          if [ -n "$logs" ]; then
            echo "$logs"
            found=1
          fi

          echo "Checking for python print statements"
          # Only Python sources are searched for print( calls.
          prints=$(grep "${grep_opts[@]}" --include='*.py' -e 'print(' . || true)
          if [ -n "$prints" ]; then
            echo "$prints"
            found=1
          fi

          echo "Checking for TODO or FIXME"
          notes=$(grep "${grep_opts[@]}" -e 'TODO' -e 'FIXME' . || true)
          if [ -n "$notes" ]; then
            echo "$notes"
            found=1
          fi

          # Fail the job if any of the three searches produced output.
          if [ "$found" -ne 0 ]; then
            echo "Developer content found" >&2
            exit 1
          else
            echo "No developer content found"
          fi
2 changes: 1 addition & 1 deletion .github/workflows/windows_release_build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ jobs:
files-folder: ${{ github.workspace }}/app/desktop/build/dist
files-folder-recurse: true
files-folder-filter: exe
# TODO: consider signing dlls as well. But for testing, we don't want to use all our quota.
# We should consider signing dlls as well, but for testing we don't want to use all our quota.
# files-folder-filter: exe,dll
file-digest: SHA256
timestamp-rfc3161: http://timestamp.acs.microsoft.com
Expand Down
2 changes: 1 addition & 1 deletion app/desktop/build_desktop_app.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ else
fi

# Builds the desktop app
# TODO: use a spec instead of long winded command line
# We should use a spec instead of a long-winded command line
pyinstaller $(printf %s "$PLATFORM_OPTS") \
--add-data "./taskbar.png:." --add-data "../../web_ui/build:./web_ui/build" \
--noconfirm --distpath=./desktop/build/dist --workpath=./desktop/build/work \
Expand Down
2 changes: 1 addition & 1 deletion app/desktop/desktop.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
from app.desktop.custom_tray import KilnTray
from app.desktop.desktop_server import ThreadedServer, server_config

# TODO: remove this and all other globals in this file
# We should remove this and all other globals in this file
root = None # type: tk.Tk | None
tray = None # type: ignore

Expand Down
13 changes: 8 additions & 5 deletions app/desktop/studio_server/test_generate_docs.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
from typing import List

import pytest
Expand Down Expand Up @@ -77,11 +78,13 @@ async def test_generate_model_table():
row = f"| {model.friendly_name} | {provider_names} | {structured_output} | {reasoning} | {data_gen} | {finetune} |"
table.append(row)

# Print the table (useful for documentation)
print("\nModel Capability Matrix:\n")
print("\n".join(table))
print("\n\nFireworks models remaining:\n")
print("- " + "\n- ".join(f"{m.name}" for m in fireworks_models), "\n\n")
# Log the table (useful for documentation)
logger = logging.getLogger(__name__)
logger.info("\nModel Capability Matrix:\n%s", "\n".join(table))
logger.info(
"\n\nFireworks models remaining:\n- %s\n\n",
"\n- ".join(f"{m.name}" for m in fireworks_models),
)

# Basic assertions to ensure the table is well-formed
assert len(table) > 2, "Table should have header and at least one row"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
// @ts-expect-error list_page is not a property of PageState
$: list_page = ($page.state.list_page || []) as string[]

// TODO: we need to remove task_id from the URL, or load it by ID. $current_task is a lie
// We should remove task_id from the URL, or load it by ID. $current_task is a lie
let run: TaskRun | null = null
let loading = true
let load_error: KilnError | null = null
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ export class SynthDataGuidanceDataModel {

private apply_selected_template(template: string) {
if (template == "custom") {
// TODO make each unique
// We should make each unique
this.topic_guidance.set(null)
this.input_guidance.set(null)
this.output_guidance.set(null)
Expand Down
4 changes: 2 additions & 2 deletions app/web_ui/src/routes/(app)/run/+page.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
import Collapse from "$lib/ui/collapse.svelte"
import posthog from "posthog-js"

// TODO: implement checking input content
// We should implement checking input content
// let warn_before_unload
// TODO UI for errors
// We should add a UI for errors
let error: KilnError | null = null
let submitting = false
let run_complete = false
Expand Down
2 changes: 1 addition & 1 deletion app/web_ui/src/routes/(app)/run/run.svelte
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@
let show_raw_data = false
let save_rating_error: KilnError | null = null
let show_create_tag = false
// TODO warn_before_unload
// We should implement warn_before_unload

type RatingValue = number | null
let overall_rating: RatingValue = null
Expand Down
4 changes: 2 additions & 2 deletions libs/core/kiln_ai/adapters/data_gen/data_gen_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ class DataGenCategoriesTask(Task, parent_of={}):
"""

def __init__(self, gen_type: Literal["training", "eval"], guidance: str | None):
# Keep the typechecker happy. TODO: shouldn't need this or parent_of above.
# Keep the typechecker happy. We should be able to avoid this and parent_of above.
tmp_project = Project(name="DataGen")

instruction = generate_topic_tree_prompt(gen_type=gen_type, guidance=guidance)
Expand Down Expand Up @@ -181,7 +181,7 @@ def __init__(
gen_type: Literal["training", "eval"],
guidance: str | None,
):
# Keep the typechecker happy. TODO: shouldn't need this or parent_of above.
# Keep the typechecker happy. We should be able to avoid this and parent_of above.
tmp_project = Project(name="DataGenSample")

instruction = generate_sample_generation_prompt(
Expand Down
2 changes: 1 addition & 1 deletion libs/core/kiln_ai/adapters/eval/test_g_eval_data.py

Large diffs are not rendered by default.

10 changes: 5 additions & 5 deletions libs/core/kiln_ai/adapters/ml_model_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ class KilnModelProvider(BaseModel):
suggested_for_uncensored_data_gen: bool = False
tuned_chat_strategy: ChatStrategy | None = None

# TODO P1: Need a more generalized way to handle custom provider parameters.
# We need a more generalized way to handle custom provider parameters.
# Making them quite declarative here for now, isolating provider specific logic
# to this file. Later I should be able to override anything in this file via config.
r1_openrouter_options: bool = False
Expand Down Expand Up @@ -1332,7 +1332,7 @@ class KilnModel(BaseModel):
KilnModelProvider(
name=ModelProviderName.vertex,
model_id="meta/llama-3.3-70b-instruct-maas",
# Doesn't work, TODO to debug
# Doesn't work yet; needs debugging
supports_structured_output=False,
supports_data_gen=False,
),
Expand Down Expand Up @@ -1486,7 +1486,7 @@ class KilnModel(BaseModel):
),
KilnModelProvider(
name=ModelProviderName.openrouter,
# TODO: swap to non-free model when available (more reliable)
# Swap to non-free model when available (more reliable)
model_id="google/gemma-3-1b-it:free",
supports_structured_output=False,
supports_data_gen=False,
Expand All @@ -1507,7 +1507,7 @@ class KilnModel(BaseModel):
KilnModelProvider(
name=ModelProviderName.openrouter,
structured_output_mode=StructuredOutputMode.json_instruction_and_object,
# TODO: swap to non-free model when available (more reliable)
# Swap to non-free model when available (more reliable)
model_id="google/gemma-3-4b-it:free",
),
],
Expand All @@ -1525,7 +1525,7 @@ class KilnModel(BaseModel):
KilnModelProvider(
name=ModelProviderName.openrouter,
structured_output_mode=StructuredOutputMode.json_instruction_and_object,
# TODO: swap to non-free model when available (more reliable)
# Swap to non-free model when available (more reliable)
model_id="google/gemma-3-12b-it:free",
),
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ def tool_call_params(self, strict: bool) -> dict[str, Any]:
}

def build_extra_body(self, provider: KilnModelProvider) -> dict[str, Any]:
# TODO P1: Don't love having this logic here. But it's a usability improvement
# We don't love having this logic here, but it's a usability improvement
# so better to keep it than exclude it. Should figure out how I want to isolate
# this sort of logic so it's config driven and can be overridden

Expand Down
2 changes: 1 addition & 1 deletion libs/core/kiln_ai/adapters/repair/repair_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from kiln_ai.datamodel import Priority, Project, Task, TaskRequirement, TaskRun


# TODO add evaluator rating
# We should add evaluator rating
class RepairTaskInput(BaseModel):
original_prompt: str
original_input: str
Expand Down
4 changes: 2 additions & 2 deletions libs/core/kiln_ai/adapters/test_adapter_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def test_openai_compatible_adapter_creation(mock_config, basic_task, provider):
assert adapter.run_config.model_name == "test-model"


# TODO should run for all cases
# We should run for all cases
def test_custom_prompt_builder(mock_config, basic_task):
adapter = adapter_for_task(
kiln_task=basic_task,
Expand All @@ -124,7 +124,7 @@ def test_custom_prompt_builder(mock_config, basic_task):
assert adapter.run_config.prompt_id == "simple_chain_of_thought_prompt_builder"


# TODO should run for all cases
# We should run for all cases
def test_tags_passed_through(mock_config, basic_task):
tags = ["test-tag-1", "test-tag-2"]
adapter = adapter_for_task(
Expand Down
7 changes: 3 additions & 4 deletions libs/core/kiln_ai/adapters/test_remote_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,12 +160,11 @@ def test_backwards_compatibility_with_v0_18(tmp_path):
for provider in model.providers:
assert hasattr(provider, 'name')

print("SUCCESS: v0.18 successfully parsed JSON from current version")
print(f"Parsed {{len(models)}} models")
# Indicate success
pass

except Exception as e:
print(f"ERROR: {{e}}")
sys.exit(1)
raise SystemExit(f"ERROR: {e}")
'''

try:
Expand Down
2 changes: 1 addition & 1 deletion libs/core/kiln_ai/datamodel/basemodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -470,7 +470,7 @@ def validate_and_save_with_subrelations(
ValidationError: If validation fails for the model or any of its children
"""
# Validate first, then save. Don't want error half way through, and partly persisted
# TODO P2: save to tmp dir, then move atomically. But need to merge directories so later.
# We should save to a tmp dir and then move atomically, but that requires merging directories, so it is deferred for now.
cls._validate_nested(data, save=False, path=path, parent=parent)
instance = cls._validate_nested(data, save=True, path=path, parent=parent)
return instance
Expand Down
2 changes: 1 addition & 1 deletion libs/core/kiln_ai/datamodel/project.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,6 @@ class Project(KilnParentModel, parent_of={"tasks": Task}):
description="A description of the project for you and your team. Will not be used in prompts/training/validation.",
)

# Needed for typechecking. TODO P2: fix this in KilnParentModel
# Needed for typechecking. We should fix this in KilnParentModel
def tasks(self) -> list[Task]:
return super().tasks() # type: ignore
2 changes: 1 addition & 1 deletion libs/core/kiln_ai/datamodel/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def input_schema(self) -> Dict | None:
return None
return schema_from_json_str(self.input_json_schema)

# These wrappers help for typechecking. TODO P2: fix this in KilnParentModel
# These wrappers help for typechecking. We should fix this in KilnParentModel
def runs(self, readonly: bool = False) -> list[TaskRun]:
return super().runs(readonly=readonly) # type: ignore

Expand Down
4 changes: 3 additions & 1 deletion libs/core/kiln_ai/datamodel/test_model_perf.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import shutil
import uuid

Expand Down Expand Up @@ -121,6 +122,7 @@ def test_benchmark_load_from_file(benchmark, task_run):

# I get 8k ops per second on my MBP. Lower value here for CI and parallel testing.
# Prior to optimization was 290 ops per second.
print(f"Ops per second: {ops_per_second:.6f}")
logger = logging.getLogger(__name__)
logger.info("Ops per second: %.6f", ops_per_second)
if ops_per_second < 500:
pytest.fail(f"Ops per second: {ops_per_second:.6f}, expected more than 1k ops")
Loading