Skip to content

Commit 5f728c0

Browse files
Expand test suite from ~70 to 147 tests, fix flaky ordering bug
Add 8 new test files covering previously untested modules:

- test_chat.py: _detect_base_model, adapter validation
- test_push.py: _format_size, _generate_model_card, token checks
- test_init.py: all templates, overwrite confirm/deny, YAML validation
- test_callback.py: SoupTrainerCallback with mocks
- test_display.py: TrainingDisplay rendering + edge cases
- test_loader.py: JSON/CSV/JSONL loading, empty lines, bad JSON
- test_validator.py: validate_and_stats, extended_stats, _percentile
- test_formats.py: reverse conversion, round-trips, edge cases

Fix flaky test_list_runs_ordering by adding rowid DESC as a tiebreaker in the
list_runs SQL query (runs created in the same second had nondeterministic order).

Update CLAUDE.md with the test file inventory.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 54282a2 commit 5f728c0

File tree

10 files changed

+918
-1
lines changed

10 files changed

+918
-1
lines changed

CLAUDE.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ soup train --config soup.yaml
6262
- **Output:** Use `rich.console.Console` — never bare `print()`
6363
- **Lazy imports:** Heavy deps (torch, transformers, peft, datasketch, lm_eval, plotext) are imported inside functions, not at module level
6464
- **Variable naming:** Avoid single-letter names (ruff E741) — use `entry`, `part`, `length` instead of `l`
65+
- **Testing:** Rich Panel objects must be rendered via `Console(file=StringIO())` for string assertions, not `str(panel)`
6566

6667
## Git Workflow
6768

@@ -70,3 +71,27 @@ soup train --config soup.yaml
7071
- CI: GitHub Actions runs ruff lint + pytest on Python 3.9/3.11/3.12
7172
- Always run `ruff check soup_cli/ tests/` before committing
7273
- Always run `pytest tests/ -v` before committing
74+
75+
## Tests
76+
77+
Test suite (~147 tests) lives in `tests/`:
78+
79+
| File | Covers |
80+
|---|---|
81+
| `test_config.py` | Config loading, validation, defaults |
82+
| `test_data.py` | Format detection, conversion, validation |
83+
| `test_gpu.py` | GPU detection, batch size estimation |
84+
| `test_cli.py` | CLI commands basic validation |
85+
| `test_tracker.py` | SQLite experiment tracker |
86+
| `test_runs.py` | `soup runs` CLI commands |
87+
| `test_data_tools.py` | Data convert/merge/dedup/stats commands |
88+
| `test_eval.py` | Eval command |
89+
| `test_smoke_train.py` | Full pipeline smoke tests (GPU) |
90+
| `test_chat.py` | Chat command, `_detect_base_model` |
91+
| `test_push.py` | Push command, `_format_size`, `_generate_model_card` |
92+
| `test_init.py` | Init command, templates, overwrite logic |
93+
| `test_callback.py` | `SoupTrainerCallback` (mock-based) |
94+
| `test_display.py` | `TrainingDisplay` rendering |
95+
| `test_loader.py` | Data loading (JSONL/JSON/CSV, edge cases) |
96+
| `test_validator.py` | `validate_and_stats`, `extended_stats`, `_percentile` |
97+
| `test_formats.py` | Reverse conversion, round-trips, edge cases |

soup_cli/experiment/tracker.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -191,7 +191,7 @@ def list_runs(self, limit: int = 50) -> list[dict]:
191191
"""Return list of runs ordered by created_at desc."""
192192
conn = self._get_conn()
193193
rows = conn.execute(
194-
"SELECT * FROM runs ORDER BY created_at DESC LIMIT ?", (limit,)
194+
"SELECT * FROM runs ORDER BY created_at DESC, rowid DESC LIMIT ?", (limit,)
195195
).fetchall()
196196
return [dict(row) for row in rows]
197197

tests/test_callback.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
"""Tests for SoupTrainerCallback."""
2+
3+
from unittest.mock import MagicMock, patch
4+
5+
from soup_cli.monitoring.callback import SoupTrainerCallback
6+
7+
8+
def _make_state(global_step=10, max_steps=100, epoch=1.0):
9+
"""Create a mock TrainerState."""
10+
state = MagicMock()
11+
state.global_step = global_step
12+
state.max_steps = max_steps
13+
state.epoch = epoch
14+
return state
15+
16+
17+
def _make_args():
18+
"""Create a mock TrainingArguments."""
19+
return MagicMock()
20+
21+
22+
def test_on_train_begin_starts_display():
    """on_train_begin should call display.start with total_steps."""
    mock_display = MagicMock()
    trainer_state = _make_state(max_steps=500)
    cb = SoupTrainerCallback(display=mock_display)

    cb.on_train_begin(_make_args(), trainer_state, MagicMock())

    mock_display.start.assert_called_once_with(total_steps=500)
31+
32+
33+
def test_on_train_end_stops_display():
    """on_train_end should call display.stop."""
    mock_display = MagicMock()
    cb = SoupTrainerCallback(display=mock_display)

    cb.on_train_end(_make_args(), _make_state(), MagicMock())

    mock_display.stop.assert_called_once()
41+
42+
43+
def test_on_log_updates_display():
    """on_log should call display.update with metrics from logs.

    Regression note: the original assertion
    ``call_kwargs[1]["step"] == 42 or call_kwargs[0][0] == 42`` raised
    KeyError/IndexError instead of falling through to the second operand
    when ``step`` was passed positionally (or vice versa) — ``or`` does not
    catch exceptions, so the fallback branch was unreachable.  Look the
    value up safely in whichever form it was passed.
    """
    display = MagicMock()
    callback = SoupTrainerCallback(display=display)

    logs = {
        "loss": 1.234,
        "learning_rate": 2e-5,
        "grad_norm": 0.5,
        "train_steps_per_second": 3.0,
    }
    state = _make_state(global_step=42, epoch=1.5)

    with patch("soup_cli.monitoring.callback.torch", create=True):
        callback.on_log(_make_args(), state, MagicMock(), logs=logs)

    display.update.assert_called_once()
    call = display.update.call_args
    # Accept step passed either as a keyword or as the first positional arg.
    step_value = call.kwargs.get("step", call.args[0] if call.args else None)
    assert step_value == 42
62+
63+
64+
def test_on_log_none_logs():
    """on_log with logs=None should do nothing."""
    mock_display = MagicMock()
    cb = SoupTrainerCallback(display=mock_display)

    cb.on_log(_make_args(), _make_state(), MagicMock(), logs=None)

    mock_display.update.assert_not_called()
72+
73+
74+
def test_on_log_with_tracker():
    """on_log should forward metrics to tracker if provided."""
    mock_display = MagicMock()
    mock_tracker = MagicMock()
    cb = SoupTrainerCallback(
        display=mock_display, tracker=mock_tracker, run_id="run_123"
    )

    trainer_state = _make_state(global_step=10, epoch=1.0)
    cb.on_log(
        _make_args(),
        trainer_state,
        MagicMock(),
        logs={"loss": 0.5, "learning_rate": 1e-5},
    )

    mock_tracker.log_metrics.assert_called_once()
    forwarded = mock_tracker.log_metrics.call_args.kwargs
    assert forwarded["run_id"] == "run_123"
    assert forwarded["step"] == 10
    assert forwarded["loss"] == 0.5
90+
91+
92+
def test_on_log_without_tracker():
    """on_log without tracker should not crash."""
    mock_display = MagicMock()
    cb = SoupTrainerCallback(display=mock_display, tracker=None, run_id="")

    cb.on_log(_make_args(), _make_state(), MagicMock(), logs={"loss": 0.5})

    mock_display.update.assert_called_once()

tests/test_chat.py

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
"""Tests for soup chat command."""
2+
3+
import json
4+
from pathlib import Path
5+
6+
from typer.testing import CliRunner
7+
8+
from soup_cli.cli import app
9+
from soup_cli.commands.chat import _detect_base_model
10+
11+
runner = CliRunner()
12+
13+
14+
def test_chat_missing_model_path():
    """Chat pointed at a nonexistent model path should exit with code 1."""
    outcome = runner.invoke(app, ["chat", "--model", "/nonexistent/path"])
    assert outcome.exit_code == 1
18+
19+
20+
def test_detect_base_model_valid(tmp_path: Path):
    """Should read base_model_name_or_path from adapter_config.json."""
    payload = {
        "base_model_name_or_path": "meta-llama/Llama-3.1-8B-Instruct",
        "r": 64,
        "lora_alpha": 16,
    }
    cfg = tmp_path / "adapter_config.json"
    cfg.write_text(json.dumps(payload))

    assert _detect_base_model(cfg) == "meta-llama/Llama-3.1-8B-Instruct"
30+
31+
32+
def test_detect_base_model_missing_key(tmp_path: Path):
    """Should return None if base_model_name_or_path is missing."""
    cfg = tmp_path / "adapter_config.json"
    cfg.write_text(json.dumps({"r": 64}))

    assert _detect_base_model(cfg) is None
38+
39+
40+
def test_detect_base_model_invalid_json(tmp_path: Path):
    """Should return None for malformed JSON."""
    cfg = tmp_path / "adapter_config.json"
    cfg.write_text("not valid json {{{")

    assert _detect_base_model(cfg) is None
46+
47+
48+
def test_detect_base_model_missing_file(tmp_path: Path):
    """Should return None for nonexistent file."""
    missing = tmp_path / "nonexistent.json"
    assert _detect_base_model(missing) is None
53+
54+
55+
def test_chat_adapter_without_base_model(tmp_path: Path):
    """Chat with adapter that has no base model info should fail."""
    # Fake adapter dir: adapter_config.json present but missing
    # base_model_name_or_path, so detection must fail loudly.
    adapter_dir = tmp_path / "adapter"
    adapter_dir.mkdir()
    (adapter_dir / "adapter_config.json").write_text(json.dumps({"r": 64}))

    outcome = runner.invoke(app, ["chat", "--model", str(adapter_dir)])

    assert outcome.exit_code == 1
    assert "Cannot detect base model" in outcome.output
66+
67+
68+
def test_chat_non_adapter_directory(tmp_path: Path):
    """Chat with directory that has no adapter_config.json skips base model detection."""
    model_dir = tmp_path / "model"
    model_dir.mkdir()

    # Without adapter_config.json the path is treated as a plain model and
    # loaded directly; loading fails (no weights), but validation must pass.
    outcome = runner.invoke(app, ["chat", "--model", str(model_dir)])

    # Failure should come from model loading, not path validation.
    assert outcome.exit_code == 1

tests/test_display.py

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
"""Tests for TrainingDisplay."""
2+
3+
from io import StringIO
4+
5+
from rich.console import Console
6+
7+
from soup_cli.config.schema import SoupConfig
8+
from soup_cli.monitoring.display import TrainingDisplay
9+
10+
11+
def _render_to_str(panel) -> str:
    """Render a Rich Panel to a plain string for assertion.

    Rich renderables cannot be compared via str(); printing to a Console
    backed by a StringIO buffer yields the actual rendered text.
    """
    sink = StringIO()
    Console(file=sink, width=120, force_terminal=True).print(panel)
    return sink.getvalue()
17+
18+
19+
def _make_config():
    """Create a minimal SoupConfig for display testing."""
    cfg = SoupConfig(
        base="test-model",
        data={"train": "./data.jsonl"},
        training={"epochs": 3},
    )
    return cfg
26+
27+
28+
def test_display_init():
    """Display should initialize with default values."""
    disp = TrainingDisplay(_make_config(), device_name="cuda")

    assert disp.device_name == "cuda"
    assert disp.current_step == 0
    assert disp.total_steps == 0
    assert disp.loss == 0.0
35+
36+
37+
def test_display_update():
    """Update should store new metric values."""
    disp = TrainingDisplay(_make_config())
    disp.total_steps = 100

    disp.update(step=50, epoch=1.5, loss=0.876, lr=1e-5, speed=3.2, gpu_mem="12/24 GB")

    expected = {
        "current_step": 50,
        "current_epoch": 1.5,
        "loss": 0.876,
        "lr": 1e-5,
        "speed": 3.2,
        "gpu_mem": "12/24 GB",
    }
    for attr, value in expected.items():
        assert getattr(disp, attr) == value
50+
51+
52+
def test_display_render_panel():
    """_render should produce a Panel with correct content."""
    disp = TrainingDisplay(_make_config(), device_name="cuda:0")
    disp.total_steps = 100
    disp.update(step=62, epoch=2.0, loss=0.847, lr=1.4e-5, speed=3.2, gpu_mem="18/24 GB")

    text = _render_to_str(disp._render())

    # Progress counter and loss value must appear in the rendered output.
    assert "62/100" in text
    assert "0.847" in text
62+
63+
64+
def test_display_render_zero_steps():
    """_render with total_steps=0 should not crash (division by zero)."""
    disp = TrainingDisplay(_make_config())
    disp.total_steps = 0
    assert disp._render() is not None
70+
71+
72+
def test_display_experiment_name():
    """Display should use experiment_name in panel title if set."""
    named_config = SoupConfig(
        base="test-model",
        data={"train": "./data.jsonl"},
        experiment_name="my-experiment",
    )
    disp = TrainingDisplay(named_config)
    disp.total_steps = 10

    text = _render_to_str(disp._render())

    assert "my-experiment" in text
84+
85+
86+
def test_display_start_stop():
    """Start and stop should not crash (we don't test actual terminal rendering)."""
    disp = TrainingDisplay(_make_config())

    disp.start(total_steps=100)
    assert disp.total_steps == 100
    assert disp._live is not None

    disp.stop()
93+
94+
95+
def test_display_update_without_live():
    """Update without calling start should not crash."""
    disp = TrainingDisplay(_make_config())
    disp.update(step=1, epoch=0.1, loss=2.0, lr=1e-4)
    assert disp.current_step == 1

0 commit comments

Comments
 (0)