transformerlab
diff --git a/‎test/api/test_experiment_jobs.py‎
Lines changed: 12 additions & 0 deletions b/‎test/api/test_experiment_jobs.py‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎test/api/test_remote.py‎
Lines changed: 232 additions & 0 deletions b/‎test/api/test_remote.py‎
Lines changed: 232 additions & 0 deletions
diff --git a/‎test/api/test_server_info.py‎
Lines changed: 47 additions & 0 deletions b/‎test/api/test_server_info.py‎
Lines changed: 47 additions & 0 deletions
diff --git a/‎test/api/test_train.py‎
Lines changed: 0 additions & 12 deletions b/‎test/api/test_train.py‎
Lines changed: 0 additions & 12 deletions
diff --git a/‎transformerlab/plugins/mlx_exporter/index.json‎
Lines changed: 1 addition & 1 deletion b/‎transformerlab/plugins/mlx_exporter/index.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎transformerlab/plugins/mlx_exporter/setup.sh‎
Lines changed: 2 additions & 2 deletions b/‎transformerlab/plugins/mlx_exporter/setup.sh‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎transformerlab/plugins/mlx_lora_trainer/index.json‎
Lines changed: 1 addition & 1 deletion b/‎transformerlab/plugins/mlx_lora_trainer/index.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎transformerlab/plugins/mlx_lora_trainer/setup.sh‎
Lines changed: 2 additions & 2 deletions b/‎transformerlab/plugins/mlx_lora_trainer/setup.sh‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎transformerlab/plugins/mlx_rlaif_trainer/index.json‎
Lines changed: 1 addition & 1 deletion b/‎transformerlab/plugins/mlx_rlaif_trainer/index.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎transformerlab/plugins/mlx_rlaif_trainer/setup.sh‎
Lines changed: 2 additions & 2 deletions b/‎transformerlab/plugins/mlx_rlaif_trainer/setup.sh‎
Lines changed: 2 additions & 2 deletions
@@ -127,3 +127,15 @@ def test_job_edge_cases(client):
 
     resp = client.get("/experiment/alpha/jobs/create?type=DOWNLOAD_MODEL&status=QUEUED&data={}")
     assert resp.status_code == 200
+
+
+def test_train_sweep_results(client):
+    resp = client.get("/experiment/alpha/jobs/1/sweep_results")
+    assert resp.status_code == 200
+    data = resp.json()
+    assert "status" in data
+    assert data["status"] in ("success", "error")
+    if data["status"] == "success":
+        assert "data" in data
+    else:
+        assert "message" in data
@@ -0,0 +1,232 @@
+import pytest
+from unittest.mock import patch, AsyncMock, MagicMock
+from io import BytesIO
+
+
+# Use the client fixture from conftest.py - no need to create our own
+
+
+@pytest.fixture
+def gpu_orchestration_env_vars(monkeypatch):
+    """Set up GPU orchestration environment variables for testing"""
+    monkeypatch.setenv("GPU_ORCHESTRATION_SERVER", "http://test-orchestrator.example.com")
+    monkeypatch.setenv("GPU_ORCHESTRATION_SERVER_PORT", "8080")
+    yield
+    # Cleanup - remove env vars after test
+    monkeypatch.delenv("GPU_ORCHESTRATION_SERVER", raising=False)
+    monkeypatch.delenv("GPU_ORCHESTRATION_SERVER_PORT", raising=False)
+
+
+@pytest.fixture
+def no_gpu_orchestration_env(monkeypatch):
+    """Ensure GPU orchestration environment variables are not set"""
+    monkeypatch.delenv("GPU_ORCHESTRATION_SERVER", raising=False)
+    monkeypatch.delenv("GPU_ORCHESTRATION_SERVER_PORT", raising=False)
+
+
+@pytest.fixture
+def mock_experiment_id(client):
+    """Create a test experiment and return its ID, cleaning up after the test"""
+    import os
+    import time
+    import uuid
+    from transformerlab.services import experiment_service
+    
+    # Use a unique name to avoid conflicts - add UUID for better uniqueness
+    unique_name = f"test_exp_remote_{os.getpid()}_{int(time.time())}_{uuid.uuid4().hex[:8]}"
+    
+    # Check if experiment already exists and delete it if it does
+    existing = experiment_service.experiment_get(unique_name)
+    if existing:
+        experiment_service.experiment_delete(unique_name)
+    
+    # Create the experiment
+    exp_id = experiment_service.experiment_create(unique_name, {})
+    
+    yield exp_id
+    
+    # Cleanup: delete all jobs in the experiment, then delete the experiment
+    try:
+        from transformerlab.services import job_service
+        job_service.job_delete_all(exp_id)
+    except Exception:
+        pass
+    
+    try:
+        experiment_service.experiment_delete(exp_id)
+    except Exception:
+        # Ignore errors during cleanup
+        pass
+
+
+@pytest.fixture
+def job_cleanup():
+    """Fixture to track and cleanup jobs created during tests"""
+    created_jobs = []  # List of (job_id, experiment_id) tuples
+    
+    yield created_jobs
+    
+    # Cleanup: delete all tracked jobs
+    from transformerlab.services import job_service
+    for job_id, experiment_id in created_jobs:
+        try:
+            job_service.job_delete(job_id, experiment_id)
+        except Exception:
+            # Ignore errors during cleanup
+            pass
+
+
+class TestValidateGPUOrchestratorEnvVars:
+    """Test the validate_gpu_orchestrator_env_vars function"""
+
+    def test_validate_env_vars_with_both_set(self, gpu_orchestration_env_vars):
+        """Test validation when both env vars are set"""
+        from transformerlab.routers.remote import validate_gpu_orchestrator_env_vars
+        
+        url, port = validate_gpu_orchestrator_env_vars()
+        assert url == "http://test-orchestrator.example.com"
+        assert port == "8080"
+
+    def test_validate_env_vars_missing_url(self, monkeypatch):
+        """Test validation when GPU_ORCHESTRATION_SERVER is missing"""
+        monkeypatch.delenv("GPU_ORCHESTRATION_SERVER", raising=False)
+        monkeypatch.setenv("GPU_ORCHESTRATION_SERVER_PORT", "8080")
+        
+        from transformerlab.routers.remote import validate_gpu_orchestrator_env_vars
+        
+        result = validate_gpu_orchestrator_env_vars()
+        url, error_response = result
+        assert url is None
+        assert isinstance(error_response, dict)
+        assert error_response["status"] == "error"
+        assert "GPU_ORCHESTRATION_SERVER" in error_response["message"]
+
+    def test_validate_env_vars_missing_port(self, monkeypatch):
+        """Test validation when GPU_ORCHESTRATION_SERVER_PORT is missing"""
+        monkeypatch.setenv("GPU_ORCHESTRATION_SERVER", "http://test-orchestrator.example.com")
+        monkeypatch.delenv("GPU_ORCHESTRATION_SERVER_PORT", raising=False)
+        
+        from transformerlab.routers.remote import validate_gpu_orchestrator_env_vars
+        
+        result = validate_gpu_orchestrator_env_vars()
+        url, error_response = result
+        # When port is missing, the function returns None as the URL
+        assert url is None
+        assert isinstance(error_response, dict)
+        assert error_response["status"] == "error"
+        assert "GPU_ORCHESTRATION_SERVER_PORT" in error_response["message"]
+
+
+
+
+class TestStopRemote:
+    """Test the /remote/stop endpoint"""
+
+
+    def test_stop_remote_missing_env_vars(self, client, no_gpu_orchestration_env):
+        """Test stopping when GPU orchestration env vars are not set"""
+        response = client.post(
+            "/remote/stop",
+            data={
+                "job_id": "test-job-id",
+                "cluster_name": "test-cluster",
+            },
+        )
+        assert response.status_code == 200
+        data = response.json()
+        assert data["status"] == "error"
+        assert "GPU_ORCHESTRATION_SERVER" in data["message"]
+
+
+class TestUploadDirectory:
+    """Test the /remote/upload endpoint"""
+
+    @patch("transformerlab.routers.remote.httpx.AsyncClient")
+    def test_upload_directory_success(self, mock_client_class, client, gpu_orchestration_env_vars):
+        """Test uploading a directory successfully"""
+        # Create test files using BytesIO for proper file-like objects
+        files = [
+            ("dir_files", ("test1.txt", BytesIO(b"content1"), "text/plain")),
+            ("dir_files", ("test2.txt", BytesIO(b"content2"), "text/plain")),
+        ]
+
+        # Mock the async client and response
+        mock_response = MagicMock()
+        mock_response.status_code = 200
+        mock_response.json.return_value = {
+            "status": "uploaded",
+            "upload_path": "/remote/path",
+        }
+        
+        mock_httpx_client = AsyncMock()
+        mock_httpx_client.post = AsyncMock(return_value=mock_response)
+        mock_httpx_client.__aenter__.return_value = mock_httpx_client
+        mock_httpx_client.__aexit__.return_value = None
+        mock_client_class.return_value = mock_httpx_client
+
+        response = client.post(
+            "/remote/upload",
+            files=files,
+            data={"dir_name": "test-dir"},
+        )
+        assert response.status_code == 200
+        data = response.json()
+        assert data["status"] == "success"
+        assert "local_storage_path" in data
+
+    def test_upload_directory_missing_env_vars(self, client, no_gpu_orchestration_env):
+        """Test uploading when GPU orchestration env vars are not set"""
+        files = [("dir_files", ("test.txt", BytesIO(b"content"), "text/plain"))]
+        
+        response = client.post(
+            "/remote/upload",
+            files=files,
+        )
+        assert response.status_code == 200
+        data = response.json()
+        assert data["status"] == "error"
+        assert "GPU_ORCHESTRATION_SERVER" in data["message"]
+
+
+class TestCheckRemoteJobStatus:
+    """Test the /remote/check-status endpoint"""
+
+    @patch("transformerlab.routers.remote.httpx.AsyncClient")
+    def test_check_status_no_launching_jobs(self, mock_client_class, client, gpu_orchestration_env_vars):
+        """Test checking status when there are no LAUNCHING jobs"""
+        response = client.get("/remote/check-status")
+        assert response.status_code == 200
+        data = response.json()
+        assert "updated_jobs" in data
+        assert data["updated_jobs"] == []
+
+
+class TestGetOrchestratorLogs:
+    """Test the /remote/logs/{request_id} endpoint"""
+
+    @patch("transformerlab.routers.remote.httpx.AsyncClient")
+    def test_get_logs_success(self, mock_client_class, client, gpu_orchestration_env_vars):
+        """Test getting logs successfully"""
+        request_id = "test-request-123"
+        
+        # Mock streaming response
+        mock_stream_response = MagicMock()
+        mock_stream_response.status_code = 200
+        mock_stream_response.aiter_bytes = AsyncMock(return_value=iter([b"log line 1\n", b"log line 2\n"]))
+        
+        mock_httpx_client = AsyncMock()
+        mock_httpx_client.stream = AsyncMock(return_value=mock_stream_response)
+        mock_httpx_client.__aenter__.return_value = mock_httpx_client
+        mock_httpx_client.__aexit__.return_value = None
+        mock_client_class.return_value = mock_httpx_client
+
+        response = client.get(f"/remote/logs/{request_id}")
+        # Streaming responses return 200 with appropriate headers
+        assert response.status_code == 200
+
+    def test_get_logs_missing_env_vars(self, client, no_gpu_orchestration_env):
+        """Test getting logs when GPU orchestration env vars are not set"""
+        response = client.get("/remote/logs/test-request-123")
+        # The endpoint should return an error response
+        assert response.status_code in [200, 500]  # May return error as JSON or raise exception
+
@@ -61,3 +61,50 @@ def fake_check_output(*args, **kwargs):
     from transformerlab.routers import serverinfo
 
     assert serverinfo.is_wsl() is False
+
+
+def test_healthz_local_mode(client, monkeypatch):
+    """Test healthz endpoint in local mode (no GPU orchestration)"""
+    # Ensure GPU orchestration env vars are not set
+    monkeypatch.delenv("GPU_ORCHESTRATION_SERVER", raising=False)
+    monkeypatch.delenv("GPU_ORCHESTRATION_SERVER_PORT", raising=False)
+    
+    response = client.get("/healthz")
+    assert response.status_code == 200
+    data = response.json()
+    assert data["message"] == "OK"
+    assert data["mode"] == "local"
+    assert data["gpu_orchestration_server"] == ""
+    assert data["gpu_orchestration_server_port"] == ""
+
+
+def test_healthz_gpu_orchestration_mode(client, monkeypatch):
+    """Test healthz endpoint in GPU orchestration mode"""
+    # Set GPU orchestration env vars
+    monkeypatch.setenv("GPU_ORCHESTRATION_SERVER", "http://orchestrator.example.com")
+    monkeypatch.setenv("GPU_ORCHESTRATION_SERVER_PORT", "8080")
+    
+    # The healthz endpoint reads env vars at request time, so monkeypatch should work
+    response = client.get("/healthz")
+    assert response.status_code == 200
+    data = response.json()
+    assert data["message"] == "OK"
+    assert data["mode"] == "gpu_orchestration"
+    assert data["gpu_orchestration_server"] == "http://orchestrator.example.com"
+    assert data["gpu_orchestration_server_port"] == "8080"
+
+
+def test_healthz_gpu_orchestration_mode_no_port(client, monkeypatch):
+    """Test healthz endpoint in GPU orchestration mode without port"""
+    # Set only GPU orchestration server URL
+    monkeypatch.setenv("GPU_ORCHESTRATION_SERVER", "http://orchestrator.example.com")
+    monkeypatch.delenv("GPU_ORCHESTRATION_SERVER_PORT", raising=False)
+    
+    # The healthz endpoint reads env vars at request time, so monkeypatch should work
+    response = client.get("/healthz")
+    assert response.status_code == 200
+    data = response.json()
+    assert data["message"] == "OK"
+    assert data["mode"] == "gpu_orchestration"
+    assert data["gpu_orchestration_server"] == "http://orchestrator.example.com"
+    assert data["gpu_orchestration_server_port"] == ""
@@ -13,15 +13,3 @@ def test_train_create_template(client):
     data = {"name": "test_template", "description": "desc", "type": "test", "config": "{}"}
     resp = client.post("/train/template/create", data=data)
     assert resp.status_code in (200, 422, 400)
-
-
-def test_train_sweep_results(client):
-    resp = client.get("/train/job/1/sweep_results")
-    assert resp.status_code == 200
-    data = resp.json()
-    assert "status" in data
-    assert data["status"] in ("success", "error")
-    if data["status"] == "success":
-        assert "data" in data
-    else:
-        assert "message" in data
@@ -4,7 +4,7 @@
   "description": "Exports the current model to MLX format so it can be run on Apple Silicon.",
   "plugin-format": "python",
   "type": "exporter",
-  "version": "1.0.22",
+  "version": "1.0.23",
   "model_architectures": [
     "CohereForCausalLM",
     "DeepseekV2ForCausalLM",
 
@@ -1,3 +1,3 @@
 #!/usr/bin/env bash
-uv pip install mlx==0.27.1 --upgrade
-uv pip install "mlx-lm==0.26.3" --upgrade
+uv pip install mlx==0.29.3 --upgrade
+uv pip install "mlx-lm==0.28.3" --upgrade
@@ -4,7 +4,7 @@
   "description": "MLX Machine learning research on your laptop or in a data center - by Apple",
   "plugin-format": "python",
   "type": "trainer",
-  "version": "0.4.21",
+  "version": "0.4.22",
   "model_architectures": [
     "MLX",
     "CohereForCausalLM",
 
@@ -1,4 +1,4 @@
 #!/usr/bin/env bash
 uv pip install trl
-uv pip install mlx==0.27.1 --upgrade
-uv pip install "mlx-lm==0.26.3" --upgrade
+uv pip install mlx==0.29.3 --upgrade
+uv pip install "mlx-lm==0.28.3" --upgrade
@@ -4,7 +4,7 @@
   "description": "MLX PPO (Proximal Policy Optimization) Reinforcement Learning from AI Feedback (RLAIF) trainer for MLX models.",
   "plugin-format": "python",
   "type": "trainer",
-  "version": "0.1.6",
+  "version": "0.1.7",
   "model_architectures": [
     "MLX",
     "LlamaForCausalLM",
 
@@ -1,5 +1,5 @@
 #!/usr/bin/env bash
-uv pip install mlx==0.27.1 --upgrade
-uv pip install "mlx-lm==0.26.3" --upgrade
+uv pip install mlx==0.29.3 --upgrade
+uv pip install "mlx-lm==0.28.3" --upgrade
 uv pip install "mlx_embedding_models==0.0.11"
 uv pip install --upgrade wandb