add python backend

andrii-i · andrii-i · commit b0d7a63a0b17 · 2025-12-12T08:22:53.000-08:00
diff --git a/jupyter_scheduler/backends.py b/jupyter_scheduler/backends.py
@@ -17,6 +17,18 @@ class JupyterServerNotebookBackend(BaseBackend):
     priority = 0
 
 
+class JupyterServerPythonBackend(BaseBackend):
+    """Built-in backend that runs ``.py`` scripts on the Jupyter server via a subprocess."""
+
+    id = "jupyter_server_py"  # same name as its "jupyter_scheduler.backends" entry point
+    name = "Python Script"
+    description = "Execute Python scripts on the Jupyter server"
+    scheduler_class = "jupyter_scheduler.scheduler.Scheduler"
+    execution_manager_class = "jupyter_scheduler.python_executor.PythonScriptExecutionManager"
+    file_extensions = ["py"]  # listed without a leading dot
+    priority = 0  # same value as JupyterServerNotebookBackend
+
+
 @dataclass
 class BackendConfig:
     """Runtime configuration for an initialized backend instance."""
diff --git a/jupyter_scheduler/python_executor.py b/jupyter_scheduler/python_executor.py
@@ -0,0 +1,92 @@
+import os
+import subprocess
+import sys
+from typing import Dict
+
+import fsspec
+
+from jupyter_scheduler.executors import ExecutionManager
+from jupyter_scheduler.models import JobFeature
+from jupyter_scheduler.orm import Job
+
+
+class PythonScriptExecutionManager(ExecutionManager):
+    """Execute Python scripts via subprocess, saving stdout/stderr as log files."""
+
+    def execute(self):
+        """Run the staged script with ``sys.executable`` and persist its output.
+
+        Job parameters are exported to the child as ``JUPYTER_PARAM_<key>`` env
+        vars. stdout/stderr are written to ``stdout.log``/``stderr.log`` next to
+        the script, even when the script fails.
+
+        Raises:
+            RuntimeError: if the script exits with a non-zero return code.
+        """
+        script_path = self.staging_paths["input"]
+        staging_dir = os.path.dirname(script_path)
+        env = os.environ.copy()
+        for key, value in (self.model.parameters or {}).items():
+            env[f"JUPYTER_PARAM_{key}"] = str(value)
+        # errors="replace": output bytes that cannot be decoded must not abort
+        # the job with UnicodeDecodeError before the logs are written.
+        result = subprocess.run(
+            [sys.executable, script_path],
+            cwd=staging_dir,
+            capture_output=True,
+            text=True,
+            errors="replace",
+            env=env,
+        )
+        # NOTE(review): this scan runs before the log files below are written, so
+        # a first run does not record them in packaged_files - confirm intended.
+        self.add_side_effects_files(staging_dir)
+        for log_name, text in (("stdout.log", result.stdout), ("stderr.log", result.stderr)):
+            with fsspec.open(os.path.join(staging_dir, log_name), "w", encoding="utf-8") as f:
+                f.write(text)
+        if result.returncode != 0:
+            # Report the tail: a traceback names the actual exception on its last lines.
+            raise RuntimeError(
+                f"Script exited with code {result.returncode}\nstderr: {result.stderr[-500:]}"
+            )
+
+    def add_side_effects_files(self, staging_dir: str):
+        """Add files found under ``staging_dir`` to the job's ``packaged_files``.
+
+        Every file except the input script itself is recorded by its path relative
+        to ``staging_dir`` and merged with the entries already stored for the job.
+        Nothing is written to the database when no such file is found.
+        """
+        script_name = os.path.basename(self.staging_paths["input"])
+        discovered = {
+            os.path.relpath(os.path.join(dirpath, filename), staging_dir)
+            for dirpath, _, filenames in os.walk(staging_dir)
+            for filename in filenames
+        } - {script_name}
+        if not discovered:
+            return
+        with self.db_session() as session:
+            this_job = Job.job_id == self.job_id
+            stored = session.query(Job.packaged_files).filter(this_job).scalar()
+            merged = set(stored or []).union(discovered)
+            session.query(Job).filter(this_job).update({"packaged_files": list(merged)})
+            session.commit()
+
+    @classmethod
+    def supported_features(cls) -> Dict[JobFeature, bool]:
+        """Report per-feature support; only job_name, stop_job and delete_job are on."""
+        return {
+            JobFeature.job_name: True,
+            JobFeature.output_formats: False,  # No notebook conversion for .py
+            JobFeature.job_definition: False,
+            JobFeature.idempotency_token: False,
+            JobFeature.tags: False,
+            JobFeature.email_notifications: False,
+            JobFeature.timeout_seconds: False,
+            JobFeature.retry_on_timeout: False,
+            JobFeature.max_retries: False,
+            JobFeature.min_retry_interval_millis: False,
+            JobFeature.output_filename_template: False,
+            JobFeature.stop_job: True,
+            JobFeature.delete_job: True,
+        }
diff --git a/jupyter_scheduler/tests/test_python_executor.py b/jupyter_scheduler/tests/test_python_executor.py
@@ -0,0 +1,239 @@
+import os
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from jupyter_scheduler.models import DescribeJob
+from jupyter_scheduler.orm import Job
+from jupyter_scheduler.python_executor import PythonScriptExecutionManager
+
+
+@pytest.fixture
+def python_script_staging_dir(jp_scheduler_staging_dir) -> Path:
+    """Create and return a new ``job-py-1`` directory inside the scheduler staging dir."""
+    staging = jp_scheduler_staging_dir / "job-py-1"
+    staging.mkdir()
+    return staging
+
+
+@pytest.fixture
+def simple_script(python_script_staging_dir) -> Path:
+    """Write ``test_script.py``, which prints one greeting line, and return its path."""
+    target = python_script_staging_dir / "test_script.py"
+    target.write_text('print("Hello from Python script!")\n')
+    return target
+
+
+@pytest.fixture
+def script_with_params(python_script_staging_dir) -> Path:
+    """Write ``param_script.py``, which prints two ``JUPYTER_PARAM_*`` env values."""
+    source_lines = [
+        "import os",
+        "learning_rate = os.environ.get('JUPYTER_PARAM_learning_rate', 'not_set')",
+        "batch_size = os.environ.get('JUPYTER_PARAM_batch_size', 'not_set')",
+        'print(f"lr={learning_rate}, batch={batch_size}")',
+    ]
+    target = python_script_staging_dir / "param_script.py"
+    target.write_text("\n".join(source_lines) + "\n")
+    return target
+
+
+@pytest.fixture
+def failing_script(python_script_staging_dir) -> Path:
+    """Write ``failing_script.py``, which prints to stderr and exits with status 1."""
+    target = python_script_staging_dir / "failing_script.py"
+    target.write_text('import sys; print("error message", file=sys.stderr); sys.exit(1)\n')
+    return target
+
+
+@pytest.fixture
+def script_with_side_effects(python_script_staging_dir) -> Path:
+    """Write ``side_effects_script.py``, which creates ``output.txt`` when run."""
+    source_lines = [
+        "",  # the script text starts with an empty line
+        "with open('output.txt', 'w') as f:",
+        "    f.write('Generated output')",
+        'print("Created output.txt")',
+    ]
+    target = python_script_staging_dir / "side_effects_script.py"
+    target.write_text("\n".join(source_lines) + "\n")
+    return target
+
+
+@pytest.fixture
+def python_job_record(simple_script, jp_scheduler_db) -> str:
+    """Persist a ``Job`` row for ``simple_script`` and return its ``job_id``."""
+    record = Job(
+        name="test_python_job",
+        runtime_environment_name="default",
+        input_filename=simple_script.name,
+    )
+    jp_scheduler_db.add(record)
+    jp_scheduler_db.commit()
+    return record.job_id
+
+
+@pytest.fixture
+def python_job_with_params(script_with_params, jp_scheduler_db) -> str:
+    """Persist a ``Job`` row carrying two string parameters and return its ``job_id``."""
+    record = Job(
+        name="test_python_job_with_params",
+        runtime_environment_name="default",
+        input_filename=script_with_params.name,
+        parameters={"learning_rate": "0.01", "batch_size": "32"},
+    )
+    jp_scheduler_db.add(record)
+    jp_scheduler_db.commit()
+    return record.job_id
+
+
+class TestPythonScriptExecutionManager:
+    def test_execute_simple_script(
+        self,
+        python_job_record,
+        simple_script,
+        jp_scheduler_root_dir,
+        jp_scheduler_db_url,
+        jp_scheduler_db,
+    ):
+        """Execute a simple print script and verify stdout is captured."""
+        manager = PythonScriptExecutionManager(
+            job_id=python_job_record,
+            root_dir=str(jp_scheduler_root_dir),
+            db_url=jp_scheduler_db_url,
+            staging_paths={"input": str(simple_script)},
+        )
+
+        # A script that exits with status 0 must not raise
+        manager.execute()
+
+        # stdout.log is written next to the script
+        stdout_path = simple_script.parent / "stdout.log"
+        assert stdout_path.exists()
+        assert "Hello from Python script!" in stdout_path.read_text()
+
+    def test_execute_with_parameters(
+        self,
+        python_job_with_params,
+        script_with_params,
+        jp_scheduler_root_dir,
+        jp_scheduler_db_url,
+        jp_scheduler_db,
+    ):
+        """Parameters are passed as JUPYTER_PARAM_* env vars."""
+        manager = PythonScriptExecutionManager(
+            job_id=python_job_with_params,
+            root_dir=str(jp_scheduler_root_dir),
+            db_url=jp_scheduler_db_url,
+            staging_paths={"input": str(script_with_params)},
+        )
+
+        manager.execute()
+
+        stdout_path = script_with_params.parent / "stdout.log"
+        content = stdout_path.read_text()
+        assert "lr=0.01" in content
+        assert "batch=32" in content
+
+    def test_execute_script_failure(
+        self,
+        failing_script,
+        jp_scheduler_root_dir,
+        jp_scheduler_db_url,
+        jp_scheduler_db,
+    ):
+        """Non-zero exit code raises RuntimeError carrying the code and stderr text."""
+        job = Job(
+            name="test_failing_script",
+            runtime_environment_name="default",
+            input_filename=failing_script.name,
+        )
+        jp_scheduler_db.add(job)
+        jp_scheduler_db.commit()
+
+        manager = PythonScriptExecutionManager(
+            job_id=job.job_id,
+            root_dir=str(jp_scheduler_root_dir),
+            db_url=jp_scheduler_db_url,
+            staging_paths={"input": str(failing_script)},
+        )
+
+        with pytest.raises(RuntimeError) as exc_info:
+            manager.execute()
+
+        assert "exited with code 1" in str(exc_info.value)
+        assert "error message" in str(exc_info.value)
+
+    def test_stdout_stderr_captured(
+        self,
+        failing_script,
+        jp_scheduler_root_dir,
+        jp_scheduler_db_url,
+        jp_scheduler_db,
+    ):
+        """Both stdout.log and stderr.log are written even when the script fails."""
+        job = Job(
+            name="test_stderr_capture",
+            runtime_environment_name="default",
+            input_filename=failing_script.name,
+        )
+        jp_scheduler_db.add(job)
+        jp_scheduler_db.commit()
+
+        manager = PythonScriptExecutionManager(
+            job_id=job.job_id,
+            root_dir=str(jp_scheduler_root_dir),
+            db_url=jp_scheduler_db_url,
+            staging_paths={"input": str(failing_script)},
+        )
+
+        with pytest.raises(RuntimeError):
+            manager.execute()
+
+        log_dir = failing_script.parent
+        assert (log_dir / "stdout.log").exists()
+        assert "error message" in (log_dir / "stderr.log").read_text()
+
+    def test_side_effects_captured(
+        self,
+        script_with_side_effects,
+        jp_scheduler_root_dir,
+        jp_scheduler_db_url,
+        jp_scheduler_db,
+    ):
+        """Files created by the script are recorded in packaged_files."""
+        job = Job(
+            name="test_side_effects",
+            runtime_environment_name="default",
+            input_filename=script_with_side_effects.name,
+        )
+        jp_scheduler_db.add(job)
+        jp_scheduler_db.commit()
+
+        manager = PythonScriptExecutionManager(
+            job_id=job.job_id,
+            root_dir=str(jp_scheduler_root_dir),
+            db_url=jp_scheduler_db_url,
+            staging_paths={"input": str(script_with_side_effects)},
+        )
+
+        manager.execute()
+
+        # Expire cached state so the row is re-read from the DB
+        jp_scheduler_db.expire_all()
+        job = jp_scheduler_db.query(Job).filter(Job.job_id == job.job_id).one()
+
+        # output.txt should be in packaged_files
+        assert "output.txt" in job.packaged_files
+
+    def test_supported_features(self):
+        """Verify supported features match expected values."""
+        from jupyter_scheduler.models import JobFeature
+
+        features = PythonScriptExecutionManager.supported_features()
+
+        assert features[JobFeature.job_name] is True
+        assert features[JobFeature.output_formats] is False
+        assert features[JobFeature.stop_job] is True
+        assert features[JobFeature.delete_job] is True
diff --git a/pyproject.toml b/pyproject.toml
@@ -60,6 +60,7 @@ Homepage = "https://github.com/jupyter-server/jupyter-scheduler"
 
 [project.entry-points."jupyter_scheduler.backends"]
 jupyter_server_nb = "jupyter_scheduler.backends:JupyterServerNotebookBackend"
+jupyter_server_py = "jupyter_scheduler.backends:JupyterServerPythonBackend"
 
 [tool.check-wheel-contents]
 ignore = ["W002"]