Merge branch 'job_files_fastapi_put' into arc

kysrpex · kysrpex · commit 3a0e965499b0 · 2025-05-26T16:43:17.000+02:00
diff --git a/client/src/api/schema/schema.ts b/client/src/api/schema/schema.ts
@@ -3060,7 +3060,8 @@ export interface paths {
          *     This API method is intended only for consumption by job runners, not end users.
          */
         get: operations["index_api_jobs__job_id__files_get"];
-        put?: never;
+        /** Populate an output file. */
+        put: operations["populate_api_jobs__job_id__files_put"];
         /**
          * Populate an output file.
          * @description Populate an output file (formal dataset, task split part, working directory file (such as those related to
@@ -31635,6 +31636,76 @@ export interface operations {
             };
         };
     };
+    populate_api_jobs__job_id__files_put: {
+        parameters: {
+            query: {
+                /** @description Path to file to create/replace. */
+                path: string;
+                /** @description A key used to authenticate this request as acting on behalf of a job runner for the specified job. */
+                job_key: string;
+            };
+            header?: {
+                /** @description The user ID that will be used to effectively make this API call. Only admins and designated users can make API calls on behalf of other users. */
+                "run-as"?: string | null;
+            };
+            path: {
+                /** @description Encoded id string of the job. */
+                job_id: string;
+            };
+            cookie?: never;
+        };
+        requestBody?: never;
+        responses: {
+            /** @description Successful Response */
+            200: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": unknown;
+                };
+            };
+            /** @description A new file has been created. */
+            201: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content?: never;
+            };
+            /** @description An existing file has been replaced. */
+            204: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content?: never;
+            };
+            /** @description Bad request. */
+            400: {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content?: never;
+            };
+            /** @description Request Error */
+            "4XX": {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["MessageExceptionModel"];
+                };
+            };
+            /** @description Server Error */
+            "5XX": {
+                headers: {
+                    [name: string]: unknown;
+                };
+                content: {
+                    "application/json": components["schemas"]["MessageExceptionModel"];
+                };
+            };
+        };
+    };
     create_api_jobs__job_id__files_post: {
         parameters: {
             query?: {
diff --git a/lib/galaxy/webapps/galaxy/api/__init__.py b/lib/galaxy/webapps/galaxy/api/__init__.py
@@ -242,6 +242,9 @@ def __init__(self, request: Request):
     def base(self) -> str:
         return str(self.__request.base_url)
 
+    def stream(self) -> AsyncGenerator[bytes, None]:
+        return self.__request.stream()  # async generator over the chunks of the raw request body
+
     @property
     def url_path(self) -> str:
         scope = self.__request.scope
@@ -250,6 +253,10 @@ def url_path(self) -> str:
             url = urljoin(url, root_path)
         return url
 
+    @property
+    def url(self) -> str:
+        return str(self.__request.url)
+
     @property
     def host(self) -> str:
         return self.__request.base_url.netloc
diff --git a/lib/galaxy/webapps/galaxy/api/job_files.py b/lib/galaxy/webapps/galaxy/api/job_files.py
@@ -2,6 +2,7 @@
 API for asynchronous job running mechanisms can use to fetch or put files related to running and queued jobs.
 """
 
+import asyncio
 import logging
 import os
 import re
@@ -19,6 +20,7 @@
     Path,
     Query,
     Request,
+    Response,
     UploadFile,
 )
 from fastapi.params import Depends
@@ -220,6 +222,63 @@ def index(
 
         return GalaxyFileResponse(path)
 
+    # The ARC remote job runner (`lib.galaxy.jobs.runners.pulsar.PulsarARCJobRunner`) expects a `PUT` endpoint to stage
+    # out result files back to Galaxy.
+    @router.put(
+        "/api/jobs/{job_id}/files",
+        summary="Populate an output file.",
+        responses={
+            201: {"description": "A new file has been created."},
+            204: {"description": "An existing file has been replaced."},
+            400: {"description": "Bad request."},
+        },
+    )
+    def populate(
+        self,
+        job_id: Annotated[str, Path(description="Encoded id string of the job.")],
+        path: Annotated[str, Query(description="Path to file to create/replace.")],
+        job_key: Annotated[
+            str,
+            Query(
+                description=(
+                    "A key used to authenticate this request as acting on behalf of a job runner for the specified job."
+                ),
+            ),
+        ],
+        trans: SessionRequestContext = DependsOnTrans,
+    ):
+        path = unquote(path)
+
+        # Authorization helpers live elsewhere in this class; presumably they raise on a bad job key or path.
+        job = self.__authorize_job_access(trans, job_id, path=path, job_key=job_key)
+        self.__check_job_can_write_to_path(trans, job, path)
+
+        # Recorded before writing so the response can distinguish "created" (201) from "replaced" (204).
+        destination_file_exists = os.path.exists(path)
+
+        # The body can only be read via the async generator `trans.request.stream()`, while this endpoint is
+        # deliberately sync, since `destination_file.write(chunk)` blocks. The coroutine below is therefore driven
+        # to completion on a private event loop inside the thread that serves this request.
+        # NOTE(review): the body is thus consumed on a different loop than the one the ASGI server runs on; confirm
+        # that its receive channel tolerates this for bodies that arrive in several chunks.
+        async def write():
+            with open(path, "wb") as destination_file:
+                async for chunk in trans.request.stream():
+                    destination_file.write(chunk)
+
+        target_dir = os.path.dirname(path)
+        util.safe_makedirs(target_dir)
+        # `asyncio.run` creates a fresh loop, runs `write()` on it, finalizes async generators and closes the loop.
+        # It also unsets the thread's current loop afterwards, so this thread is not left pointing at a closed loop
+        # (which is what a hand-managed `new_event_loop()`/`set_event_loop()`/`close()` sequence would leave behind).
+        asyncio.run(write())
+
+        return (
+            Response(status_code=201, headers={"Location": str(trans.request.url)})
+            if not destination_file_exists
+            else Response(status_code=204)
+        )
+
     @router.post(
         "/api/jobs/{job_id}/files",
         summary="Populate an output file.",
diff --git a/lib/galaxy/work/context.py b/lib/galaxy/work/context.py
@@ -1,6 +1,7 @@
 import abc
 from typing import (
     Any,
+    AsyncGenerator,
     Dict,
     List,
     Optional,
@@ -87,11 +88,20 @@ class GalaxyAbstractRequest:
     def base(self) -> str:
         """Base URL of the request."""
 
+    @abc.abstractmethod
+    def stream(self) -> AsyncGenerator[bytes, None]:
+        """Asynchronously yield the raw request body in chunks."""
+
     @property
     @abc.abstractmethod
     def url_path(self) -> str:
         """Base with optional prefix added."""
 
+    @property
+    @abc.abstractmethod
+    def url(self) -> str:
+        """URL of the request, as a string."""
+
     @property
     @abc.abstractmethod
     def host(self) -> str:
diff --git a/test/integration/test_job_files.py b/test/integration/test_job_files.py
@@ -296,6 +296,29 @@ def test_write_with_underscored_file_param(self):
         api_asserts.assert_status_code_is_ok(response)
         assert open(path).read() == "some initial text data"
 
+    def test_write_with_put_request(self):
+        # A PUT answers 201 when it creates the file and 204 when it replaces one that is already on disk.
+        job, output_hda, working_directory = self.create_static_job_with_state("running")
+        job_id, job_key = self._api_job_keys(job)
+        dataset_path = self._app.object_store.get_filename(output_hda.dataset)
+        assert dataset_path
+        files_url = self._api_url(f"jobs/{job_id}/files", use_key=False)
+
+        fresh_path = os.path.join(working_directory, "new_file.txt")
+        created = requests.put(
+            files_url,
+            params={"path": fresh_path, "job_key": job_key},
+            data=b"whole contents of the file",
+        )
+        assert created.status_code == 201
+        assert open(fresh_path).read() == "whole contents of the file"
+
+        assert os.path.exists(dataset_path)
+        replace_params = {"path": dataset_path, "job_key": job_key}
+        replaced = requests.put(files_url, params=replace_params, data=b"contents of a replacement file")
+        assert replaced.status_code == 204
+        assert open(dataset_path).read() == "contents of a replacement file"
+
     def test_write_protection(self):
         job, _, _ = self.create_static_job_with_state("running")
         job_id, job_key = self._api_job_keys(job)