Skip to content

Commit f553524

Browse files
committed
Improve compliance of FastAPIJobFiles with the HTTP spec (files endpoint)
This commit introduces the following differences in behavior: - GET and HEAD requests to `/api/jobs/{job_id}/files` return HTTP status code 400 when the given path does not refer to a file or the input datasets for the job have been purged and 404 when the given path does not exist. - PROPFIND requests to `/api/jobs/{job_id}/files` are answered with HTTP status code 501 (read motivation for this change below). - POST requests to `/api/jobs/{job_id}/files` are answered with HTTP status code 400 when no file is provided. The reason behind the code explicitly answering `PROPFIND` requests with status code 501 is an unfortunate interaction between the ARC remote job runner that is under development, the behavior of legacy API endpoints and how they are integrated within the FastAPI app. The ARC remote job runner (which will be implemented as `lib.galaxy.jobs.runners.pulsar.PulsarARCJobRunner`) expects this endpoint to return HTTP codes other than 404 when `PROPFIND` requests are issued. They are not part of the HTTP spec, but they are used in the WebDAV protocol. The correct answer to such requests is likely 501 (not implemented). FastAPI returns HTTP 405 (method not allowed) for `PROPFIND`, which maybe is not fully correct but tolerable because it is one less quirk to maintain. However, because of the way legacy WSGI endpoints are injected into the FastAPI app (using `app.mount("/", wsgi_handler)`), the built-in support for returning HTTP 405 for `PROPFIND` breaks, because such requests are passed to the `wsgi_handler` sub-application. This means that the endpoint still needs to include some code to handle this behavior. When ALL routes have been migrated to ASGI (no WSGI handler sub-application needed anymore), some lines of code can be removed, they are labeled using comments.
1 parent 071d2d5 commit f553524

File tree

5 files changed

+165
-22
lines changed

5 files changed

+165
-22
lines changed

client/src/api/schema/schema.ts

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31259,8 +31259,26 @@ export interface operations {
3125931259
"application/octet-stream": unknown;
3126031260
};
3126131261
};
31262-
/** @description File not found, path does not refer to a file, or input dataset(s) for job have been purged. */
31262+
/** @description Path does not refer to a file, or input dataset(s) for job have been purged. */
3126331263
400: {
31264+
headers: {
31265+
[name: string]: unknown;
31266+
};
31267+
content: {
31268+
"application/json": unknown;
31269+
};
31270+
};
31271+
/** @description File not found. */
31272+
404: {
31273+
headers: {
31274+
[name: string]: unknown;
31275+
};
31276+
content: {
31277+
"application/json": unknown;
31278+
};
31279+
};
31280+
/** @description Input dataset(s) for job have been purged. */
31281+
500: {
3126431282
headers: {
3126531283
[name: string]: unknown;
3126631284
};
@@ -31319,6 +31337,13 @@ export interface operations {
3131931337
"application/json": unknown;
3132031338
};
3132131339
};
31340+
/** @description Bad request (including no file provided). */
31341+
400: {
31342+
headers: {
31343+
[name: string]: unknown;
31344+
};
31345+
content?: never;
31346+
};
3132231347
/** @description Request Error */
3132331348
"4XX": {
3132431349
headers: {
@@ -31368,8 +31393,26 @@ export interface operations {
3136831393
"application/octet-stream": unknown;
3136931394
};
3137031395
};
31371-
/** @description File not found, path does not refer to a file, or input dataset(s) for job have been purged. */
31396+
/** @description Path does not refer to a file, or input dataset(s) for job have been purged. */
3137231397
400: {
31398+
headers: {
31399+
[name: string]: unknown;
31400+
};
31401+
content: {
31402+
"application/json": unknown;
31403+
};
31404+
};
31405+
/** @description File not found. */
31406+
404: {
31407+
headers: {
31408+
[name: string]: unknown;
31409+
};
31410+
content: {
31411+
"application/json": unknown;
31412+
};
31413+
};
31414+
/** @description Input dataset(s) for job have been purged. */
31415+
500: {
3137331416
headers: {
3137431417
[name: string]: unknown;
3137531418
};

lib/galaxy/webapps/galaxy/api/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,10 @@ def environ(self) -> Environ:
265265
self.__environ = build_environ(self.__request.scope, None) # type: ignore[arg-type]
266266
return self.__environ
267267

268+
@property
269+
def method(self):
270+
return self.__request.method
271+
268272
@property
269273
def headers(self):
270274
return self.__request.headers

lib/galaxy/webapps/galaxy/api/job_files.py

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
DependsOnTrans,
3737
Router,
3838
)
39+
from galaxy.work.context import SessionRequestContext
3940
from . import BaseGalaxyAPIController
4041

4142
__all__ = ("FastAPIJobFiles", "JobFilesAPIController", "router")
@@ -128,16 +129,48 @@ class FastAPIJobFiles:
128129
"content": {"application/json": None, "application/octet-stream": {"example": None}},
129130
},
130131
400: {
131-
"description": (
132-
"File not found, path does not refer to a file, or input dataset(s) for job have been purged."
133-
)
132+
"description": "Path does not refer to a file, or input dataset(s) for job have been purged.",
133+
"content": {
134+
"application/json": {
135+
"example": {
136+
"detail": (
137+
"Path does not refer to a file, or input dataset(s) for job have been purged."
138+
)
139+
},
140+
}
141+
},
134142
},
143+
404: {
144+
"description": "File not found.",
145+
"content": {
146+
"application/json": {
147+
"example": {"detail": "File not found."},
148+
}
149+
},
150+
},
151+
500: {"description": "Input dataset(s) for job have been purged."},
135152
},
136153
)
137154
),
138155
)
139156
@router.head(*_args, **_kwargs) # type: ignore[name-defined]
140157
# remove `@router.head(...)` when ALL endpoints have been migrated to FastAPI
158+
@router.api_route(
159+
*_args, # type: ignore[name-defined]
160+
**{key: value for key, value in _kwargs.items() if key != "responses"}, # type: ignore[name-defined]
161+
responses={501: {"description": "Not implemented."}},
162+
methods=["PROPFIND"],
163+
include_in_schema=False,
164+
)
165+
# remove `@router.api_route(..., methods=["PROPFIND"])` when ALL endpoints have been migrated to FastAPI
166+
# The ARC remote job runner (`lib.galaxy.jobs.runners.pulsar.PulsarARCJobRunner`) expects this to return HTTP codes
167+
# other than 404 when `PROPFIND` requests are issued. They are not part of the HTTP spec, but they are used in the
168+
# WebDAV protocol. The correct answer to such requests is likely 501 (not implemented). FastAPI returns HTTP 405
169+
# (method not allowed) for `PROPFIND`, which maybe is not fully correct but tolerable because it is one less quirk
170+
# to maintain. However, because of the way legacy WSGI endpoints are injected into the FastAPI app (using
171+
# `app.mount("/", wsgi_handler)`), the built-in support for returning HTTP 405 for `PROPFIND` breaks, because such
172+
# requests are passed to the `wsgi_handler` sub-application. This means that the endpoint still needs to include
173+
# some code to handle this behavior.
141174
def index(
142175
self,
143176
job_id: Annotated[str, Path(description="Encoded id string of the job.")],
@@ -155,14 +188,19 @@ def index(
155188
),
156189
),
157190
],
158-
trans: ProvidesAppContext = DependsOnTrans,
191+
trans: SessionRequestContext = DependsOnTrans,
159192
) -> GalaxyFileResponse:
160193
"""
161194
Get a file required to staging a job (proper datasets, extra inputs, task-split inputs, working directory
162195
files).
163196
164197
This API method is intended only for consumption by job runners, not end users.
165198
"""
199+
# PROPFIND is not implemented, but the endpoint needs to return a non-404 error code for it
200+
# remove me when ALL endpoints have been migrated to FastAPI
201+
if trans.request.method == "PROPFIND":
202+
raise exceptions.NotImplemented()
203+
166204
path = unquote(path)
167205

168206
job = self.__authorize_job_access(trans, job_id, path=path, job_key=job_key)
@@ -176,6 +214,9 @@ def index(
176214
# This looks like a galaxy dataset, check if any job input has been deleted.
177215
if any(jtid.dataset.dataset.purged for jtid in job.input_datasets):
178216
raise exceptions.ItemDeletionException("Input dataset(s) for job have been purged.")
217+
raise exceptions.ObjectNotFound("File not found.")
218+
elif not os.path.isfile(path):
219+
raise exceptions.RequestParameterInvalidException("Path does not refer to a file.")
179220

180221
return GalaxyFileResponse(path)
181222

@@ -184,6 +225,7 @@ def index(
184225
summary="Populate an output file.",
185226
responses={
186227
200: {"description": "An okay message.", "content": {"application/json": {"example": {"message": "ok"}}}},
228+
400: {"description": "Bad request (including no file provided)."},
187229
},
188230
)
189231
def create(

lib/galaxy/work/context.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,11 @@ def url_path(self) -> str:
9797
def host(self) -> str:
9898
"""The host address."""
9999

100+
@property
101+
@abc.abstractmethod
102+
def method(self) -> str:
103+
"""The request's HTTP method."""
104+
100105
@property
101106
@abc.abstractmethod
102107
def is_secure(self) -> bool:

test/integration/test_job_files.py

Lines changed: 65 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ class TestJobFilesIntegration(integration_util.IntegrationTestCase):
4848
initialized = False
4949
dataset_populator: DatasetPopulator
5050

51+
hist_id: int # cannot use `history_id` as name, it collides with a pytest fixture
5152
input_hda: model.HistoryDatasetAssociation
5253
input_hda_dict: Dict[str, Any]
5354
_nginx_upload_job_files_store: str
@@ -61,7 +62,6 @@ def handle_galaxy_config_kwds(cls, config):
6162
config["server_name"] = "files"
6263
config["nginx_upload_job_files_store"] = tempfile.mkdtemp()
6364
cls._nginx_upload_job_files_store = config["nginx_upload_job_files_store"]
64-
cls.initialized = False
6565

6666
@classmethod
6767
def tearDownClass(cls):
@@ -70,17 +70,14 @@ def tearDownClass(cls):
7070

7171
def setUp(self):
7272
super().setUp()
73-
cls = TestJobFilesIntegration
74-
cls.dataset_populator = DatasetPopulator(self.galaxy_interactor)
75-
if not cls.initialized:
76-
history_id = cls.dataset_populator.new_history()
77-
sa_session = self.sa_session
78-
stmt = select(model.HistoryDatasetAssociation)
79-
assert len(sa_session.scalars(stmt).all()) == 0
80-
cls.input_hda_dict = cls.dataset_populator.new_dataset(history_id, content=TEST_INPUT_TEXT, wait=True)
81-
assert len(sa_session.scalars(stmt).all()) == 1
82-
cls.input_hda = sa_session.scalars(stmt).all()[0]
83-
cls.initialized = True
73+
self.dataset_populator = DatasetPopulator(self.galaxy_interactor)
74+
history_id_encoded = self.dataset_populator.new_history()
75+
self.hist_id = self._app.security.decode_id(history_id_encoded)
76+
self.input_hda_dict = self.dataset_populator.new_dataset(history_id_encoded, content=TEST_INPUT_TEXT, wait=True)
77+
sa_session = self.sa_session
78+
stmt = select(model.HistoryDatasetAssociation).where(model.HistoryDatasetAssociation.history_id == self.hist_id)
79+
assert len(sa_session.scalars(stmt).all()) == 1
80+
self.input_hda = sa_session.scalars(stmt).first()
8481

8582
def test_read_by_state(self):
8683
job, _, _ = self.create_static_job_with_state("running")
@@ -115,9 +112,57 @@ def test_read_fails_if_input_file_purged(self):
115112
self.input_hda_dict["history_id"], content_id=self.input_hda_dict["id"], purge=True, wait_for_purge=True
116113
)
117114
assert delete_response.status_code == 200
118-
head_response = requests.get(get_url, params=data)
115+
response = requests.get(get_url, params=data)
116+
assert response.status_code == 400
117+
assert response.json()["err_msg"] == "Input dataset(s) for job have been purged."
118+
119+
def test_read_missing_file(self):
120+
job, _, _ = self.create_static_job_with_state("running")
121+
job_id, job_key = self._api_job_keys(job)
122+
data = {"path": self.input_hda.get_file_name() + "_missing", "job_key": job_key}
123+
get_url = self._api_url(f"jobs/{job_id}/files", use_key=True)
124+
125+
head_response = requests.head(get_url, params=data)
126+
assert head_response.status_code == 404
127+
128+
response = requests.get(get_url, params=data)
129+
assert response.status_code == 404
130+
131+
def test_read_folder(self):
132+
job, _, _ = self.create_static_job_with_state("running")
133+
job_id, job_key = self._api_job_keys(job)
134+
data = {"path": os.path.dirname(self.input_hda.get_file_name()), "job_key": job_key}
135+
get_url = self._api_url(f"jobs/{job_id}/files", use_key=True)
136+
137+
head_response = requests.head(get_url, params=data)
119138
assert head_response.status_code == 400
120-
assert head_response.json()["err_msg"] == "Input dataset(s) for job have been purged."
139+
140+
response = requests.get(get_url, params=data)
141+
assert response.status_code == 400
142+
143+
def test_write_no_file(self):
144+
job, output_hda, working_directory = self.create_static_job_with_state("running")
145+
job_id, job_key = self._api_job_keys(job)
146+
path = self._app.object_store.get_filename(output_hda.dataset)
147+
assert path
148+
data = {"path": path, "job_key": job_key}
149+
150+
post_url = self._api_url(f"jobs/{job_id}/files", use_key=False)
151+
response = requests.post(post_url, data=data)
152+
assert response.status_code == 400
153+
154+
def test_propfind(self):
155+
# remove this test when ALL Galaxy endpoints have been migrated to FastAPI; it will then be FastAPI's
156+
# responsibility to return a status code other than 404
157+
job, output_hda, working_directory = self.create_static_job_with_state("running")
158+
job_id, job_key = self._api_job_keys(job)
159+
path = self._app.object_store.get_filename(output_hda.dataset)
160+
assert path
161+
data = {"path": path, "job_key": job_key}
162+
163+
propfind_url = self._api_url(f"jobs/{job_id}/files", use_key=False)
164+
response = requests.request("PROPFIND", propfind_url, params=data)
165+
assert response.status_code == 501
121166

122167
def test_write_by_state(self):
123168
job, output_hda, working_directory = self.create_static_job_with_state("running")
@@ -269,9 +314,13 @@ def sa_session(self):
269314
def create_static_job_with_state(self, state):
270315
"""Create a job with unknown handler so its state won't change."""
271316
sa_session = self.sa_session
272-
hda = sa_session.scalars(select(model.HistoryDatasetAssociation)).all()[0]
317+
stmt_hda = select(model.HistoryDatasetAssociation).where(
318+
model.HistoryDatasetAssociation.history_id == self.hist_id
319+
)
320+
hda = sa_session.scalars(stmt_hda).first()
273321
assert hda
274-
history = sa_session.scalars(select(model.History)).all()[0]
322+
stmt_history = select(model.History).where(model.History.id == self.hist_id)
323+
history = sa_session.scalars(stmt_history).first()
275324
assert history
276325
user = sa_session.scalars(select(model.User)).all()[0]
277326
assert user

0 commit comments

Comments
 (0)