Skip to content

Commit e050a15

Browse files
committed
[2/2] [a] Fix: Can't use curl to download a single manifest in one invocation (#5918)
Add a wait parameter option to the manifest endpoint
1 parent 78e5b48 commit e050a15

File tree

6 files changed: +150 -33 lines changed

lambdas/service/app.py

Lines changed: 37 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@
8686
from azul.service.app_controller import (
8787
validate_catalog,
8888
validate_params,
89+
validate_wait,
8990
)
9091
from azul.service.catalog_controller import (
9192
CatalogController,
@@ -1081,6 +1082,26 @@ def get_summary():
10811082
authentication=request.authentication)
10821083

10831084

1085+
def wait_parameter_spec(*, default: int) -> JSON:
    """
    Build the specification of the optional `wait` query parameter.

    The parameter only admits the values 0 and 1. Which of the two is
    advertised as the default is up to the caller, so that different
    endpoints can share the same description while defaulting differently.

    :param default: The value to declare as the parameter's default. Must be
                    one of the admissible values, 0 or 1.

    :return: The parameter specification as produced by `params.query`.
    """
    admissible = [0, 1]
    # The default is a constant chosen by the calling code, not user input,
    # so an invalid value is a programming error.
    assert default in admissible, default
    wait_schema = schema.optional(schema.default(default,
                                                 form=schema.enum(*admissible)))
    wait_description = fd('''
        If 0, the client is responsible for honoring the waiting period
        specified in the `Retry-After` response header. If 1, the server
        will delay the response in order to consume as much of that waiting
        period as possible. This parameter should only be set to 1 by
        clients who can't honor the `Retry-After` header, preventing them
        from quickly exhausting the maximum number of redirects. If the
        server cannot wait the full amount, any amount of wait time left
        will still be returned in the `Retry-After` header of the response.
    ''')
    return params.query('wait', wait_schema, description=wait_description)
1103+
1104+
10841105
post_manifest_example_url = (
10851106
f'{app.base_url}/manifest/files'
10861107
f'?catalog={list(config.catalogs.keys())[0]}'
@@ -1115,7 +1136,8 @@ def manifest_route(*, fetch: bool, initiate: bool, curl: bool = False):
11151136
'parameters': [
11161137
params.path('token', str, description=fd('''
11171138
An opaque string representing the manifest preparation job
1118-
'''))
1139+
''')),
1140+
*([] if fetch else [wait_parameter_spec(default=0)])
11191141
]
11201142
},
11211143
spec={
@@ -1233,6 +1255,7 @@ def manifest_route(*, fetch: bool, initiate: bool, curl: bool = False):
12331255
'''),
12341256
'parameters': [
12351257
catalog_param_spec,
1258+
*([wait_parameter_spec(default=1)] if curl else []),
12361259
filters_param_spec,
12371260
params.query(
12381261
'format',
@@ -1420,24 +1443,32 @@ def _file_manifest(fetch: bool, token_or_key: str | None = None):
14201443
and request.headers.get('content-type') == 'application/x-www-form-urlencoded'
14211444
and request.raw_body != b''
14221445
):
1423-
raise BRE('The body must be empty for a POST request of content-type '
1424-
'`application/x-www-form-urlencoded` to this endpoint')
1446+
raise BRE('POST requests to this endpoint must have an empty body if '
1447+
'they specify a `Content-Type` header of '
1448+
'`application/x-www-form-urlencoded`')
14251449
query_params = request.query_params or {}
14261450
_hoist_parameters(query_params, request)
14271451
if token_or_key is None:
14281452
query_params.setdefault('filters', '{}')
1453+
if post:
1454+
query_params.setdefault('wait', '1')
14291455
# We list the `catalog` validator first so that the catalog is validated
14301456
# before any other potentially catalog-dependent validators are invoked
14311457
validate_params(query_params,
14321458
catalog=validate_catalog,
14331459
format=validate_manifest_format,
1434-
filters=validate_filters)
1460+
filters=validate_filters,
1461+
**({'wait': validate_wait} if post else {}))
14351462
# Now that the catalog is valid, we can provide the default format that
14361463
# depends on it
14371464
default_format = app.metadata_plugin.manifest_formats[0].value
14381465
query_params.setdefault('format', default_format)
14391466
else:
1440-
validate_params(query_params)
1467+
validate_params(query_params,
1468+
# If the initial request was a POST to the non-fetch
1469+
# endpoint, the 'wait' param will be carried over to
1470+
# each subsequent GET request to the non-fetch endpoint.
1471+
**({'wait': validate_wait} if not fetch else {}))
14411472
return app.manifest_controller.get_manifest_async(query_params=query_params,
14421473
token_or_key=token_or_key,
14431474
fetch=fetch,
@@ -1484,21 +1515,7 @@ def generate_manifest(event: AnyJSON, _context: LambdaContext):
14841515
made. If that fails, the UUID of the file will be used instead.
14851516
''')
14861517
),
1487-
params.query(
1488-
'wait',
1489-
schema.optional(schema.default(0)),
1490-
description=fd('''
1491-
If 0, the client is responsible for honoring the waiting period
1492-
specified in the Retry-After response header. If 1, the server
1493-
will delay the response in order to consume as much of that
1494-
waiting period as possible. This parameter should only be set to
1495-
1 by clients who can't honor the `Retry-After` header,
1496-
preventing them from quickly exhausting the maximum number of
1497-
redirects. If the server cannot wait the full amount, any amount
1498-
of wait time left will still be returned in the Retry-After
1499-
header of the response.
1500-
''')
1501-
),
1518+
wait_parameter_spec(default=0),
15021519
params.query(
15031520
'replica',
15041521
schema.optional(str),

lambdas/service/openapi.json

Lines changed: 76 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8799,6 +8799,22 @@
87998799
],
88008800
"additionalProperties": false
88018801
},
8802+
"dataUseRestriction": {
8803+
"type": "object",
8804+
"properties": {
8805+
"is": {
8806+
"type": "array",
8807+
"items": {
8808+
"type": "string",
8809+
"nullable": true
8810+
}
8811+
}
8812+
},
8813+
"required": [
8814+
"is"
8815+
],
8816+
"additionalProperties": false
8817+
},
88028818
"developmentStage": {
88038819
"type": "object",
88048820
"properties": {
@@ -8873,6 +8889,22 @@
88738889
],
88748890
"additionalProperties": false
88758891
},
8892+
"duosId": {
8893+
"type": "object",
8894+
"properties": {
8895+
"is": {
8896+
"type": "array",
8897+
"items": {
8898+
"type": "string",
8899+
"nullable": true
8900+
}
8901+
}
8902+
},
8903+
"required": [
8904+
"is"
8905+
],
8906+
"additionalProperties": false
8907+
},
88768908
"effectiveCellCount": {
88778909
"oneOf": [
88788910
{
@@ -9842,7 +9874,7 @@
98429874
}
98439875
}
98449876
},
9845-
"description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. 
`{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, developmentStage, donorCount, donorDisease, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow, accessible\n"
9877+
"description": "\nCriteria to filter entities from the search results.\n\nEach filter consists of a field name, a relation (relational operator),\nand an array of field values. The available relations are \"is\",\n\"within\", \"contains\", and \"intersects\". Multiple filters are combined\nusing \"and\" logic. An entity must match all filters to be included in\nthe response. How multiple field values within a single filter are\ncombined depends on the relation.\n\nFor the \"is\" relation, multiple values are combined using \"or\" logic.\nFor example, `{\"fileFormat\": {\"is\": [\"fastq\", \"fastq.gz\"]}}` selects\nentities where the file format is either \"fastq\" or \"fastq.gz\". For the\n\"within\", \"intersects\", and \"contains\" relations, the field values must\ncome in nested pairs specifying upper and lower bounds, and multiple\npairs are combined using \"and\" logic. For example, `{\"donorCount\":\n{\"within\": [[1,5], [5,10]]}}` selects entities whose donor organism\ncount falls within both ranges, i.e., is exactly 5.\n\nThe accessions field supports filtering for a specific accession and/or\nnamespace within a project. For example, `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\"}]}}` will filter for projects that have an\n`array_express` accession. Similarly, `{\"accessions\": {\"is\": [\n{\"accession\":\"ERP112843\"}]}}` will filter for projects that have the\naccession `ERP112843` while `{\"accessions\": {\"is\": [\n{\"namespace\":\"array_express\", \"accession\": \"E-AAAA-00\"}]}}` will filter\nfor projects that match both values.\n\nThe organismAge field is special in that it contains two property keys:\nvalue and unit. For example, `{\"organismAge\": {\"is\": [{\"value\": \"20\",\n\"unit\": \"year\"}]}}`. Both keys are required. 
`{\"organismAge\": {\"is\":\n[null]}}` selects entities that have no organism age.\n\nSupported field names are: accessions, aggregateLastModifiedDate, aggregateSubmissionDate, aggregateUpdateDate, assayType, biologicalSex, bionetworkName, bundleUuid, bundleVersion, cellCount, cellLineType, contactName, contentDescription, dataUseRestriction, developmentStage, donorCount, donorDisease, duosId, effectiveCellCount, effectiveOrgan, entryId, fileFormat, fileId, fileName, fileSize, fileSource, fileVersion, genusSpecies, institution, instrumentManufacturerModel, isIntermediate, isTissueAtlasProject, laboratory, lastModifiedDate, libraryConstructionApproach, matrixCellCount, modelOrgan, modelOrganPart, nucleicAcidSource, organ, organPart, organismAge, organismAgeRange, pairedEnd, preservationMethod, project, projectDescription, projectEstimatedCellCount, projectId, projectTitle, publicationTitle, sampleDisease, sampleEntityType, sampleId, selectedCellType, sourceId, sourceSpec, specimenDisease, specimenOrgan, specimenOrganPart, submissionDate, tissueAtlas, updateDate, workflow, accessible\n"
98469878
},
98479879
{
98489880
"name": "format",
@@ -9892,6 +9924,9 @@
98929924
}
98939925
}
98949926
}
9927+
},
9928+
"504": {
9929+
"description": "\nRequest timed out. When handling this response, clients\nshould wait the number of seconds specified in the\n`Retry-After` header and then retry the request.\n"
98959930
}
98969931
}
98979932
},
@@ -9916,6 +9951,21 @@
99169951
},
99179952
"description": "The name of the catalog to query."
99189953
},
9954+
{
9955+
"name": "wait",
9956+
"in": "query",
9957+
"required": false,
9958+
"schema": {
9959+
"type": "integer",
9960+
"format": "int64",
9961+
"enum": [
9962+
0,
9963+
1
9964+
],
9965+
"default": 1
9966+
},
9967+
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
9968+
},
99199969
{
99209970
"name": "filters",
99219971
"in": "query",
@@ -11307,6 +11357,21 @@
1130711357
"type": "string"
1130811358
},
1130911359
"description": "\nAn opaque string representing the manifest preparation job\n"
11360+
},
11361+
{
11362+
"name": "wait",
11363+
"in": "query",
11364+
"required": false,
11365+
"schema": {
11366+
"type": "integer",
11367+
"format": "int64",
11368+
"enum": [
11369+
0,
11370+
1
11371+
],
11372+
"default": 0
11373+
},
11374+
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
1131011375
}
1131111376
],
1131211377
"get": {
@@ -12900,9 +12965,13 @@
1290012965
"schema": {
1290112966
"type": "integer",
1290212967
"format": "int64",
12968+
"enum": [
12969+
0,
12970+
1
12971+
],
1290312972
"default": 0
1290412973
},
12905-
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the Retry-After response header. If 1, the server\nwill delay the response in order to consume as much of that\nwaiting period as possible. This parameter should only be set to\n1 by clients who can't honor the `Retry-After` header,\npreventing them from quickly exhausting the maximum number of\nredirects. If the server cannot wait the full amount, any amount\nof wait time left will still be returned in the Retry-After\nheader of the response.\n"
12974+
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
1290612975
},
1290712976
{
1290812977
"name": "replica",
@@ -13039,9 +13108,13 @@
1303913108
"schema": {
1304013109
"type": "integer",
1304113110
"format": "int64",
13111+
"enum": [
13112+
0,
13113+
1
13114+
],
1304213115
"default": 0
1304313116
},
13044-
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the Retry-After response header. If 1, the server\nwill delay the response in order to consume as much of that\nwaiting period as possible. This parameter should only be set to\n1 by clients who can't honor the `Retry-After` header,\npreventing them from quickly exhausting the maximum number of\nredirects. If the server cannot wait the full amount, any amount\nof wait time left will still be returned in the Retry-After\nheader of the response.\n"
13117+
"description": "\nIf 0, the client is responsible for honoring the waiting period\nspecified in the `Retry-After` response header. If 1, the server\nwill delay the response in order to consume as much of that waiting\nperiod as possible. This parameter should only be set to 1 by\nclients who can't honor the `Retry-After` header, preventing them\nfrom quickly exhausting the maximum number of redirects. If the\nserver cannot wait the full amount, any amount of wait time left\nwill still be returned in the `Retry-After` header of the response.\n"
1304513118
},
1304613119
{
1304713120
"name": "replica",

0 commit comments

Comments
 (0)