Skip to content

Commit 2e0428c

Browse files
authored
Merge pull request #34 from neurostuff/enh/use_different_machine_sizes
[ENH] use different machine sizes
2 parents 6e24cc5 + 69f4e86 commit 2e0428c

File tree

8 files changed

+344
-40
lines changed

8 files changed

+344
-40
lines changed

Dockerfile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,12 @@
11
FROM python:3.13-slim
22

3+
ARG COMPOSE_RUNNER_VERSION
4+
ENV COMPOSE_RUNNER_VERSION=${COMPOSE_RUNNER_VERSION}
5+
LABEL org.opencontainers.image.title="compose-runner ecs task"
6+
LABEL org.opencontainers.image.version=${COMPOSE_RUNNER_VERSION}
7+
8+
RUN test -n "$COMPOSE_RUNNER_VERSION" || (echo "COMPOSE_RUNNER_VERSION build arg is required" && exit 1)
9+
310
RUN apt-get update && apt-get install -y \
411
git \
512
&& rm -rf /var/lib/apt/lists/*

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ The deployed architecture works like this:
4444
Pass `-c resultsBucketName=<bucket>` to use an existing S3 bucket, or omit it
4545
to let the stack create and retain a dedicated bucket. Additional knobs:
4646

47-
- `-c stateMachineTimeoutSeconds=7200` to control the max wall clock per run
47+
- `-c stateMachineTimeoutSeconds=32400` to control the max wall clock per run
4848
- `-c submitTimeoutSeconds` / `-c statusTimeoutSeconds` / `-c pollTimeoutSeconds`
4949
to tune Lambda timeouts
5050
- `-c taskEphemeralStorageGiB` if the default 21 GiB scratch volume is insufficient

compose_runner/aws_lambda/run_handler.py

Lines changed: 71 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
import logging
55
import os
66
import uuid
7+
import urllib.error
8+
import urllib.request
79
from typing import Any, Dict, Optional
810

911
import boto3
@@ -22,20 +24,84 @@
2224
NSC_KEY_ENV = "NSC_KEY"
2325
NV_KEY_ENV = "NV_KEY"
2426

27+
DEFAULT_TASK_SIZE = "standard"
28+
2529

2630
def _log(job_id: str, message: str, **details: Any) -> None:
    """Emit one structured JSON log line for this job.

    Extra keyword arguments are merged into the logged payload alongside
    the job id and message.
    """
    # One JSON object per line keeps downstream ingestion/filtering simple.
    record: Dict[str, Any] = {"job_id": job_id, "message": message}
    record.update(details)
    logger.info(json.dumps(record))
3034

3135

36+
def _compose_api_base_url(environment: str) -> str:
37+
env = (environment or "production").lower()
38+
if env == "staging":
39+
return "https://synth.neurostore.xyz/api"
40+
if env == "local":
41+
return "http://localhost:81/api"
42+
return "https://compose.neurosynth.org/api"
43+
44+
45+
def _fetch_meta_analysis(meta_analysis_id: str, environment: str) -> Optional[Dict[str, Any]]:
    """Fetch the nested meta-analysis document from the compose API.

    Returns the parsed JSON document, or ``None`` on any fetch/parse
    failure so callers can fall back to default behavior instead of
    failing the submission.
    """
    base_url = _compose_api_base_url(environment).rstrip("/")
    url = f"{base_url}/meta-analyses/{meta_analysis_id}?nested=true"
    request = urllib.request.Request(url, headers={"User-Agent": "compose-runner/submit"})
    try:
        with urllib.request.urlopen(request, timeout=10) as response:
            return json.load(response)
    # URLError already covers HTTPError, so listing both was redundant.
    # TimeoutError is added because a socket read timeout during the body
    # read (json.load) surfaces as TimeoutError, not URLError, and this
    # helper must never raise.
    except (urllib.error.URLError, TimeoutError, json.JSONDecodeError) as exc:
        logger.warning("Failed to fetch meta-analysis %s: %s", meta_analysis_id, exc)
        return None
55+
56+
57+
def _requires_large_task(specification: Dict[str, Any]) -> bool:
58+
if not isinstance(specification, dict):
59+
return False
60+
corrector = specification.get("corrector")
61+
if not isinstance(corrector, dict):
62+
return False
63+
if corrector.get("type") != "FWECorrector":
64+
return False
65+
args = corrector.get("args")
66+
if not isinstance(args, dict):
67+
return False
68+
method = args.get("method")
69+
if method is None:
70+
kwargs = args.get("**kwargs")
71+
if isinstance(kwargs, dict):
72+
method = kwargs.get("method")
73+
if isinstance(method, str) and method.lower() == "montecarlo":
74+
return True
75+
return False
76+
77+
78+
def _select_task_size(meta_analysis_id: str, environment: str, artifact_prefix: str) -> str:
    """Choose the ECS task size ("large" or DEFAULT_TASK_SIZE) for a run.

    Fetches the meta-analysis spec from the compose API and upgrades to
    the large task for montecarlo FWE correction. Every failure path —
    fetch failure or spec-evaluation error — falls back to the default
    size so submission is never blocked by this heuristic.
    """
    document = _fetch_meta_analysis(meta_analysis_id, environment)
    if not document:
        return DEFAULT_TASK_SIZE
    spec = document.get("specification")
    try:
        if not _requires_large_task(spec):
            return DEFAULT_TASK_SIZE
        _log(
            artifact_prefix,
            "workflow.task_size_selected",
            task_size="large",
            reason="montecarlo_fwe",
        )
        return "large"
    except Exception as exc:  # noqa: BLE001 - sizing is strictly best-effort
        logger.warning("Failed to evaluate specification for %s: %s", meta_analysis_id, exc)
        return DEFAULT_TASK_SIZE
95+
96+
3297
def _job_input(
3398
payload: Dict[str, Any],
3499
artifact_prefix: str,
35100
bucket: Optional[str],
36101
prefix: Optional[str],
37102
nsc_key: Optional[str],
38103
nv_key: Optional[str],
104+
task_size: str,
39105
) -> Dict[str, Any]:
40106
no_upload_flag = bool(payload.get("no_upload", False))
41107
doc: Dict[str, Any] = {
@@ -44,6 +110,7 @@ def _job_input(
44110
"environment": payload.get("environment", "production"),
45111
"no_upload": "true" if no_upload_flag else "false",
46112
"results": {"bucket": bucket or "", "prefix": prefix or ""},
113+
"task_size": task_size,
47114
}
48115
n_cores = payload.get("n_cores")
49116
doc["n_cores"] = str(n_cores) if n_cores is not None else ""
@@ -76,7 +143,10 @@ def handler(event: Dict[str, Any], context: Any) -> Dict[str, Any]:
76143
nsc_key = payload.get("nsc_key") or os.environ.get(NSC_KEY_ENV)
77144
nv_key = payload.get("nv_key") or os.environ.get(NV_KEY_ENV)
78145

79-
job_input = _job_input(payload, artifact_prefix, bucket, prefix, nsc_key, nv_key)
146+
environment = payload.get("environment", "production")
147+
task_size = _select_task_size(payload["meta_analysis_id"], environment, artifact_prefix)
148+
149+
job_input = _job_input(payload, artifact_prefix, bucket, prefix, nsc_key, nv_key, task_size)
80150
params = {
81151
"stateMachineArn": os.environ[STATE_MACHINE_ARN_ENV],
82152
"name": artifact_prefix,

compose_runner/ecs_task.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ def main() -> None:
9393
nv_key = os.environ.get(NV_KEY_ENV) or None
9494
no_upload = _bool_from_env(os.environ.get(NO_UPLOAD_ENV))
9595
n_cores = _resolve_n_cores(os.environ.get(N_CORES_ENV))
96+
compose_runner_version = os.environ.get("COMPOSE_RUNNER_VERSION", "unknown")
9697

9798
bucket = os.environ.get(RESULTS_BUCKET_ENV)
9899
prefix = os.environ.get(RESULTS_PREFIX_ENV)
@@ -106,6 +107,7 @@ def main() -> None:
106107
meta_analysis_id=meta_analysis_id,
107108
environment=environment,
108109
no_upload=no_upload,
110+
compose_runner_version=compose_runner_version,
109111
)
110112
try:
111113
url, _ = run_compose(
@@ -125,6 +127,7 @@ def main() -> None:
125127
"result_url": url,
126128
"artifacts_bucket": bucket,
127129
"artifacts_prefix": prefix,
130+
"compose_runner_version": compose_runner_version,
128131
}
129132

130133
if bucket:
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
interactions:
2+
- request:
3+
method: GET
4+
uri: https://synth.neurostore.xyz/api/meta-analyses/ZPSvyvhZAopz?nested=true
5+
body: null
6+
headers:
7+
Accept:
8+
- '*/*'
9+
Accept-Encoding:
10+
- gzip, deflate
11+
Connection:
12+
- keep-alive
13+
User-Agent:
14+
- python-requests/2.32.4
15+
response:
16+
status:
17+
code: 200
18+
message: OK
19+
headers:
20+
Server:
21+
- nginx/1.21.6
22+
Date:
23+
- Tue, 21 Oct 2025 14:08:45 GMT
24+
Content-Type:
25+
- application/json
26+
Transfer-Encoding:
27+
- chunked
28+
Connection:
29+
- keep-alive
30+
Vary:
31+
- Accept-Encoding
32+
Content-Encoding:
33+
- gzip
34+
Strict-Transport-Security:
35+
- max-age=31536000
36+
body:
37+
string: '{"id": "ZPSvyvhZAopz", "created_at": "2025-10-21T04:57:40.236536+00:00",
38+
"updated_at": null, "user": "github|12564882", "username": "James Kent", "name":
39+
"Untitled MKDADensity Meta Analysis: included", "description": "MKDADensity
40+
meta analysis with FWECorrector", "provenance": null, "specification": {"id":
41+
"zQdMa4uAaYYU", "created_at": "2025-10-21T04:57:39.888528+00:00", "updated_at":
42+
null, "user": "github|12564882", "username": "James Kent", "type": "CBMA",
43+
"estimator": {"type": "MKDADensity", "args": {"null_method": "approximate",
44+
"n_iters": 5000, "**kwargs": {}, "kernel__r": 10, "kernel__value": 1}}, "database_studyset":
45+
null, "filter": "included", "corrector": {"type": "FWECorrector", "args":
46+
{"voxel_thresh": 0.001, "n_iters": 5000, "vfwe_only": false, "method": "montecarlo"}},
47+
"conditions": [true], "weights": [1.0]}, "neurostore_analysis": {"id": "8S5xRedCGRkz",
48+
"created_at": "2025-10-21T04:57:40.255480+00:00", "updated_at": null, "neurostore_id":
49+
null, "exception": null, "traceback": null, "status": "PENDING"}, "studyset":
50+
{"id": "9jPvdkuRufUP", "created_at": "2025-10-21T04:57:40.008456+00:00", "updated_at":
51+
null, "user": "github|12564882", "username": "James Kent", "snapshot": null,
52+
"neurostore_id": "3EmvH2LELwR2", "version": null, "url": "https://neurostore.org/api/studysets/3EmvH2LELwR2"},
53+
"annotation": {"id": "YVLt6DRFKdd5", "created_at": "2025-10-21T04:57:40.121637+00:00",
54+
"updated_at": null, "user": "github|12564882", "username": "James Kent", "snapshot":
55+
null, "neurostore_id": "TebrRstj8ofh", "studyset": "3EmvH2LELwR2", "url":
56+
"https://neurostore.org/api/annotations/TebrRstj8ofh"}, "project": "D2cTfoxNfpLy",
57+
"cached_studyset": "9jPvdkuRufUP", "cached_annotation": "YVLt6DRFKdd5", "run_key":
58+
"PDeDnh_8MXc88xoVJySz3w", "results": [], "neurostore_url": null}'
59+
http_version: HTTP/1.1
60+
version: 1
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
interactions:
2+
- request:
3+
body: null
4+
headers:
5+
Connection:
6+
- close
7+
Host:
8+
- synth.neurostore.xyz
9+
User-Agent:
10+
- compose-runner/submit
11+
method: GET
12+
uri: https://synth.neurostore.xyz/api/meta-analyses/VtFZJFniCKvG?nested=true
13+
response:
14+
body:
15+
string: '{"id": "VtFZJFniCKvG", "created_at": "2025-10-21T14:10:35.309383+00:00",
16+
"updated_at": null, "user": "github|12564882", "username": "James Kent", "name":
17+
"Untitled MKDADensity Meta Analysis: included (1)", "description": "MKDADensity
18+
meta analysis with FDRCorrector", "provenance": null, "specification": {"id":
19+
"DtVzKEKGaXLu", "created_at": "2025-10-21T14:10:34.564365+00:00", "updated_at":
20+
null, "user": "github|12564882", "username": "James Kent", "type": "CBMA",
21+
"estimator": {"type": "MKDADensity", "args": {"null_method": "approximate",
22+
"n_iters": 5000, "**kwargs": {}, "kernel__r": 10, "kernel__value": 1}}, "database_studyset":
23+
null, "filter": "included", "corrector": {"type": "FDRCorrector", "args":
24+
{"method": "indep", "alpha": 0.05}}, "conditions": [true], "weights": [1.0]},
25+
"neurostore_analysis": {"id": "564c8kRnJVT4", "created_at": "2025-10-21T14:10:35.325173+00:00",
26+
"updated_at": null, "neurostore_id": null, "exception": null, "traceback":
27+
null, "status": "PENDING"}, "studyset": {"id": "FA3BDBdGRZ5d", "created_at":
28+
"2025-10-21T14:10:34.821625+00:00", "updated_at": null, "user": "github|12564882",
29+
"username": "James Kent", "snapshot": null, "neurostore_id": "3EmvH2LELwR2",
30+
"version": null, "url": "https://neurostore.org/api/studysets/3EmvH2LELwR2"},
31+
"annotation": {"id": "XELVYV7ftp7e", "created_at": "2025-10-21T14:10:35.183354+00:00",
32+
"updated_at": null, "user": "github|12564882", "username": "James Kent", "snapshot":
33+
null, "neurostore_id": "TebrRstj8ofh", "studyset": "3EmvH2LELwR2", "url":
34+
"https://neurostore.org/api/annotations/TebrRstj8ofh"}, "project": "D2cTfoxNfpLy",
35+
"cached_studyset": "FA3BDBdGRZ5d", "cached_annotation": "XELVYV7ftp7e", "run_key":
36+
"V_jTcP2zfNlWD4KhwKKcJw", "results": [], "neurostore_url": null}'
37+
headers:
38+
Connection:
39+
- close
40+
Content-Length:
41+
- '1750'
42+
Content-Type:
43+
- application/json
44+
Date:
45+
- Tue, 21 Oct 2025 14:14:50 GMT
46+
Server:
47+
- nginx/1.21.6
48+
Strict-Transport-Security:
49+
- max-age=31536000
50+
Vary:
51+
- Accept-Encoding
52+
status:
53+
code: 200
54+
message: OK
55+
version: 1

compose_runner/tests/test_lambda_handlers.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44
from datetime import datetime, timezone
55
from typing import Any, Dict
66

7+
import pytest
8+
79
from compose_runner.aws_lambda import log_poll_handler, results_handler, run_handler, status_handler
810

911

@@ -23,6 +25,28 @@ def _make_http_event(payload: Dict[str, Any]) -> Dict[str, Any]:
2325
}
2426

2527

28+
def test_requires_large_task_detection():
    """A montecarlo FWECorrector spec must route to the large task."""
    specification = {
        "corrector": {"type": "FWECorrector", "args": {"method": "montecarlo"}}
    }
    assert run_handler._requires_large_task(specification)
31+
32+
33+
def test_requires_large_task_false_when_method_differs():
    """Non-montecarlo FWE methods stay on the standard task size."""
    specification = {
        "corrector": {"type": "FWECorrector", "args": {"method": "bonferroni"}}
    }
    assert run_handler._requires_large_task(specification) is False
36+
37+
38+
@pytest.mark.vcr(record_mode="once")
def test_select_task_size_uses_large_for_montecarlo():
    """Recorded staging response with a montecarlo FWE spec selects 'large'."""
    selected = run_handler._select_task_size("ZPSvyvhZAopz", "staging", "artifact-test")
    assert selected == "large"
42+
43+
44+
@pytest.mark.vcr(record_mode="once")
def test_select_task_size_uses_standard_for_fdr():
    """Recorded staging response with an FDRCorrector keeps the default size."""
    selected = run_handler._select_task_size("VtFZJFniCKvG", "staging", "artifact-test")
    assert selected == "standard"
48+
49+
2650
def test_run_handler_http_success(monkeypatch, tmp_path):
2751
captured = {}
2852

@@ -36,6 +60,7 @@ class ExecutionAlreadyExists(Exception):
3660
...
3761

3862
monkeypatch.setattr(run_handler, "_SFN_CLIENT", FakeSFN())
63+
monkeypatch.setattr(run_handler, "_select_task_size", lambda *args: "standard")
3964
monkeypatch.setenv("STATE_MACHINE_ARN", "arn:aws:states:state-machine")
4065
monkeypatch.setenv("RESULTS_BUCKET", "bucket")
4166
monkeypatch.setenv("RESULTS_PREFIX", "prefix")
@@ -63,6 +88,32 @@ class ExecutionAlreadyExists(Exception):
6388
assert input_doc["results"]["prefix"] == "prefix"
6489
assert input_doc["nsc_key"] == "nsc"
6590
assert input_doc["nv_key"] == "nv"
91+
assert input_doc["task_size"] == "standard"
92+
93+
94+
def test_run_handler_http_uses_large_task(monkeypatch):
    """When task-size selection says 'large', the SFN input carries it."""
    recorded_call = {}

    class FakeSFN:
        # Mimics the boto3 Step Functions client surface used by the handler.
        def start_execution(self, **kwargs):
            recorded_call.update(kwargs)
            return {"executionArn": "arn:aws:states:us-east-1:123:execution:state-machine:run-456"}

        class exceptions:
            class ExecutionAlreadyExists(Exception):
                ...

    monkeypatch.setattr(run_handler, "_SFN_CLIENT", FakeSFN())
    # Force the size heuristic so the test does not hit the network.
    monkeypatch.setattr(run_handler, "_select_task_size", lambda *args: "large")
    monkeypatch.setenv("STATE_MACHINE_ARN", "arn:aws:states:state-machine")
    monkeypatch.setenv("RESULTS_BUCKET", "bucket")
    monkeypatch.setenv("RESULTS_PREFIX", "prefix")

    event = _make_http_event({"meta_analysis_id": "abc123"})
    response = run_handler.handler(event, DummyContext())

    assert response["statusCode"] == 202
    input_doc = json.loads(recorded_call["input"])
    assert input_doc["task_size"] == "large"
66117

67118

68119
def test_run_handler_missing_meta_analysis(monkeypatch):

0 commit comments

Comments
 (0)