Skip to content

Commit 4ba45f9

Browse files
Backend: Add Fargate Spot configuration to Challenge model and update service creation logic (#5056)
* Backend: Add Fargate Spot configuration to Challenge model and update service creation logic
* Backend: Update unit tests to include 'use_fargate_spot' attribute in Challenge model and service creation logic
1 parent 8b776c2 commit 4ba45f9

File tree

6 files changed

+368
-9
lines changed

6 files changed

+368
-9
lines changed

apps/challenges/admin.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ class ChallengeAdmin(ImportExportTimeStampedAdmin):
5656
"remote_evaluation",
5757
"created_at",
5858
"workers",
59+
"use_fargate_spot",
5960
"task_def_arn",
6061
"github_repository",
6162
)
@@ -68,6 +69,7 @@ class ChallengeAdmin(ImportExportTimeStampedAdmin):
6869
"enable_forum",
6970
"anonymous_leaderboard",
7071
"remote_evaluation",
72+
"use_fargate_spot",
7173
"start_date",
7274
"end_date",
7375
)

apps/challenges/aws_utils.py

Lines changed: 82 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,43 @@
100100
}
101101

102102

103+
def get_capacity_provider_strategy(challenge):
    """
    Build the ECS capacityProviderStrategy list from per-challenge fields.

    Reads ``fargate_spot_weight``, ``fargate_spot_base``, ``fargate_weight``
    and ``fargate_base`` from ``challenge``; a missing attribute or a falsy
    value (``None``, ``0``) is treated as 0.

    Returns a list of dicts suitable for passing to client.create_service()
    as ``capacityProviderStrategy``.
    Only includes providers whose weight > 0 (FARGATE_SPOT first, then
    FARGATE).
    Falls back to a single FARGATE_SPOT entry if both weights are 0.

    ECS documents that only one capacity provider in a strategy may define a
    ``base``. If both providers are included and both bases are non-zero,
    the FARGATE (on-demand) base is kept and the FARGATE_SPOT base is reset
    to 0, and a warning is logged, so the request is not rejected.
    """
    spot_weight = getattr(challenge, "fargate_spot_weight", 0) or 0
    spot_base = getattr(challenge, "fargate_spot_base", 0) or 0
    fg_weight = getattr(challenge, "fargate_weight", 0) or 0
    fg_base = getattr(challenge, "fargate_base", 0) or 0

    both_included = spot_weight > 0 and fg_weight > 0
    if both_included and spot_base > 0 and fg_base > 0:
        # A base is a guaranteed floor of tasks; keep it on on-demand
        # capacity, since Spot tasks can be interrupted.
        import logging

        logging.getLogger(__name__).warning(
            "Challenge %s sets a base on both FARGATE_SPOT (%s) and "
            "FARGATE (%s); ECS allows only one base per strategy, so the "
            "FARGATE_SPOT base is ignored.",
            getattr(challenge, "pk", None),
            spot_base,
            fg_base,
        )
        spot_base = 0

    strategy = []
    if spot_weight > 0:
        strategy.append(
            {
                "capacityProvider": "FARGATE_SPOT",
                "weight": spot_weight,
                "base": spot_base,
            }
        )
    if fg_weight > 0:
        strategy.append(
            {
                "capacityProvider": "FARGATE",
                "weight": fg_weight,
                "base": fg_base,
            }
        )
    if not strategy:
        # Neither provider has a positive weight: default to Spot only.
        strategy = [
            {"capacityProvider": "FARGATE_SPOT", "weight": 1, "base": 0}
        ]
    return strategy
138+
139+
103140
def get_code_upload_setup_meta_for_challenge(challenge_pk):
104141
"""
105142
Return the EKS cluster network and arn meta for a challenge
@@ -739,15 +776,51 @@ def create_service_by_challenge_pk(client, challenge, client_token):
739776
if response["ResponseMetadata"]["HTTPStatusCode"] != HTTPStatus.OK:
740777
return response
741778
task_def_arn = challenge.task_def_arn
742-
definition = service_definition.format(
743-
CLUSTER=COMMON_SETTINGS_DICT["CLUSTER"],
744-
service_name=service_name,
745-
task_def_arn=task_def_arn,
746-
client_token=client_token,
747-
challenge_pk=str(challenge.pk),
748-
**VPC_DICT,
749-
)
750-
definition = eval(definition)
779+
if getattr(challenge, "use_fargate_spot", False):
780+
definition = {
781+
"cluster": COMMON_SETTINGS_DICT["CLUSTER"],
782+
"serviceName": service_name,
783+
"taskDefinition": task_def_arn,
784+
"desiredCount": 1,
785+
"clientToken": client_token,
786+
"platformVersion": "LATEST",
787+
"capacityProviderStrategy": get_capacity_provider_strategy(
788+
challenge
789+
),
790+
"networkConfiguration": {
791+
"awsvpcConfiguration": {
792+
"subnets": [
793+
VPC_DICT["SUBNET_1"],
794+
VPC_DICT["SUBNET_2"],
795+
],
796+
"securityGroups": [VPC_DICT["SUBNET_SECURITY_GROUP"]],
797+
"assignPublicIp": "ENABLED",
798+
}
799+
},
800+
"schedulingStrategy": "REPLICA",
801+
"deploymentController": {"type": "ECS"},
802+
"deploymentConfiguration": {
803+
"deploymentCircuitBreaker": {
804+
"enable": True,
805+
"rollback": False,
806+
}
807+
},
808+
"tags": [
809+
{"key": "challenge_pk", "value": str(challenge.pk)},
810+
{"key": "managed_by", "value": "evalai"},
811+
],
812+
"propagateTags": "SERVICE",
813+
}
814+
else:
815+
definition = service_definition.format(
816+
CLUSTER=COMMON_SETTINGS_DICT["CLUSTER"],
817+
service_name=service_name,
818+
task_def_arn=task_def_arn,
819+
client_token=client_token,
820+
challenge_pk=str(challenge.pk),
821+
**VPC_DICT,
822+
)
823+
definition = eval(definition)
751824
try:
752825
response = client.create_service(**definition)
753826
if response["ResponseMetadata"]["HTTPStatusCode"] == HTTPStatus.OK:
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
# Generated by Django 2.2.20 on 2026-03-05 02:19
2+
3+
from django.db import migrations, models
4+
5+
6+
class Migration(migrations.Migration):
    """
    Add the Fargate capacity-provider settings to the ``challenge`` model.

    Adds five columns: ``fargate_base``, ``fargate_spot_base``,
    ``fargate_spot_weight``, ``fargate_weight`` and ``use_fargate_spot``.
    Each has a default, so existing rows are populated without a data
    migration.
    """

    dependencies = [
        (
            "challenges",
            "0120_remove_duplicate_star_challenges_and_add_unique_constraint",
        ),
    ]

    operations = [
        # Minimum task count pinned to on-demand FARGATE.
        migrations.AddField(
            model_name="challenge",
            name="fargate_base",
            field=models.PositiveSmallIntegerField(
                verbose_name="Fargate Base",
                help_text="Minimum number of tasks placed on FARGATE before weights apply.",
                default=0,
            ),
        ),
        # Minimum task count pinned to FARGATE_SPOT.
        migrations.AddField(
            model_name="challenge",
            name="fargate_spot_base",
            field=models.PositiveSmallIntegerField(
                verbose_name="Fargate Spot Base",
                help_text="Minimum number of tasks placed on FARGATE_SPOT before weights apply.",
                default=0,
            ),
        ),
        # Relative share of tasks placed on FARGATE_SPOT.
        migrations.AddField(
            model_name="challenge",
            name="fargate_spot_weight",
            field=models.PositiveSmallIntegerField(
                verbose_name="Fargate Spot Weight",
                help_text="Weight for FARGATE_SPOT in capacity provider strategy. 0 excludes Spot.",
                default=1,
            ),
        ),
        # Relative share of tasks placed on on-demand FARGATE.
        migrations.AddField(
            model_name="challenge",
            name="fargate_weight",
            field=models.PositiveSmallIntegerField(
                verbose_name="Fargate Weight",
                help_text="Weight for FARGATE in capacity provider strategy. 0 = Spot only.",
                default=0,
            ),
        ),
        # Master switch between capacityProviderStrategy and launchType.
        migrations.AddField(
            model_name="challenge",
            name="use_fargate_spot",
            field=models.BooleanField(
                verbose_name="Use Fargate Spot",
                help_text="If True, use capacityProviderStrategy (Spot). If False, use launchType FARGATE.",
                default=True,
            ),
        ),
    ]

apps/challenges/models.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,31 @@ def __init__(self, *args, **kwargs):
209209
# Memory size of a Fargate worker for the challenge. Default value is 0.5
210210
# GB memory.
211211
worker_memory = models.IntegerField(null=True, blank=True, default=1024)
212+
use_fargate_spot = models.BooleanField(
213+
default=True,
214+
verbose_name="Use Fargate Spot",
215+
help_text="If True, use capacityProviderStrategy (Spot). If False, use launchType FARGATE.",
216+
)
217+
fargate_spot_weight = models.PositiveSmallIntegerField(
218+
default=1,
219+
verbose_name="Fargate Spot Weight",
220+
help_text="Weight for FARGATE_SPOT in capacity provider strategy. 0 excludes Spot.",
221+
)
222+
fargate_spot_base = models.PositiveSmallIntegerField(
223+
default=0,
224+
verbose_name="Fargate Spot Base",
225+
help_text="Minimum number of tasks placed on FARGATE_SPOT before weights apply.",
226+
)
227+
fargate_weight = models.PositiveSmallIntegerField(
228+
default=0,
229+
verbose_name="Fargate Weight",
230+
help_text="Weight for FARGATE in capacity provider strategy. 0 = Spot only.",
231+
)
232+
fargate_base = models.PositiveSmallIntegerField(
233+
default=0,
234+
verbose_name="Fargate Base",
235+
help_text="Minimum number of tasks placed on FARGATE before weights apply.",
236+
)
212237
# Enable/Disable emails notifications for the challenge
213238
inform_hosts = models.BooleanField(default=True)
214239
# VPC and subnet CIDRs for code upload challenge

tests/unit/challenges/test_admin.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ def test_list_display_contains_expected_fields(self):
178178
"remote_evaluation",
179179
"created_at",
180180
"workers",
181+
"use_fargate_spot",
181182
"task_def_arn",
182183
"github_repository",
183184
)

0 commit comments

Comments
 (0)