Backend: Add Fargate Spot configuration to Challenge model and update service creation logic (#5056)

RishabhJain2018 · web-flow · commit 4ba45f9180eb · 2026-03-04T22:14:35.000-08:00
* Backend: Add Fargate Spot configuration to Challenge model and update service creation logic

* Backend: Update unit tests to include 'use_fargate_spot' attribute in Challenge model and service creation logic
diff --git a/apps/challenges/admin.py b/apps/challenges/admin.py
@@ -56,6 +56,7 @@ class ChallengeAdmin(ImportExportTimeStampedAdmin):
         "remote_evaluation",
         "created_at",
         "workers",
+        "use_fargate_spot",
         "task_def_arn",
         "github_repository",
     )
@@ -68,6 +69,7 @@ class ChallengeAdmin(ImportExportTimeStampedAdmin):
         "enable_forum",
         "anonymous_leaderboard",
         "remote_evaluation",
+        "use_fargate_spot",
         "start_date",
         "end_date",
     )
diff --git a/apps/challenges/aws_utils.py b/apps/challenges/aws_utils.py
@@ -100,6 +100,43 @@
 }
 
 
+def get_capacity_provider_strategy(challenge):
+    """
+    Build the ECS capacityProviderStrategy list from per-challenge fields.
+
+    Missing or None weight/base attributes on the challenge count as 0.
+    Returns a list of dicts for client.create_service(): FARGATE_SPOT, then
+    FARGATE, each only if its weight > 0; Spot-only (weight 1) if both are 0.
+    """
+    strategy = []
+    spot_weight = getattr(challenge, "fargate_spot_weight", 0) or 0
+    spot_base = getattr(challenge, "fargate_spot_base", 0) or 0
+    fg_weight = getattr(challenge, "fargate_weight", 0) or 0
+    fg_base = getattr(challenge, "fargate_base", 0) or 0
+
+    if spot_weight > 0:  # spot_base is sent only when Spot has weight
+        strategy.append(
+            {
+                "capacityProvider": "FARGATE_SPOT",
+                "weight": spot_weight,
+                "base": spot_base,
+            }
+        )
+    if fg_weight > 0:  # fg_base is sent only when FARGATE has weight
+        strategy.append(
+            {
+                "capacityProvider": "FARGATE",
+                "weight": fg_weight,
+                "base": fg_base,  # NOTE(review): ECS permits one nonzero base
+            }
+        )
+    if not strategy:  # neither weight was positive
+        strategy = [
+            {"capacityProvider": "FARGATE_SPOT", "weight": 1, "base": 0}
+        ]
+    return strategy
+
+
 def get_code_upload_setup_meta_for_challenge(challenge_pk):
     """
     Return the EKS cluster network and arn meta for a challenge
@@ -739,15 +776,51 @@ def create_service_by_challenge_pk(client, challenge, client_token):
             if response["ResponseMetadata"]["HTTPStatusCode"] != HTTPStatus.OK:
                 return response
         task_def_arn = challenge.task_def_arn
-        definition = service_definition.format(
-            CLUSTER=COMMON_SETTINGS_DICT["CLUSTER"],
-            service_name=service_name,
-            task_def_arn=task_def_arn,
-            client_token=client_token,
-            challenge_pk=str(challenge.pk),
-            **VPC_DICT,
-        )
-        definition = eval(definition)
+        if getattr(challenge, "use_fargate_spot", False):
+            definition = {
+                "cluster": COMMON_SETTINGS_DICT["CLUSTER"],
+                "serviceName": service_name,
+                "taskDefinition": task_def_arn,
+                "desiredCount": 1,
+                "clientToken": client_token,
+                "platformVersion": "LATEST",
+                "capacityProviderStrategy": get_capacity_provider_strategy(
+                    challenge
+                ),
+                "networkConfiguration": {
+                    "awsvpcConfiguration": {
+                        "subnets": [
+                            VPC_DICT["SUBNET_1"],
+                            VPC_DICT["SUBNET_2"],
+                        ],
+                        "securityGroups": [VPC_DICT["SUBNET_SECURITY_GROUP"]],
+                        "assignPublicIp": "ENABLED",
+                    }
+                },
+                "schedulingStrategy": "REPLICA",
+                "deploymentController": {"type": "ECS"},
+                "deploymentConfiguration": {
+                    "deploymentCircuitBreaker": {
+                        "enable": True,
+                        "rollback": False,
+                    }
+                },
+                "tags": [
+                    {"key": "challenge_pk", "value": str(challenge.pk)},
+                    {"key": "managed_by", "value": "evalai"},
+                ],
+                "propagateTags": "SERVICE",
+            }
+        else:
+            definition = service_definition.format(
+                CLUSTER=COMMON_SETTINGS_DICT["CLUSTER"],
+                service_name=service_name,
+                task_def_arn=task_def_arn,
+                client_token=client_token,
+                challenge_pk=str(challenge.pk),
+                **VPC_DICT,
+            )
+            definition = eval(definition)
         try:
             response = client.create_service(**definition)
             if response["ResponseMetadata"]["HTTPStatusCode"] == HTTPStatus.OK:
diff --git a/apps/challenges/migrations/0121_add_fargate_spot_fields.py b/apps/challenges/migrations/0121_add_fargate_spot_fields.py
@@ -0,0 +1,61 @@
+# Generated by Django 2.2.20 on 2026-03-05 02:19
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    """Add use_fargate_spot and the Fargate/Spot weight and base fields."""
+    dependencies = [
+        (
+            "challenges",
+            "0120_remove_duplicate_star_challenges_and_add_unique_constraint",
+        ),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name="challenge",
+            name="fargate_base",
+            field=models.PositiveSmallIntegerField(
+                default=0,
+                help_text="Minimum number of tasks placed on FARGATE before weights apply.",
+                verbose_name="Fargate Base",
+            ),
+        ),
+        migrations.AddField(
+            model_name="challenge",
+            name="fargate_spot_base",
+            field=models.PositiveSmallIntegerField(
+                default=0,
+                help_text="Minimum number of tasks placed on FARGATE_SPOT before weights apply.",
+                verbose_name="Fargate Spot Base",
+            ),
+        ),
+        migrations.AddField(
+            model_name="challenge",
+            name="fargate_spot_weight",
+            field=models.PositiveSmallIntegerField(
+                default=1,
+                help_text="Weight for FARGATE_SPOT in capacity provider strategy. 0 excludes Spot.",
+                verbose_name="Fargate Spot Weight",
+            ),
+        ),
+        migrations.AddField(
+            model_name="challenge",
+            name="fargate_weight",
+            field=models.PositiveSmallIntegerField(
+                default=0,
+                help_text="Weight for FARGATE in capacity provider strategy. 0 = Spot only.",
+                verbose_name="Fargate Weight",
+            ),
+        ),
+        migrations.AddField(
+            model_name="challenge",
+            name="use_fargate_spot",
+            field=models.BooleanField(
+                default=True,  # AddField also backfills existing rows with True
+                help_text="If True, use capacityProviderStrategy (Spot). If False, use launchType FARGATE.",
+                verbose_name="Use Fargate Spot",
+            ),
+        ),
+    ]
diff --git a/apps/challenges/models.py b/apps/challenges/models.py
@@ -209,6 +209,31 @@ def __init__(self, *args, **kwargs):
     # Memory size of a Fargate worker for the challenge. Default value is 0.5
     # GB memory.
     worker_memory = models.IntegerField(null=True, blank=True, default=1024)
+    use_fargate_spot = models.BooleanField(
+        default=True,
+        verbose_name="Use Fargate Spot",
+        help_text="If True, use capacityProviderStrategy (Spot). If False, use launchType FARGATE.",
+    )
+    fargate_spot_weight = models.PositiveSmallIntegerField(
+        default=1,
+        verbose_name="Fargate Spot Weight",
+        help_text="Weight for FARGATE_SPOT in capacity provider strategy. 0 excludes Spot.",
+    )
+    fargate_spot_base = models.PositiveSmallIntegerField(
+        default=0,
+        verbose_name="Fargate Spot Base",
+        help_text="Minimum number of tasks placed on FARGATE_SPOT before weights apply.",
+    )
+    fargate_weight = models.PositiveSmallIntegerField(
+        default=0,
+        verbose_name="Fargate Weight",
+        help_text="Weight for FARGATE in capacity provider strategy. 0 = Spot only.",
+    )
+    fargate_base = models.PositiveSmallIntegerField(
+        default=0,
+        verbose_name="Fargate Base",
+        help_text="Minimum number of tasks placed on FARGATE before weights apply.",
+    )
     # Enable/Disable emails notifications for the challenge
     inform_hosts = models.BooleanField(default=True)
     # VPC and subnet CIDRs for code upload challenge
diff --git a/tests/unit/challenges/test_admin.py b/tests/unit/challenges/test_admin.py
@@ -178,6 +178,7 @@ def test_list_display_contains_expected_fields(self):
             "remote_evaluation",
             "created_at",
             "workers",
+            "use_fargate_spot",
             "task_def_arn",
             "github_repository",
         )
diff --git a/tests/unit/challenges/test_aws_utils.py b/tests/unit/challenges/test_aws_utils.py

Original file line number	Diff line number	Diff line change
`@@ -178,6 +178,7 @@ def test_list_display_contains_expected_fields(self):`
`178`	`178`	`"remote_evaluation",`
`179`	`179`	`"created_at",`
`180`	`180`	`"workers",`
	`181`	`+ "use_fargate_spot",`
`181`	`182`	`"task_def_arn",`
`182`	`183`	`"github_repository",`
`183`	`184`	`)`