From 0fe4c859216e08c599b4dfec9b690457e7f41ca6 Mon Sep 17 00:00:00 2001
From: Alexander Zhipa
Date: Fri, 13 Dec 2024 14:46:36 -0500
Subject: [PATCH] fix: adjust aws_c5.18xlarge memory size

---
 torchx/specs/named_resources_aws.py           | 6 +++++-
 torchx/specs/test/named_resources_aws_test.py | 2 +-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/torchx/specs/named_resources_aws.py b/torchx/specs/named_resources_aws.py
index c290c3c38..f2f9b85f4 100644
--- a/torchx/specs/named_resources_aws.py
+++ b/torchx/specs/named_resources_aws.py
@@ -142,7 +142,11 @@ def aws_m5_2xlarge() -> Resource:
 
 def aws_c5_18xlarge() -> Resource:
     return Resource(
-        cpu=72, gpu=0, memMB=144 * GiB, capabilities={K8S_ITYPE: "c5.18xlarge"}
+        # using lower memory size than the spec since MEM_TAX is not enough for adjustment
+        cpu=72,
+        gpu=0,
+        memMB=142 * GiB,
+        capabilities={K8S_ITYPE: "c5.18xlarge"},
     )
 
 
diff --git a/torchx/specs/test/named_resources_aws_test.py b/torchx/specs/test/named_resources_aws_test.py
index cdc99573a..9b589e188 100644
--- a/torchx/specs/test/named_resources_aws_test.py
+++ b/torchx/specs/test/named_resources_aws_test.py
@@ -249,7 +249,7 @@ def test_aws_c5_18xlarge(self) -> None:
         resource = aws_c5_18xlarge()
         self.assertEqual(72, resource.cpu)
         self.assertEqual(0, resource.gpu)
-        self.assertEqual(144 * GiB, resource.memMB)
+        self.assertEqual(142 * GiB, resource.memMB)
 
     def test_aws_t3_medium(self) -> None:
         resource = aws_t3_medium()