Skip to content

Commit a90627a

Browse files
authored
feat: add aws inf2 instance type as named resources (#1002)
1 parent 357b9d9 commit a90627a

File tree

2 files changed

+73
-0
lines changed

2 files changed

+73
-0
lines changed

torchx/specs/named_resources_aws.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,46 @@ def aws_trn1_32xlarge() -> Resource:
368368
)
369369

370370

371+
def aws_inf2_xlarge() -> Resource:
    """Build the named resource describing the AWS ``inf2.xlarge`` instance type.

    Returns:
        A ``Resource`` with ``cpu=4``, ``gpu=0``, ``memMB=16 * GiB``, the
        ``K8S_ITYPE`` capability set to ``"inf2.xlarge"`` and a single
        ``NEURON_DEVICE``.
    """
    memory = 16 * GiB
    instance_capabilities = {K8S_ITYPE: "inf2.xlarge"}
    accelerator_devices = {NEURON_DEVICE: 1}
    return Resource(
        cpu=4,
        gpu=0,
        memMB=memory,
        capabilities=instance_capabilities,
        devices=accelerator_devices,
    )
379+
380+
381+
def aws_inf2_8xlarge() -> Resource:
    """Build the named resource describing the AWS ``inf2.8xlarge`` instance type.

    Returns:
        A ``Resource`` with ``cpu=32``, ``gpu=0``, ``memMB=128 * GiB``, the
        ``K8S_ITYPE`` capability set to ``"inf2.8xlarge"`` and a single
        ``NEURON_DEVICE``.
    """
    memory = 128 * GiB
    instance_capabilities = {K8S_ITYPE: "inf2.8xlarge"}
    accelerator_devices = {NEURON_DEVICE: 1}
    return Resource(
        cpu=32,
        gpu=0,
        memMB=memory,
        capabilities=instance_capabilities,
        devices=accelerator_devices,
    )
389+
390+
391+
def aws_inf2_24xlarge() -> Resource:
    """Build the named resource describing the AWS ``inf2.24xlarge`` instance type.

    Returns:
        A ``Resource`` with ``cpu=96``, ``gpu=0``, ``memMB=384 * GiB``, the
        ``K8S_ITYPE`` capability set to ``"inf2.24xlarge"`` and six
        ``NEURON_DEVICE`` entries.
    """
    memory = 384 * GiB
    instance_capabilities = {K8S_ITYPE: "inf2.24xlarge"}
    accelerator_devices = {NEURON_DEVICE: 6}
    return Resource(
        cpu=96,
        gpu=0,
        memMB=memory,
        capabilities=instance_capabilities,
        devices=accelerator_devices,
    )
399+
400+
401+
def aws_inf2_48xlarge() -> Resource:
    """Build the named resource describing the AWS ``inf2.48xlarge`` instance type.

    Returns:
        A ``Resource`` with ``cpu=192``, ``gpu=0``, ``memMB=768 * GiB``, the
        ``K8S_ITYPE`` capability set to ``"inf2.48xlarge"`` and twelve
        ``NEURON_DEVICE`` entries.
    """
    memory = 768 * GiB
    instance_capabilities = {K8S_ITYPE: "inf2.48xlarge"}
    accelerator_devices = {NEURON_DEVICE: 12}
    return Resource(
        cpu=192,
        gpu=0,
        memMB=memory,
        capabilities=instance_capabilities,
        devices=accelerator_devices,
    )
409+
410+
371411
NAMED_RESOURCES: Mapping[str, Callable[[], Resource]] = {
372412
"aws_t3.medium": aws_t3_medium,
373413
"aws_m5.2xlarge": aws_m5_2xlarge,
@@ -405,4 +445,8 @@ def aws_trn1_32xlarge() -> Resource:
405445
"aws_g6e.48xlarge": aws_g6e_48xlarge,
406446
"aws_trn1.2xlarge": aws_trn1_2xlarge,
407447
"aws_trn1.32xlarge": aws_trn1_32xlarge,
448+
"aws_inf2.xlarge": aws_inf2_xlarge,
449+
"aws_inf2.8xlarge": aws_inf2_8xlarge,
450+
"aws_inf2.24xlarge": aws_inf2_24xlarge,
451+
"aws_inf2.48xlarge": aws_inf2_48xlarge,
408452
}

torchx/specs/test/named_resources_aws_test.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@
3232
aws_g6e_4xlarge,
3333
aws_g6e_8xlarge,
3434
aws_g6e_xlarge,
35+
aws_inf2_24xlarge,
36+
aws_inf2_48xlarge,
37+
aws_inf2_8xlarge,
38+
aws_inf2_xlarge,
3539
aws_m5_2xlarge,
3640
aws_p3_16xlarge,
3741
aws_p3_2xlarge,
@@ -239,6 +243,31 @@ def test_aws_trn1(self) -> None:
239243
self.assertEqual(trn1_32.memMB, trn1_2.memMB * 16)
240244
self.assertEqual({EFA_DEVICE: 8, NEURON_DEVICE: 16}, trn1_32.devices)
241245

246+
def test_aws_inf2(self) -> None:
    """Verify cpu, gpu, memMB and devices for every ``aws_inf2_*`` named resource."""
    # Each row: (resource factory, expected cpu, expected memory as a
    # multiple of GiB, expected NEURON_DEVICE count).
    expectations = (
        (aws_inf2_xlarge, 4, 16, 1),
        (aws_inf2_8xlarge, 32, 128, 1),
        (aws_inf2_24xlarge, 96, 384, 6),
        (aws_inf2_48xlarge, 192, 768, 12),
    )

    for factory, expected_cpu, expected_mem_gib, expected_neurons in expectations:
        resource = factory()
        self.assertEqual(expected_cpu, resource.cpu)
        # None of the inf2 resources declare GPUs.
        self.assertEqual(0, resource.gpu)
        self.assertEqual(expected_mem_gib * GiB, resource.memMB)
        self.assertEqual({NEURON_DEVICE: expected_neurons}, resource.devices)
270+
242271
def test_aws_m5_2xlarge(self) -> None:
243272
resource = aws_m5_2xlarge()
244273
self.assertEqual(8, resource.cpu)

0 commit comments

Comments
 (0)