From 74e1e860026cbaf6c09642ba3dbac7438a3a6ecf Mon Sep 17 00:00:00 2001
From: Bin Navin Patel
Date: Mon, 4 Aug 2025 14:13:26 -0400
Subject: [PATCH 01/13] chore: patch libsqlite3-0 huggingface-pytorch-inference-neuronx

---
 .../inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx | 1 +
 1 file changed, 1 insertion(+)

diff --git a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
index 55ce3ca3587d..cc2f8239e214 100644
--- a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
+++ b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
@@ -62,6 +62,7 @@ RUN apt-get update \
 libxml2 \
 libgstreamer1.0-0 \
 libsoup2.4-1 \
+ libsqlite3-0 \
 && rm -rf /var/lib/apt/lists/* \
 && rm -rf /tmp/tmp* \
 && apt-get clean

From f015f201ce94f6655cf9ed4d3bddb17b0f8ef179 Mon Sep 17 00:00:00 2001
From: Bin Navin Patel
Date: Mon, 4 Aug 2025 14:13:32 -0400
Subject: [PATCH 02/13] Update ['dlc_developer_config.toml']

dlc_developer_config.toml:
{ 'build': { 'build_frameworks': ['huggingface_pytorch'], 'build_inference': True, 'build_training': False},
  'buildspec_override': { 'dlc-pr-huggingface-pytorch-neuronx-inference': 'huggingface/pytorch/inference/buildspec-neuronx.yml'},
  'dev': { 'arm64_mode': False, 'deep_canary_mode': False, 'graviton_mode': False, 'neuronx_mode': True},
  'test': { 'ec2_tests': True, 'ecs_tests': True, 'eks_tests': True, 'sagemaker_local_tests': True, 'sagemaker_remote_tests': True, 'sanity_tests': True, 'security_tests': True}}
---
 dlc_developer_config.toml | 122 ++++----------------------------
 1 file changed, 10 insertions(+), 112 deletions(-)

diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml
index 1962bfd69e21..e710e4cfd578 100644
--- a/dlc_developer_config.toml
+++ b/dlc_developer_config.toml
@@ -1,179 +1,77 @@
 [dev]
-# Set to "huggingface", for example, if you are a huggingface developer. Default is ""
 partner_developer = ""
-# Please only set it to true if you are preparing an EI related PR
-# Do remember to revert it back to false before merging any PR (including EI dedicated PR)
 ei_mode = false
-# Please only set it to true if you are preparing a NEURON related PR
-# Do remember to revert it back to false before merging any PR (including NEURON dedicated PR)
 neuron_mode = false
-# Please only set it to true if you are preparing a NEURONX related PR
-# Do remember to revert it back to false before merging any PR (including NEURONX dedicated PR)
-neuronx_mode = false
-# Please only set it to true if you are preparing a GRAVITON related PR
-# Do remember to revert it back to false before merging any PR (including GRAVITON dedicated PR)
+neuronx_mode = true
 graviton_mode = false
-# Please only set it to true if you are preparing a ARM64 related PR
-# Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR)
 arm64_mode = false
-# Please only set it to True if you are preparing a HABANA related PR
-# Do remember to revert it back to False before merging any PR (including HABANA dedicated PR)
 habana_mode = false
-# Please only set it to True if you are preparing a HUGGINGFACE TRCOMP related PR
-# Do remember to revert it back to False before merging any PR (including HUGGINGFACE TRCOMP dedicated PR)
-# This mode is used to build TF 2.6 and PT1.11 DLC
 huggingface_trcomp_mode = false
-# Please only set it to True if you are preparing a TRCOMP related PR
-# Do remember to revert it back to False before merging any PR (including TRCOMP dedicated PR)
-# This mode is used to build PT1.12 and above DLC
 trcomp_mode = false
-# Set deep_canary_mode to true to simulate Deep Canary Test conditions on PR for all frameworks in the
-# build_frameworks list below. This will cause all image builds and non-deep-canary tests on the PR to be skipped,
-# regardless of whether they are enabled or disabled below.
-# Set graviton_mode/arm64_mode to true to run Deep Canaries on Graviton/ARM64 images.
-# Do remember to revert it back to false before merging any PR.
 deep_canary_mode = false
 
 [build]
-# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
-# available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
-build_frameworks = []
-
-
-# By default we build both training and inference containers. Set true/false values to determine which to build.
-build_training = true
+build_frameworks = [ "huggingface_pytorch",]
+build_training = false
 build_inference = true
-
-# Set do_build to "false" to skip builds and test the latest image built by this PR
-# Note: at least one build is required to set do_build to "false"
 do_build = true
 
 [notify]
-### Notify on test failures
-### Off by default
 notify_test_failures = false
- # Valid values: medium or high
- notification_severity = "medium"
+notification_severity = "medium"
 
 [test]
-### On by default
 sanity_tests = true
 security_tests = true
- safety_check_test = false
- ecr_scan_allowlist_feature = false
+safety_check_test = false
+ecr_scan_allowlist_feature = false
 ecs_tests = true
 eks_tests = true
 ec2_tests = true
-# Set it to true if you are preparing a Benchmark related PR
 ec2_benchmark_tests = false
-
-### Set ec2_tests_on_heavy_instances = true to be able to run any EC2 tests that use large/expensive instance types by
-### default. If false, these types of tests will be skipped while other tests will run as usual.
-### These tests are run in EC2 test jobs, so ec2_tests must be true if ec2_tests_on_heavy_instances is true.
-### Off by default (set to false)
 ec2_tests_on_heavy_instances = false
-### SM specific tests
-### On by default
 sagemaker_local_tests = true
-### Set enable_ipv6 = true to run tests with IPv6-enabled resources
-### Off by default (set to false)
 enable_ipv6 = false
-### Set the VPC name to be used for IPv6 testing, this variable is empty by default
-### To create an IPv6-enabled VPC and its related resources:
-### 1. Follow this AWS doc: https://docs.aws.amazon.com/vpc/latest/userguide/create-vpc.html#create-vpc-and-other-resources
-### 2. After creating the VPC and related resources:
-### a. Set 'Auto-assign IPv6 address' option to 'No' in all public subnets within the VPC
-### b. Configure the default security group to allow SSH traffic using IPv4
-###
-### 3. Create an EFA-enabled security group:
-### a. Follow 'Step 1: Prepare an EFA-enabled security group' in:
-### https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa-start.html#efa-start-security
-### b. Configure this security group to also allow SSH traffic via IPv4
 ipv6_vpc_name = ""
-
-# run standard sagemaker remote tests from test/sagemaker_tests
 sagemaker_remote_tests = true
-# run efa sagemaker tests
 sagemaker_efa_tests = false
-# run release_candidate_integration tests
 sagemaker_rc_tests = false
-# run sagemaker benchmark tests
 sagemaker_benchmark_tests = false
-
-# SM remote EFA test instance type
 sagemaker_remote_efa_instance_type = ""
-
-# Run CI tests for nightly images
-# false by default
 nightly_pr_test_mode = false
-
 use_scheduler = false
 
 [buildspec_override]
-# Assign the path to the required buildspec file from the deep-learning-containers folder
-# For example:
-# dlc-pr-tensorflow-2-habana-training = "habana/tensorflow/training/buildspec-2-10.yml"
-# dlc-pr-pytorch-inference = "pytorch/inference/buildspec-1-12.yml"
-# Setting the buildspec file path to "" allows the image builder to choose the default buildspec file.
-
-### TRAINING PR JOBS ###
-
-# Standard Framework Training
 dlc-pr-pytorch-training = ""
 dlc-pr-tensorflow-2-training = ""
 dlc-pr-autogluon-training = ""
-
-# ARM64 Training
 dlc-pr-pytorch-arm64-training = ""
-
-# HuggingFace Training
 dlc-pr-huggingface-tensorflow-training = ""
 dlc-pr-huggingface-pytorch-training = ""
-
-# Training Compiler
 dlc-pr-huggingface-pytorch-trcomp-training = ""
 dlc-pr-huggingface-tensorflow-2-trcomp-training = ""
 dlc-pr-pytorch-trcomp-training = ""
-
-# Neuron Training
 dlc-pr-pytorch-neuron-training = ""
 dlc-pr-tensorflow-2-neuron-training = ""
-
-# Stability AI Training
 dlc-pr-stabilityai-pytorch-training = ""
-
-# Habana Training
 dlc-pr-pytorch-habana-training = ""
 dlc-pr-tensorflow-2-habana-training = ""
-
-### INFERENCE PR JOBS ###
-
-# Standard Framework Inference
 dlc-pr-pytorch-inference = ""
 dlc-pr-tensorflow-2-inference = ""
 dlc-pr-autogluon-inference = ""
-
-# Graviton Inference
 dlc-pr-pytorch-graviton-inference = ""
 dlc-pr-tensorflow-2-graviton-inference = ""
-
-# ARM64 Inference
 dlc-pr-pytorch-arm64-inference = ""
 dlc-pr-tensorflow-2-arm64-inference = ""
-
-# Neuron Inference
 dlc-pr-pytorch-neuron-inference = ""
 dlc-pr-tensorflow-1-neuron-inference = ""
 dlc-pr-tensorflow-2-neuron-inference = ""
-
-# HuggingFace Inference
 dlc-pr-huggingface-tensorflow-inference = ""
 dlc-pr-huggingface-pytorch-inference = ""
 dlc-pr-huggingface-pytorch-neuron-inference = ""
-
-# Stability AI Inference
 dlc-pr-stabilityai-pytorch-inference = ""
-
-# EIA Inference
 dlc-pr-pytorch-eia-inference = ""
-dlc-pr-tensorflow-2-eia-inference = ""
\ No newline at end of file
+dlc-pr-tensorflow-2-eia-inference = ""
+# WARNING: Unrecognized key generated below
+dlc-pr-huggingface-pytorch-neuronx-inference = "huggingface/pytorch/inference/buildspec-neuronx.yml"
+

From 11b8b79c5911031c54ac4d946ab5fb89c2e226bd Mon Sep 17 00:00:00 2001
From: Bin Navin Patel
Date: Mon, 4 Aug 2025 14:42:53 -0400
Subject: [PATCH 03/13] Revert "Update ['dlc_developer_config.toml']"

This reverts commit f015f201ce94f6655cf9ed4d3bddb17b0f8ef179.
---
 dlc_developer_config.toml | 122 ++++++++++++++++++++++++++++++++++----
 1 file changed, 112 insertions(+), 10 deletions(-)

diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml
index e710e4cfd578..1962bfd69e21 100644
--- a/dlc_developer_config.toml
+++ b/dlc_developer_config.toml
@@ -1,77 +1,179 @@
 [dev]
+# Set to "huggingface", for example, if you are a huggingface developer. Default is ""
Default is "" partner_developer = "" +# Please only set it to true if you are preparing an EI related PR +# Do remember to revert it back to false before merging any PR (including EI dedicated PR) ei_mode = false +# Please only set it to true if you are preparing a NEURON related PR +# Do remember to revert it back to false before merging any PR (including NEURON dedicated PR) neuron_mode = false -neuronx_mode = true +# Please only set it to true if you are preparing a NEURONX related PR +# Do remember to revert it back to false before merging any PR (including NEURONX dedicated PR) +neuronx_mode = false +# Please only set it to true if you are preparing a GRAVITON related PR +# Do remember to revert it back to false before merging any PR (including GRAVITON dedicated PR) graviton_mode = false +# Please only set it to true if you are preparing a ARM64 related PR +# Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR) arm64_mode = false +# Please only set it to True if you are preparing a HABANA related PR +# Do remember to revert it back to False before merging any PR (including HABANA dedicated PR) habana_mode = false +# Please only set it to True if you are preparing a HUGGINGFACE TRCOMP related PR +# Do remember to revert it back to False before merging any PR (including HUGGINGFACE TRCOMP dedicated PR) +# This mode is used to build TF 2.6 and PT1.11 DLC huggingface_trcomp_mode = false +# Please only set it to True if you are preparing a TRCOMP related PR +# Do remember to revert it back to False before merging any PR (including TRCOMP dedicated PR) +# This mode is used to build PT1.12 and above DLC trcomp_mode = false +# Set deep_canary_mode to true to simulate Deep Canary Test conditions on PR for all frameworks in the +# build_frameworks list below. This will cause all image builds and non-deep-canary tests on the PR to be skipped, +# regardless of whether they are enabled or disabled below. +# Set graviton_mode/arm64_mode to true to run Deep Canaries on Graviton/ARM64 images. +# Do remember to revert it back to false before merging any PR. deep_canary_mode = false [build] -build_frameworks = [ "huggingface_pytorch",] -build_training = false +# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. +# available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] +build_frameworks = [] + + +# By default we build both training and inference containers. Set true/false values to determine which to build. 
+build_training = true build_inference = true + +# Set do_build to "false" to skip builds and test the latest image built by this PR +# Note: at least one build is required to set do_build to "false" do_build = true [notify] +### Notify on test failures +### Off by default notify_test_failures = false -notification_severity = "medium" + # Valid values: medium or high + notification_severity = "medium" [test] +### On by default sanity_tests = true security_tests = true -safety_check_test = false -ecr_scan_allowlist_feature = false + safety_check_test = false + ecr_scan_allowlist_feature = false ecs_tests = true eks_tests = true ec2_tests = true +# Set it to true if you are preparing a Benchmark related PR ec2_benchmark_tests = false + +### Set ec2_tests_on_heavy_instances = true to be able to run any EC2 tests that use large/expensive instance types by +### default. If false, these types of tests will be skipped while other tests will run as usual. +### These tests are run in EC2 test jobs, so ec2_tests must be true if ec2_tests_on_heavy_instances is true. +### Off by default (set to false) ec2_tests_on_heavy_instances = false +### SM specific tests +### On by default sagemaker_local_tests = true +### Set enable_ipv6 = true to run tests with IPv6-enabled resources +### Off by default (set to false) enable_ipv6 = false +### Set the VPC name to be used for IPv6 testing, this variable is empty by default +### To create an IPv6-enabled VPC and its related resources: +### 1. Follow this AWS doc: https://docs.aws.amazon.com/vpc/latest/userguide/create-vpc.html#create-vpc-and-other-resources +### 2. After creating the VPC and related resources: +### a. Set 'Auto-assign IPv6 address' option to 'No' in all public subnets within the VPC +### b. Configure the default security group to allow SSH traffic using IPv4 +### +### 3. Create an EFA-enabled security group: +### a. Follow 'Step 1: Prepare an EFA-enabled security group' in: +### https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa-start.html#efa-start-security +### b. Configure this security group to also allow SSH traffic via IPv4 ipv6_vpc_name = "" + +# run standard sagemaker remote tests from test/sagemaker_tests sagemaker_remote_tests = true +# run efa sagemaker tests sagemaker_efa_tests = false +# run release_candidate_integration tests sagemaker_rc_tests = false +# run sagemaker benchmark tests sagemaker_benchmark_tests = false + +# SM remote EFA test instance type sagemaker_remote_efa_instance_type = "" + +# Run CI tests for nightly images +# false by default nightly_pr_test_mode = false + use_scheduler = false [buildspec_override] +# Assign the path to the required buildspec file from the deep-learning-containers folder +# For example: +# dlc-pr-tensorflow-2-habana-training = "habana/tensorflow/training/buildspec-2-10.yml" +# dlc-pr-pytorch-inference = "pytorch/inference/buildspec-1-12.yml" +# Setting the buildspec file path to "" allows the image builder to choose the default buildspec file. 
+ +### TRAINING PR JOBS ### + +# Standard Framework Training dlc-pr-pytorch-training = "" dlc-pr-tensorflow-2-training = "" dlc-pr-autogluon-training = "" + +# ARM64 Training dlc-pr-pytorch-arm64-training = "" + +# HuggingFace Training dlc-pr-huggingface-tensorflow-training = "" dlc-pr-huggingface-pytorch-training = "" + +# Training Compiler dlc-pr-huggingface-pytorch-trcomp-training = "" dlc-pr-huggingface-tensorflow-2-trcomp-training = "" dlc-pr-pytorch-trcomp-training = "" + +# Neuron Training dlc-pr-pytorch-neuron-training = "" dlc-pr-tensorflow-2-neuron-training = "" + +# Stability AI Training dlc-pr-stabilityai-pytorch-training = "" + +# Habana Training dlc-pr-pytorch-habana-training = "" dlc-pr-tensorflow-2-habana-training = "" + +### INFERENCE PR JOBS ### + +# Standard Framework Inference dlc-pr-pytorch-inference = "" dlc-pr-tensorflow-2-inference = "" dlc-pr-autogluon-inference = "" + +# Graviton Inference dlc-pr-pytorch-graviton-inference = "" dlc-pr-tensorflow-2-graviton-inference = "" + +# ARM64 Inference dlc-pr-pytorch-arm64-inference = "" dlc-pr-tensorflow-2-arm64-inference = "" + +# Neuron Inference dlc-pr-pytorch-neuron-inference = "" dlc-pr-tensorflow-1-neuron-inference = "" dlc-pr-tensorflow-2-neuron-inference = "" + +# HuggingFace Inference dlc-pr-huggingface-tensorflow-inference = "" dlc-pr-huggingface-pytorch-inference = "" dlc-pr-huggingface-pytorch-neuron-inference = "" + +# Stability AI Inference dlc-pr-stabilityai-pytorch-inference = "" -dlc-pr-pytorch-eia-inference = "" -dlc-pr-tensorflow-2-eia-inference = "" -# WARNING: Unrecognized key generated below -dlc-pr-huggingface-pytorch-neuronx-inference = "huggingface/pytorch/inference/buildspec-neuronx.yml" +# EIA Inference +dlc-pr-pytorch-eia-inference = "" +dlc-pr-tensorflow-2-eia-inference = "" \ No newline at end of file From ae973f6b508a23cd6e99411508c41c9bc62f21f3 Mon Sep 17 00:00:00 2001 From: Bin Navin Patel Date: Mon, 4 Aug 2025 14:43:27 -0400 Subject: [PATCH 04/13] Update ['dlc_developer_config.toml'] dlc_developer_config.toml: { 'build': { 'build_frameworks': ['huggingface_pytorch'], 'build_inference': True, 'build_training': False}, 'buildspec_override': { 'dlc-pr-huggingface-pytorch-neuronx-inference': 'huggingface/pytorch/inference/buildspec-neuronx.yml'}, 'dev': { 'arm64_mode': False, 'deep_canary_mode': False, 'graviton_mode': False, 'neuronx_mode': True}, 'test': { 'ec2_tests': False, 'ecs_tests': False, 'eks_tests': False, 'sagemaker_local_tests': False, 'sagemaker_remote_tests': False, 'sanity_tests': True, 'security_tests': False}} --- dlc_developer_config.toml | 134 +++++--------------------------------- 1 file changed, 16 insertions(+), 118 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 1962bfd69e21..9f2b27ad34b0 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -1,179 +1,77 @@ [dev] -# Set to "huggingface", for example, if you are a huggingface developer. 
Default is "" partner_developer = "" -# Please only set it to true if you are preparing an EI related PR -# Do remember to revert it back to false before merging any PR (including EI dedicated PR) ei_mode = false -# Please only set it to true if you are preparing a NEURON related PR -# Do remember to revert it back to false before merging any PR (including NEURON dedicated PR) neuron_mode = false -# Please only set it to true if you are preparing a NEURONX related PR -# Do remember to revert it back to false before merging any PR (including NEURONX dedicated PR) -neuronx_mode = false -# Please only set it to true if you are preparing a GRAVITON related PR -# Do remember to revert it back to false before merging any PR (including GRAVITON dedicated PR) +neuronx_mode = true graviton_mode = false -# Please only set it to true if you are preparing a ARM64 related PR -# Do remember to revert it back to false before merging any PR (including ARM64 dedicated PR) arm64_mode = false -# Please only set it to True if you are preparing a HABANA related PR -# Do remember to revert it back to False before merging any PR (including HABANA dedicated PR) habana_mode = false -# Please only set it to True if you are preparing a HUGGINGFACE TRCOMP related PR -# Do remember to revert it back to False before merging any PR (including HUGGINGFACE TRCOMP dedicated PR) -# This mode is used to build TF 2.6 and PT1.11 DLC huggingface_trcomp_mode = false -# Please only set it to True if you are preparing a TRCOMP related PR -# Do remember to revert it back to False before merging any PR (including TRCOMP dedicated PR) -# This mode is used to build PT1.12 and above DLC trcomp_mode = false -# Set deep_canary_mode to true to simulate Deep Canary Test conditions on PR for all frameworks in the -# build_frameworks list below. This will cause all image builds and non-deep-canary tests on the PR to be skipped, -# regardless of whether they are enabled or disabled below. -# Set graviton_mode/arm64_mode to true to run Deep Canaries on Graviton/ARM64 images. -# Do remember to revert it back to false before merging any PR. deep_canary_mode = false [build] -# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image. -# available frameworks - ["base", "vllm", "autogluon", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"] -build_frameworks = [] - - -# By default we build both training and inference containers. Set true/false values to determine which to build. 
-build_training = true +build_frameworks = [ "huggingface_pytorch",] +build_training = false build_inference = true - -# Set do_build to "false" to skip builds and test the latest image built by this PR -# Note: at least one build is required to set do_build to "false" do_build = true [notify] -### Notify on test failures -### Off by default notify_test_failures = false - # Valid values: medium or high - notification_severity = "medium" +notification_severity = "medium" [test] -### On by default sanity_tests = true -security_tests = true - safety_check_test = false - ecr_scan_allowlist_feature = false -ecs_tests = true -eks_tests = true -ec2_tests = true -# Set it to true if you are preparing a Benchmark related PR +security_tests = false +safety_check_test = false +ecr_scan_allowlist_feature = false +ecs_tests = false +eks_tests = false +ec2_tests = false ec2_benchmark_tests = false - -### Set ec2_tests_on_heavy_instances = true to be able to run any EC2 tests that use large/expensive instance types by -### default. If false, these types of tests will be skipped while other tests will run as usual. -### These tests are run in EC2 test jobs, so ec2_tests must be true if ec2_tests_on_heavy_instances is true. -### Off by default (set to false) ec2_tests_on_heavy_instances = false -### SM specific tests -### On by default -sagemaker_local_tests = true -### Set enable_ipv6 = true to run tests with IPv6-enabled resources -### Off by default (set to false) +sagemaker_local_tests = false enable_ipv6 = false -### Set the VPC name to be used for IPv6 testing, this variable is empty by default -### To create an IPv6-enabled VPC and its related resources: -### 1. Follow this AWS doc: https://docs.aws.amazon.com/vpc/latest/userguide/create-vpc.html#create-vpc-and-other-resources -### 2. After creating the VPC and related resources: -### a. Set 'Auto-assign IPv6 address' option to 'No' in all public subnets within the VPC -### b. Configure the default security group to allow SSH traffic using IPv4 -### -### 3. Create an EFA-enabled security group: -### a. Follow 'Step 1: Prepare an EFA-enabled security group' in: -### https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/efa-start.html#efa-start-security -### b. Configure this security group to also allow SSH traffic via IPv4 ipv6_vpc_name = "" - -# run standard sagemaker remote tests from test/sagemaker_tests -sagemaker_remote_tests = true -# run efa sagemaker tests +sagemaker_remote_tests = false sagemaker_efa_tests = false -# run release_candidate_integration tests sagemaker_rc_tests = false -# run sagemaker benchmark tests sagemaker_benchmark_tests = false - -# SM remote EFA test instance type sagemaker_remote_efa_instance_type = "" - -# Run CI tests for nightly images -# false by default nightly_pr_test_mode = false - use_scheduler = false [buildspec_override] -# Assign the path to the required buildspec file from the deep-learning-containers folder -# For example: -# dlc-pr-tensorflow-2-habana-training = "habana/tensorflow/training/buildspec-2-10.yml" -# dlc-pr-pytorch-inference = "pytorch/inference/buildspec-1-12.yml" -# Setting the buildspec file path to "" allows the image builder to choose the default buildspec file. 
- -### TRAINING PR JOBS ### - -# Standard Framework Training dlc-pr-pytorch-training = "" dlc-pr-tensorflow-2-training = "" dlc-pr-autogluon-training = "" - -# ARM64 Training dlc-pr-pytorch-arm64-training = "" - -# HuggingFace Training dlc-pr-huggingface-tensorflow-training = "" dlc-pr-huggingface-pytorch-training = "" - -# Training Compiler dlc-pr-huggingface-pytorch-trcomp-training = "" dlc-pr-huggingface-tensorflow-2-trcomp-training = "" dlc-pr-pytorch-trcomp-training = "" - -# Neuron Training dlc-pr-pytorch-neuron-training = "" dlc-pr-tensorflow-2-neuron-training = "" - -# Stability AI Training dlc-pr-stabilityai-pytorch-training = "" - -# Habana Training dlc-pr-pytorch-habana-training = "" dlc-pr-tensorflow-2-habana-training = "" - -### INFERENCE PR JOBS ### - -# Standard Framework Inference dlc-pr-pytorch-inference = "" dlc-pr-tensorflow-2-inference = "" dlc-pr-autogluon-inference = "" - -# Graviton Inference dlc-pr-pytorch-graviton-inference = "" dlc-pr-tensorflow-2-graviton-inference = "" - -# ARM64 Inference dlc-pr-pytorch-arm64-inference = "" dlc-pr-tensorflow-2-arm64-inference = "" - -# Neuron Inference dlc-pr-pytorch-neuron-inference = "" dlc-pr-tensorflow-1-neuron-inference = "" dlc-pr-tensorflow-2-neuron-inference = "" - -# HuggingFace Inference dlc-pr-huggingface-tensorflow-inference = "" dlc-pr-huggingface-pytorch-inference = "" dlc-pr-huggingface-pytorch-neuron-inference = "" - -# Stability AI Inference dlc-pr-stabilityai-pytorch-inference = "" - -# EIA Inference dlc-pr-pytorch-eia-inference = "" -dlc-pr-tensorflow-2-eia-inference = "" \ No newline at end of file +dlc-pr-tensorflow-2-eia-inference = "" +# WARNING: Unrecognized key generated below +dlc-pr-huggingface-pytorch-neuronx-inference = "huggingface/pytorch/inference/buildspec-neuronx.yml" + From 766e942f2f7fdb340dabf13ac7200c1715faa817 Mon Sep 17 00:00:00 2001 From: Bin Navin Patel Date: Mon, 4 Aug 2025 16:14:26 -0400 Subject: [PATCH 05/13] Update ['dlc_developer_config.toml'] dlc_developer_config.toml: { 'build': { 'build_frameworks': ['huggingface_pytorch'], 'build_inference': True, 'build_training': False}, 'buildspec_override': { 'dlc-pr-huggingface-pytorch-neuronx-inference': 'huggingface/pytorch/inference/buildspec-neuronx.yml'}, 'dev': { 'arm64_mode': False, 'deep_canary_mode': False, 'graviton_mode': False, 'neuronx_mode': True}, 'test': { 'ec2_tests': True, 'ecs_tests': True, 'eks_tests': True, 'sagemaker_local_tests': True, 'sagemaker_remote_tests': True, 'sanity_tests': True, 'security_tests': True}} --- dlc_developer_config.toml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml index 9f2b27ad34b0..e710e4cfd578 100644 --- a/dlc_developer_config.toml +++ b/dlc_developer_config.toml @@ -22,18 +22,18 @@ notification_severity = "medium" [test] sanity_tests = true -security_tests = false +security_tests = true safety_check_test = false ecr_scan_allowlist_feature = false -ecs_tests = false -eks_tests = false -ec2_tests = false +ecs_tests = true +eks_tests = true +ec2_tests = true ec2_benchmark_tests = false ec2_tests_on_heavy_instances = false -sagemaker_local_tests = false +sagemaker_local_tests = true enable_ipv6 = false ipv6_vpc_name = "" -sagemaker_remote_tests = false +sagemaker_remote_tests = true sagemaker_efa_tests = false sagemaker_rc_tests = false sagemaker_benchmark_tests = false From 6a66584b3c988de5744d8c880ce77ec8616c1f6c Mon Sep 17 00:00:00 2001 From: Bin Navin Patel Date: Tue, 5 Aug 2025 16:55:54 -0400 Subject: 
Subject: [PATCH 06/13] Update ['dlc_developer_config.toml']

dlc_developer_config.toml:
{ 'build': { 'build_frameworks': ['huggingface_pytorch'], 'build_inference': True, 'build_training': False},
  'buildspec_override': { 'dlc-pr-huggingface-pytorch-neuronx-inference': 'huggingface/pytorch/inference/buildspec-neuronx.yml'},
  'dev': { 'arm64_mode': False, 'deep_canary_mode': False, 'graviton_mode': False, 'neuronx_mode': True},
  'test': { 'ec2_tests': False, 'ecs_tests': False, 'eks_tests': False, 'sagemaker_local_tests': False, 'sagemaker_remote_tests': False, 'sanity_tests': True, 'security_tests': False}}
---
 dlc_developer_config.toml | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/dlc_developer_config.toml b/dlc_developer_config.toml
index e710e4cfd578..9f2b27ad34b0 100644
--- a/dlc_developer_config.toml
+++ b/dlc_developer_config.toml
@@ -22,18 +22,18 @@ notification_severity = "medium"
 
 [test]
 sanity_tests = true
-security_tests = true
+security_tests = false
 safety_check_test = false
 ecr_scan_allowlist_feature = false
-ecs_tests = true
-eks_tests = true
-ec2_tests = true
+ecs_tests = false
+eks_tests = false
+ec2_tests = false
 ec2_benchmark_tests = false
 ec2_tests_on_heavy_instances = false
-sagemaker_local_tests = true
+sagemaker_local_tests = false
 enable_ipv6 = false
 ipv6_vpc_name = ""
-sagemaker_remote_tests = true
+sagemaker_remote_tests = false
 sagemaker_efa_tests = false
 sagemaker_rc_tests = false
 sagemaker_benchmark_tests = false

From 43f88c6ae88e89627395261004220846eef65cc6 Mon Sep 17 00:00:00 2001
From: Bin Navin Patel
Date: Tue, 5 Aug 2025 17:16:25 -0400
Subject: [PATCH 07/13] Update ['dlc_developer_config.toml']

dlc_developer_config.toml:
{ 'build': { 'build_frameworks': ['huggingface_pytorch'], 'build_inference': True, 'build_training': False},
  'buildspec_override': { 'dlc-pr-huggingface-pytorch-neuronx-inference': 'huggingface/pytorch/inference/buildspec-neuronx.yml'},
  'dev': { 'arm64_mode': False, 'deep_canary_mode': False, 'graviton_mode': False, 'neuronx_mode': True},
  'test': { 'ec2_tests': False, 'ecs_tests': False, 'eks_tests': False, 'sagemaker_local_tests': False, 'sagemaker_remote_tests': False, 'sanity_tests': True, 'security_tests': False}}

From 3117cf0a199e25acade710ecb1b73dbd03035e71 Mon Sep 17 00:00:00 2001
From: Bin Navin Patel
Date: Tue, 5 Aug 2025 19:36:14 -0400
Subject: [PATCH 08/13] fixing glibc

---
 .../inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
index cc2f8239e214..ee2bb4fdf078 100644
--- a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
+++ b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
@@ -67,6 +67,10 @@ RUN apt-get update \
 && rm -rf /tmp/tmp* \
 && apt-get clean
 
+# Solve `GLIBCXX' not found
+RUN add-apt-repository ppa:ubuntu-toolchain-r/test -y \
+ && apt-get upgrade -y libstdc++6
+
 RUN echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list
 RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -

From 0cc93ec4b11d3f0273064abc88fe215e5cb1a42f Mon Sep 17 00:00:00 2001
From: Bin Navin Patel
Date: Tue, 5 Aug 2025 19:37:25 -0400
Subject: [PATCH 09/13] Update ['dlc_developer_config.toml']

dlc_developer_config.toml:
{ 'build': { 'build_frameworks': ['huggingface_pytorch'], 'build_inference': True, 'build_training': False},
  'buildspec_override': { 'dlc-pr-huggingface-pytorch-neuronx-inference': 'huggingface/pytorch/inference/buildspec-neuronx.yml'},
  'dev': { 'arm64_mode': False, 'deep_canary_mode': False, 'graviton_mode': False, 'neuronx_mode': True},
  'test': { 'ec2_tests': False, 'ecs_tests': False, 'eks_tests': False, 'sagemaker_local_tests': False, 'sagemaker_remote_tests': False, 'sanity_tests': True, 'security_tests': False}}

From da61844289d288f93612b698255029da8a369746 Mon Sep 17 00:00:00 2001
From: Bin Navin Patel
Date: Tue, 5 Aug 2025 21:09:12 -0400
Subject: [PATCH 10/13] adding libc6-dev

---
 .../inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx | 1 +
 1 file changed, 1 insertion(+)

diff --git a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
index ee2bb4fdf078..efe4b877764d 100644
--- a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
+++ b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
@@ -63,6 +63,7 @@ RUN apt-get update \
 libgstreamer1.0-0 \
 libsoup2.4-1 \
 libsqlite3-0 \
+ libc6-dev \
 && rm -rf /var/lib/apt/lists/* \
 && rm -rf /tmp/tmp* \
 && apt-get clean

From 2f2b43ff0d902a80f5cb8983542b25e75f986ba1 Mon Sep 17 00:00:00 2001
From: Bin Navin Patel
Date: Tue, 5 Aug 2025 21:09:27 -0400
Subject: [PATCH 11/13] Update ['dlc_developer_config.toml']

dlc_developer_config.toml:
{ 'build': { 'build_frameworks': ['huggingface_pytorch'], 'build_inference': True, 'build_training': False},
  'buildspec_override': { 'dlc-pr-huggingface-pytorch-neuronx-inference': 'huggingface/pytorch/inference/buildspec-neuronx.yml'},
  'dev': { 'arm64_mode': False, 'deep_canary_mode': False, 'graviton_mode': False, 'neuronx_mode': True},
  'test': { 'ec2_tests': False, 'ecs_tests': False, 'eks_tests': False, 'sagemaker_local_tests': False, 'sagemaker_remote_tests': False, 'sanity_tests': True, 'security_tests': False}}

From be4ef5411cd71068434adb77a76e86cc733d2c59 Mon Sep 17 00:00:00 2001
From: Bin Navin Patel
Date: Tue, 5 Aug 2025 21:42:36 -0400
Subject: [PATCH 12/13] putting original dockerfile

---
 .../inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
index efe4b877764d..a6c89dbbd624 100644
--- a/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
+++ b/huggingface/pytorch/inference/docker/2.1/py3/sdk2.20.0/Dockerfile.neuronx
@@ -62,15 +62,15 @@ RUN apt-get update \
 libxml2 \
 libgstreamer1.0-0 \
 libsoup2.4-1 \
- libsqlite3-0 \
- libc6-dev \
+ # libsqlite3-0 \
+ # libc6-dev \
 && rm -rf /var/lib/apt/lists/* \
 && rm -rf /tmp/tmp* \
 && apt-get clean
 
 # Solve `GLIBCXX' not found
-RUN add-apt-repository ppa:ubuntu-toolchain-r/test -y \
- && apt-get upgrade -y libstdc++6
+# RUN add-apt-repository ppa:ubuntu-toolchain-r/test -y \
+# && apt-get upgrade -y libstdc++6
 
 RUN echo "deb https://apt.repos.neuron.amazonaws.com focal main" > /etc/apt/sources.list.d/neuron.list
 RUN wget -qO - https://apt.repos.neuron.amazonaws.com/GPG-PUB-KEY-AMAZON-AWS-NEURON.PUB | apt-key add -

From bc73fc07fa4b7ee4d7049c1a4a6ba37b3db2ad5c Mon Sep 17 00:00:00 2001
From: Bin Navin Patel
Date: Tue, 5 Aug 2025 21:42:46 -0400
Subject: [PATCH 13/13] Update ['dlc_developer_config.toml']

dlc_developer_config.toml:
{ 'build': { 'build_frameworks': ['huggingface_pytorch'], 'build_inference': True, 'build_training': False},
  'buildspec_override': { 'dlc-pr-huggingface-pytorch-neuronx-inference': 'huggingface/pytorch/inference/buildspec-neuronx.yml'},
  'dev': { 'arm64_mode': False, 'deep_canary_mode': False, 'graviton_mode': False, 'neuronx_mode': True},
  'test': { 'ec2_tests': False, 'ecs_tests': False, 'eks_tests': False, 'sagemaker_local_tests': False, 'sagemaker_remote_tests': False, 'sanity_tests': True, 'security_tests': False}}