diff --git a/cookbooks/aws-parallelcluster-entrypoints/recipes/install.rb b/cookbooks/aws-parallelcluster-entrypoints/recipes/install.rb
index 4555477c4e..308f0ec2aa 100644
--- a/cookbooks/aws-parallelcluster-entrypoints/recipes/install.rb
+++ b/cookbooks/aws-parallelcluster-entrypoints/recipes/install.rb
@@ -17,6 +17,7 @@
 return if node['conditions']['ami_bootstrapped']
 
 include_recipe "aws-parallelcluster-shared::setup_envars"
+include_recipe "aws-parallelcluster-shared::setup_proxy" if node['cluster']['install_http_proxy_address']
 
 include_recipe 'aws-parallelcluster-platform::install'
 include_recipe 'aws-parallelcluster-environment::install'
diff --git a/cookbooks/aws-parallelcluster-entrypoints/spec/unit/recipes/install_spec.rb b/cookbooks/aws-parallelcluster-entrypoints/spec/unit/recipes/install_spec.rb
index 684ac4ec53..12a4976a37 100644
--- a/cookbooks/aws-parallelcluster-entrypoints/spec/unit/recipes/install_spec.rb
+++ b/cookbooks/aws-parallelcluster-entrypoints/spec/unit/recipes/install_spec.rb
@@ -23,9 +23,11 @@
         aws-parallelcluster-awsbatch::install
       )
 
+      setup_proxy_recipe = 'aws-parallelcluster-shared::setup_proxy'
+
       before do
         @included_recipes = []
-        all_recipes.each do |recipe_name|
+        (all_recipes + [setup_proxy_recipe]).each do |recipe_name|
           allow_any_instance_of(Chef::Recipe).to receive(:include_recipe).with(recipe_name) do
             @included_recipes << recipe_name
           end
@@ -61,6 +63,7 @@
         it "includes all recipes in the right order" do
           chef_run
           expect(@included_recipes).to eq(all_recipes)
+          expect(@included_recipes).not_to include(setup_proxy_recipe)
         end
       end
 
@@ -78,6 +81,22 @@
           expect(@included_recipes).to eq(all_recipes - %w(aws-parallelcluster-awsbatch::install))
         end
       end
+
+      context "when install_http_proxy_address is set" do
+        cached(:chef_run) do
+          runner = runner(platform: platform, version: version) do |node|
+            node.override['conditions']['ami_bootstrapped'] = false
+            node.override['cluster']['skip_awsbatch_cli_install'] = false
+            node.override['cluster']['install_http_proxy_address'] = 'http://10.0.0.109:8888'
+          end
+          runner.converge(described_recipe)
+        end
+
+        it "includes setup_proxy recipe" do
+          chef_run
+          expect(@included_recipes).to include(setup_proxy_recipe)
+        end
+      end
     end
   end
 end
diff --git a/cookbooks/aws-parallelcluster-shared/recipes/setup_proxy.rb b/cookbooks/aws-parallelcluster-shared/recipes/setup_proxy.rb
new file mode 100644
index 0000000000..dddcab55eb
--- /dev/null
+++ b/cookbooks/aws-parallelcluster-shared/recipes/setup_proxy.rb
@@ -0,0 +1,95 @@
+# frozen_string_literal: true
+
+#
+# Cookbook:: aws-parallelcluster
+# Recipe:: setup_proxy
+#
+# Copyright:: 2026 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance with the
+# License. A copy of the License is located at
+#
+# http://aws.amazon.com/apache2.0/
+#
+# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This recipe configures proxy environment variables for build-image in isolated networks.
+#
+# It reads the proxy URL from node['cluster']['install_http_proxy_address'] (set via ExtraChefAttributes)
+# and configures http_proxy/https_proxy ENV vars for the Chef run. This makes all subsequent
+# Chef resources (remote_file, bash, execute, etc.) use the explicit proxy for HTTPS traffic
+# instead of trying direct connections that would fail in an isolated network.
+#
+# The no_proxy list excludes S3 endpoints so downloads from S3 go through the VPC Gateway
+# Endpoint directly, not through the proxy.
+#
+# Both leading-dot and bare-host entries are listed for each regional S3 endpoint:
+#   ".s3.{region}.amazonaws.com" — matches subdomains (virtual-hosted bucket URLs)
+#     e.g., mybucket.s3.us-east-1.amazonaws.com used by remote_file downloads
+#   "s3.{region}.amazonaws.com" — matches the exact host (path-style URLs)
+#     e.g., s3.us-east-1.amazonaws.com/mybucket/key used by aws s3 presign URLs
+#     (the cfn-bootstrap bucket uses the global form, https://s3.amazonaws.com/cloudformation-examples/...)
+#
+# The global endpoint is the exception: only the leading-dot form ".s3.amazonaws.com" is in
+# no_proxy. The bare host s3.amazonaws.com is intentionally left out because it does NOT work
+# through the VPC Gateway Endpoint (SSL errors with FSx repos), so it goes through the proxy
+# instead. The proxy allowlist must include s3.amazonaws.com for this to work.
+#
+# IMDS (169.254.169.254) is excluded so instance metadata queries bypass the proxy.
+#
+# This recipe only runs when install_http_proxy_address is set — normal builds are unaffected.
+
+ruby_block 'configure proxy from install_http_proxy_address' do
+  block do
+    proxy_url = node['cluster']['install_http_proxy_address']
+
+    if proxy_url && !proxy_url.empty?
+      # Validate the whole string (\A..\z, not per-line ^..$): must be http(s)://host:port with optional "/"
+      unless proxy_url.match?(%r{\Ahttps?://[^/:]+:\d+/?\z})
+        raise "Invalid install_http_proxy_address '#{proxy_url}'. Expected format: http(s)://host:port"
+      end
+
+      region = node['cluster']['region']
+
+      # S3 endpoints bypass the proxy and use the VPC Gateway Endpoint.
+      # Covers regional (s3.{region}), dash-style (s3-{region}) and dualstack (s3.dualstack.{region})
+      # variants used by different AWS services and repos, plus subdomains of the global s3.amazonaws.com.
+      no_proxy = [
+        "localhost",
+        "127.0.0.1",
+        "169.254.169.254",
+        ".s3.#{region}.amazonaws.com",
+        "s3.#{region}.amazonaws.com",
+        ".s3-#{region}.amazonaws.com",
+        "s3-#{region}.amazonaws.com",
+        ".s3.amazonaws.com",
+        ".s3.dualstack.#{region}.amazonaws.com",
+        "s3.dualstack.#{region}.amazonaws.com",
+      ].join(",")
+
+      Chef::Log.info("Configuring proxy: #{proxy_url}")
+
+      ENV['http_proxy'] = proxy_url
+      ENV['https_proxy'] = proxy_url
+      ENV['HTTP_PROXY'] = proxy_url
+      ENV['HTTPS_PROXY'] = proxy_url
+      ENV['no_proxy'] = no_proxy
+      ENV['NO_PROXY'] = no_proxy
+
+      # On Ubuntu, configure snapd to use the explicit proxy. snapd uses its own HTTP
+      # client and doesn't go through the transparent proxy (iptables REDIRECT). Without
+      # this, the Firefox transitional package's preinst runs `snap info firefox` via snapd,
+      # which times out, retries for 30 minutes holding the dpkg lock, and blocks all
+      # subsequent apt-get installs (e.g., DCV prerequisites).
+      if node['platform'] == 'ubuntu' && ::File.exist?('/run/snapd.socket')
+        Chef::Log.info("Configuring snapd proxy: #{proxy_url}")
+        shell_out!("snap", "set", "system", "proxy.http=#{proxy_url}")
+        shell_out!("snap", "set", "system", "proxy.https=#{proxy_url}")
+      end
+    else
+      Chef::Log.info("No install_http_proxy_address set, skipping proxy configuration")
+    end
+  end
+end
diff --git a/cookbooks/aws-parallelcluster-shared/spec/unit/recipes/setup_proxy_spec.rb b/cookbooks/aws-parallelcluster-shared/spec/unit/recipes/setup_proxy_spec.rb
new file mode 100644
index 0000000000..6cce29d5b8
--- /dev/null
+++ b/cookbooks/aws-parallelcluster-shared/spec/unit/recipes/setup_proxy_spec.rb
@@ -0,0 +1,110 @@
+# frozen_string_literal: true
+
+require 'spec_helper'
+
+describe 'aws-parallelcluster-shared::setup_proxy' do
+  PROXY_URL = 'http://10.0.0.109:8888'
+  TEST_REGION = 'test-region-1'
+  RUBY_BLOCK_NAME = 'configure proxy from install_http_proxy_address'
+
+  for_all_oses do |platform, version|
+    context "on #{platform}#{version}" do
+      before(:each) do
+        # Clean proxy ENV vars between tests to prevent leakage
+        %w(http_proxy https_proxy HTTP_PROXY HTTPS_PROXY no_proxy NO_PROXY).each { |var| ENV.delete(var) }
+        allow(::File).to receive(:exist?).and_call_original
+      end
+
+      context 'when install_http_proxy_address is set with valid URL' do
+        cached(:chef_run) do
+          runner(platform: platform, version: version) do |node|
+            node.override['cluster'] = { 'install_http_proxy_address' => PROXY_URL, 'region' => TEST_REGION }
+          end.converge(described_recipe)
+        end
+
+        before(:each) do
+          allow_any_instance_of(Chef::Resource::RubyBlock).to receive(:shell_out!).and_return(true)
+        end
+
+        it 'configures proxy environment variables' do
+          expect(chef_run).to run_ruby_block(RUBY_BLOCK_NAME)
+        end
+
+        it 'sets proxy env vars in the ruby block' do
+          chef_run
+          chef_run.ruby_block(RUBY_BLOCK_NAME).block.call
+
+          %w(http_proxy https_proxy HTTP_PROXY HTTPS_PROXY).each do |var|
+            expect(ENV[var]).to eq(PROXY_URL)
+          end
+          # Match whole entries: on the raw string, ".s3.x" would also satisfy a substring check for "s3.x".
+          expect(ENV['no_proxy'].split(',')).to include(".s3.#{TEST_REGION}.amazonaws.com")
+          expect(ENV['no_proxy'].split(',')).to include("s3.#{TEST_REGION}.amazonaws.com")
+          expect(ENV['no_proxy'].split(',')).to include(".s3-#{TEST_REGION}.amazonaws.com")
+          expect(ENV['no_proxy'].split(',')).to include('.s3.amazonaws.com')
+          expect(ENV['no_proxy'].split(',')).to include(".s3.dualstack.#{TEST_REGION}.amazonaws.com")
+          expect(ENV['no_proxy'].split(',')).to include('169.254.169.254')
+          expect(ENV['no_proxy'].split(',')).to include('localhost')
+        end
+
+        # snapd proxy configuration tests
+        { true => 'exists', false => 'does not exist' }.each do |socket_exists, description|
+          context "when snapd socket #{description}" do
+            before(:each) do
+              allow(::File).to receive(:exist?).with('/run/snapd.socket').and_return(socket_exists)
+            end
+
+            if platform == 'ubuntu' && socket_exists
+              it 'configures snapd proxy' do
+                chef_run
+                expect_any_instance_of(Chef::Resource::RubyBlock).to receive(:shell_out!)
+                  .with("snap", "set", "system", "proxy.http=#{PROXY_URL}")
+                expect_any_instance_of(Chef::Resource::RubyBlock).to receive(:shell_out!)
+                  .with("snap", "set", "system", "proxy.https=#{PROXY_URL}")
+                chef_run.ruby_block(RUBY_BLOCK_NAME).block.call
+              end
+            else
+              it 'does not configure snapd proxy' do
+                chef_run
+                expect_any_instance_of(Chef::Resource::RubyBlock).not_to receive(:shell_out!)
+                chef_run.ruby_block(RUBY_BLOCK_NAME).block.call
+              end
+            end
+          end
+        end
+      end
+
+      {
+        nil => { description: 'not set', should_skip: true },
+        '' => { description: 'empty string', should_skip: true },
+        'not-a-valid-url' => { description: 'invalid format', should_skip: false },
+        'http://10.0.0.109' => { description: 'missing port', should_skip: false },
+        "http://10.0.0.109:8888\nextra" => { description: 'valid on the first line only', should_skip: false },
+      }.each do |proxy_value, test_actions|
+        context "when install_http_proxy_address is #{test_actions[:description]}" do
+          cached(:chef_run) do
+            runner(platform: platform, version: version) do |node|
+              attrs = { 'region' => TEST_REGION }
+              attrs['install_http_proxy_address'] = proxy_value unless proxy_value.nil?
+              node.override['cluster'] = attrs
+            end.converge(described_recipe)
+          end
+
+          if test_actions[:should_skip]
+            it 'does not configure proxy' do
+              chef_run
+              chef_run.ruby_block(RUBY_BLOCK_NAME).block.call
+              expect(ENV['http_proxy']).to be_nil
+            end
+          else
+            it 'raises an error' do
+              chef_run
+              expect { chef_run.ruby_block(RUBY_BLOCK_NAME).block.call }
+                .to raise_error(RuntimeError, /Invalid install_http_proxy_address/)
+            end
+          end
+        end
+      end
+    end
+  end
+end