From 81be7c7a08d3331446ca145a1bbf800251c0cfb9 Mon Sep 17 00:00:00 2001 From: Greg Cobb Date: Mon, 18 Aug 2025 14:51:45 -0700 Subject: [PATCH 1/3] Revert "Catch error thrown if Diego Process Sync creates a desired lrp process" Revert "Remove unnecessary logger" Revert "Rubocop fixes" This reverts commits: e321fa8f014d57296abbec4572cb5525f4d96601. fc96bb6e245e2b0e1e7ec62ed23c139bf1697452. 133486e37c8a66883f8cad0da6fcaeefad0feda3. --- .../diego/desire_app_handler.rb | 11 +------- .../diego/desire_app_handler_spec.rb | 28 ------------------- 2 files changed, 1 insertion(+), 38 deletions(-) diff --git a/lib/cloud_controller/diego/desire_app_handler.rb b/lib/cloud_controller/diego/desire_app_handler.rb index 498d6eca648..a8cbd7e5c64 100644 --- a/lib/cloud_controller/diego/desire_app_handler.rb +++ b/lib/cloud_controller/diego/desire_app_handler.rb @@ -6,16 +6,7 @@ def create_or_update_app(process, client) if (existing_lrp = client.get_app(process)) client.update_app(process, existing_lrp) else - begin - client.desire_app(process) - rescue CloudController::Errors::ApiError => e # catch race condition if Diego Process Sync creates an LRP in the meantime - if e.name == 'RunnerError' && e.message['the requested resource already exists'] - existing_lrp = client.get_app(process) - client.update_app(process, existing_lrp) - elsif e.name == 'UnprocessableEntity' - raise - end - end + client.desire_app(process) end end end diff --git a/spec/unit/lib/cloud_controller/diego/desire_app_handler_spec.rb b/spec/unit/lib/cloud_controller/diego/desire_app_handler_spec.rb index 6db219ac2c5..23ca41a7330 100644 --- a/spec/unit/lib/cloud_controller/diego/desire_app_handler_spec.rb +++ b/spec/unit/lib/cloud_controller/diego/desire_app_handler_spec.rb @@ -35,34 +35,6 @@ module Diego expect(client).not_to have_received(:desire_app) end end - - context 'race condition when the Diego::ProcessesSync runs and creates a process ahead of the DesireAppHandler' do - let(:desired_lrp_update) { double(:desired_lrp_update) } - let(:get_app_response) { nil } - - before do - allow(client).to receive(:update_app) - allow(client).to receive(:desire_app).and_raise CloudController::Errors::ApiError.new_from_details('RunnerError', 'the requested resource already exists') - end - - it 'catches the error and updates the app' do - DesireAppHandler.create_or_update_app(process, client) - expect(client).to have_received(:update_app) - expect(client).to have_received(:get_app).exactly(2).times - end - end - - context 'when root user is not permitted' do - it 'raises a CloudController::Errors::ApiError' do - allow(client).to receive(:desire_app).and_raise(CloudController::Errors::ApiError.new_from_details('UnprocessableEntity', - 'Attempting to run process as root user, which is not permitted.')) - expect { DesireAppHandler.create_or_update_app(process, client) }.to(raise_error do |error| - expect(error).to be_a(CloudController::Errors::ApiError) - expect(error.name).to eq('UnprocessableEntity') - expect(error.message).to include('Attempting to run process as root user, which is not permitted') - end) - end - end end end end From c2d496ab6b38e30a1c332f9a954f44ecafcce968 Mon Sep 17 00:00:00 2001 From: Greg Cobb Date: Mon, 18 Aug 2025 15:43:47 -0700 Subject: [PATCH 2/3] Ignore ResourceExists errors from bbs_apps_client - This is a re-implementation of 133486e37c8a66883f8cad0da6fcaeefad0feda3 - This error can be raised by BBS when there is a race condition between the CC syncer and CC API when creating an LRP for a process - Since we want to desire a process anyway, it already existing (with the correct version) is likely not an error case. Thus, we can behave idempotently. - Improvements from previous implementation: - No longer ignores other types of APIErrors in desire_app_handler.rb - No longer coupled to the exact error message coming from BBS --- lib/cloud_controller/diego/bbs_apps_client.rb | 2 +- .../lib/cloud_controller/diego/bbs_apps_client_spec.rb | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/lib/cloud_controller/diego/bbs_apps_client.rb b/lib/cloud_controller/diego/bbs_apps_client.rb index 39990c59adc..2e74f1afced 100644 --- a/lib/cloud_controller/diego/bbs_apps_client.rb +++ b/lib/cloud_controller/diego/bbs_apps_client.rb @@ -16,7 +16,7 @@ def desire_app(process) logger.info('desire.app.response', process_guid: lrp.process_guid, error: response.error) runner_invalid_request!(response.error.message) if response.error&.type == ::Diego::Bbs::ErrorTypes::InvalidRequest - return response if response.error&.type == ::Diego::Bbs::ErrorTypes::ResourceConflict + return response if [::Diego::Bbs::ErrorTypes::ResourceConflict, ::Diego::Bbs::ErrorTypes::ResourceExists].include?(response.error&.type) response end diff --git a/spec/unit/lib/cloud_controller/diego/bbs_apps_client_spec.rb b/spec/unit/lib/cloud_controller/diego/bbs_apps_client_spec.rb index 5c05ad458f5..f2c6a54e89e 100644 --- a/spec/unit/lib/cloud_controller/diego/bbs_apps_client_spec.rb +++ b/spec/unit/lib/cloud_controller/diego/bbs_apps_client_spec.rb @@ -51,6 +51,14 @@ module VCAP::CloudController::Diego end end + context 'when the bbs response contains a resource exists error' do + let(:lifecycle_error) { ::Diego::Bbs::Models::Error.new(type: ::Diego::Bbs::Models::Error::Type::ResourceExists) } + + it 'does not raise error' do + expect { client.desire_app(process) }.not_to raise_error + end + end + context 'when the bbs response contains an invalid request error' do let(:lifecycle_error) { ::Diego::Bbs::Models::Error.new(type: ::Diego::Bbs::Models::Error::Type::InvalidRequest, message: 'bad request') } From ff23394e34955f07bc023a79e074e2c22603f583 Mon Sep 17 00:00:00 2001 From: Greg Cobb Date: Tue, 26 Aug 2025 00:46:12 -0700 Subject: [PATCH 3/3] Improve bbs_apps_client error logs Previous: ``` "error":{"descriptor":[{},{}]}} ``` Improved: ``` "error":"\u003cDiego::Bbs::Models::Error: type: :ResourceExists, message: \"the requested resource already exists\"\u003e" ``` --- lib/cloud_controller/diego/bbs_apps_client.rb | 14 +- .../diego/bbs_apps_client_spec.rb | 128 +++++++++++++++++- 2 files changed, 134 insertions(+), 8 deletions(-) diff --git a/lib/cloud_controller/diego/bbs_apps_client.rb b/lib/cloud_controller/diego/bbs_apps_client.rb index 2e74f1afced..72bc172e8d8 100644 --- a/lib/cloud_controller/diego/bbs_apps_client.rb +++ b/lib/cloud_controller/diego/bbs_apps_client.rb @@ -13,7 +13,7 @@ def desire_app(process) lrp = AppRecipeBuilder.new(config: @config, process: process).build_app_lrp response = @client.desire_lrp(lrp) - logger.info('desire.app.response', process_guid: lrp.process_guid, error: response.error) + logger.info('desire.app.response', process_guid: lrp.process_guid, error: response.error&.to_s) runner_invalid_request!(response.error.message) if response.error&.type == ::Diego::Bbs::ErrorTypes::InvalidRequest return response if [::Diego::Bbs::ErrorTypes::ResourceConflict, ::Diego::Bbs::ErrorTypes::ResourceExists].include?(response.error&.type) @@ -30,7 +30,7 @@ def update_app(process, existing_lrp) handle_diego_errors(process_guid) do response = @client.update_desired_lrp(process_guid, lrp_update) - logger.info('update.app.response', process_guid: process_guid, error: response.error) + logger.info('update.app.response', process_guid: process_guid, error: response.error&.to_s) runner_invalid_request!(response.error.message) if response.error&.type == ::Diego::Bbs::ErrorTypes::InvalidRequest return response if response.error&.type == ::Diego::Bbs::ErrorTypes::ResourceConflict @@ -45,7 +45,7 @@ def get_app(process) result = handle_diego_errors(process_guid) do response = @client.desired_lrp_by_process_guid(process_guid) - logger.info('get.app.response', process_guid: process_guid, error: response.error) + logger.info('get.app.response', process_guid: process_guid, error: response.error&.to_s) return nil if response.error&.type == ::Diego::Bbs::ErrorTypes::ResourceNotFound @@ -59,7 +59,7 @@ def stop_app(process_guid) logger.info('stop.app.request', process_guid:) handle_diego_errors(process_guid) do response = @client.remove_desired_lrp(process_guid) - logger.info('stop.app.response', process_guid: process_guid, error: response.error) + logger.info('stop.app.response', process_guid: process_guid, error: response.error&.to_s) return nil if response.error&.type == ::Diego::Bbs::ErrorTypes::ResourceNotFound @@ -72,7 +72,7 @@ def stop_index(process_guid, index) actual_lrp_key = ::Diego::Bbs::Models::ActualLRPKey.new(process_guid: process_guid, index: index, domain: APP_LRP_DOMAIN) handle_diego_errors(process_guid) do response = @client.retire_actual_lrp(actual_lrp_key) - logger.info('stop.index.response', process_guid: process_guid, index: index, error: response.error) + logger.info('stop.index.response', process_guid: process_guid, index: index, error: response.error&.to_s) return nil if response.error&.type == ::Diego::Bbs::ErrorTypes::ResourceNotFound @@ -85,7 +85,7 @@ def fetch_scheduling_infos handle_diego_errors do response = @client.desired_lrp_scheduling_infos(APP_LRP_DOMAIN) - logger.info('fetch.scheduling.infos.response', error: response.error) + logger.info('fetch.scheduling.infos.response', error: response.error&.to_s) response end.desired_lrp_scheduling_infos end @@ -95,7 +95,7 @@ def bump_freshness handle_diego_errors do response = @client.upsert_domain(domain: APP_LRP_DOMAIN, ttl: APP_LRP_DOMAIN_TTL) - logger.info('bump.freshness.response', error: response.error) + logger.info('bump.freshness.response', error: response.error&.to_s) response end end diff --git a/spec/unit/lib/cloud_controller/diego/bbs_apps_client_spec.rb b/spec/unit/lib/cloud_controller/diego/bbs_apps_client_spec.rb index f2c6a54e89e..ee5f62f39ec 100644 --- a/spec/unit/lib/cloud_controller/diego/bbs_apps_client_spec.rb +++ b/spec/unit/lib/cloud_controller/diego/bbs_apps_client_spec.rb @@ -7,8 +7,9 @@ module VCAP::CloudController::Diego describe '#desire_app' do let(:bbs_client) { instance_double(::Diego::Client, desire_lrp: lrp_response) } - let(:lrp) { ::Diego::Bbs::Models::DesiredLRP.new } + let(:lrp) { ::Diego::Bbs::Models::DesiredLRP.new process_guid: lrp_process_guid } let(:process) { VCAP::CloudController::ProcessModel.make } + let(:lrp_process_guid) { ProcessGuid.from_process process } let(:lrp_response) { ::Diego::Bbs::Models::DesiredLRPLifecycleResponse.new(error: lifecycle_error) } let(:lifecycle_error) { nil } @@ -81,6 +82,24 @@ module VCAP::CloudController::Diego expect(e.name).to eq('RunnerError') end end + + it 'logs the error' do + tail_logs do |logs| + expect { client.desire_app(process) }.to raise_error(CloudController::Errors::ApiError) + + expect( + logs.read.find { |l| l['message'] == 'desire.app.response' } + ).to match( + hash_including( + 'message' => 'desire.app.response', + 'data' => { + 'error' => /error message/, + 'process_guid' => lrp_process_guid + } + ) + ) + end + end end end @@ -142,6 +161,24 @@ module VCAP::CloudController::Diego expect(e.name).to eq('RunnerError') end end + + it 'logs the error' do + tail_logs do |logs| + expect { client.stop_app(process_guid) }.to raise_error(CloudController::Errors::ApiError) + + expect( + logs.read.find { |l| l['message'] == 'stop.app.response' } + ).to match( + hash_including( + 'message' => 'stop.app.response', + 'data' => { + 'error' => /error message/, + 'process_guid' => process_guid + } + ) + ) + end + end end context 'when bbs client errors' do @@ -194,6 +231,25 @@ module VCAP::CloudController::Diego expect(e.name).to eq('RunnerError') end end + + it 'logs the error' do + tail_logs do |logs| + expect { client.stop_index(process_guid, index) }.to raise_error(CloudController::Errors::ApiError) + + expect( + logs.read.find { |l| l['message'] == 'stop.index.response' } + ).to match( + hash_including( + 'message' => 'stop.index.response', + 'data' => { + 'error' => /error message/, + 'process_guid' => process_guid, + 'index' => index + } + ) + ) + end + end end context 'when bbs client errors' do @@ -245,6 +301,24 @@ module VCAP::CloudController::Diego expect(e.name).to eq('RunnerError') end end + + it 'logs the error' do + tail_logs do |logs| + expect { client.get_app(process) }.to raise_error(CloudController::Errors::ApiError) + + expect( + logs.read.find { |l| l['message'] == 'get.app.response' } + ).to match( + hash_including( + 'message' => 'get.app.response', + 'data' => { + 'error' => /error message/, + 'process_guid' => 'process-guid' + } + ) + ) + end + end end context 'when bbs client errors' do @@ -320,6 +394,24 @@ module VCAP::CloudController::Diego expect(e.name).to eq('RunnerError') end end + + it 'logs the error' do + tail_logs do |logs| + expect { client.update_app(process, existing_lrp) }.to raise_error(CloudController::Errors::ApiError) + + expect( + logs.read.find { |l| l['message'] == 'update.app.response' } + ).to match( + hash_including( + 'message' => 'update.app.response', + 'data' => { + 'error' => /error message/, + 'process_guid' => process_guid + } + ) + ) + end + end end context 'when bbs client errors' do @@ -362,6 +454,23 @@ module VCAP::CloudController::Diego expect(e.name).to eq('RunnerError') end end + + it 'logs the error' do + tail_logs do |logs| + expect { client.fetch_scheduling_infos }.to raise_error(CloudController::Errors::ApiError) + + expect( + logs.read.find { |l| l['message'] == 'fetch.scheduling.infos.response' } + ).to match( + hash_including( + 'message' => 'fetch.scheduling.infos.response', + 'data' => { + 'error' => /error message/ + } + ) + ) + end + end end context 'when bbs client errors' do @@ -403,6 +512,23 @@ module VCAP::CloudController::Diego expect(e.name).to eq('RunnerError') end end + + it 'logs the error' do + tail_logs do |logs| + expect { client.bump_freshness }.to raise_error(CloudController::Errors::ApiError) + + expect( + logs.read.find { |l| l['message'] == 'bump.freshness.response' } + ).to match( + hash_including( + 'message' => 'bump.freshness.response', + 'data' => { + 'error' => /error message/ + } + ) + ) + end + end end context 'when bbs client errors' do