Skip to content

Commit 178065c

Browse files
authored
Ignore ResourceExists errors from bbs_apps_client (#4522)
* Revert "Catch error thrown if Diego Process Sync creates a desired lrp process" Revert "Remove unnecessary logger" Revert "Rubocop fixes" This reverts commits: e321fa8. fc96bb6. 133486e. * Ignore ResourceExists errors from bbs_apps_client - This is a re-implementation of 133486e - This error can be raised by BBS when there is a race condition between the CC syncer and CC API when creating an LRP for a process - Since we want to desire a process anyway, it already existing (with the correct version) is likely not an error case. Thus, we can behave idempotently. - Improvements from previous implementation: - No longer ignores other types of APIErrors in desire_app_handler.rb - No longer coupled to the exact error message coming from BBS * Improve bbs_apps_client error logs Previous: ``` "error":{"descriptor":[{},{}]}} ``` Improved: ``` "error":"\u003cDiego::Bbs::Models::Error: type: :ResourceExists, message: \"the requested resource already exists\"\u003e" ```
1 parent 2f867f0 commit 178065c

File tree

4 files changed

+144
-47
lines changed

4 files changed

+144
-47
lines changed

lib/cloud_controller/diego/bbs_apps_client.rb

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@ def desire_app(process)
1313
lrp = AppRecipeBuilder.new(config: @config, process: process).build_app_lrp
1414

1515
response = @client.desire_lrp(lrp)
16-
logger.info('desire.app.response', process_guid: lrp.process_guid, error: response.error)
16+
logger.info('desire.app.response', process_guid: lrp.process_guid, error: response.error&.to_s)
1717

1818
runner_invalid_request!(response.error.message) if response.error&.type == ::Diego::Bbs::ErrorTypes::InvalidRequest
19-
return response if response.error&.type == ::Diego::Bbs::ErrorTypes::ResourceConflict
19+
return response if [::Diego::Bbs::ErrorTypes::ResourceConflict, ::Diego::Bbs::ErrorTypes::ResourceExists].include?(response.error&.type)
2020

2121
response
2222
end
@@ -30,7 +30,7 @@ def update_app(process, existing_lrp)
3030

3131
handle_diego_errors(process_guid) do
3232
response = @client.update_desired_lrp(process_guid, lrp_update)
33-
logger.info('update.app.response', process_guid: process_guid, error: response.error)
33+
logger.info('update.app.response', process_guid: process_guid, error: response.error&.to_s)
3434

3535
runner_invalid_request!(response.error.message) if response.error&.type == ::Diego::Bbs::ErrorTypes::InvalidRequest
3636
return response if response.error&.type == ::Diego::Bbs::ErrorTypes::ResourceConflict
@@ -45,7 +45,7 @@ def get_app(process)
4545

4646
result = handle_diego_errors(process_guid) do
4747
response = @client.desired_lrp_by_process_guid(process_guid)
48-
logger.info('get.app.response', process_guid: process_guid, error: response.error)
48+
logger.info('get.app.response', process_guid: process_guid, error: response.error&.to_s)
4949

5050
return nil if response.error&.type == ::Diego::Bbs::ErrorTypes::ResourceNotFound
5151

@@ -59,7 +59,7 @@ def stop_app(process_guid)
5959
logger.info('stop.app.request', process_guid:)
6060
handle_diego_errors(process_guid) do
6161
response = @client.remove_desired_lrp(process_guid)
62-
logger.info('stop.app.response', process_guid: process_guid, error: response.error)
62+
logger.info('stop.app.response', process_guid: process_guid, error: response.error&.to_s)
6363

6464
return nil if response.error&.type == ::Diego::Bbs::ErrorTypes::ResourceNotFound
6565

@@ -72,7 +72,7 @@ def stop_index(process_guid, index)
7272
actual_lrp_key = ::Diego::Bbs::Models::ActualLRPKey.new(process_guid: process_guid, index: index, domain: APP_LRP_DOMAIN)
7373
handle_diego_errors(process_guid) do
7474
response = @client.retire_actual_lrp(actual_lrp_key)
75-
logger.info('stop.index.response', process_guid: process_guid, index: index, error: response.error)
75+
logger.info('stop.index.response', process_guid: process_guid, index: index, error: response.error&.to_s)
7676

7777
return nil if response.error&.type == ::Diego::Bbs::ErrorTypes::ResourceNotFound
7878

@@ -85,7 +85,7 @@ def fetch_scheduling_infos
8585

8686
handle_diego_errors do
8787
response = @client.desired_lrp_scheduling_infos(APP_LRP_DOMAIN)
88-
logger.info('fetch.scheduling.infos.response', error: response.error)
88+
logger.info('fetch.scheduling.infos.response', error: response.error&.to_s)
8989
response
9090
end.desired_lrp_scheduling_infos
9191
end
@@ -95,7 +95,7 @@ def bump_freshness
9595

9696
handle_diego_errors do
9797
response = @client.upsert_domain(domain: APP_LRP_DOMAIN, ttl: APP_LRP_DOMAIN_TTL)
98-
logger.info('bump.freshness.response', error: response.error)
98+
logger.info('bump.freshness.response', error: response.error&.to_s)
9999
response
100100
end
101101
end

lib/cloud_controller/diego/desire_app_handler.rb

Lines changed: 1 addition & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -6,16 +6,7 @@ def create_or_update_app(process, client)
66
if (existing_lrp = client.get_app(process))
77
client.update_app(process, existing_lrp)
88
else
9-
begin
10-
client.desire_app(process)
11-
rescue CloudController::Errors::ApiError => e # catch race condition if Diego Process Sync creates an LRP in the meantime
12-
if e.name == 'RunnerError' && e.message['the requested resource already exists']
13-
existing_lrp = client.get_app(process)
14-
client.update_app(process, existing_lrp)
15-
elsif e.name == 'UnprocessableEntity'
16-
raise
17-
end
18-
end
9+
client.desire_app(process)
1910
end
2011
end
2112
end

spec/unit/lib/cloud_controller/diego/bbs_apps_client_spec.rb

Lines changed: 135 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,9 @@ module VCAP::CloudController::Diego
77

88
describe '#desire_app' do
99
let(:bbs_client) { instance_double(::Diego::Client, desire_lrp: lrp_response) }
10-
let(:lrp) { ::Diego::Bbs::Models::DesiredLRP.new }
10+
let(:lrp) { ::Diego::Bbs::Models::DesiredLRP.new process_guid: lrp_process_guid }
1111
let(:process) { VCAP::CloudController::ProcessModel.make }
12+
let(:lrp_process_guid) { ProcessGuid.from_process process }
1213
let(:lrp_response) { ::Diego::Bbs::Models::DesiredLRPLifecycleResponse.new(error: lifecycle_error) }
1314
let(:lifecycle_error) { nil }
1415

@@ -51,6 +52,14 @@ module VCAP::CloudController::Diego
5152
end
5253
end
5354

55+
context 'when the bbs response contains a resource exists error' do
56+
let(:lifecycle_error) { ::Diego::Bbs::Models::Error.new(type: ::Diego::Bbs::Models::Error::Type::ResourceExists) }
57+
58+
it 'does not raise error' do
59+
expect { client.desire_app(process) }.not_to raise_error
60+
end
61+
end
62+
5463
context 'when the bbs response contains an invalid request error' do
5564
let(:lifecycle_error) { ::Diego::Bbs::Models::Error.new(type: ::Diego::Bbs::Models::Error::Type::InvalidRequest, message: 'bad request') }
5665

@@ -73,6 +82,24 @@ module VCAP::CloudController::Diego
7382
expect(e.name).to eq('RunnerError')
7483
end
7584
end
85+
86+
it 'logs the error' do
87+
tail_logs do |logs|
88+
expect { client.desire_app(process) }.to raise_error(CloudController::Errors::ApiError)
89+
90+
expect(
91+
logs.read.find { |l| l['message'] == 'desire.app.response' }
92+
).to match(
93+
hash_including(
94+
'message' => 'desire.app.response',
95+
'data' => {
96+
'error' => /error message/,
97+
'process_guid' => lrp_process_guid
98+
}
99+
)
100+
)
101+
end
102+
end
76103
end
77104
end
78105

@@ -134,6 +161,24 @@ module VCAP::CloudController::Diego
134161
expect(e.name).to eq('RunnerError')
135162
end
136163
end
164+
165+
it 'logs the error' do
166+
tail_logs do |logs|
167+
expect { client.stop_app(process_guid) }.to raise_error(CloudController::Errors::ApiError)
168+
169+
expect(
170+
logs.read.find { |l| l['message'] == 'stop.app.response' }
171+
).to match(
172+
hash_including(
173+
'message' => 'stop.app.response',
174+
'data' => {
175+
'error' => /error message/,
176+
'process_guid' => process_guid
177+
}
178+
)
179+
)
180+
end
181+
end
137182
end
138183

139184
context 'when bbs client errors' do
@@ -186,6 +231,25 @@ module VCAP::CloudController::Diego
186231
expect(e.name).to eq('RunnerError')
187232
end
188233
end
234+
235+
it 'logs the error' do
236+
tail_logs do |logs|
237+
expect { client.stop_index(process_guid, index) }.to raise_error(CloudController::Errors::ApiError)
238+
239+
expect(
240+
logs.read.find { |l| l['message'] == 'stop.index.response' }
241+
).to match(
242+
hash_including(
243+
'message' => 'stop.index.response',
244+
'data' => {
245+
'error' => /error message/,
246+
'process_guid' => process_guid,
247+
'index' => index
248+
}
249+
)
250+
)
251+
end
252+
end
189253
end
190254

191255
context 'when bbs client errors' do
@@ -237,6 +301,24 @@ module VCAP::CloudController::Diego
237301
expect(e.name).to eq('RunnerError')
238302
end
239303
end
304+
305+
it 'logs the error' do
306+
tail_logs do |logs|
307+
expect { client.get_app(process) }.to raise_error(CloudController::Errors::ApiError)
308+
309+
expect(
310+
logs.read.find { |l| l['message'] == 'get.app.response' }
311+
).to match(
312+
hash_including(
313+
'message' => 'get.app.response',
314+
'data' => {
315+
'error' => /error message/,
316+
'process_guid' => 'process-guid'
317+
}
318+
)
319+
)
320+
end
321+
end
240322
end
241323

242324
context 'when bbs client errors' do
@@ -312,6 +394,24 @@ module VCAP::CloudController::Diego
312394
expect(e.name).to eq('RunnerError')
313395
end
314396
end
397+
398+
it 'logs the error' do
399+
tail_logs do |logs|
400+
expect { client.update_app(process, existing_lrp) }.to raise_error(CloudController::Errors::ApiError)
401+
402+
expect(
403+
logs.read.find { |l| l['message'] == 'update.app.response' }
404+
).to match(
405+
hash_including(
406+
'message' => 'update.app.response',
407+
'data' => {
408+
'error' => /error message/,
409+
'process_guid' => process_guid
410+
}
411+
)
412+
)
413+
end
414+
end
315415
end
316416

317417
context 'when bbs client errors' do
@@ -354,6 +454,23 @@ module VCAP::CloudController::Diego
354454
expect(e.name).to eq('RunnerError')
355455
end
356456
end
457+
458+
it 'logs the error' do
459+
tail_logs do |logs|
460+
expect { client.fetch_scheduling_infos }.to raise_error(CloudController::Errors::ApiError)
461+
462+
expect(
463+
logs.read.find { |l| l['message'] == 'fetch.scheduling.infos.response' }
464+
).to match(
465+
hash_including(
466+
'message' => 'fetch.scheduling.infos.response',
467+
'data' => {
468+
'error' => /error message/
469+
}
470+
)
471+
)
472+
end
473+
end
357474
end
358475

359476
context 'when bbs client errors' do
@@ -395,6 +512,23 @@ module VCAP::CloudController::Diego
395512
expect(e.name).to eq('RunnerError')
396513
end
397514
end
515+
516+
it 'logs the error' do
517+
tail_logs do |logs|
518+
expect { client.bump_freshness }.to raise_error(CloudController::Errors::ApiError)
519+
520+
expect(
521+
logs.read.find { |l| l['message'] == 'bump.freshness.response' }
522+
).to match(
523+
hash_including(
524+
'message' => 'bump.freshness.response',
525+
'data' => {
526+
'error' => /error message/
527+
}
528+
)
529+
)
530+
end
531+
end
398532
end
399533

400534
context 'when bbs client errors' do

spec/unit/lib/cloud_controller/diego/desire_app_handler_spec.rb

Lines changed: 0 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -35,34 +35,6 @@ module Diego
3535
expect(client).not_to have_received(:desire_app)
3636
end
3737
end
38-
39-
context 'race condition when the Diego::ProcessesSync runs and creates a process ahead of the DesireAppHandler' do
40-
let(:desired_lrp_update) { double(:desired_lrp_update) }
41-
let(:get_app_response) { nil }
42-
43-
before do
44-
allow(client).to receive(:update_app)
45-
allow(client).to receive(:desire_app).and_raise CloudController::Errors::ApiError.new_from_details('RunnerError', 'the requested resource already exists')
46-
end
47-
48-
it 'catches the error and updates the app' do
49-
DesireAppHandler.create_or_update_app(process, client)
50-
expect(client).to have_received(:update_app)
51-
expect(client).to have_received(:get_app).exactly(2).times
52-
end
53-
end
54-
55-
context 'when root user is not permitted' do
56-
it 'raises a CloudController::Errors::ApiError' do
57-
allow(client).to receive(:desire_app).and_raise(CloudController::Errors::ApiError.new_from_details('UnprocessableEntity',
58-
'Attempting to run process as root user, which is not permitted.'))
59-
expect { DesireAppHandler.create_or_update_app(process, client) }.to(raise_error do |error|
60-
expect(error).to be_a(CloudController::Errors::ApiError)
61-
expect(error.name).to eq('UnprocessableEntity')
62-
expect(error.message).to include('Attempting to run process as root user, which is not permitted')
63-
end)
64-
end
65-
end
6638
end
6739
end
6840
end

0 commit comments

Comments
 (0)