Skip to content

Commit 6ad5853

Browse files
authored
Surface deployment updater errors to the API (#4295)
1 parent 8fa6c9c commit 6ad5853

File tree

9 files changed

+195
-3
lines changed

9 files changed

+195
-3
lines changed

app/presenters/v3/deployment_presenter.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ def status(deployment)
8383
}
8484
}
8585

86+
status[:details][:error] = deployment.error if deployment.error
87+
8688
if deployment.strategy == VCAP::CloudController::DeploymentModel::CANARY_STRATEGY
8789
status[:canary] = {
8890
steps: {
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
Sequel.migration do
2+
change do
3+
alter_table(:deployments) do
4+
add_column :error, String, null: true, default: nil, size: 255
5+
end
6+
end
7+
end

docs/v3/source/includes/resources/deployments/_object.md.erb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ Name | Type | Description
1616
**status.reason** | _string_ | The reason for the status of the deployment;<br>following list represents valid values:<br>1. If **status.value** is `ACTIVE`<br>- `DEPLOYING`<br>- `PAUSED` (only valid for canary deployments) <br>- `CANCELING`<br>2. If **status.value** is `FINALIZED`<br>- `DEPLOYED`<br>- `CANCELED`<br>- `SUPERSEDED` (another deployment created for app before completion)<br>
1717
**status.details.last_successful_healthcheck** | _[timestamp](#timestamps)_ | Timestamp of the last successful healthcheck
1818
**status.details.last_status_change** | _[timestamp](#timestamps)_ | Timestamp of last change to status.value or status.reason**status.details.last_status_change** | _[timestamp](#timestamps)_ | Timestamp of last change to status.value or status.reason
19+
**status.details.error** | _string_ | | Brief description of error encountered while deploying, if any. This field is cleared once the deployment progresses successfully.
1920
**status.canary.steps.current** | _integer_ | The current canary step. Only available for deployments with strategy 'canary'. (experimental)
2021
**status.canary.steps.total** | _integer_ | The total number of canary steps. Only available for deployments with strategy 'canary'. (experimental)
2122
**strategy** | _string_ | Strategy used for the deployment; supported strategies are `rolling` and `canary` (experimental)

lib/cloud_controller/deployment_updater/actions/cancel.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,8 @@ def call
2828
deployment.update(
2929
state: DeploymentModel::CANCELED_STATE,
3030
status_value: DeploymentModel::FINALIZED_STATUS_VALUE,
31-
status_reason: DeploymentModel::CANCELED_STATUS_REASON
31+
status_reason: DeploymentModel::CANCELED_STATUS_REASON,
32+
error: nil
3233
)
3334
end
3435
end

lib/cloud_controller/deployment_updater/actions/scale.rb

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ def call
3232

3333
deployment.update(
3434
status_value: DeploymentModel::ACTIVE_STATUS_VALUE,
35-
status_reason: DeploymentModel::DEPLOYING_STATUS_REASON
35+
status_reason: DeploymentModel::DEPLOYING_STATUS_REASON,
36+
error: nil
3637
)
3738

3839
ScaleDownCanceledProcesses.new(deployment).call

lib/cloud_controller/deployment_updater/updater.rb

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,10 +45,32 @@ def cancel
4545

4646
private
4747

48+
APPROVED_ERRORS = [
49+
Sequel::ValidationFailed,
50+
CloudController::Errors::ApiError
51+
].freeze
52+
4853
def with_error_logging(error_message)
4954
yield
5055
rescue StandardError => e
5156
error_name = e.is_a?(CloudController::Errors::ApiError) ? e.name : e.class.name
57+
58+
begin
59+
if APPROVED_ERRORS.include?(e.class)
60+
deployment.update(error: e.message)
61+
else
62+
deployment.update(error: 'An unexpected error has occurred.')
63+
end
64+
rescue StandardError => new_error
65+
logger.error(
66+
'error-saving-deployment-error',
67+
deployment_guid: deployment.guid,
68+
error: new_error.class.name,
69+
error_message: new_error.message,
70+
backtrace: new_error.backtrace.join("\n")
71+
)
72+
end
73+
5274
logger.error(
5375
error_message,
5476
deployment_guid: deployment.guid,

spec/request/deployments_spec.rb

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1665,6 +1665,24 @@
16651665
end
16661666
end
16671667

1668+
context 'when a deployment has had an error' do
1669+
let(:user) { make_developer_for_space(space) }
1670+
let(:deployment) do
1671+
VCAP::CloudController::DeploymentModelTestFactory.make(
1672+
app: app_model,
1673+
droplet: droplet,
1674+
previous_droplet: old_droplet,
1675+
error: 'Quota error'
1676+
)
1677+
end
1678+
1679+
it 'includes the error as part of the status details' do
1680+
get "/v3/deployments/#{deployment.guid}", nil, user_header
1681+
parsed_response = Oj.load(last_response.body)
1682+
expect(parsed_response['status']['details']['error']).to eq('Quota error')
1683+
end
1684+
end
1685+
16681686
it_behaves_like 'permissions for single object endpoint', ALL_PERMISSIONS
16691687
end
16701688

spec/unit/lib/cloud_controller/deployment_updater/updater_spec.rb

Lines changed: 117 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ module VCAP::CloudController
2424
instances: current_deploying_instances,
2525
guid: 'guid-final',
2626
revision: revision,
27-
state: ProcessModel::STOPPED
27+
state: ProcessModel::STARTED
2828
)
2929
end
3030
let(:revision) { RevisionModel.make(app: app, droplet: droplet, version: 300) }
@@ -183,6 +183,8 @@ module VCAP::CloudController
183183
expect do
184184
subject.scale
185185
end.not_to raise_error
186+
187+
expect(deployment.error).to eq 'An unexpected error has occurred.'
186188
end
187189
end
188190
end
@@ -393,6 +395,8 @@ module VCAP::CloudController
393395
expect do
394396
subject.canary
395397
end.not_to raise_error
398+
399+
expect(deployment.error).to eq 'An unexpected error has occurred.'
396400
end
397401
end
398402
end
@@ -424,6 +428,118 @@ module VCAP::CloudController
424428
expect do
425429
subject.cancel
426430
end.not_to raise_error
431+
432+
expect(deployment.error).to eq 'An unexpected error has occurred.'
433+
end
434+
end
435+
end
436+
437+
describe 'error handling' do
438+
context 'when saving the error to the deployment model' do
439+
context 'when the error is a quota error' do
440+
let(:web_instances) { nil }
441+
let(:max_in_flight) { 100 }
442+
let!(:quota_definition) { QuotaDefinition.make(memory_limit: 1) }
443+
444+
before do
445+
org = app.organization
446+
org.quota_definition = quota_definition
447+
org.save
448+
end
449+
450+
it 'saves the quota error to the model' do
451+
subject.scale
452+
expect(deployment.error).to eq 'memory quota_exceeded'
453+
end
454+
end
455+
456+
context 'when the error is not an approved error' do
457+
before do
458+
allow(deployment).to receive(:app).and_raise(StandardError.new('unrecognized error'))
459+
end
460+
461+
it 'provides a helpful error on the deployment model' do
462+
subject.scale
463+
expect(deployment.error).to eq 'An unexpected error has occurred.'
464+
end
465+
end
466+
467+
context 'when the error is a SequelValidation error' do
468+
before do
469+
allow(deployment).to receive(:app).and_raise(Sequel::ValidationFailed.new('Something bad happened with model validation'))
470+
end
471+
472+
it 'provides a helpful error on the deployment model' do
473+
subject.scale
474+
expect(deployment.error).to eq 'Something bad happened with model validation'
475+
end
476+
end
477+
478+
context 'when the error is a CloudController::Errors::ApiError error' do
479+
before do
480+
allow(deployment).to receive(:app).and_raise(CloudController::Errors::ApiError.new_from_details('RunnerError', 'some error'))
481+
end
482+
483+
it 'provides a helpful error on the deployment model' do
484+
subject.scale
485+
expect(deployment.error).to eq 'Runner error: some error'
486+
end
487+
end
488+
489+
context 'when there is a problem saving the error column' do
490+
before do
491+
# too long for db column
492+
allow(deployment).to receive(:app).and_raise(CloudController::Errors::ApiError.new_from_details('RunnerError', 'a' * 5000))
493+
end
494+
495+
it 'logs the issue and continues' do
496+
subject.scale
497+
498+
expect(logger).to have_received(:error).with(
499+
'error-saving-deployment-error',
500+
deployment_guid: deployment.guid,
501+
error: 'Sequel::DatabaseError',
502+
error_message: anything,
503+
backtrace: anything
504+
)
505+
end
506+
end
507+
508+
context 'when there was a prior error, but the error no longer occurs' do
509+
describe '#scale' do
510+
before do
511+
deployment.error = 'old error'
512+
deployment.save
513+
end
514+
515+
it 'clears the error' do
516+
subject.scale
517+
expect(deployment.error).to be_nil
518+
end
519+
end
520+
521+
describe '#cancel' do
522+
before do
523+
deployment.update(state: 'CANCELING', error: 'old error')
524+
allow_any_instance_of(VCAP::CloudController::Diego::Runner).to receive(:stop)
525+
end
526+
527+
it 'clears the error' do
528+
subject.cancel
529+
expect(deployment.error).to be_nil
530+
end
531+
end
532+
533+
describe '#canary' do
534+
before do
535+
deployment.update(strategy: 'canary', error: 'old error')
536+
end
537+
538+
it 'clears the error' do
539+
subject.canary
540+
expect(deployment.error).to be_nil
541+
end
542+
end
427543
end
428544
end
429545
end

spec/unit/presenters/v3/deployment_presenter_spec.rb

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,30 @@ module VCAP::CloudController::Presenters::V3
131131
end
132132
end
133133

134+
describe 'error' do
135+
context 'when the deployment has no error' do
136+
before do
137+
deployment.error = nil
138+
end
139+
140+
it 'does not have the error key' do
141+
result = DeploymentPresenter.new(deployment).to_hash
142+
expect(result[:status][:details].key?(:error)).to be false
143+
end
144+
end
145+
146+
context 'when the deployment has an error' do
147+
before do
148+
deployment.error = 'Quota error'
149+
end
150+
151+
it 'renders the error' do
152+
result = DeploymentPresenter.new(deployment).to_hash
153+
expect(result[:status][:details][:error]).to eq 'Quota error'
154+
end
155+
end
156+
end
157+
134158
describe 'status' do
135159
context 'when the strategy is rolling' do
136160
before do

0 commit comments

Comments
 (0)