Skip to content

Commit 41df631

Browse files
committed
Breakout Scale into UpScale/DownScale actions
* also move logic into instance_reporters
1 parent c778add commit 41df631

File tree

12 files changed

+537
-130
lines changed

12 files changed

+537
-130
lines changed

lib/cloud_controller/backends/instances_reporters.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ def stats_for_app(app)
2727
raise CloudController::Errors::ApiError.new_from_details('StatsUnavailable', 'Stats server temporarily unavailable.')
2828
end
2929

30-
delegate :number_of_starting_and_running_instances_for_processes, to: :diego_reporter
30+
delegate :number_of_starting_and_running_instances_for_processes, :instance_count_summary, to: :diego_reporter
3131

3232
private
3333

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
require 'cloud_controller/deployment_updater/actions/scale_down_old_process'
2+
module VCAP::CloudController
3+
module DeploymentUpdater
4+
module Actions
5+
class DownScaler
6+
attr_reader :deployment, :logger, :app, :target_total_instance_count
7+
8+
def initialize(deployment, logger, target_total_instance_count, routable_instance_count)
9+
@deployment = deployment
10+
@app = deployment.app
11+
@logger = logger
12+
@target_total_instance_count = target_total_instance_count
13+
@routable_instance_count = routable_instance_count
14+
end
15+
16+
def scale_down
17+
instances_to_reduce = non_deploying_web_processes.map(&:instances).sum - desired_non_deploying_instances
18+
19+
return if instances_to_reduce <= 0
20+
21+
non_deploying_web_processes.each do |process|
22+
if instances_to_reduce < process.instances
23+
ScaleDownOldProcess.new(deployment, process, process.instances - instances_to_reduce).call
24+
break
25+
end
26+
27+
instances_to_reduce -= process.instances
28+
ScaleDownOldProcess.new(deployment, process, 0).call
29+
end
30+
end
31+
32+
def can_downscale?
33+
non_deploying_web_processes.map(&:instances).sum > desired_non_deploying_instances
34+
end
35+
36+
def desired_non_deploying_instances
37+
[target_total_instance_count - @routable_instance_count, 0].max
38+
end
39+
40+
private
41+
42+
def non_deploying_web_processes
43+
app.web_processes.reject { |process| process.guid == deployment.deploying_web_process.guid }.sort_by { |p| [p.created_at, p.id] }
44+
end
45+
end
46+
end
47+
end
48+
end

lib/cloud_controller/deployment_updater/actions/finalize.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ def call
1515

1616
update_non_web_processes
1717
restart_non_web_processes
18-
18+
1919
deployment.update(
2020
state: DeploymentModel::DEPLOYED_STATE,
2121
status_value: DeploymentModel::FINALIZED_STATUS_VALUE,

lib/cloud_controller/deployment_updater/actions/scale.rb

Lines changed: 15 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,16 @@
11
require 'cloud_controller/deployment_updater/actions/scale_down_canceled_processes'
2-
require 'cloud_controller/deployment_updater/actions/scale_down_old_process'
32
require 'cloud_controller/deployment_updater/actions/finalize'
3+
require 'cloud_controller/deployment_updater/actions/down_scaler'
4+
require 'cloud_controller/deployment_updater/actions/up_scaler'
45
require 'cloud_controller/diego/constants'
56

67
module VCAP::CloudController
78
module DeploymentUpdater
89
module Actions
910
class Scale
10-
HEALTHY_STATES = [VCAP::CloudController::Diego::LRP_RUNNING, VCAP::CloudController::Diego::LRP_STARTING].freeze
1111
attr_reader :deployment, :logger, :app, :target_total_instance_count, :interim_desired_instance_count
1212

13-
def initialize(deployment, logger, target_total_instance_count, interim_desired_instance_count = nil)
13+
def initialize(deployment, logger, target_total_instance_count, interim_desired_instance_count=nil)
1414
@deployment = deployment
1515
@logger = logger
1616
@app = deployment.app
@@ -19,110 +19,52 @@ def initialize(deployment, logger, target_total_instance_count, interim_desired_
1919
end
2020

2121
def call
22+
down_scaler = DownScaler.new(deployment, logger, target_total_instance_count, instance_count_summary.routable_instances_count)
23+
up_scaler = UpScaler.new(deployment, logger, interim_desired_instance_count, instance_count_summary)
24+
2225
deployment.db.transaction do
2326
return unless deployment.lock!.state == DeploymentModel::DEPLOYING_STATE
24-
25-
return unless can_scale? || can_downscale?
27+
return unless up_scaler.can_scale? || down_scaler.can_downscale?
2628

2729
app.lock!
2830

2931
oldest_web_process_with_instances.lock!
3032
deploying_web_process.lock!
3133

3234
deployment.update(
33-
state: DeploymentModel::DEPLOYING_STATE,
3435
status_value: DeploymentModel::ACTIVE_STATUS_VALUE,
3536
status_reason: DeploymentModel::DEPLOYING_STATUS_REASON
3637
)
37-
38-
ScaleDownCanceledProcesses.new(deployment).call
39-
40-
scale_down_old_processes if can_downscale?
41-
42-
return true if deploying_web_process.instances >= interim_desired_instance_count
43-
44-
if can_scale?
45-
deploying_web_process.update(instances: desired_new_instances)
46-
deployment.update(last_healthy_at: Time.now)
47-
end
48-
end
49-
false
50-
end
51-
52-
private
5338

54-
def scale_down_old_processes
55-
instances_to_reduce = non_deploying_web_processes.map(&:instances).sum - desired_non_deploying_instances
39+
ScaleDownCanceledProcesses.new(deployment).call
5640

57-
return if instances_to_reduce <= 0
41+
down_scaler.scale_down if down_scaler.can_downscale?
5842

59-
non_deploying_web_processes.each do |process|
60-
if instances_to_reduce < process.instances
61-
ScaleDownOldProcess.new(deployment, process, process.instances - instances_to_reduce).call
62-
break
63-
end
43+
return true if up_scaler.finished_scaling?
6444

65-
instances_to_reduce -= process.instances
66-
ScaleDownOldProcess.new(deployment, process, 0).call
45+
up_scaler.scale_up if up_scaler.can_scale?
6746
end
68-
end
69-
70-
def can_scale?
71-
starting_instances.count < deployment.max_in_flight &&
72-
unhealthy_instances.count == 0 &&
73-
routable_instances.count >= deploying_web_process.instances - deployment.max_in_flight
74-
rescue CloudController::Errors::ApiError # the instances_reporter re-raises InstancesUnavailable as ApiError
75-
logger.info("skipping-deployment-update-for-#{deployment.guid}")
7647
false
77-
end
78-
79-
def can_downscale?
80-
non_deploying_web_processes.map(&:instances).sum > desired_non_deploying_instances
8148
rescue CloudController::Errors::ApiError # the instances_reporter re-raises InstancesUnavailable as ApiError
8249
logger.info("skipping-deployment-update-for-#{deployment.guid}")
8350
false
8451
end
8552

86-
def desired_non_deploying_instances
87-
[target_total_instance_count - routable_instances.count, 0].max
88-
end
89-
90-
def desired_new_instances
91-
[routable_instances.count + deployment.max_in_flight, interim_desired_instance_count].min
92-
end
53+
private
9354

9455
def oldest_web_process_with_instances
56+
# TODO: lock all web processes? We might alter all of them, depending on max-in-flight size
9557
@oldest_web_process_with_instances ||= app.web_processes.select { |process| process.instances > 0 }.min_by { |p| [p.created_at, p.id] }
9658
end
9759

98-
def non_deploying_web_processes
99-
app.web_processes.reject { |process| process.guid == deploying_web_process.guid }.sort_by { |p| [p.created_at, p.id] }
60+
def instance_count_summary
61+
@instance_count_summary ||= instance_reporters.instance_count_summary(deploying_web_process)
10062
end
10163

10264
def deploying_web_process
10365
@deploying_web_process ||= deployment.deploying_web_process
10466
end
10567

106-
def starting_instances
107-
healthy_instances.reject { |_, val| val[:state] == VCAP::CloudController::Diego::LRP_RUNNING && val[:routable] }
108-
end
109-
110-
def routable_instances
111-
reported_instances.select { |_, val| val[:state] == VCAP::CloudController::Diego::LRP_RUNNING && val[:routable] }
112-
end
113-
114-
def healthy_instances
115-
reported_instances.select { |_, val| HEALTHY_STATES.include?(val[:state]) }
116-
end
117-
118-
def unhealthy_instances
119-
reported_instances.reject { |_, val| HEALTHY_STATES.include?(val[:state]) }
120-
end
121-
122-
def reported_instances
123-
@reported_instances = instance_reporters.all_instances_for_app(deploying_web_process)
124-
end
125-
12668
def instance_reporters
12769
CloudController::DependencyLocator.instance.instances_reporters
12870
end
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
module VCAP::CloudController
2+
module DeploymentUpdater
3+
module Actions
4+
class UpScaler
5+
attr_reader :deployment, :logger, :app, :interim_desired_instance_count
6+
7+
def initialize(deployment, logger, interim_desired_instance_count, instance_count_summary)
8+
@deployment = deployment
9+
@app = deployment.app
10+
@logger = logger
11+
@interim_desired_instance_count = interim_desired_instance_count
12+
@starting_instances_count = instance_count_summary.starting_instances_count
13+
@unhealthy_instances_count = instance_count_summary.unhealthy_instances_count
14+
@routable_instances_count = instance_count_summary.routable_instances_count
15+
end
16+
17+
def scale_up
18+
return unless can_scale?
19+
20+
deploying_web_process.update(instances: desired_new_instances)
21+
deployment.update(last_healthy_at: Time.now)
22+
end
23+
24+
def can_scale?
25+
@starting_instances_count < deployment.max_in_flight &&
26+
@unhealthy_instances_count == 0 &&
27+
# if routable instances is < deploying_web_process.instances - deployment.max_in_flight
28+
# then that indicates that Diego isnt in sync with CAPI yet
29+
@routable_instances_count >= deploying_web_process.instances - deployment.max_in_flight
30+
end
31+
32+
def finished_scaling?
33+
deploying_web_process.instances >= interim_desired_instance_count
34+
end
35+
36+
private
37+
38+
def desired_new_instances
39+
[@routable_instances_count + deployment.max_in_flight, interim_desired_instance_count].min
40+
end
41+
42+
def deploying_web_process
43+
@deploying_web_process ||= deployment.deploying_web_process
44+
end
45+
end
46+
end
47+
end
48+
end

lib/cloud_controller/diego/reporters/instances_reporter.rb

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,8 @@ module VCAP::CloudController
66
module Diego
77
class InstancesReporter
88
include ReporterMixins
9-
9+
InstanceCountSummary = Struct.new(:starting_instances_count, :routable_instances_count, :healthy_instances_count, :unhealthy_instances_count)
10+
HEALTHY_STATES = [VCAP::CloudController::Diego::LRP_RUNNING, VCAP::CloudController::Diego::LRP_STARTING].freeze
1011
UNKNOWN_INSTANCE_COUNT = -1
1112

1213
def initialize(bbs_instances_client)
@@ -105,6 +106,17 @@ def crashed_instances_for_app(process)
105106
raise CloudController::Errors::InstancesUnavailable.new(e)
106107
end
107108

109+
def instance_count_summary(process)
110+
instances = all_instances_for_app(process)
111+
112+
healthy_instances = instances.select { |_, val| HEALTHY_STATES.include?(val[:state]) }
113+
unhealthy_instances = instances.reject { |_, val| HEALTHY_STATES.include?(val[:state]) }
114+
starting_instances = healthy_instances.reject { |_, val| val[:state] == VCAP::CloudController::Diego::LRP_RUNNING && val[:routable] }
115+
routable_instances = instances.select { |_, val| val[:state] == VCAP::CloudController::Diego::LRP_RUNNING && val[:routable] }
116+
117+
InstanceCountSummary.new(starting_instances.count, routable_instances.count, healthy_instances.count, unhealthy_instances.count)
118+
end
119+
108120
private
109121

110122
attr_reader :bbs_instances_client

lib/cloud_controller/metrics/prometheus_updater.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
require 'prometheus/client'
2+
require 'prometheus/client/data_stores/direct_file_store'
23

34
module VCAP::CloudController::Metrics
45
class PrometheusUpdater

0 commit comments

Comments
 (0)