|
1 | 1 | require 'prometheus/client' |
2 | 2 | require 'prometheus/client/data_stores/direct_file_store' |
| 3 | +require 'cloud_controller/execution_context' |
3 | 4 |
|
4 | 5 | module VCAP::CloudController::Metrics |
5 | 6 | class PrometheusUpdater |
@@ -29,12 +30,6 @@ def self.allow_pid_label |
29 | 30 | { type: :histogram, name: :cc_staging_failed_duration_seconds, docstring: 'Durations of failed staging events', buckets: DURATION_BUCKETS }, |
30 | 31 | { type: :gauge, name: :cc_requests_outstanding_total, docstring: 'Requests outstanding', aggregation: :sum }, |
31 | 32 | { type: :counter, name: :cc_requests_completed_total, docstring: 'Requests completed' }, |
32 | | - { type: :gauge, name: :cc_vitals_started_at, docstring: 'CloudController Vitals: started_at', aggregation: :most_recent }, |
33 | | - { type: :gauge, name: :cc_vitals_mem_bytes, docstring: 'CloudController Vitals: mem_bytes', aggregation: :most_recent }, |
34 | | - { type: :gauge, name: :cc_vitals_cpu_load_avg, docstring: 'CloudController Vitals: cpu_load_avg', aggregation: :most_recent }, |
35 | | - { type: :gauge, name: :cc_vitals_mem_used_bytes, docstring: 'CloudController Vitals: mem_used_bytes', aggregation: :most_recent }, |
36 | | - { type: :gauge, name: :cc_vitals_mem_free_bytes, docstring: 'CloudController Vitals: mem_free_bytes', aggregation: :most_recent }, |
37 | | - { type: :gauge, name: :cc_vitals_num_cores, docstring: 'CloudController Vitals: num_cores', aggregation: :most_recent }, |
38 | 33 | { type: :gauge, name: :cc_running_tasks_total, docstring: 'Total running tasks', aggregation: :most_recent }, |
39 | 34 | { type: :gauge, name: :cc_running_tasks_memory_bytes, docstring: 'Total memory consumed by running tasks', aggregation: :most_recent }, |
40 | 35 | { type: :gauge, name: :cc_users_total, docstring: 'Number of users', aggregation: :most_recent }, |
@@ -67,19 +62,68 @@ def self.allow_pid_label |
67 | 62 | { type: :histogram, name: :cc_job_duration_seconds, docstring: 'Job processing time (start to finish)', labels: %i[queue worker], buckets: DELAYED_JOB_METRIC_BUCKETS } |
68 | 63 | ].freeze |
69 | 64 |
|
70 | | - def initialize(registry: Prometheus::Client.registry, cc_worker: false) |
# Gauge definitions for the CloudController "vitals" values.
# Every entry has the same shape: a :gauge aggregated as :most_recent,
# differing only in metric name and docstring.
VITAL_METRICS = {
  cc_vitals_started_at: 'CloudController Vitals: started_at',
  cc_vitals_mem_bytes: 'CloudController Vitals: mem_bytes',
  cc_vitals_cpu_load_avg: 'CloudController Vitals: cpu_load_avg',
  cc_vitals_mem_used_bytes: 'CloudController Vitals: mem_used_bytes',
  cc_vitals_mem_free_bytes: 'CloudController Vitals: mem_free_bytes',
  cc_vitals_num_cores: 'CloudController Vitals: num_cores'
}.map { |metric_name, description| { type: :gauge, name: metric_name, docstring: description, aggregation: :most_recent } }.freeze
| 73 | + |
# Sets up the updater for the current process.
#
# Runs the class-level `allow_pid_label` setup, stores the registry, then
# registers the metric set that matches the process's execution context and
# seeds the DB connection pool timeout metric.
#
# @param registry the Prometheus registry metrics are registered on
#   (defaults to the global Prometheus::Client.registry)
def initialize(registry: Prometheus::Client.registry)
  self.class.allow_pid_label
  @registry = registry

  # The execution context decides which metrics this process exposes.
  process_context = VCAP::CloudController::ExecutionContext.from_process_type_env
  register_metrics_for_process(process_context)
  initialize_cc_db_connection_pool_timeouts_total(process_context)
end
| 83 | + |
| 84 | + private |
| 85 | + |
# Registers every metric that is relevant for the given execution context.
#
# @param execution_context one of the VCAP::CloudController::ExecutionContext constants
# @raise [RuntimeError] if the execution context is not one of the known contexts
def register_metrics_for_process(execution_context)
  metrics_for_process(execution_context).each { |metric| register(metric) }
end

# Selects the metric definitions to expose for the given execution context.
# Selection is kept separate from registration so there is a single
# registration loop instead of one per metric list and branch.
#
# @param execution_context one of the VCAP::CloudController::ExecutionContext constants
# @return [Array] metric definitions, in registration order
# @raise [RuntimeError] if the execution context is not one of the known contexts
def metrics_for_process(execution_context)
  contexts = VCAP::CloudController::ExecutionContext

  case execution_context
  when contexts::CC_WORKER
    [*DB_CONNECTION_POOL_METRICS, *DELAYED_JOB_METRICS, *VITAL_METRICS]
  when contexts::CLOCK, contexts::DEPLOYMENT_UPDATER
    [*DB_CONNECTION_POOL_METRICS, *VITAL_METRICS]
  when contexts::API_PUMA_MAIN, contexts::API_PUMA_WORKER
    api_metrics = [*DB_CONNECTION_POOL_METRICS, *DELAYED_JOB_METRICS, *VITAL_METRICS, *METRICS]
    # Puma metrics are only exposed when puma is the configured webserver.
    is_puma_webserver? ? api_metrics + [*PUMA_METRICS] : api_metrics
  else
    raise 'Could not register Prometheus metrics: Unknown execution context'
  end
end
| 107 | + |
# Seeds :cc_db_connection_pool_timeouts_total with 0 so the metric is
# discoverable; on healthy systems it is unlikely to ever be updated.
# The return value is not meaningful.
#
# @param execution_context the current execution context, or nil
def initialize_cc_db_connection_pool_timeouts_total(execution_context)
  timeouts_metric = :cc_db_connection_pool_timeouts_total

  # Skip when there is no execution context (may happen in unit tests) or
  # when the metric was not registered for this process.
  return if execution_context.nil? || !@registry.exist?(timeouts_metric)

  process_types = [execution_context.process_type]
  # The puma main process also seeds the value for the puma worker process type.
  if execution_context == VCAP::CloudController::ExecutionContext::API_PUMA_MAIN
    process_types << VCAP::CloudController::ExecutionContext::API_PUMA_WORKER.process_type
  end

  process_types.each do |process_type|
    update_gauge_metric(timeouts_metric, 0, labels: { process_type: process_type })
  end
end
78 | 120 |
|
79 | | - return if cc_worker |
| 121 | + public |
80 | 122 |
|
81 | | - METRICS.each { |metric| register(metric) } |
82 | | - PUMA_METRICS.each { |metric| register(metric) } if VCAP::CloudController::Config.config&.get(:webserver) == 'puma' |
# Tells whether the configured webserver is puma.
#
# @return [Boolean] true only when the :webserver config value equals 'puma';
#   false when no config is loaded or the config lookup raises InvalidConfigPath
def is_puma_webserver?
  configured_webserver = VCAP::CloudController::Config.config&.get(:webserver)
  configured_webserver == 'puma'
rescue VCAP::CloudController::Config::InvalidConfigPath
  false
end
84 | 128 |
|
85 | 129 | def update_gauge_metric(metric, value, labels: {}) |
|
0 commit comments