|
13 | 13 | require 'cloud_controller/secrets_fetcher' |
14 | 14 | require 'cloud_controller/runners/thin_runner' |
15 | 15 | require 'cloud_controller/runners/puma_runner' |
| 16 | +require 'cloud_controller/metrics_webserver' |
16 | 17 | require 'prometheus/client/data_stores/direct_file_store' |
17 | 18 | require 'prometheus/middleware/exporter' |
18 | 19 |
|
@@ -133,84 +134,8 @@ def setup_metrics |
133 | 134 |
|
134 | 135 | Prometheus::Client.config.data_store = Prometheus::Client::DataStores::DirectFileStore.new(dir: prometheus_dir) |
135 | 136 |
|
136 | | - setup_metrics_webserver |
137 | | - end |
138 | | - |
139 | | - # The webserver runs in the main process and serves only the metrics and status endpoint. |
140 | | - # This makes it possible to retrieve both even if all Puma workers of the main app are busy. |
141 | | - def setup_metrics_webserver |
142 | | - readiness_status_proc = method(:status) |
143 | | - metrics_app = Rack::Builder.new do |
144 | | - use Prometheus::Middleware::Exporter, path: '/internal/v4/metrics' |
145 | | - |
146 | | - map '/internal/v4/status' do |
147 | | - run ->(_env) { readiness_status_proc.call } |
148 | | - end |
149 | | - |
150 | | - map '/' do |
151 | | - run lambda { |_env| |
152 | | - # Return 404 for any other request |
153 | | - ['404', { 'Content-Type' => 'text/plain' }, ['Not Found']] |
154 | | - } |
155 | | - end |
156 | | - end |
157 | | - |
158 | 137 | Thread.new do |
159 | | - server = Puma::Server.new(metrics_app) |
160 | | - |
161 | | - if config.get(:nginx, :metrics_socket).nil? || config.get(:nginx, :metrics_socket).empty? |
162 | | - server.add_tcp_listener('127.0.0.1', 9395) |
163 | | - else |
164 | | - server.add_unix_listener(@config.get(:nginx, :metrics_socket)) |
165 | | - end |
166 | | - |
167 | | - server.run |
168 | | - end |
169 | | - end |
170 | | - |
171 | | - # Persist state for status endpoint |
172 | | - @previous_requests_count_sum = nil |
173 | | - @last_requests_count_increase_time = nil |
174 | | - |
175 | | - def status |
176 | | - stats = Puma.stats_hash |
177 | | - worker_statuses = stats[:worker_status] |
178 | | - all_busy = all_workers_busy?(worker_statuses) |
179 | | - current_requests_count_sum = worker_requests_count_sum(worker_statuses) |
180 | | - |
181 | | - now = Time.now |
182 | | - prev = @previous_requests_count_sum |
183 | | - |
184 | | - # Track when requests_count_sum increases |
185 | | - @last_requests_count_increase_time = now if prev.nil? || current_requests_count_sum > prev |
186 | | - @previous_requests_count_sum = current_requests_count_sum |
187 | | - |
188 | | - unhealthy = false |
189 | | - if all_busy && @last_requests_count_increase_time && (now - @last_requests_count_increase_time) > 60 |
190 | | - # If requests_count_sum hasn't increased in 60 seconds, unhealthy |
191 | | - unhealthy = true |
192 | | - end |
193 | | - |
194 | | - if all_busy && unhealthy |
195 | | - [503, { 'Content-Type' => 'text/plain' }, ['UNHEALTHY']] |
196 | | - elsif all_busy |
197 | | - [429, { 'Content-Type' => 'text/plain' }, ['BUSY']] |
198 | | - else |
199 | | - [200, { 'Content-Type' => 'text/plain' }, ['OK']] |
200 | | - end |
201 | | - rescue StandardError => e |
202 | | - [500, { 'Content-Type' => 'text/plain' }, ["Readiness check error: #{e}"]] |
203 | | - end |
204 | | - |
205 | | - def all_workers_busy?(worker_statuses) |
206 | | - worker_statuses.all? do |worker| |
207 | | - worker[:last_status][:busy_threads] == worker[:last_status][:running] |
208 | | - end |
209 | | - end |
210 | | - |
211 | | - def worker_requests_count_sum(worker_statuses) |
212 | | - worker_statuses.sum do |worker| |
213 | | - worker[:last_status][:requests_count] || 0 |
| 138 | + VCAP::CloudController::MetricsWebserver.new.start(@config) |
214 | 139 | end |
215 | 140 | end |
216 | 141 |
|
|
0 commit comments