Skip to content

Commit 2bc79b6

Browse files
committed
Refactor runner and move seperate webserver into own class
1 parent d441af7 commit 2bc79b6

File tree

5 files changed

+216
-98
lines changed

5 files changed

+216
-98
lines changed
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
require 'rack'
2+
require 'prometheus/middleware/exporter'
3+
4+
module VCAP
5+
module CloudController
6+
class MetricsWebserver
7+
attr_reader :app
8+
9+
def initialize
10+
@app = build_app
11+
end
12+
13+
def start(config)
14+
server = Puma::Server.new(@app)
15+
16+
if config.get(:nginx, :metrics_socket).nil? || config.get(:nginx, :metrics_socket).empty?
17+
server.add_tcp_listener('127.0.0.1', 9395)
18+
else
19+
server.add_unix_listener(config.get(:nginx, :metrics_socket))
20+
end
21+
22+
server.run
23+
end
24+
25+
private
26+
27+
def build_app
28+
status_proc = method(:status)
29+
Rack::Builder.new do
30+
use Prometheus::Middleware::Exporter, path: '/internal/v4/metrics'
31+
32+
map '/internal/v4/status' do
33+
run ->(_env) { status_proc.call }
34+
end
35+
36+
map '/' do
37+
run lambda { |_env|
38+
# Return 404 for any other request
39+
['404', { 'Content-Type' => 'text/plain' }, ['Not Found']]
40+
}
41+
end
42+
end
43+
end
44+
45+
def status
46+
stats = Puma.stats_hash
47+
worker_statuses = stats[:worker_status]
48+
49+
all_busy = all_workers_busy?(worker_statuses)
50+
current_requests_count_sum = worker_requests_count_sum(worker_statuses)
51+
52+
track_request_count_increase(current_requests_count_sum)
53+
54+
unhealthy = determine_unhealthy_state(all_busy)
55+
56+
build_status_response(all_busy, unhealthy)
57+
rescue StandardError => e
58+
[500, { 'Content-Type' => 'text/plain' }, ["Readiness check error: #{e}"]]
59+
end
60+
61+
def track_request_count_increase(current_requests_count_sum)
62+
now = Time.now
63+
prev = @previous_requests_count_sum
64+
65+
@last_requests_count_increase_time = now if prev.nil? || current_requests_count_sum > prev
66+
@previous_requests_count_sum = current_requests_count_sum
67+
end
68+
69+
def determine_unhealthy_state(all_busy)
70+
return false unless all_busy && @last_requests_count_increase_time
71+
72+
(Time.now - @last_requests_count_increase_time) > 60
73+
end
74+
75+
def build_status_response(all_busy, unhealthy)
76+
if all_busy && unhealthy
77+
[503, { 'Content-Type' => 'text/plain' }, ['UNHEALTHY']]
78+
elsif all_busy
79+
[429, { 'Content-Type' => 'text/plain' }, ['BUSY']]
80+
else
81+
[200, { 'Content-Type' => 'text/plain' }, ['OK']]
82+
end
83+
end
84+
85+
def all_workers_busy?(worker_statuses)
86+
worker_statuses.all? do |worker|
87+
worker[:last_status][:busy_threads] == worker[:last_status][:running]
88+
end
89+
end
90+
91+
def worker_requests_count_sum(worker_statuses)
92+
worker_statuses.sum do |worker|
93+
worker[:last_status][:requests_count] || 0
94+
end
95+
end
96+
end
97+
end
98+
end

lib/cloud_controller/runner.rb

Lines changed: 2 additions & 77 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
require 'cloud_controller/secrets_fetcher'
1414
require 'cloud_controller/runners/thin_runner'
1515
require 'cloud_controller/runners/puma_runner'
16+
require 'cloud_controller/metrics_webserver'
1617
require 'prometheus/client/data_stores/direct_file_store'
1718
require 'prometheus/middleware/exporter'
1819

@@ -133,84 +134,8 @@ def setup_metrics
133134

134135
Prometheus::Client.config.data_store = Prometheus::Client::DataStores::DirectFileStore.new(dir: prometheus_dir)
135136

136-
setup_metrics_webserver
137-
end
138-
139-
# The webserver runs in the main process and serves only the metrics and status endpoint.
140-
# This makes it possible to retrieve both even if all Puma workers of the main app are busy.
141-
def setup_metrics_webserver
142-
readiness_status_proc = method(:status)
143-
metrics_app = Rack::Builder.new do
144-
use Prometheus::Middleware::Exporter, path: '/internal/v4/metrics'
145-
146-
map '/internal/v4/status' do
147-
run ->(_env) { readiness_status_proc.call }
148-
end
149-
150-
map '/' do
151-
run lambda { |_env|
152-
# Return 404 for any other request
153-
['404', { 'Content-Type' => 'text/plain' }, ['Not Found']]
154-
}
155-
end
156-
end
157-
158137
Thread.new do
159-
server = Puma::Server.new(metrics_app)
160-
161-
if config.get(:nginx, :metrics_socket).nil? || config.get(:nginx, :metrics_socket).empty?
162-
server.add_tcp_listener('127.0.0.1', 9395)
163-
else
164-
server.add_unix_listener(@config.get(:nginx, :metrics_socket))
165-
end
166-
167-
server.run
168-
end
169-
end
170-
171-
# Persist state for status endpoint
172-
@previous_requests_count_sum = nil
173-
@last_requests_count_increase_time = nil
174-
175-
def status
176-
stats = Puma.stats_hash
177-
worker_statuses = stats[:worker_status]
178-
all_busy = all_workers_busy?(worker_statuses)
179-
current_requests_count_sum = worker_requests_count_sum(worker_statuses)
180-
181-
now = Time.now
182-
prev = @previous_requests_count_sum
183-
184-
# Track when requests_count_sum increases
185-
@last_requests_count_increase_time = now if prev.nil? || current_requests_count_sum > prev
186-
@previous_requests_count_sum = current_requests_count_sum
187-
188-
unhealthy = false
189-
if all_busy && @last_requests_count_increase_time && (now - @last_requests_count_increase_time) > 60
190-
# If requests_count_sum hasn't increased in 60 seconds, unhealthy
191-
unhealthy = true
192-
end
193-
194-
if all_busy && unhealthy
195-
[503, { 'Content-Type' => 'text/plain' }, ['UNHEALTHY']]
196-
elsif all_busy
197-
[429, { 'Content-Type' => 'text/plain' }, ['BUSY']]
198-
else
199-
[200, { 'Content-Type' => 'text/plain' }, ['OK']]
200-
end
201-
rescue StandardError => e
202-
[500, { 'Content-Type' => 'text/plain' }, ["Readiness check error: #{e}"]]
203-
end
204-
205-
def all_workers_busy?(worker_statuses)
206-
worker_statuses.all? do |worker|
207-
worker[:last_status][:busy_threads] == worker[:last_status][:running]
208-
end
209-
end
210-
211-
def worker_requests_count_sum(worker_statuses)
212-
worker_statuses.sum do |worker|
213-
worker[:last_status][:requests_count] || 0
138+
VCAP::CloudController::MetricsWebserver.new.start(@config)
214139
end
215140
end
216141

spec/request/status_spec.rb

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
require 'spec_helper'
2+
3+
module VCAP::CloudController
4+
RSpec.describe 'Status Endpoint' do
5+
include Rack::Test::Methods
6+
7+
let(:metrics_webserver) { MetricsWebserver.new }
8+
9+
delegate :app, to: :metrics_webserver
10+
11+
describe 'GET /internal/v4/status' do
12+
context 'when all workers are busy and unhealthy' do
13+
before do
14+
allow(Puma).to receive(:stats_hash).and_return(
15+
worker_status: [
16+
{ last_status: { busy_threads: 2, running: 2, requests_count: 5 } },
17+
{ last_status: { busy_threads: 1, running: 1, requests_count: 3 } }
18+
]
19+
)
20+
allow(metrics_webserver).to receive(:determine_unhealthy_state).and_return(true)
21+
end
22+
23+
it 'returns 503 UNHEALTHY' do
24+
get '/internal/v4/status'
25+
26+
expect(last_response.status).to eq(503)
27+
expect(last_response.body).to eq('UNHEALTHY')
28+
end
29+
end
30+
31+
context 'when all workers are busy but not unhealthy' do
32+
before do
33+
allow(Puma).to receive(:stats_hash).and_return(
34+
worker_status: [
35+
{ last_status: { busy_threads: 2, running: 2, requests_count: 5 } },
36+
{ last_status: { busy_threads: 1, running: 1, requests_count: 3 } }
37+
]
38+
)
39+
allow(metrics_webserver).to receive(:determine_unhealthy_state).and_return(false)
40+
end
41+
42+
it 'returns 429 BUSY' do
43+
get '/internal/v4/status'
44+
45+
expect(last_response.status).to eq(429)
46+
expect(last_response.body).to eq('BUSY')
47+
end
48+
end
49+
50+
context 'when not all workers are busy' do
51+
before do
52+
allow(Puma).to receive(:stats_hash).and_return(
53+
worker_status: [
54+
{ last_status: { busy_threads: 1, running: 2, requests_count: 5 } },
55+
{ last_status: { busy_threads: 0, running: 1, requests_count: 3 } }
56+
]
57+
)
58+
end
59+
60+
it 'returns 200 OK' do
61+
get '/internal/v4/status'
62+
63+
expect(last_response.status).to eq(200)
64+
expect(last_response.body).to eq('OK')
65+
end
66+
end
67+
end
68+
end
69+
end
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
require 'spec_helper'
2+
3+
module VCAP::CloudController
4+
RSpec.describe MetricsWebserver do
5+
let(:metrics_webserver) { described_class.new }
6+
let(:config) { double('config', get: nil) }
7+
8+
describe '#start' do
9+
it 'configures and starts a Puma server' do
10+
allow(Puma::Server).to receive(:new).and_call_original
11+
expect_any_instance_of(Puma::Server).to receive(:run)
12+
13+
metrics_webserver.start(config)
14+
end
15+
16+
context 'when no socket is specified' do
17+
before do
18+
allow(config).to receive(:get).with(:nginx, :metrics_socket).and_return(nil)
19+
end
20+
21+
it 'uses a TCP listener' do
22+
expect_any_instance_of(Puma::Server).to receive(:add_tcp_listener).with('127.0.0.1', 9395)
23+
24+
metrics_webserver.start(config)
25+
end
26+
end
27+
28+
context 'when a socket is specified' do
29+
before do
30+
allow(config).to receive(:get).with(:nginx, :metrics_socket).and_return('/tmp/metrics.sock')
31+
end
32+
33+
it 'uses a Unix socket listener' do
34+
expect_any_instance_of(Puma::Server).to receive(:add_unix_listener).with('/tmp/metrics.sock')
35+
36+
metrics_webserver.start(config)
37+
end
38+
end
39+
end
40+
end
41+
end

spec/unit/lib/cloud_controller/runner_spec.rb

Lines changed: 6 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -290,36 +290,21 @@ module VCAP::CloudController
290290
context 'when the webserver is puma' do
291291
let(:test_config_overrides) { super().merge(webserver: 'puma') }
292292

293-
it 'sets up a separate webserver for metrics' do
294-
subject
295-
296-
expect(Puma::Server).to have_received(:new).once
297-
end
293+
it 'starts the MetricsWebserver' do
294+
expect(MetricsWebserver).to receive(:new).and_call_original
295+
expect_any_instance_of(MetricsWebserver).to receive(:start)
298296

299-
it 'listens on the specified metrics port' do
300297
subject
301-
302-
expect(puma_server_double).to have_received(:add_tcp_listener).with('127.0.0.1', 9395)
303-
end
304-
305-
context 'metrics_socket is configured' do
306-
let(:test_config_overrides) { super().merge(nginx: { metrics_socket: '/tmp/metrics_socket.sock' }) }
307-
308-
it 'listens on the specified metrics socket' do
309-
subject
310-
311-
expect(puma_server_double).to have_received(:add_unix_listener).with('/tmp/metrics_socket.sock')
312-
end
313298
end
314299
end
315300

316301
context 'when the webserver is not puma' do
317302
let(:test_config_overrides) { super().merge(webserver: 'thin') }
318303

319-
it 'does not set up the webserver' do
320-
subject
304+
it 'does not start the MetricsWebserver' do
305+
expect(MetricsWebserver).not_to receive(:new)
321306

322-
expect(Puma::Server).not_to have_received(:new)
307+
subject
323308
end
324309
end
325310
end

0 commit comments

Comments
 (0)