Skip to content

Commit 7fd67a1

Browse files
authored
Add pickup and duration time metrics for processed jobs (#4312)
1 parent 071f998 commit 7fd67a1

File tree

5 files changed

+81
-2
lines changed

5 files changed

+81
-2
lines changed

lib/cloud_controller.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ module VCAP::CloudController; end
3434
require 'delayed_job_plugins/deserialization_retry'
3535
require 'delayed_job_plugins/before_enqueue_hook'
3636
require 'delayed_job_plugins/after_enqueue_hook'
37+
require 'delayed_job_plugins/delayed_jobs_metrics'
3738
require 'sequel_plugins/sequel_plugins'
3839
require 'vcap/sequel_add_association_dependencies_monkeypatch'
3940
require 'access/access'

lib/cloud_controller/metrics/prometheus_updater.rb

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ def self.allow_pid_label
1818

1919
DURATION_BUCKETS = [5, 10, 30, 60, 300, 600, 890].freeze
2020
CONNECTION_DURATION_BUCKETS = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1, 5, 10].freeze
21+
DELAYED_JOB_METRIC_BUCKETS = [0.01, 0.05, 0.1, 0.5, 1, 2, 5, 10, 30, 60, 120, 300, 600].freeze
2122

2223
METRICS = [
2324
{ type: :gauge, name: :cc_job_queues_length_total, docstring: 'Job queues length of worker processes', labels: [:queue], aggregation: :most_recent },
@@ -67,13 +68,19 @@ def self.allow_pid_label
6768
buckets: CONNECTION_DURATION_BUCKETS }
6869
].freeze
6970

71+
DELAYED_JOB_METRICS = [
72+
{ type: :histogram, name: :cc_job_pickup_delay_seconds, docstring: 'Job pickup time (from enqueue to start)', labels: %i[queue worker], buckets: DELAYED_JOB_METRIC_BUCKETS },
73+
{ type: :histogram, name: :cc_job_duration_seconds, docstring: 'Job processing time (start to finish)', labels: %i[queue worker], buckets: DELAYED_JOB_METRIC_BUCKETS }
74+
].freeze
75+
7076
def initialize(registry: Prometheus::Client.registry, cc_worker: false)
7177
self.class.allow_pid_label
7278

7379
@registry = registry
7480

7581
# Register all metrics, to initialize them for discoverability
7682
DB_CONNECTION_POOL_METRICS.each { |metric| register(metric) }
83+
DELAYED_JOB_METRICS.each { |metric| register(metric) }
7784

7885
return if cc_worker
7986

@@ -98,8 +105,8 @@ def increment_counter_metric(metric)
98105
@registry.get(metric).increment
99106
end
100107

101-
def update_histogram_metric(metric, value)
102-
@registry.get(metric).observe(value)
108+
def update_histogram_metric(metric, value, labels: {})
109+
@registry.get(metric).observe(value, labels:)
103110
end
104111

105112
def update_summary_metric(metric, value)
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
module DelayedJobMetrics
2+
class Plugin < Delayed::Plugin
3+
class << self
4+
attr_writer :prometheus
5+
6+
def prometheus
7+
@prometheus ||= CloudController::DependencyLocator.instance.cc_worker_prometheus_updater
8+
end
9+
end
10+
11+
callbacks do |lifecycle|
12+
lifecycle.after(:perform) do |worker, job|
13+
labels = { queue: job.queue, worker: worker.name }
14+
15+
job_pickup_delay = job.locked_at && job.run_at ? job.locked_at - job.run_at : nil
16+
prometheus.update_histogram_metric(:cc_job_pickup_delay_seconds, job_pickup_delay, labels:) if job_pickup_delay
17+
18+
job_duration = job.locked_at ? Time.now.utc - job.locked_at : nil
19+
prometheus.update_histogram_metric(:cc_job_duration_seconds, job_duration, labels:) if job_duration
20+
end
21+
end
22+
end
23+
end
24+
25+
Delayed::Worker.plugins << DelayedJobMetrics::Plugin

spec/db_spec_helper.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
require 'delayed_job_plugins/deserialization_retry'
1717
require 'delayed_job_plugins/after_enqueue_hook'
1818
require 'delayed_job_plugins/before_enqueue_hook'
19+
require 'delayed_job_plugins/delayed_jobs_metrics'
1920

2021
require 'support/fakes/blueprints'
2122

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
require 'spec_helper'
2+
3+
RSpec.describe DelayedJobMetrics::Plugin do
4+
let(:prometheus) { instance_double(VCAP::CloudController::Metrics::PrometheusUpdater) }
5+
6+
before do
7+
DelayedJobMetrics::Plugin.prometheus = prometheus
8+
allow(prometheus).to receive(:update_histogram_metric)
9+
end
10+
11+
after do
12+
DelayedJobMetrics::Plugin.prometheus = nil # Reset after each test
13+
end
14+
15+
it 'loads the plugin' do
16+
expect(Delayed::Worker.plugins).to include(DelayedJobMetrics::Plugin)
17+
end
18+
19+
it 'processes a job and updates Prometheus metrics with simulated time delay' do
20+
Timecop.freeze(Time.now) do
21+
events_cleanup_job = VCAP::CloudController::Jobs::Runtime::EventsCleanup.new(10_000)
22+
VCAP::CloudController::Jobs::Enqueuer.new({ queue: VCAP::CloudController::Jobs::Queues.generic }).enqueue(events_cleanup_job)
23+
24+
events_cleanup_job = Delayed::Job.last
25+
expect(events_cleanup_job).not_to be_nil
26+
27+
allow(Time).to receive(:now).and_return(Time.now + 10.seconds)
28+
worker = Delayed::Worker.new
29+
worker.name = 'test_worker'
30+
worker.work_off(1)
31+
32+
expect(prometheus).to have_received(:update_histogram_metric).with(
33+
:cc_job_pickup_delay_seconds,
34+
be_within(0.5).of(10.0),
35+
labels: { queue: VCAP::CloudController::Jobs::Queues.generic, worker: 'test_worker' }
36+
).once
37+
38+
expect(prometheus).to have_received(:update_histogram_metric).with(
39+
:cc_job_duration_seconds,
40+
kind_of(Numeric),
41+
labels: { queue: VCAP::CloudController::Jobs::Queues.generic, worker: 'test_worker' }
42+
).once
43+
end
44+
end
45+
end

0 commit comments

Comments
 (0)