"""
Prometheus metrics server for Jupyter Server

This module provides functionality to start a separate Prometheus metrics server
that exposes Jupyter-specific metrics on a dedicated port.

Note on HTTP Request Metrics:
The separate metrics server uses the same prometheus registry as the main server.
HTTP request duration metrics (http_request_duration_seconds) are recorded by the
main server's logging system when record_http_request_metrics=True. Since both
servers share the same registry, these metrics will be available in the separate
metrics server as well.

The record_http_request_metrics parameter controls whether the main server records
these metrics, and the separate metrics server will automatically reflect this
setting since it uses the same underlying metrics collection.

Authentication:
The separate metrics server reuses the main server's authentication settings and
handler infrastructure, ensuring consistent behavior.
"""

import threading
import tornado.web
import tornado.httpserver
import tornado.ioloop
import prometheus_client
from typing import Optional

from jupyter_server._version import __version__
from jupyter_server.base.handlers import PrometheusMetricsHandler
from jupyter_server.prometheus.metrics import (
    SERVER_INFO,
    SERVER_EXTENSION_INFO,
    LAST_ACTIVITY,
    SERVER_STARTED,
    ACTIVE_DURATION,
    HTTP_REQUEST_DURATION_SECONDS,
    KERNEL_CURRENTLY_RUNNING_TOTAL,
    TERMINAL_CURRENTLY_RUNNING_TOTAL,
)


class PrometheusMetricsServer:
    """A separate server for exposing Prometheus metrics."""

    def __init__(self, server_app):
        """Initialize the metrics server.

        Parameters
        ----------
        server_app : ServerApp
            The main Jupyter server application instance
        """
        self.server_app = server_app
        self.port = None
        self.http_server = None
        self.thread = None

    def initialize_metrics(self):
        """Initialize Jupyter-specific metrics for this server instance."""
        # Set server version info
        SERVER_INFO.info({"version": __version__})

        # Set up extension info
        for ext in self.server_app.extension_manager.extensions.values():
            SERVER_EXTENSION_INFO.labels(
                name=ext.name, version=ext.version, enabled=str(ext.enabled).lower()
            ).info({})

        # Set server start time
        started = self.server_app.web_app.settings["started"]
        SERVER_STARTED.set(started.timestamp())

        # Set up activity tracking
        LAST_ACTIVITY.set_function(lambda: self.server_app.web_app.last_activity().timestamp())
        ACTIVE_DURATION.set_function(
            lambda: (
                self.server_app.web_app.last_activity() - self.server_app.web_app.settings["started"]
            ).total_seconds()
        )

        # Set up kernel and terminal metrics
        self._setup_runtime_metrics()

        # Note: HTTP request metrics are recorded by the main server's logging system
        # via the log_request function when record_http_request_metrics=True.
        # The separate metrics server uses the same prometheus registry, so those
        # metrics will be available here as well.
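
        # For illustration only (not executed here): because both servers read from
        # the process-wide default registry, rendering metrics from either endpoint
        # amounts to the same call, e.g.
        #
        #     prometheus_client.generate_latest(prometheus_client.REGISTRY)
        #
        # which is roughly what the /metrics handler returns on both ports.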

    def _setup_runtime_metrics(self):
        """Set up metrics that track runtime state."""
        # Set up kernel count tracking
        def update_kernel_metrics():
            try:
                kernel_manager = self.server_app.kernel_manager
                if hasattr(kernel_manager, 'list_kernel_ids'):
                    kernel_ids = kernel_manager.list_kernel_ids()
                    # Reset all previously seen kernel type metrics to 0
                    # (the gauge's internal keys are tuples of label values)
                    for label_values in list(KERNEL_CURRENTLY_RUNNING_TOTAL._metrics.keys()):
                        KERNEL_CURRENTLY_RUNNING_TOTAL.labels(*label_values).set(0)

                    # Count kernels by type
                    kernel_types = {}
                    for kid in kernel_ids:
                        try:
                            kernel = kernel_manager.get_kernel(kid)
                            if hasattr(kernel, 'kernel_name'):
                                kernel_type = kernel.kernel_name
                            else:
                                kernel_type = 'unknown'
                            kernel_types[kernel_type] = kernel_types.get(kernel_type, 0) + 1
                        except Exception:
                            kernel_types['unknown'] = kernel_types.get('unknown', 0) + 1

                    # Update metrics
                    for kernel_type, count in kernel_types.items():
                        KERNEL_CURRENTLY_RUNNING_TOTAL.labels(type=kernel_type).set(count)
            except Exception as e:
                self.server_app.log.debug(f"Error updating kernel metrics: {e}")

        # Set up terminal count tracking
        def update_terminal_metrics():
            try:
                terminal_manager = getattr(self.server_app, 'terminal_manager', None)
                if terminal_manager and hasattr(terminal_manager, 'list'):
                    terminal_count = len(terminal_manager.list())
                    TERMINAL_CURRENTLY_RUNNING_TOTAL.set(terminal_count)
                else:
                    TERMINAL_CURRENTLY_RUNNING_TOTAL.set(0)
            except Exception as e:
                self.server_app.log.debug(f"Error updating terminal metrics: {e}")

        # Set up periodic updates
        def periodic_update():
            update_kernel_metrics()
            update_terminal_metrics()

        # Run initial update
        periodic_update()

        # Set up periodic updates every 30 seconds
        def start_periodic_updates():
            loop = tornado.ioloop.IOLoop.current()

            def update():
                periodic_update()
                loop.call_later(30, update)

            loop.call_later(30, update)

        # Start periodic updates in the main server's IOLoop
        if hasattr(self.server_app, 'io_loop') and self.server_app.io_loop:
            self.server_app.io_loop.add_callback(start_periodic_updates)
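
        # Design note (sketch): the chained call_later() above is a manual periodic
        # timer; an equivalent approach on the same loop would be
        #
        #     tornado.ioloop.PeriodicCallback(periodic_update, 30_000).start()
        #
        # which reschedules itself automatically every 30 seconds.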

    def start(self, port: int) -> None:
        """Start the metrics server on the specified port.

        Parameters
        ----------
        port : int
            The port to listen on for metrics requests
        """
        self.port = port

        # Initialize Jupyter metrics
        self.initialize_metrics()

        # Reuse the main server's web application settings
        # This ensures identical behavior and eliminates duplication
        main_app = self.server_app.web_app

        # Create a new application that shares those settings and handler
        # infrastructure but only serves the metrics endpoint
        metrics_app = tornado.web.Application(
            [
                (r"/metrics", PrometheusMetricsHandler),
            ],
            **main_app.settings,
        )

        # Determine authentication status for logging
        authenticate_metrics = main_app.settings.get("authenticate_prometheus", True)
        auth_info = "with authentication" if authenticate_metrics else "without authentication"
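
        # Note: when authenticate_prometheus is True, a scraper must present the same
        # credentials as the main server. Illustrative request (token auth assumed):
        #
        #     curl -H "Authorization: token <your-token>" http://localhost:<port>/metrics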

        # Create and start the HTTP server
        self.http_server = tornado.httpserver.HTTPServer(metrics_app)
        self.http_server.listen(port)

        # Start the IOLoop in a separate thread
        def start_metrics_loop():
            loop = tornado.ioloop.IOLoop()
            loop.make_current()
            loop.start()

        self.thread = threading.Thread(target=start_metrics_loop, daemon=True)
        self.thread.start()

        self.server_app.log.info(
            f"Metrics server started on port {port} {auth_info} "
            "(using Jupyter Prometheus integration)"
        )

    def stop(self) -> None:
        """Stop the metrics server."""
        if self.http_server:
            self.http_server.stop()
            self.http_server = None

        if self.thread and self.thread.is_alive():
            # The IOLoop running in the metrics thread is not stopped explicitly;
            # the daemon thread exits when the process ends.
            pass

        self.server_app.log.info(f"Metrics server stopped on port {self.port}")


def start_metrics_server(server_app, port: int) -> PrometheusMetricsServer:
    """Start a Prometheus metrics server for the given Jupyter server.

    Parameters
    ----------
    server_app : ServerApp
        The main Jupyter server application instance
    port : int
        The port to listen on for metrics requests

    Returns
    -------
    PrometheusMetricsServer
        The metrics server instance
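
    Examples
    --------
    Illustrative sketch only; assumes ``server_app`` is an initialized
    ``ServerApp`` and that the chosen port is free::

        metrics_server = start_metrics_server(server_app, 9090)
        # ... metrics are served at http://localhost:9090/metrics ...
        metrics_server.stop()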
    """
    metrics_server = PrometheusMetricsServer(server_app)
    metrics_server.start(port)
    return metrics_server