1+ """
2+ Prometheus metrics server for Jupyter Server
3+
4+ This module provides functionality to start a separate Prometheus metrics server
5+ that exposes Jupyter-specific metrics on a dedicated port.
6+
7+ Note on HTTP Request Metrics:
8+ The separate metrics server uses the same prometheus registry as the main server.
9+ HTTP request duration metrics (http_request_duration_seconds) are recorded by the
10+ main server's logging system when record_http_request_metrics=True. Since both
11+ servers share the same registry, these metrics will be available in the separate
12+ metrics server as well.
13+
14+ The record_http_request_metrics parameter controls whether the main server records
15+ these metrics, and the separate metrics server will automatically reflect this
16+ setting since it uses the same underlying metrics collection.
17+
18+ Authentication:
19+ The separate metrics server reuses the main server's authentication settings and
20+ handler infrastructure, ensuring consistent behavior.
21+ """
22+
23+ import threading
24+ import tornado .web
25+ import tornado .httpserver
26+ import tornado .ioloop
27+ import prometheus_client
28+ from typing import Optional
29+
30+ from jupyter_server ._version import __version__
31+ from jupyter_server .base .handlers import PrometheusMetricsHandler
32+ from jupyter_server .prometheus .metrics import (
33+ SERVER_INFO ,
34+ SERVER_EXTENSION_INFO ,
35+ LAST_ACTIVITY ,
36+ SERVER_STARTED ,
37+ ACTIVE_DURATION ,
38+ HTTP_REQUEST_DURATION_SECONDS ,
39+ KERNEL_CURRENTLY_RUNNING_TOTAL ,
40+ TERMINAL_CURRENTLY_RUNNING_TOTAL ,
41+ )
42+
43+
class PrometheusMetricsServer:
    """Serve the ``/metrics`` endpoint on a dedicated port.

    The endpoint is handled by ``PrometheusMetricsHandler`` inside a Tornado
    application built from the main web application's settings.  The HTTP
    server runs on its own IOLoop in a daemon thread, while the kernel and
    terminal gauges are refreshed periodically on the main server's IOLoop.
    """

    # Seconds between two refreshes of the kernel/terminal gauges.
    RUNTIME_METRICS_UPDATE_INTERVAL = 30
    # Maximum number of seconds ``stop`` waits for the serving thread to exit.
    STOP_TIMEOUT = 5

    def __init__(self, server_app):
        """Initialize the metrics server.

        Parameters
        ----------
        server_app : ServerApp
            The main Jupyter server application instance
        """
        self.server_app = server_app
        self.port = None
        self.http_server = None
        self.thread = None
        # IOLoop owned by ``self.thread``; only set while the server is running.
        self._io_loop = None
        # Periodic refresh of the runtime gauges and the loop it was scheduled on.
        self._periodic_callback = None
        self._periodic_io_loop = None

    def initialize_metrics(self):
        """Initialize Jupyter-specific metrics for this server instance."""
        # Set server version info
        SERVER_INFO.info({"version": __version__})

        # Set up extension info
        for ext in self.server_app.extension_manager.extensions.values():
            SERVER_EXTENSION_INFO.labels(
                name=ext.name, version=ext.version, enabled=str(ext.enabled).lower()
            ).info({})

        # Set server start time
        started = self.server_app.web_app.settings["started"]
        SERVER_STARTED.set(started.timestamp())

        # Set up activity tracking.  The lambdas look the web app up on every
        # call so the gauges always reflect the current state at scrape time.
        LAST_ACTIVITY.set_function(
            lambda: self.server_app.web_app.last_activity().timestamp()
        )
        ACTIVE_DURATION.set_function(
            lambda: (
                self.server_app.web_app.last_activity()
                - self.server_app.web_app.settings["started"]
            ).total_seconds()
        )

        # Set up kernel and terminal metrics
        self._setup_runtime_metrics()

        # Note: HTTP request metrics are recorded by the main server's logging system
        # via the log_request function when record_http_request_metrics=True.
        # The separate metrics server uses the same prometheus registry, so those
        # metrics will be available here as well.

    def _update_kernel_metrics(self):
        """Set the per-kernel-type gauge to the number of kernels currently listed."""
        try:
            kernel_manager = self.server_app.kernel_manager
            if not hasattr(kernel_manager, "list_kernel_ids"):
                return

            # Count kernels by type before touching the gauge, so a concurrent
            # scrape never observes a transient zero for a running kernel type.
            counts = {}
            for kernel_id in kernel_manager.list_kernel_ids():
                try:
                    kernel = kernel_manager.get_kernel(kernel_id)
                    kernel_type = getattr(kernel, "kernel_name", "unknown")
                except Exception:
                    kernel_type = "unknown"
                counts[kernel_type] = counts.get(kernel_type, 0) + 1

            # Discover the kernel types that already have a series through the
            # public collect() API.  The private ``_metrics`` mapping is keyed by
            # label-value tuples, which must not be passed to ``labels(type=...)``.
            known_types = {
                sample.labels["type"]
                for metric in KERNEL_CURRENTLY_RUNNING_TOTAL.collect()
                for sample in metric.samples
                if "type" in sample.labels
            }

            # Types that no longer have a running kernel drop back to zero.
            for kernel_type in known_types | set(counts):
                KERNEL_CURRENTLY_RUNNING_TOTAL.labels(type=kernel_type).set(
                    counts.get(kernel_type, 0)
                )
        except Exception as e:
            # Best effort: a failing refresh must never break the server.
            self.server_app.log.debug(f"Error updating kernel metrics: {e}")

    def _update_terminal_metrics(self):
        """Set the terminal gauge to the number of terminals currently listed."""
        try:
            terminal_manager = getattr(self.server_app, "terminal_manager", None)
            if terminal_manager and hasattr(terminal_manager, "list"):
                TERMINAL_CURRENTLY_RUNNING_TOTAL.set(len(terminal_manager.list()))
            else:
                TERMINAL_CURRENTLY_RUNNING_TOTAL.set(0)
        except Exception as e:
            # Best effort: a failing refresh must never break the server.
            self.server_app.log.debug(f"Error updating terminal metrics: {e}")

    def _refresh_runtime_metrics(self):
        """Refresh every gauge that tracks runtime state."""
        self._update_kernel_metrics()
        self._update_terminal_metrics()

    def _setup_runtime_metrics(self):
        """Set up metrics that track runtime state.

        The gauges are refreshed once immediately and then every
        ``RUNTIME_METRICS_UPDATE_INTERVAL`` seconds on the main server's IOLoop.
        """
        # Run initial update
        self._refresh_runtime_metrics()

        io_loop = getattr(self.server_app, "io_loop", None)
        if not io_loop:
            self.server_app.log.debug(
                "No IOLoop available on the server app; kernel and terminal "
                "metrics will not be refreshed periodically"
            )
            return

        # Never keep two refresh schedules alive at the same time.
        self._stop_periodic_updates()

        periodic = tornado.ioloop.PeriodicCallback(
            self._refresh_runtime_metrics,
            self.RUNTIME_METRICS_UPDATE_INTERVAL * 1000,  # milliseconds
        )
        self._periodic_callback = periodic
        self._periodic_io_loop = io_loop
        # PeriodicCallback.start() binds to the IOLoop that is current when it is
        # called, so it has to be invoked from the main server's IOLoop.
        io_loop.add_callback(periodic.start)

    def _stop_periodic_updates(self):
        """Cancel the periodic refresh of the runtime metrics, if one is scheduled."""
        periodic, io_loop = self._periodic_callback, self._periodic_io_loop
        self._periodic_callback = None
        self._periodic_io_loop = None
        if periodic is not None and io_loop is not None:
            # add_callback may be called from any thread and keeps FIFO order, so
            # this stop always runs after a pending start.
            io_loop.add_callback(periodic.stop)

    @staticmethod
    def _run_io_loop(io_loop):
        """Run ``io_loop`` until it is stopped, then release its resources."""
        try:
            io_loop.start()
        finally:
            # Also closes connections that were still open when the loop stopped.
            io_loop.close(all_fds=True)

    def start(self, port: int) -> None:
        """Start the metrics server on the specified port.

        The listening sockets are bound in the calling thread, so a bind
        failure (for instance a port that is already in use) is raised to the
        caller.  Requests are then served by a dedicated IOLoop running in a
        daemon thread.

        Parameters
        ----------
        port : int
            The port to listen on for metrics requests

        Raises
        ------
        OSError
            If the port cannot be bound.
        """
        # Local import: only needed here, and keeps the module-level imports untouched.
        from tornado.netutil import bind_sockets

        # Restart cleanly instead of leaking the previous loop and thread.
        if self.thread is not None or self.http_server is not None:
            self.stop()

        self.port = port

        # Bind before doing anything else so that no metric state is touched
        # when the port is unavailable.
        sockets = bind_sockets(port)
        io_loop = None
        try:
            # Initialize Jupyter metrics
            self.initialize_metrics()

            # Reuse the main server's settings so the metrics handler sees the
            # same configuration; only the metrics endpoint is routed.
            main_app = self.server_app.web_app
            metrics_app = tornado.web.Application(
                [(r"/metrics", PrometheusMetricsHandler)],
                **main_app.settings,
            )

            # Determine authentication status for logging
            authenticate_metrics = main_app.settings.get("authenticate_prometheus", True)
            auth_info = (
                "with authentication" if authenticate_metrics else "without authentication"
            )

            http_server = tornado.httpserver.HTTPServer(metrics_app)

            # A dedicated loop that is NOT made current in the calling thread, so
            # the caller's own event loop is left alone.
            io_loop = tornado.ioloop.IOLoop(make_current=False)
            # add_sockets() registers its accept handlers on the current IOLoop,
            # so it must run on the new loop rather than in the calling thread.
            io_loop.add_callback(http_server.add_sockets, sockets)

            thread = threading.Thread(
                target=self._run_io_loop,
                args=(io_loop,),
                name="prometheus-metrics-server",
                daemon=True,
            )
            thread.start()
        except Exception:
            # Undo the partial start so nothing is left bound or scheduled.
            self._stop_periodic_updates()
            for sock in sockets:
                sock.close()
            if io_loop is not None:
                io_loop.close()
            raise

        self.http_server = http_server
        self._io_loop = io_loop
        self.thread = thread

        self.server_app.log.info(
            f"Metrics server started on port {port} {auth_info} "
            "(using Jupyter Prometheus integration)"
        )

    def stop(self) -> None:
        """Stop the metrics server.

        Cancels the periodic metric refresh, stops accepting connections, stops
        the dedicated IOLoop and waits up to ``STOP_TIMEOUT`` seconds for its
        thread to exit.  Calling this on a server that is not running only
        cancels a pending periodic refresh.
        """
        self._stop_periodic_updates()

        http_server, io_loop, thread = self.http_server, self._io_loop, self.thread
        self.http_server = None
        self._io_loop = None
        self.thread = None

        if http_server is None and io_loop is None:
            return

        if io_loop is not None:

            def shutdown():
                if http_server is not None:
                    http_server.stop()
                io_loop.stop()

            # The loop belongs to another thread; add_callback is the
            # thread-safe way to run code on it.
            io_loop.add_callback(shutdown)
        elif http_server is not None:
            http_server.stop()

        # Never join the current thread (stop() invoked from the metrics loop itself).
        if (
            thread is not None
            and thread is not threading.current_thread()
            and thread.is_alive()
        ):
            thread.join(timeout=self.STOP_TIMEOUT)

        self.server_app.log.info(f"Metrics server stopped on port {self.port}")
208+
209+
def start_metrics_server(server_app, port: int) -> PrometheusMetricsServer:
    """Create a Prometheus metrics server for ``server_app`` and start it.

    Parameters
    ----------
    server_app : ServerApp
        The main Jupyter server application instance
    port : int
        The port to listen on for metrics requests

    Returns
    -------
    PrometheusMetricsServer
        The started metrics server instance
    """
    server = PrometheusMetricsServer(server_app)
    server.start(port)
    return server
0 commit comments