2121"""
2222
2323import threading
24- import tornado .web
24+ from typing import Optional
25+
26+ import prometheus_client
2527import tornado .httpserver
2628import tornado .ioloop
27- import prometheus_client
28- from typing import Optional
29+ import tornado .web
2930
3031from jupyter_server ._version import __version__
3132from jupyter_server .base .handlers import PrometheusMetricsHandler
3233from jupyter_server .prometheus .metrics import (
33- SERVER_INFO ,
34- SERVER_EXTENSION_INFO ,
35- LAST_ACTIVITY ,
36- SERVER_STARTED ,
3734 ACTIVE_DURATION ,
3835 HTTP_REQUEST_DURATION_SECONDS ,
3936 KERNEL_CURRENTLY_RUNNING_TOTAL ,
37+ LAST_ACTIVITY ,
38+ SERVER_EXTENSION_INFO ,
39+ SERVER_INFO ,
40+ SERVER_STARTED ,
4041 TERMINAL_CURRENTLY_RUNNING_TOTAL ,
4142)
4243
4344
4445class PrometheusMetricsServer :
4546 """A separate server for exposing Prometheus metrics."""
46-
47+
4748 def __init__ (self , server_app ):
4849 """Initialize the metrics server.
49-
50+
5051 Parameters
5152 ----------
5253 server_app : ServerApp
@@ -56,172 +57,181 @@ def __init__(self, server_app):
5657 self .port = None
5758 self .http_server = None
5859 self .thread = None
59-
60+
6061 def initialize_metrics (self ):
6162 """Initialize Jupyter-specific metrics for this server instance."""
6263 # Set server version info
6364 SERVER_INFO .info ({"version" : __version__ })
64-
65+
6566 # Set up extension info
6667 for ext in self .server_app .extension_manager .extensions .values ():
6768 SERVER_EXTENSION_INFO .labels (
6869 name = ext .name , version = ext .version , enabled = str (ext .enabled ).lower ()
6970 ).info ({})
70-
71+
7172 # Set server start time
7273 started = self .server_app .web_app .settings ["started" ]
7374 SERVER_STARTED .set (started .timestamp ())
74-
75+
7576 # Set up activity tracking
7677 LAST_ACTIVITY .set_function (lambda : self .server_app .web_app .last_activity ().timestamp ())
7778 ACTIVE_DURATION .set_function (
7879 lambda : (
79- self .server_app .web_app .last_activity () - self .server_app .web_app .settings ["started" ]
80+ self .server_app .web_app .last_activity ()
81+ - self .server_app .web_app .settings ["started" ]
8082 ).total_seconds ()
8183 )
82-
84+
8385 # Set up kernel and terminal metrics
8486 self ._setup_runtime_metrics ()
85-
87+
8688 # Note: HTTP request metrics are recorded by the main server's logging system
8789 # via the log_request function when record_http_request_metrics=True.
8890 # The separate metrics server uses the same prometheus registry, so those
8991 # metrics will be available here as well.
90-
92+
9193 def _setup_runtime_metrics (self ):
9294 """Set up metrics that track runtime state."""
95+
9396 # Set up kernel count tracking
9497 def update_kernel_metrics ():
9598 try :
9699 kernel_manager = self .server_app .kernel_manager
97- if hasattr (kernel_manager , ' list_kernel_ids' ):
100+ if hasattr (kernel_manager , " list_kernel_ids" ):
98101 kernel_ids = kernel_manager .list_kernel_ids ()
99102 # Reset all kernel type metrics to 0
100103 for kernel_type in set (KERNEL_CURRENTLY_RUNNING_TOTAL ._metrics .keys ()):
101104 KERNEL_CURRENTLY_RUNNING_TOTAL .labels (type = kernel_type ).set (0 )
102-
105+
103106 # Count kernels by type
104107 kernel_types = {}
105108 for kid in kernel_ids :
106109 try :
107110 kernel = kernel_manager .get_kernel (kid )
108- if hasattr (kernel , ' kernel_name' ):
111+ if hasattr (kernel , " kernel_name" ):
109112 kernel_type = kernel .kernel_name
110113 else :
111- kernel_type = ' unknown'
114+ kernel_type = " unknown"
112115 kernel_types [kernel_type ] = kernel_types .get (kernel_type , 0 ) + 1
113116 except Exception :
114- kernel_types [' unknown' ] = kernel_types .get (' unknown' , 0 ) + 1
115-
117+ kernel_types [" unknown" ] = kernel_types .get (" unknown" , 0 ) + 1
118+
116119 # Update metrics
117120 for kernel_type , count in kernel_types .items ():
118121 KERNEL_CURRENTLY_RUNNING_TOTAL .labels (type = kernel_type ).set (count )
119122 except Exception as e :
120123 self .server_app .log .debug (f"Error updating kernel metrics: { e } " )
121-
124+
122125 # Set up terminal count tracking
123126 def update_terminal_metrics ():
124127 try :
125- terminal_manager = getattr (self .server_app , ' terminal_manager' , None )
126- if terminal_manager and hasattr (terminal_manager , ' list' ):
128+ terminal_manager = getattr (self .server_app , " terminal_manager" , None )
129+ if terminal_manager and hasattr (terminal_manager , " list" ):
127130 terminal_count = len (terminal_manager .list ())
128131 TERMINAL_CURRENTLY_RUNNING_TOTAL .set (terminal_count )
129132 else :
130133 TERMINAL_CURRENTLY_RUNNING_TOTAL .set (0 )
131134 except Exception as e :
132135 self .server_app .log .debug (f"Error updating terminal metrics: { e } " )
133-
136+
134137 # Set up periodic updates
135138 def periodic_update ():
136139 update_kernel_metrics ()
137140 update_terminal_metrics ()
138-
141+
139142 # Run initial update
140143 periodic_update ()
141-
144+
142145 # Set up periodic updates every 30 seconds
143146 def start_periodic_updates ():
144147 loop = tornado .ioloop .IOLoop .current ()
148+
145149 def update ():
146150 periodic_update ()
147151 loop .call_later (30 , update )
152+
148153 loop .call_later (30 , update )
149-
154+
150155 # Start periodic updates in the main server's IOLoop
151- if hasattr (self .server_app , ' io_loop' ) and self .server_app .io_loop :
156+ if hasattr (self .server_app , " io_loop" ) and self .server_app .io_loop :
152157 self .server_app .io_loop .add_callback (start_periodic_updates )
153-
158+
154159 def start (self , port : int ) -> None :
155160 """Start the metrics server on the specified port.
156-
161+
157162 Parameters
158163 ----------
159164 port : int
160165 The port to listen on for metrics requests
161166 """
162167 self .port = port
163-
168+
164169 # Initialize Jupyter metrics
165170 self .initialize_metrics ()
166-
171+
167172 # Reuse the main server's web application and settings
168173 # This ensures identical behavior and eliminates duplication
169174 main_app = self .server_app .web_app
170-
175+
171176 # Create a new application that shares the same settings and handlers
172177 # but only serves the metrics endpoint
173- metrics_app = tornado .web .Application ([
174- (r"/metrics" , PrometheusMetricsHandler ),
175- ], ** main_app .settings )
176-
178+ metrics_app = tornado .web .Application (
179+ [
180+ (r"/metrics" , PrometheusMetricsHandler ),
181+ ],
182+ ** main_app .settings ,
183+ )
184+
177185 # Determine authentication status for logging
178186 authenticate_metrics = main_app .settings .get ("authenticate_prometheus" , True )
179187 auth_info = "with authentication" if authenticate_metrics else "without authentication"
180-
188+
181189 # Create and start the HTTP server
182190 self .http_server = tornado .httpserver .HTTPServer (metrics_app )
183191 self .http_server .listen (port )
184-
192+
185193 # Start the IOLoop in a separate thread
186194 def start_metrics_loop ():
187195 loop = tornado .ioloop .IOLoop ()
188196 loop .make_current ()
189197 loop .start ()
190-
198+
191199 self .thread = threading .Thread (target = start_metrics_loop , daemon = True )
192200 self .thread .start ()
193-
194- self .server_app .log .info (f"Metrics server started on port { port } { auth_info } (using Jupyter Prometheus integration)" )
195-
201+
202+ self .server_app .log .info (
203+ f"Metrics server started on port { port } { auth_info } (using Jupyter Prometheus integration)"
204+ )
205+
196206 def stop (self ) -> None :
197207 """Stop the metrics server."""
198208 if self .http_server :
199209 self .http_server .stop ()
200210 self .http_server = None
201-
211+
202212 if self .thread and self .thread .is_alive ():
203213 # Note: Tornado IOLoop doesn't have a clean stop method
204214 # The thread will exit when the process ends
205215 pass
206-
216+
207217 self .server_app .log .info (f"Metrics server stopped on port { self .port } " )
208218
209219
def start_metrics_server(server_app, port: int) -> PrometheusMetricsServer:
    """Create a Prometheus metrics server for a Jupyter server and start it.

    Parameters
    ----------
    server_app : ServerApp
        The main Jupyter server application instance
    port : int
        The port to listen on for metrics requests

    Returns
    -------
    PrometheusMetricsServer
        The metrics server instance, after ``start(port)`` has been called
    """
    server = PrometheusMetricsServer(server_app)
    server.start(port)
    return server
0 commit comments