From 8ddfcff7419243afe93aacc92c71a2815116cf9a Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Sun, 17 Nov 2024 17:16:30 -0500 Subject: [PATCH 1/9] Replace tornado options with traitlets --- jupyterhub_idle_culler/__init__.py | 294 ++++++++++++++++++----------- 1 file changed, 184 insertions(+), 110 deletions(-) diff --git a/jupyterhub_idle_culler/__init__.py b/jupyterhub_idle_culler/__init__.py index cddbde3..79d390f 100755 --- a/jupyterhub_idle_culler/__init__.py +++ b/jupyterhub_idle_culler/__init__.py @@ -7,6 +7,7 @@ import json import os import ssl +import sys from datetime import datetime, timezone from functools import partial from textwrap import dedent @@ -18,7 +19,8 @@ from tornado.httputil import url_concat from tornado.ioloop import IOLoop, PeriodicCallback from tornado.log import app_log -from tornado.options import define, options, parse_command_line +from traitlets import Bool, Int, Unicode, default +from traitlets.config import Application __version__ = "1.4.1.dev" @@ -452,186 +454,258 @@ async def handle_user(user): app_log.debug("Finished culling %s", name) -def main(): - define( - "url", - default=os.environ.get("JUPYTERHUB_API_URL"), +class IdleCuller(Application): + + api_page_size = Int( + 0, help=dedent( """ - The JupyterHub API URL. + Number of users to request per page, + when using JupyterHub 2.0's paginated user list API. + Default: user the server-side default configured page size. """ ).strip(), + ).tag( + config=True, ) - define( - "timeout", - type=int, - default=600, + + concurrency = Int( + 10, help=dedent( """ - The idle timeout (in seconds). + Limit the number of concurrent requests made to the Hub. + + Deleting a lot of users at the same time can slow down the Hub, + so limit the number of API requests we have outstanding at any given time. """ ).strip(), + ).tag( + config=True, ) - define( - "cull_every", - type=int, - default=0, + + config_file = Unicode( + "idle_culler_config.py", help=dedent( """ - The interval (in seconds) for checking for idle servers to cull. + Config file to load. """ ).strip(), + ).tag( + config=True, ) - define( - "max_age", - type=int, - default=0, + + cull_admin_users = Bool( + True, help=dedent( """ - The maximum age (in seconds) of servers that should be culled even if they are active. + Whether admin users should be culled (only if --cull-users=true). """ ).strip(), + ).tag( + config=True, ) - define( - "cull_users", - type=bool, - default=False, + + cull_default_servers = Bool( + True, help=dedent( """ - Cull users in addition to servers. - - This is for use in temporary-user cases such as tmpnb. + Whether default servers should be culled (only if --cull-default-servers=true). """ ).strip(), + ).tag( + config=True, ) - define( - "remove_named_servers", - default=False, - type=bool, + + cull_every = Int( + 0, help=dedent( """ - Remove named servers in addition to stopping them. + The interval (in seconds) for checking for idle servers to cull. + """ + ).strip(), + ).tag( + config=True, + ) - This is useful for a BinderHub that uses authentication and named servers. + @default("cull_every") + def _default_cull_every(self): + return self.timeout // 2 + + cull_named_servers = Bool( + True, + help=dedent( + """ + Whether named servers should be culled (only if --cull-named-servers=true). """ ).strip(), + ).tag( + config=True, ) - define( - "concurrency", - type=int, - default=10, + + cull_users = Bool( + False, help=dedent( """ - Limit the number of concurrent requests made to the Hub. + Cull users in addition to servers. - Deleting a lot of users at the same time can slow down the Hub, - so limit the number of API requests we have outstanding at any given time. + This is for use in temporary-user cases such as tmpnb. """ ).strip(), + ).tag( + config=True, ) - define( - "ssl_enabled", - type=bool, - default=False, + + generate_config = Bool( + False, help=dedent( """ - Whether the Jupyter API endpoint has TLS enabled. + Generate default config file. """ ).strip(), + ).tag( + config=True, ) - define( - "internal_certs_location", - type=str, - default="internal-ssl", + + internal_certs_location = Unicode( + "internal-ssl", help=dedent( """ The location of generated internal-ssl certificates (only needed with --ssl-enabled=true). """ ).strip(), + ).tag( + config=True, ) - define( - "cull_admin_users", - type=bool, - default=True, + + max_age = Int( + 0, help=dedent( """ - Whether admin users should be culled (only if --cull-users=true). + The maximum age (in seconds) of servers that should be culled even if they are active.", """ ).strip(), + ).tag( + config=True, ) - define( - "api_page_size", - type=int, - default=0, + + remove_named_servers = Bool( + False, help=dedent( """ - Number of users to request per page, - when using JupyterHub 2.0's paginated user list API. - Default: user the server-side default configured page size. + Remove named servers in addition to stopping them. + + This is useful for a BinderHub that uses authentication and named servers. """ ).strip(), + ).tag( + config=True, ) - define( - "cull_default_servers", - type=bool, - default=True, + + ssl_enabled = Bool( + False, help=dedent( """ - Whether default servers should be culled (only if --cull-default-servers=true). + Whether the Jupyter API endpoint has TLS enabled. """ ).strip(), + ).tag( + config=True, ) - define( - "cull_named_servers", - type=bool, - default=True, + + timeout = Int( + 600, help=dedent( """ - Whether named servers should be culled (only if --cull-named-servers=true). + The idle timeout (in seconds). """ ).strip(), + ).tag( + config=True, ) - parse_command_line() - if not options.cull_every: - options.cull_every = options.timeout // 2 - api_token = os.environ["JUPYTERHUB_API_TOKEN"] - - try: - AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient") - except ImportError as e: - app_log.warning( - f"Could not load pycurl: {e}\n" - "pycurl is recommended if you have a large number of users." + url = Unicode( + os.environ.get("JUPYTERHUB_API_URL"), + help=dedent( + """ + The JupyterHub API URL. + """ + ).strip(), + ).tag( + config=True, + ) + + aliases = { + "api-page-size": "IdleCuller.api_page_size", + "concurrency": "IdleCuller.concurrency", + "cull-admin-users": "IdleCuller.cull_admin_users", + "cull-default-servers": "IdleCuller.cull_default_servers", + "cull-every": "IdleCuller.cull_every", + "cull-named-servers": "IdleCuller.cull_named_servers", + "cull-users": "IdleCuller.cull_users", + "internal-certs-location": "IdleCuller.internal_certs_location", + "max-age": "IdleCuller.max_age", + "remove-named-servers": "IdleCuller.remove_named_servers", + "ssl-enabled": "IdleCuller.ssl_enabled", + "timeout": "IdleCuller.timeout", + "url": "IdleCuller.url", + } + + flags = { + "generate-config": ( + {"IdleCuller": {"generate_config": True}}, + generate_config.help, ) + } - loop = IOLoop.current() - cull = partial( - cull_idle, - url=options.url, - api_token=api_token, - inactive_limit=options.timeout, - cull_users=options.cull_users, - remove_named_servers=options.remove_named_servers, - max_age=options.max_age, - concurrency=options.concurrency, - ssl_enabled=options.ssl_enabled, - internal_certs_location=options.internal_certs_location, - cull_admin_users=options.cull_admin_users, - api_page_size=options.api_page_size, - cull_default_servers=options.cull_default_servers, - cull_named_servers=options.cull_named_servers, - ) - # schedule first cull immediately - # because PeriodicCallback doesn't start until the end of the first interval - loop.add_callback(cull) - # schedule periodic cull - pc = PeriodicCallback(cull, 1e3 * options.cull_every) - pc.start() - try: - loop.start() - except KeyboardInterrupt: - pass + def start(self): + + if self.generate_config: + print(self.generate_config_file()) + sys.exit(0) + + if self.config_file: + self.load_config_file(self.config_file) + + api_token = os.environ["JUPYTERHUB_API_TOKEN"] + + try: + AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient") + except ImportError as e: + app_log.warning( + f"Could not load pycurl: {e}\n" + "pycurl is recommended if you have a large number of users." + ) + + loop = IOLoop.current() + cull = partial( + cull_idle, + url=self.url, + api_token=api_token, + inactive_limit=self.timeout, + cull_users=self.cull_users, + remove_named_servers=self.remove_named_servers, + max_age=self.max_age, + concurrency=self.concurrency, + ssl_enabled=self.ssl_enabled, + internal_certs_location=self.internal_certs_location, + cull_admin_users=self.cull_admin_users, + api_page_size=self.api_page_size, + cull_default_servers=self.cull_default_servers, + cull_named_servers=self.cull_named_servers, + ) + # schedule first cull immediately + # because PeriodicCallback doesn't start until the end of the first interval + loop.add_callback(cull) + # schedule periodic cull + pc = PeriodicCallback(cull, 1e3 * self.cull_every) + pc.start() + try: + loop.start() + except KeyboardInterrupt: + pass + +def main(): + IdleCuller.launch_instance() if __name__ == "__main__": From 3e51020ac9bf9d22170d862ef0c25f41d9702de1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 17 Nov 2024 22:41:00 +0000 Subject: [PATCH 2/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- jupyterhub_idle_culler/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/jupyterhub_idle_culler/__init__.py b/jupyterhub_idle_culler/__init__.py index 79d390f..8314079 100755 --- a/jupyterhub_idle_culler/__init__.py +++ b/jupyterhub_idle_culler/__init__.py @@ -704,6 +704,7 @@ def start(self): except KeyboardInterrupt: pass + def main(): IdleCuller.launch_instance() From e375eecf3da7c249ddbd279dcf426847181144af Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Thu, 5 Dec 2024 20:23:23 -0800 Subject: [PATCH 3/9] Use traitlets logger with JupyterHub format --- jupyterhub_idle_culler/__init__.py | 61 ++++++++++++++++++++---------- tests/test_idle_culler.py | 5 ++- 2 files changed, 43 insertions(+), 23 deletions(-) diff --git a/jupyterhub_idle_culler/__init__.py b/jupyterhub_idle_culler/__init__.py index 79d390f..a8c61e1 100755 --- a/jupyterhub_idle_culler/__init__.py +++ b/jupyterhub_idle_culler/__init__.py @@ -5,6 +5,7 @@ import asyncio import json +import logging import os import ssl import sys @@ -18,7 +19,7 @@ from tornado.httpclient import AsyncHTTPClient, HTTPRequest from tornado.httputil import url_concat from tornado.ioloop import IOLoop, PeriodicCallback -from tornado.log import app_log +from tornado.log import LogFormatter from traitlets import Bool, Int, Unicode, default from traitlets.config import Application @@ -81,6 +82,7 @@ async def cull_idle( url, api_token, inactive_limit, + logger, cull_users=False, remove_named_servers=False, max_age=0, @@ -109,8 +111,8 @@ async def cull_idle( f"{internal_certs_location}/hub-ca/hub-ca.crt", ) - app_log.debug("ssl_enabled is Enabled: %s", ssl_enabled) - app_log.debug("internal_certs_location is %s", internal_certs_location) + logger.debug("ssl_enabled is Enabled: %s", ssl_enabled) + logger.debug("internal_certs_location is %s", internal_certs_location) defaults["ssl_options"] = ssl_context AsyncHTTPClient.configure(None, defaults=defaults) @@ -155,7 +157,7 @@ async def fetch_paginated(req): next_info = resp_model["_pagination"]["next"] if next_info: page_no += 1 - app_log.info(f"Fetching page {page_no} {next_info['url']}") + logger.info(f"Fetching page {page_no} {next_info['url']}") # submit next request req.url = next_info["url"] resp_future = asyncio.ensure_future(fetch(req)) @@ -164,7 +166,7 @@ async def fetch_paginated(req): item_count += 1 yield item - app_log.debug(f"Fetched {item_count} items from {url} in {page_no} pages") + logger.debug(f"Fetched {item_count} items from {url} in {page_no} pages") # Starting with jupyterhub 1.3.0 the users can be filtered in the server # using the `state` filter parameter. "ready" means all users who have any @@ -189,7 +191,7 @@ async def handle_server(user, server_name, server, max_age, inactive_limit): if server_name: log_name = f"{user['name']}/{server_name}" if server.get("pending"): - app_log.warning( + logger.warning( f"Not culling server {log_name} with pending {server['pending']}" ) return False @@ -202,7 +204,7 @@ async def handle_server(user, server_name, server, max_age, inactive_limit): # but let's check just to be safe. if not server.get("ready", bool(server["url"])): - app_log.warning( + logger.warning( f"Not culling not-ready not-pending server {log_name}: {server}" ) return False @@ -250,7 +252,7 @@ async def handle_server(user, server_name, server, max_age, inactive_limit): ) ) if should_cull: - app_log.info( + logger.info( f"Culling server {log_name} (inactive for {format_td(inactive)})" ) @@ -259,7 +261,7 @@ async def handle_server(user, server_name, server, max_age, inactive_limit): # so that we can still be compatible with jupyterhub 0.8 # which doesn't define the 'started' field if age is not None and age.total_seconds() >= max_age: - app_log.info( + logger.info( "Culling server %s (age: %s, inactive for %s)", log_name, format_td(age), @@ -268,7 +270,7 @@ async def handle_server(user, server_name, server, max_age, inactive_limit): should_cull = True if not should_cull: - app_log.debug( + logger.debug( "Not culling server %s (age: %s, inactive for %s)", log_name, format_td(age), @@ -305,7 +307,7 @@ async def handle_server(user, server_name, server, max_age, inactive_limit): ) resp = await fetch(req) if resp.code == 202: - app_log.warning(f"Server {log_name} is slow to stop") + logger.warning(f"Server {log_name} is slow to stop") # return False to prevent culling user with pending shutdowns return False return True @@ -349,7 +351,7 @@ async def handle_user(user): # some servers are still running, cannot cull users still_alive = len(results) - sum(results) if still_alive: - app_log.debug( + logger.debug( "Not culling user %s with %i servers still alive", user["name"], still_alive, @@ -380,21 +382,21 @@ async def handle_user(user): ) and (cull_admin_users or not user_is_admin) if should_cull: - app_log.info(f"Culling user {user['name']} " f"(inactive for {inactive})") + logger.info(f"Culling user {user['name']} " f"(inactive for {inactive})") if max_age and not should_cull: # only check created if max_age is specified # so that we can still be compatible with jupyterhub 0.8 # which doesn't define the 'started' field if age is not None and age.total_seconds() >= max_age: - app_log.info( + logger.info( f"Culling user {user['name']} " f"(age: {format_td(age)}, inactive for {format_td(inactive)})" ) should_cull = True if not should_cull: - app_log.debug( + logger.debug( f"Not culling user {user['name']} " f"(created: {format_td(age)}, last active: {format_td(inactive)})" ) @@ -424,7 +426,7 @@ async def handle_user(user): async for user in fetch_paginated(req): n_idle += 1 futures.append((user["name"], handle_user(user))) - app_log.debug(f"Got {n_idle} users with inactive servers") + logger.debug(f"Got {n_idle} users with inactive servers") if state_filter: params["state"] = "ready" @@ -440,18 +442,18 @@ async def handle_user(user): futures.append((user["name"], handle_user(user))) if state_filter: - app_log.debug(f"Got {n_users} users with ready servers") + logger.debug(f"Got {n_users} users with ready servers") else: - app_log.debug(f"Got {n_users} users") + logger.debug(f"Got {n_users} users") for name, f in futures: try: result = await f except Exception: - app_log.exception(f"Error processing {name}") + logger.exception(f"Error processing {name}") else: if result: - app_log.debug("Finished culling %s", name) + logger.debug("Finished culling %s", name) class IdleCuller(Application): @@ -577,6 +579,22 @@ def _default_cull_every(self): config=True, ) + _log_formatter_cls = LogFormatter + + @default('log_level') + def _log_level_default(self): + return logging.INFO + + @default('log_datefmt') + def _log_datefmt_default(self): + """Exclude date from default date format""" + return "%Y-%m-%d %H:%M:%S" + + @default('log_format') + def _log_format_default(self): + """override default log format to include time""" + return "%(color)s[%(levelname)1.1s %(asctime)s.%(msecs).03d %(name)s %(module)s:%(lineno)d]%(end_color)s %(message)s" + max_age = Int( 0, help=dedent( @@ -671,7 +689,7 @@ def start(self): try: AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient") except ImportError as e: - app_log.warning( + self.log.warning( f"Could not load pycurl: {e}\n" "pycurl is recommended if you have a large number of users." ) @@ -682,6 +700,7 @@ def start(self): url=self.url, api_token=api_token, inactive_limit=self.timeout, + logger=self.log, cull_users=self.cull_users, remove_named_servers=self.remove_named_servers, max_age=self.max_age, diff --git a/tests/test_idle_culler.py b/tests/test_idle_culler.py index 88e9901..f894124 100644 --- a/tests/test_idle_culler.py +++ b/tests/test_idle_culler.py @@ -3,6 +3,7 @@ from subprocess import check_output from unittest import mock +from tornado.log import app_log from jupyterhub_idle_culler import utcnow @@ -32,7 +33,7 @@ async def test_cull_idle(cull_idle, start_users, admin_request): assert await count_active_users(admin_request) == 0 await start_users(3) assert await count_active_users(admin_request) == 3 - await cull_idle(inactive_limit=300) + await cull_idle(inactive_limit=300, logger=app_log) # no change assert await count_active_users(admin_request) == 3 @@ -40,7 +41,7 @@ async def test_cull_idle(cull_idle, start_users, admin_request): with mock.patch( "jupyterhub_idle_culler.utcnow", lambda: utcnow() + timedelta(seconds=600) ): - await cull_idle(inactive_limit=300) + await cull_idle(inactive_limit=300, logger=app_log) assert await count_active_users(admin_request) == 0 From aeefd9deafd1395aa371c4da1d7915de8f56e082 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 6 Dec 2024 04:41:22 +0000 Subject: [PATCH 4/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- jupyterhub_idle_culler/__init__.py | 6 +++--- tests/test_idle_culler.py | 1 + 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/jupyterhub_idle_culler/__init__.py b/jupyterhub_idle_culler/__init__.py index 70242d2..a3f59f2 100755 --- a/jupyterhub_idle_culler/__init__.py +++ b/jupyterhub_idle_culler/__init__.py @@ -581,16 +581,16 @@ def _default_cull_every(self): _log_formatter_cls = LogFormatter - @default('log_level') + @default("log_level") def _log_level_default(self): return logging.INFO - @default('log_datefmt') + @default("log_datefmt") def _log_datefmt_default(self): """Exclude date from default date format""" return "%Y-%m-%d %H:%M:%S" - @default('log_format') + @default("log_format") def _log_format_default(self): """override default log format to include time""" return "%(color)s[%(levelname)1.1s %(asctime)s.%(msecs).03d %(name)s %(module)s:%(lineno)d]%(end_color)s %(message)s" diff --git a/tests/test_idle_culler.py b/tests/test_idle_culler.py index f894124..e6986df 100644 --- a/tests/test_idle_culler.py +++ b/tests/test_idle_culler.py @@ -4,6 +4,7 @@ from unittest import mock from tornado.log import app_log + from jupyterhub_idle_culler import utcnow From 44d556762d2cea59f6b298b46e7c6a17e297e923 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Wed, 22 Jan 2025 12:36:53 -0800 Subject: [PATCH 5/9] Add config-file alias --- jupyterhub_idle_culler/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/jupyterhub_idle_culler/__init__.py b/jupyterhub_idle_culler/__init__.py index a8c61e1..b9f9f48 100755 --- a/jupyterhub_idle_culler/__init__.py +++ b/jupyterhub_idle_culler/__init__.py @@ -655,6 +655,7 @@ def _log_format_default(self): aliases = { "api-page-size": "IdleCuller.api_page_size", "concurrency": "IdleCuller.concurrency", + "config-file": "IdleCuller.config_file", "cull-admin-users": "IdleCuller.cull_admin_users", "cull-default-servers": "IdleCuller.cull_default_servers", "cull-every": "IdleCuller.cull_every", From 2c8906b321e0b2235b3379a1e64fc0fe6873dac9 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 14 Feb 2025 16:29:25 -0800 Subject: [PATCH 6/9] Add traitlets as a dependency --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 6bc9d33..4cb78f4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,6 +28,7 @@ dependencies = [ "tornado", "packaging", "python-dateutil", + "traitlets", ] dynamic = ["version"] From a3dc47f1b455ade7774f471717afd855ac600524 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 14 Feb 2025 16:30:39 -0800 Subject: [PATCH 7/9] Allow url to be None aligned with tornado.options --- jupyterhub_idle_culler/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/jupyterhub_idle_culler/__init__.py b/jupyterhub_idle_culler/__init__.py index 062e67c..631d237 100755 --- a/jupyterhub_idle_culler/__init__.py +++ b/jupyterhub_idle_culler/__init__.py @@ -643,6 +643,7 @@ def _log_format_default(self): url = Unicode( os.environ.get("JUPYTERHUB_API_URL"), + allow_none=True, help=dedent( """ The JupyterHub API URL. From 73852f764726e2f95f8ad295ab38a7291c6c9b3c Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 14 Feb 2025 16:33:29 -0800 Subject: [PATCH 8/9] Use `--config` for alias aligned with JupyterHub --- jupyterhub_idle_culler/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jupyterhub_idle_culler/__init__.py b/jupyterhub_idle_culler/__init__.py index 631d237..9cd729e 100755 --- a/jupyterhub_idle_culler/__init__.py +++ b/jupyterhub_idle_culler/__init__.py @@ -656,7 +656,7 @@ def _log_format_default(self): aliases = { "api-page-size": "IdleCuller.api_page_size", "concurrency": "IdleCuller.concurrency", - "config-file": "IdleCuller.config_file", + "config": "IdleCuller.config_file", "cull-admin-users": "IdleCuller.cull_admin_users", "cull-default-servers": "IdleCuller.cull_default_servers", "cull-every": "IdleCuller.cull_every", From 12907f9502132fe28bac55fc6b68a630d028cb19 Mon Sep 17 00:00:00 2001 From: Rollin Thomas Date: Fri, 14 Feb 2025 16:36:36 -0800 Subject: [PATCH 9/9] Document the `--config` option --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 843a620..8ea405a 100644 --- a/README.md +++ b/README.md @@ -131,6 +131,8 @@ python3 -m jupyterhub_idle_culler [--timeout=900] [--url=http://localhost:8081/h same time can slow down the Hub, so limit the number of API requests we have outstanding at any given time. (default 10) + --config Service configuration file to load. + (default idle_culler_config.py) --cull-admin-users Whether admin users should be culled (only if --cull-users=true). (default True) --cull-every The interval (in seconds) for checking for