diff --git a/binderhub/app.py b/binderhub/app.py index ae57543a2..c20c7f5af 100644 --- a/binderhub/app.py +++ b/binderhub/app.py @@ -50,6 +50,7 @@ from .log import log_request from .main import LegacyRedirectHandler, MainHandler, ParameterizedMainHandler from .metrics import MetricsHandler +from .quota import KubernetesLaunchQuota, LaunchQuota from .ratelimit import RateLimiter from .registry import DockerRegistry from .repoproviders import ( @@ -304,6 +305,8 @@ def _valid_badge_base_url(self, proposal): pod_quota = Integer( None, help=""" + DEPRECATED: Use c.LaunchQuota.total_quota + The number of concurrent pods this hub has been designed to support. This quota is used as an indication for how much above or below the @@ -319,6 +322,13 @@ def _valid_badge_base_url(self, proposal): config=True, ) + @observe("pod_quota") + def _pod_quota_deprecated(self, change): + self.log.warning( + "BinderHub.pod_quota is deprecated, use LaunchQuota.total_quota" + ) + self.config.LaunchQuota.total_quota = change.new + per_repo_quota_higher = Integer( 0, help=""" @@ -333,6 +343,17 @@ def _valid_badge_base_url(self, proposal): config=True, ) + launch_quota_class = Type( + LaunchQuota, + default=KubernetesLaunchQuota, + help=""" + The class used to check quotas for launched servers. 
+ + Must inherit from binderhub.quota.LaunchQuota + """, + config=True, + ) + log_tail_lines = Integer( 100, help=""" @@ -791,6 +812,8 @@ def initialize(self, *args, **kwargs): with open(schema_file) as f: self.event_log.register_schema(json.load(f)) + launch_quota = self.launch_quota_class(parent=self, executor=self.executor) + self.tornado_settings.update( { "log_function": log_request, @@ -814,6 +837,7 @@ def initialize(self, *args, **kwargs): "per_repo_quota": self.per_repo_quota, "per_repo_quota_higher": self.per_repo_quota_higher, "repo_providers": self.repo_providers, + "launch_quota": launch_quota, "rate_limiter": RateLimiter(parent=self), "use_registry": self.use_registry, "build_class": self.build_class, diff --git a/binderhub/builder.py b/binderhub/builder.py index 6b49bd593..89c32ba27 100644 --- a/binderhub/builder.py +++ b/binderhub/builder.py @@ -2,7 +2,6 @@ Handlers for working with version control services (i.e. GitHub) for builds. """ -import asyncio import hashlib import json import re @@ -23,7 +22,7 @@ from .base import BaseHandler from .build import Build, ProgressEvent -from .utils import KUBE_REQUEST_TIMEOUT +from .quota import LaunchQuotaExceeded # Separate buckets for builds and launches. # Builds and launches have very different characteristic times, @@ -586,73 +585,29 @@ async def launch(self, provider): # Load the spec-specific configuration if it has been overridden repo_config = provider.repo_config(self.settings) - # the image name (without tag) is unique per repo - # use this to count the number of pods running with a given repo - # if we added annotations/labels with the repo name via KubeSpawner - # we could do this better - image_no_tag = self.image_name.rsplit(":", 1)[0] - - # TODO: put busy users in a queue rather than fail? - # That would be hard to do without in-memory state. 
- repo_quota = repo_config.get("quota") - pod_quota = self.settings["pod_quota"] - if pod_quota is not None or repo_quota: - # Fetch info on currently running users *only* if quotas are set - matching_pods = 0 - - # TODO: run a watch to keep this up to date in the background - f = self.settings["executor"].submit( - self.settings["kubernetes_client"].list_namespaced_pod, - self.settings["build_namespace"], - label_selector="app=jupyterhub,component=singleuser-server", - _request_timeout=KUBE_REQUEST_TIMEOUT, - _preload_content=False, + launch_quota = self.settings["launch_quota"] + try: + quota_check = await launch_quota.check_repo_quota( + self.image_name, repo_config, self.repo_url ) - resp = await asyncio.wrap_future(f) - pods = json.loads(resp.read())["items"] - total_pods = len(pods) - - if pod_quota is not None and total_pods >= pod_quota: - # check overall quota first - LAUNCH_COUNT.labels( - status="pod_quota", - **self.repo_metric_labels, - ).inc() - app_log.error(f"BinderHub is full: {total_pods}/{pod_quota}") - await self.fail("Too many users on this BinderHub! Try again soon.") - return - - for pod in pods: - for container in pod["spec"]["containers"]: - # is the container running the same image as us? - # if so, count one for the current repo. - image = container["image"].rsplit(":", 1)[0] - if image == image_no_tag: - matching_pods += 1 - break - - if repo_quota and matching_pods >= repo_quota: - LAUNCH_COUNT.labels( - status="repo_quota", - **self.repo_metric_labels, - ).inc() - app_log.error( - f"{self.repo_url} has exceeded quota: {matching_pods}/{repo_quota} ({total_pods} total)" - ) - await self.fail( - f"Too many users running {self.repo_url}! Try again soon." 
"""
Singleuser server quotas
"""

import asyncio
import json
import os
from collections import namedtuple

import kubernetes.config
from kubernetes import client
from traitlets import Any, Integer, Unicode, default
from traitlets.config import LoggingConfigurable

from .utils import KUBE_REQUEST_TIMEOUT


class LaunchQuotaExceeded(Exception):
    """Raised when a quota will be exceeded by a launch"""

    def __init__(self, message, *, quota, used, status):
        """
        message: User-facing message
        quota: Quota limit
        used: Quota used
        status: String indicating the type of quota (e.g. "pod_quota", "repo_quota")
        """
        super().__init__()
        self.message = message
        self.quota = quota
        self.used = used
        self.status = status


# Result of a successful per-repo quota check:
#   total: total matching singleuser servers running
#   matching: servers running the same repo image
#   quota: the configured per-repo quota that was checked against
ServerQuotaCheck = namedtuple("ServerQuotaCheck", ["total", "matching", "quota"])


class LaunchQuota(LoggingConfigurable):
    """Base class for checking singleuser-server launch quotas.

    The base implementation enforces no quota; subclasses override
    check_repo_quota().
    """

    executor = Any(
        allow_none=True, help="Optional Executor to use for blocking operations"
    )

    total_quota = Integer(
        None,
        help="""
        The number of concurrent singleuser servers that can be run.

        None: no quota
        0: the hub can't run any singleuser servers (e.g. in maintenance mode)
        Positive integer: sets the quota
        """,
        allow_none=True,
        config=True,
    )

    async def check_repo_quota(self, image_name, repo_config, repo_url):
        """
        Check whether launching a repository would exceed a quota.

        Parameters
        ----------
        image_name: str
        repo_config: dict
        repo_url: str

        Returns
        -------
        If quotas are disabled (or no per-repo quota applies) returns None.
        Otherwise returns a ServerQuotaCheck (total, matching, quota).

        Raises
        ------
        LaunchQuotaExceeded
            If launching would exceed a quota.
        """
        return None


class KubernetesLaunchQuota(LaunchQuota):
    """Check launch quotas by counting singleuser pods in a Kubernetes namespace."""

    api = Any(
        help="Kubernetes API object to make requests (kubernetes.client.CoreV1Api())",
    )

    @default("api")
    def _default_api(self):
        # Prefer in-cluster config; fall back to local kubeconfig for dev.
        try:
            kubernetes.config.load_incluster_config()
        except kubernetes.config.ConfigException:
            kubernetes.config.load_kube_config()
        return client.CoreV1Api()

    namespace = Unicode(help="Kubernetes namespace to check", config=True)

    @default("namespace")
    def _default_namespace(self):
        return os.getenv("BUILD_NAMESPACE", "default")

    async def check_repo_quota(self, image_name, repo_config, repo_url):
        """Check total and per-repo quotas against running singleuser pods.

        See LaunchQuota.check_repo_quota for the contract.
        """
        # the image name (without tag) is unique per repo
        # use this to count the number of pods running with a given repo
        # if we added annotations/labels with the repo name via KubeSpawner
        # we could do this better
        image_no_tag = image_name.rsplit(":", 1)[0]

        # TODO: put busy users in a queue rather than fail?
        # That would be hard to do without in-memory state.
        repo_quota = repo_config.get("quota")
        pod_quota = self.total_quota

        # Fetch info on currently running users *only* if quotas are set
        if pod_quota is not None or repo_quota:
            matching_pods = 0

            # TODO: run a watch to keep this up to date in the background
            f = self.executor.submit(
                self.api.list_namespaced_pod,
                self.namespace,
                label_selector="app=jupyterhub,component=singleuser-server",
                _request_timeout=KUBE_REQUEST_TIMEOUT,
                _preload_content=False,
            )
            resp = await asyncio.wrap_future(f)
            pods = json.loads(resp.read())["items"]
            total_pods = len(pods)

            if pod_quota is not None and total_pods >= pod_quota:
                # check overall quota first
                self.log.error(f"BinderHub is full: {total_pods}/{pod_quota}")
                raise LaunchQuotaExceeded(
                    "Too many users on this BinderHub! Try again soon.",
                    quota=pod_quota,
                    used=total_pods,
                    status="pod_quota",
                )

            for pod in pods:
                for container in pod["spec"]["containers"]:
                    # is the container running the same image as us?
                    # if so, count one for the current repo.
                    image = container["image"].rsplit(":", 1)[0]
                    if image == image_no_tag:
                        matching_pods += 1
                        break

            if repo_quota and matching_pods >= repo_quota:
                self.log.error(
                    f"{repo_url} has exceeded quota: {matching_pods}/{repo_quota} ({total_pods} total)"
                )
                raise LaunchQuotaExceeded(
                    f"Too many users running {repo_url}! Try again soon.",
                    quota=repo_quota,
                    used=matching_pods,
                    status="repo_quota",
                )

            # FIX: only report a check result when a per-repo quota is actually
            # configured. Previously a ServerQuotaCheck with quota=None was
            # returned when only total_quota was set, and callers computing
            # `0.5 * check.quota` (builder.py) would raise TypeError on None.
            if repo_quota:
                return ServerQuotaCheck(
                    total=total_pods, matching=matching_pods, quota=repo_quota
                )

        return None
"""Test launch quotas"""
import concurrent.futures
import json
from unittest import mock

import pytest

from binderhub.quota import KubernetesLaunchQuota, LaunchQuotaExceeded

# Images for the three fake running pods: two share the repo image
# (different tags), one belongs to another repo.
_POD_IMAGES = [
    "example.org/test/kubernetes_quota:1.2.3",
    "example.org/test/kubernetes_quota:latest",
    "example.org/test/other:abc",
]


@pytest.fixture
def mock_pod_list_resp():
    """A resolved Future wrapping a mocked pod-list API response."""
    payload = {
        "items": [{"spec": {"containers": [{"image": image}]}} for image in _POD_IMAGES]
    }
    response = mock.MagicMock()
    response.read.return_value = json.dumps(payload)
    future = concurrent.futures.Future()
    future.set_result(response)
    return future


def _make_quota(**kwargs):
    """Build a KubernetesLaunchQuota with mocked API and executor."""
    return KubernetesLaunchQuota(
        api=mock.MagicMock(), executor=mock.MagicMock(), **kwargs
    )


async def test_kubernetes_quota_none(mock_pod_list_resp):
    quota = _make_quota()
    quota.executor.submit.return_value = mock_pod_list_resp

    result = await quota.check_repo_quota(
        "example.org/test/kubernetes_quota", {}, "repo.url"
    )
    assert result is None


async def test_kubernetes_quota_allowed(mock_pod_list_resp):
    quota = _make_quota()
    quota.executor.submit.return_value = mock_pod_list_resp

    result = await quota.check_repo_quota(
        "example.org/test/kubernetes_quota", {"quota": 3}, "repo.url"
    )
    assert result.total == 3
    assert result.matching == 2
    assert result.quota == 3


async def test_kubernetes_quota_total_exceeded(mock_pod_list_resp):
    quota = _make_quota(total_quota=3)
    quota.executor.submit.return_value = mock_pod_list_resp

    with pytest.raises(LaunchQuotaExceeded) as excinfo:
        await quota.check_repo_quota(
            "example.org/test/kubernetes_quota", {}, "repo.url"
        )
    err = excinfo.value
    assert err.message == "Too many users on this BinderHub! Try again soon."
    assert err.quota == 3
    assert err.used == 3
    assert err.status == "pod_quota"


async def test_kubernetes_quota_repo_exceeded(mock_pod_list_resp):
    quota = _make_quota()
    quota.executor.submit.return_value = mock_pod_list_resp

    with pytest.raises(LaunchQuotaExceeded) as excinfo:
        await quota.check_repo_quota(
            "example.org/test/kubernetes_quota", {"quota": 2}, "repo.url"
        )
    err = excinfo.value
    assert err.message == "Too many users running repo.url! Try again soon."
    assert err.quota == 2
    assert err.used == 2
    assert err.status == "repo_quota"