Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions binderhub/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@
from .log import log_request
from .main import LegacyRedirectHandler, MainHandler, ParameterizedMainHandler
from .metrics import MetricsHandler
from .quota import KubernetesLaunchQuota, LaunchQuota
from .ratelimit import RateLimiter
from .registry import DockerRegistry
from .repoproviders import (
Expand Down Expand Up @@ -304,6 +305,8 @@ def _valid_badge_base_url(self, proposal):
pod_quota = Integer(
None,
help="""
DEPRECATED: Use c.LaunchQuota.total_quota

The number of concurrent pods this hub has been designed to support.

This quota is used as an indication for how much above or below the
Expand All @@ -319,6 +322,13 @@ def _valid_badge_base_url(self, proposal):
config=True,
)

@observe("pod_quota")
def _pod_quota_deprecated(self, change):
    """Handle changes to the deprecated ``pod_quota`` trait.

    Logs a deprecation warning, then copies the new value into the
    application config as ``LaunchQuota.total_quota``.
    """
    deprecation_notice = (
        "BinderHub.pod_quota is deprecated, use LaunchQuota.total_quota"
    )
    self.log.warning(deprecation_notice)
    # Forward the value so the replacement setting picks it up.
    new_quota = change.new
    self.config.LaunchQuota.total_quota = new_quota

per_repo_quota_higher = Integer(
0,
help="""
Expand All @@ -333,6 +343,17 @@ def _valid_badge_base_url(self, proposal):
config=True,
)

launch_quota_class = Type(
LaunchQuota,
default=KubernetesLaunchQuota,
help="""
The class used to check quotas for launched servers.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the context of binderhub, it would be great to be explicit about what this means. I now understand it as "the amount of user servers scheduled-to-run-after-build/starting/running by binderhub in the jupyterhub that binderhub manages", but I'm open to it meaning something related to build pods as well, or that it could be coupled to possible jupyterhub user servers unmanaged by binderhub.

Is "launch quota" a name introduced in this PR?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Practically, does having a launch quota of 100 imply:

  • you can have at most 100 build workloads; if you do, you must have 0 user servers started at this point in time
  • you can have at most 100 user servers; if you do, you must have 0 build workloads running at this point in time

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Previously the quota was handled inside the builder.BuildHandler.launch method. By moving it into a separate class the launch() method becomes independent of the platform (k8s, docker, HPC, etc).

The quota check determines whether BinderHub should launch a repository which is why I've called the base class LaunchQuota. The decision on whether to allow a launch or not is an implementation detail. The current default (K8s only) method is to look at the total number of label_selector="app=jupyterhub,component=singleuser-server" pods, and the total number of those pods that are running an image from the repo being launched. In future there's scope for taking other factors into account such as build pods, total CPU/memory usage across all pods running a particular repo, etc.

It's not a perfect abstraction since LaunchQuota.check_repo_quota(self, image_name, repo_config, repo_url) takes a repo_config parameter, which is created by BinderHub using some parameters:

binderhub/binderhub/app.py

Lines 292 to 334 in e80f841

per_repo_quota = Integer(
0,
help="""
Maximum number of concurrent users running from a given repo.
Limits the amount of Binder that can be consumed by a single repo.
0 (default) means no quotas.
""",
config=True,
)
pod_quota = Integer(
None,
help="""
The number of concurrent pods this hub has been designed to support.
This quota is used as an indication for how much above or below the
design capacity a hub is running.
Attempts to launch new pods once the quota has been reached will fail.
The default corresponds to no quota, 0 means the hub can't accept pods
(maybe because it is in maintenance mode), and any positive integer
sets the quota.
""",
allow_none=True,
config=True,
)
per_repo_quota_higher = Integer(
0,
help="""
Maximum number of concurrent users running from a higher-quota repo.
Limits the amount of Binder that can be consumed by a single repo. This
quota is a second limit for repos with special status. See the
`high_quota_specs` parameter of RepoProvider classes for usage.
0 (default) means no quotas.
""",
config=True,
)

and a RepoProvider based class:
def repo_config(self, settings):
"""
Return configuration for this repository.
"""
repo_config = {}
# Defaults and simple overrides
if self.has_higher_quota():
repo_config["quota"] = settings.get("per_repo_quota_higher")
else:
repo_config["quota"] = settings.get("per_repo_quota")
# Spec regex-based configuration
for item in self.spec_config:
pattern = item.get("pattern", None)
config = item.get("config", None)
if not isinstance(pattern, str):
raise ValueError(
"Spec-pattern configuration expected "
"a regex pattern string, not "
f"type {type(pattern)}"
)
if not isinstance(config, dict):
raise ValueError(
"Spec-pattern configuration expected "
"a specification configuration dict, not "
f"type {type(config)}"
)
# Ignore case, because most git providers do not
# count DS-100/textbook as different from ds-100/textbook
if re.match(pattern, self.spec, re.IGNORECASE):
repo_config.update(config)
return repo_config

but I'm trying to minimise the changes to the codebase. Assuming I've coded this correctly, there should be zero change in the existing behaviour.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the thorough clarification @manics!!


Must inherit from binderhub.quota.LaunchQuota
""",
config=True,
)

log_tail_lines = Integer(
100,
help="""
Expand Down Expand Up @@ -791,6 +812,8 @@ def initialize(self, *args, **kwargs):
with open(schema_file) as f:
self.event_log.register_schema(json.load(f))

launch_quota = self.launch_quota_class(parent=self, executor=self.executor)

self.tornado_settings.update(
{
"log_function": log_request,
Expand All @@ -814,6 +837,7 @@ def initialize(self, *args, **kwargs):
"per_repo_quota": self.per_repo_quota,
"per_repo_quota_higher": self.per_repo_quota_higher,
"repo_providers": self.repo_providers,
"launch_quota": launch_quota,
"rate_limiter": RateLimiter(parent=self),
"use_registry": self.use_registry,
"build_class": self.build_class,
Expand Down
79 changes: 17 additions & 62 deletions binderhub/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
Handlers for working with version control services (i.e. GitHub) for builds.
"""

import asyncio
import hashlib
import json
import re
Expand All @@ -23,7 +22,7 @@

from .base import BaseHandler
from .build import Build, ProgressEvent
from .utils import KUBE_REQUEST_TIMEOUT
from .quota import LaunchQuotaExceeded

# Separate buckets for builds and launches.
# Builds and launches have very different characteristic times,
Expand Down Expand Up @@ -586,73 +585,29 @@ async def launch(self, provider):
# Load the spec-specific configuration if it has been overridden
repo_config = provider.repo_config(self.settings)

# the image name (without tag) is unique per repo
# use this to count the number of pods running with a given repo
# if we added annotations/labels with the repo name via KubeSpawner
# we could do this better
image_no_tag = self.image_name.rsplit(":", 1)[0]

# TODO: put busy users in a queue rather than fail?
# That would be hard to do without in-memory state.
repo_quota = repo_config.get("quota")
pod_quota = self.settings["pod_quota"]
if pod_quota is not None or repo_quota:
# Fetch info on currently running users *only* if quotas are set
matching_pods = 0

# TODO: run a watch to keep this up to date in the background
f = self.settings["executor"].submit(
self.settings["kubernetes_client"].list_namespaced_pod,
self.settings["build_namespace"],
label_selector="app=jupyterhub,component=singleuser-server",
_request_timeout=KUBE_REQUEST_TIMEOUT,
_preload_content=False,
launch_quota = self.settings["launch_quota"]
try:
quota_check = await launch_quota.check_repo_quota(
self.image_name, repo_config, self.repo_url
)
resp = await asyncio.wrap_future(f)
pods = json.loads(resp.read())["items"]
total_pods = len(pods)

if pod_quota is not None and total_pods >= pod_quota:
# check overall quota first
LAUNCH_COUNT.labels(
status="pod_quota",
**self.repo_metric_labels,
).inc()
app_log.error(f"BinderHub is full: {total_pods}/{pod_quota}")
await self.fail("Too many users on this BinderHub! Try again soon.")
return

for pod in pods:
for container in pod["spec"]["containers"]:
# is the container running the same image as us?
# if so, count one for the current repo.
image = container["image"].rsplit(":", 1)[0]
if image == image_no_tag:
matching_pods += 1
break

if repo_quota and matching_pods >= repo_quota:
LAUNCH_COUNT.labels(
status="repo_quota",
**self.repo_metric_labels,
).inc()
app_log.error(
f"{self.repo_url} has exceeded quota: {matching_pods}/{repo_quota} ({total_pods} total)"
)
await self.fail(
f"Too many users running {self.repo_url}! Try again soon."
)
return
except LaunchQuotaExceeded as e:
LAUNCH_COUNT.labels(
status=e.status,
**self.repo_metric_labels,
).inc()
await self.fail(e.message)
return

if matching_pods >= 0.5 * repo_quota:
if quota_check:
if quota_check.matching >= 0.5 * quota_check.quota:
log = app_log.warning
else:
log = app_log.info
log(
"Launching pod for %s: %s other pods running this repo (%s total)",
"Launching server for %s: %s other servers running this repo (%s total)",
self.repo_url,
matching_pods,
total_pods,
quota_check.matching,
quota_check.total,
)

await self.emit(
Expand Down
161 changes: 161 additions & 0 deletions binderhub/quota.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,161 @@
"""
Singleuser server quotas
"""

import asyncio
import json
import os
from collections import namedtuple

import kubernetes.config
from kubernetes import client
from traitlets import Any, Integer, Unicode, default
from traitlets.config import LoggingConfigurable

from .utils import KUBE_REQUEST_TIMEOUT


class LaunchQuotaExceeded(Exception):
    """Raised when a quota will be exceeded by a launch.

    Attributes
    ----------
    message: User-facing message
    quota: Quota limit
    used: Quota used
    status: String indicating the type of quota
    """

    def __init__(self, message, *, quota, used, status):
        """
        message: User-facing message
        quota: Quota limit
        used: Quota used
        status: String indicating the type of quota
        """
        # Hand the message to Exception so that str(exc), exc.args and
        # tracebacks/log records show it instead of an empty string.
        super().__init__(message)
        self.message = message
        self.quota = quota
        self.used = used
        self.status = status


# Result of a successful quota check: the total number of servers counted,
# how many of them match the image being launched, and the quota that applied.
ServerQuotaCheck = namedtuple("ServerQuotaCheck", "total matching quota")


class LaunchQuota(LoggingConfigurable):
    """Base class for checking whether a launch would exceed a quota.

    This base implementation performs no check: ``check_repo_quota``
    always returns None. Subclasses override it to enforce quotas.
    """

    executor = Any(
        help="Optional Executor to use for blocking operations", allow_none=True
    )

    total_quota = Integer(
        None,
        config=True,
        allow_none=True,
        help="""
        The number of concurrent singleuser servers that can be run.

        None: no quota
        0: the hub can't run any singleuser servers (e.g. in maintenance mode)
        Positive integer: sets the quota
        """,
    )

    async def check_repo_quota(self, image_name, repo_config, repo_url):
        """
        Decide whether a repository may be launched without exceeding a quota.

        Parameters
        ----------
        image_name: str
        repo_config: dict
        repo_url: str

        Returns
        -------
        None if quotas are disabled.
        Otherwise a result holding:
          - total servers
          - matching servers running image_name
          - quota

        Raises
        ------
        LaunchQuotaExceeded if a quota is exceeded.
        """
        # The base class enforces nothing.
        return None


class KubernetesLaunchQuota(LaunchQuota):
    """Launch quota that counts singleuser-server pods in a Kubernetes namespace.

    Pods are selected with the label selector
    ``app=jupyterhub,component=singleuser-server``. The total number of
    pods is compared against ``total_quota``, and the number of pods running
    the image being launched is compared against the repo's ``quota``.
    """

    api = Any(
        help="Kubernetes API object to make requests (kubernetes.client.CoreV1Api())",
    )

    @default("api")
    def _default_api(self):
        # Try in-cluster configuration first, then fall back to kube config.
        try:
            kubernetes.config.load_incluster_config()
        except kubernetes.config.ConfigException:
            kubernetes.config.load_kube_config()
        return client.CoreV1Api()

    namespace = Unicode(help="Kubernetes namespace to check", config=True)

    @default("namespace")
    def _default_namespace(self):
        return os.getenv("BUILD_NAMESPACE", "default")

    async def _list_singleuser_pods(self):
        """Return the list of singleuser-server pod dicts in ``self.namespace``."""
        list_pods = self.api.list_namespaced_pod
        request_kwargs = dict(
            label_selector="app=jupyterhub,component=singleuser-server",
            _request_timeout=KUBE_REQUEST_TIMEOUT,
            _preload_content=False,
        )

        # TODO: run a watch to keep this up to date in the background
        if self.executor is None:
            # The executor trait is optional: without one, run the blocking
            # request in the event loop's default executor instead of
            # failing on ``None.submit``.
            loop = asyncio.get_running_loop()
            resp = await loop.run_in_executor(
                None, lambda: list_pods(self.namespace, **request_kwargs)
            )
        else:
            future = self.executor.submit(
                list_pods, self.namespace, **request_kwargs
            )
            resp = await asyncio.wrap_future(future)

        # _preload_content=False gives the raw response, so decode it here.
        return json.loads(resp.read())["items"]

    async def check_repo_quota(self, image_name, repo_config, repo_url):
        """
        Check whether launching a repository would exceed a quota.

        Parameters
        ----------
        image_name: str
            Image to be launched; the part before the last ":" is compared
            against the images of the listed pods.
        repo_config: dict
            Repository configuration; its "quota" entry is the per-repo quota.
        repo_url: str
            Used in log and error messages only.

        Returns
        -------
        None if neither ``total_quota`` nor a repo quota is set (no pods are
        listed in that case), otherwise a
        ServerQuotaCheck(total, matching, quota).

        Raises
        ------
        LaunchQuotaExceeded
            With status "pod_quota" if the total number of pods has reached
            ``total_quota``, or "repo_quota" if the pods running this image
            have reached the repo quota.
        """
        # the image name (without tag) is unique per repo
        # use this to count the number of pods running with a given repo
        # if we added annotations/labels with the repo name via KubeSpawner
        # we could do this better
        image_no_tag = image_name.rsplit(":", 1)[0]

        # TODO: put busy users in a queue rather than fail?
        # That would be hard to do without in-memory state.
        repo_quota = repo_config.get("quota")
        pod_quota = self.total_quota

        # Fetch info on currently running users *only* if quotas are set
        if pod_quota is None and not repo_quota:
            return None

        pods = await self._list_singleuser_pods()
        total_pods = len(pods)

        if pod_quota is not None and total_pods >= pod_quota:
            # check overall quota first
            self.log.error(f"BinderHub is full: {total_pods}/{pod_quota}")
            raise LaunchQuotaExceeded(
                "Too many users on this BinderHub! Try again soon.",
                quota=pod_quota,
                used=total_pods,
                status="pod_quota",
            )

        # A pod counts once for the current repo if any of its containers
        # runs the same image (ignoring the tag) as the one being launched.
        matching_pods = sum(
            any(
                container["image"].rsplit(":", 1)[0] == image_no_tag
                for container in pod["spec"]["containers"]
            )
            for pod in pods
        )

        if repo_quota and matching_pods >= repo_quota:
            self.log.error(
                f"{repo_url} has exceeded quota: {matching_pods}/{repo_quota} ({total_pods} total)"
            )
            raise LaunchQuotaExceeded(
                f"Too many users running {repo_url}! Try again soon.",
                quota=repo_quota,
                used=matching_pods,
                status="repo_quota",
            )

        return ServerQuotaCheck(
            total=total_pods, matching=matching_pods, quota=repo_quota
        )
Loading