From 50fa118f4811f01fe8ac6cd044d144b8e469a963 Mon Sep 17 00:00:00 2001
From: Chris Ostrouchov
Date: Wed, 3 Mar 2021 07:47:55 -0500
Subject: [PATCH 1/2] Initial work to remove jupyterhub helm chart

---
Review note (ignored by `git am`): in hub.tf the "secret" volume now uses a
`secret` volume source. It previously used `config_map`, which mounted the
same-named ConfigMap at /etc/jupyterhub/secret/ instead of the Secret.

 modules/kubernetes/services/jupyterhub/hub.tf | 247 ++++++++++++++++++
 .../kubernetes/services/jupyterhub/outputs.tf |  11 +
 .../kubernetes/services/jupyterhub/proxy.tf   | 132 ++++++++++
 .../kubernetes/services/jupyterhub/rbac.tf    |  41 +++
 .../jupyterhub/templates/jupyterhub_config.py |  54 ++++
 .../services/jupyterhub/values.yaml           |  55 ----
 .../services/jupyterhub/variables.tf          |  53 +++-
 7 files changed, 534 insertions(+), 59 deletions(-)
 create mode 100644 modules/kubernetes/services/jupyterhub/hub.tf
 create mode 100644 modules/kubernetes/services/jupyterhub/outputs.tf
 create mode 100644 modules/kubernetes/services/jupyterhub/proxy.tf
 create mode 100644 modules/kubernetes/services/jupyterhub/rbac.tf
 create mode 100644 modules/kubernetes/services/jupyterhub/templates/jupyterhub_config.py
 delete mode 100644 modules/kubernetes/services/jupyterhub/values.yaml

diff --git a/modules/kubernetes/services/jupyterhub/hub.tf b/modules/kubernetes/services/jupyterhub/hub.tf
new file mode 100644
index 00000000..e0418c8b
--- /dev/null
+++ b/modules/kubernetes/services/jupyterhub/hub.tf
@@ -0,0 +1,247 @@
+resource "random_password" "proxy_secret_token" {
+  length  = 32
+  special = false
+}
+
+
+# requires hex password
+resource "random_password" "hub_secret_cookie" {
+  length = 32
+}
+
+
+resource "random_password" "api_token" {
+  count  = length(var.services)
+  length = 32
+}
+
+
+resource "kubernetes_config_map" "hub" {
+  metadata {
+    name      = "${var.name}-jupyterhub-hub"
+    namespace = var.namespace
+  }
+
+  data = {
+    "jupyterhub_config.py" = templatefile("${path.module}/templates/jupyterhub_config.py", {
+      proxy_public = {
+        host = kubernetes_service.proxy_public.metadata.0.name
+        port = 80
+      }
+      proxy_api = {
+        host = kubernetes_service.proxy_api.metadata.0.name
+        port = 8001
+      }
+      singleuser = var.singleuser
+      hub = {
+        host = "${kubernetes_service.hub.metadata.0.name}.${kubernetes_service.hub.metadata.0.namespace}"
+        port = 8081
+      }
+      extraConfig = var.extraConfig
+    })
+  }
+}
+
+
+resource "kubernetes_secret" "hub" {
+  metadata {
+    name      = "${var.name}-jupyterhub-hub"
+    namespace = var.namespace
+  }
+
+  data = {
+    "proxy.token" = random_password.proxy_secret_token.result
+    # must be hex value
+    "hub.cookie-secret" = sha256(random_password.hub_secret_cookie.result)
+    "api-tokens" = jsonencode(zipmap(
+      var.services,
+      [for instance in random_password.api_token : instance.result]
+    ))
+  }
+}
+
+
+resource "kubernetes_persistent_volume_claim" "hub" {
+  metadata {
+    name      = "${var.name}-jupyterhub-hub"
+    namespace = var.namespace
+  }
+
+  spec {
+    access_modes = ["ReadWriteOnce"]
+    resources {
+      requests = {
+        storage = "1Gi"
+      }
+    }
+  }
+}
+
+
+resource "kubernetes_service" "hub" {
+  metadata {
+    name      = "${var.name}-jupyterhub-hub"
+    namespace = var.namespace
+  }
+
+  spec {
+    selector = {
+      "app.kubernetes.io/component" = "jupyterhub-hub"
+    }
+
+    port {
+      target_port = "http"
+      port        = 8081
+    }
+  }
+}
+
+resource "kubernetes_deployment" "hub" {
+  metadata {
+    name      = "${var.name}-jupyterhub-hub"
+    namespace = var.namespace
+  }
+
+  spec {
+    replicas = 1
+
+    selector {
+      match_labels = {
+        "app.kubernetes.io/component" = "jupyterhub-hub"
+      }
+    }
+
+    template {
+      metadata {
+        labels = {
+          "app.kubernetes.io/component"               = "jupyterhub-hub"
+          "hub.jupyter.org/network-access-proxy-api"  = "true"
+          "hub.jupyter.org/network-access-proxy-http" = "true"
+          "hub.jupyter.org/network-access-singleuser" = "true"
+        }
+
+        annotations = {
+          # This lets us autorestart when the secret changes!
+          "checksum/config-map" = sha256(jsonencode(kubernetes_config_map.hub.data))
+          "checksum/secret"     = sha256(jsonencode(kubernetes_secret.hub.data))
+        }
+      }
+
+      spec {
+        volume {
+          name = "config"
+          config_map {
+            name = kubernetes_config_map.hub.metadata.0.name
+          }
+        }
+
+        volume {
+          name = "secret"
+          secret { # Secret volume source; config_map would mount the same-named ConfigMap
+            secret_name = kubernetes_secret.hub.metadata.0.name
+          }
+        }
+
+        volume {
+          name = "hub-db-dir"
+          persistent_volume_claim {
+            claim_name = kubernetes_persistent_volume_claim.hub.metadata.0.name
+          }
+        }
+
+        service_account_name            = kubernetes_service_account.hub.metadata.0.name
+        automount_service_account_token = true
+
+        container {
+          name  = "hub"
+          image = "${var.hub-image.image}:${var.hub-image.tag}"
+
+          command = [
+            "jupyterhub",
+            "--config",
+            "/etc/jupyterhub/jupyterhub_config.py",
+            "--upgrade-db", # auto upgrade db
+          ]
+
+          volume_mount {
+            name       = "config"
+            mount_path = "/etc/jupyterhub/jupyterhub_config.py"
+            sub_path   = "jupyterhub_config.py"
+          }
+
+          volume_mount {
+            mount_path = "/etc/jupyterhub/secret/"
+            name       = "secret"
+          }
+
+          volume_mount {
+            mount_path = "/srv/jupyterhub"
+            name       = "hub-db-dir"
+          }
+
+          env {
+            name  = "PYTHONUNBUFFERED"
+            value = "1"
+          }
+
+          env {
+            name = "JPY_COOKIE_SECRET"
+            value_from {
+              secret_key_ref {
+                name = kubernetes_secret.hub.metadata.0.name
+                key  = "hub.cookie-secret"
+              }
+            }
+          }
+
+          env {
+            name = "CONFIGPROXY_AUTH_TOKEN"
+            value_from {
+              secret_key_ref {
+                name = kubernetes_secret.hub.metadata.0.name
+                key  = "proxy.token"
+              }
+            }
+          }
+
+          env {
+            name = "JUPYTERHUB_API_SERVICE_TOKENS"
+            value_from {
+              secret_key_ref {
+                name = kubernetes_secret.hub.metadata.0.name
+                key  = "api-tokens"
+              }
+            }
+          }
+
+          port {
+            name           = "http"
+            container_port = 8081
+          }
+
+          # TODO: consider baseUrl
+          liveness_probe {
+            http_get {
+              path = "/hub/health"
+              port = "http"
+            }
+
+            initial_delay_seconds = 60
+            period_seconds        = 10
+          }
+
+          # TODO: consider baseUrl
+          readiness_probe {
+            http_get {
+              path = "/hub/health"
+              port = "http"
+            }
+
+            initial_delay_seconds = 0
+            period_seconds        = 2
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/modules/kubernetes/services/jupyterhub/outputs.tf b/modules/kubernetes/services/jupyterhub/outputs.tf
new file mode 100644
index 00000000..971b570d
--- /dev/null
+++ b/modules/kubernetes/services/jupyterhub/outputs.tf
@@ -0,0 +1,11 @@
+output "api_tokens" {
+  description = "Jupyterhub API Tokens for services"
+  value = zipmap(
+    var.services,
+  [for instance in random_password.api_token : instance.result])
+}
+
+output "internal_proxy_url" {
+  description = "Jupyterhub API URL"
+  value       = "http://${kubernetes_service.proxy_public.metadata.0.name}:80"
+}
diff --git a/modules/kubernetes/services/jupyterhub/proxy.tf b/modules/kubernetes/services/jupyterhub/proxy.tf
new file mode 100644
index 00000000..501b0fa5
--- /dev/null
+++ b/modules/kubernetes/services/jupyterhub/proxy.tf
@@ -0,0 +1,132 @@
+resource "kubernetes_service" "proxy_api" {
+  metadata {
+    name      = "${var.name}-jupyterhub-proxy-api"
+    namespace = var.namespace
+  }
+
+  spec {
+    selector = {
+      "app.kubernetes.io/component" = "jupyterhub-proxy"
+    }
+
+    port {
+      port        = 8001
+      target_port = "api"
+    }
+  }
+}
+
+
+resource "kubernetes_service" "proxy_public" {
+  metadata {
+    name      = "${var.name}-jupyterhub-proxy-public"
+    namespace = var.namespace
+  }
+
+  spec {
+    selector = {
+      "app.kubernetes.io/component" = "jupyterhub-proxy"
+    }
+
+    port {
+      name        = "http"
+      port        = 80
+      target_port = "http"
+    }
+
+    type = "NodePort"
+  }
+}
+
+resource "kubernetes_deployment" "proxy" {
+  metadata {
+    name      = "${var.name}-jupyterhub-proxy"
+    namespace = var.namespace
+  }
+
+  spec {
+    replicas = 1
+
+    selector {
+      match_labels = {
+        "app.kubernetes.io/component" = "jupyterhub-proxy"
+      }
+    }
+
+    template {
+      metadata {
+        labels = {
+          "app.kubernetes.io/component"               = "jupyterhub-proxy"
+          "hub.jupyter.org/network-access-hub"        = "true"
+          "hub.jupyter.org/network-access-singleuser" = "true"
+        }
+
+        annotations = {
+          # This lets us autorestart when the secret changes!
+          "checksum/config-map" = sha256(jsonencode(kubernetes_config_map.hub.data))
+          "checksum/secret"     = sha256(jsonencode(kubernetes_secret.hub.data))
+        }
+      }
+
+      spec {
+        termination_grace_period_seconds = 60
+
+        container {
+          image = "${var.proxy-image.image}:${var.proxy-image.tag}"
+          name  = "${var.name}-jupyterhub-chp"
+
+          command = [
+            "configurable-http-proxy",
+            "--ip=::",
+            "--api-ip=::",
+            "--api-port=8001",
+            "--default-target=http://${kubernetes_service.hub.metadata.0.name}:8081",
+            "--error-target=http://${kubernetes_service.hub.metadata.0.name}:8081/hub/error",
+            "--port=8000",
+            # "--log-level=debug"
+          ]
+
+          env {
+            name = "CONFIGPROXY_AUTH_TOKEN"
+            value_from {
+              secret_key_ref {
+                name = kubernetes_secret.hub.metadata.0.name
+                key  = "proxy.token"
+              }
+            }
+          }
+
+          port {
+            name           = "http"
+            container_port = 8000
+          }
+
+          port {
+            name           = "api"
+            container_port = 8001
+          }
+
+          liveness_probe {
+            http_get {
+              path = "/_chp_healthz"
+              port = "http"
+            }
+
+            initial_delay_seconds = 60
+            period_seconds        = 10
+          }
+
+          readiness_probe {
+            http_get {
+              path = "/_chp_healthz"
+              port = "http"
+            }
+
+            initial_delay_seconds = 0
+            period_seconds        = 2
+          }
+        }
+      }
+    }
+  }
+}
diff --git a/modules/kubernetes/services/jupyterhub/rbac.tf b/modules/kubernetes/services/jupyterhub/rbac.tf
new file mode 100644
index 00000000..43fd2fb0
--- /dev/null
+++ b/modules/kubernetes/services/jupyterhub/rbac.tf
@@ -0,0 +1,41 @@
+resource "kubernetes_service_account" "hub" {
+  metadata {
+    name      = "${var.name}-jupyterhub"
+    namespace = var.namespace
+  }
+}
+
+resource "kubernetes_role" "hub" {
+  metadata {
+    name = "${var.name}-jupyterhub"
+  }
+
+  rule {
+    api_groups = [""]
+    resources  = ["pods", "persistentvolumeclaims"]
+    verbs      = ["get", "list", "watch", "create", "delete"]
+  }
+
+  rule {
+    api_groups = [""]
+    resources  = ["events"]
+    verbs      = ["get", "list", "watch"]
+  }
+}
+
+resource "kubernetes_role_binding" "hub" {
+  metadata {
+    name = "${var.name}-jupyterhub"
+  }
+
+  role_ref {
+    api_group = "rbac.authorization.k8s.io"
+    kind      = "Role"
+    name      = kubernetes_role.hub.metadata.0.name
+  }
+  subject {
+    kind      = "ServiceAccount"
+    name      = kubernetes_service_account.hub.metadata.0.name
+    namespace = var.namespace
+  }
+}
diff --git a/modules/kubernetes/services/jupyterhub/templates/jupyterhub_config.py b/modules/kubernetes/services/jupyterhub/templates/jupyterhub_config.py
new file mode 100644
index 00000000..6a39d9aa
--- /dev/null
+++ b/modules/kubernetes/services/jupyterhub/templates/jupyterhub_config.py
@@ -0,0 +1,54 @@
+# based on zero to jupyterhub
+# https://github.com/jupyterhub/zero-to-jupyterhub-k8s/blob/master/jupyterhub/files/hub/jupyterhub_config.py
+import json
+import os
+
+# Configure JupyterHub to use the curl backend for making HTTP requests,
+# rather than the pure-python implementations. The default one starts
+# being too slow to make a large number of requests to the proxy API
+# at the rate required.
+from tornado.httpclient import AsyncHTTPClient
+AsyncHTTPClient.configure("tornado.curl_httpclient.CurlAsyncHTTPClient")
+
+c.JupyterHub.spawner_class = 'kubespawner.KubeSpawner'
+
+# Connect to a proxy running in a different pod
+c.ConfigurableHTTPProxy.api_url = 'http://${proxy_api.host}:${proxy_api.port}'
+c.ConfigurableHTTPProxy.should_start = False
+
+# Do not shut down user pods when hub is restarted
+c.JupyterHub.cleanup_servers = False
+
+# Check that the proxy has routes appropriately setup
+c.JupyterHub.last_activity_interval = 60
+
+# Don't wait at all before redirecting a spawning user to the progress page
+c.JupyterHub.tornado_settings = {
+    'slow_spawn_timeout': 0,
+}
+
+# Configure persistent sqlite jupyterhub database
+c.JupyterHub.db_url = "sqlite:///jupyterhub.sqlite"
+
+# Set jupyterhub proxy ip/hostname
+c.JupyterHub.ip = "${proxy_public.host}"
+c.JupyterHub.port = ${proxy_public.port}
+
+# the hub should listen on all interfaces, so the proxy can access it
+c.JupyterHub.hub_ip = '0.0.0.0'
+
+# Set default namespace for pods to be launched as
+c.KubeSpawner.namespace = "${singleuser.namespace}"
+
+# Gives spawned containers access to the API of the hub
+# c.JupyterHub.hub_connect_url = "http://${proxy_public.host}:${proxy_public.port}/hub/api"
+c.JupyterHub.hub_connect_ip = "${hub.host}"
+c.JupyterHub.hub_connect_port = ${hub.port}
+
+# convert {"service1": "token1", "service2": "token2"} into
+# [{"name": "service1", "api_token": "token1"}, ...]
+# due to inflexibility of terraform language
+# TODO come up with more elegant way to add services
+c.JupyterHub.services = [{"name": k, "api_token": v} for k, v in json.loads(os.environ['JUPYTERHUB_API_SERVICE_TOKENS']).items()]
+
+${extraConfig}
diff --git a/modules/kubernetes/services/jupyterhub/values.yaml b/modules/kubernetes/services/jupyterhub/values.yaml
deleted file mode 100644
index cf1caba9..00000000
--- a/modules/kubernetes/services/jupyterhub/values.yaml
+++ /dev/null
@@ -1,55 +0,0 @@
-hub:
-  db:
-    type: sqlite-pvc
-    pvc:
-      storage: 1Gi
-  baseUrl: "/"
-
-proxy:
-  secretToken: ""
-  service:
-    type: NodePort
-
-scheduling:
-  userScheduler:
-    enabled: true
-  podPriority:
-    enabled: true
-  userPlaceholder:
-    enabled: false
-    replicas: 1
-
-
-singleuser:
-  defaultUrl: "/lab"
-  image:
-    name: jupyter/datascience-notebook
-    tag: 73a577b006b4
-  profileList:
-    - display_name: "Small Instance"
-      description: "Stable environment with 1 cpu / 1GB ram"
-      default: true
-  storage:
-    type: static
-    static:
-      pvcName: ""
-      subPath: 'home/{username}'
-    extraVolumeMounts:
-      - mountPath: "/home/shared"
-        name: home
-        subPath: "home/shared"
-  initContainers:
-    - name: init-nfs
-      image: busybox:1.31
-      command: ['sh', '-c', 'mkdir -p /mnt/home/{username} && chmod 777 /mnt/home/{username} && mkdir -p /mnt/home/shared && chmod 777 /mnt/home/shared']
-      securityContext:
-        runAsUser: 0
-      volumeMounts:
-        - mountPath: "/mnt"
-          name: home
-  cpu:
-    limit: 1
-    guarantee: 1
-  memory:
-    limit: "1G"
-    guarantee: "1G"
diff --git a/modules/kubernetes/services/jupyterhub/variables.tf b/modules/kubernetes/services/jupyterhub/variables.tf
index 4efb9ffa..ff3b8e20 100644
--- a/modules/kubernetes/services/jupyterhub/variables.tf
+++ b/modules/kubernetes/services/jupyterhub/variables.tf
@@ -1,10 +1,55 @@
+variable "name" {
+  description = "name prefix to assign to jupyterhub"
+  default     = "terraform-jupyterhub"
+}
+
 variable "namespace" {
-  description = "Namespace for jupyterhub deployment"
-  type        = string
+  description = "namespace to deploy jupyterhub"
+  default     = "default"
 }
 
-variable "overrides" {
-  description = "Jupyterhub helm chart list of overrides"
+variable "hub-image" {
+  description = "hub image"
+  type = object({
+    image = string
+    tag   = string
+  })
+  default = {
+    image = "jupyterhub/k8s-hub"
+    tag   = "0.9.1"
+  }
+}
+
+variable "services" {
+  description = "services to create api tokens"
   type        = list(string)
   default     = []
 }
+
+variable "proxy-image" {
+  description = "proxy image"
+  type = object({
+    image = string
+    tag   = string
+  })
+  default = {
+    image = "jupyterhub/configurable-http-proxy"
+    tag   = "4.2.1"
+  }
+}
+
+variable "singleuser" {
+  description = "jupyterhub singleuser defaults"
+  type = object({
+    namespace = string
+  })
+  default = {
+    namespace = "default"
+  }
+}
+
+variable "extraConfig" {
+  description = "Additional jupyterhub configuration"
+  type        = string
+  default     = ""
+}
From eaf2a0b5a19435481b2f388ffee3a8a1d69e1fa2 Mon Sep 17 00:00:00 2001
From: Chris Ostrouchov
Date: Mon, 15 Mar 2021 10:09:46 -0400
Subject: [PATCH 2/2] Adding modifications to configuration

---
Review notes (ignored by `git am`):
- hub.tf: the template receives the API token strings
  (`random_password.api_token[*].result`) instead of whole resource objects,
  which `formatlist("%s", ...)` cannot render.
- hub.tf: `jupyterlab-node-group` is now passed to `templatefile`; the
  template references it for `c.KubeSpawner.node_selector`.
- variables.tf (`singleuser`): memory settings are typed `string` (values
  such as "1G"), the default key is `mem_guarantee` (was `memory_guarantee`),
  `pod_name_template` is declared in the object type, and the cpu defaults
  are numeric literals.

 modules/kubernetes/services/jupyterhub/hub.tf | 19 +++++++
 .../kubernetes/services/jupyterhub/proxy.tf   | 14 +++++
 .../jupyterhub/templates/jupyterhub_config.py | 23 ++++++---
 .../services/jupyterhub/variables.tf          | 52 ++++++++++++++++---
 4 files changed, 93 insertions(+), 15 deletions(-)

diff --git a/modules/kubernetes/services/jupyterhub/hub.tf b/modules/kubernetes/services/jupyterhub/hub.tf
index e0418c8b..1ad6bc91 100644
--- a/modules/kubernetes/services/jupyterhub/hub.tf
+++ b/modules/kubernetes/services/jupyterhub/hub.tf
@@ -37,6 +37,11 @@ resource "kubernetes_config_map" "hub" {
         host = "${kubernetes_service.hub.metadata.0.name}.${kubernetes_service.hub.metadata.0.namespace}"
         port = 8081
       }
+      service = {
+        names      = var.services
+        api_tokens = random_password.api_token[*].result # token strings, not resource objects
+      }
+      jupyterlab-node-group = var.jupyterlab-node-group # used by the template's node_selector
       extraConfig = var.extraConfig
     })
   }
@@ -128,6 +133,20 @@ resource "kubernetes_deployment" "hub" {
       }
 
       spec {
+        affinity {
+          node_affinity {
+            required_during_scheduling_ignored_during_execution {
+              node_selector_term {
+                match_expressions {
+                  key      = var.hub-node-group.key
+                  operator = "In"
+                  values   = [var.hub-node-group.value]
+                }
+              }
+            }
+          }
+        }
+
         volume {
           name = "config"
           config_map {
diff --git a/modules/kubernetes/services/jupyterhub/proxy.tf b/modules/kubernetes/services/jupyterhub/proxy.tf
index 501b0fa5..e11b120a 100644
--- a/modules/kubernetes/services/jupyterhub/proxy.tf
+++ b/modules/kubernetes/services/jupyterhub/proxy.tf
@@ -69,6 +69,20 @@ resource "kubernetes_deployment" "proxy" {
       }
 
       spec {
+        affinity {
+          node_affinity {
+            required_during_scheduling_ignored_during_execution {
+              node_selector_term {
+                match_expressions {
+                  key      = var.proxy-node-group.key
+                  operator = "In"
+                  values   = [var.proxy-node-group.value]
+                }
+              }
+            }
+          }
+        }
+
         termination_grace_period_seconds = 60
 
         container {
diff --git a/modules/kubernetes/services/jupyterhub/templates/jupyterhub_config.py b/modules/kubernetes/services/jupyterhub/templates/jupyterhub_config.py
index 6a39d9aa..810711e5 100644
--- a/modules/kubernetes/services/jupyterhub/templates/jupyterhub_config.py
+++ b/modules/kubernetes/services/jupyterhub/templates/jupyterhub_config.py
@@ -37,18 +37,25 @@
 # the hub should listen on all interfaces, so the proxy can access it
 c.JupyterHub.hub_ip = '0.0.0.0'
 
-# Set default namespace for pods to be launched as
-c.KubeSpawner.namespace = "${singleuser.namespace}"
-
 # Gives spawned containers access to the API of the hub
 # c.JupyterHub.hub_connect_url = "http://${proxy_public.host}:${proxy_public.port}/hub/api"
 c.JupyterHub.hub_connect_ip = "${hub.host}"
 c.JupyterHub.hub_connect_port = ${hub.port}
 
-# convert {"service1": "token1", "service2": "token2"} into
-# [{"name": "service1", "api_token": "token1"}, ...]
-# due to inflexibility of terraform language
-# TODO come up with more elegant way to add services
-c.JupyterHub.services = [{"name": k, "api_token": v} for k, v in json.loads(os.environ['JUPYTERHUB_API_SERVICE_TOKENS']).items()]
+# singleuser notebook defaults
+c.KubeSpawner.namespace = "${singleuser.namespace}"
+c.KubeSpawner.image = "${singleuser.image}"
+c.KubeSpawner.cpu_guarantee = ${singleuser.cpu_guarantee}
+c.KubeSpawner.cpu_limit = ${singleuser.cpu_limit}
+c.KubeSpawner.mem_guarantee = "${singleuser.mem_guarantee}"
+c.KubeSpawner.mem_limit = "${singleuser.mem_limit}"
+c.KubeSpawner.default_url = "${singleuser.default_url}"
+c.KubeSpawner.pod_name_template = "${singleuser.pod_name_template}"
+c.KubeSpawner.node_selector = {"${jupyterlab-node-group.key}": "${jupyterlab-node-group.value}"}
+
+# jupyterhub services
+c.JupyterHub.services = [
+${join("\n", formatlist("    {'name': '%s', 'api_token': '%s'},", service.names, service.api_tokens))}
+]
 
 ${extraConfig}
diff --git a/modules/kubernetes/services/jupyterhub/variables.tf b/modules/kubernetes/services/jupyterhub/variables.tf
index ff3b8e20..d3005580 100644
--- a/modules/kubernetes/services/jupyterhub/variables.tf
+++ b/modules/kubernetes/services/jupyterhub/variables.tf
@@ -20,12 +20,6 @@ variable "hub-image" {
   }
 }
 
-variable "services" {
-  description = "services to create api tokens"
-  type        = list(string)
-  default     = []
-}
-
 variable "proxy-image" {
   description = "proxy image"
   type = object({
@@ -41,13 +35,57 @@ variable "proxy-image" {
 variable "singleuser" {
   description = "jupyterhub singleuser defaults"
   type = object({
-    namespace = string
+    namespace         = string # default spawner namespace
+    image             = string # default spawner jupyterlab image
+    cpu_guarantee     = number # default spawner jupyterlab guaranteed cpu
+    cpu_limit         = number # default spawner jupyterlab max cpus
+    mem_guarantee     = string # default spawner jupyterlab guaranteed memory, e.g. "1G"
+    mem_limit         = string # default spawner jupyterlab max memory, e.g. "1G"
+    default_url       = string # default spawner jupyterlab url
+    pod_name_template = string # default spawner pod name template
   })
   default = {
     namespace = "default"
+    image             = "quansight/qhub-jupyterlab:e26a2766a0a66ce6d4c538f9f550b1f267f3d240"
+    cpu_guarantee     = 1.0
+    cpu_limit         = 1.0
+    mem_guarantee     = "1G"
+    mem_limit         = "1G"
+    default_url       = "/lab"
+    pod_name_template = "jupyter-{username}--{servername}"
   }
 }
 
+variable "hub-node-group" {
+  description = "Node key value pair for bound jupyterhub deployment"
+  type = object({
+    key   = string
+    value = string
+  })
+}
+
+variable "proxy-node-group" {
+  description = "Node group key value pair for bound user resources"
+  type = object({
+    key   = string
+    value = string
+  })
+}
+
+variable "jupyterlab-node-group" {
+  description = "Node group key value pair for bound worker resources"
+  type = object({
+    key   = string
+    value = string
+  })
+}
+
+variable "services" {
+  description = "services to create api tokens"
+  type        = list(string)
+  default     = []
+}
+
 variable "extraConfig" {
   description = "Additional jupyterhub configuration"
   type        = string