From 06fa76dcc4c6dfabe4660daa59a1b71a71884e6b Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Thu, 28 Aug 2025 12:08:14 +0000 Subject: [PATCH] multicluster based on a flat model and namespace seamless SIG Multicluster concept. Network Policies are cluster local. Selectors are cluster set global. Administrators MUST own the label schema to apply multi cluster policies. avoid conflict on artifacts --- .github/workflows/bats.yml | 25 +- Makefile | 24 +- cmd/kube-ip-tracker/multicluster/main.go | 301 ++++++++++++++++ docs/multicluster.md | 169 +++++++++ go.mod | 1 + go.sum | 2 + install-iptracker.yaml | 4 + install-multicluster.yaml | 229 ++++++++++++ tests/multicluster/e2e_multicluster.bats | 428 +++++++++++++++++++++++ 9 files changed, 1178 insertions(+), 5 deletions(-) create mode 100644 cmd/kube-ip-tracker/multicluster/main.go create mode 100644 docs/multicluster.md create mode 100644 install-multicluster.yaml create mode 100644 tests/multicluster/e2e_multicluster.bats diff --git a/.github/workflows/bats.yml b/.github/workflows/bats.yml index 0c9c61ef..3a9be369 100644 --- a/.github/workflows/bats.yml +++ b/.github/workflows/bats.yml @@ -38,6 +38,29 @@ jobs: if: always() uses: actions/upload-artifact@v4 with: - name: kind-logs-${{ env.JOB_NAME }}-${{ github.run_id }} + name: kind-logs-bats-${{ github.run_id }} path: ./_artifacts + bats_multicluster_tests: + runs-on: ubuntu-22.04 + name: Bats e2e multicluster tests + steps: + - name: Checkout + uses: actions/checkout@v2 + - name: Setup Bats and bats libs + id: setup-bats + uses: bats-core/bats-action@3.0.0 + - name: Bats tests + shell: bash + env: + BATS_LIB_PATH: ${{ steps.setup-bats.outputs.lib-path }} + TERM: xterm + run: bats -o _artifacts --print-output-on-failure tests/multicluster/ + + - name: Upload logs + if: always() + uses: actions/upload-artifact@v4 + with: + name: kind-logs-bats-multicluster-${{ github.run_id }} + path: ./_artifacts + \ No newline at end of file diff --git a/Makefile b/Makefile index 68fda398..f35f3803 100644 --- a/Makefile +++ b/Makefile @@ -12,9 +12,9 @@ REGISTRY?=gcr.io/k8s-staging-networking TAG?=$(shell echo "$$(date +v%Y%m%d)-$$(git describe --always --dirty)") PLATFORMS?=linux/amd64,linux/arm64 -.PHONY: all build build-standard build-npa-v1alpha1 build-npa-v1alpha2 build-iptracker build-kube-ip-tracker-standard +.PHONY: all build build-standard build-npa-v1alpha1 build-npa-v1alpha2 build-iptracker build-kube-ip-tracker-standard build-kube-ip-tracker-multicluster -build: build-standard build-npa-v1alpha1 build-npa-v1alpha2 build-iptracker build-kube-ip-tracker-standard +build: build-standard build-npa-v1alpha1 build-npa-v1alpha2 build-iptracker build-kube-ip-tracker-standard build-kube-ip-tracker-multicluster build-standard: @echo "Building standard binary..." @@ -36,6 +36,10 @@ build-kube-ip-tracker-standard: @echo "Building kube-ip-tracker binary..." go build -o ./bin/kube-ip-tracker-standard ./cmd/kube-ip-tracker/standard +build-kube-ip-tracker-multicluster: + @echo "Building multicluster binary..." + go build -o ./bin/kube-ip-tracker-multicluster ./cmd/kube-ip-tracker/multicluster + clean: rm -rf "$(OUT_DIR)/" @@ -86,6 +90,12 @@ image-build-kube-ip-tracker-standard: build-kube-ip-tracker-standard --tag="${REGISTRY}/kube-ip-tracker:$(TAG)" \ --load +image-build-kube-ip-tracker-multicluster: build-kube-ip-tracker-multicluster + docker buildx build . 
-f Dockerfile.iptracker \ + --build-arg TARGET_BUILD=multicluster \ + --tag="${REGISTRY}/kube-ip-tracker:$(TAG)-multicluster" \ + --load + # Individual image push targets (multi-platform) image-push-standard: build-standard docker buildx build . \ @@ -121,14 +131,20 @@ image-push-kube-ip-tracker-standard: build-kube-ip-tracker-standard --tag="${REGISTRY}/kube-ip-tracker:$(TAG)" \ --push +image-push-kube-ip-tracker-multicluster: build-kube-ip-tracker-multicluster + docker buildx build . -f Dockerfile.iptracker \ + --build-arg TARGET_BUILD=multicluster \ + --tag="${REGISTRY}/kube-ip-tracker:$(TAG)-multicluster" \ + --push + # --- Aggregate Targets --- .PHONY: images-build images-push release # Build all image variants and load them into the local Docker daemon -images-build: ensure-buildx image-build-standard image-build-npa-v1alpha1 image-build-npa-v1alpha2 image-build-iptracker image-build-kube-ip-tracker-standard +images-build: ensure-buildx image-build-standard image-build-npa-v1alpha1 image-build-npa-v1alpha2 image-build-iptracker image-build-kube-ip-tracker-standard image-build-kube-ip-tracker-multicluster # Build and push all multi-platform image variants to the registry -images-push: ensure-buildx image-push-standard image-push-npa-v1alpha1 image-push-npa-v1alpha2 image-push-iptracker image-push-kube-ip-tracker-standard +images-push: ensure-buildx image-push-standard image-push-npa-v1alpha1 image-push-npa-v1alpha2 image-push-iptracker image-push-kube-ip-tracker-standard image-push-kube-ip-tracker-multicluster # The main release target, which pushes all images release: images-push \ No newline at end of file diff --git a/cmd/kube-ip-tracker/multicluster/main.go b/cmd/kube-ip-tracker/multicluster/main.go new file mode 100644 index 00000000..82da6f95 --- /dev/null +++ b/cmd/kube-ip-tracker/multicluster/main.go @@ -0,0 +1,301 @@ +package main + +import ( + "context" + "flag" + "os" + "os/signal" + "path/filepath" + "reflect" + "sync" + "syscall" + + "github.com/fsnotify/fsnotify" + v1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/client-go/informers" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/cache" + "k8s.io/client-go/tools/clientcmd" + "k8s.io/klog/v2" + + "sigs.k8s.io/kube-network-policies/pkg/api" + "sigs.k8s.io/kube-network-policies/pkg/ipcache" +) + +var ( + listenAddr = flag.String("listen-address", "http://0.0.0.0:19090", "The address for the cache server to listen on.") + kubeconfigDir = flag.String("kubeconfig-dir", "", "Directory containing kubeconfig files for clusters to watch in multi-cluster mode.") + etcdDir = flag.String("etcd-dir", "./ipcache.etcd", "The directory for the embedded etcd server.") + caFile = flag.String("tls-ca-file", "", "The CA file for the server.") + certFile = flag.String("tls-cert-file", "", "The certificate file for the server.") + keyFile = flag.String("tls-key-file", "", "The key file for the server.") + + // Global map to keep track of running cluster watchers + clusterContexts sync.Map // map[clusterName]context.CancelFunc +) + +func main() { + klog.InitFlags(nil) + flag.Parse() + + if *kubeconfigDir == "" { + klog.Fatal("required --kubeconfig-dir flag") + } + + ctx, cancel := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) + defer cancel() + + var opts []ipcache.EtcdOption + if *caFile != "" && *certFile != "" && *keyFile != "" { + opts = append(opts, ipcache.WithTLS(*certFile, *keyFile, *caFile)) + } + + // 
Create the etcd directory if it doesn't exist. + if err := os.MkdirAll(*etcdDir, 0750); err != nil { + klog.Fatalf("Failed to create etcd directory: %v", err) + } + + cacheServer, err := ipcache.NewEtcdStore(*listenAddr, *etcdDir, opts...) + if err != nil { + klog.Fatalf("Failed to create ipcache server: %v", err) + } + + go watchKubeconfigDir(ctx, *kubeconfigDir, cacheServer) + + klog.Infoln("kube-ip-tracker is running...") + <-ctx.Done() + klog.Infoln("Shutting down.") +} + +func watchKubeconfigDir(ctx context.Context, dir string, server *ipcache.EtcdStore) { + watcher, err := fsnotify.NewWatcher() + if err != nil { + klog.Fatalf("Failed to create file watcher: %v", err) + } + defer watcher.Close() + + if err := watcher.Add(dir); err != nil { + klog.Fatalf("Failed to add directory to watcher: %v", err) + } + + reconcile := func() { + klog.Info("Reconciling kubeconfigs...") + activeClusters := make(map[string]bool) + + files, _ := os.ReadDir(dir) + for _, f := range files { + if !f.IsDir() { + path := filepath.Join(dir, f.Name()) + clusters, err := getClustersFromKubeconfig(path) + if err != nil { + klog.Errorf("Error processing kubeconfig %s: %v", path, err) + continue + } + for clusterName := range clusters { + activeClusters[clusterName] = true + if _, loaded := clusterContexts.Load(clusterName); !loaded { + startForKubeconfig(ctx, path, clusterName, server) + } + } + } + } + + // Stop watchers for clusters that are no longer defined + clusterContexts.Range(func(key, value interface{}) bool { + clusterName := key.(string) + if !activeClusters[clusterName] { + stopWatching(clusterName) + } + return true + }) + } + + reconcile() // Initial run + + for { + select { + case event, ok := <-watcher.Events: + if !ok { + return + } + if event.Op&(fsnotify.Create|fsnotify.Write|fsnotify.Remove) != 0 { + reconcile() + } + case err, ok := <-watcher.Errors: + if !ok { + return + } + klog.Errorf("Watcher error: %v", err) + case <-ctx.Done(): + return + } + } +} + +func startForKubeconfig(ctx context.Context, kubeconfigPath, clusterAlias string, server *ipcache.EtcdStore) { + var config *rest.Config + var err error + + if kubeconfigPath == "" { + klog.Errorf("missing kubeconfig path") + return + } + loadingRules := &clientcmd.ClientConfigLoadingRules{ExplicitPath: kubeconfigPath} + configOverrides := &clientcmd.ConfigOverrides{} + if clusterAlias != "" { + configOverrides.CurrentContext = clusterAlias + } + config, err = clientcmd.NewNonInteractiveDeferredLoadingClientConfig(loadingRules, configOverrides).ClientConfig() + if err != nil { + klog.Errorf("Failed to create client-go configuration for %s: %v", kubeconfigPath, err) + return + } + + clientset, err := kubernetes.NewForConfig(config) + if err != nil { + klog.Errorf("Failed to create clientset for %s: %v", kubeconfigPath, err) + return + } + + // We use the kube-system UID as the unique identifier for the cluster. 
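+	// The UID survives control plane restarts and does not depend on the
+	// kubeconfig file name or the context alias used to reach the cluster.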
+ ns, err := clientset.CoreV1().Namespaces().Get(ctx, metav1.NamespaceSystem, metav1.GetOptions{}) + if err != nil { + klog.Errorf("Failed to get kube-system namespace to determine cluster ID for cluster %s: %v", clusterAlias, err) + return + } + clusterID := string(ns.UID) + + if _, loaded := clusterContexts.Load(clusterAlias); loaded { + klog.Infof("Already watching cluster with alias: %s", clusterAlias) + return + } + + clusterCtx, cancel := context.WithCancel(ctx) + clusterContexts.Store(clusterAlias, cancel) + + factory := informers.NewSharedInformerFactory(clientset, 0) + podInformer := factory.Core().V1().Pods().Informer() + nsInformer := factory.Core().V1().Namespaces().Informer() + nodeInformer := factory.Core().V1().Nodes().Informer() + + _, _ = podInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + AddFunc: func(obj interface{}) { + updatePodInCache(server, nsInformer.GetStore(), nodeInformer.GetStore(), obj.(*v1.Pod), clusterID) + }, + UpdateFunc: func(oldObj, newObj interface{}) { + oldPod := oldObj.(*v1.Pod) + newPod := newObj.(*v1.Pod) + // check if pod IPs or labels changed + if !reflect.DeepEqual(oldPod.Status.PodIPs, newPod.Status.PodIPs) || !reflect.DeepEqual(oldPod.Labels, newPod.Labels) { + updatePodInCache(server, nsInformer.GetStore(), nodeInformer.GetStore(), newPod, clusterID) + } + }, + DeleteFunc: func(obj interface{}) { deletePodFromCache(server, obj) }, + }) + + _, _ = nsInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + UpdateFunc: func(oldObj, newObj interface{}) { + oldNs, newNs := oldObj.(*v1.Namespace), newObj.(*v1.Namespace) + if reflect.DeepEqual(oldNs.Labels, newNs.Labels) { + return + } + pods, err := factory.Core().V1().Pods().Lister().Pods(newNs.Name).List(labels.Everything()) + if err != nil { + klog.Errorf("Error listing pods in namespace %s for cluster %s: %v", newNs.Name, clusterAlias, err) + return + } + for _, pod := range pods { + updatePodInCache(server, nsInformer.GetStore(), nodeInformer.GetStore(), pod, clusterID) + } + }, + }) + + _, _ = nodeInformer.AddEventHandler(cache.ResourceEventHandlerFuncs{ + UpdateFunc: func(oldObj, newObj interface{}) { + oldNode, newNode := oldObj.(*v1.Node), newObj.(*v1.Node) + if reflect.DeepEqual(oldNode.Labels, newNode.Labels) { + return + } + // This is less efficient, but necessary without a nodeName index on the pod informer. 
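+			// List every pod in the cluster and filter on spec.nodeName below.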
+ pods, err := factory.Core().V1().Pods().Lister().List(labels.Everything()) + if err != nil { + klog.Errorf("Error listing all pods for cluster %s: %v", clusterAlias, err) + return + } + for _, pod := range pods { + if pod.Spec.NodeName == newNode.Name { + updatePodInCache(server, nsInformer.GetStore(), nodeInformer.GetStore(), pod, clusterID) + } + } + }, + }) + + factory.Start(clusterCtx.Done()) + cache.WaitForCacheSync(clusterCtx.Done(), podInformer.HasSynced, nsInformer.HasSynced, nodeInformer.HasSynced) + klog.Infof("Started watching cluster with alias: %s", clusterAlias) +} + +func stopWatching(clusterName string) { + if cancelFunc, loaded := clusterContexts.Load(clusterName); loaded { + cancelFunc.(context.CancelFunc)() + clusterContexts.Delete(clusterName) + klog.Infof("Stopped watching cluster '%s'", clusterName) + } +} + +func getClustersFromKubeconfig(path string) (map[string]bool, error) { + config, err := clientcmd.LoadFromFile(path) + if err != nil { + return nil, err + } + clusters := make(map[string]bool) + for name := range config.Clusters { + clusters[name] = true + } + return clusters, nil +} + +// updatePodInCache now accepts the cluster UID and stores it in the PodInfo. +func updatePodInCache(server *ipcache.EtcdStore, nsStore cache.Store, nodeStore cache.Store, pod *v1.Pod, clusterID string) { + if pod.Spec.HostNetwork || len(pod.Status.PodIPs) == 0 { + return + } + var nodeLabels, nsLabels map[string]string + nsObj, exists, err := nsStore.GetByKey(pod.Namespace) + if err == nil && exists { + nsLabels = nsObj.(*v1.Namespace).Labels + } + nodeObj, exists, err := nodeStore.GetByKey(pod.Spec.NodeName) + if err == nil && exists { + nodeLabels = nodeObj.(*v1.Node).Labels + } + // The cluster UID is now stored in the ClusterId field. + podInfo := api.NewPodInfo(pod, nsLabels, nodeLabels, clusterID) + for _, podIP := range pod.Status.PodIPs { + err := server.Upsert(podIP.IP, podInfo) + if err != nil { + klog.Errorf("fail to update IP address %s: %v", podIP.IP, err) + } + } +} + +func deletePodFromCache(server *ipcache.EtcdStore, obj interface{}) { + pod, ok := obj.(*v1.Pod) + if !ok { + if tombstone, ok := obj.(cache.DeletedFinalStateUnknown); ok { + pod, _ = tombstone.Obj.(*v1.Pod) + } + } + if pod == nil { + return + } + for _, podIP := range pod.Status.PodIPs { + err := server.Delete(podIP.IP) + if err != nil { + klog.Errorf("fail to delete IP address %s: %v", podIP.IP, err) + } + } +} diff --git a/docs/multicluster.md b/docs/multicluster.md new file mode 100644 index 00000000..67ddc3b7 --- /dev/null +++ b/docs/multicluster.md @@ -0,0 +1,169 @@ +# Multi-Cluster Network Policies User Guide + +This guide provides instructions on how to install and use the multi-cluster +feature of `kube-network-policies`. This feature allows you to enforce network +policies across multiple Kubernetes clusters. + +### Overview + +The multi-cluster feature is composed of two main components: + +* **`kube-ip-tracker`**: A central component that runs as a **Deployment** in + each cluster. It watches for `Pods`, `Namespaces`, and `Nodes` in all + clusters and stores the necessary information for applying + `NetworkPolicies`: IPs and Labels. This component requires a `kubeconfig` + file with access to all clusters in the mesh. + +* **`kube-network-policies` agent**: A **DaemonSet** that runs on each node of + each cluster. This agent enforces the network policies and connects to the + `kube-ip-tracker` service to get the necessary information to apply the + `NetworkPolicies`. 
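+
+To make the flat, cluster-set-wide label model concrete, here is a minimal,
+illustrative Go sketch of the kind of record the tracker keeps for each pod IP
+and of how an agent can evaluate a policy selector against it. The `trackedPod`
+type and the `matchesSelector` helper are hypothetical names used only for this
+sketch; they are not the actual API of this repository (the real record is
+built by `api.NewPodInfo` and served to the agents by the tracker).
+
+```go
+package main
+
+import "fmt"
+
+// trackedPod is an illustrative view of what the tracker derives from the
+// Pod, Namespace and Node informers of each member cluster, keyed by pod IP.
+type trackedPod struct {
+	ClusterID       string            // kube-system namespace UID of the source cluster
+	Namespace       string            // namespace of the pod in its own cluster
+	PodLabels       map[string]string // matched by podSelector
+	NamespaceLabels map[string]string // matched by namespaceSelector
+	NodeLabels      map[string]string // labels of the node running the pod
+}
+
+// matchesSelector is a simplified stand-in for selector evaluation: labels are
+// global to the cluster set, so a cluster-local policy can select remote pods
+// purely by label.
+func matchesSelector(p trackedPod, nsLabels, podLabels map[string]string) bool {
+	for k, v := range nsLabels {
+		if p.NamespaceLabels[k] != v {
+			return false
+		}
+	}
+	for k, v := range podLabels {
+		if p.PodLabels[k] != v {
+			return false
+		}
+	}
+	return true
+}
+
+func main() {
+	// An agent that sees a packet from 10.220.1.5 looks the IP up in the
+	// tracker and evaluates its local NetworkPolicies against the labels.
+	cache := map[string]trackedPod{
+		"10.220.1.5": {
+			ClusterID:       "example-cluster-uid",
+			Namespace:       "default",
+			PodLabels:       map[string]string{"app": "billing"},
+			NamespaceLabels: map[string]string{"cluster.clusterset.k8s.io": "clusterb"},
+		},
+	}
+	allowed := matchesSelector(cache["10.220.1.5"],
+		map[string]string{"cluster.clusterset.k8s.io": "clusterb"},
+		map[string]string{"app": "billing"})
+	fmt.Println("ingress allowed:", allowed)
+}
+```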
+
+### Prerequisites
+
+* Two or more Kubernetes clusters.
+* `kubectl` installed and configured with contexts for all your clusters.
+* Full network connectivity between the pods of all clusters. This means that
+  a pod in one cluster must be able to reach the IP of a pod in any of the
+  other clusters, and vice versa. This guide does not cover how to set up this
+  inter-cluster networking.
+
+### 1. RBAC Requirements
+
+Proper functioning of the multi-cluster feature requires specific RBAC
+permissions for each component.
+
+The `install-multicluster.yaml` manifest provided in this repository includes
+the necessary `ServiceAccounts`, `ClusterRoles`, and `ClusterRoleBindings`.
+
+### 2. Prepare the kubeconfig secret
+
+The `kube-ip-tracker` needs access to all clusters in the mesh. We will provide
+this access via a secret containing the kubeconfig files for each cluster.
+
+For each cluster, you need to have a kubeconfig file. You will then create a
+secret named `remote-kubeconfigs` in the `kube-system` namespace of **each**
+cluster. This secret will contain the kubeconfig files for all clusters in your
+mesh.
+
+For example, if you have two clusters, `cluster-a` and `cluster-b`, and you have
+their kubeconfig files at `/path/to/cluster-a.conf` and
+`/path/to/cluster-b.conf`, you would run the following commands for **each** of
+your clusters:
+
+```bash
+# Replace <CONTEXT_CLUSTER_A> with the kubectl context for your first cluster
+kubectl --context <CONTEXT_CLUSTER_A> -n kube-system create secret generic remote-kubeconfigs \
+    --from-file=cluster-a.yaml=/path/to/cluster-a.conf \
+    --from-file=cluster-b.yaml=/path/to/cluster-b.conf
+
+# Replace <CONTEXT_CLUSTER_B> with the kubectl context for your second cluster
+kubectl --context <CONTEXT_CLUSTER_B> -n kube-system create secret generic remote-kubeconfigs \
+    --from-file=cluster-a.yaml=/path/to/cluster-a.conf \
+    --from-file=cluster-b.yaml=/path/to/cluster-b.conf
+```
+
+### 3. Deploy the multi-cluster components
+
+Now we will deploy the `kube-ip-tracker` and the `kube-network-policies` agent
+to both clusters using the `install-multicluster.yaml` manifest.
+
+Make sure the container images specified in the manifest are available in your
+clusters. You may need to push them to a registry that your clusters can access
+and update the image paths in the `install-multicluster.yaml` file.
+
+Apply the manifest to each cluster:
+
+```bash
+# Replace <CONTEXT_CLUSTER_A> with the kubectl context for your first cluster
+kubectl --context <CONTEXT_CLUSTER_A> apply -f install-multicluster.yaml
+
+# Replace <CONTEXT_CLUSTER_B> with the kubectl context for your second cluster
+kubectl --context <CONTEXT_CLUSTER_B> apply -f install-multicluster.yaml
+```
+
+### 4. Example: Enforcing Cross-Cluster Security Boundaries
+
+This example demonstrates how to enforce a network policy that allows traffic
+from a specific application in one cluster to a database in another cluster,
+while denying traffic from other applications.
+
+First, let's label the `default` namespace in each cluster with the cluster's
+name. This will allow us to create policies that are scoped to a specific
+cluster.
+
+```bash
+# Replace <CONTEXT_CLUSTER_A> and <CONTEXT_CLUSTER_B> with your contexts
+kubectl --context <CONTEXT_CLUSTER_A> label namespace default cluster.clusterset.k8s.io=clustera
+kubectl --context <CONTEXT_CLUSTER_B> label namespace default cluster.clusterset.k8s.io=clusterb
+```
+
+Now, let's deploy a `database` service in `cluster-a`:
+
+```bash
+# Replace <CONTEXT_CLUSTER_A> with your context
+kubectl --context <CONTEXT_CLUSTER_A> run database --image=httpd:2 --labels="app=database" --expose --port=80
+kubectl --context <CONTEXT_CLUSTER_A> wait --for=condition=ready pod -l app=database --timeout=2m
+```
+
+Next, apply a network policy to the `database` pod that allows ingress traffic
+only from pods with the label `app=billing` in `cluster-b`.
+
+```bash
+# Replace <CONTEXT_CLUSTER_A> with your context
+DB_POD_IP=$(kubectl --context <CONTEXT_CLUSTER_A> get pod -l app=database -o jsonpath='{.items[0].status.podIP}')
+
+kubectl --context <CONTEXT_CLUSTER_A> apply -f - <<EOF
+apiVersion: networking.k8s.io/v1
+kind: NetworkPolicy
+metadata:
+  name: allow-from-cluster-b
+  namespace: default
+spec:
+  podSelector:
+    matchLabels:
+      app: database
+  policyTypes:
+  - Ingress
+  ingress:
+  - from:
+    - namespaceSelector:
+        matchLabels:
+          cluster.clusterset.k8s.io: clusterb
+      podSelector:
+        matchLabels:
+          app: billing
+EOF
+```
+
+### 5. Verify the policy
+
+Now, test the policy from `cluster-b`. This connection from the `billing` app
+should **SUCCEED**:
+
+```bash
+# Replace <CONTEXT_CLUSTER_B> with your context
+kubectl --context <CONTEXT_CLUSTER_B> run billing-client --image=busybox --labels="app=billing" --rm -it --restart=Never --command -- wget -O- --timeout=2 http://${DB_POD_IP}
+```
+
+This connection from the `analytics` app should **FAIL**:
+
+```bash
+# Replace <CONTEXT_CLUSTER_B> with your context
+kubectl --context <CONTEXT_CLUSTER_B> run analytics-client --image=busybox --labels="app=analytics" --rm -it --restart=Never --command -- wget -O- --timeout=2 http://${DB_POD_IP}
+```
+
+This demonstrates that the policy correctly allows traffic from the intended
+application in the remote cluster while blocking traffic from others.
+
+### 6. Cleanup
+
+To remove the resources created in this guide, you can run:
+
+```bash
+# Replace <CONTEXT_CLUSTER_A> with your context
+kubectl --context <CONTEXT_CLUSTER_A> delete -f install-multicluster.yaml
+kubectl --context <CONTEXT_CLUSTER_A> delete secret -n kube-system remote-kubeconfigs
+kubectl --context <CONTEXT_CLUSTER_A> delete networkpolicy allow-from-cluster-b
+kubectl --context <CONTEXT_CLUSTER_A> delete service database
+kubectl --context <CONTEXT_CLUSTER_A> delete pod database
+
+# Replace <CONTEXT_CLUSTER_B> with your context
+kubectl --context <CONTEXT_CLUSTER_B> delete -f install-multicluster.yaml
+kubectl --context <CONTEXT_CLUSTER_B> delete secret -n kube-system remote-kubeconfigs
+```
diff --git a/go.mod b/go.mod
index ef49cf58..ae8924b3 100644
--- a/go.mod
+++ b/go.mod
@@ -6,6 +6,7 @@ require (
 	github.com/armon/go-radix v1.0.0
 	github.com/containerd/nri v0.10.0
 	github.com/florianl/go-nfqueue v1.3.2
+	github.com/fsnotify/fsnotify v1.9.0
 	github.com/google/go-cmp v0.7.0
 	github.com/google/nftables v0.3.0
 	github.com/mdlayher/netlink v1.8.0
diff --git a/go.sum b/go.sum
index bc55a8aa..6474651c 100644
--- a/go.sum
+++ b/go.sum
@@ -31,6 +31,8 @@ github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf
 github.com/emicklei/go-restful/v3 v3.12.2/go.mod h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc=
 github.com/florianl/go-nfqueue v1.3.2 h1:8DPzhKJHywpHJAE/4ktgcqveCL7qmMLsEsVD68C4x4I=
 github.com/florianl/go-nfqueue v1.3.2/go.mod h1:eSnAor2YCfMCVYrVNEhkLGN/r1L+J4uDjc0EUy0tfq4=
+github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k=
+github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
 github.com/fxamacker/cbor/v2 v2.8.0 h1:fFtUGXUzXPHTIUdne5+zzMPTfffl3RD5qYnkY40vtxU=
 github.com/fxamacker/cbor/v2 v2.8.0/go.mod h1:vM4b+DJCtHn+zz7h3FFp/hDAI9WNWCsZj23V5ytsSxQ=
 github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
diff --git a/install-iptracker.yaml b/install-iptracker.yaml
index 70322196..abf372de 100644
--- a/install-iptracker.yaml
+++ b/install-iptracker.yaml
@@ -69,6 +69,10 @@ spec:
     spec:
       hostNetwork: true # do not depend on the network of the cluster
       serviceAccountName: kube-ip-tracker
+      tolerations:
+      - key: "node-role.kubernetes.io/control-plane"
+        operator: "Exists"
+        effect: 
"NoSchedule" containers: - name: kube-ip-tracker image: registry.k8s.io/networking/kube-ip-tracker:v0.1.0 diff --git a/install-multicluster.yaml b/install-multicluster.yaml new file mode 100644 index 00000000..6f945322 --- /dev/null +++ b/install-multicluster.yaml @@ -0,0 +1,229 @@ +# This manifest provides an example of how to deploy the kube-ip-tracker +# in a multi-cluster configuration. It uses a Secret to store the kubeconfig +# files for the remote clusters and mounts them into a directory that the +# tracker monitors. +# It also deploys the network policy agent. +--- +# 1. Secret containing the kubeconfig files for all clusters in the mesh. +# Each key in the `data` field should be a filename (e.g., cluster-one.yaml). +# The value is the base64 encoded content of the corresponding kubeconfig file. +# +# You can create this secret from existing files using a command like: +# kubectl create secret generic remote-kubeconfigs \ +# --from-file=cluster-one.yaml=/path/to/your/cluster-one.yaml \ +# --from-file=cluster-two.yaml=/path/to/your/cluster-two.yaml \ +# -n kube-system +# +# apiVersion: v1 +# kind: Secret +# metadata: +# name: remote-kubeconfigs +# namespace: kube-system +# type: Opaque +# data: +# cluster-one.yaml: base64-encoded-kubeconfig +# cluster-two.yaml: base64-encoded-kubeconfig + +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: kube-ip-tracker +rules: + - apiGroups: + - "" + resources: + - nodes + - pods + - namespaces + verbs: + - get + - list + - watch +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: kube-ip-tracker +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-ip-tracker +subjects: +- kind: ServiceAccount + name: kube-ip-tracker + namespace: kube-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-ip-tracker + namespace: kube-system +--- +apiVersion: v1 +kind: Service +metadata: + name: kube-ip-tracker + namespace: kube-system +spec: + clusterIP: None + selector: + app: kube-ip-tracker + ports: + - name: grpc + port: 10999 + targetPort: 10999 +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: kube-ip-tracker + namespace: kube-system + labels: + app: kube-ip-tracker + k8s-app: kube-ip-tracker +spec: + replicas: 2 + selector: + matchLabels: + app: kube-ip-tracker + template: + metadata: + labels: + app: kube-ip-tracker + k8s-app: kube-ip-tracker + spec: + hostNetwork: true # do not depend on the network of the cluster + serviceAccountName: kube-ip-tracker + tolerations: + - key: "node-role.kubernetes.io/control-plane" + operator: "Exists" + effect: "NoSchedule" + volumes: + - name: remote-kubeconfigs + secret: + secretName: remote-kubeconfigs + containers: + - name: kube-ip-tracker + image: registry.k8s.io/networking/kube-ip-tracker:v0.1.0-multicluster + args: + - /bin/kube-ip-tracker + - --listen-address=http://0.0.0.0:10999 + - --v=2 + - --kubeconfig-dir=/etc/kubernetes/kubeconfig + ports: + - containerPort: 10999 + name: grpc + volumeMounts: + - name: remote-kubeconfigs + mountPath: /etc/kubernetes/kubeconfig + readOnly: true + resources: + requests: + cpu: "100m" + memory: "50Mi" +--- +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: kube-network-policies +rules: + - apiGroups: + - "" + resources: + - namespaces + verbs: + - get + - list + - watch + - apiGroups: + - "networking.k8s.io" + resources: + - networkpolicies + verbs: + - list + - watch +--- +kind: ClusterRoleBinding +apiVersion: 
rbac.authorization.k8s.io/v1 +metadata: + name: kube-network-policies +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: kube-network-policies +subjects: +- kind: ServiceAccount + name: kube-network-policies + namespace: kube-system +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: kube-network-policies + namespace: kube-system +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: kube-network-policies + namespace: kube-system + labels: + tier: node + app: kube-network-policies + k8s-app: kube-network-policies +spec: + selector: + matchLabels: + app: kube-network-policies + template: + metadata: + labels: + tier: node + app: kube-network-policies + k8s-app: kube-network-policies + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + nodeSelector: + kubernetes.io/os: linux + tolerations: + - operator: Exists + effect: NoSchedule + serviceAccountName: kube-network-policies + containers: + - name: kube-network-policies + image: registry.k8s.io/networking/kube-network-policies:v0.8.0-iptracker + args: + - /bin/netpol + - --hostname-override=$(MY_NODE_NAME) + - --v=2 + - --nfqueue-id=198 + - --ip-tracker-address=kube-ip-tracker.kube-system.svc.cluster.local:10999 + volumeMounts: + - name: nri-plugin + mountPath: /var/run/nri + - name: netns + mountPath: /var/run/netns + mountPropagation: HostToContainer + resources: + requests: + cpu: "100m" + memory: "50Mi" + securityContext: + privileged: true + capabilities: + add: ["NET_ADMIN"] + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumes: + - name: nri-plugin + hostPath: + path: /var/run/nri + - name: netns + hostPath: + path: /var/run/netns +--- diff --git a/tests/multicluster/e2e_multicluster.bats b/tests/multicluster/e2e_multicluster.bats new file mode 100644 index 00000000..50e269fe --- /dev/null +++ b/tests/multicluster/e2e_multicluster.bats @@ -0,0 +1,428 @@ +#!/usr/bin/env bats + +# +# setup_file: This function is executed once before all tests. +# It sets up a two-cluster kind environment, deploys the necessary controllers, +# and establishes network connectivity between the clusters. +# +function setup_file { + export BATS_TEST_TIMEOUT=300 # Increased timeout for multi-cluster setup + + # Define image variables + export REGISTRY=${REGISTRY:-"registry.k8s.io/networking"} + export IMAGE_NAME=${IMAGE_NAME:-"kube-network-policies"} + export TAG=${TAG:-"test"} + + # Build the images for the specific binaries and architecture + ( + cd "$BATS_TEST_DIRNAME"/../.. 
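+    # The subshell keeps the directory change local to these make invocations.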
+ TAG="$TAG" make image-build-iptracker + TAG="$TAG" IMAGE_NAME="kube-ip-tracker" make image-build-kube-ip-tracker-multicluster + ) + + # Define cluster names and network subnets + export CLUSTER_NAME_A="clustera" + export CLUSTER_NAME_B="clusterb" + export POD_SUBNET_A="10.110.0.0/16" + export SERVICE_SUBNET_A="10.115.0.0/16" + export POD_SUBNET_B="10.220.0.0/16" + export SERVICE_SUBNET_B="10.225.0.0/16" + + # --- Create Cluster A --- + kind delete cluster --name $CLUSTER_NAME_A || true + + cat < /tmp/kubeconfig-a + kind get kubeconfig --name $CLUSTER_NAME_B > /tmp/kubeconfig-b + + # Merge kubeconfigs to be used by the tests + KUBECONFIG=/tmp/kubeconfig-a:/tmp/kubeconfig-b kubectl config view --flatten > /tmp/kubeconfig + export KUBECONFIG=/tmp/kubeconfig + + # --- Establish Cross-Cluster Routes --- + ROUTES_B=$(kubectl --context kind-clusterb get nodes -o=jsonpath='{range .items[*]}{"ip route add "}{.spec.podCIDR}{" via "}{.status.addresses[?(@.type=="InternalIP")].address}{"\n"}{end}') + for n in $(kind get nodes --name ${CLUSTER_NAME_A}); do + echo "$ROUTES_B" | while read -r route; do docker exec ${n} $route; done + docker exec ${n} ip route add $SERVICE_SUBNET_B via $(kubectl --context kind-clusterb get nodes -o=jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}') + # avoid cross cluster traffic to be masqueraded + docker exec ${n} iptables -t nat -I KIND-MASQ-AGENT 1 -d $POD_SUBNET_B -j ACCEPT + docker exec ${n} iptables -t nat -I KIND-MASQ-AGENT 1 -d $SERVICE_SUBNET_B -j ACCEPT + done + + ROUTES_A=$(kubectl --context kind-clustera get nodes -o=jsonpath='{range .items[*]}{"ip route add "}{.spec.podCIDR}{" via "}{.status.addresses[?(@.type=="InternalIP")].address}{"\n"}{end}') + for n in $(kind get nodes --name ${CLUSTER_NAME_B}); do + echo "$ROUTES_A" | while read -r route; do docker exec ${n} $route; done + docker exec ${n} ip route add $SERVICE_SUBNET_A via $(kubectl --context kind-clustera get nodes -o=jsonpath='{.items[0].status.addresses[?(@.type=="InternalIP")].address}') + # avoid cross cluster traffic to be masqueraded + docker exec ${n} iptables -t nat -I KIND-MASQ-AGENT 1 -d $POD_SUBNET_A -j ACCEPT + docker exec ${n} iptables -t nat -I KIND-MASQ-AGENT 1 -d $SERVICE_SUBNET_A -j ACCEPT + done + + # --- Deploy Multi-Cluster Controllers --- + # Prepare the installation manifest by replacing image placeholders + _install_manifest=$(cat "$BATS_TEST_DIRNAME"/../../install-multicluster.yaml | \ + sed "s#registry.k8s.io/networking/kube-ip-tracker:.*#$REGISTRY/kube-ip-tracker:$TAG-multicluster#" | \ + sed "s#registry.k8s.io/networking/kube-network-policies:.*#$REGISTRY/$IMAGE_NAME:$TAG-iptracker#") + + # Deploy controllers to both clusters + kind get kubeconfig --name $CLUSTER_NAME_A --internal > /tmp/kubeconfig-internal-a + kind get kubeconfig --name $CLUSTER_NAME_B --internal > /tmp/kubeconfig-internal-b + for cluster_context in "kind-clustera" "kind-clusterb"; do + # Create the shared kubeconfig secret with separate files + kubectl --context $cluster_context -n kube-system create secret generic remote-kubeconfigs \ + --from-file=clustera.yaml=/tmp/kubeconfig-internal-a \ + --from-file=clusterb.yaml=/tmp/kubeconfig-internal-b + + # Apply the installation manifest + printf '%s' "${_install_manifest}" | kubectl --context $cluster_context apply -f - + done + + # Wait for all controllers to be ready + kubectl --context kind-clustera -n kube-system wait --for=condition=ready pod -l app=kube-ip-tracker --timeout=2m + kubectl --context kind-clusterb -n kube-system 
wait --for=condition=ready pod -l app=kube-ip-tracker --timeout=2m + kubectl --context kind-clustera -n kube-system wait --for=condition=ready pod -l k8s-app=kube-network-policies + kubectl --context kind-clusterb -n kube-system wait --for=condition=ready pod -l k8s-app=kube-network-policies + +} + +# +# teardown: This function is executed after each test. +# It cleans up network policies to ensure tests are isolated. +# +function teardown { + # Clean up network policies in both clusters + kubectl --context kind-clustera delete networkpolicy --all --namespace default + kubectl --context kind-clusterb delete networkpolicy --all --namespace default +} + +# +# teardown_file: This function is executed once after all tests. +# It exports logs and deletes the kind clusters. +# +function teardown_file { + kind export logs "$BATS_TEST_DIRNAME"/../../_artifacts --name "$CLUSTER_NAME_A" + kind delete cluster --name "$CLUSTER_NAME_A" + kind export logs "$BATS_TEST_DIRNAME"/../../_artifacts --name "$CLUSTER_NAME_B" + kind delete cluster --name "$CLUSTER_NAME_B" +} + +@test "multicluster: deny all ingress traffic" { + # Get the pod IP of the web service in cluster A + POD_IP_A=$(kubectl --context kind-clustera get pod -l app=web -o jsonpath='{.items[0].status.podIP}') + + # Apply a default deny policy to the web server in cluster A + kubectl --context kind-clustera apply -f - <