Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 21 additions & 9 deletions .github/workflows/operator-ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,22 @@ env:
PLATFORMS: linux/amd64,linux/arm64

jobs:
# Test operator across supported Kubernetes versions
# Test operator across supported Kubernetes versions and test suites
tests:
runs-on: ubuntu-latest
strategy:
matrix:
# Test on all supported K8s versions (matches docs/kubernetes-support.md)
# Standard E2E tests on all supported K8s versions
k8s-version: ["1.31.12", "1.32.8", "1.33.4", "1.34.0"]
test-suite: ["e2e"]
include:
# Deployment policy tests on 15-node cluster (K8s 1.34 only)
- k8s-version: "1.34.0"
test-suite: deployment-policy
kind-config: k8s-tests/chainsaw/deployment-policy/kind-config.yaml
make-target: deployment-policy-tests
fail-fast: false # Continue testing other versions if one fails
name: ${{ matrix.test-suite }}-tests (k8s-${{ matrix.k8s-version }})
steps:
- uses: actions/checkout@v4
with:
Expand All @@ -71,13 +79,13 @@ jobs:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Kubernetes KinD Cluster v${{ matrix.k8s-version }}
- name: Create Kubernetes KinD Cluster v${{ matrix.k8s-version }}
id: kind
uses: helm/kind-action@v1
with:
version: v0.30.0
node_image: kindest/node:v${{ matrix.k8s-version }}
config: operator/config/local-dev/kind-config.yaml
config: ${{ matrix.kind-config || 'operator/config/local-dev/kind-config.yaml' }}
cluster_name: kind
# Cache build tools and dependencies for faster builds
- name: Restore cached Binaries
Expand All @@ -103,17 +111,21 @@ jobs:
path: |
${{ github.workspace }}/operator/bin
~/.cache/go-build
# Run full test suite including e2e tests
- name: end-to-end-tests
# Run test suite
- name: Run ${{ matrix.test-suite }} tests
run: |
cd operator
make setup-kind-cluster
make test
if [ "${{ matrix.test-suite }}" = "e2e" ]; then
make setup-kind-cluster
make test
else
make ${{ matrix.make-target }}
fi

# Build multi-platform container image and push to registry
build-and-push-operator:
runs-on: ubuntu-latest
needs: [tests] # Don't run the build and push if the k8s tests fail
needs: [tests] # Don't run the build and push if tests fail
# Sets the permissions granted to the `GITHUB_TOKEN` for the actions in this job.
permissions:
contents: read
Expand Down
44 changes: 44 additions & 0 deletions k8s-tests/chainsaw/deployment-policy/kind-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Kind cluster definition shared by every deployment-policy test.
# Topology: 1 control-plane node + 15 worker nodes.
#   - Multi-compartment test: uses all 15 workers
#   - Linear strategy test: uses the first 8 workers
#   - Overlapping selectors test: uses the first 6 workers

apiVersion: kind.x-k8s.io/v1alpha4
kind: Cluster
nodes:
  - role: control-plane
  # 15 workers follow; the tests address them by index.
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
  - role: worker
90 changes: 90 additions & 0 deletions k8s-tests/chainsaw/deployment-policy/label-nodes.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
#!/bin/bash

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

set -e

# Add or remove labels on a contiguous, zero-based range of worker nodes.
#
# Usage: label-nodes.sh <operation> <node_range> <label1=value1> [label2=value2] ...
#        label-nodes.sh clean-all <label_key>
# Examples:
#   label-nodes.sh add 0-4 priority=critical skyhook.nvidia.com/test-node=skyhooke2e
#   label-nodes.sh remove 0-14 priority env region
#   label-nodes.sh clean-all skyhook.nvidia.com/test-node
#
# <node_range> is either a single index (N) or an inclusive range (N-M) into
# the name-sorted list of nodes whose name does not contain "control-plane".
# Exit status is 1 on any usage error or when the cluster has too few workers.

usage() {
  echo "Usage: $0 {add|remove|clean-all} ..." >&2
}

OPERATION=${1:-}
if [ -z "$OPERATION" ]; then
  echo "ERROR: Missing operation" >&2
  usage
  exit 1
fi
shift

if [ "$OPERATION" = "clean-all" ]; then
  LABEL_PREFIX=${1:-}
  if [ -z "$LABEL_PREFIX" ]; then
    echo "ERROR: clean-all requires a label key" >&2
    usage
    exit 1
  fi
  echo "Cleaning all labels matching: $LABEL_PREFIX"
  # "<key>-" asks kubectl to drop exactly that label key from every node.
  # Deliberately best-effort: nodes that do not carry the label are fine.
  kubectl label nodes --all "${LABEL_PREFIX}-" --overwrite 2>/dev/null || true
  echo "✓ Cleanup complete"
  exit 0
fi

# Reject unknown operations before touching the cluster.
case "$OPERATION" in
  add|remove) ;;
  *)
    echo "ERROR: Unknown operation: $OPERATION" >&2
    usage
    exit 1
    ;;
esac

NODE_RANGE=${1:-}
if [[ ! $NODE_RANGE =~ ^[0-9]+(-[0-9]+)?$ ]]; then
  echo "ERROR: Invalid node range '$NODE_RANGE' (expected N or N-M)" >&2
  usage
  exit 1
fi
shift

if [ $# -eq 0 ]; then
  echo "ERROR: '$OPERATION' requires at least one label" >&2
  usage
  exit 1
fi

# Parse node range. 10# forces base 10 so "08" is not rejected as bad octal.
START=$((10#${NODE_RANGE%%-*}))
END=$((10#${NODE_RANGE##*-}))
if [ "$START" -gt "$END" ]; then
  echo "ERROR: Invalid node range '$NODE_RANGE' (start is greater than end)" >&2
  exit 1
fi

# Get all worker nodes (excluding control-plane), one name per array element.
# A read loop is used instead of mapfile so the script also runs on bash 3.2.
WORKERS=()
while IFS= read -r node_name; do
  if [ -n "$node_name" ]; then
    WORKERS+=("$node_name")
  fi
done < <(kubectl get nodes --no-headers -o custom-columns=NAME:.metadata.name | grep -v control-plane | sort)

# Validate we have enough nodes; after this every index in START..END is valid.
if [ "${#WORKERS[@]}" -lt $((END + 1)) ]; then
  echo "ERROR: Need at least $((END + 1)) worker nodes for this operation" >&2
  echo "Found: ${#WORKERS[@]} workers" >&2
  exit 1
fi

case "$OPERATION" in
  add)
    # Keep labels as an array so each key=value reaches kubectl as one argument.
    LABELS=("$@")
    echo "Adding labels to nodes [$START-$END]: ${LABELS[*]}"
    for ((i = START; i <= END; i++)); do
      kubectl label node "${WORKERS[$i]}" "${LABELS[@]}" --overwrite
    done
    ;;
  remove)
    # kubectl removes a label when its key is given with a trailing "-".
    LABELS_TO_REMOVE=()
    for label in "$@"; do
      LABELS_TO_REMOVE+=("${label}-")
    done
    echo "Removing labels from nodes [$START-$END]: $*"
    for ((i = START; i <= END; i++)); do
      # Best-effort: a node that lacks one of the labels must not abort the run.
      kubectl label node "${WORKERS[$i]}" "${LABELS_TO_REMOVE[@]}" --overwrite 2>/dev/null || true
    done
    ;;
esac

echo "✓ Operation complete"

Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Expected status of the legacy-interruption-budget-test Skyhook.
# NOTE(review): presumably the assert-default-compartment.yaml file referenced
# by the legacy compatibility Chainsaw test — the file name is not shown here.
apiVersion: skyhook.nvidia.com/v1alpha1
kind: Skyhook
metadata:
  name: legacy-interruption-budget-test
status:
  compartmentStatuses:
    # Compartment expected when a Skyhook uses interruptionBudget rather than
    # a deploymentPolicy.
    __default__:
      # The test labels workers 0-5, i.e. six nodes match the node selector.
      matched: 6
      # Same value as the Skyhook's interruptionBudget count (3).
      ceiling: 3
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# yaml-language-server: $schema=https://raw.githubusercontent.com/kyverno/chainsaw/main/.schemas/json/test-chainsaw-v1alpha1.json
# Chainsaw test: a Skyhook that only sets the legacy interruptionBudget must
# still report a __default__ compartment with the expected matched/ceiling
# values, both in its status and in the exported metrics.
apiVersion: chainsaw.kyverno.io/v1alpha1
kind: Test
metadata:
  name: legacy-interruption-budget-compatibility
spec:
  description: |
    Tests backwards compatibility with legacy InterruptionBudget.
    - Creates Skyhook with interruptionBudget (count: 3) instead of deploymentPolicy
    - Verifies that a synthetic __default__ compartment is created
    - Verifies that the budget ceiling is respected (max 3 nodes in progress)
    - Ensures existing customers' configurations continue to work
  # Upper bounds applied to the script and assert operations below.
  timeouts:
    exec: 180s
    assert: 120s
  steps:
    # Reset the test-node label on every node, then label workers 0-5 so that
    # exactly six nodes match the Skyhook's node selector.
    - name: setup-nodes
      try:
        - script:
            content: |
              chmod +x ../label-nodes.sh
              # Clean up any existing labels from previous tests
              ../label-nodes.sh clean-all skyhook.nvidia.com/test-node
              # Label first 6 worker nodes
              ../label-nodes.sh add 0-5 skyhook.nvidia.com/test-node=skyhooke2e
              echo "✓ Node labeling complete"
              kubectl get nodes -L skyhook.nvidia.com/test-node --sort-by=.metadata.name | head -8

    # Create the Skyhook, then pause before asserting.
    # NOTE(review): the 10s sleep presumably gives the operator time to
    # reconcile — confirm it is still needed given the assert timeout.
    - name: apply-skyhook
      try:
        - apply:
            file: skyhook.yaml
        - sleep:
            duration: 10s

    # Check the Skyhook status against assert-default-compartment.yaml.
    - name: verify-default-compartment
      try:
        - assert:
            file: assert-default-compartment.yaml

    # Check the rollout metrics via metrics_test.py: 6 matched nodes and a
    # ceiling of 3, tagged policy_name=legacy, compartment_name=__default__
    # and strategy=fixed.
    - name: verify-metrics
      try:
        - script:
            content: |
              echo "=== Verifying legacy compatibility metrics ==="

              # Verify metrics for synthetic __default__ compartment
              ../../metrics_test.py skyhook_rollout_matched_nodes 6 -t skyhook_name=legacy-interruption-budget-test -t policy_name=legacy -t compartment_name=__default__ -t strategy=fixed
              ../../metrics_test.py skyhook_rollout_ceiling 3 -t skyhook_name=legacy-interruption-budget-test -t policy_name=legacy -t compartment_name=__default__ -t strategy=fixed

              # Verify that the legacy policy name is "legacy" and not a real policy
              echo "✓ Legacy compatibility metrics verified!"
              echo "✓ Metrics use policy_name=legacy for backwards compatibility"

    # Run the shared reset script for this Skyhook and remove the test label.
    # NOTE(review): this is an ordinary `try` step, so it is presumably skipped
    # when an earlier step fails; consider a `finally`/`cleanup` block — confirm
    # against Chainsaw's step semantics.
    - name: cleanup
      try:
        - script:
            content: |
              # Clean up node annotations
              ../../skyhook/rest_test.sh legacy-interruption-budget-test
              # Clean up labels
              ../label-nodes.sh clean-all skyhook.nvidia.com/test-node
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Skyhook fixture for the legacy compatibility test: it configures rollout
# limits through interruptionBudget only and sets no deploymentPolicy.
apiVersion: skyhook.nvidia.com/v1alpha1
kind: Skyhook
metadata:
  name: legacy-interruption-budget-test
spec:
  priority: 100
  # Targets the nodes labeled by the test's setup step.
  nodeSelectors:
    matchLabels:
      skyhook.nvidia.com/test-node: skyhooke2e
  # Legacy InterruptionBudget, used here instead of a DeploymentPolicy:
  # at most 3 nodes at once.
  interruptionBudget:
    count: 3
  packages:
    test-pkg:
      version: "6.2.0"
      image: "ghcr.io/nvidia/skyhook/agentless"

Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# Assert: Batch 1 - At least 1 node completed (initial batch)
apiVersion: skyhook.nvidia.com/v1alpha1
kind: Skyhook
metadata:
  name: linear-strategy-test
status:
  compartmentStatuses:
    production:
      # Expression key: satisfied once the compartment's `completed` value
      # is at least 1.
      (completed >= `1`): true
Loading