diff --git a/Makefile b/Makefile index 4204cf932bd8..b5cf84d4304d 100644 --- a/Makefile +++ b/Makefile @@ -938,30 +938,31 @@ test-cover: ## Run unit and integration tests and generate a coverage report go tool cover -func=out/coverage.out -o out/coverage.txt go tool cover -html=out/coverage.out -o out/coverage.html -.PHONY: test-docker-infrastructure -test-docker-infrastructure: $(SETUP_ENVTEST) ## Run unit and integration tests with race detector for docker infrastructure provider +.PHONY: test-infrastructure +test-infrastructure: $(SETUP_ENVTEST) ## Run unit and integration tests with race detector for docker infrastructure provider # Note: Fuzz tests are not executed with race detector because they would just time out. # To achieve that, all files with fuzz tests have the "!race" build tag, to still run fuzz tests # we have an additional `go test` run that focuses on "TestFuzzyConversion". - cd $(CAPD_DIR); KUBEBUILDER_ASSETS="$(KUBEBUILDER_ASSETS)" go test -race ./... $(TEST_ARGS) - $(MAKE) test-docker-infrastructure-conversions TEST_ARGS="$(TEST_ARGS)" + cd test/infrastructure; KUBEBUILDER_ASSETS="$(KUBEBUILDER_ASSETS)" go test ./... $(TEST_ARGS) + $(MAKE) test-infrastructure-conversions TEST_ARGS="$(TEST_ARGS)" -.PHONY: test-docker-infrastructure-conversions -test-docker-infrastructure-conversions: $(SETUP_ENVTEST) ## Run conversions test for docker infrastructure provider - cd $(CAPD_DIR); KUBEBUILDER_ASSETS="$(KUBEBUILDER_ASSETS)" go test -run "^TestFuzzyConversion$$" ./... $(TEST_ARGS) +.PHONY: test-infrastructure-conversions +test-infrastructure-conversions: $(SETUP_ENVTEST) ## Run conversions test for docker infrastructure provider + cd test/infrastructure; KUBEBUILDER_ASSETS="$(KUBEBUILDER_ASSETS)" go test -run "^TestFuzzyConversion$$" ./... 
$(TEST_ARGS) -.PHONY: test-docker-infrastructure-verbose -test-docker-infrastructure-verbose: ## Run unit and integration tests with race detector and with verbose flag for docker infrastructure provider - $(MAKE) test-docker-infrastructure TEST_ARGS="$(TEST_ARGS) -v" +.PHONY: test-infrastructure-verbose +test-infrastructure-verbose: ## Run unit and integration tests with race detector and with verbose flag for docker infrastructure provider + $(MAKE) test-infrastructure TEST_ARGS="$(TEST_ARGS) -v" -.PHONY: test-docker-infrastructure-junit -test-docker-infrastructure-junit: $(SETUP_ENVTEST) $(GOTESTSUM) ## Run unit and integration tests with race detector and generate a junit report for docker infrastructure provider - cd $(CAPD_DIR); set +o errexit; (KUBEBUILDER_ASSETS="$(KUBEBUILDER_ASSETS)" go test -race -json ./... $(TEST_ARGS); echo $$? > $(ARTIFACTS)/junit.infra_docker.exitcode) | tee $(ARTIFACTS)/junit.infra_docker.stdout +.PHONY: test-infrastructure-junit +test-infrastructure-junit: $(SETUP_ENVTEST) $(GOTESTSUM) ## Run unit and integration tests with race detector and generate a junit report for docker infrastructure provider + cd test/infrastructure; set +o errexit; (KUBEBUILDER_ASSETS="$(KUBEBUILDER_ASSETS)" go test -json ./... $(TEST_ARGS); echo $$? > $(ARTIFACTS)/junit.infra_docker.exitcode) | tee $(ARTIFACTS)/junit.infra_docker.stdout $(GOTESTSUM) --junitfile $(ARTIFACTS)/junit.infra_docker.xml --raw-command cat $(ARTIFACTS)/junit.infra_docker.stdout exit $$(cat $(ARTIFACTS)/junit.infra_docker.exitcode) - cd $(CAPD_DIR); set +o errexit; (KUBEBUILDER_ASSETS="$(KUBEBUILDER_ASSETS)" go test -run "^TestFuzzyConversion$$" -json ./... $(TEST_ARGS); echo $$? > $(ARTIFACTS)/junit-fuzz.infra_docker.exitcode) | tee $(ARTIFACTS)/junit-fuzz.infra_docker.stdout + cd test/infrastructure; set +o errexit; (KUBEBUILDER_ASSETS="$(KUBEBUILDER_ASSETS)" go test -run "^TestFuzzyConversion$$" -json ./... $(TEST_ARGS); echo $$? 
> $(ARTIFACTS)/junit-fuzz.infra_docker.exitcode) | tee $(ARTIFACTS)/junit-fuzz.infra_docker.stdout $(GOTESTSUM) --junitfile $(ARTIFACTS)/junit-fuzz.infra_docker.xml --raw-command cat $(ARTIFACTS)/junit-fuzz.infra_docker.stdout exit $$(cat $(ARTIFACTS)/junit-fuzz.infra_docker.exitcode) + .PHONY: test-test-extension test-test-extension: $(SETUP_ENVTEST) ## Run unit and integration tests for the test extension cd $(TEST_EXTENSION_DIR); KUBEBUILDER_ASSETS="$(KUBEBUILDER_ASSETS)" go test -race ./... $(TEST_ARGS) diff --git a/api/core/v1beta1/conversion.go b/api/core/v1beta1/conversion.go index 1b15ed66341c..43ff16560c13 100644 --- a/api/core/v1beta1/conversion.go +++ b/api/core/v1beta1/conversion.go @@ -248,6 +248,8 @@ func (src *ClusterClass) ConvertTo(dstRaw conversion.Hub) error { dst.Status.Variables[i] = variable } + dst.Spec.KubernetesVersions = restored.Spec.KubernetesVersions + return nil } diff --git a/api/core/v1beta1/zz_generated.conversion.go b/api/core/v1beta1/zz_generated.conversion.go index eb54f254dd5a..7a54a418e8e4 100644 --- a/api/core/v1beta1/zz_generated.conversion.go +++ b/api/core/v1beta1/zz_generated.conversion.go @@ -1207,6 +1207,7 @@ func autoConvert_v1beta2_ClusterClassSpec_To_v1beta1_ClusterClassSpec(in *v1beta } else { out.Patches = nil } + // WARNING: in.KubernetesVersions requires manual conversion: does not exist in peer-type return nil } diff --git a/api/core/v1beta2/clusterclass_types.go b/api/core/v1beta2/clusterclass_types.go index 80d78f358553..189995e33e24 100644 --- a/api/core/v1beta2/clusterclass_types.go +++ b/api/core/v1beta2/clusterclass_types.go @@ -135,6 +135,18 @@ type ClusterClassSpec struct { // +kubebuilder:validation:MinItems=1 // +kubebuilder:validation:MaxItems=1000 Patches []ClusterClassPatch `json:"patches,omitempty"` + + // kubernetesVersions is the list of Kubernetes versions that can be + // used for clusters using this ClusterClass. 
+ // The list of version must be ordered from the older to the newer version, and there should be + // at least one version for every minor in between the first and the last version. + // +optional + // +listType=atomic + // +kubebuilder:validation:MinItems=1 + // +kubebuilder:validation:MaxItems=100 + // +kubebuilder:validation:items:MinLength=1 + // +kubebuilder:validation:items:MaxLength=256 + KubernetesVersions []string `json:"kubernetesVersions,omitempty"` } // InfrastructureClass defines the class for the infrastructure cluster. diff --git a/api/core/v1beta2/common_types.go b/api/core/v1beta2/common_types.go index 55c37a288bde..26ff7d15084a 100644 --- a/api/core/v1beta2/common_types.go +++ b/api/core/v1beta2/common_types.go @@ -36,6 +36,10 @@ const ( // to track the name of the MachineDeployment topology it represents. ClusterTopologyMachineDeploymentNameLabel = "topology.cluster.x-k8s.io/deployment-name" + // ClusterTopologyUpgradeStepAnnotation tracks the version of the current upgrade step. + // It is only set when an upgrade is in progress, and it contains the control plane version computed by topology controller. + ClusterTopologyUpgradeStepAnnotation = "topology.internal.cluster.x-k8s.io/upgrade-step" + // ClusterTopologyHoldUpgradeSequenceAnnotation can be used to hold the entire MachineDeployment upgrade sequence. // If the annotation is set on a MachineDeployment topology in Cluster.spec.topology.workers, the Kubernetes upgrade // for this MachineDeployment topology and all subsequent ones is deferred. 
diff --git a/api/core/v1beta2/zz_generated.deepcopy.go b/api/core/v1beta2/zz_generated.deepcopy.go index 49d1f6655253..f53aa29c59c7 100644 --- a/api/core/v1beta2/zz_generated.deepcopy.go +++ b/api/core/v1beta2/zz_generated.deepcopy.go @@ -253,6 +253,11 @@ func (in *ClusterClassSpec) DeepCopyInto(out *ClusterClassSpec) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.KubernetesVersions != nil { + in, out := &in.KubernetesVersions, &out.KubernetesVersions + *out = make([]string, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterClassSpec. diff --git a/api/core/v1beta2/zz_generated.openapi.go b/api/core/v1beta2/zz_generated.openapi.go index 6ada26c78cd5..45b8f5b952e7 100644 --- a/api/core/v1beta2/zz_generated.openapi.go +++ b/api/core/v1beta2/zz_generated.openapi.go @@ -617,6 +617,26 @@ func schema_cluster_api_api_core_v1beta2_ClusterClassSpec(ref common.ReferenceCa }, }, }, + "kubernetesVersions": { + VendorExtensible: spec.VendorExtensible{ + Extensions: spec.Extensions{ + "x-kubernetes-list-type": "atomic", + }, + }, + SchemaProps: spec.SchemaProps{ + Description: "kubernetesVersions is the list of Kubernetes versions that can be used for clusters using this ClusterClass. 
The list of version must be ordered from the older to the newer version, and there should be at least one version for every minor in between the first and the last version.", + Type: []string{"array"}, + Items: &spec.SchemaOrArray{ + Schema: &spec.Schema{ + SchemaProps: spec.SchemaProps{ + Default: "", + Type: []string{"string"}, + Format: "", + }, + }, + }, + }, + }, }, Required: []string{"infrastructure", "controlPlane"}, }, diff --git a/config/crd/bases/cluster.x-k8s.io_clusterclasses.yaml b/config/crd/bases/cluster.x-k8s.io_clusterclasses.yaml index 60ca830f5af1..69d6725d76b0 100644 --- a/config/crd/bases/cluster.x-k8s.io_clusterclasses.yaml +++ b/config/crd/bases/cluster.x-k8s.io_clusterclasses.yaml @@ -3320,6 +3320,20 @@ spec: required: - templateRef type: object + kubernetesVersions: + description: |- + kubernetesVersions is the list of Kubernetes versions that can be + used for clusters using this ClusterClass. + The list of version must be ordered from the older to the newer version, and there should be + at least one version for every minor in between the first and the last version. + items: + maxLength: 256 + minLength: 1 + type: string + maxItems: 100 + minItems: 1 + type: array + x-kubernetes-list-type: atomic patches: description: |- patches defines the patches which are applied to customize diff --git a/controlplane/kubeadm/internal/controllers/scale.go b/controlplane/kubeadm/internal/controllers/scale.go index b58b1f1a5ffb..a16ad847bef1 100644 --- a/controlplane/kubeadm/internal/controllers/scale.go +++ b/controlplane/kubeadm/internal/controllers/scale.go @@ -185,8 +185,18 @@ func (r *KubeadmControlPlaneReconciler) preflightChecks(ctx context.Context, con if feature.Gates.Enabled(feature.ClusterTopology) { // Block when we expect an upgrade to be propagated for topology clusters. 
- if controlPlane.Cluster.Spec.Topology.IsDefined() && controlPlane.Cluster.Spec.Topology.Version != controlPlane.KCP.Spec.Version { - logger.Info(fmt.Sprintf("Waiting for a version upgrade to %s to be propagated from Cluster.spec.topology", controlPlane.Cluster.Spec.Topology.Version)) + // NOTE: in case the cluster is performing an upgrade, allow creation of machines for the intermediate step. + hasSameVersionOfCurrentUpgradeStep := false + if version, ok := controlPlane.Cluster.GetAnnotations()[clusterv1.ClusterTopologyUpgradeStepAnnotation]; ok { + hasSameVersionOfCurrentUpgradeStep = version == controlPlane.KCP.Spec.Version + } + + if controlPlane.Cluster.Spec.Topology.IsDefined() && controlPlane.Cluster.Spec.Topology.Version != controlPlane.KCP.Spec.Version && !hasSameVersionOfCurrentUpgradeStep { + v := controlPlane.Cluster.Spec.Topology.Version + if version, ok := controlPlane.Cluster.GetAnnotations()[clusterv1.ClusterTopologyUpgradeStepAnnotation]; ok { + v = version + } + logger.Info(fmt.Sprintf("Waiting for a version upgrade to %s to be propagated", v)) controlPlane.PreflightCheckResults.TopologyVersionMismatch = true return ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, nil } diff --git a/controlplane/kubeadm/internal/controllers/scale_test.go b/controlplane/kubeadm/internal/controllers/scale_test.go index ace9e7681391..f8606fc71968 100644 --- a/controlplane/kubeadm/internal/controllers/scale_test.go +++ b/controlplane/kubeadm/internal/controllers/scale_test.go @@ -562,6 +562,37 @@ func TestPreflightChecks(t *testing.T) { TopologyVersionMismatch: true, }, }, + { + name: "control plane with a pending upgrade, but not yet at the current step of the upgrade plan, should requeue", + cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + clusterv1.ClusterTopologyUpgradeStepAnnotation: "v1.32.0", + }, + }, + Spec: clusterv1.ClusterSpec{ + Topology: clusterv1.Topology{ + Version: "v1.33.0", + }, + }, + }, + 
kcp: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Version: "v1.31.0", + }, + }, + machines: []*clusterv1.Machine{ + {}, + }, + + expectResult: ctrl.Result{RequeueAfter: preflightFailedRequeueAfter}, + expectPreflight: internal.PreflightCheckResults{ + HasDeletingMachine: false, + ControlPlaneComponentsNotHealthy: false, + EtcdClusterNotHealthy: false, + TopologyVersionMismatch: true, + }, + }, { name: "control plane with a deleting machine should requeue", kcp: &controlplanev1.KubeadmControlPlane{}, @@ -687,6 +718,55 @@ func TestPreflightChecks(t *testing.T) { TopologyVersionMismatch: false, }, }, + { + name: "control plane with a pending upgrade, but already at the current step of the upgrade plan, should pass", + cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + clusterv1.ClusterTopologyUpgradeStepAnnotation: "v1.32.0", + }, + }, + Spec: clusterv1.ClusterSpec{ + Topology: clusterv1.Topology{ + Version: "v1.33.0", + }, + }, + }, + kcp: &controlplanev1.KubeadmControlPlane{ + Spec: controlplanev1.KubeadmControlPlaneSpec{ + Version: "v1.32.0", + }, Status: controlplanev1.KubeadmControlPlaneStatus{ + Conditions: []metav1.Condition{ + {Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyCondition, Status: metav1.ConditionTrue}, + {Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyCondition, Status: metav1.ConditionTrue}, + }, + }, + }, + machines: []*clusterv1.Machine{ + { + Status: clusterv1.MachineStatus{ + NodeRef: clusterv1.MachineNodeReference{ + Name: "node-1", + }, + Conditions: []metav1.Condition{ + {Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyCondition, Status: metav1.ConditionTrue}, + {Type: controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyCondition, Status: metav1.ConditionTrue}, + {Type: controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyCondition, Status: metav1.ConditionTrue}, + {Type: 
controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyCondition, Status: metav1.ConditionTrue}, + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyCondition, Status: metav1.ConditionTrue}, + }, + }, + }, + }, + + expectResult: ctrl.Result{}, + expectPreflight: internal.PreflightCheckResults{ + HasDeletingMachine: false, + ControlPlaneComponentsNotHealthy: false, + EtcdClusterNotHealthy: false, + TopologyVersionMismatch: false, + }, + }, } for _, tt := range testCases { diff --git a/controlplane/kubeadm/internal/controllers/status.go b/controlplane/kubeadm/internal/controllers/status.go index d056e2d8a567..afa4429eae38 100644 --- a/controlplane/kubeadm/internal/controllers/status.go +++ b/controlplane/kubeadm/internal/controllers/status.go @@ -836,7 +836,11 @@ func minTime(t1, t2 time.Time) time.Time { func getPreflightMessages(cluster *clusterv1.Cluster, preflightChecks internal.PreflightCheckResults) []string { additionalMessages := []string{} if preflightChecks.TopologyVersionMismatch { - additionalMessages = append(additionalMessages, fmt.Sprintf("* waiting for a version upgrade to %s to be propagated from Cluster.spec.topology", cluster.Spec.Topology.Version)) + v := cluster.Spec.Topology.Version + if version, ok := cluster.GetAnnotations()[clusterv1.ClusterTopologyUpgradeStepAnnotation]; ok { + v = version + } + additionalMessages = append(additionalMessages, fmt.Sprintf("* waiting for a version upgrade to %s to be propagated", v)) } if preflightChecks.HasDeletingMachine { diff --git a/controlplane/kubeadm/internal/controllers/status_test.go b/controlplane/kubeadm/internal/controllers/status_test.go index 66f983b8270e..427dca5f325e 100644 --- a/controlplane/kubeadm/internal/controllers/status_test.go +++ b/controlplane/kubeadm/internal/controllers/status_test.go @@ -451,7 +451,7 @@ func Test_setScalingUpCondition(t *testing.T) { Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneScalingUpReason, Message: "Scaling up from 3 
to 5 replicas is blocked because:\n" + - "* waiting for a version upgrade to v1.32.0 to be propagated from Cluster.spec.topology\n" + + "* waiting for a version upgrade to v1.32.0 to be propagated\n" + "* waiting for a control plane Machine to complete deletion\n" + "* waiting for control plane components to become healthy\n" + "* waiting for etcd cluster to become healthy", @@ -645,7 +645,7 @@ After above Pods have been removed from the Node, the following Pods will be evi Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneScalingDownReason, Message: "Scaling down from 3 to 1 replicas is blocked because:\n" + - "* waiting for a version upgrade to v1.32.0 to be propagated from Cluster.spec.topology\n" + + "* waiting for a version upgrade to v1.32.0 to be propagated\n" + "* waiting for a control plane Machine to complete deletion\n" + "* waiting for control plane components to become healthy\n" + "* waiting for etcd cluster to become healthy", diff --git a/docs/book/src/reference/api/labels-and-annotations.md b/docs/book/src/reference/api/labels-and-annotations.md index fc428bd5acd6..66ab543f46df 100644 --- a/docs/book/src/reference/api/labels-and-annotations.md +++ b/docs/book/src/reference/api/labels-and-annotations.md @@ -21,14 +21,14 @@ | Annotation | Note | Managed By | Applies to | 
|:-----------------------------------------------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------|:-----------------------------------------------| | before-upgrade.hook.cluster.cluster.x-k8s.io | It specifies the prefix we search each annotation for during the before-upgrade lifecycle hook to block propagating the new version to the control plane. These hooks will prevent propagation of changes made to the Cluster Topology to the underlying objects. | User | Clusters | -| cluster.x-k8s.io/annotations-from-machine| It is set on nodes to track the annotations that originated from machines.| Cluster API | Nodes (workload cluster)| +| cluster.x-k8s.io/annotations-from-machine | It is set on nodes to track the annotations that originated from machines. | Cluster API | Nodes (workload cluster) | | cluster.x-k8s.io/cloned-from-groupkind | It is the annotation that stores the group-kind of the template from which the current resource has been cloned from. | Cluster API | All Cluster API objects cloned from a template | | cluster.x-k8s.io/cloned-from-name | It is the annotation that stores the name of the template from which the current resource has been cloned from. | Cluster API | All Cluster API objects cloned from a template | | cluster.x-k8s.io/cluster-name | It is set on nodes identifying the name of the cluster the node belongs to. 
| Cluster API | Nodes (workload cluster) | | cluster.x-k8s.io/cluster-namespace | It is set on nodes identifying the namespace of the cluster the node belongs to. | Cluster API | Nodes (workload cluster) | | cluster.x-k8s.io/delete-machine | It marks control plane and worker nodes that will be given priority for deletion when KCP or a MachineSet scales down. It is given top priority on all delete policies. | User | Machines | | cluster.x-k8s.io/disable-machine-create | It can be used to signal a MachineSet to stop creating new machines. It is utilized in the OnDelete MachineDeploymentStrategy to allow the MachineDeployment controller to scale down older MachineSets when Machines are deleted and add the new replicas to the latest MachineSet. | Cluster API | MachineSets | -| cluster.x-k8s.io/labels-from-machine| It is set on nodes to track the labels that originated from machines.| Cluster API | Nodes (workload cluster)| +| cluster.x-k8s.io/labels-from-machine | It is set on nodes to track the labels that originated from machines. | Cluster API | Nodes (workload cluster) | | cluster.x-k8s.io/managed-by | It can be applied to InfraCluster resources to signify that some external system is managing the cluster infrastructure. Provider InfraCluster controllers will ignore resources with this annotation. An external controller must fulfill the contract of the InfraCluster resource. External infrastructure providers should ensure that the annotation, once set, cannot be removed. | User | InfraClusters | | cluster.x-k8s.io/machine | It is set on nodes identifying the machine the node belongs to. | Cluster API | Nodes (workload cluster) | | cluster.x-k8s.io/owner-kind | It is set on nodes identifying the machine's owner kind the node belongs to. | Cluster API | Nodes (workload cluster) | @@ -59,5 +59,6 @@ | topology.cluster.x-k8s.io/dry-run | It is an annotation that gets set on objects by the topology controller only during a server side dry run apply operation. 
It is used for validating update webhooks for objects which get updated by template rotation (e.g. InfrastructureMachineTemplate). When the annotation is set and the admission request is a dry run, the webhook should deny validation due to immutability. By that the request will succeed (without any changes to the actual object because it is a dry run) and the topology controller will receive the resulting object. | Cluster API | Template rotation objects | | topology.cluster.x-k8s.io/hold-upgrade-sequence | It can be used to hold the entire MachineDeployment upgrade sequence. If the annotation is set on a MachineDeployment topology in Cluster.spec.topology.workers, the Kubernetes upgrade for this MachineDeployment topology and all subsequent ones is deferred. | Cluster API | MachineDeployments in Cluster.topology | | topology.cluster.x-k8s.io/upgrade-concurrency | It can be used to configure the maximum concurrency while upgrading MachineDeployments of a classy Cluster. It is set as a top level annotation on the Cluster object. The value should be >= 1. If unspecified the upgrade concurrency will default to 1. | Cluster API | Clusters | +| topology.internal.cluster.x-k8s.io/upgrade-step | This is an annotation used by CAPI internally to track upgrade steps. Name, meaning and semantic of the annotation can change anytime and it should not be used outside of CAPI controllers. | Cluster API | Clusters | | unsafe.topology.cluster.x-k8s.io/disable-update-class-name-check | It can be used to disable the webhook check on update that disallows a pre-existing Cluster to be populated with Topology information and Class. | User | Clusters | | unsafe.topology.cluster.x-k8s.io/disable-update-version-check | It can be used to disable the webhook checks on update that disallows updating the .topology.spec.version on certain conditions. 
| User | Clusters | diff --git a/exp/topology/desiredstate/desired_state.go b/exp/topology/desiredstate/desired_state.go index cc868dfe7677..afc5dcc34ebd 100644 --- a/exp/topology/desiredstate/desired_state.go +++ b/exp/topology/desiredstate/desired_state.go @@ -54,6 +54,7 @@ import ( "sigs.k8s.io/cluster-api/internal/topology/selectors" "sigs.k8s.io/cluster-api/internal/webhooks" "sigs.k8s.io/cluster-api/util" + "sigs.k8s.io/cluster-api/util/annotations" "sigs.k8s.io/cluster-api/util/conversion" ) @@ -115,6 +116,17 @@ func (g *generator) Generate(ctx context.Context, s *scope.Scope) (*scope.Cluste } } + // Compute the upgradePlan. + // By default CAPI allows to upgrade only by one minor, but if the cluster class defines a list of Kubernetes versions, + // the upgrade plan will be inferred from those versions. + getUpgradePlan := GetUpgradePlanOneMinor + if len(s.Blueprint.ClusterClass.Spec.KubernetesVersions) > 0 { + getUpgradePlan = GetUpgradePlanFromClusterClassVersions(s.Blueprint.ClusterClass.Spec.KubernetesVersions) + } + if err := ComputeUpgradePlan(ctx, s, getUpgradePlan); err != nil { + return nil, err + } + // Mark all the MachineDeployments that are currently upgrading. // This captured information is used for: // - Building the TopologyReconciled condition. @@ -178,7 +190,10 @@ func (g *generator) Generate(ctx context.Context, s *scope.Scope) (*scope.Cluste // Compute the desired state for the Cluster object adding a reference to the // InfrastructureCluster and the ControlPlane objects generated by the previous step. 
- desiredState.Cluster = computeCluster(ctx, s, desiredState.InfrastructureCluster, desiredState.ControlPlane.Object) + desiredState.Cluster, err = computeCluster(ctx, s, desiredState.InfrastructureCluster, desiredState.ControlPlane.Object) + if err != nil { + return nil, errors.Wrapf(err, "failed to compute Cluster") + } // If required, compute the desired state of the MachineDeployments from the list of MachineDeploymentTopologies // defined in the cluster. @@ -479,7 +494,7 @@ func (g *generator) computeControlPlane(ctx context.Context, s *scope.Scope, inf // Sets the desired Kubernetes version for the control plane. version, err := g.computeControlPlaneVersion(ctx, s) if err != nil { - return nil, errors.Wrap(err, "failed to compute version of control plane") + return nil, errors.Wrap(err, "failed to compute version of ControlPlane") } if err := contract.ControlPlane().Version().Set(controlPlane, version); err != nil { return nil, errors.Wrapf(err, "failed to set %s in the ControlPlane object", contract.ControlPlane().Version().Path()) @@ -493,10 +508,10 @@ func (g *generator) computeControlPlane(ctx context.Context, s *scope.Scope, inf // and the version defined in the topology. func (g *generator) computeControlPlaneVersion(ctx context.Context, s *scope.Scope) (string, error) { log := ctrl.LoggerFrom(ctx) - desiredVersion := s.Blueprint.Topology.Version + topologyVersion := s.Blueprint.Topology.Version // If we are creating the control plane object (current control plane is nil), use version from topology. if s.Current.ControlPlane == nil || s.Current.ControlPlane.Object == nil { - return desiredVersion, nil + return topologyVersion, nil } // Get the current currentVersion of the control plane. @@ -505,11 +520,12 @@ func (g *generator) computeControlPlaneVersion(ctx context.Context, s *scope.Sco return "", errors.Wrap(err, "failed to get the version from control plane spec") } + // Track if the control plane needs an update. 
+ // NOTE: in case the control plane doesn't need an update do not return immediately for: + // - computing a few more info for the update tracker, used to show the appropriate message for the TopologyReconciled condition. + // - calling the AfterControlPlaneUpgrade hook (if not already called). s.UpgradeTracker.ControlPlane.IsPendingUpgrade = true - if *currentVersion == desiredVersion { - // Mark that the control plane spec is already at the desired version. - // This information is used to show the appropriate message for the TopologyReconciled - // condition. + if *currentVersion == topologyVersion { s.UpgradeTracker.ControlPlane.IsPendingUpgrade = false } @@ -518,8 +534,8 @@ func (g *generator) computeControlPlaneVersion(ctx context.Context, s *scope.Sco if err != nil { return "", errors.Wrap(err, "failed to check if the control plane is being provisioned") } - // If the control plane is being provisioned (being craeted for the first time), then do not - // pick up the desiredVersion yet. + // If the control plane is being provisioned (being created for the first time), then do not + // pick up the topologyVersion yet. // Return the current version of the control plane. We will pick up the new version after the // control plane is provisioned. if cpProvisioning { @@ -533,7 +549,7 @@ func (g *generator) computeControlPlaneVersion(ctx context.Context, s *scope.Sco return "", errors.Wrap(err, "failed to check if control plane is upgrading") } // If the current control plane is upgrading (still completing a previous upgrade), - // then do not pick up the desiredVersion yet. + // then do not pick up the topologyVersion yet. // Return the current version of the control plane. We will pick up the new version // after the control plane is stable. 
if cpUpgrading { @@ -541,136 +557,167 @@ func (g *generator) computeControlPlaneVersion(ctx context.Context, s *scope.Sco return *currentVersion, nil } - // Return here if the control plane is already at the desired version - if !s.UpgradeTracker.ControlPlane.IsPendingUpgrade { - // At this stage the control plane is not upgrading and is already at the desired version. - // We can return. - // Nb. We do not return early in the function if the control plane is already at the desired version so as - // to know if the control plane is being upgraded. This information - // is required when updating the TopologyReconciled condition on the cluster. - - // Call the AfterControlPlaneUpgrade now that the control plane is upgraded. - if feature.Gates.Enabled(feature.RuntimeSDK) { - // Call the hook only if we are tracking the intent to do so. If it is not tracked it means we don't need to call the - // hook because we didn't go through an upgrade or we already called the hook after the upgrade. - if hooks.IsPending(runtimehooksv1.AfterControlPlaneUpgrade, s.Current.Cluster) { - v1beta1Cluster := &clusterv1beta1.Cluster{} - // DeepCopy cluster because ConvertFrom has side effects like adding the conversion annotation. - if err := v1beta1Cluster.ConvertFrom(s.Current.Cluster.DeepCopy()); err != nil { - return "", errors.Wrap(err, "error converting Cluster to v1beta1 Cluster") - } + // if the control plane is not upgrading, before making further considerations about if to pick up another version, + // we should call the AfterControlPlaneUpgrade hook if not already done. + if feature.Gates.Enabled(feature.RuntimeSDK) { + // Call the hook only if we are tracking the intent to do so. If it is not tracked it means we don't need to call the + // hook because we didn't go through an upgrade or we already called the hook after the upgrade. 
+ if hooks.IsPending(runtimehooksv1.AfterControlPlaneUpgrade, s.Current.Cluster) { + v1beta1Cluster := &clusterv1beta1.Cluster{} + // DeepCopy cluster because ConvertFrom has side effects like adding the conversion annotation. + if err := v1beta1Cluster.ConvertFrom(s.Current.Cluster.DeepCopy()); err != nil { + return "", errors.Wrap(err, "error converting Cluster to v1beta1 Cluster") + } - // Call all the registered extension for the hook. - hookRequest := &runtimehooksv1.AfterControlPlaneUpgradeRequest{ - Cluster: *cleanupCluster(v1beta1Cluster), - KubernetesVersion: desiredVersion, - } - hookResponse := &runtimehooksv1.AfterControlPlaneUpgradeResponse{} - if err := g.RuntimeClient.CallAllExtensions(ctx, runtimehooksv1.AfterControlPlaneUpgrade, s.Current.Cluster, hookRequest, hookResponse); err != nil { - return "", err - } - // Add the response to the tracker so we can later update condition or requeue when required. - s.HookResponseTracker.Add(runtimehooksv1.AfterControlPlaneUpgrade, hookResponse) - - // If the extension responds to hold off on starting Machine deployments upgrades, - // change the UpgradeTracker accordingly, otherwise the hook call is completed and we - // can remove this hook from the list of pending-hooks. - if hookResponse.RetryAfterSeconds != 0 { - log.Info(fmt.Sprintf("MachineDeployments/MachinePools upgrade to version %q are blocked by %q hook", desiredVersion, runtimecatalog.HookName(runtimehooksv1.AfterControlPlaneUpgrade))) - } else { - if err := hooks.MarkAsDone(ctx, g.Client, s.Current.Cluster, runtimehooksv1.AfterControlPlaneUpgrade); err != nil { - return "", err - } + // Call all the registered extension for the hook. 
+ hookRequest := &runtimehooksv1.AfterControlPlaneUpgradeRequest{ + Cluster: *cleanupCluster(v1beta1Cluster), + KubernetesVersion: *currentVersion, + } + hookResponse := &runtimehooksv1.AfterControlPlaneUpgradeResponse{} + if err := g.RuntimeClient.CallAllExtensions(ctx, runtimehooksv1.AfterControlPlaneUpgrade, s.Current.Cluster, hookRequest, hookResponse); err != nil { + return "", err + } + // Add the response to the tracker so we can later update condition or requeue when required. + s.HookResponseTracker.Add(runtimehooksv1.AfterControlPlaneUpgrade, hookResponse) + + // If the extension responds to hold off on starting Machine deployments upgrades, + // change the UpgradeTracker accordingly, otherwise the hook call is completed and we + // can remove this hook from the list of pending-hooks. + if hookResponse.RetryAfterSeconds != 0 { + v := topologyVersion + if len(s.UpgradeTracker.ControlPlane.UpgradePlan) > 0 { + v = s.UpgradeTracker.ControlPlane.UpgradePlan[0] } + log.Info(fmt.Sprintf("Upgrade to version %q is blocked by %q hook", v, runtimecatalog.HookName(runtimehooksv1.AfterControlPlaneUpgrade))) + return *currentVersion, nil + } + if err := hooks.MarkAsDone(ctx, g.Client, s.Current.Cluster, runtimehooksv1.AfterControlPlaneUpgrade); err != nil { + return "", err } } + } + + // At this stage, we can assume the previous control plane upgrade is fully complete (including calling the AfterControlPlaneUpgrade). + // It is now possible to start making considerations if to pick up another version. + + // If the control plane is not pending upgrade, then it is already at the desired version and there is no other version to pick up. + if !s.UpgradeTracker.ControlPlane.IsPendingUpgrade { + return *currentVersion, nil + } + // Before considering picking up the next control plane version, check if workers are required + // to upgrade first, e.g. to avoid breaking rules defining the max version skew between control plane + // and workers. 
+ // If the MachineDeployments/MachinePools are required to upgrade, then do not pick up the next control plane version yet. + // We will pick up the new version after the MachineDeployments/MachinePools finish upgrading. + if len(s.UpgradeTracker.MachineDeployments.UpgradePlan) > 0 && s.UpgradeTracker.MachineDeployments.UpgradePlan[0] == *currentVersion { + s.UpgradeTracker.ControlPlane.IsWaitingForWorkersUpgrade = true + return *currentVersion, nil + } + if len(s.UpgradeTracker.MachinePools.UpgradePlan) > 0 && s.UpgradeTracker.MachinePools.UpgradePlan[0] == *currentVersion { + s.UpgradeTracker.ControlPlane.IsWaitingForWorkersUpgrade = true return *currentVersion, nil } - // If the control plane is not upgrading or scaling, we can assume the control plane is stable. - // However, we should also check for the MachineDeployments/MachinePools upgrading. - // If the MachineDeployments/MachinePools are upgrading, then do not pick up the desiredVersion yet. + // Also check if MachineDeployments/MachinePools are already upgrading. + // If the MachineDeployments/MachinePools are upgrading, then do not pick up the next control plane version yet. // We will pick up the new version after the MachineDeployments/MachinePools finish upgrading. if len(s.UpgradeTracker.MachineDeployments.UpgradingNames()) > 0 || len(s.UpgradeTracker.MachinePools.UpgradingNames()) > 0 { return *currentVersion, nil } + // At this point we can assume the control plane is stable and also MachineDeployments/MachinePools + // are not upgrading/are not required to upgrade. + // If not already done, call the BeforeClusterUpgrade hook before picking up the desired version. 
if feature.Gates.Enabled(feature.RuntimeSDK) { - var hookAnnotations []string - for key := range s.Current.Cluster.Annotations { - if strings.HasPrefix(key, clusterv1.BeforeClusterUpgradeHookAnnotationPrefix) { - hookAnnotations = append(hookAnnotations, key) - } - } - if len(hookAnnotations) > 0 { - slices.Sort(hookAnnotations) - message := fmt.Sprintf("annotations [%s] are set", strings.Join(hookAnnotations, ", ")) - if len(hookAnnotations) == 1 { - message = fmt.Sprintf("annotation [%s] is set", strings.Join(hookAnnotations, ", ")) + // NOTE: the hook should be called only at the beginning of either a regular upgrade or a multistep upgrade sequence (it should not be called when in the middle of a multistep upgrade sequence); + // to detect if we are at the beginning of an upgrade, we check if the intent to call the AfterClusterUpgrade is not yet tracked. + if !hooks.IsPending(runtimehooksv1.AfterClusterUpgrade, s.Current.Cluster) { + var hookAnnotations []string + for key := range s.Current.Cluster.Annotations { + if strings.HasPrefix(key, clusterv1.BeforeClusterUpgradeHookAnnotationPrefix) { + hookAnnotations = append(hookAnnotations, key) + } } - // Add the hook with a response to the tracker so we can later update the condition. - s.HookResponseTracker.Add(runtimehooksv1.BeforeClusterUpgrade, &runtimehooksv1.BeforeClusterUpgradeResponse{ - CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ - // RetryAfterSeconds needs to be set because having only hooks without RetryAfterSeconds - // would lead to not updating the condition. We can rely on getting an event when the - // annotation gets removed so we set twice of the default sync-period to not cause additional reconciles. 
- RetryAfterSeconds: 20 * 60, - CommonResponse: runtimehooksv1.CommonResponse{ - Message: message, + if len(hookAnnotations) > 0 { + slices.Sort(hookAnnotations) + message := fmt.Sprintf("annotations [%s] are set", strings.Join(hookAnnotations, ", ")) + if len(hookAnnotations) == 1 { + message = fmt.Sprintf("annotation [%s] is set", strings.Join(hookAnnotations, ", ")) + } + // Add the hook with a response to the tracker so we can later update the condition. + s.HookResponseTracker.Add(runtimehooksv1.BeforeClusterUpgrade, &runtimehooksv1.BeforeClusterUpgradeResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + // RetryAfterSeconds needs to be set because having only hooks without RetryAfterSeconds + // would lead to not updating the condition. We can rely on getting an event when the + // annotation gets removed so we set twice of the default sync-period to not cause additional reconciles. + RetryAfterSeconds: 20 * 60, + CommonResponse: runtimehooksv1.CommonResponse{ + Message: message, + }, }, - }, - }) + }) - log.Info(fmt.Sprintf("Cluster upgrade to version %q is blocked by %q hook (via annotations)", desiredVersion, runtimecatalog.HookName(runtimehooksv1.BeforeClusterUpgrade)), "hooks", strings.Join(hookAnnotations, ",")) - return *currentVersion, nil - } + log.Info(fmt.Sprintf("Cluster upgrade to version %q is blocked by %q hook (via annotations)", topologyVersion, runtimecatalog.HookName(runtimehooksv1.BeforeClusterUpgrade)), "hooks", strings.Join(hookAnnotations, ",")) + return *currentVersion, nil + } - // At this point the control plane and the machine deployments are stable and we are almost ready to pick - // up the desiredVersion. Call the BeforeClusterUpgrade hook before picking up the desired version. - v1beta1Cluster := &clusterv1beta1.Cluster{} - // DeepCopy cluster because ConvertFrom has side effects like adding the conversion annotation. 
- if err := v1beta1Cluster.ConvertFrom(s.Current.Cluster.DeepCopy()); err != nil { - return "", errors.Wrap(err, "error converting Cluster to v1beta1 Cluster") - } + // At this point the control plane and the machine deployments are stable and we are almost ready to pick + // up the topologyVersion. Call the BeforeClusterUpgrade hook before picking up the desired version. + v1beta1Cluster := &clusterv1beta1.Cluster{} + // DeepCopy cluster because ConvertFrom has side effects like adding the conversion annotation. + if err := v1beta1Cluster.ConvertFrom(s.Current.Cluster.DeepCopy()); err != nil { + return "", errors.Wrap(err, "error converting Cluster to v1beta1 Cluster") + } - hookRequest := &runtimehooksv1.BeforeClusterUpgradeRequest{ - Cluster: *cleanupCluster(v1beta1Cluster), - FromKubernetesVersion: *currentVersion, - ToKubernetesVersion: desiredVersion, - } - hookResponse := &runtimehooksv1.BeforeClusterUpgradeResponse{} - if err := g.RuntimeClient.CallAllExtensions(ctx, runtimehooksv1.BeforeClusterUpgrade, s.Current.Cluster, hookRequest, hookResponse); err != nil { - return "", err - } - // Add the response to the tracker so we can later update condition or requeue when required. - s.HookResponseTracker.Add(runtimehooksv1.BeforeClusterUpgrade, hookResponse) - if hookResponse.RetryAfterSeconds != 0 { - // Cannot pickup the new version right now. Need to try again later. 
- log.Info(fmt.Sprintf("Cluster upgrade to version %q is blocked by %q hook", desiredVersion, runtimecatalog.HookName(runtimehooksv1.BeforeClusterUpgrade))) - return *currentVersion, nil + hookRequest := &runtimehooksv1.BeforeClusterUpgradeRequest{ + Cluster: *cleanupCluster(v1beta1Cluster), + FromKubernetesVersion: *currentVersion, + ToKubernetesVersion: topologyVersion, + } + hookResponse := &runtimehooksv1.BeforeClusterUpgradeResponse{} + if err := g.RuntimeClient.CallAllExtensions(ctx, runtimehooksv1.BeforeClusterUpgrade, s.Current.Cluster, hookRequest, hookResponse); err != nil { + return "", err + } + // Add the response to the tracker so we can later update condition or requeue when required. + s.HookResponseTracker.Add(runtimehooksv1.BeforeClusterUpgrade, hookResponse) + if hookResponse.RetryAfterSeconds != 0 { + // Cannot pickup the new version right now. Need to try again later. + log.Info(fmt.Sprintf("Cluster upgrade to version %q is blocked by %q hook", topologyVersion, runtimecatalog.HookName(runtimehooksv1.BeforeClusterUpgrade))) + return *currentVersion, nil + } } + } - // We are picking up the new version here. - // Track the intent of calling the AfterControlPlaneUpgrade and the AfterClusterUpgrade hooks once we are done with the upgrade. - if err := hooks.MarkAsPending(ctx, g.Client, s.Current.Cluster, runtimehooksv1.AfterControlPlaneUpgrade, runtimehooksv1.AfterClusterUpgrade); err != nil { - return "", err - } + // Control plane and machine deployments are stable. All the required hooks are called. + // Ready to pick up the next version in the upgrade plan. + + // Track the intent of calling the AfterControlPlaneUpgrade and the AfterClusterUpgrade hooks once we are done with the upgrade. + if err := hooks.MarkAsPending(ctx, g.Client, s.Current.Cluster, runtimehooksv1.AfterControlPlaneUpgrade, runtimehooksv1.AfterClusterUpgrade); err != nil { + return "", err } - // Control plane and machine deployments are stable. 
All the required hook are called. - // Ready to pick up the topology version. - s.UpgradeTracker.ControlPlane.IsPendingUpgrade = false + // Pick up the new version + if len(s.UpgradeTracker.ControlPlane.UpgradePlan) == 0 { + return "", errors.New("cannot compute the control plane version if the control plane is pending upgrade and the upgrade plan is not set") + } + nextVersion := s.UpgradeTracker.ControlPlane.UpgradePlan[0] + + // The upgrade is now starting in this reconcile and not pending anymore. + // Note: it is important to unset IsPendingUpgrade, otherwise reconcileState will assume that we are still waiting for another upgrade (and thus defer the one we are starting). s.UpgradeTracker.ControlPlane.IsStartingUpgrade = true - return desiredVersion, nil + s.UpgradeTracker.ControlPlane.IsPendingUpgrade = false + + return nextVersion, nil } // computeCluster computes the desired state for the Cluster object. // NOTE: Some fields of the Cluster’s fields contribute to defining the Cluster blueprint (e.g. Cluster.Spec.Topology), // while some other fields should be managed as part of the actual Cluster (e.g. Cluster.Spec.ControlPlaneRef); in this func // we are concerned only about the latest group of fields. -func computeCluster(_ context.Context, s *scope.Scope, infrastructureCluster, controlPlane *unstructured.Unstructured) *clusterv1.Cluster { +func computeCluster(_ context.Context, s *scope.Scope, infrastructureCluster, controlPlane *unstructured.Unstructured) (*clusterv1.Cluster, error) { cluster := s.Current.Cluster.DeepCopy() // Enforce the topology labels. 
@@ -687,7 +734,23 @@ func computeCluster(_ context.Context, s *scope.Scope, infrastructureCluster, co cluster.Spec.InfrastructureRef = contract.ObjToContractVersionedObjectReference(infrastructureCluster) cluster.Spec.ControlPlaneRef = contract.ObjToContractVersionedObjectReference(controlPlane) - return cluster + // Track the current upgrade step in the cluster object (otherwise make sure we clean up tracking of previous upgrades). + // NOTE: to detect if we are upgrading, we check if the intent to call the AfterClusterUpgrade is already tracked. + // NOTE: it is required to surface intermediate steps of the upgrade plan to allow creation of machines in KCP/MS. + // TODO: consider if we want to surface the upgrade plan (or the list of desired versions) in cluster status; + // TBD if the semantic of the new field can replace this annotation. + if hooks.IsPending(runtimehooksv1.AfterClusterUpgrade, s.Current.Cluster) { + // NOTE: to detect if we are at the beginning of an upgrade, we check if the intent to call the AfterClusterUpgrade is already tracked. 
+ controlPlaneVersion, err := contract.ControlPlane().Version().Get(controlPlane) + if err != nil { + return nil, errors.Wrap(err, "error getting control plane version") + } + annotations.AddAnnotations(cluster, map[string]string{clusterv1.ClusterTopologyUpgradeStepAnnotation: *controlPlaneVersion}) + } else { + delete(cluster.Annotations, clusterv1.ClusterTopologyUpgradeStepAnnotation) + } + + return cluster, nil } // calculateRefDesiredAPIVersion returns the desired ref calculated from desiredReferencedObject @@ -812,7 +875,10 @@ func (g *generator) computeMachineDeployment(ctx context.Context, s *scope.Scope // Add ClusterTopologyMachineDeploymentLabel to the generated InfrastructureMachine template infraMachineTemplateLabels[clusterv1.ClusterTopologyMachineDeploymentNameLabel] = machineDeploymentTopology.Name desiredMachineDeployment.InfrastructureMachineTemplate.SetLabels(infraMachineTemplateLabels) - version := g.computeMachineDeploymentVersion(s, machineDeploymentTopology, currentMachineDeployment) + version, err := g.computeMachineDeploymentVersion(s, machineDeploymentTopology, currentMachineDeployment) + if err != nil { + return nil, err + } // Compute values that can be set both in the MachineDeploymentClass and in the MachineDeploymentTopology minReadySeconds := machineDeploymentClass.MinReadySeconds @@ -991,8 +1057,8 @@ func (g *generator) computeMachineDeployment(ctx context.Context, s *scope.Scope // computeMachineDeploymentVersion calculates the version of the desired machine deployment. // The version is calculated using the state of the current machine deployments, // the current control plane and the version defined in the topology. 
-func (g *generator) computeMachineDeploymentVersion(s *scope.Scope, machineDeploymentTopology clusterv1.MachineDeploymentTopology, currentMDState *scope.MachineDeploymentState) string { - desiredVersion := s.Blueprint.Topology.Version +func (g *generator) computeMachineDeploymentVersion(s *scope.Scope, machineDeploymentTopology clusterv1.MachineDeploymentTopology, currentMDState *scope.MachineDeploymentState) (string, error) { + topologyVersion := s.Blueprint.Topology.Version // If creating a new machine deployment, mark it as pending if the control plane is not // yet stable. Creating a new MD while the control plane is upgrading can lead to unexpected race conditions. // Example: join could fail if the load balancers are slow in detecting when CP machines are @@ -1001,49 +1067,63 @@ func (g *generator) computeMachineDeploymentVersion(s *scope.Scope, machineDeplo if !s.UpgradeTracker.ControlPlane.IsControlPlaneStable() || s.HookResponseTracker.IsBlocking(runtimehooksv1.AfterControlPlaneUpgrade) { s.UpgradeTracker.MachineDeployments.MarkPendingCreate(machineDeploymentTopology.Name) } - return desiredVersion + return topologyVersion, nil } // Get the current version of the machine deployment. currentVersion := currentMDState.Object.Spec.Template.Spec.Version - // Return early if the currentVersion is already equal to the desiredVersion + // Return early if the currentVersion is already equal to the topologyVersion // no further checks required. - if currentVersion == desiredVersion { - return currentVersion + if currentVersion == topologyVersion { + return currentVersion, nil } // Return early if the upgrade for the MachineDeployment is deferred. 
if isMachineDeploymentDeferred(s.Blueprint.Topology, machineDeploymentTopology) { s.UpgradeTracker.MachineDeployments.MarkDeferredUpgrade(currentMDState.Object.Name) s.UpgradeTracker.MachineDeployments.MarkPendingUpgrade(currentMDState.Object.Name) - return currentVersion + return currentVersion, nil } // Return early if the AfterControlPlaneUpgrade hook returns a blocking response. if s.HookResponseTracker.IsBlocking(runtimehooksv1.AfterControlPlaneUpgrade) { s.UpgradeTracker.MachineDeployments.MarkPendingUpgrade(currentMDState.Object.Name) - return currentVersion + return currentVersion, nil } // Return early if the upgrade concurrency is reached. if s.UpgradeTracker.MachineDeployments.UpgradeConcurrencyReached() { s.UpgradeTracker.MachineDeployments.MarkPendingUpgrade(currentMDState.Object.Name) - return currentVersion + return currentVersion, nil } - // Return early if the Control Plane is not stable. Do not pick up the desiredVersion yet. + // Return early if the Control Plane is not stable. Do not pick up the topologyVersion yet. // Return the current version of the machine deployment. We will pick up the new version after the control // plane is stable. - if !s.UpgradeTracker.ControlPlane.IsControlPlaneStable() { + if !s.UpgradeTracker.ControlPlane.IsControlPlaneStableOrWaitingForWorkersUpgrade() { s.UpgradeTracker.MachineDeployments.MarkPendingUpgrade(currentMDState.Object.Name) - return currentVersion + return currentVersion, nil } // Control plane and machine deployments are stable. - // Ready to pick up the topology version. + // Ready to pick up the next version in the upgrade plan. + if len(s.UpgradeTracker.MachineDeployments.UpgradePlan) == 0 { + return "", errors.New("cannot compute the machine deployment version if the machine deployment is pending upgrade and the upgrade plan is not set") + } + + // The upgrade plan for workers has all versions from minWorkersVersion version to topologyVersion. 
+ // If this MachineDeployment is already at minWorkersVersion, it should wait for the control plane to pick up next version before upgrading. + // Note: at this point we know that MachineDeployment is not yet at topologyVersion, so also set that MachineDeployment as PendingUpgrade. + if s.UpgradeTracker.MachineDeployments.UpgradePlan[0] == currentVersion { + s.UpgradeTracker.MachineDeployments.MarkPendingUpgrade(currentMDState.Object.Name) + return currentVersion, nil + } + s.UpgradeTracker.MachineDeployments.MarkUpgrading(currentMDState.Object.Name) - return desiredVersion + + nextVersion := s.UpgradeTracker.MachineDeployments.UpgradePlan[0] + return nextVersion, nil } // isMachineDeploymentDeferred returns true if the upgrade for the mdTopology is deferred. @@ -1175,7 +1255,10 @@ func (g *generator) computeMachinePool(_ context.Context, s *scope.Scope, machin // Add ClusterTopologyMachinePoolLabel to the generated InfrastructureMachinePool object infraMachinePoolObjectLabels[clusterv1.ClusterTopologyMachinePoolNameLabel] = machinePoolTopology.Name desiredMachinePool.InfrastructureMachinePoolObject.SetLabels(infraMachinePoolObjectLabels) - version := g.computeMachinePoolVersion(s, machinePoolTopology, currentMachinePool) + version, err := g.computeMachinePoolVersion(s, machinePoolTopology, currentMachinePool) + if err != nil { + return nil, err + } // Compute values that can be set both in the MachinePoolClass and in the MachinePoolTopology minReadySeconds := machinePoolClass.MinReadySeconds @@ -1288,8 +1371,8 @@ func (g *generator) computeMachinePool(_ context.Context, s *scope.Scope, machin // computeMachinePoolVersion calculates the version of the desired machine pool. // The version is calculated using the state of the current machine pools, // the current control plane and the version defined in the topology. 
-func (g *generator) computeMachinePoolVersion(s *scope.Scope, machinePoolTopology clusterv1.MachinePoolTopology, currentMPState *scope.MachinePoolState) string { - desiredVersion := s.Blueprint.Topology.Version +func (g *generator) computeMachinePoolVersion(s *scope.Scope, machinePoolTopology clusterv1.MachinePoolTopology, currentMPState *scope.MachinePoolState) (string, error) { + topologyVersion := s.Blueprint.Topology.Version // If creating a new machine pool, mark it as pending if the control plane is not // yet stable. Creating a new MP while the control plane is upgrading can lead to unexpected race conditions. // Example: join could fail if the load balancers are slow in detecting when CP machines are @@ -1298,49 +1381,63 @@ func (g *generator) computeMachinePoolVersion(s *scope.Scope, machinePoolTopolog if !s.UpgradeTracker.ControlPlane.IsControlPlaneStable() || s.HookResponseTracker.IsBlocking(runtimehooksv1.AfterControlPlaneUpgrade) { s.UpgradeTracker.MachinePools.MarkPendingCreate(machinePoolTopology.Name) } - return desiredVersion + return topologyVersion, nil } // Get the current version of the machine pool. currentVersion := currentMPState.Object.Spec.Template.Spec.Version - // Return early if the currentVersion is already equal to the desiredVersion + // Return early if the currentVersion is already equal to the topologyVersion // no further checks required. - if currentVersion == desiredVersion { - return currentVersion + if currentVersion == topologyVersion { + return currentVersion, nil } // Return early if the upgrade for the MachinePool is deferred. if isMachinePoolDeferred(s.Blueprint.Topology, machinePoolTopology) { s.UpgradeTracker.MachinePools.MarkDeferredUpgrade(currentMPState.Object.Name) s.UpgradeTracker.MachinePools.MarkPendingUpgrade(currentMPState.Object.Name) - return currentVersion + return currentVersion, nil } // Return early if the AfterControlPlaneUpgrade hook returns a blocking response. 
if s.HookResponseTracker.IsBlocking(runtimehooksv1.AfterControlPlaneUpgrade) { s.UpgradeTracker.MachinePools.MarkPendingUpgrade(currentMPState.Object.Name) - return currentVersion + return currentVersion, nil } // Return early if the upgrade concurrency is reached. if s.UpgradeTracker.MachinePools.UpgradeConcurrencyReached() { s.UpgradeTracker.MachinePools.MarkPendingUpgrade(currentMPState.Object.Name) - return currentVersion + return currentVersion, nil } - // Return early if the Control Plane is not stable. Do not pick up the desiredVersion yet. + // Return early if the Control Plane is not stable. Do not pick up the topologyVersion yet. // Return the current version of the machine pool. We will pick up the new version after the control // plane is stable. - if !s.UpgradeTracker.ControlPlane.IsControlPlaneStable() { + if !s.UpgradeTracker.ControlPlane.IsControlPlaneStableOrWaitingForWorkersUpgrade() { s.UpgradeTracker.MachinePools.MarkPendingUpgrade(currentMPState.Object.Name) - return currentVersion + return currentVersion, nil } // Control plane and machine pools are stable. // Ready to pick up the topology version. + if len(s.UpgradeTracker.MachinePools.UpgradePlan) == 0 { + return "", errors.New("cannot compute the machine pool version if the machine pool is pending upgrade and the upgrade plan is not set") + } + + // The upgrade plan for workers has all versions from minWorkersVersion version to topologyVersion. + // If this MachinePool is already at minWorkersVersion, it should wait for the control plane to pick up next version before upgrading. + // Note: at this point we know that MachinePool is not yet at topologyVersion, so also set that MachinePool is PendingUpgrade. 
+ if s.UpgradeTracker.MachinePools.UpgradePlan[0] == currentVersion { + s.UpgradeTracker.MachinePools.MarkPendingUpgrade(currentMPState.Object.Name) + return currentVersion, nil + } + s.UpgradeTracker.MachinePools.MarkUpgrading(currentMPState.Object.Name) - return desiredVersion + + nextVersion := s.UpgradeTracker.MachinePools.UpgradePlan[0] + return nextVersion, nil } // isMachinePoolDeferred returns true if the upgrade for the mpTopology is deferred. diff --git a/exp/topology/desiredstate/desired_state_test.go b/exp/topology/desiredstate/desired_state_test.go index a2e41371ffe5..846de7747718 100644 --- a/exp/topology/desiredstate/desired_state_test.go +++ b/exp/topology/desiredstate/desired_state_test.go @@ -435,19 +435,20 @@ func TestComputeControlPlane(t *testing.T) { g.Expect(json.Unmarshal(jsonValue, &expectedReadinessGates)).ToNot(HaveOccurred()) scheme := runtime.NewScheme() + _ = clusterv1.AddToScheme(scheme) _ = apiextensionsv1.AddToScheme(scheme) - crd := builder.GenericControlPlaneCRD.DeepCopy() - crd.Labels = map[string]string{ + crdV1Beta1Contract := builder.GenericControlPlaneCRD.DeepCopy() + crdV1Beta1Contract.Labels = map[string]string{ // Set contract label for tt.contract. fmt.Sprintf("%s/%s", clusterv1.GroupVersion.Group, "v1beta1"): clusterv1.GroupVersionControlPlane.Version, } - clientWithV1Beta1ContractCRD := fake.NewClientBuilder().WithScheme(scheme).WithObjects(crd).Build() - crd = builder.GenericControlPlaneCRD.DeepCopy() - crd.Labels = map[string]string{ + clientWithV1Beta1ContractCRD := fake.NewClientBuilder().WithScheme(scheme).WithObjects(crdV1Beta1Contract).Build() + crdV1Beta2Contract := builder.GenericControlPlaneCRD.DeepCopy() + crdV1Beta2Contract.Labels = map[string]string{ // Set contract label for tt.contract. 
fmt.Sprintf("%s/%s", clusterv1.GroupVersion.Group, "v1beta2"): clusterv1.GroupVersionControlPlane.Version, } - clientWithV1Beta2ContractCRD := fake.NewClientBuilder().WithScheme(scheme).WithObjects(crd).Build() + clientWithV1Beta2ContractCRD := fake.NewClientBuilder().WithScheme(scheme).WithObjects(crdV1Beta2Contract).Build() t.Run("Generates the ControlPlane from the template (v1beta1 contract)", func(t *testing.T) { g := NewWithT(t) @@ -866,12 +867,13 @@ func TestComputeControlPlane(t *testing.T) { }) }) t.Run("Should choose the correct version for control plane", func(t *testing.T) { - // Note: in all of the following tests we are setting it up so that there are not machine deployments. + // Note: in all the following tests we are setting it up so that there are not machine deployments. // A more extensive list of scenarios is tested in TestComputeControlPlaneVersion. tests := []struct { name string currentControlPlane *unstructured.Unstructured topologyVersion string + upgradePlan []string expectedVersion string }{ { @@ -881,7 +883,21 @@ func TestComputeControlPlane(t *testing.T) { expectedVersion: "v1.2.3", }, { - name: "use controlplane.spec.version if the control plane's spec.version is not equal to status.version", + name: "use cluster.spec.topology.version if the control plane is already up to date", + currentControlPlane: builder.ControlPlane("test1", "cp1"). + WithSpecFields(map[string]interface{}{ + "spec.version": "v1.2.3", + }). + WithStatusFields(map[string]interface{}{ + "status.version": "v1.2.3", + }). 
+ Build(), + topologyVersion: "v1.2.3", + upgradePlan: nil, + expectedVersion: "v1.2.3", + }, + { + name: "use controlplane.spec.version if the control plane's spec.version is not equal to status.version", // NOTE: there are a few other conditions preventing to pick up latest cluster.spec.topology.version (other than is upgrading which is test here); all those conditions are validated in TestComputeControlPlaneVersion currentControlPlane: builder.ControlPlane("test1", "cp1"). WithSpecFields(map[string]interface{}{ "spec.version": "v1.2.2", @@ -891,8 +907,63 @@ func TestComputeControlPlane(t *testing.T) { }). Build(), topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, expectedVersion: "v1.2.2", }, + { + name: "use cluster.spec.topology.version if the control plane can upgrade and it is a simple upgrade", + currentControlPlane: builder.ControlPlane("test1", "cp1"). + WithSpecFields(map[string]interface{}{ + "spec.version": "v1.2.2", + "spec.replicas": int64(2), + }). + WithStatusFields(map[string]interface{}{ + "status.version": "v1.2.2", + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + }). + Build(), + topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, // Simple upgrade + expectedVersion: "v1.2.3", + }, + { + name: "use intermediate version if the control plane can upgrade and it is a multistep upgrade", + currentControlPlane: builder.ControlPlane("test1", "cp1"). + WithSpecFields(map[string]interface{}{ + "spec.version": "v1.2.2", + "spec.replicas": int64(2), + }). + WithStatusFields(map[string]interface{}{ + "status.version": "v1.2.2", + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + }). 
+ Build(), + topologyVersion: "v1.5.3", + upgradePlan: []string{"v1.3.2", "v1.4.2", "v1.5.3"}, // Multistep upgrade + expectedVersion: "v1.3.2", + }, + { + name: "use cluster.spec.topology.version if the control plane can upgrade and we are at the last step of a multistep upgrade", + currentControlPlane: builder.ControlPlane("test1", "cp1"). + WithSpecFields(map[string]interface{}{ + "spec.version": "v1.4.2", + "spec.replicas": int64(2), + }). + WithStatusFields(map[string]interface{}{ + "status.version": "v1.4.2", + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + }). + Build(), + topologyVersion: "v1.5.3", + upgradePlan: []string{"v1.5.3"}, + expectedVersion: "v1.5.3", + }, } for _, tt := range tests { @@ -918,8 +989,10 @@ func TestComputeControlPlane(t *testing.T) { s.Current.ControlPlane = &scope.ControlPlaneState{ Object: tt.currentControlPlane, } + s.UpgradeTracker = scope.NewUpgradeTracker() + s.UpgradeTracker.ControlPlane.UpgradePlan = tt.upgradePlan - obj, err := (&generator{Client: clientWithV1Beta2ContractCRD}).computeControlPlane(ctx, s, nil) + obj, err := (&generator{Client: fake.NewClientBuilder().WithScheme(scheme).WithObjects(crdV1Beta2Contract, clusterWithControlPlaneRef).Build()}).computeControlPlane(ctx, s, nil) g.Expect(err).ToNot(HaveOccurred()) g.Expect(obj).NotTo(BeNil()) assertNestedField(g, obj, tt.expectedVersion, contract.ControlPlane().Version().Path()...) 
@@ -1023,21 +1096,29 @@ func TestComputeControlPlaneVersion(t *testing.T) { } tests := []struct { - name string - hookResponse *runtimehooksv1.BeforeClusterUpgradeResponse - topologyVersion string - clusterModifier func(c *clusterv1.Cluster) - controlPlaneObj *unstructured.Unstructured - upgradingMachineDeployments []string - upgradingMachinePools []string - expectedVersion string - wantErr bool + name string + hookResponse *runtimehooksv1.BeforeClusterUpgradeResponse + topologyVersion string + clusterModifier func(c *clusterv1.Cluster) + controlPlaneObj *unstructured.Unstructured + controlPlaneUpgradePlan []string + machineDeploymentsUpgradePlan []string + machinePoolsUpgradePlan []string + upgradingMachineDeployments []string + upgradingMachinePools []string + expectedVersion string + expectedIsPendingUpgrade bool + expectedIsStartingUpgrade bool + expectedIsWaitingForWorkersUpgrade bool + wantErr bool }{ { - name: "should return cluster.spec.topology.version if creating a new control plane", - topologyVersion: "v1.2.3", - controlPlaneObj: nil, - expectedVersion: "v1.2.3", + name: "should return cluster.spec.topology.version if creating a new control plane", + topologyVersion: "v1.2.3", + controlPlaneObj: nil, + expectedVersion: "v1.2.3", + expectedIsPendingUpgrade: false, + expectedIsStartingUpgrade: false, }, { // Control plane is not upgrading implies that controlplane.spec.version is equal to controlplane.status.version. @@ -1059,7 +1140,26 @@ func TestComputeControlPlaneVersion(t *testing.T) { "status.unavailableReplicas": int64(0), }). Build(), - expectedVersion: "v1.2.3", + controlPlaneUpgradePlan: []string{"v1.2.3"}, + expectedVersion: "v1.2.3", + expectedIsPendingUpgrade: false, + expectedIsStartingUpgrade: true, + }, + { + name: "should return cluster.spec.topology.version if the control plane is already at the target version", + topologyVersion: "v1.2.3", + controlPlaneObj: builder.ControlPlane("test1", "cp1"). 
+ WithSpecFields(map[string]interface{}{ + "spec.version": "v1.2.3", + }). + WithStatusFields(map[string]interface{}{ + "status.version": "v1.2.3", + }). + Build(), + controlPlaneUpgradePlan: nil, + expectedVersion: "v1.2.3", + expectedIsPendingUpgrade: false, + expectedIsStartingUpgrade: false, }, { // Control plane is considered upgrading if controlplane.spec.version is not equal to controlplane.status.version. @@ -1073,7 +1173,10 @@ func TestComputeControlPlaneVersion(t *testing.T) { "status.version": "v1.2.1", }). Build(), - expectedVersion: "v1.2.2", + controlPlaneUpgradePlan: []string{"v1.2.3"}, + expectedVersion: "v1.2.2", + expectedIsPendingUpgrade: true, + expectedIsStartingUpgrade: false, }, { name: "should return cluster.spec.topology.version if the control plane is scaling", @@ -1092,7 +1195,10 @@ func TestComputeControlPlaneVersion(t *testing.T) { "status.unavailableReplicas": int64(0), }). Build(), - expectedVersion: "v1.2.3", + controlPlaneUpgradePlan: []string{"v1.2.3"}, + expectedVersion: "v1.2.3", + expectedIsPendingUpgrade: false, + expectedIsStartingUpgrade: true, }, { name: "should return controlplane.spec.version if control plane is not upgrading and not scaling and one of the MachineDeployments and one of the MachinePools is upgrading", @@ -1110,9 +1216,12 @@ func TestComputeControlPlaneVersion(t *testing.T) { "status.unavailableReplicas": int64(0), }). Build(), + controlPlaneUpgradePlan: []string{"v1.2.3"}, upgradingMachineDeployments: []string{"md1"}, upgradingMachinePools: []string{"mp1"}, expectedVersion: "v1.2.2", + expectedIsPendingUpgrade: true, + expectedIsStartingUpgrade: false, }, { name: "should return cluster.spec.topology.version if control plane is not upgrading and not scaling and none of the MachineDeployments and MachinePools are upgrading - hook returns non blocking response", @@ -1131,9 +1240,112 @@ func TestComputeControlPlaneVersion(t *testing.T) { "status.unavailableReplicas": int64(0), }). 
Build(), + controlPlaneUpgradePlan: []string{"v1.2.3"}, upgradingMachineDeployments: []string{}, upgradingMachinePools: []string{}, expectedVersion: "v1.2.3", + expectedIsPendingUpgrade: false, + expectedIsStartingUpgrade: true, + }, + { + name: "should return an intermediate version when upgrading by more than 1 minor and control plane should perform the first step of the upgrade sequence", + hookResponse: nonBlockingBeforeClusterUpgradeResponse, + topologyVersion: "v1.5.3", + controlPlaneObj: builder.ControlPlane("test1", "cp1"). + WithSpecFields(map[string]interface{}{ + "spec.version": "v1.2.2", + "spec.replicas": int64(2), + }). + WithStatusFields(map[string]interface{}{ + "status.version": "v1.2.2", + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + "status.unavailableReplicas": int64(0), + }). + Build(), + controlPlaneUpgradePlan: []string{"v1.3.2", "v1.4.2", "v1.5.3"}, + upgradingMachineDeployments: []string{}, + upgradingMachinePools: []string{}, + expectedVersion: "v1.3.2", // first step of the upgrade plan + expectedIsPendingUpgrade: false, // there are still upgrade in the queue, but we are starting one (so not pending) + expectedIsStartingUpgrade: true, + }, + { + name: "should return cluster.spec.topology.version when performing a multi step upgrade and control plane is at the second last minor in the upgrade sequence", + hookResponse: nonBlockingBeforeClusterUpgradeResponse, + topologyVersion: "v1.5.3", + controlPlaneObj: builder.ControlPlane("test1", "cp1"). + WithSpecFields(map[string]interface{}{ + "spec.version": "v1.4.2", + "spec.replicas": int64(2), + }). + WithStatusFields(map[string]interface{}{ + "status.version": "v1.4.2", + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + "status.unavailableReplicas": int64(0), + }). 
+ Build(), + controlPlaneUpgradePlan: []string{"v1.5.3"}, + upgradingMachineDeployments: []string{}, + upgradingMachinePools: []string{}, + expectedVersion: "v1.5.3", // last step of the upgrade plan + expectedIsPendingUpgrade: false, + expectedIsStartingUpgrade: true, + }, + { + name: "should remain on the current version when upgrading by more than 1 minor and MachineDeployments have to upgrade", + hookResponse: nonBlockingBeforeClusterUpgradeResponse, + topologyVersion: "v1.5.3", + controlPlaneObj: builder.ControlPlane("test1", "cp1"). + WithSpecFields(map[string]interface{}{ + "spec.version": "v1.2.2", + "spec.replicas": int64(2), + }). + WithStatusFields(map[string]interface{}{ + "status.version": "v1.2.2", + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + "status.unavailableReplicas": int64(0), + }). + Build(), + controlPlaneUpgradePlan: []string{"v1.3.2", "v1.4.2", "v1.5.3"}, + machineDeploymentsUpgradePlan: []string{"v1.2.2"}, + upgradingMachineDeployments: []string{}, + upgradingMachinePools: []string{}, + expectedVersion: "v1.2.2", + expectedIsPendingUpgrade: true, + expectedIsWaitingForWorkersUpgrade: true, + expectedIsStartingUpgrade: false, + }, + { + name: "should remain on the current version when upgrading by more than 1 minor and MachinePools have to upgrade", + hookResponse: nonBlockingBeforeClusterUpgradeResponse, + topologyVersion: "v1.5.3", + controlPlaneObj: builder.ControlPlane("test1", "cp1"). + WithSpecFields(map[string]interface{}{ + "spec.version": "v1.2.2", + "spec.replicas": int64(2), + }). + WithStatusFields(map[string]interface{}{ + "status.version": "v1.2.2", + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + "status.unavailableReplicas": int64(0), + }). 
+ Build(), + controlPlaneUpgradePlan: []string{"v1.3.2", "v1.4.2", "v1.5.3"}, + machinePoolsUpgradePlan: []string{"v1.2.2"}, + upgradingMachineDeployments: []string{}, + upgradingMachinePools: []string{}, + expectedVersion: "v1.2.2", + expectedIsPendingUpgrade: true, + expectedIsWaitingForWorkersUpgrade: true, + expectedIsStartingUpgrade: false, }, { name: "should return the controlplane.spec.version if a BeforeClusterUpgradeHook returns a blocking response", @@ -1152,7 +1364,10 @@ func TestComputeControlPlaneVersion(t *testing.T) { "status.unavailableReplicas": int64(0), }). Build(), - expectedVersion: "v1.2.2", + controlPlaneUpgradePlan: []string{"v1.2.3"}, + expectedVersion: "v1.2.2", + expectedIsPendingUpgrade: true, + expectedIsStartingUpgrade: false, }, { name: "should fail if the BeforeClusterUpgrade hooks returns a failure response", @@ -1171,8 +1386,8 @@ func TestComputeControlPlaneVersion(t *testing.T) { "status.unavailableReplicas": int64(0), }). Build(), - expectedVersion: "v1.2.2", - wantErr: true, + controlPlaneUpgradePlan: []string{"v1.2.3"}, + wantErr: true, }, { name: "should return the controlplane.spec.version if a BeforeClusterUpgradeHook annotation is set", @@ -1196,7 +1411,11 @@ func TestComputeControlPlaneVersion(t *testing.T) { clusterv1.BeforeClusterUpgradeHookAnnotationPrefix + "/test": "true", } }, - expectedVersion: "v1.2.2", + controlPlaneUpgradePlan: []string{"v1.2.3"}, + expectedVersion: "v1.2.2", + expectedIsPendingUpgrade: true, + expectedIsStartingUpgrade: false, + wantErr: false, }, } for _, tt := range tests { @@ -1237,8 +1456,7 @@ func TestComputeControlPlaneVersion(t *testing.T) { APIGroup: "refAPIGroup1", Kind: "refKind1", Name: "refName1", - }, - }, + }}, }, ControlPlane: &scope.ControlPlaneState{Object: tt.controlPlaneObj}, }, @@ -1248,6 +1466,15 @@ func TestComputeControlPlaneVersion(t *testing.T) { if tt.clusterModifier != nil { tt.clusterModifier(s.Current.Cluster) } + if len(tt.controlPlaneUpgradePlan) > 0 { + 
s.UpgradeTracker.ControlPlane.UpgradePlan = tt.controlPlaneUpgradePlan + } + if len(tt.machineDeploymentsUpgradePlan) > 0 { + s.UpgradeTracker.MachineDeployments.UpgradePlan = tt.machineDeploymentsUpgradePlan + } + if len(tt.machinePoolsUpgradePlan) > 0 { + s.UpgradeTracker.MachinePools.UpgradePlan = tt.machinePoolsUpgradePlan + } if len(tt.upgradingMachineDeployments) > 0 { s.UpgradeTracker.MachineDeployments.MarkUpgrading(tt.upgradingMachineDeployments...) } @@ -1272,369 +1499,815 @@ func TestComputeControlPlaneVersion(t *testing.T) { version, err := r.computeControlPlaneVersion(ctx, s) if tt.wantErr { g.Expect(err).To(HaveOccurred()) - } else { - g.Expect(err).ToNot(HaveOccurred()) - g.Expect(version).To(Equal(tt.expectedVersion)) - // Verify that if the upgrade is pending it is captured in the upgrade tracker. - upgradePending := tt.expectedVersion != tt.topologyVersion - g.Expect(s.UpgradeTracker.ControlPlane.IsPendingUpgrade).To(Equal(upgradePending)) + return } + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(version).To(Equal(tt.expectedVersion)) + g.Expect(s.UpgradeTracker.ControlPlane.IsPendingUpgrade).To(Equal(tt.expectedIsPendingUpgrade)) + g.Expect(s.UpgradeTracker.ControlPlane.IsStartingUpgrade).To(Equal(tt.expectedIsStartingUpgrade)) + g.Expect(s.UpgradeTracker.ControlPlane.IsWaitingForWorkersUpgrade).To(Equal(tt.expectedIsWaitingForWorkersUpgrade)) }) } }) +} - t.Run("Calling AfterControlPlaneUpgrade hook", func(t *testing.T) { - utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.RuntimeSDK, true) +func TestComputeControlPlaneVersion_callAfterControlPlaneUpgrade(t *testing.T) { + utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.RuntimeSDK, true) - catalog := runtimecatalog.New() - _ = runtimehooksv1.AddToCatalog(catalog) + catalog := runtimecatalog.New() + _ = runtimehooksv1.AddToCatalog(catalog) - afterControlPlaneUpgradeGVH, err := catalog.GroupVersionHook(runtimehooksv1.AfterControlPlaneUpgrade) - if err != nil { - 
panic(err) - } + afterControlPlaneUpgradeGVH, err := catalog.GroupVersionHook(runtimehooksv1.AfterControlPlaneUpgrade) + if err != nil { + panic(err) + } + beforeClusterUpgradeGVH, err := catalog.GroupVersionHook(runtimehooksv1.BeforeClusterUpgrade) + if err != nil { + panic("unable to compute GVH") + } + beforeClusterUpgradeNonBlockingResponse := &runtimehooksv1.BeforeClusterUpgradeResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + CommonResponse: runtimehooksv1.CommonResponse{ + Status: runtimehooksv1.ResponseStatusSuccess, + }, + }, + } - blockingResponse := &runtimehooksv1.AfterControlPlaneUpgradeResponse{ - CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ - RetryAfterSeconds: int32(10), - CommonResponse: runtimehooksv1.CommonResponse{ - Status: runtimehooksv1.ResponseStatusSuccess, + blockingResponse := &runtimehooksv1.AfterControlPlaneUpgradeResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + RetryAfterSeconds: int32(10), + CommonResponse: runtimehooksv1.CommonResponse{ + Status: runtimehooksv1.ResponseStatusSuccess, + }, + }, + } + nonBlockingResponse := &runtimehooksv1.AfterControlPlaneUpgradeResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + RetryAfterSeconds: int32(0), + CommonResponse: runtimehooksv1.CommonResponse{ + Status: runtimehooksv1.ResponseStatusSuccess, + }, + }, + } + failureResponse := &runtimehooksv1.AfterControlPlaneUpgradeResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + CommonResponse: runtimehooksv1.CommonResponse{ + Status: runtimehooksv1.ResponseStatusFailure, + }, + }, + } + + topologyVersion := "v1.2.3" + lowerVersion := "v1.2.2" + + controlPlaneStable := builder.ControlPlane("test-ns", "cp1"). + WithSpecFields(map[string]interface{}{ + "spec.version": topologyVersion, + "spec.replicas": int64(2), + }). 
+ WithStatusFields(map[string]interface{}{ + "status.version": topologyVersion, + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + }). + Build() + + controlPlaneUpgrading := builder.ControlPlane("test-ns", "cp1"). + WithSpecFields(map[string]interface{}{ + "spec.version": topologyVersion, + "spec.replicas": int64(2), + }). + WithStatusFields(map[string]interface{}{ + "status.version": lowerVersion, + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + }). + Build() + + controlPlaneProvisioning := builder.ControlPlane("test-ns", "cp1"). + WithSpecFields(map[string]interface{}{ + "spec.version": lowerVersion, + "spec.replicas": int64(2), + }). + WithStatusFields(map[string]interface{}{ + "status.version": "", + }). + Build() + + newUpgradeTrackerWithUpgradePlan := func(upgradePlan []string) *scope.UpgradeTracker { + ut := scope.NewUpgradeTracker() + ut.ControlPlane.UpgradePlan = upgradePlan + return ut + } + + tests := []struct { + name string + s *scope.Scope + hookResponse *runtimehooksv1.AfterControlPlaneUpgradeResponse + wantIntentToCall bool + wantHookToBeCalled bool + wantHookToBlock bool + wantErr bool + }{ + { + name: "should not call hook if it is not marked", + s: &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{ + Topology: clusterv1.Topology{ + Version: topologyVersion, + ControlPlane: clusterv1.ControlPlaneTopology{}, + }, + }, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + }, + Spec: clusterv1.ClusterSpec{}, + }, + ControlPlane: &scope.ControlPlaneState{ + Object: controlPlaneStable, + }, }, + UpgradeTracker: scope.NewUpgradeTracker(), // already at topology version, upgrade plan is empty. 
+ HookResponseTracker: scope.NewHookResponseTracker(), }, - } - nonBlockingResponse := &runtimehooksv1.AfterControlPlaneUpgradeResponse{ - CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ - RetryAfterSeconds: int32(0), - CommonResponse: runtimehooksv1.CommonResponse{ - Status: runtimehooksv1.ResponseStatusSuccess, + wantIntentToCall: false, // preserve existing value (not set) + wantHookToBeCalled: false, + wantErr: false, + }, + { + name: "should not call hook if the control plane is provisioning - there is intent to call hook", + s: &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{ + Topology: clusterv1.Topology{ + Version: topologyVersion, + ControlPlane: clusterv1.ControlPlaneTopology{}, + }, + }, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + Annotations: map[string]string{ + runtimev1.PendingHooksAnnotation: "AfterControlPlaneUpgrade", + }, + }, + Spec: clusterv1.ClusterSpec{}, + }, + ControlPlane: &scope.ControlPlaneState{ + Object: controlPlaneProvisioning, + }, }, + UpgradeTracker: newUpgradeTrackerWithUpgradePlan([]string{topologyVersion}), + HookResponseTracker: scope.NewHookResponseTracker(), }, - } - failureResponse := &runtimehooksv1.AfterControlPlaneUpgradeResponse{ - CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ - CommonResponse: runtimehooksv1.CommonResponse{ - Status: runtimehooksv1.ResponseStatusFailure, + wantIntentToCall: true, // preserve existing value (set) + wantHookToBeCalled: false, + wantErr: false, + }, + { + name: "should not call hook if the control plane is upgrading - there is intent to call hook", + s: &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{ + Topology: clusterv1.Topology{ + Version: topologyVersion, + ControlPlane: clusterv1.ControlPlaneTopology{}, + }, + }, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + 
Annotations: map[string]string{ + runtimev1.PendingHooksAnnotation: "AfterControlPlaneUpgrade", + }, + }, + Spec: clusterv1.ClusterSpec{}, + }, + ControlPlane: &scope.ControlPlaneState{ + Object: controlPlaneUpgrading, + }, }, + UpgradeTracker: scope.NewUpgradeTracker(), // already at topology version, upgrade plan is empty. + HookResponseTracker: scope.NewHookResponseTracker(), }, - } - - topologyVersion := "v1.2.3" - lowerVersion := "v1.2.2" - controlPlaneStable := builder.ControlPlane("test-ns", "cp1"). - WithSpecFields(map[string]interface{}{ - "spec.version": topologyVersion, - "spec.replicas": int64(2), - }). - WithStatusFields(map[string]interface{}{ - "status.version": topologyVersion, - "status.replicas": int64(2), - "status.updatedReplicas": int64(2), - "status.readyReplicas": int64(2), - }). - Build() - - controlPlaneUpgrading := builder.ControlPlane("test-ns", "cp1"). - WithSpecFields(map[string]interface{}{ - "spec.version": topologyVersion, - "spec.replicas": int64(2), - }). - WithStatusFields(map[string]interface{}{ - "status.version": lowerVersion, - "status.replicas": int64(2), - "status.updatedReplicas": int64(2), - "status.readyReplicas": int64(2), - }). 
- Build() + wantIntentToCall: true, // preserve existing value (set) + wantHookToBeCalled: false, + wantErr: false, + }, + { + name: "should call hook if the control plane is at desired version - non blocking response should remove hook from pending hooks list and allow MD upgrades", + s: &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{ + Topology: clusterv1.Topology{ + Version: topologyVersion, + ControlPlane: clusterv1.ControlPlaneTopology{}, + }, + }, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + Annotations: map[string]string{ + runtimev1.PendingHooksAnnotation: "AfterControlPlaneUpgrade", + }, + }, + Spec: clusterv1.ClusterSpec{}, + }, + ControlPlane: &scope.ControlPlaneState{ + Object: controlPlaneStable, + }, + }, + UpgradeTracker: scope.NewUpgradeTracker(), // already at topology version, upgrade plan is empty. + HookResponseTracker: scope.NewHookResponseTracker(), + }, + hookResponse: nonBlockingResponse, + wantIntentToCall: false, // remove the intent to call the hook (hook called, we are at target state) + wantHookToBeCalled: true, + wantHookToBlock: false, + wantErr: false, + }, + { + name: "should call hook if the control plane is at desired version - blocking response should leave the hook in pending hooks list and block MD upgrades", + s: &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{ + Topology: clusterv1.Topology{ + Version: topologyVersion, + ControlPlane: clusterv1.ControlPlaneTopology{}, + }, + }, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + Annotations: map[string]string{ + runtimev1.PendingHooksAnnotation: "AfterControlPlaneUpgrade", + }, + }, + Spec: clusterv1.ClusterSpec{}, + }, + ControlPlane: &scope.ControlPlaneState{ + Object: controlPlaneStable, + }, + }, + UpgradeTracker: scope.NewUpgradeTracker(), // already at topology version, 
upgrade plan is empty. + HookResponseTracker: scope.NewHookResponseTracker(), + }, + hookResponse: blockingResponse, + wantIntentToCall: true, // preserve existing value (set) + wantHookToBeCalled: true, + wantHookToBlock: true, + wantErr: false, + }, + { + name: "should call hook if the control plane is at desired version - failure response should leave the hook in pending hooks list", + s: &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{ + Topology: clusterv1.Topology{ + Version: topologyVersion, + ControlPlane: clusterv1.ControlPlaneTopology{}, + }, + }, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + Annotations: map[string]string{ + runtimev1.PendingHooksAnnotation: "AfterControlPlaneUpgrade", + }, + }, + Spec: clusterv1.ClusterSpec{}, + }, + ControlPlane: &scope.ControlPlaneState{ + Object: controlPlaneStable, + }, + }, + UpgradeTracker: scope.NewUpgradeTracker(), // already at topology version, upgrade plan is empty. + HookResponseTracker: scope.NewHookResponseTracker(), + }, + hookResponse: failureResponse, + wantIntentToCall: true, // preserve existing value (set) + wantHookToBeCalled: true, + wantErr: true, + }, + { + name: "should call hook if the control plane is at the first step of a multistep upgrade - intent to call for next minor should be tracked", + s: &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{ + Topology: clusterv1.Topology{ + Version: "v1.5.3", + ControlPlane: clusterv1.ControlPlaneTopology{}, + }, + }, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + Annotations: map[string]string{ + runtimev1.PendingHooksAnnotation: "AfterControlPlaneUpgrade", + }, + }, + Spec: clusterv1.ClusterSpec{}, + }, + ControlPlane: &scope.ControlPlaneState{ + Object: builder.ControlPlane("test-ns", "cp1"). 
+ WithSpecFields(map[string]interface{}{ + "spec.version": "v1.3.2", + "spec.replicas": int64(2), + }). + WithStatusFields(map[string]interface{}{ + "status.version": "v1.3.2", + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + }). + Build(), + }, + }, + UpgradeTracker: newUpgradeTrackerWithUpgradePlan([]string{"v1.4.2", "v1.5.3"}), + HookResponseTracker: scope.NewHookResponseTracker(), + }, + hookResponse: nonBlockingResponse, + wantIntentToCall: true, // new intent to call the hook for the next minor + wantHookToBeCalled: true, // the hook has been called for the current minor + wantHookToBlock: false, + + wantErr: false, + }, + { + name: "should call hook if the control plane is at the last step of a multistep upgrade", + s: &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{ + Topology: clusterv1.Topology{ + Version: "v1.5.3", + ControlPlane: clusterv1.ControlPlaneTopology{}, + }, + }, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + Annotations: map[string]string{ + runtimev1.PendingHooksAnnotation: "AfterControlPlaneUpgrade", + }, + }, + Spec: clusterv1.ClusterSpec{}, + }, + ControlPlane: &scope.ControlPlaneState{ + Object: builder.ControlPlane("test-ns", "cp1"). + WithSpecFields(map[string]interface{}{ + "spec.version": "v1.5.3", + "spec.replicas": int64(2), + }). + WithStatusFields(map[string]interface{}{ + "status.version": "v1.5.3", + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + }). + Build(), + }, + }, + UpgradeTracker: scope.NewUpgradeTracker(), // already at topology version, upgrade plan is empty. 
+ HookResponseTracker: scope.NewHookResponseTracker(), + }, + hookResponse: nonBlockingResponse, + wantIntentToCall: false, // remove the intent to call the hook (hook called, we are at target state) + wantHookToBeCalled: true, // the hook has been called for the current minor + wantHookToBlock: false, + + wantErr: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + // Add managedFields and annotations that should be cleaned up before the Cluster is sent to the RuntimeExtension. + tt.s.Current.Cluster.SetManagedFields([]metav1.ManagedFieldsEntry{ + { + APIVersion: builder.InfrastructureGroupVersion.String(), + Manager: "manager", + Operation: "Apply", + Time: ptr.To(metav1.Now()), + FieldsType: "FieldsV1", + }, + }) + if tt.s.Current.Cluster.Annotations == nil { + tt.s.Current.Cluster.Annotations = map[string]string{} + } + tt.s.Current.Cluster.Annotations[corev1.LastAppliedConfigAnnotation] = "should be cleaned up" + tt.s.Current.Cluster.Annotations[conversion.DataAnnotation] = "should be cleaned up" + + fakeRuntimeClient := fakeruntimeclient.NewRuntimeClientBuilder(). + WithCallAllExtensionResponses(map[runtimecatalog.GroupVersionHook]runtimehooksv1.ResponseObject{ + afterControlPlaneUpgradeGVH: tt.hookResponse, + beforeClusterUpgradeGVH: beforeClusterUpgradeNonBlockingResponse, + }).WithCallAllExtensionValidations(validateClusterParameter(tt.s.Current.Cluster)). + WithCatalog(catalog). 
+ Build() + + fakeClient := fake.NewClientBuilder().WithScheme(fakeScheme).WithObjects(tt.s.Current.Cluster).Build() + + r := &generator{ + Client: fakeClient, + RuntimeClient: fakeRuntimeClient, + } + + _, err := r.computeControlPlaneVersion(ctx, tt.s) + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).ToNot(HaveOccurred()) + } + + if tt.wantHookToBeCalled { + g.Expect(fakeRuntimeClient.CallAllCount(runtimehooksv1.AfterControlPlaneUpgrade)).To(Equal(1), "Expected hook to be called once") + } else { + g.Expect(fakeRuntimeClient.CallAllCount(runtimehooksv1.AfterControlPlaneUpgrade)).To(Equal(0), "Did not expect hook to be called") + } + + g.Expect(hooks.IsPending(runtimehooksv1.AfterControlPlaneUpgrade, tt.s.Current.Cluster)).To(Equal(tt.wantIntentToCall)) + + if tt.wantHookToBeCalled && !tt.wantErr { + g.Expect(tt.s.HookResponseTracker.IsBlocking(runtimehooksv1.AfterControlPlaneUpgrade)).To(Equal(tt.wantHookToBlock)) + } + }) + } +} + +func TestComputeControlPlaneVersion_callBeforeClusterUpgrade_trackIntentOfCallingAfterClusterUpgrade(t *testing.T) { + utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.RuntimeSDK, true) + + catalog := runtimecatalog.New() + _ = runtimehooksv1.AddToCatalog(catalog) + beforeClusterUpgradeGVH, err := catalog.GroupVersionHook(runtimehooksv1.BeforeClusterUpgrade) + if err != nil { + panic("unable to compute GVH") + } + blockingResponse := &runtimehooksv1.BeforeClusterUpgradeResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + RetryAfterSeconds: int32(10), + CommonResponse: runtimehooksv1.CommonResponse{ + Status: runtimehooksv1.ResponseStatusSuccess, + }, + }, + } + nonBlockingResponse := &runtimehooksv1.BeforeClusterUpgradeResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + RetryAfterSeconds: int32(0), + CommonResponse: runtimehooksv1.CommonResponse{ + Status: runtimehooksv1.ResponseStatusSuccess, + }, + }, + } + failureResponse := 
&runtimehooksv1.BeforeClusterUpgradeResponse{ + CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ + CommonResponse: runtimehooksv1.CommonResponse{ + Status: runtimehooksv1.ResponseStatusFailure, + }, + }, + } + + t.Run("Call BeforeClusterUpgrade hook when doing simple upgrades", func(t *testing.T) { + controlPlaneStable := builder.ControlPlane("test-ns", "cp1"). + WithSpecFields(map[string]interface{}{ + "spec.version": "v1.2.2", + "spec.replicas": int64(2), + }). + WithStatusFields(map[string]interface{}{ + "status.version": "v1.2.2", + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + "status.unavailableReplicas": int64(0), + }). + Build() + + s := &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{Topology: clusterv1.Topology{ + Version: "v1.2.3", + ControlPlane: clusterv1.ControlPlaneTopology{ + Replicas: ptr.To[int32](2), + }, + }}, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + }, + }, + ControlPlane: &scope.ControlPlaneState{Object: controlPlaneStable}, + }, + UpgradeTracker: scope.NewUpgradeTracker(), + HookResponseTracker: scope.NewHookResponseTracker(), + } + s.UpgradeTracker.ControlPlane.UpgradePlan = []string{"v1.2.3"} + + fakeClient := fake.NewClientBuilder().WithScheme(fakeScheme).WithObjects(s.Current.Cluster).Build() + + runtimeClient := fakeruntimeclient.NewRuntimeClientBuilder(). + WithCatalog(catalog). + WithCallAllExtensionResponses(map[runtimecatalog.GroupVersionHook]runtimehooksv1.ResponseObject{ + beforeClusterUpgradeGVH: nonBlockingResponse, + }). + Build() + + r := &generator{ + Client: fakeClient, + RuntimeClient: runtimeClient, + } + + desiredVersion, err := r.computeControlPlaneVersion(ctx, s) + g := NewWithT(t) + g.Expect(err).ToNot(HaveOccurred()) + + // Before Cluster upgrade hook must have been be called. 
+ g.Expect(runtimeClient.CallAllCount(runtimehooksv1.BeforeClusterUpgrade)).To(Equal(1)) + + // When successfully picking up the new version the intent to call AfterControlPlaneUpgrade and AfterClusterUpgrade hooks should be registered. + g.Expect(desiredVersion).To(Equal("v1.2.3")) + g.Expect(hooks.IsPending(runtimehooksv1.AfterControlPlaneUpgrade, s.Current.Cluster)).To(BeTrue()) + g.Expect(hooks.IsPending(runtimehooksv1.AfterClusterUpgrade, s.Current.Cluster)).To(BeTrue()) + }) + + t.Run("Call BeforeClusterUpgrade hook when doing simple upgrades - failure response should block picking up a new version", func(t *testing.T) { + controlPlaneStable := builder.ControlPlane("test-ns", "cp1"). + WithSpecFields(map[string]interface{}{ + "spec.version": "v1.2.2", + "spec.replicas": int64(2), + }). + WithStatusFields(map[string]interface{}{ + "status.version": "v1.2.2", + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + "status.unavailableReplicas": int64(0), + }). + Build() + + s := &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{Topology: clusterv1.Topology{ + Version: "v1.2.3", + ControlPlane: clusterv1.ControlPlaneTopology{ + Replicas: ptr.To[int32](2), + }, + }}, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + }, + }, + ControlPlane: &scope.ControlPlaneState{Object: controlPlaneStable}, + }, + UpgradeTracker: scope.NewUpgradeTracker(), + HookResponseTracker: scope.NewHookResponseTracker(), + } + s.UpgradeTracker.ControlPlane.UpgradePlan = []string{"v1.2.3"} + + fakeClient := fake.NewClientBuilder().WithScheme(fakeScheme).WithObjects(s.Current.Cluster).Build() + + runtimeClient := fakeruntimeclient.NewRuntimeClientBuilder(). + WithCatalog(catalog). + WithCallAllExtensionResponses(map[runtimecatalog.GroupVersionHook]runtimehooksv1.ResponseObject{ + beforeClusterUpgradeGVH: failureResponse, + }). 
+ Build() + + r := &generator{ + Client: fakeClient, + RuntimeClient: runtimeClient, + } + + desiredVersion, err := r.computeControlPlaneVersion(ctx, s) + g := NewWithT(t) + g.Expect(desiredVersion).To(Equal("")) + g.Expect(err).To(HaveOccurred()) + + // Before Cluster upgrade hook must have been be called. + g.Expect(runtimeClient.CallAllCount(runtimehooksv1.BeforeClusterUpgrade)).To(Equal(1)) + + // After a failure, intent to call AfterControlPlaneUpgrade and AfterClusterUpgrade hooks should not be registered. + g.Expect(hooks.IsPending(runtimehooksv1.AfterControlPlaneUpgrade, s.Current.Cluster)).To(BeFalse()) + g.Expect(hooks.IsPending(runtimehooksv1.AfterClusterUpgrade, s.Current.Cluster)).To(BeFalse()) + }) + + t.Run("Call BeforeClusterUpgrade hook when doing simple upgrades - blocking response should block picking up a new version", func(t *testing.T) { + controlPlaneStable := builder.ControlPlane("test-ns", "cp1"). + WithSpecFields(map[string]interface{}{ + "spec.version": "v1.2.2", + "spec.replicas": int64(2), + }). + WithStatusFields(map[string]interface{}{ + "status.version": "v1.2.2", + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + "status.unavailableReplicas": int64(0), + }). 
+ Build() + + s := &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{Topology: clusterv1.Topology{ + Version: "v1.2.3", + ControlPlane: clusterv1.ControlPlaneTopology{ + Replicas: ptr.To[int32](2), + }, + }}, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + }, + }, + ControlPlane: &scope.ControlPlaneState{Object: controlPlaneStable}, + }, + UpgradeTracker: scope.NewUpgradeTracker(), + HookResponseTracker: scope.NewHookResponseTracker(), + } + s.UpgradeTracker.ControlPlane.UpgradePlan = []string{"v1.2.3"} + + fakeClient := fake.NewClientBuilder().WithScheme(fakeScheme).WithObjects(s.Current.Cluster).Build() + + runtimeClient := fakeruntimeclient.NewRuntimeClientBuilder(). + WithCatalog(catalog). + WithCallAllExtensionResponses(map[runtimecatalog.GroupVersionHook]runtimehooksv1.ResponseObject{ + beforeClusterUpgradeGVH: blockingResponse, + }). + Build() + + r := &generator{ + Client: fakeClient, + RuntimeClient: runtimeClient, + } + + desiredVersion, err := r.computeControlPlaneVersion(ctx, s) + g := NewWithT(t) + g.Expect(err).ToNot(HaveOccurred()) + + // Before Cluster upgrade hook must have been be called. + g.Expect(runtimeClient.CallAllCount(runtimehooksv1.BeforeClusterUpgrade)).To(Equal(1)) + + // After a blocking response, current version should not be picked up, intent to call AfterControlPlaneUpgrade and AfterClusterUpgrade hooks should not be registered. + g.Expect(desiredVersion).To(Equal("v1.2.2")) + g.Expect(hooks.IsPending(runtimehooksv1.AfterControlPlaneUpgrade, s.Current.Cluster)).To(BeFalse()) + g.Expect(hooks.IsPending(runtimehooksv1.AfterClusterUpgrade, s.Current.Cluster)).To(BeFalse()) + }) + + t.Run("Call BeforeClusterUpgrade hook when doing the first step of a multistep cluster upgrade", func(t *testing.T) { + controlPlaneStable := builder.ControlPlane("test-ns", "cp1"). 
+ WithSpecFields(map[string]interface{}{ + "spec.version": "v1.2.2", + "spec.replicas": int64(2), + }). + WithStatusFields(map[string]interface{}{ + "status.version": "v1.2.2", + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + "status.unavailableReplicas": int64(0), + }). + Build() + + s := &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{Topology: clusterv1.Topology{ + Version: "v1.5.3", // more than one minor after current + ControlPlane: clusterv1.ControlPlaneTopology{ + Replicas: ptr.To[int32](2), + }, + }}, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + }, + }, + ControlPlane: &scope.ControlPlaneState{Object: controlPlaneStable}, + }, + UpgradeTracker: scope.NewUpgradeTracker(), + HookResponseTracker: scope.NewHookResponseTracker(), + } + s.UpgradeTracker.ControlPlane.UpgradePlan = []string{"v1.3.2", "v1.4.2", "v1.5.3"} + + fakeClient := fake.NewClientBuilder().WithScheme(fakeScheme).WithObjects(s.Current.Cluster).Build() + + runtimeClient := fakeruntimeclient.NewRuntimeClientBuilder(). + WithCatalog(catalog). + WithCallAllExtensionResponses(map[runtimecatalog.GroupVersionHook]runtimehooksv1.ResponseObject{ + beforeClusterUpgradeGVH: nonBlockingResponse, + }). + Build() + + r := &generator{ + Client: fakeClient, + RuntimeClient: runtimeClient, + } + + desiredVersion, err := r.computeControlPlaneVersion(ctx, s) + g := NewWithT(t) + g.Expect(err).ToNot(HaveOccurred()) + + // Before Cluster upgrade hook must have been be called. + g.Expect(runtimeClient.CallAllCount(runtimehooksv1.BeforeClusterUpgrade)).To(Equal(1)) + + // When successfully picking up the new version the intent to call AfterControlPlaneUpgrade and AfterClusterUpgrade hooks should be registered. 
+ g.Expect(desiredVersion).To(Equal("v1.3.2")) + g.Expect(hooks.IsPending(runtimehooksv1.AfterControlPlaneUpgrade, s.Current.Cluster)).To(BeTrue()) + g.Expect(hooks.IsPending(runtimehooksv1.AfterClusterUpgrade, s.Current.Cluster)).To(BeTrue()) + }) - controlPlaneProvisioning := builder.ControlPlane("test-ns", "cp1"). + t.Run("Don't call BeforeClusterUpgrade hook after the first step of a multistep upgrade", func(t *testing.T) { + controlPlaneStable := builder.ControlPlane("test-ns", "cp1"). WithSpecFields(map[string]interface{}{ - "spec.version": "v1.2.2", + "spec.version": "v1.3.2", "spec.replicas": int64(2), }). WithStatusFields(map[string]interface{}{ - "status.version": "", + "status.version": "v1.3.2", + "status.replicas": int64(2), + "status.updatedReplicas": int64(2), + "status.readyReplicas": int64(2), + "status.unavailableReplicas": int64(0), }). Build() - tests := []struct { - name string - s *scope.Scope - hookResponse *runtimehooksv1.AfterControlPlaneUpgradeResponse - wantIntentToCall bool - wantHookToBeCalled bool - wantHookToBlock bool - wantErr bool - }{ - { - name: "should not call hook if it is not marked", - s: &scope.Scope{ - Blueprint: &scope.ClusterBlueprint{ - Topology: clusterv1.Topology{ - Version: topologyVersion, - ControlPlane: clusterv1.ControlPlaneTopology{}, - }, - }, - Current: &scope.ClusterState{ - Cluster: &clusterv1.Cluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cluster", - Namespace: "test-ns", - }, - Spec: clusterv1.ClusterSpec{}, - }, - ControlPlane: &scope.ControlPlaneState{ - Object: controlPlaneStable, - }, - }, - UpgradeTracker: scope.NewUpgradeTracker(), - HookResponseTracker: scope.NewHookResponseTracker(), - }, - wantIntentToCall: false, - wantHookToBeCalled: false, - wantErr: false, - }, - { - name: "should not call hook if the control plane is provisioning - there is intent to call hook", - s: &scope.Scope{ - Blueprint: &scope.ClusterBlueprint{ - Topology: clusterv1.Topology{ - Version: topologyVersion, - 
ControlPlane: clusterv1.ControlPlaneTopology{}, - }, - }, - Current: &scope.ClusterState{ - Cluster: &clusterv1.Cluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cluster", - Namespace: "test-ns", - Annotations: map[string]string{ - runtimev1.PendingHooksAnnotation: "AfterControlPlaneUpgrade", - }, - }, - Spec: clusterv1.ClusterSpec{}, - }, - ControlPlane: &scope.ControlPlaneState{ - Object: controlPlaneProvisioning, - }, - }, - UpgradeTracker: scope.NewUpgradeTracker(), - HookResponseTracker: scope.NewHookResponseTracker(), - }, - wantIntentToCall: true, - wantHookToBeCalled: false, - wantErr: false, - }, - { - name: "should not call hook if the control plane is upgrading - there is intent to call hook", - s: &scope.Scope{ - Blueprint: &scope.ClusterBlueprint{ - Topology: clusterv1.Topology{ - Version: topologyVersion, - ControlPlane: clusterv1.ControlPlaneTopology{}, - }, - }, - Current: &scope.ClusterState{ - Cluster: &clusterv1.Cluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cluster", - Namespace: "test-ns", - Annotations: map[string]string{ - runtimev1.PendingHooksAnnotation: "AfterControlPlaneUpgrade", - }, - }, - Spec: clusterv1.ClusterSpec{}, - }, - ControlPlane: &scope.ControlPlaneState{ - Object: controlPlaneUpgrading, - }, - }, - UpgradeTracker: scope.NewUpgradeTracker(), - HookResponseTracker: scope.NewHookResponseTracker(), - }, - wantIntentToCall: true, - wantHookToBeCalled: false, - wantErr: false, - }, - { - name: "should call hook if the control plane is at desired version - non blocking response should remove hook from pending hooks list and allow MD upgrades", - s: &scope.Scope{ - Blueprint: &scope.ClusterBlueprint{ - Topology: clusterv1.Topology{ - Version: topologyVersion, - ControlPlane: clusterv1.ControlPlaneTopology{}, - }, - }, - Current: &scope.ClusterState{ - Cluster: &clusterv1.Cluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cluster", - Namespace: "test-ns", - Annotations: map[string]string{ - 
runtimev1.PendingHooksAnnotation: "AfterControlPlaneUpgrade", - }, - }, - Spec: clusterv1.ClusterSpec{}, - }, - ControlPlane: &scope.ControlPlaneState{ - Object: controlPlaneStable, - }, - }, - UpgradeTracker: scope.NewUpgradeTracker(), - HookResponseTracker: scope.NewHookResponseTracker(), - }, - hookResponse: nonBlockingResponse, - wantIntentToCall: false, - wantHookToBeCalled: true, - wantHookToBlock: false, - wantErr: false, - }, - { - name: "should call hook if the control plane is at desired version - blocking response should leave the hook in pending hooks list and block MD upgrades", - s: &scope.Scope{ - Blueprint: &scope.ClusterBlueprint{ - Topology: clusterv1.Topology{ - Version: topologyVersion, - ControlPlane: clusterv1.ControlPlaneTopology{}, - }, - }, - Current: &scope.ClusterState{ - Cluster: &clusterv1.Cluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cluster", - Namespace: "test-ns", - Annotations: map[string]string{ - runtimev1.PendingHooksAnnotation: "AfterControlPlaneUpgrade", - }, - }, - Spec: clusterv1.ClusterSpec{}, - }, - ControlPlane: &scope.ControlPlaneState{ - Object: controlPlaneStable, - }, - }, - UpgradeTracker: scope.NewUpgradeTracker(), - HookResponseTracker: scope.NewHookResponseTracker(), + s := &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{Topology: clusterv1.Topology{ + Version: "v1.5.3", // more than one minor after current + ControlPlane: clusterv1.ControlPlaneTopology{ + Replicas: ptr.To[int32](2), }, - hookResponse: blockingResponse, - wantIntentToCall: true, - wantHookToBeCalled: true, - wantHookToBlock: true, - wantErr: false, - }, - { - name: "should call hook if the control plane is at desired version - failure response should leave the hook in pending hooks list", - s: &scope.Scope{ - Blueprint: &scope.ClusterBlueprint{ - Topology: clusterv1.Topology{ - Version: topologyVersion, - ControlPlane: clusterv1.ControlPlaneTopology{}, - }, - }, - Current: &scope.ClusterState{ - Cluster: &clusterv1.Cluster{ - 
ObjectMeta: metav1.ObjectMeta{ - Name: "test-cluster", - Namespace: "test-ns", - Annotations: map[string]string{ - runtimev1.PendingHooksAnnotation: "AfterControlPlaneUpgrade", - }, - }, - Spec: clusterv1.ClusterSpec{}, - }, - ControlPlane: &scope.ControlPlaneState{ - Object: controlPlaneStable, + }}, + Current: &scope.ClusterState{ + Cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-ns", + Annotations: map[string]string{ + runtimev1.PendingHooksAnnotation: runtimecatalog.HookName(runtimehooksv1.AfterClusterUpgrade), // This signal that the upgrade is already in progress. }, }, - UpgradeTracker: scope.NewUpgradeTracker(), - HookResponseTracker: scope.NewHookResponseTracker(), }, - hookResponse: failureResponse, - wantIntentToCall: true, - wantHookToBeCalled: true, - wantErr: true, + ControlPlane: &scope.ControlPlaneState{Object: controlPlaneStable}, }, + UpgradeTracker: scope.NewUpgradeTracker(), + HookResponseTracker: scope.NewHookResponseTracker(), } + s.UpgradeTracker.ControlPlane.UpgradePlan = []string{"v1.4.2", "v1.5.3"} - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - g := NewWithT(t) - - // Add managedFields and annotations that should be cleaned up before the Cluster is sent to the RuntimeExtension. - tt.s.Current.Cluster.SetManagedFields([]metav1.ManagedFieldsEntry{ - { - APIVersion: builder.InfrastructureGroupVersion.String(), - Manager: "manager", - Operation: "Apply", - Time: ptr.To(metav1.Now()), - FieldsType: "FieldsV1", - }, - }) - if tt.s.Current.Cluster.Annotations == nil { - tt.s.Current.Cluster.Annotations = map[string]string{} - } - tt.s.Current.Cluster.Annotations[corev1.LastAppliedConfigAnnotation] = "should be cleaned up" - tt.s.Current.Cluster.Annotations[conversion.DataAnnotation] = "should be cleaned up" - - fakeRuntimeClient := fakeruntimeclient.NewRuntimeClientBuilder(). 
- WithCallAllExtensionResponses(map[runtimecatalog.GroupVersionHook]runtimehooksv1.ResponseObject{ - afterControlPlaneUpgradeGVH: tt.hookResponse, - }). - WithCallAllExtensionValidations(validateClusterParameter(tt.s.Current.Cluster)). - WithCatalog(catalog). - Build() + fakeClient := fake.NewClientBuilder().WithScheme(fakeScheme).WithObjects(s.Current.Cluster).Build() - fakeClient := fake.NewClientBuilder().WithScheme(fakeScheme).WithObjects(tt.s.Current.Cluster).Build() + runtimeClient := fakeruntimeclient.NewRuntimeClientBuilder(). + WithCatalog(catalog). + WithCallAllExtensionResponses(map[runtimecatalog.GroupVersionHook]runtimehooksv1.ResponseObject{ + beforeClusterUpgradeGVH: nonBlockingResponse, + }). + Build() - r := &generator{ - Client: fakeClient, - RuntimeClient: fakeRuntimeClient, - } + r := &generator{ + Client: fakeClient, + RuntimeClient: runtimeClient, + } - _, err := r.computeControlPlaneVersion(ctx, tt.s) - if tt.wantErr { - g.Expect(err).To(HaveOccurred()) - } else { - g.Expect(err).ToNot(HaveOccurred()) - } + desiredVersion, err := r.computeControlPlaneVersion(ctx, s) + g := NewWithT(t) + g.Expect(err).ToNot(HaveOccurred()) - if tt.wantHookToBeCalled { - g.Expect(fakeRuntimeClient.CallAllCount(runtimehooksv1.AfterControlPlaneUpgrade)).To(Equal(1), "Expected hook to be called once") - } else { - g.Expect(fakeRuntimeClient.CallAllCount(runtimehooksv1.AfterControlPlaneUpgrade)).To(Equal(0), "Did not expect hook to be called") - } + // Before Cluster upgrade hook must not have been be called. 
+ g.Expect(runtimeClient.CallAllCount(runtimehooksv1.BeforeClusterUpgrade)).To(Equal(0)) - g.Expect(hooks.IsPending(runtimehooksv1.AfterControlPlaneUpgrade, tt.s.Current.Cluster)).To(Equal(tt.wantIntentToCall)) - if tt.wantHookToBeCalled && !tt.wantErr { - g.Expect(tt.s.HookResponseTracker.IsBlocking(runtimehooksv1.AfterControlPlaneUpgrade)).To(Equal(tt.wantHookToBlock)) - } - }) - } + // When successfully picking up the new version the intent to call AfterControlPlaneUpgrade and AfterClusterUpgrade hooks should be registered. + g.Expect(desiredVersion).To(Equal("v1.4.2")) + g.Expect(hooks.IsPending(runtimehooksv1.AfterControlPlaneUpgrade, s.Current.Cluster)).To(BeTrue()) + g.Expect(hooks.IsPending(runtimehooksv1.AfterClusterUpgrade, s.Current.Cluster)).To(BeTrue()) }) - t.Run("register intent to call AfterClusterUpgrade and AfterControlPlaneUpgrade hooks", func(t *testing.T) { - utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.RuntimeSDK, true) - - catalog := runtimecatalog.New() - _ = runtimehooksv1.AddToCatalog(catalog) - beforeClusterUpgradeGVH, err := catalog.GroupVersionHook(runtimehooksv1.BeforeClusterUpgrade) - if err != nil { - panic("unable to compute GVH") - } - beforeClusterUpgradeNonBlockingResponse := &runtimehooksv1.BeforeClusterUpgradeResponse{ - CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ - CommonResponse: runtimehooksv1.CommonResponse{ - Status: runtimehooksv1.ResponseStatusSuccess, - }, - }, - } - + t.Run("Don't call BeforeClusterUpgrade hook when at the last step of a multi cluster upgrade", func(t *testing.T) { controlPlaneStable := builder.ControlPlane("test-ns", "cp1"). WithSpecFields(map[string]interface{}{ - "spec.version": "v1.2.2", + "spec.version": "v1.4.2", "spec.replicas": int64(2), }). 
WithStatusFields(map[string]interface{}{ - "status.version": "v1.2.2", + "status.version": "v1.4.2", "status.replicas": int64(2), "status.updatedReplicas": int64(2), "status.readyReplicas": int64(2), @@ -1644,7 +2317,7 @@ func TestComputeControlPlaneVersion(t *testing.T) { s := &scope.Scope{ Blueprint: &scope.ClusterBlueprint{Topology: clusterv1.Topology{ - Version: "v1.2.3", + Version: "v1.5.3", // one minor after current ControlPlane: clusterv1.ControlPlaneTopology{ Replicas: ptr.To[int32](2), }, @@ -1654,6 +2327,9 @@ func TestComputeControlPlaneVersion(t *testing.T) { ObjectMeta: metav1.ObjectMeta{ Name: "test-cluster", Namespace: "test-ns", + Annotations: map[string]string{ + runtimev1.PendingHooksAnnotation: runtimecatalog.HookName(runtimehooksv1.AfterClusterUpgrade), // This signal that the upgrade is already in progress. + }, }, }, ControlPlane: &scope.ControlPlaneState{Object: controlPlaneStable}, @@ -1661,16 +2337,17 @@ func TestComputeControlPlaneVersion(t *testing.T) { UpgradeTracker: scope.NewUpgradeTracker(), HookResponseTracker: scope.NewHookResponseTracker(), } + s.UpgradeTracker.ControlPlane.UpgradePlan = []string{"v1.5.3"} + + fakeClient := fake.NewClientBuilder().WithScheme(fakeScheme).WithObjects(s.Current.Cluster).Build() runtimeClient := fakeruntimeclient.NewRuntimeClientBuilder(). WithCatalog(catalog). WithCallAllExtensionResponses(map[runtimecatalog.GroupVersionHook]runtimehooksv1.ResponseObject{ - beforeClusterUpgradeGVH: beforeClusterUpgradeNonBlockingResponse, + beforeClusterUpgradeGVH: nonBlockingResponse, }). Build() - fakeClient := fake.NewClientBuilder().WithScheme(fakeScheme).WithObjects(s.Current.Cluster).Build() - r := &generator{ Client: fakeClient, RuntimeClient: runtimeClient, @@ -1679,8 +2356,12 @@ func TestComputeControlPlaneVersion(t *testing.T) { desiredVersion, err := r.computeControlPlaneVersion(ctx, s) g := NewWithT(t) g.Expect(err).ToNot(HaveOccurred()) + + // Before Cluster upgrade hook must not have been be called. 
+ g.Expect(runtimeClient.CallAllCount(runtimehooksv1.BeforeClusterUpgrade)).To(Equal(0)) + // When successfully picking up the new version the intent to call AfterControlPlaneUpgrade and AfterClusterUpgrade hooks should be registered. - g.Expect(desiredVersion).To(Equal("v1.2.3")) + g.Expect(desiredVersion).To(Equal("v1.5.3")) g.Expect(hooks.IsPending(runtimehooksv1.AfterControlPlaneUpgrade, s.Current.Cluster)).To(BeTrue()) g.Expect(hooks.IsPending(runtimehooksv1.AfterClusterUpgrade, s.Current.Cluster)).To(BeTrue()) }) @@ -1693,6 +2374,7 @@ func TestComputeCluster(t *testing.T) { infrastructureCluster := builder.InfrastructureCluster(metav1.NamespaceDefault, "infrastructureCluster1"). Build() controlPlane := builder.ControlPlane(metav1.NamespaceDefault, "controlplane1"). + WithVersion("v1.30.3"). Build() // current cluster objects @@ -1704,10 +2386,14 @@ func TestComputeCluster(t *testing.T) { } // aggregating current cluster objects into ClusterState (simulating getCurrentState) - scope := scope.New(cluster) + s := scope.New(cluster) + s.Current.ControlPlane = &scope.ControlPlaneState{ + Object: controlPlane, + } - obj := computeCluster(ctx, scope, infrastructureCluster, controlPlane) + obj, err := computeCluster(ctx, s, infrastructureCluster, controlPlane) g.Expect(obj).ToNot(BeNil()) + g.Expect(err).ToNot(HaveOccurred()) // TypeMeta g.Expect(obj.APIVersion).To(Equal(cluster.APIVersion)) @@ -1718,10 +2404,35 @@ func TestComputeCluster(t *testing.T) { g.Expect(obj.Namespace).To(Equal(cluster.Namespace)) g.Expect(obj.GetLabels()).To(HaveKeyWithValue(clusterv1.ClusterNameLabel, cluster.Name)) g.Expect(obj.GetLabels()).To(HaveKeyWithValue(clusterv1.ClusterTopologyOwnedLabel, "")) + g.Expect(obj.GetAnnotations()).ToNot(HaveKey(clusterv1.ClusterTopologyUpgradeStepAnnotation)) // Spec g.Expect(obj.Spec.InfrastructureRef).To(BeComparableTo(contract.ObjToContractVersionedObjectReference(infrastructureCluster))) 
g.Expect(obj.Spec.ControlPlaneRef).To(BeComparableTo(contract.ObjToContractVersionedObjectReference(controlPlane))) + + // Surfaces the ClusterTopologyUpgradeStepAnnotation annotation during upgrades. + annotations := s.Current.Cluster.GetAnnotations() + if annotations == nil { + annotations = map[string]string{} + } + annotations[runtimev1.PendingHooksAnnotation] = "AfterClusterUpgrade" + s.Current.Cluster.SetAnnotations(annotations) + + obj, err = computeCluster(ctx, s, infrastructureCluster, controlPlane) + g.Expect(obj).ToNot(BeNil()) + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(obj.GetAnnotations()).To(HaveKeyWithValue(clusterv1.ClusterTopologyUpgradeStepAnnotation, "v1.30.3")) + + // Delete ClusterTopologyUpgradeStepAnnotation annotation after upgrade is completed. + delete(annotations, runtimev1.PendingHooksAnnotation) + s.Current.Cluster.SetAnnotations(annotations) + + obj, err = computeCluster(ctx, s, infrastructureCluster, controlPlane) + g.Expect(obj).ToNot(BeNil()) + g.Expect(err).ToNot(HaveOccurred()) + + g.Expect(obj.GetAnnotations()).ToNot(HaveKey(clusterv1.ClusterTopologyUpgradeStepAnnotation)) } func TestComputeMachineDeployment(t *testing.T) { @@ -2133,6 +2844,7 @@ func TestComputeMachineDeployment(t *testing.T) { currentMDVersion *string upgradeConcurrency string topologyVersion string + upgradePlan []string expectedVersion string }{ { @@ -2141,6 +2853,7 @@ func TestComputeMachineDeployment(t *testing.T) { upgradeConcurrency: "1", currentMDVersion: nil, topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, expectedVersion: "v1.2.3", }, { @@ -2149,6 +2862,7 @@ func TestComputeMachineDeployment(t *testing.T) { upgradeConcurrency: "1", currentMDVersion: nil, topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, expectedVersion: "v1.2.3", }, { @@ -2157,6 +2871,7 @@ func TestComputeMachineDeployment(t *testing.T) { upgradeConcurrency: "1", currentMDVersion: ptr.To("v1.2.2"), topologyVersion: "v1.2.3", + upgradePlan: 
[]string{"v1.2.3"}, expectedVersion: "v1.2.2", }, { @@ -2165,6 +2880,7 @@ func TestComputeMachineDeployment(t *testing.T) { upgradeConcurrency: "2", currentMDVersion: ptr.To("v1.2.2"), topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, expectedVersion: "v1.2.3", }, } @@ -2218,6 +2934,9 @@ func TestComputeMachineDeployment(t *testing.T) { Replicas: ptr.To[int32](2), } s.UpgradeTracker.MachineDeployments.MarkUpgrading(tt.upgradingMachineDeployments...) + if tt.upgradePlan != nil { + s.UpgradeTracker.MachineDeployments.UpgradePlan = tt.upgradePlan + } e := generator{} @@ -2534,6 +3253,7 @@ func TestComputeMachinePool(t *testing.T) { currentMPVersion *string upgradeConcurrency string topologyVersion string + upgradePlan []string expectedVersion string }{ { @@ -2542,6 +3262,7 @@ func TestComputeMachinePool(t *testing.T) { upgradeConcurrency: "1", currentMPVersion: nil, topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, expectedVersion: "v1.2.3", }, { @@ -2550,6 +3271,7 @@ func TestComputeMachinePool(t *testing.T) { upgradeConcurrency: "1", currentMPVersion: nil, topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, expectedVersion: "v1.2.3", }, { @@ -2558,6 +3280,7 @@ func TestComputeMachinePool(t *testing.T) { upgradeConcurrency: "1", currentMPVersion: ptr.To("v1.2.2"), topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, expectedVersion: "v1.2.2", }, { @@ -2566,6 +3289,7 @@ func TestComputeMachinePool(t *testing.T) { upgradeConcurrency: "2", currentMPVersion: ptr.To("v1.2.2"), topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, expectedVersion: "v1.2.3", }, } @@ -2621,6 +3345,9 @@ func TestComputeMachinePool(t *testing.T) { Replicas: ptr.To[int32](2), } s.UpgradeTracker.MachinePools.MarkUpgrading(tt.upgradingMachinePools...) 
+ if tt.upgradePlan != nil { + s.UpgradeTracker.MachinePools.UpgradePlan = tt.upgradePlan + } e := generator{} @@ -2645,11 +3372,14 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { currentMachineDeploymentState *scope.MachineDeploymentState upgradingMachineDeployments []string upgradeConcurrency int + controlPlanePendingUpgrade bool + controlPlaneWaitingForWorkersUpgrade bool controlPlaneStartingUpgrade bool controlPlaneUpgrading bool controlPlaneProvisioning bool afterControlPlaneUpgradeHookBlocking bool topologyVersion string + upgradePlan []string expectedVersion string expectPendingCreate bool expectPendingUpgrade bool @@ -2686,6 +3416,18 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { currentMachineDeploymentState: currentMachineDeploymentState, upgradingMachineDeployments: []string{}, topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, + expectedVersion: "v1.2.2", + expectPendingUpgrade: true, + }, + { + // Control plane is considered pending an upgrade if topology version did not yet propagate to the control plane. 
+			name:                          "should return machine deployment's spec.template.spec.version if control plane is pending upgrading",
+			currentMachineDeploymentState: currentMachineDeploymentState,
+			upgradingMachineDeployments:   []string{},
+			controlPlanePendingUpgrade:    true,
+			topologyVersion:               "v1.2.3",
+			upgradePlan:                   []string{"v1.2.3"},
 			expectedVersion:               "v1.2.2",
 			expectPendingUpgrade:          true,
 		},
@@ -2696,6 +3438,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
 			upgradingMachineDeployments: []string{},
 			controlPlaneUpgrading:       true,
 			topologyVersion:             "v1.2.3",
+			upgradePlan:                 []string{"v1.2.3"},
 			expectedVersion:             "v1.2.2",
 			expectPendingUpgrade:        true,
 		},
@@ -2706,6 +3449,16 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
 			upgradingMachineDeployments: []string{},
 			controlPlaneStartingUpgrade: true,
 			topologyVersion:             "v1.2.3",
+			upgradePlan:                 []string{"v1.2.3"},
+			expectedVersion:             "v1.2.2",
+			expectPendingUpgrade:        true,
+		},
+		{
+			name:                          "should return machine deployment's spec.template.spec.version if the Machine deployment already performed the upgrade step",
+			currentMachineDeploymentState: currentMachineDeploymentState,
+			upgradingMachineDeployments:   []string{},
+			topologyVersion:               "v1.2.3",
+			upgradePlan:                   []string{"v1.2.2", "v1.2.3"},
 			expectedVersion:             "v1.2.2",
 			expectPendingUpgrade:        true,
 		},
@@ -2714,9 +3467,31 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
 			currentMachineDeploymentState: currentMachineDeploymentState,
 			upgradingMachineDeployments:   []string{},
 			topologyVersion:               "v1.2.3",
+			upgradePlan:                   []string{"v1.2.3"},
 			expectedVersion:               "v1.2.3",
 			expectPendingUpgrade:          false,
 		},
+		{
+			name:                          "should return next version from the upgrade plan if multistep upgrade, if the control plane is not upgrading, not scaling, not ready to upgrade and none of the machine deployments are upgrading",
+			currentMachineDeploymentState: currentMachineDeploymentState,
+			upgradingMachineDeployments:   []string{},
+			topologyVersion:               "v1.4.3",
+			upgradePlan:                   []string{"v1.3.3", "v1.4.3"},
+			expectedVersion:               "v1.3.3",
+			expectPendingUpgrade:          false,
+		},
+		{
+			// Control plane is considered pending an upgrade if topology version did not yet propagate to the control plane.
+			name:                                 "should return next version from the upgrade plan if multistep upgrade, if the control plane is pending an upgrade but this requires workers to upgrade first",
+			currentMachineDeploymentState:        currentMachineDeploymentState,
+			upgradingMachineDeployments:          []string{},
+			controlPlanePendingUpgrade:           true,
+			controlPlaneWaitingForWorkersUpgrade: true,
+			topologyVersion:                      "v1.4.3",
+			upgradePlan:                          []string{"v1.3.3", "v1.4.3"},
+			expectedVersion:                      "v1.3.3",
+			expectPendingUpgrade:                 false,
+		},
 		{
 			name:                                 "should return machine deployment's spec.template.spec.version if control plane is stable, other machine deployments are upgrading, concurrency limit not reached but AfterControlPlaneUpgrade hook is blocking",
 			currentMachineDeploymentState:        currentMachineDeploymentState,
@@ -2724,6 +3499,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
 			upgradeConcurrency:                   2,
 			afterControlPlaneUpgradeHookBlocking: true,
 			topologyVersion:                      "v1.2.3",
+			upgradePlan:                          []string{"v1.2.3"},
 			expectedVersion:                      "v1.2.2",
 			expectPendingUpgrade:                 true,
 		},
@@ -2733,6 +3509,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
 			upgradingMachineDeployments: []string{"upgrading-md1"},
 			upgradeConcurrency:          2,
 			topologyVersion:             "v1.2.3",
+			upgradePlan:                 []string{"v1.2.3"},
 			expectedVersion:             "v1.2.3",
 			expectPendingUpgrade:        false,
 		},
@@ -2742,6 +3519,7 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
 			upgradingMachineDeployments: []string{"upgrading-md1", "upgrading-md2"},
 			upgradeConcurrency:          2,
 			topologyVersion:             "v1.2.3",
+			upgradePlan:                 []string{"v1.2.3"},
 			expectedVersion:             "v1.2.2",
 			expectPendingUpgrade:        true,
 		},
@@ -2765,6 +3543,9 @@ func TestComputeMachineDeploymentVersion(t *testing.T) {
 				UpgradeTracker:      scope.NewUpgradeTracker(scope.MaxMDUpgradeConcurrency(tt.upgradeConcurrency)),
 				HookResponseTracker:
scope.NewHookResponseTracker(), } + if tt.upgradePlan != nil { + s.UpgradeTracker.MachineDeployments.UpgradePlan = tt.upgradePlan + } if tt.afterControlPlaneUpgradeHookBlocking { s.HookResponseTracker.Add(runtimehooksv1.AfterControlPlaneUpgrade, &runtimehooksv1.AfterControlPlaneUpgradeResponse{ CommonRetryResponse: runtimehooksv1.CommonRetryResponse{ @@ -2775,11 +3556,14 @@ func TestComputeMachineDeploymentVersion(t *testing.T) { s.UpgradeTracker.ControlPlane.IsStartingUpgrade = tt.controlPlaneStartingUpgrade s.UpgradeTracker.ControlPlane.IsUpgrading = tt.controlPlaneUpgrading s.UpgradeTracker.ControlPlane.IsProvisioning = tt.controlPlaneProvisioning + s.UpgradeTracker.ControlPlane.IsPendingUpgrade = tt.controlPlanePendingUpgrade + s.UpgradeTracker.ControlPlane.IsWaitingForWorkersUpgrade = tt.controlPlaneWaitingForWorkersUpgrade s.UpgradeTracker.MachineDeployments.MarkUpgrading(tt.upgradingMachineDeployments...) e := generator{} - version := e.computeMachineDeploymentVersion(s, tt.machineDeploymentTopology, tt.currentMachineDeploymentState) + version, err := e.computeMachineDeploymentVersion(s, tt.machineDeploymentTopology, tt.currentMachineDeploymentState) + g.Expect(err).NotTo(HaveOccurred()) g.Expect(version).To(Equal(tt.expectedVersion)) if tt.currentMachineDeploymentState != nil { @@ -2814,11 +3598,14 @@ func TestComputeMachinePoolVersion(t *testing.T) { currentMachinePoolState *scope.MachinePoolState upgradingMachinePools []string upgradeConcurrency int + controlPlanePendingUpgrade bool + controlPlaneWaitingForWorkersUpgrade bool controlPlaneStartingUpgrade bool controlPlaneUpgrading bool controlPlaneProvisioning bool afterControlPlaneUpgradeHookBlocking bool topologyVersion string + upgradePlan []string expectedVersion string expectPendingCreate bool expectPendingUpgrade bool @@ -2855,9 +3642,21 @@ func TestComputeMachinePoolVersion(t *testing.T) { currentMachinePoolState: currentMachinePoolState, upgradingMachinePools: []string{}, topologyVersion: "v1.2.3", 
+ upgradePlan: []string{"v1.2.3"}, expectedVersion: "v1.2.2", expectPendingUpgrade: true, }, + { + // Control plane is considered pending an upgrade if topology version did not yet propagate to the control plane. + name: "should return machine MachinePool's spec.template.spec.version if control plane is pending upgrading", + currentMachinePoolState: currentMachinePoolState, + upgradingMachinePools: []string{}, + controlPlanePendingUpgrade: true, + topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, + expectedVersion: "v1.2.2", + expectPendingUpgrade: true, + }, { // Control plane is considered upgrading if the control plane's spec.version and status.version is not equal. name: "should return MachinePool's spec.template.spec.version if control plane is upgrading", @@ -2865,6 +3664,7 @@ func TestComputeMachinePoolVersion(t *testing.T) { upgradingMachinePools: []string{}, controlPlaneUpgrading: true, topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, expectedVersion: "v1.2.2", expectPendingUpgrade: true, }, @@ -2875,17 +3675,49 @@ func TestComputeMachinePoolVersion(t *testing.T) { upgradingMachinePools: []string{}, controlPlaneStartingUpgrade: true, topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, expectedVersion: "v1.2.2", expectPendingUpgrade: true, }, + { + name: "should return MachinePool's spec.template.spec.version if the MachinePool already performed the upgrade step", + currentMachinePoolState: currentMachinePoolState, + upgradingMachinePools: []string{}, + topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.2", "v1.2.3"}, + expectedVersion: "v1.2.2", + expectPendingUpgrade: true, + }, { name: "should return cluster.spec.topology.version if the control plane is not upgrading, not scaling, not ready to upgrade and none of the MachinePools are upgrading", currentMachinePoolState: currentMachinePoolState, upgradingMachinePools: []string{}, topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, expectedVersion: 
"v1.2.3", expectPendingUpgrade: false, }, + { + name: "should return next version in the upgrade plan if multistep upgrade, if the control plane is not upgrading, not scaling, not ready to upgrade and none of the MachinePools are upgrading", + currentMachinePoolState: currentMachinePoolState, + upgradingMachinePools: []string{}, + topologyVersion: "v1.4.3", + upgradePlan: []string{"v1.3.3", "v1.4.3"}, + expectedVersion: "v1.3.3", + expectPendingUpgrade: false, + }, + { + // Control plane is considered pending an upgrade if topology version did not yet propagate to the control plane. + name: "should return next version in the upgrade plan if multistep upgrade, if the control plane is pending an upgrade but this requires workers to upgrade first", + currentMachinePoolState: currentMachinePoolState, + upgradingMachinePools: []string{}, + controlPlanePendingUpgrade: true, + controlPlaneWaitingForWorkersUpgrade: true, + topologyVersion: "v1.4.3", + upgradePlan: []string{"v1.3.3", "v1.4.3"}, + expectedVersion: "v1.3.3", + expectPendingUpgrade: false, + }, { name: "should return MachinePool's spec.template.spec.version if control plane is stable, other MachinePools are upgrading, concurrency limit not reached but AfterControlPlaneUpgrade hook is blocking", currentMachinePoolState: currentMachinePoolState, @@ -2893,6 +3725,7 @@ func TestComputeMachinePoolVersion(t *testing.T) { upgradeConcurrency: 2, afterControlPlaneUpgradeHookBlocking: true, topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, expectedVersion: "v1.2.2", expectPendingUpgrade: true, }, @@ -2902,6 +3735,7 @@ func TestComputeMachinePoolVersion(t *testing.T) { upgradingMachinePools: []string{"upgrading-mp1"}, upgradeConcurrency: 2, topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, expectedVersion: "v1.2.3", expectPendingUpgrade: false, }, @@ -2911,6 +3745,7 @@ func TestComputeMachinePoolVersion(t *testing.T) { upgradingMachinePools: []string{"upgrading-mp1", "upgrading-mp2"}, 
upgradeConcurrency: 2, topologyVersion: "v1.2.3", + upgradePlan: []string{"v1.2.3"}, expectedVersion: "v1.2.2", expectPendingUpgrade: true, }, @@ -2944,11 +3779,17 @@ func TestComputeMachinePoolVersion(t *testing.T) { s.UpgradeTracker.ControlPlane.IsStartingUpgrade = tt.controlPlaneStartingUpgrade s.UpgradeTracker.ControlPlane.IsUpgrading = tt.controlPlaneUpgrading s.UpgradeTracker.ControlPlane.IsProvisioning = tt.controlPlaneProvisioning + s.UpgradeTracker.ControlPlane.IsPendingUpgrade = tt.controlPlanePendingUpgrade + s.UpgradeTracker.ControlPlane.IsWaitingForWorkersUpgrade = tt.controlPlaneWaitingForWorkersUpgrade s.UpgradeTracker.MachinePools.MarkUpgrading(tt.upgradingMachinePools...) + if tt.upgradePlan != nil { + s.UpgradeTracker.MachinePools.UpgradePlan = tt.upgradePlan + } e := generator{} - version := e.computeMachinePoolVersion(s, tt.machinePoolTopology, tt.currentMachinePoolState) + version, err := e.computeMachinePoolVersion(s, tt.machinePoolTopology, tt.currentMachinePoolState) + g.Expect(err).NotTo(HaveOccurred()) g.Expect(version).To(Equal(tt.expectedVersion)) if tt.currentMachinePoolState != nil { diff --git a/exp/topology/desiredstate/upgrade_plan.go b/exp/topology/desiredstate/upgrade_plan.go new file mode 100644 index 000000000000..c44245fe7e67 --- /dev/null +++ b/exp/topology/desiredstate/upgrade_plan.go @@ -0,0 +1,406 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package desiredstate + +import ( + "context" + "fmt" + "strings" + + "github.com/blang/semver/v4" + "github.com/pkg/errors" + "k8s.io/apimachinery/pkg/util/sets" + + "sigs.k8s.io/cluster-api/exp/topology/scope" + "sigs.k8s.io/cluster-api/internal/contract" + "sigs.k8s.io/cluster-api/util/version" +) + +// ComputeUpgradePlan is responsible to computes the upgrade plan for both control plane and workers +// and to set up the upgrade tracker accordingly when there is an upgrade pending. +// +// The upgrade plan for control plane is the result of a pluggable function that should return all the +// intermediates version a control plan upgrade must go through to reach desired version. +// +// The pluggable function could return also upgrade steps for workers; if not, this func +// will determine the minimal number of workers upgrade steps, thus minimizing impact on workloads and reducing the overall upgrade time. +func ComputeUpgradePlan(ctx context.Context, s *scope.Scope, getUpgradePlan GetUpgradePlanFunc) error { + // Return early if control plane is not yet created. + if s.Current.ControlPlane == nil || s.Current.ControlPlane.Object == nil { + return nil + } + + // Get desired version, control plane versions and min worker versions + // NOTE: we consider both machine deployment and machine pools min for computing workers version + // because we are going to ask only a single workers upgrade plan. 
+ desiredVersion := s.Blueprint.Topology.Version + desiredSemVer, err := semver.ParseTolerant(desiredVersion) + if err != nil { + return errors.Wrapf(err, "failed to parse Cluster version %s", desiredVersion) + } + + controlPlaneVersion := "" + v, err := contract.ControlPlane().Version().Get(s.Current.ControlPlane.Object) + if err != nil { + return errors.Wrap(err, "failed to get the version from control plane spec") + } + controlPlaneVersion = *v + controlPlaneSemVer, err := semver.ParseTolerant(*v) + if err != nil { + return errors.Wrapf(err, "failed to parse ControlPlane version %s", *v) + } + + var minWorkersSemVer *semver.Version + for _, md := range s.Current.MachineDeployments { + if md.Object.Spec.Template.Spec.Version != "" { + currentSemVer, err := semver.ParseTolerant(md.Object.Spec.Template.Spec.Version) + if err != nil { + return errors.Wrapf(err, "failed to parse version %s of MachineDeployment %s", md.Object.Spec.Template.Spec.Version, md.Object.Name) + } + if minWorkersSemVer == nil || isLowerThanMinVersion(currentSemVer, *minWorkersSemVer, controlPlaneSemVer) { + minWorkersSemVer = ¤tSemVer + } + } + } + + for _, mp := range s.Current.MachinePools { + if mp.Object.Spec.Template.Spec.Version != "" { + currentSemVer, err := semver.ParseTolerant(mp.Object.Spec.Template.Spec.Version) + if err != nil { + return errors.Wrapf(err, "failed to parse version %s of MachinePool %s", mp.Object.Spec.Template.Spec.Version, mp.Object.Name) + } + if minWorkersSemVer == nil || isLowerThanMinVersion(currentSemVer, *minWorkersSemVer, controlPlaneSemVer) { + minWorkersSemVer = ¤tSemVer + } + } + } + + minWorkersVersion := "" + if minWorkersSemVer != nil { + minWorkersVersion = fmt.Sprintf("v%s", minWorkersSemVer.String()) + } + + // If both control plane and workers are already at the desired version, there is no need to compute the upgrade plan. 
+ if controlPlaneSemVer.String() == desiredSemVer.String() && (minWorkersSemVer == nil || minWorkersSemVer.String() == desiredSemVer.String()) { + return nil + } + + // At this stage we know that an upgrade is required, then call the pluggable func that returns the upgrade plan. + controlPlaneUpgradePlan, workersUpgradePlan, err := getUpgradePlan(ctx, desiredVersion, controlPlaneVersion, minWorkersVersion) + if err != nil { + return err + } + + // DefaultAndValidateUpgradePlans validates both control plane and workers upgrade plan. + // If workers upgrade plan is not specified, default it with the minimal number of workers upgrade steps. + workersUpgradePlan, err = DefaultAndValidateUpgradePlans(desiredVersion, controlPlaneVersion, minWorkersVersion, controlPlaneUpgradePlan, workersUpgradePlan) + if err != nil { + return err + } + + // Sets the control plane upgrade plan. + s.UpgradeTracker.ControlPlane.UpgradePlan = controlPlaneUpgradePlan + + // Sets the machine deployment and workers upgrade plan. + // Note. Each MachineDeployment/MachinePool then has to figure out if/when to pick up the first version in the plan, + // because the minWorkersVersion will be included until all of them are upgraded. + if len(s.Current.MachineDeployments) > 0 { + s.UpgradeTracker.MachineDeployments.UpgradePlan = workersUpgradePlan + } + if len(s.Current.MachinePools) > 0 { + s.UpgradeTracker.MachinePools.UpgradePlan = workersUpgradePlan + } + + return nil +} + +// DefaultAndValidateUpgradePlans validates both control plane and workers upgrade plan. +// If workers upgrade plan is not specified, default it with the minimal number of workers upgrade steps. 
+func DefaultAndValidateUpgradePlans(desiredVersion string, controlPlaneVersion string, minWorkersVersion string, controlPlaneUpgradePlan []string, workersUpgradePlan []string) ([]string, error) { + desiredSemVer, err := semver.ParseTolerant(desiredVersion) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse Cluster version %s", desiredVersion) + } + + controlPlaneSemVer, err := semver.ParseTolerant(controlPlaneVersion) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse ControlPlane version %s", controlPlaneVersion) + } + + var minWorkersSemVer *semver.Version + if minWorkersVersion != "" { + v, err := semver.ParseTolerant(minWorkersVersion) + if err != nil { + return nil, errors.Wrapf(err, "failed to parse min workers version %s", minWorkersVersion) + } + minWorkersSemVer = &v + } + + // Setup for tracking known version for each minors; this info will be used to build intermediate steps for workers when required + // Note: The control plane might be already one version ahead of workers, we always add current control plane version + // (it should be used as a target version for workers lagging behind). + minors := map[uint64]string{} + minors[controlPlaneSemVer.Minor] = controlPlaneVersion + + // Setup for tracking version order, which is required for disambiguating where there are version with different build numbers + // and thus it is not possible to determine order (and thus the code relies on the version order in the upgrade plan). + versionOrder := map[string]int{} + versionOrder[controlPlaneVersion] = -1 + + // Validate the control plane upgrade plan. 
+ if version.Compare(controlPlaneSemVer, desiredSemVer, version.WithBuildTags()) != 0 { + currentSemVer := controlPlaneSemVer + for i, targetVersion := range controlPlaneUpgradePlan { + versionOrder[targetVersion] = i + targetSemVer, err := semver.ParseTolerant(targetVersion) + if err != nil { + return nil, errors.Wrapf(err, "invalid ControlPlane upgrade plan: item %d; failed to parse version %s", i, targetVersion) + } + + // Check versions in the control plane upgrade plan are in the right order. + // Note: we tolerate having one version followed by another with the same major.minor.patch but different build tags (version.Compare==2) + if version.Compare(targetSemVer, currentSemVer, version.WithBuildTags()) <= 0 { + return nil, errors.Errorf("invalid ControlPlane upgrade plan: item %d; version %s must be greater than v%s", i, targetVersion, currentSemVer) + } + + // Check we are not skipping minors. + if currentSemVer.Minor != targetSemVer.Minor && currentSemVer.Minor+1 != targetSemVer.Minor { + return nil, errors.Errorf("invalid ControlPlane upgrade plan: item %d; expecting a version with minor %d or %d, found version %s", i, currentSemVer.Minor, currentSemVer.Minor+1, targetVersion) + } + + minors[targetSemVer.Minor] = targetVersion + currentSemVer = targetSemVer + } + if version.Compare(currentSemVer, desiredSemVer, version.WithBuildTags()) != 0 { + return nil, errors.Errorf("invalid ControlPlane upgrade plan: control plane upgrade plan must end with version %s, ends with %s instead", desiredVersion, fmt.Sprintf("v%s", currentSemVer)) + } + } else if len(controlPlaneUpgradePlan) > 0 { + return nil, errors.New("invalid ControlPlane upgrade plan: control plane is already at the desired version") + } + + // Defaults and validate the workers upgrade plan. 
+ if minWorkersSemVer != nil && version.Compare(*minWorkersSemVer, desiredSemVer, version.WithBuildTags()) != 0 { + if len(controlPlaneUpgradePlan) > 0 { + // Check that the workers upgrade plan only includes the same versions considered for the control plane upgrade plan, + // plus the control plane version to handle the case that CP already completed its upgrade. + if diff := sets.New(workersUpgradePlan...).Difference(sets.New(controlPlaneUpgradePlan...).Insert(controlPlaneVersion)); len(diff) > 0 { + return nil, errors.Errorf("invalid workers upgrade plan: versions %s doesn't match any versions in the control plane upgrade plan nor the control plane version", strings.Join(diff.UnsortedList(), ",")) + } + } + + // If the workers upgrade plan is empty, default it by adding: + // - upgrade steps whenever required to prevent violation of version skew rules + // - an upgrade step at the end of the upgrade sequence + if len(workersUpgradePlan) == 0 { + currentMinor := minWorkersSemVer.Minor + targetMinor := desiredSemVer.Minor + for i := range targetMinor - currentMinor { + if i > 0 && i%3 == 0 { + targetVersion, ok := minors[currentMinor+i] + if !ok { + // Note: this should never happen, all the minors in the range minWorkersSemVer.Minor-desiredSemVer.Minor should exist in the list of minors, which is + // derived from control plane upgrade plan + current control plane version (a superset of the versions in the workers upgrade plan) + return nil, errors.Wrapf(err, "invalid upgrade plan; unable to identify version for minor %d", currentMinor+i) + } + workersUpgradePlan = append(workersUpgradePlan, targetVersion) + } + } + if len(workersUpgradePlan) == 0 || workersUpgradePlan[len(workersUpgradePlan)-1] != desiredVersion { + workersUpgradePlan = append(workersUpgradePlan, desiredVersion) + } + } + + // Validate the workers upgrade plan. 
+ currentSemVer := *minWorkersSemVer + currentMinor := currentSemVer.Minor + for i, targetVersion := range workersUpgradePlan { + targetSemVer, err := semver.ParseTolerant(targetVersion) + if err != nil { + return nil, errors.Wrapf(err, "invalid workers upgrade plan, item %d; failed to parse version %s", i, targetVersion) + } + + // Check versions in the workers upgrade plan are in the right order. + cmp := version.Compare(targetSemVer, currentSemVer, version.WithBuildTags()) + switch { + case cmp <= 0: + return nil, errors.Errorf("invalid workers upgrade plan, item %d; version %s must be greater than v%s", i, targetVersion, currentSemVer) + case cmp == 2: + // In the case of same major.minor.patch but different build tags (version.Compare==2), check if + // versions are in the same order as in the control plane upgrade plan. + targetVersionOrder, ok := versionOrder[targetVersion] + if !ok { + // Note: this should never happen, all the versions in the workers upgrade plan should exist in versionOrder, which is + // derived from control plane upgrade plan + current control plane version (a superset of the versions in the workers upgrade plan) + return nil, errors.Errorf("invalid workers upgrade plan, item %d; failer to determine version %s order", i, targetVersion) + } + currentVersionOrder, ok := versionOrder[fmt.Sprintf("v%s", currentSemVer)] + if !ok { + // Note: this should never happen, all the versions in the workers upgrade plan should exist in versionOrder, which is + // derived from control plane upgrade plan + current control plane version (a superset of the versions in the workers upgrade plan) + return nil, errors.Errorf("failer to determine version v%s order", currentSemVer) + } + if targetVersionOrder < currentVersionOrder { + return nil, errors.Errorf("invalid workers upgrade plan, item %d; version %s must be before v%s", i, targetVersion, currentSemVer) + } + } + + targetMinor := targetSemVer.Minor + if targetMinor-currentMinor > 3 { + return nil, 
errors.Errorf("invalid workers upgrade plan, item %d; workers cannot go from minor %d (%s) to minor %d (%s), an intermediate step is required to comply with Kubernetes version skew rules", i, currentMinor, fmt.Sprintf("v%s", currentSemVer.String()), targetMinor, targetVersion) + } + + currentSemVer = targetSemVer + currentMinor = currentSemVer.Minor + } + if version.Compare(currentSemVer, desiredSemVer, version.WithBuildTags()) != 0 { + return nil, errors.Errorf("invalid workers upgrade plan; workers upgrade plan must end with version %s, ends with %s instead", desiredVersion, fmt.Sprintf("v%s", currentSemVer)) + } + } else if len(workersUpgradePlan) > 0 { + return nil, errors.New("invalid worker upgrade plan; there are no workers or workers already at the desired version") + } + + return workersUpgradePlan, nil +} + +func isLowerThanMinVersion(v, minVersion, controlPlaneSemVer semver.Version) bool { + switch cmp := version.Compare(v, minVersion, version.WithBuildTags()); cmp { + case -1: + // v is lower than minVersion + return true + case 2: + // v is different from minVersion, but it is not possible to determine order; + // use control plane version to resolve: MD/MP version can either be equal to control plane version or an older version, + // so v is considered lower than minVersion when different from control plane version. + return v.String() != controlPlaneSemVer.String() + default: + return false + } +} + +// GetUpgradePlanFunc defines the signature for a func that returns the upgrade plan for control plane and workers. +// +// The upgrade plan for control plane must be a list of intermediate version the control plane must go through +// to reach the desired version. The following rules apply: +// - there should be at least one version for every minor between currentControlPlaneVersion (excluded) and desiredVersion (included). 
+// - each version must be: +// - greater than currentControlPlaneVersion (or with a different build number) +// - greater than the previous version in the list (or with a different build number) +// - less or equal to desiredVersion (or with a different build number) +// - the last version in the plan must be equal to the desired version +// +// The upgrade plan for workers instead in most cases could be left to empty, because the system will automatically +// determine the minimal number of workers upgrade steps, thus minimizing impact on workloads and reducing +// the overall upgrade time. +// +// If instead for any reason the GetUpgradePlanFunc returns a custom upgrade path for workers, the following rules apply: +// - each version must be: +// - equal to currentControlPlaneVersion or to one of the versions in the control plane upgrade plan. +// - greater than current min worker - MachineDeployment & MachinePool - version (or with a different build number) +// - greater than the previous version in the list (or with a different build number) +// - less or equal to the desiredVersion (or with a different build number) +// - in case of versions with the same major/minor/patch version but different build number, also the order +// of those versions must be the same for control plane and worker upgrade plan. +// - the last version in the plan must be equal to the desired version +// - the upgrade plane must have all the intermediate version which workers must go through to avoid breaking rules +// defining the max version skew between control plane and workers. +type GetUpgradePlanFunc func(_ context.Context, desiredVersion, currentControlPlaneVersion, currentMinWorkersVersion string) ([]string, []string, error) + +// GetUpgradePlanOneMinor returns an upgrade plan to reach the next minor. +// The workers upgrade plan will be left empty, thus deferring to ComputeUpgradePlan to compute it. 
+// NOTE: This is the func the system is going to use when there are no Kubernetes versions or UpgradePlan hook +// defined in the ClusterClass. In this scenario, only upgrade by one minor is supported (same as before implementing chained upgrades). +func GetUpgradePlanOneMinor(_ context.Context, desiredVersion, currentControlPlaneVersion, _ string) ([]string, []string, error) { + desiredSemVer, err := semver.ParseTolerant(desiredVersion) + if err != nil { + return nil, nil, errors.Wrap(err, "failed to parse desired version") + } + + currentControlPlaneSemVer, err := semver.ParseTolerant(currentControlPlaneVersion) + if err != nil { + return nil, nil, errors.Wrap(err, "failed to parse current ControlPlane version") + } + + if currentControlPlaneSemVer.String() == desiredSemVer.String() { + return nil, nil, nil + } + + if desiredSemVer.Minor > currentControlPlaneSemVer.Minor+1 { + return nil, nil, errors.Errorf("cannot compute an upgrade plan from %s to %s", currentControlPlaneVersion, desiredVersion) + } + + return []string{desiredVersion}, nil, nil +} + +// GetUpgradePlanFromClusterClassVersions returns an upgrade plan based on versions defined on a ClusterClass. +// The control plane plan will use the latest version for each minor in between currentControlPlaneVersion and desiredVersion; +// workers upgrade plan will be left empty, thus deferring to ComputeUpgradePlan to compute the most efficient plan. +// NOTE: This is the func the system is going to use when there are Kubernetes versions defined in the ClusterClass. 
+func GetUpgradePlanFromClusterClassVersions(clusterClassVersions []string) func(_ context.Context, desiredVersion, currentControlPlaneVersion, _ string) ([]string, []string, error) { + return func(_ context.Context, desiredVersion, currentControlPlaneVersion, _ string) ([]string, []string, error) { + desiredSemVer, err := semver.ParseTolerant(desiredVersion) + if err != nil { + return nil, nil, errors.Wrap(err, "failed to parse desired version") + } + + currentControlPlaneSemVer, err := semver.ParseTolerant(currentControlPlaneVersion) + if err != nil { + return nil, nil, errors.Wrap(err, "failed to parse current ControlPlane version") + } + + if currentControlPlaneSemVer.String() == desiredSemVer.String() { + return nil, nil, nil + } + + // Pick all the known kubernetes versions starting from control plane version (excluded) to desired version. + upgradePlan := []string{} + start := false + end := false + for _, v := range clusterClassVersions { + semV, err := semver.ParseTolerant(v) + if err != nil { + return nil, nil, errors.Wrapf(err, "failed to parse version %s", v) + } + if (start && !end) || (!start && semV.Minor > currentControlPlaneSemVer.Minor) { + upgradePlan = append(upgradePlan, v) + } + if semV.String() == currentControlPlaneSemVer.String() || version.Compare(currentControlPlaneSemVer, semV, version.WithBuildTags()) < 0 { + start = true + } + if semV.String() == desiredSemVer.String() || version.Compare(desiredSemVer, semV, version.WithBuildTags()) < 0 { + end = true + } + } + + // In case there is more than one version for one minor, drop all the versions for one minor except the last. 
+ simplifiedUpgradePlan := []string{} + currentMinor := currentControlPlaneSemVer.Minor + for _, v := range upgradePlan { + semV, err := semver.ParseTolerant(v) + if err != nil { + return nil, nil, errors.Wrapf(err, "failed to parse version %s", v) + } + if semV.Minor > currentMinor { + simplifiedUpgradePlan = append(simplifiedUpgradePlan, v) + } + if semV.Minor == currentMinor && len(simplifiedUpgradePlan) > 0 { + simplifiedUpgradePlan[len(simplifiedUpgradePlan)-1] = v + } + currentMinor = semV.Minor + } + return simplifiedUpgradePlan, nil, nil + } +} diff --git a/exp/topology/desiredstate/upgrade_plan_test.go b/exp/topology/desiredstate/upgrade_plan_test.go new file mode 100644 index 000000000000..40d2aeef49a7 --- /dev/null +++ b/exp/topology/desiredstate/upgrade_plan_test.go @@ -0,0 +1,1133 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package desiredstate + +import ( + "context" + "testing" + + "github.com/blang/semver/v4" + . 
"github.com/onsi/gomega" + + clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" + "sigs.k8s.io/cluster-api/exp/topology/scope" + "sigs.k8s.io/cluster-api/util/test/builder" +) + +func TestIsLowerThanMinVersion(t *testing.T) { + tests := []struct { + name string + v semver.Version + minVersion semver.Version + controlPlaneVersion semver.Version + want bool + }{ + { + name: "Equal", + controlPlaneVersion: semver.MustParse("1.30.1"), // control plane at 1.30.1 + minVersion: semver.MustParse("1.30.1"), // first machine deployment at 1.30.1 + v: semver.MustParse("1.30.1"), // second machine deployment at 1.30.1 + want: false, + }, + { + name: "Lower", + controlPlaneVersion: semver.MustParse("1.30.1"), // control plane at 1.30.1 + minVersion: semver.MustParse("1.30.1"), // first machine deployment at 1.30.1 + v: semver.MustParse("1.29.0"), // second machine deployment still at 1.29.0 + want: true, + }, + { + name: "Equal pre-release", + controlPlaneVersion: semver.MustParse("1.30.1-alpha.1"), // control plane at 1.30.1-alpha.1 + minVersion: semver.MustParse("1.30.1-alpha.1"), // first machine deployment at 1.30.1-alpha.1 + v: semver.MustParse("1.30.1-alpha.1"), // second machine deployment at 1.30.1-alpha.1 + want: false, + }, + { + name: "Lower pre-release", + controlPlaneVersion: semver.MustParse("1.30.1-alpha.1"), // control plane at 1.30.1-alpha.1 + minVersion: semver.MustParse("1.30.1-alpha.1"), // first machine deployment at 1.30.1-alpha.1 + v: semver.MustParse("1.30.1-alpha.0"), // second machine deployment still at 1.30.1-alpha.0 + want: true, + }, + { + name: "Equal pre-release", + controlPlaneVersion: semver.MustParse("1.31.1+foo.2-bar.1"), // control plane at 1.31.1+foo.2-bar.1 + minVersion: semver.MustParse("1.31.1+foo.2-bar.1"), // first machine deployment at 1.31.1+foo.2-bar.1 + v: semver.MustParse("1.31.1+foo.2-bar.1"), // second machine deployment at 1.31.1+foo.2-bar.1 + want: false, + }, + { + name: "Lower pre-release", + controlPlaneVersion: 
semver.MustParse("1.31.1+foo.2-bar.1"), // control plane at 1.31.1+foo.2-bar.1 + minVersion: semver.MustParse("1.31.1+foo.2-bar.1"), // first machine deployment at 1.31.1+foo.2-bar.1 + v: semver.MustParse("1.31.1+foo.1-bar.1"), // second machine deployment still at 1.31.1+foo.1-bar.1 + want: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + got := isLowerThanMinVersion(tt.v, tt.minVersion, tt.controlPlaneVersion) + g.Expect(got).To(Equal(tt.want)) + }) + } +} + +func TestComputeUpgradePlan(t *testing.T) { + tests := []struct { + name string + topologyVersion string + controlPlaneVersion string + machineDeploymentVersion string + machinePoolVersion string + F GetUpgradePlanFunc + wantControlPlaneUpgradePlan []string + wantMachineDeploymentUpgradePlan []string + wantMachinePoolUpgradePlan []string + wantErr bool + wantErrMessage string + }{ + // return early + { + name: "No op if control plane dose not exists", + topologyVersion: "v1.33.1", + }, + { + name: "No op if everything is up to date", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.33.1", + machineDeploymentVersion: "v1.33.1", + machinePoolVersion: "v1.33.1", + }, + + // validation errors + { + name: "Fails for invalid control plane version", + topologyVersion: "v1.33.1", + controlPlaneVersion: "foo", + wantErr: true, + wantErrMessage: "failed to parse ControlPlane version foo: Invalid character(s) found in major number \"0foo\"", + }, + { + name: "Fails if control plane upgrade plan has invalid versions", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"foo"}, nil, nil + }, + wantErr: true, + wantErrMessage: "invalid ControlPlane upgrade plan: item 0; failed to parse version foo: Invalid character(s) found in major number \"0foo\"", + }, + { + name: "Fails if control plane upgrade plan starts with the wrong minor (too old)", + 
topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.30.0"}, nil, nil // v1.31 expected + }, + wantErr: true, + wantErrMessage: "invalid ControlPlane upgrade plan: item 0; version v1.30.0 must be greater than v1.30.0", + }, + { + name: "Fails if control plane upgrade plan starts with the wrong minor (too new)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.32.1"}, nil, nil // v1.31 expected + }, + wantErr: true, + wantErrMessage: "invalid ControlPlane upgrade plan: item 0; expecting a version with minor 30 or 31, found version v1.32.1", + }, + { + name: "Fails if control plane upgrade plan has a downgrade", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1", "v1.30.0"}, nil, nil // v1.31 -> v1.30 is a downgrade! 
+ }, + wantErr: true, + wantErrMessage: "invalid ControlPlane upgrade plan: item 1; version v1.30.0 must be greater than v1.31.1", + }, + { + name: "Fails if control plane upgrade plan doesn't end with the target version (stops before the target version)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1", "v1.32.0"}, nil, nil // v1.33 missing + }, + wantErr: true, + wantErrMessage: "invalid ControlPlane upgrade plan: control plane upgrade plan must end with version v1.33.1, ends with v1.32.0 instead", + }, + { + name: "Fails if control plane upgrade plan doesn't end with the target version (goes past the target version)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1", "v1.32.0", "v1.33.1", "v1.34.1"}, nil, nil // v1.34 is after the target version + }, + wantErr: true, + wantErrMessage: "invalid ControlPlane upgrade plan: control plane upgrade plan must end with version v1.33.1, ends with v1.34.1 instead", + }, + { + name: "Fails if control plane upgrade plan is returned but control plane is already up to date", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.33.1", + machineDeploymentVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.33.1"}, nil, nil // control plane is already up to date + }, + wantErr: true, + wantErrMessage: "invalid ControlPlane upgrade plan: control plane is already at the desired version", + }, + { + name: "Fails if workers plan has versions not included in control plane upgrade plan", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.30.0", + machineDeploymentVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1", "v1.32.0", "v1.33.1"}, []string{"v1.32.2"}, 
nil // v1.32.2 is not a version in the control plane upgrade plan + }, + wantErr: true, + wantErrMessage: "invalid workers upgrade plan: versions v1.32.2 doesn't match any versions in the control plane upgrade plan nor the control plane version", + }, + { + name: "Fails if workers plan has versions in the wrong order", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.30.0", + machineDeploymentVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1", "v1.32.0", "v1.33.1"}, []string{"v1.33.1", "v1.32.0"}, nil // v1.33 -> v1.32 is a downgrade! + }, + wantErr: true, + wantErrMessage: "invalid workers upgrade plan, item 1; version v1.32.0 must be greater than v1.33.1", + }, + { + name: "Fails if workers plan has invalid versions", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.33.1", + machineDeploymentVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return nil, []string{"foo"}, nil + }, + wantErr: true, + wantErrMessage: "invalid workers upgrade plan, item 0; failed to parse version foo: Invalid character(s) found in major number \"0foo\"", + }, + { + name: "Fails if workers plan starts with the wrong minor (too old)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.33.1", + machineDeploymentVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return nil, []string{"v1.30.0"}, nil // v1.30.0 is the current min worker version + }, + wantErr: true, + wantErrMessage: "invalid workers upgrade plan, item 0; version v1.30.0 must be greater than v1.30.0", + }, + { + name: "Fails if workers plan doesn't end with the target version (stops before the target version)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.33.1", + machineDeploymentVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return nil, []string{"v1.32.1"}, nil // v1.32.1 is 
before the target version + }, + wantErr: true, + wantErrMessage: "invalid workers upgrade plan; workers upgrade plan must end with version v1.33.1, ends with v1.32.1 instead", + }, + { + name: "Fails if workers plan doesn't end with the target version (goes past the target version)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.33.1", + machineDeploymentVersion: "v1.31.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return nil, []string{"v1.34.1"}, nil // v1.34.1 is past the target version + }, + wantErr: true, + wantErrMessage: "invalid workers upgrade plan; workers upgrade plan must end with version v1.33.1, ends with v1.34.1 instead", + }, + { + name: "Fails if workers plan doesn't comply with Kubernetes version skew versions", + topologyVersion: "v1.34.1", + controlPlaneVersion: "v1.34.1", + machineDeploymentVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return nil, []string{"v1.34.1"}, nil // workers cannot go from minor 30 to minor 34, an intermediate step is required + }, + wantErr: true, + wantErrMessage: "invalid workers upgrade plan, item 0; workers cannot go from minor 30 (v1.30.0) to minor 34 (v1.34.1), an intermediate step is required to comply with Kubernetes version skew rules", + }, + { + name: "Fails if there are no workers and a worker upgrade plan is provided", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1", "v1.32.0", "v1.33.1"}, []string{"v1.35.1"}, nil // there should not be worker update plan + }, + wantErr: true, + wantErrMessage: "invalid worker upgrade plan; there are no workers or workers already at the desired version", + }, + + // upgrade sequence 1: CP only + { + name: "Return control plane upgrade plan, empty machine deployment and machine pool upgrade plan", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.30.0", 
+ F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1", "v1.32.0", "v1.33.1"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.31.1", "v1.32.0", "v1.33.1"}, + }, + { + name: "Return control plane upgrade plan, empty machine deployment and machine pool upgrade plan (after CP upgrade to 1.31)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.31.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.32.0", "v1.33.1"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.32.0", "v1.33.1"}, + }, + { + name: "Return control plane upgrade plan, empty machine deployment and machine pool upgrade plan (after CP upgrade to 1.32)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.32.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.33.1"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.33.1"}, + }, + { + name: "Return control plane upgrade plan, empty machine deployment and machine pool upgrade plan (after CP upgrade to 1.33)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.33.1", + }, + + // upgrade sequence 2: CP, MD (no MP); defer to the system computing the workers upgrade plan + { + name: "Return control plane and machine deployment upgrade plan with last version", + topologyVersion: "v1.32.0", + controlPlaneVersion: "v1.30.0", + machineDeploymentVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1", "v1.32.0"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.31.1", "v1.32.0"}, + wantMachineDeploymentUpgradePlan: []string{"v1.32.0"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with last version (after CP upgrade to 1.31)", + topologyVersion: "v1.32.0", + controlPlaneVersion: "v1.31.1", + machineDeploymentVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) 
([]string, []string, error) { + return []string{"v1.32.0"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.32.0"}, + wantMachineDeploymentUpgradePlan: []string{"v1.32.0"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with last version (after CP upgrade to 1.32)", + topologyVersion: "v1.32.0", + controlPlaneVersion: "v1.32.0", + machineDeploymentVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return nil, nil, nil + }, + wantControlPlaneUpgradePlan: nil, + wantMachineDeploymentUpgradePlan: []string{"v1.32.0"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with last version (after CP and MD upgrade to 1.32)", + topologyVersion: "v1.32.0", + controlPlaneVersion: "v1.32.0", + machineDeploymentVersion: "v1.32.0", + }, + + // upgrade sequence 3: CP, MP (no MD); defer to the system computing the workers upgrade plan + { + name: "Return control plane and machine pool upgrade plan with last version", + topologyVersion: "v1.32.0", + controlPlaneVersion: "v1.30.0", + machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1", "v1.32.0"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.31.1", "v1.32.0"}, + wantMachinePoolUpgradePlan: []string{"v1.32.0"}, + }, + { + name: "Return control plane and machine pool upgrade plan with last version (after CP upgrade to 1.31)", + topologyVersion: "v1.32.0", + controlPlaneVersion: "v1.31.1", + machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.32.0"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.32.0"}, + wantMachinePoolUpgradePlan: []string{"v1.32.0"}, + }, + { + name: "Return control plane and machine pool upgrade plan with last version (after CP upgrade to 1.32)", + topologyVersion: "v1.32.0", + controlPlaneVersion: "v1.32.0", + 
machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return nil, nil, nil + }, + wantControlPlaneUpgradePlan: nil, + wantMachinePoolUpgradePlan: []string{"v1.32.0"}, + }, + { + name: "Return control plane and machine pool upgrade plan with last version (after CP and MP upgrade to 1.32)", + topologyVersion: "v1.32.0", + controlPlaneVersion: "v1.32.0", + machinePoolVersion: "v1.32.0", + }, + + // upgrade sequence 3: CP, MD, MP; defer to the system computing the workers upgrade plan, an additional worker upgrade step required to respect version skew + { + name: "Return control plane and machine deployment upgrade plan with last version + version skew versions", + topologyVersion: "v1.34.1", + controlPlaneVersion: "v1.30.0", + machineDeploymentVersion: "v1.30.0", + machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1", "v1.32.0", "v1.33.1", "v1.34.1"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.31.1", "v1.32.0", "v1.33.1", "v1.34.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.33.1", "v1.34.1"}, + wantMachinePoolUpgradePlan: []string{"v1.33.1", "v1.34.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with last version + version skew versions (after CP upgrade to 1.31)", + topologyVersion: "v1.34.1", + controlPlaneVersion: "v1.31.1", + machineDeploymentVersion: "v1.30.0", + machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.32.0", "v1.33.1", "v1.34.1"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.32.0", "v1.33.1", "v1.34.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.33.1", "v1.34.1"}, + wantMachinePoolUpgradePlan: []string{"v1.33.1", "v1.34.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with last version + version skew versions (after CP upgrade to 1.32)", + 
topologyVersion: "v1.34.1", + controlPlaneVersion: "v1.32.0", + machineDeploymentVersion: "v1.30.0", + machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.33.1", "v1.34.1"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.33.1", "v1.34.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.33.1", "v1.34.1"}, + wantMachinePoolUpgradePlan: []string{"v1.33.1", "v1.34.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with last version + version skew versions (after CP upgrade to 1.33)", + topologyVersion: "v1.34.1", + controlPlaneVersion: "v1.33.1", + machineDeploymentVersion: "v1.30.0", + machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.34.1"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.34.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.33.1", "v1.34.1"}, + wantMachinePoolUpgradePlan: []string{"v1.33.1", "v1.34.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with last version + version skew versions (after CP and MD upgrade to 1.33)", + topologyVersion: "v1.34.1", + controlPlaneVersion: "v1.33.1", + machineDeploymentVersion: "v1.33.1", + machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.34.1"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.34.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.33.1", "v1.34.1"}, + wantMachinePoolUpgradePlan: []string{"v1.33.1", "v1.34.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with last version + version skew versions (after CP, MD and MP upgrade to 1.33)", + topologyVersion: "v1.34.1", + controlPlaneVersion: "v1.33.1", + machineDeploymentVersion: "v1.33.1", + machinePoolVersion: "v1.33.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + 
return []string{"v1.34.1"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.34.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.34.1"}, + wantMachinePoolUpgradePlan: []string{"v1.34.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with last version + version skew versions (after CP upgrade to 1.34)", + topologyVersion: "v1.34.1", + controlPlaneVersion: "v1.34.1", + machineDeploymentVersion: "v1.33.1", + machinePoolVersion: "v1.33.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return nil, nil, nil + }, + wantControlPlaneUpgradePlan: nil, + wantMachineDeploymentUpgradePlan: []string{"v1.34.1"}, + wantMachinePoolUpgradePlan: []string{"v1.34.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with last version + version skew versions (after CP and MD upgrade to 1.34)", + topologyVersion: "v1.34.1", + controlPlaneVersion: "v1.34.1", + machineDeploymentVersion: "v1.34.1", + machinePoolVersion: "v1.33.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return nil, nil, nil + }, + wantControlPlaneUpgradePlan: nil, + wantMachineDeploymentUpgradePlan: []string{"v1.34.1"}, + wantMachinePoolUpgradePlan: []string{"v1.34.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with last version + version skew versions (after CP, MD and MP upgrade to 1.34)", + topologyVersion: "v1.34.1", + controlPlaneVersion: "v1.34.1", + machineDeploymentVersion: "v1.34.1", + machinePoolVersion: "v1.34.1", + }, + + // upgrade sequence 4: CP, MD, MP; workers upgrade plan provided, force worker upgrades to an intermediate K8s version (even if not necessary) + { + name: "Return control plane and machine deployment upgrade plan custom worker version + last version", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.30.0", + machineDeploymentVersion: "v1.30.0", + machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ 
string) ([]string, []string, error) { + return []string{"v1.31.1", "v1.32.0", "v1.33.1"}, []string{"v1.31.1", "v1.33.1"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.31.1", "v1.32.0", "v1.33.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.31.1", "v1.33.1"}, + wantMachinePoolUpgradePlan: []string{"v1.31.1", "v1.33.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan custom worker version + last version (after CP upgrade to v1.31)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.31.1", + machineDeploymentVersion: "v1.30.0", + machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.32.0", "v1.33.1"}, []string{"v1.31.1", "v1.33.1"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.32.0", "v1.33.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.31.1", "v1.33.1"}, + wantMachinePoolUpgradePlan: []string{"v1.31.1", "v1.33.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan custom worker version + last version (after CP and MD upgrade to v1.31)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.31.1", + machineDeploymentVersion: "v1.31.1", + machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.32.0", "v1.33.1"}, []string{"v1.31.1", "v1.33.1"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.32.0", "v1.33.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.31.1", "v1.33.1"}, + wantMachinePoolUpgradePlan: []string{"v1.31.1", "v1.33.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan custom worker version + last version (after CP, MD and MP upgrade to v1.31)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.31.1", + machineDeploymentVersion: "v1.31.1", + machinePoolVersion: "v1.31.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.32.0", 
"v1.33.1"}, []string{"v1.33.1"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.32.0", "v1.33.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.33.1"}, + wantMachinePoolUpgradePlan: []string{"v1.33.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan custom worker version + last version (after CP upgrade to v1.32)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.32.0", + machineDeploymentVersion: "v1.31.1", + machinePoolVersion: "v1.31.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.33.1"}, []string{"v1.33.1"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.33.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.33.1"}, + wantMachinePoolUpgradePlan: []string{"v1.33.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan custom worker version + last version (after CP upgrade to v1.33)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.33.1", + machineDeploymentVersion: "v1.31.1", + machinePoolVersion: "v1.31.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return nil, []string{"v1.33.1"}, nil + }, + wantControlPlaneUpgradePlan: nil, + wantMachineDeploymentUpgradePlan: []string{"v1.33.1"}, + wantMachinePoolUpgradePlan: []string{"v1.33.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan custom worker version + last version (after CP and MD upgrade to v1.33)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.33.1", + machineDeploymentVersion: "v1.33.1", + machinePoolVersion: "v1.31.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return nil, []string{"v1.33.1"}, nil + }, + wantControlPlaneUpgradePlan: nil, + wantMachineDeploymentUpgradePlan: []string{"v1.33.1"}, + wantMachinePoolUpgradePlan: []string{"v1.33.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan custom worker version + last version (after CP, 
MD and MP upgrade to v1.33)", + topologyVersion: "v1.33.1", + controlPlaneVersion: "v1.33.1", + machineDeploymentVersion: "v1.33.1", + machinePoolVersion: "v1.33.1", + }, + + // upgrade sequence 5: CP, MD, MP; workers upgrade plan provided, force worker upgrades to an intermediate K8s version (even if not necessary) + additional worker upgrade to respect version skew + { + name: "Return control plane and machine deployment upgrade plan with custom worker version + version skew versions + last version", + topologyVersion: "v1.35.2", + controlPlaneVersion: "v1.30.0", + machineDeploymentVersion: "v1.30.0", + machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1", "v1.32.0", "v1.33.1", "v1.34.2", "v1.35.2"}, []string{"v1.31.1", "v1.34.2", "v1.35.2"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.31.1", "v1.32.0", "v1.33.1", "v1.34.2", "v1.35.2"}, + wantMachineDeploymentUpgradePlan: []string{"v1.31.1", "v1.34.2", "v1.35.2"}, + wantMachinePoolUpgradePlan: []string{"v1.31.1", "v1.34.2", "v1.35.2"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with custom worker version + version skew versions + last version (after CP upgrade to v1.31)", + topologyVersion: "v1.35.2", + controlPlaneVersion: "v1.31.1", + machineDeploymentVersion: "v1.30.0", + machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.32.0", "v1.33.1", "v1.34.2", "v1.35.2"}, []string{"v1.31.1", "v1.34.2", "v1.35.2"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.32.0", "v1.33.1", "v1.34.2", "v1.35.2"}, + wantMachineDeploymentUpgradePlan: []string{"v1.31.1", "v1.34.2", "v1.35.2"}, + wantMachinePoolUpgradePlan: []string{"v1.31.1", "v1.34.2", "v1.35.2"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with custom worker version + version skew versions + last version (after CP and MD upgrade to 
v1.31)", + topologyVersion: "v1.35.2", + controlPlaneVersion: "v1.31.1", + machineDeploymentVersion: "v1.31.1", + machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.32.0", "v1.33.1", "v1.34.2", "v1.35.2"}, []string{"v1.31.1", "v1.34.2", "v1.35.2"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.32.0", "v1.33.1", "v1.34.2", "v1.35.2"}, + wantMachineDeploymentUpgradePlan: []string{"v1.31.1", "v1.34.2", "v1.35.2"}, + wantMachinePoolUpgradePlan: []string{"v1.31.1", "v1.34.2", "v1.35.2"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with custom worker version + version skew versions + last version (after CP, MD and MP upgrade to v1.31)", + topologyVersion: "v1.35.2", + controlPlaneVersion: "v1.31.1", + machineDeploymentVersion: "v1.31.1", + machinePoolVersion: "v1.31.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.32.0", "v1.33.1", "v1.34.2", "v1.35.2"}, []string{"v1.34.2", "v1.35.2"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.32.0", "v1.33.1", "v1.34.2", "v1.35.2"}, + wantMachineDeploymentUpgradePlan: []string{"v1.34.2", "v1.35.2"}, + wantMachinePoolUpgradePlan: []string{"v1.34.2", "v1.35.2"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with custom worker version + version skew versions + last version (after CP upgrade to v1.32)", + topologyVersion: "v1.35.2", + controlPlaneVersion: "v1.32.0", + machineDeploymentVersion: "v1.31.1", + machinePoolVersion: "v1.31.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.33.1", "v1.34.2", "v1.35.2"}, []string{"v1.34.2", "v1.35.2"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.33.1", "v1.34.2", "v1.35.2"}, + wantMachineDeploymentUpgradePlan: []string{"v1.34.2", "v1.35.2"}, + wantMachinePoolUpgradePlan: []string{"v1.34.2", "v1.35.2"}, + }, + { + name: "Return control 
plane and machine deployment upgrade plan with custom worker version + version skew versions + last version (after CP upgrade to v1.33)", + topologyVersion: "v1.35.2", + controlPlaneVersion: "v1.33.1", + machineDeploymentVersion: "v1.31.1", + machinePoolVersion: "v1.31.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.34.2", "v1.35.2"}, []string{"v1.34.2", "v1.35.2"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.34.2", "v1.35.2"}, + wantMachineDeploymentUpgradePlan: []string{"v1.34.2", "v1.35.2"}, + wantMachinePoolUpgradePlan: []string{"v1.34.2", "v1.35.2"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with custom worker version + version skew versions + last version (after CP upgrade to v1.34)", + topologyVersion: "v1.35.2", + controlPlaneVersion: "v1.34.2", + machineDeploymentVersion: "v1.31.1", + machinePoolVersion: "v1.31.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.35.2"}, []string{"v1.34.2", "v1.35.2"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.35.2"}, + wantMachineDeploymentUpgradePlan: []string{"v1.34.2", "v1.35.2"}, + wantMachinePoolUpgradePlan: []string{"v1.34.2", "v1.35.2"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with custom worker version + version skew versions + last version (after CP and MD upgrade to v1.34)", + topologyVersion: "v1.35.2", + controlPlaneVersion: "v1.34.2", + machineDeploymentVersion: "v1.34.2", + machinePoolVersion: "v1.31.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.35.2"}, []string{"v1.34.2", "v1.35.2"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.35.2"}, + wantMachineDeploymentUpgradePlan: []string{"v1.34.2", "v1.35.2"}, + wantMachinePoolUpgradePlan: []string{"v1.34.2", "v1.35.2"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with custom worker 
version + version skew versions + last version (after CP, MD and MP upgrade to v1.34)", + topologyVersion: "v1.35.2", + controlPlaneVersion: "v1.34.2", + machineDeploymentVersion: "v1.34.2", + machinePoolVersion: "v1.34.2", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.35.2"}, []string{"v1.35.2"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.35.2"}, + wantMachineDeploymentUpgradePlan: []string{"v1.35.2"}, + wantMachinePoolUpgradePlan: []string{"v1.35.2"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with custom worker version + version skew versions + last version (after CP upgrade to v1.35)", + topologyVersion: "v1.35.2", + controlPlaneVersion: "v1.35.2", + machineDeploymentVersion: "v1.34.2", + machinePoolVersion: "v1.34.2", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return nil, []string{"v1.35.2"}, nil + }, + wantControlPlaneUpgradePlan: nil, + wantMachineDeploymentUpgradePlan: []string{"v1.35.2"}, + wantMachinePoolUpgradePlan: []string{"v1.35.2"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with custom worker version + version skew versions + last version (after CP and MD upgrade to v1.35)", + topologyVersion: "v1.35.2", + controlPlaneVersion: "v1.35.2", + machineDeploymentVersion: "v1.35.2", + machinePoolVersion: "v1.34.2", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return nil, []string{"v1.35.2"}, nil + }, + wantControlPlaneUpgradePlan: nil, + wantMachineDeploymentUpgradePlan: []string{"v1.35.2"}, + wantMachinePoolUpgradePlan: []string{"v1.35.2"}, + }, + { + name: "Return control plane and machine deployment upgrade plan with custom worker version + version skew versions + last version (after CP, MD and MP upgrade to v1.35)", + topologyVersion: "v1.35.2", + controlPlaneVersion: "v1.35.2", + machineDeploymentVersion: "v1.35.2", + machinePoolVersion: "v1.35.2", + }, + + // 
allows upgrades plans with many patch version for the same minor + { + name: "Return control plane and machine deployment upgrade plan with last version + version skew versions", + topologyVersion: "v1.34.1", + controlPlaneVersion: "v1.30.0", + machineDeploymentVersion: "v1.30.0", + machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1", "v1.31.2", "v1.32.0", "v1.33.1", "v1.33.2", "v1.34.1"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.31.1", "v1.31.2", "v1.32.0", "v1.33.1", "v1.33.2", "v1.34.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.33.2", "v1.34.1"}, + wantMachinePoolUpgradePlan: []string{"v1.33.2", "v1.34.1"}, + }, + + // allows upgrades with pre-release versions + { + name: "Return control plane and machine deployment upgrade plan with last version + version skew versions", + topologyVersion: "v1.34.1", + controlPlaneVersion: "v1.30.0", + machineDeploymentVersion: "v1.30.0", + machinePoolVersion: "v1.30.0", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1", "v1.32.0", "v1.33.2", "v1.34.1-beta.0", "v1.34.1-rc.0", "v1.34.1"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.31.1", "v1.32.0", "v1.33.2", "v1.34.1-beta.0", "v1.34.1-rc.0", "v1.34.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.33.2", "v1.34.1"}, + wantMachinePoolUpgradePlan: []string{"v1.33.2", "v1.34.1"}, + }, + + // upgrade sequence 6: when using build tags + { + name: "Return control plane and machine deployment upgrade plan when using build tags", + topologyVersion: "v1.32.1+foo.1-bar.1", + controlPlaneVersion: "v1.30.0+foo.1-bar.1", + machineDeploymentVersion: "v1.30.0+foo.1-bar.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1+foo.1-bar.1", "v1.31.1+foo.2-bar.1", "v1.32.1+foo.1-bar.1"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.31.1+foo.1-bar.1", 
"v1.31.1+foo.2-bar.1", "v1.32.1+foo.1-bar.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.32.1+foo.1-bar.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan when using build tags (after CP upgrade to v1.31.1+foo.1-bar.1)", + topologyVersion: "v1.32.1+foo.1-bar.1", + controlPlaneVersion: "v1.31.1+foo.1-bar.1", + machineDeploymentVersion: "v1.30.0+foo.1-bar.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1+foo.2-bar.1", "v1.32.1+foo.1-bar.1"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.31.1+foo.2-bar.1", "v1.32.1+foo.1-bar.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.32.1+foo.1-bar.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan when using build tags (after CP upgrade to v1.31.1+foo.2-bar.1)", + topologyVersion: "v1.32.1+foo.1-bar.1", + controlPlaneVersion: "v1.31.1+foo.2-bar.1", + machineDeploymentVersion: "v1.30.0+foo.1-bar.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.32.1+foo.1-bar.1"}, nil, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.32.1+foo.1-bar.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.32.1+foo.1-bar.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan when using build tags (after CP upgrade to v1.32.1+foo.1-bar.1)", + topologyVersion: "v1.32.1+foo.1-bar.1", + controlPlaneVersion: "v1.32.1+foo.1-bar.1", + machineDeploymentVersion: "v1.30.0+foo.1-bar.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return nil, nil, nil + }, + wantControlPlaneUpgradePlan: nil, + wantMachineDeploymentUpgradePlan: []string{"v1.32.1+foo.1-bar.1"}, + }, + { + name: "Return control plane and machine deployment upgrade plan when using build tags (after CP and MD upgrade to v1.32.1+foo.1-bar.1)", + topologyVersion: "v1.32.1+foo.1-bar.1", + controlPlaneVersion: "v1.32.1+foo.1-bar.1", + 
machineDeploymentVersion: "v1.32.1+foo.1-bar.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return nil, nil, nil + }, + wantControlPlaneUpgradePlan: nil, + wantMachineDeploymentUpgradePlan: nil, + }, + + // fails if control plane plan and workers upgrade plan do not agree on ordering of version with the same major.minor.patch but different build tags + { + name: "Fails if control plane plan and workers upgrade plan do not agree on ordering", + topologyVersion: "v1.32.1+foo.1-bar.1", + controlPlaneVersion: "v1.30.0+foo.1-bar.1", + machineDeploymentVersion: "v1.30.0+foo.1-bar.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1+foo.1-bar.1", "v1.31.1+foo.2-bar.1", "v1.32.1+foo.1-bar.1"}, []string{"v1.31.1+foo.2-bar.1", "v1.31.1+foo.1-bar.1", "v1.32.1+foo.1-bar.1"}, nil + }, + wantErr: true, + wantErrMessage: "invalid workers upgrade plan, item 1; version v1.31.1+foo.1-bar.1 must be before v1.31.1+foo.2-bar.1", + }, + { + name: "Pass if control plane plan and workers upgrade plan do agree on ordering (chained upgrades)", + topologyVersion: "v1.32.1+foo.1-bar.1", + controlPlaneVersion: "v1.30.0+foo.1-bar.1", + machineDeploymentVersion: "v1.30.0+foo.1-bar.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1+foo.1-bar.1", "v1.31.1+foo.2-bar.1", "v1.32.1+foo.1-bar.1"}, []string{"v1.31.1+foo.1-bar.1", "v1.31.1+foo.2-bar.1", "v1.32.1+foo.1-bar.1"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.31.1+foo.1-bar.1", "v1.31.1+foo.2-bar.1", "v1.32.1+foo.1-bar.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.31.1+foo.1-bar.1", "v1.31.1+foo.2-bar.1", "v1.32.1+foo.1-bar.1"}, + }, + { + name: "Pass if control plane plan and workers upgrade plan do agree on ordering (chained upgrades)", + topologyVersion: "v1.32.1+foo.1-bar.1", + controlPlaneVersion: "v1.31.1+foo.1-bar.1", + machineDeploymentVersion: "v1.30.0+foo.1-bar.1", + F: func(_ 
context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1+foo.2-bar.1", "v1.32.1+foo.1-bar.1"}, []string{"v1.31.1+foo.1-bar.1", "v1.31.1+foo.2-bar.1", "v1.32.1+foo.1-bar.1"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.31.1+foo.2-bar.1", "v1.32.1+foo.1-bar.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.31.1+foo.1-bar.1", "v1.31.1+foo.2-bar.1", "v1.32.1+foo.1-bar.1"}, + }, + { + name: "Pass if control plane plan and workers upgrade plan do agree on ordering (chained upgrade when skipping versions)", + topologyVersion: "v1.32.1+foo.1-bar.1", + controlPlaneVersion: "v1.30.0+foo.1-bar.1", + machineDeploymentVersion: "v1.30.0+foo.1-bar.1", + F: func(_ context.Context, _, _, _ string) ([]string, []string, error) { + return []string{"v1.31.1+foo.1-bar.1", "v1.31.1+foo.2-bar.1", "v1.32.1+foo.1-bar.1"}, []string{"v1.31.1+foo.1-bar.1", "v1.32.1+foo.1-bar.1"}, nil + }, + wantControlPlaneUpgradePlan: []string{"v1.31.1+foo.1-bar.1", "v1.31.1+foo.2-bar.1", "v1.32.1+foo.1-bar.1"}, + wantMachineDeploymentUpgradePlan: []string{"v1.31.1+foo.1-bar.1", "v1.32.1+foo.1-bar.1"}, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + s := &scope.Scope{ + Blueprint: &scope.ClusterBlueprint{Topology: clusterv1.Topology{ + Version: tt.topologyVersion, + }}, + Current: &scope.ClusterState{}, + UpgradeTracker: scope.NewUpgradeTracker(), + } + s.Current.ControlPlane = &scope.ControlPlaneState{} + if tt.controlPlaneVersion != "" { + s.Current.ControlPlane.Object = builder.ControlPlane("test1", "cp1"). 
+ WithSpecFields(map[string]interface{}{ + "spec.version": tt.controlPlaneVersion, + }).Build() + } + if tt.machineDeploymentVersion != "" { + s.Current.MachineDeployments = map[string]*scope.MachineDeploymentState{ + "md1": { + Object: builder.MachineDeployment("test1", "md1").WithVersion(tt.machineDeploymentVersion).Build(), + }, + } + } + if tt.machinePoolVersion != "" { + s.Current.MachinePools = map[string]*scope.MachinePoolState{ + "mp1": { + Object: builder.MachinePool("test1", "mp1").WithVersion(tt.machinePoolVersion).Build(), + }, + } + } + + err := ComputeUpgradePlan(ctx, s, tt.F) + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + g.Expect(err.Error()).To(Equal(tt.wantErrMessage)) + } else { + g.Expect(err).ToNot(HaveOccurred()) + + if tt.F != nil { + computedControlPlaneUpgradePlan, _, err := tt.F(nil, "", "", "") + g.Expect(err).ToNot(HaveOccurred()) + // Ensure the computed control plane upgrade plan is not modified later in ComputeUpgradePlan. + g.Expect(computedControlPlaneUpgradePlan).To(Equal(tt.wantControlPlaneUpgradePlan)) + } + } + g.Expect(s.UpgradeTracker.ControlPlane.UpgradePlan).To(Equal(tt.wantControlPlaneUpgradePlan)) + g.Expect(s.UpgradeTracker.MachineDeployments.UpgradePlan).To(Equal(tt.wantMachineDeploymentUpgradePlan)) + g.Expect(s.UpgradeTracker.MachinePools.UpgradePlan).To(Equal(tt.wantMachinePoolUpgradePlan)) + }) + } +} + +func TestGetUpgradePlanOneMinor(t *testing.T) { + tests := []struct { + name string + desiredVersion string + currentControlPlaneVersion string + wantControlPlaneUpgradePlan []string + wantWorkersUpgradePlan []string + wantErr bool + }{ + { + name: "return empty plans if everything is up to date", + desiredVersion: "v1.31.0", + currentControlPlaneVersion: "v1.31.0", + wantControlPlaneUpgradePlan: nil, + wantWorkersUpgradePlan: nil, + wantErr: false, + }, + { + name: "return control plane upgrade plan", + desiredVersion: "v1.32.0", + currentControlPlaneVersion: "v1.31.0", + wantControlPlaneUpgradePlan: 
[]string{"v1.32.0"}, + wantWorkersUpgradePlan: nil, + wantErr: false, + }, + { + name: "return control plane upgrade plan with pre-release version", + desiredVersion: "v1.32.0-alpha.2", + currentControlPlaneVersion: "v1.32.0-alpha.1", + wantControlPlaneUpgradePlan: []string{"v1.32.0-alpha.2"}, + wantWorkersUpgradePlan: nil, + wantErr: false, + }, + { + name: "return control plane upgrade plan with build tags", + desiredVersion: "v1.32.0+foo.2-bar.1", + currentControlPlaneVersion: "v1.32.0+foo.1-bar.1", + wantControlPlaneUpgradePlan: []string{"v1.32.0+foo.2-bar.1"}, + wantWorkersUpgradePlan: nil, + wantErr: false, + }, + { + name: "fails when upgrading by more than one version", + desiredVersion: "v1.33.0", + currentControlPlaneVersion: "v1.31.0", + wantControlPlaneUpgradePlan: nil, + wantWorkersUpgradePlan: nil, + wantErr: true, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + controlPlaneUpgradePlan, workersUpgradePlan, err := GetUpgradePlanOneMinor(ctx, tt.desiredVersion, tt.currentControlPlaneVersion, "") + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).ToNot(HaveOccurred()) + } + g.Expect(controlPlaneUpgradePlan).To(Equal(tt.wantControlPlaneUpgradePlan)) + g.Expect(workersUpgradePlan).To(Equal(tt.wantWorkersUpgradePlan)) + }) + } +} + +func TestGetUpgradePlanFromClusterClassVersions(t *testing.T) { + tests := []struct { + name string + desiredVersion string + clusterClassVersions []string + currentControlPlaneVersion string + wantControlPlaneUpgradePlan []string + wantWorkersUpgradePlan []string + wantErr bool + }{ + { + name: "return empty plans if everything is up to date", + clusterClassVersions: []string{"v1.31.0"}, + desiredVersion: "v1.31.0", + currentControlPlaneVersion: "v1.31.0", + wantControlPlaneUpgradePlan: nil, + wantWorkersUpgradePlan: nil, + wantErr: false, + }, + { + name: "return control plane upgrade plan for one minor", + clusterClassVersions: []string{"v1.31.0", 
"v1.32.0", "v1.32.1"}, + desiredVersion: "v1.32.0", + currentControlPlaneVersion: "v1.31.0", + wantControlPlaneUpgradePlan: []string{"v1.32.0"}, + wantWorkersUpgradePlan: nil, + wantErr: false, + }, + { + name: "return control plane upgrade plan for more than one minor", + clusterClassVersions: []string{"v1.31.0", "v1.32.0", "v1.33.0"}, + desiredVersion: "v1.33.0", + currentControlPlaneVersion: "v1.31.0", + wantControlPlaneUpgradePlan: []string{"v1.32.0", "v1.33.0"}, + wantWorkersUpgradePlan: nil, + wantErr: false, + }, + { + name: "pick latest for every minor", + clusterClassVersions: []string{"v1.31.0", "v1.31.1", "v1.32.0", "v1.32.2", "v1.32.3", "v1.33.0", "v1.33.1"}, + desiredVersion: "v1.33.1", + currentControlPlaneVersion: "v1.31.0", + wantControlPlaneUpgradePlan: []string{"v1.32.3", "v1.33.1"}, + wantWorkersUpgradePlan: nil, + wantErr: false, + }, + { + name: "pick latest for every minor with pro-release", + clusterClassVersions: []string{"v1.31.0", "v1.31.1", "v1.32.0-alpha.0", "v1.32.0-alpha.1", "v1.32.0-alpha.2", "v1.33.0", "v1.33.1"}, + desiredVersion: "v1.33.1", + currentControlPlaneVersion: "v1.31.0", + wantControlPlaneUpgradePlan: []string{"v1.32.0-alpha.2", "v1.33.1"}, + wantWorkersUpgradePlan: nil, + wantErr: false, + }, + { + name: "pick latest for every minor with build tags", + clusterClassVersions: []string{"v1.31.0", "v1.31.1", "v1.32.0+foo.1-bar.1", "v1.32.0+foo.2-bar.1", "v1.32.0+foo.2-bar.2", "v1.33.0", "v1.33.1"}, + desiredVersion: "v1.33.1", + currentControlPlaneVersion: "v1.31.0", + wantControlPlaneUpgradePlan: []string{"v1.32.0+foo.2-bar.2", "v1.33.1"}, + wantWorkersUpgradePlan: nil, + wantErr: false, + }, + + // Kubernetes versions in CC is set when clusters already exists + + { + name: "best effort control plane upgrade plan if current version is not on the list of Kubernetes versions (happy path, case 1)", + clusterClassVersions: []string{"v1.31.1", "v1.32.0", "v1.33.0"}, + desiredVersion: "v1.33.0", + currentControlPlaneVersion: 
"v1.31.0", // v1.31.0 is not in the kubernetes versions, but it is older than the first version on the same minor + wantControlPlaneUpgradePlan: []string{"v1.32.0", "v1.33.0"}, + wantWorkersUpgradePlan: nil, + wantErr: false, + }, + { + name: "best effort control plane upgrade plan if current version is not on the list of Kubernetes versions (happy path, case 2)", + clusterClassVersions: []string{"v1.31.0", "v1.32.0", "v1.33.0"}, + desiredVersion: "v1.33.0", + currentControlPlaneVersion: "v1.31.2", // v1.31.2 is not in the kubernetes versions, but it is newer than the first version on the same minor + wantControlPlaneUpgradePlan: []string{"v1.32.0", "v1.33.0"}, + wantWorkersUpgradePlan: nil, + wantErr: false, + }, + { + name: "best effort control plane upgrade plan if current version is not on the list of Kubernetes versions (not happy path)", + clusterClassVersions: []string{"v1.32.0", "v1.33.0"}, + desiredVersion: "v1.33.0", + currentControlPlaneVersion: "v1.30.2", + wantControlPlaneUpgradePlan: []string{"v1.32.0", "v1.33.0"}, // No version for 1.31, ComputeUpgradePlan will fail + wantWorkersUpgradePlan: nil, + wantErr: false, + }, + + // TODO: Kubernetes versions in CC is set after setting target version (when an upgrade is in progress) + + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + f := GetUpgradePlanFromClusterClassVersions(tt.clusterClassVersions) + controlPlaneUpgradePlan, workersUpgradePlan, err := f(nil, tt.desiredVersion, tt.currentControlPlaneVersion, "") + if tt.wantErr { + g.Expect(err).To(HaveOccurred()) + } else { + g.Expect(err).ToNot(HaveOccurred()) + } + g.Expect(controlPlaneUpgradePlan).To(Equal(tt.wantControlPlaneUpgradePlan)) + g.Expect(workersUpgradePlan).To(Equal(tt.wantWorkersUpgradePlan)) + }) + } +} diff --git a/exp/topology/scope/upgradetracker.go b/exp/topology/scope/upgradetracker.go index a13cbbf6ef68..8a1a47a306d1 100644 --- a/exp/topology/scope/upgradetracker.go +++ 
b/exp/topology/scope/upgradetracker.go @@ -36,6 +36,23 @@ type ControlPlaneUpgradeTracker struct { // - Upgrade is blocked because any of the current MachineDeployments or MachinePools are upgrading. IsPendingUpgrade bool + // IsWaitingForWorkersUpgrade documents when a Control Plane is pending a version upgrade but + // it cannot pick up the new version until workers upgrades. + // Note: this happens when performing a multistep upgrade, and the current upgrade step requires + // also workers to upgrade, e.g. for preventing violation of the rule that defines the max + // version skew between control plane and workers. + IsWaitingForWorkersUpgrade bool + + // UpgradePlan tracks the list of version upgrades required to reach the desired version. + // The following rules apply: + // - there should be at least one version for every minor between currentControlPlaneVersion (excluded) and desiredVersion (included). + // - each version must be: + // - greater than currentControlPlaneVersion (or with a different build number) + // - greater than the previous version in the list (or with a different build number) + // - less or equal to desiredVersion (or with a different build number) + // - the last version in the plan must be equal to the desired version + UpgradePlan []string + // IsProvisioning is true if the current Control Plane is being provisioned for the first time. False otherwise. IsProvisioning bool @@ -70,6 +87,20 @@ type WorkerUpgradeTracker struct { // By marking a MachineDeployment/MachinePool as pendingUpgrade we skip reconciling the MachineDeployment/MachinePool. pendingUpgradeNames sets.Set[string] + // UpgradePlan tracks the list of version upgrades required to reach the desired version. + // the following rules apply: + // - each version must be: + // - equal to currentControlPlaneVersion or to one of the versions in the control plane upgrade plan. 
+ // - greater than current min worker - MachineDeployment & MachinePool - version (or with a different build number) + // - greater than the previous version in the list (or with a different build number) + // - less or equal to the desiredVersion (or with a different build number) + // - in case of versions with the same major/minor/patch version but different build number, also the order + // of those versions must be the same for control plane and worker upgrade plan. + // - the last version in the plan must be equal to the desired version + // - the upgrade plan must have all the intermediate versions which workers must go through to avoid breaking rules + // defining the max version skew between control plane and workers. + UpgradePlan []string + + // deferredNames is the set of MachineDeployment/MachinePool names that are not going to pick up the new version + // in the current reconcile loop because they are deferred by the user. + // Note: If a MachineDeployment/MachinePool is marked as deferred it should also be marked as pendingUpgrade. @@ -179,6 +210,33 @@ func (t *ControlPlaneUpgradeTracker) IsControlPlaneStable() bool { return true } +// IsControlPlaneStableOrWaitingForWorkersUpgrade returns true if the ControlPlane is stable or waiting for worker upgrade. +func (t *ControlPlaneUpgradeTracker) IsControlPlaneStableOrWaitingForWorkersUpgrade() bool { + // If the current control plane is provisioning it is not considered stable. + if t.IsProvisioning { + return false + } + + // If the current control plane is upgrading it is not considered stable. + if t.IsUpgrading { + return false + } + + // Check if we are about to upgrade the control plane. Since the control plane is about to start its upgrade process + // it cannot be considered stable. 
+ if t.IsStartingUpgrade { + return false + } + + // If the ControlPlane is pending picking up an upgrade then it is not yet at the desired state and + // cannot be considered stable unless the control plane is waiting for worker upgrade. + if t.IsPendingUpgrade && !t.IsWaitingForWorkersUpgrade { + return false + } + + return true +} + // MarkUpgrading marks a MachineDeployment/MachinePool as currently upgrading or about to upgrade. func (m *WorkerUpgradeTracker) MarkUpgrading(names ...string) { for _, name := range names { diff --git a/internal/api/core/v1alpha4/conversion.go b/internal/api/core/v1alpha4/conversion.go index 6e9ee3e88058..0f49ec959472 100644 --- a/internal/api/core/v1alpha4/conversion.go +++ b/internal/api/core/v1alpha4/conversion.go @@ -248,6 +248,7 @@ func (src *ClusterClass) ConvertTo(dstRaw conversion.Hub) error { dst.Spec.ControlPlane.Deletion.NodeVolumeDetachTimeoutSeconds = restored.Spec.ControlPlane.Deletion.NodeVolumeDetachTimeoutSeconds dst.Spec.ControlPlane.Deletion.NodeDeletionTimeoutSeconds = restored.Spec.ControlPlane.Deletion.NodeDeletionTimeoutSeconds dst.Spec.Workers.MachinePools = restored.Spec.Workers.MachinePools + dst.Spec.KubernetesVersions = restored.Spec.KubernetesVersions for i := range restored.Spec.Workers.MachineDeployments { dst.Spec.Workers.MachineDeployments[i].HealthCheck = restored.Spec.Workers.MachineDeployments[i].HealthCheck diff --git a/internal/api/core/v1alpha4/zz_generated.conversion.go b/internal/api/core/v1alpha4/zz_generated.conversion.go index 308061fec55c..85bbb1cda8d5 100644 --- a/internal/api/core/v1alpha4/zz_generated.conversion.go +++ b/internal/api/core/v1alpha4/zz_generated.conversion.go @@ -683,6 +683,7 @@ func autoConvert_v1beta2_ClusterClassSpec_To_v1alpha4_ClusterClassSpec(in *v1bet } // WARNING: in.Variables requires manual conversion: does not exist in peer-type // WARNING: in.Patches requires manual conversion: does not exist in peer-type + // WARNING: in.KubernetesVersions requires manual 
conversion: does not exist in peer-type return nil } diff --git a/internal/controllers/machineset/machineset_preflight.go b/internal/controllers/machineset/machineset_preflight.go index f983f3279939..c21cdcbea5d3 100644 --- a/internal/controllers/machineset/machineset_preflight.go +++ b/internal/controllers/machineset/machineset_preflight.go @@ -150,8 +150,19 @@ func (r *Reconciler) controlPlaneStablePreflightCheck(controlPlane *unstructured cpKlogRef := klog.KRef(controlPlane.GetNamespace(), controlPlane.GetName()) if feature.Gates.Enabled(feature.ClusterTopology) { - if cluster.Spec.Topology.IsDefined() && cluster.Spec.Topology.Version != controlPlaneVersion { - return ptr.To(fmt.Sprintf("%s %s has a pending version upgrade to %s (%q preflight check failed)", controlPlane.GetKind(), cpKlogRef, cluster.Spec.Topology.Version, clusterv1.MachineSetPreflightCheckControlPlaneIsStable)), nil + // Block when we expect an upgrade to be propagated to the control plane for topology clusters. + // NOTE: in case the cluster is performing an upgrade, allow creation of machines for the current step. 
+ hasSameVersionOfCurrentUpgradeStep := false + if version, ok := cluster.GetAnnotations()[clusterv1.ClusterTopologyUpgradeStepAnnotation]; ok { + hasSameVersionOfCurrentUpgradeStep = version == controlPlaneVersion + } + + if cluster.Spec.Topology.IsDefined() && cluster.Spec.Topology.Version != controlPlaneVersion && !hasSameVersionOfCurrentUpgradeStep { + v := cluster.Spec.Topology.Version + if version, ok := cluster.GetAnnotations()[clusterv1.ClusterTopologyUpgradeStepAnnotation]; ok { + v = version + } + return ptr.To(fmt.Sprintf("%s %s has a pending version upgrade to %s (%q preflight check failed)", controlPlane.GetKind(), cpKlogRef, v, clusterv1.MachineSetPreflightCheckControlPlaneIsStable)), nil } } diff --git a/internal/controllers/machineset/machineset_preflight_test.go b/internal/controllers/machineset/machineset_preflight_test.go index e4b0ed71ebd0..b0a4b5c857cc 100644 --- a/internal/controllers/machineset/machineset_preflight_test.go +++ b/internal/controllers/machineset/machineset_preflight_test.go @@ -189,6 +189,29 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { }, wantErr: false, }, + { + name: "control plane preflight check: should fail if the cluster defines a different version than the control plane, and the control plane is not yet at the current step of the upgrade plan", + cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: ns, + Annotations: map[string]string{ + clusterv1.ClusterTopologyUpgradeStepAnnotation: "v1.27.0", + }, + }, + Spec: clusterv1.ClusterSpec{ + ControlPlaneRef: contract.ObjToContractVersionedObjectReference(controlPlaneStable), + Topology: clusterv1.Topology{ + Version: "v1.27.2", + }, + }, + }, + controlPlane: controlPlaneStable, + machineSet: &clusterv1.MachineSet{}, + wantMessages: []string{ + "GenericControlPlane ns1/cp1 has a pending version upgrade to v1.27.0 (\"ControlPlaneIsStable\" preflight check failed)", + }, + wantErr: false, + }, { name: "control plane preflight check: 
should pass if the control plane is upgrading but the preflight check is skipped", cluster: &clusterv1.Cluster{ @@ -234,6 +257,27 @@ func TestMachineSetReconciler_runPreflightChecks(t *testing.T) { wantMessages: nil, wantErr: false, }, + { + name: "control plane preflight check: should pass if the control plane is stable, and the control plane is at the current step of the upgrade plan", + cluster: &clusterv1.Cluster{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: ns, + Annotations: map[string]string{ + clusterv1.ClusterTopologyUpgradeStepAnnotation: "v1.28.0", + }, + }, + Spec: clusterv1.ClusterSpec{ + ControlPlaneRef: contract.ObjToContractVersionedObjectReference(controlPlaneStable128), + Topology: clusterv1.Topology{ + Version: "v1.27.2", + }, + }, + }, + controlPlane: controlPlaneStable128, + machineSet: &clusterv1.MachineSet{}, + wantMessages: nil, + wantErr: false, + }, { name: "should pass if the machine set version is not defined", cluster: &clusterv1.Cluster{ diff --git a/internal/controllers/topology/cluster/structuredmerge/options.go b/internal/controllers/topology/cluster/structuredmerge/options.go index 0b94c8c1948b..b5a8ccaedf23 100644 --- a/internal/controllers/topology/cluster/structuredmerge/options.go +++ b/internal/controllers/topology/cluster/structuredmerge/options.go @@ -52,6 +52,7 @@ var ( {"kind"}, {"metadata", "name"}, {"metadata", "namespace"}, + {"metadata", "annotations", clusterv1.ClusterTopologyUpgradeStepAnnotation}, // uid is optional for a server side apply intent but sets the expectation of an object getting created or a specific one updated. 
{"metadata", "uid"}, // the topology controller controls/has an opinion for the labels ClusterNameLabel diff --git a/internal/webhooks/cluster.go b/internal/webhooks/cluster.go index 706334dcbd53..2023063f9444 100644 --- a/internal/webhooks/cluster.go +++ b/internal/webhooks/cluster.go @@ -391,7 +391,11 @@ func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newClu log.Info(warningMsg) allWarnings = append(allWarnings, warningMsg) } else { - if err := webhook.validateTopologyVersion(ctx, fldPath.Child("version"), newCluster.Spec.Topology.Version, inVersion, oldVersion, oldCluster); err != nil { + // TODO(chained-upgrade): handle properly when upgrade paths are called using a runtime extension. + + // NOTE: We validate the version ceiling only if we can't validate the version against versions defined in the ClusterClass. + shouldValidateVersionCeiling := len(clusterClass.Spec.KubernetesVersions) == 0 + if err := webhook.validateTopologyVersionUpdate(ctx, fldPath.Child("version"), newCluster.Spec.Topology.Version, inVersion, oldVersion, oldCluster, shouldValidateVersionCeiling); err != nil { allErrs = append(allErrs, err) } } @@ -429,7 +433,7 @@ func (webhook *Cluster) validateTopology(ctx context.Context, oldCluster, newClu return allWarnings, allErrs } -func (webhook *Cluster) validateTopologyVersion(ctx context.Context, fldPath *field.Path, fldValue string, inVersion, oldVersion semver.Version, oldCluster *clusterv1.Cluster) *field.Error { +func (webhook *Cluster) validateTopologyVersionUpdate(ctx context.Context, fldPath *field.Path, fldValue string, inVersion, oldVersion semver.Version, oldCluster *clusterv1.Cluster, shouldValidateCeiling bool) *field.Error { // Nothing to do if the version doesn't change. if inVersion.String() == oldVersion.String() { return nil @@ -444,18 +448,20 @@ func (webhook *Cluster) validateTopologyVersion(ctx context.Context, fldPath *fi ) } - // A +2 minor version upgrade is not allowed. 
- ceilVersion := semver.Version{ - Major: oldVersion.Major, - Minor: oldVersion.Minor + 2, - Patch: 0, - } - if version.Compare(inVersion, ceilVersion, version.WithoutPreReleases()) >= 0 { - return field.Invalid( - fldPath, - fldValue, - fmt.Sprintf("version cannot be increased from %q to %q", oldVersion, inVersion), - ) + if shouldValidateCeiling { + // A +2 minor version upgrade is not allowed. + ceilVersion := semver.Version{ + Major: oldVersion.Major, + Minor: oldVersion.Minor + 2, + Patch: 0, + } + if version.Compare(inVersion, ceilVersion, version.WithoutPreReleases()) >= 0 { + return field.Invalid( + fldPath, + fldValue, + fmt.Sprintf("version cannot be increased from %q to %q", oldVersion, inVersion), + ) + } } allErrs := []error{} @@ -888,6 +894,25 @@ func ValidateClusterForClusterClass(cluster *clusterv1.Cluster, clusterClass *cl if clusterClass == nil { return field.ErrorList{field.InternalError(field.NewPath(""), errors.New("ClusterClass can not be nil"))} } + + // If the ClusterClass defines a list of versions, check the version is one of them. + if len(clusterClass.Spec.KubernetesVersions) > 0 { + found := false + for _, clusterClassVersion := range clusterClass.Spec.KubernetesVersions { + if clusterClassVersion == cluster.Spec.Topology.Version { + found = true + break + } + } + if !found { + allErrs = append(allErrs, field.Invalid( + field.NewPath("spec", "topology", "version"), + cluster.Spec.Topology.Version, + "version must match one of the versions defined in the ClusterClass", + )) + } + } + allErrs = append(allErrs, check.MachineDeploymentTopologiesAreValidAndDefinedInClusterClass(cluster, clusterClass)...) allErrs = append(allErrs, check.MachinePoolTopologiesAreValidAndDefinedInClusterClass(cluster, clusterClass)...) 
diff --git a/internal/webhooks/cluster_test.go b/internal/webhooks/cluster_test.go index e5913f721016..ec114b445d01 100644 --- a/internal/webhooks/cluster_test.go +++ b/internal/webhooks/cluster_test.go @@ -1675,12 +1675,13 @@ func TestClusterTopologyValidation(t *testing.T) { utilfeature.SetFeatureGateDuringTest(t, feature.Gates, feature.ClusterTopology, true) tests := []struct { - name string - in *clusterv1.Cluster - old *clusterv1.Cluster - additionalObjects []client.Object - expectErr bool - expectWarning bool + name string + in *clusterv1.Cluster + old *clusterv1.Cluster + additionalObjects []client.Object + clusterClassVersions []string + expectErr bool + expectWarning bool }{ { name: "should return error when topology does not have class", @@ -1778,6 +1779,31 @@ func TestClusterTopologyValidation(t *testing.T) { Build()). Build(), }, + { + name: "should pass when changing build tag - not sortable", + expectErr: false, + old: builder.Cluster("fooboo", "cluster1"). + WithControlPlane(builder.ControlPlane("fooboo", "cluster1-cp").Build()). + WithTopology(builder.ClusterTopology(). + WithClass("foo"). + WithVersion("v1.2.3+ANCBG0"). + Build()). + Build(), + in: builder.Cluster("fooboo", "cluster1"). + WithControlPlane(builder.ControlPlane("fooboo", "cluster1-cp").Build()). + WithTopology(builder.ClusterTopology(). + WithClass("foo"). + WithVersion("v1.2.3+ANCBG1"). + Build()). + Build(), + additionalObjects: []client.Object{ + // Note: CRD is needed to look up the apiVersion from contract labels. + builder.GenericControlPlaneCRD, + builder.ControlPlane("fooboo", "cluster1-cp").WithVersion("v1.2.3+ANCBG0"). + WithStatusFields(map[string]interface{}{"status.version": "v1.2.3+ANCBG0"}). + Build(), + }, + }, { name: "should return error when upgrading +2 minor version", expectErr: true, @@ -1794,6 +1820,84 @@ func TestClusterTopologyValidation(t *testing.T) { Build()). 
Build(), }, + { + name: "fails when kubernetes version are defined in CC and version does not match (on create)", + expectErr: true, + in: builder.Cluster("fooboo", "cluster1"). + WithTopology(builder.ClusterTopology(). + WithClass("foo"). + WithVersion("v1.3.2"). + Build()). + Build(), + clusterClassVersions: []string{"v1.2.3", "v1.3.1", "v1.4.0"}, + }, + { + name: "fails when kubernetes version are defined in CC and version does not match", + expectErr: true, + old: builder.Cluster("fooboo", "cluster1"). + WithTopology(builder.ClusterTopology(). + WithClass("foo"). + WithVersion("v1.2.3"). + Build()). + Build(), + in: builder.Cluster("fooboo", "cluster1"). + WithTopology(builder.ClusterTopology(). + WithClass("foo"). + WithVersion("v1.3.2"). + Build()). + Build(), + clusterClassVersions: []string{"v1.2.3", "v1.3.1", "v1.4.0"}, + }, + { + name: "should allow upgrading >1 minor version when kubernetes version are defined in CC", + old: builder.Cluster("fooboo", "cluster1"). + WithControlPlane(builder.ControlPlane("fooboo", "cluster1-cp").Build()). + WithTopology(builder.ClusterTopology(). + WithClass("foo"). + WithVersion("v1.2.3"). + Build()). + Build(), + in: builder.Cluster("fooboo", "cluster1"). + WithControlPlane(builder.ControlPlane("fooboo", "cluster1-cp").Build()). + WithTopology(builder.ClusterTopology(). + WithClass("foo"). + WithVersion("v1.4.0"). + Build()). + Build(), + clusterClassVersions: []string{"v1.2.3", "v1.3.1", "v1.4.0"}, + additionalObjects: []client.Object{ + // Note: CRD is needed to look up the apiVersion from contract labels. + builder.GenericControlPlaneCRD, + builder.ControlPlane("fooboo", "cluster1-cp").WithVersion("v1.2.3"). + WithStatusFields(map[string]interface{}{"status.version": "v1.2.3"}). + Build(), + }, + }, + { + name: "should allow upgrading >1 minor version when kubernetes version are defined in CC - with build tags", + old: builder.Cluster("fooboo", "cluster1"). 
+ WithControlPlane(builder.ControlPlane("fooboo", "cluster1-cp").Build()). + WithTopology(builder.ClusterTopology(). + WithClass("foo"). + WithVersion("v1.2.3+ANCBG0"). + Build()). + Build(), + in: builder.Cluster("fooboo", "cluster1"). + WithControlPlane(builder.ControlPlane("fooboo", "cluster1-cp").Build()). + WithTopology(builder.ClusterTopology(). + WithClass("foo"). + WithVersion("v1.4.0+BXCBG0"). + Build()). + Build(), + clusterClassVersions: []string{"v1.2.3+ANCBG0", "v1.3.1+QPAVG0", "v1.4.0+BXCBG0"}, + additionalObjects: []client.Object{ + // Note: CRD is needed to look up the apiVersion from contract labels. + builder.GenericControlPlaneCRD, + builder.ControlPlane("fooboo", "cluster1-cp").WithVersion("v1.2.3+ANCBG0"). + WithStatusFields(map[string]interface{}{"status.version": "v1.2.3+ANCBG0"}). + Build(), + }, + }, { name: "should return error when duplicated MachineDeployments names exists in a Topology", expectErr: true, @@ -2138,6 +2242,10 @@ func TestClusterTopologyValidation(t *testing.T) { ). Build() + if tt.clusterClassVersions != nil { + class.Spec.KubernetesVersions = tt.clusterClassVersions + } + // Mark this condition to true so the webhook sees the ClusterClass as up to date. 
conditions.Set(class, metav1.Condition{ Type: clusterv1.ClusterClassVariablesReadyCondition, diff --git a/internal/webhooks/clusterclass.go b/internal/webhooks/clusterclass.go index d3d7185992bd..f984f369b5a2 100644 --- a/internal/webhooks/clusterclass.go +++ b/internal/webhooks/clusterclass.go @@ -21,6 +21,7 @@ import ( "fmt" "strings" + "github.com/blang/semver/v4" "github.com/pkg/errors" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" @@ -40,6 +41,7 @@ import ( topologynames "sigs.k8s.io/cluster-api/internal/topology/names" "sigs.k8s.io/cluster-api/internal/topology/variables" clog "sigs.k8s.io/cluster-api/util/log" + "sigs.k8s.io/cluster-api/util/version" ) func (webhook *ClusterClass) SetupWebhookWithManager(mgr ctrl.Manager) error { @@ -145,6 +147,9 @@ func (webhook *ClusterClass) validate(ctx context.Context, oldClusterClass, newC // Validate metadata allErrs = append(allErrs, validateClusterClassMetadata(newClusterClass)...) + // Ensure all kubernetes versions are valid. + allErrs = append(allErrs, validateKubernetesVersions(newClusterClass.Spec.KubernetesVersions)...) + // If this is an update run additional validation. if oldClusterClass != nil { // Ensure spec changes are compatible. @@ -501,3 +506,40 @@ func validateAutoscalerAnnotationsForClusterClass(clusters []clusterv1.Cluster, } return allErrs } + +// validateKubernetesVersions iterates over a list of versions and check they are valid. 
+func validateKubernetesVersions(versions []string) field.ErrorList { + var allErrs field.ErrorList + var previousVersion *semver.Version + for i, v := range versions { + semV, err := semver.ParseTolerant(v) + if err != nil { + allErrs = append(allErrs, field.Invalid( + field.NewPath("spec", "kubernetesVersion").Index(i), + v, + "version must be a valid semantic version", + )) + continue + } + if previousVersion != nil { + // Note: we tolerate having one version followed by another with the same major.minor.patch but different build tags (version.Compare==2) + if version.Compare(semV, *previousVersion, version.WithBuildTags()) <= 0 { + allErrs = append(allErrs, field.Invalid( + field.NewPath("spec", "kubernetesVersion").Index(i), + v, + fmt.Sprintf("version must be greater than v%s", previousVersion.String()), + )) + } + + if previousVersion.Minor != semV.Minor && previousVersion.Minor+1 != semV.Minor { + allErrs = append(allErrs, field.Invalid( + field.NewPath("spec", "kubernetesVersion").Index(i), + v, + fmt.Sprintf("expecting a version with minor %d or %d, found version %s", previousVersion.Minor, previousVersion.Minor+1, semV), + )) + } + } + previousVersion = &semV + } + return allErrs +} diff --git a/internal/webhooks/clusterclass_test.go b/internal/webhooks/clusterclass_test.go index 869ebfa2c71d..600fa24c304a 100644 --- a/internal/webhooks/clusterclass_test.go +++ b/internal/webhooks/clusterclass_test.go @@ -232,6 +232,7 @@ func TestClusterClassValidation(t *testing.T) { WithBootstrapTemplate( builder.BootstrapTemplate(metav1.NamespaceDefault, "bootstrap1").Build()). Build()). + WithVersions("v1.31.0", "v1.32.0"). Build(), expectErr: false, }, @@ -1686,6 +1687,113 @@ func TestClusterClassValidation(t *testing.T) { Build(), expectErr: false, }, + + // Versions tests + { + name: "fails with invalid versions", + in: builder.ClusterClass(metav1.NamespaceDefault, "class1"). 
+ WithInfrastructureClusterTemplate( + builder.InfrastructureClusterTemplate(metav1.NamespaceDefault, "infra1").Build()). + WithControlPlaneTemplate( + builder.ControlPlaneTemplate(metav1.NamespaceDefault, "cp1"). + Build()). + WithControlPlaneInfrastructureMachineTemplate( + builder.InfrastructureMachineTemplate(metav1.NamespaceDefault, "cpInfra1"). + Build()). + WithVersions("foo"). + Build(), + expectErr: true, + }, + { + name: "fails with versions in the wrong order", + in: builder.ClusterClass(metav1.NamespaceDefault, "class1"). + WithInfrastructureClusterTemplate( + builder.InfrastructureClusterTemplate(metav1.NamespaceDefault, "infra1").Build()). + WithControlPlaneTemplate( + builder.ControlPlaneTemplate(metav1.NamespaceDefault, "cp1"). + Build()). + WithControlPlaneInfrastructureMachineTemplate( + builder.InfrastructureMachineTemplate(metav1.NamespaceDefault, "cpInfra1"). + Build()). + WithVersions("v1.32.0", "v1.31.0"). + Build(), + expectErr: true, + }, + { + name: "fails when version skip minors", + in: builder.ClusterClass(metav1.NamespaceDefault, "class1"). + WithInfrastructureClusterTemplate( + builder.InfrastructureClusterTemplate(metav1.NamespaceDefault, "infra1").Build()). + WithControlPlaneTemplate( + builder.ControlPlaneTemplate(metav1.NamespaceDefault, "cp1"). + Build()). + WithControlPlaneInfrastructureMachineTemplate( + builder.InfrastructureMachineTemplate(metav1.NamespaceDefault, "cpInfra1"). + Build()). + WithVersions("v1.31.0", "v1.33.0"). + Build(), + expectErr: true, + }, + { + name: "pass with pre-release version", + in: builder.ClusterClass(metav1.NamespaceDefault, "class1"). + WithInfrastructureClusterTemplate( + builder.InfrastructureClusterTemplate(metav1.NamespaceDefault, "infra1").Build()). + WithControlPlaneTemplate( + builder.ControlPlaneTemplate(metav1.NamespaceDefault, "cp1"). + Build()). + WithControlPlaneInfrastructureMachineTemplate( + builder.InfrastructureMachineTemplate(metav1.NamespaceDefault, "cpInfra1"). + Build()). 
+ WithVersions("v1.31.0-alpha.0", "v1.32.0-alpha.0"). + Build(), + expectErr: false, + }, + { + name: "fails with pre-release version in the wrong order", + in: builder.ClusterClass(metav1.NamespaceDefault, "class1"). + WithInfrastructureClusterTemplate( + builder.InfrastructureClusterTemplate(metav1.NamespaceDefault, "infra1").Build()). + WithControlPlaneTemplate( + builder.ControlPlaneTemplate(metav1.NamespaceDefault, "cp1"). + Build()). + WithControlPlaneInfrastructureMachineTemplate( + builder.InfrastructureMachineTemplate(metav1.NamespaceDefault, "cpInfra1"). + Build()). + WithVersions("v1.31.0-alpha.1", "v1.31.0-alpha.0"). + Build(), + expectErr: true, + }, + { + name: "pass with build metadata version", + in: builder.ClusterClass(metav1.NamespaceDefault, "class1"). + WithInfrastructureClusterTemplate( + builder.InfrastructureClusterTemplate(metav1.NamespaceDefault, "infra1").Build()). + WithControlPlaneTemplate( + builder.ControlPlaneTemplate(metav1.NamespaceDefault, "cp1"). + Build()). + WithControlPlaneInfrastructureMachineTemplate( + builder.InfrastructureMachineTemplate(metav1.NamespaceDefault, "cpInfra1"). + Build()). + WithVersions("v1.31.0+foo.1-bar.1", "v1.32.0+foo.1-bar.1"). + Build(), + expectErr: false, + }, + { + name: "pass with pre-release version in the wrong order (build metadata cannot be sorted!)", + in: builder.ClusterClass(metav1.NamespaceDefault, "class1"). + WithInfrastructureClusterTemplate( + builder.InfrastructureClusterTemplate(metav1.NamespaceDefault, "infra1").Build()). + WithControlPlaneTemplate( + builder.ControlPlaneTemplate(metav1.NamespaceDefault, "cp1"). + Build()). + WithControlPlaneInfrastructureMachineTemplate( + builder.InfrastructureMachineTemplate(metav1.NamespaceDefault, "cpInfra1"). + Build()). + WithVersions("v1.31.0+foo.2-bar.1", "v1.31.0+foo.1-bar.1"). 
+ Build(), + expectErr: false, + }, } for _, tt := range tests { diff --git a/scripts/ci-test.sh b/scripts/ci-test.sh index eaeecf158165..eadd6ecb2333 100755 --- a/scripts/ci-test.sh +++ b/scripts/ci-test.sh @@ -27,14 +27,11 @@ source "${REPO_ROOT}/hack/ensure-go.sh" echo "*** Testing Cluster API ***" make test-junit -echo -e "\n*** Testing Cluster API Provider Docker ***\n" -# Docker provider -make test-docker-infrastructure-junit +echo -e "\n*** Testing test/infrastructure folder ***\n" +make test-infrastructure-junit echo -e "\n*** Testing Cluster API Runtime SDK test extension ***\n" -# Test Extension make test-test-extension-junit echo -e "\n*** Testing Cluster API testing framework ***\n" -# Test Framework make test-framework-junit diff --git a/test/e2e/cluster_upgrade.go b/test/e2e/cluster_upgrade.go index 2810ddab437a..b905c593cc6b 100644 --- a/test/e2e/cluster_upgrade.go +++ b/test/e2e/cluster_upgrade.go @@ -171,19 +171,20 @@ func ClusterUpgradeConformanceSpec(ctx context.Context, inputGetter func() Clust // Cluster is using ClusterClass, upgrade via topology. 
By("Upgrading the Cluster topology") framework.UpgradeClusterTopologyAndWaitForUpgrade(ctx, framework.UpgradeClusterTopologyAndWaitForUpgradeInput{ - ClusterProxy: input.BootstrapClusterProxy, - Cluster: clusterResources.Cluster, - ControlPlane: clusterResources.ControlPlane, - EtcdImageTag: etcdVersionUpgradeTo, - DNSImageTag: coreDNSVersionUpgradeTo, - MachineDeployments: clusterResources.MachineDeployments, - MachinePools: clusterResources.MachinePools, - KubernetesUpgradeVersion: input.E2EConfig.MustGetVariable(KubernetesVersionUpgradeTo), - WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), - WaitForMachinePoolToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-pool-upgrade"), - WaitForKubeProxyUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), - WaitForDNSUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), - WaitForEtcdUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + ClusterProxy: input.BootstrapClusterProxy, + Cluster: clusterResources.Cluster, + ControlPlane: clusterResources.ControlPlane, + EtcdImageTag: etcdVersionUpgradeTo, + DNSImageTag: coreDNSVersionUpgradeTo, + MachineDeployments: clusterResources.MachineDeployments, + MachinePools: clusterResources.MachinePools, + KubernetesUpgradeVersion: input.E2EConfig.MustGetVariable(KubernetesVersionUpgradeTo), + WaitForControlPlaneToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-control-plane-upgrade"), + WaitForMachineDeploymentToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-deployment-upgrade"), + WaitForMachinePoolToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-pool-upgrade"), + WaitForKubeProxyUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + WaitForDNSUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + WaitForEtcdUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), 
PreWaitForControlPlaneToBeUpgraded: func() { if input.PreWaitForControlPlaneToBeUpgraded != nil { input.PreWaitForControlPlaneToBeUpgraded(input.BootstrapClusterProxy, namespace.Name, clusterName) diff --git a/test/e2e/cluster_upgrade_runtimesdk.go b/test/e2e/cluster_upgrade_runtimesdk.go index d9ad4c251ad4..67bbae4ea01e 100644 --- a/test/e2e/cluster_upgrade_runtimesdk.go +++ b/test/e2e/cluster_upgrade_runtimesdk.go @@ -32,12 +32,16 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/sets" "k8s.io/klog/v2" "k8s.io/utils/ptr" "sigs.k8s.io/controller-runtime/pkg/client" clusterv1 "sigs.k8s.io/cluster-api/api/core/v1beta2" runtimev1 "sigs.k8s.io/cluster-api/api/runtime/v1beta2" + "sigs.k8s.io/cluster-api/controllers/external" + "sigs.k8s.io/cluster-api/exp/topology/desiredstate" + "sigs.k8s.io/cluster-api/internal/contract" "sigs.k8s.io/cluster-api/test/e2e/internal/log" "sigs.k8s.io/cluster-api/test/framework" "sigs.k8s.io/cluster-api/test/framework/clusterctl" @@ -84,11 +88,19 @@ type ClusterUpgradeWithRuntimeSDKSpecInput struct { // Flavor to use when creating the cluster for testing, "upgrades" is used if not specified. Flavor *string - // Allows to inject a function to be run after test namespace is created. + // KubernetesVersionFrom allows specifying the Kubernetes version to upgrade from for this test. + // If not specified, the KUBERNETES_VERSION_UPGRADE_FROM env variable will be used. + KubernetesVersionFrom string + + // KubernetesVersions allows providing a list of Kubernetes versions to be used for computing upgrade paths. + // This list will be considered only if there is no list in the ClusterClass created from the template. + KubernetesVersions []string + + // Allows injecting a function to be run after test namespace is created. // If not specified, this is a no-op.
PostNamespaceCreated func(managementClusterProxy framework.ClusterProxy, workloadClusterNamespace string) - // Allows to inject a function to be run after the cluster is upgraded. + // Allows injecting a function to be run after the cluster is upgraded. // If not specified, this is a no-op. PostUpgrade func(managementClusterProxy framework.ClusterProxy, workloadClusterNamespace, workloadClusterName string) @@ -112,7 +124,7 @@ type ClusterUpgradeWithRuntimeSDKSpecInput struct { // Upgrading a cluster refers to upgrading the control-plane and worker nodes (managed by MD and machine pools). // NOTE: This test only works with a KubeadmControlPlane. // NOTE: This test works with Clusters with and without ClusterClass. -// When using ClusterClass the ClusterClass must have the variables "etcdImageTag" and "coreDNSImageTag" of type string. +// When using ClusterClass, the ClusterClass must have the variables "etcdImageTag" and "coreDNSImageTag" of type string. // Those variables should have corresponding patches which set the etcd and CoreDNS tags in KCP. func ClusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() ClusterUpgradeWithRuntimeSDKSpecInput) { const ( @@ -139,7 +151,9 @@ func ClusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() Cl Expect(input.BootstrapClusterProxy).ToNot(BeNil(), "Invalid argument. input.BootstrapClusterProxy can't be nil when calling %s spec", specName) Expect(os.MkdirAll(input.ArtifactFolder, 0750)).To(Succeed(), "Invalid argument. 
input.ArtifactFolder can't be created for %s spec", specName) - Expect(input.E2EConfig.Variables).To(HaveKey(KubernetesVersionUpgradeFrom)) + if input.KubernetesVersionFrom == "" { + Expect(input.E2EConfig.Variables).To(HaveKey(KubernetesVersionUpgradeFrom)) + } Expect(input.E2EConfig.Variables).To(HaveKey(KubernetesVersionUpgradeTo)) Expect(input.ExtensionServiceNamespace).ToNot(BeEmpty()) @@ -172,8 +186,8 @@ func ClusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() Cl It("Should create, upgrade and delete a workload cluster", func() { // NOTE: test extension is already deployed in the management cluster. If for any reason in future we want - // to make this test more self-contained this test should be modified in order to create an additional - // management cluster; also the E2E test configuration should be modified introducing something like + // to make this test more self-contained, this test should be modified in order to create an additional + // management cluster; also, the E2E test configuration should be modified introducing something like // optional:true allowing to define which providers should not be installed by default in // a management cluster. 
@@ -193,11 +207,6 @@ func ClusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() Cl By("Creating a workload cluster; creation waits for BeforeClusterCreateHook to gate the operation") - clusterRef := types.NamespacedName{ - Name: clusterName, - Namespace: namespace.Name, - } - infrastructureProvider := clusterctl.DefaultInfrastructureProvider if input.InfrastructureProvider != nil { infrastructureProvider = *input.InfrastructureProvider @@ -211,6 +220,12 @@ func ClusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() Cl variables["CLUSTER_CLASS_NAMESPACE"] = clusterClassNamespace.Name } + fromVersion := input.KubernetesVersionFrom + if fromVersion == "" { + fromVersion = input.E2EConfig.MustGetVariable(KubernetesVersionUpgradeFrom) + } + toVersion := input.E2EConfig.MustGetVariable(KubernetesVersionUpgradeTo) + clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{ ClusterProxy: input.BootstrapClusterProxy, ConfigCluster: clusterctl.ConfigClusterInput{ @@ -221,49 +236,73 @@ Flavor: ptr.Deref(input.Flavor, "upgrades"), Namespace: namespace.Name, ClusterName: clusterName, - KubernetesVersion: input.E2EConfig.MustGetVariable(KubernetesVersionUpgradeFrom), + KubernetesVersion: fromVersion, ControlPlaneMachineCount: ptr.To[int64](controlPlaneMachineCount), WorkerMachineCount: ptr.To[int64](workerMachineCount), ClusterctlVariables: variables, }, PreWaitForCluster: func() { + cluster := framework.GetClusterByName(ctx, framework.GetClusterByNameInput{ + Name: clusterName, Namespace: namespace.Name, Getter: input.BootstrapClusterProxy.GetClient()}) + + // Check for the beforeClusterCreate hook being called, then unblock beforeClusterCreateTestHandler(ctx, input.BootstrapClusterProxy.GetClient(), - clusterRef, + cluster, input.ExtensionConfigName, input.E2EConfig.GetIntervals(specName, "wait-cluster")) }, - PostMachinesProvisioned:
func() { - Eventually(func() error { - // Before running the BeforeClusterUpgrade hook, the topology controller - // checks if the ControlPlane `IsScaling()` and for MachineDeployments if - // `IsAnyRollingOut()`. - // This PostMachineProvisioned function ensures that the clusters machines - // are healthy by checking the MachineNodeHealthyCondition, so the upgrade - // below does not get delayed or runs into timeouts before even reaching - // the BeforeClusterUpgrade hook. - machineList := &clusterv1.MachineList{} - if err := input.BootstrapClusterProxy.GetClient().List(ctx, machineList, client.InNamespace(namespace.Name)); err != nil { - return errors.Wrap(err, "list machines") - } - - for i := range machineList.Items { - machine := &machineList.Items[i] - if !conditions.IsTrue(machine, clusterv1.MachineNodeHealthyCondition) { - return errors.Errorf("machine %q does not have %q condition set to true", machine.GetName(), clusterv1.MachineNodeHealthyCondition) - } - } - - return nil - }, 5*time.Minute, 15*time.Second).Should(Succeed(), "Waiting for rollouts to finish") - }, WaitForClusterIntervals: input.E2EConfig.GetIntervals(specName, "wait-cluster"), WaitForControlPlaneIntervals: input.E2EConfig.GetIntervals(specName, "wait-control-plane"), WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), WaitForMachinePools: input.E2EConfig.GetIntervals(specName, "wait-machine-pool-nodes"), }, clusterResources) - // TODO: check if AfterControlPlaneInitialized has been called (or add this check to the operation above) + // Compute the upgrade plan that must be followed during the upgrade, so it can be validated step by step in the second + // part of the test. + // By default, the test assumes we are upgrading by one minor, however: + // - If the cluster class provides a list of Kubernetes versions, this list will be used to compute the + // upgrade plan, which can span multiple versions in this case. 
+ // - If the test receives as input a list of Kubernetes versions, the cluster class will be updated using it + // and used as in the example above. + + getUpgradePlanFunc := desiredstate.GetUpgradePlanOneMinor + if len(clusterResources.ClusterClass.Spec.KubernetesVersions) != 0 { + log.Logf("Using Kubernetes versions from cluster class: %s", clusterResources.ClusterClass.Spec.KubernetesVersions) + getUpgradePlanFunc = desiredstate.GetUpgradePlanFromClusterClassVersions(clusterResources.ClusterClass.Spec.KubernetesVersions) + } else if len(input.KubernetesVersions) != 0 { + log.Logf("Using Kubernetes versions provided as input to the test: %s", input.KubernetesVersions) + clusterResources.ClusterClass.Spec.KubernetesVersions = input.KubernetesVersions + Expect(input.BootstrapClusterProxy.GetClient().Update(ctx, clusterResources.ClusterClass)).To(Succeed(), "Failed to update cluster class %s with Kubernetes versions", klog.KObj(clusterResources.ClusterClass)) + + getUpgradePlanFunc = desiredstate.GetUpgradePlanFromClusterClassVersions(input.KubernetesVersions) + } + + controlPlaneUpgradePlan, workersUpgradePlan, err := getUpgradePlanFunc(ctx, toVersion, fromVersion, fromVersion) + Expect(err).ToNot(HaveOccurred(), "Failed to get the upgrade plan") + + workersUpgradePlan, err = desiredstate.DefaultAndValidateUpgradePlans(toVersion, fromVersion, fromVersion, controlPlaneUpgradePlan, workersUpgradePlan) + Expect(err).ToNot(HaveOccurred(), "Failed to default and validate the upgrade plan") + + log.Logf("Control Plane upgrade plan: %s", controlPlaneUpgradePlan) + log.Logf("Workers upgrade plan: %s", workersUpgradePlan) + + // Perform the upgrade and check everything is going as expected.
+ // More specifically: + // - After control plane upgrade steps + // - Check control plane machines are on the desired version, nodes are healthy + // - Workers machines are still on the old version, nodes are healthy + // - MachineSet preflight checks are preventing creation on machines in the old version (this is implemented before unblocking the afterControlPlaneUpgradeTestHandler) + // - After workers upgrade steps + // - Check control plane machines are on the desired version, nodes are healthy + // - Workers machines are still on the desired version, nodes are healthy + // - Check lifecycle hooks before and after every step + + controlPlaneVersion := fromVersion + workersVersion := fromVersion + + checkControlPlaneVersion(ctx, input.BootstrapClusterProxy.GetClient(), clusterResources.Cluster, controlPlaneVersion) + checkWorkersVersions(ctx, input.BootstrapClusterProxy.GetClient(), clusterResources.Cluster, workersVersion) // Add a BeforeClusterUpgrade hook annotation to block via the annotation. beforeClusterUpgradeAnnotation := clusterv1.BeforeClusterUpgradeHookAnnotationPrefix + "/upgrade-test" @@ -273,46 +312,63 @@ func ClusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() Cl Expect(patchHelper.Patch(ctx, clusterResources.Cluster)).To(Succeed()) // Upgrade the Cluster topology to run through an entire cluster lifecycle to test the lifecycle hooks.
- By("Upgrading the Cluster topology; creation waits for BeforeClusterUpgradeHook and AfterControlPlaneUpgradeHook to gate the operation") + By("Upgrading the Cluster topology") framework.UpgradeClusterTopologyAndWaitForUpgrade(ctx, framework.UpgradeClusterTopologyAndWaitForUpgradeInput{ - ClusterProxy: input.BootstrapClusterProxy, - Cluster: clusterResources.Cluster, - ControlPlane: clusterResources.ControlPlane, - MachineDeployments: clusterResources.MachineDeployments, - MachinePools: clusterResources.MachinePools, - KubernetesUpgradeVersion: input.E2EConfig.MustGetVariable(KubernetesVersionUpgradeTo), - WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), - WaitForMachinePoolToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-pool-upgrade"), - WaitForKubeProxyUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), - WaitForDNSUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), - WaitForEtcdUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + ClusterProxy: input.BootstrapClusterProxy, + Cluster: clusterResources.Cluster, + ControlPlane: clusterResources.ControlPlane, + MachineDeployments: clusterResources.MachineDeployments, + MachinePools: clusterResources.MachinePools, + KubernetesUpgradeVersion: toVersion, + WaitForControlPlaneToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-control-plane-upgrade"), + WaitForMachineDeploymentToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-deployment-upgrade"), + WaitForMachinePoolToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-pool-upgrade"), + WaitForKubeProxyUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + WaitForDNSUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + WaitForEtcdUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), PreWaitForControlPlaneToBeUpgraded: func() { - 
beforeClusterUpgradeAnnotationIsBlocking(ctx, - input.BootstrapClusterProxy.GetClient(), - clusterRef, - input.E2EConfig.MustGetVariable(KubernetesVersionUpgradeTo), - beforeClusterUpgradeAnnotation, - input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade")) + // WaitForControlPlaneToBeUpgraded inside UpgradeClusterTopologyAndWaitForUpgrade checks if all the machines are at the target + // version; however, in this test we want a finer control of what happen during CP upgrade, so we use this func to go through + // the control plane upgrade step by step using hooks to block/unblock every phase. + // Check for the beforeClusterUpgrade being called, then unblock beforeClusterUpgradeTestHandler(ctx, input.BootstrapClusterProxy.GetClient(), - clusterRef, + clusterResources.Cluster, input.ExtensionConfigName, - input.E2EConfig.MustGetVariable(KubernetesVersionUpgradeTo), + fromVersion, + toVersion, input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade")) - }, - PreWaitForWorkersToBeUpgraded: func() { - machineSetPreflightChecksTestHandler(ctx, - input.BootstrapClusterProxy.GetClient(), - clusterRef, - input.ExtensionConfigName) - afterControlPlaneUpgradeTestHandler(ctx, - input.BootstrapClusterProxy.GetClient(), - clusterRef, - input.ExtensionConfigName, - input.E2EConfig.MustGetVariable(KubernetesVersionUpgradeTo), - input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade")) + // Then check the upgrade is progressing step by step according to the upgrade plan + for _, version := range controlPlaneUpgradePlan { + // Wait CP to update to version + controlPlaneVersion = version + waitControlPlaneVersion(ctx, input.BootstrapClusterProxy.GetClient(), clusterResources.Cluster, controlPlaneVersion, input.E2EConfig.GetIntervals(specName, "wait-control-plane-upgrade")) + + // Check workers are not yet upgraded. 
+ checkWorkersVersions(ctx, input.BootstrapClusterProxy.GetClient(), clusterResources.Cluster, workersVersion) + + // make sure afterControlPlaneUpgrade still blocks, then unblock the upgrade. + afterControlPlaneUpgradeTestHandler(ctx, + input.BootstrapClusterProxy.GetClient(), + clusterResources.Cluster, + input.ExtensionConfigName, + controlPlaneVersion, + workersVersion, + input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade")) + + // If worker should not upgrade at this step, continue + if !sets.New[string](workersUpgradePlan...).Has(version) { + continue + } + + // Wait for workers to update to version + workersVersion = version + waitWorkersVersions(ctx, input.BootstrapClusterProxy.GetClient(), clusterResources.Cluster, workersVersion, input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade")) + + // TODO(chained-upgrade): FP, we can't check next CP upgrade doesn't start, it starts immediately + } }, }) @@ -321,7 +377,7 @@ func ClusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() Cl workloadClient := workloadProxy.GetClient() framework.WaitForNodesReady(ctx, framework.WaitForNodesReadyInput{ Lister: workloadClient, - KubernetesVersion: input.E2EConfig.MustGetVariable(KubernetesVersionUpgradeTo), + KubernetesVersion: toVersion, Count: int(clusterResources.ExpectedTotalNodes()), WaitForNodesReady: input.E2EConfig.GetIntervals(specName, "wait-nodes-ready"), }) @@ -348,18 +404,23 @@ func ClusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() Cl By("Dumping resources and deleting the workload cluster; deletion waits for BeforeClusterDeleteHook to gate the operation") dumpAndDeleteCluster(ctx, input.BootstrapClusterProxy, input.ClusterctlConfigPath, namespace.Name, clusterName, input.ArtifactFolder) - beforeClusterDeleteHandler(ctx, input.BootstrapClusterProxy.GetClient(), clusterRef, input.ExtensionConfigName, input.E2EConfig.GetIntervals(specName, "wait-delete-cluster")) + beforeClusterDeleteHandler(ctx, 
input.BootstrapClusterProxy.GetClient(), clusterResources.Cluster, input.ExtensionConfigName, input.E2EConfig.GetIntervals(specName, "wait-delete-cluster")) By("Checking all lifecycle hooks have been called") // Assert that each hook has been called and returned "Success" during the test. - Expect(checkLifecycleHookResponses(ctx, input.BootstrapClusterProxy.GetClient(), clusterRef, input.ExtensionConfigName, map[string]string{ - "BeforeClusterCreate": "Status: Success, RetryAfterSeconds: 0", - "BeforeClusterUpgrade": "Status: Success, RetryAfterSeconds: 0", - "BeforeClusterDelete": "Status: Success, RetryAfterSeconds: 0", - "AfterControlPlaneUpgrade": "Status: Success, RetryAfterSeconds: 0", - "AfterControlPlaneInitialized": "Success", - "AfterClusterUpgrade": "Success", - })).To(Succeed(), "Lifecycle hook calls were not as expected") + expectedHooks := map[string]string{ + computeHookName("BeforeClusterCreate", nil): "Status: Success, RetryAfterSeconds: 0", + computeHookName("BeforeClusterUpgrade", nil): "Status: Success, RetryAfterSeconds: 0", + computeHookName("BeforeClusterDelete", nil): "Status: Success, RetryAfterSeconds: 0", + computeHookName("AfterControlPlaneUpgrade", []string{toVersion}): "Status: Success, RetryAfterSeconds: 0", + computeHookName("AfterControlPlaneInitialized", nil): "Success", + computeHookName("AfterClusterUpgrade", nil): "Success", + } + for _, v := range controlPlaneUpgradePlan { + expectedHooks[computeHookName("AfterControlPlaneUpgrade", []string{v})] = "Status: Success, RetryAfterSeconds: 0" + } + + checkLifecycleHookResponses(ctx, input.BootstrapClusterProxy.GetClient(), clusterResources.Cluster, input.ExtensionConfigName, expectedHooks) By("PASSED!") }) @@ -403,44 +464,31 @@ func ClusterUpgradeWithRuntimeSDKSpec(ctx context.Context, inputGetter func() Cl }) } -// machineSetPreflightChecksTestHandler verifies the MachineSet preflight checks. 
-// At this point in the test the ControlPlane is upgraded to the new version and the upgrade to the MachineDeployments +// machineSetPreflightChecksTest verifies the MachineSet preflight checks. +// At this point in the test the ControlPlane is upgraded to the new version, and the upgrade to the MachineDeployments // should be blocked by the AfterControlPlaneUpgrade hook. // Test the MachineSet preflight checks by scaling up the MachineDeployment. The creation on the new Machine // should be blocked because the preflight checks should not pass (kubeadm version skew preflight check should fail). -func machineSetPreflightChecksTestHandler(ctx context.Context, c client.Client, clusterRef types.NamespacedName, extensionConfigName string) { - // Verify that the hook is called and the topology reconciliation is blocked. - hookName := "AfterControlPlaneUpgrade" - Eventually(func() error { - if err := checkLifecycleHooksCalledAtLeastOnce(ctx, c, clusterRef, extensionConfigName, []string{hookName}); err != nil { - return err - } - - cluster := framework.GetClusterByName(ctx, framework.GetClusterByNameInput{ - Name: clusterRef.Name, Namespace: clusterRef.Namespace, Getter: c}) - - if !clusterConditionShowsHookBlocking(cluster, hookName) { - return errors.Errorf("Blocking condition for %s not found on Cluster object", hookName) - } - - return nil - }, 30*time.Second).Should(Succeed(), "%s has not been called", hookName) +func machineSetPreflightChecksTest(ctx context.Context, c client.Client, cluster *clusterv1.Cluster) { + log.Logf("Test the MachineSet preflight checks") // Scale up the MachineDeployment machineDeployments := framework.GetMachineDeploymentsByCluster(ctx, framework.GetMachineDeploymentsByClusterInput{ Lister: c, - ClusterName: clusterRef.Name, - Namespace: clusterRef.Namespace, + ClusterName: cluster.Name, + Namespace: cluster.Namespace, }) md := machineDeployments[0] + log.Logf("Scaling up MD %s", md.Name) + // Note: It is fair to assume that the Cluster is 
ClusterClass based since RuntimeSDK // is only supported for ClusterClass based Clusters. patchHelper, err := patch.NewHelper(md, c) Expect(err).ToNot(HaveOccurred()) // Scale up the MachineDeployment. - // IMPORTANT: Since the MachineDeployment is pending an upgrade at this point the topology controller will not push any changes + // IMPORTANT: Since the MachineDeployment is pending an upgrade, at this point the topology controller will not push any changes // to the MachineDeployment. Therefore, the changes made to the MachineDeployment here will not be replaced // until the AfterControlPlaneUpgrade hook unblocks the upgrade. *md.Spec.Replicas++ @@ -461,6 +509,8 @@ func machineSetPreflightChecksTestHandler(ctx context.Context, c client.Client, g.Expect(targetMD.Spec.Replicas).To(Equal(md.Spec.Replicas)) }, 10*time.Second, 1*time.Second).Should(Succeed()) + log.Logf("Checking that a new MachineSet is created, but no machine are created for MD %s (upgrade is still pending)", md.Name) + // Since the MachineDeployment is scaled up (overriding the topology controller) at this point the MachineSet would // also scale up. However, a new Machine creation would be blocked by one of the MachineSet preflight checks (KubeadmVersionSkew). // Verify the MachineSet is blocking new Machine creation. 
@@ -494,13 +544,15 @@ func machineSetPreflightChecksTestHandler(ctx context.Context, c client.Client, originalReplicas := int(*md.Spec.Replicas - 1) machines := framework.GetMachinesByMachineDeployments(ctx, framework.GetMachinesByMachineDeploymentsInput{ Lister: c, - ClusterName: clusterRef.Name, - Namespace: clusterRef.Namespace, + ClusterName: cluster.Name, + Namespace: cluster.Namespace, MachineDeployment: *md, }) g.Expect(machines).To(HaveLen(originalReplicas), "New Machines should not be created") }, 10*time.Second, time.Second).Should(Succeed()) + log.Logf("Scaling down MD %s to the original state", md.Name) + // Scale down the MachineDeployment to the original replicas to restore to the state of the MachineDeployment // it existed in before this test block. patchHelper, err = patch.NewHelper(md, c) @@ -522,7 +574,7 @@ func extensionConfig(name, extensionServiceNamespace, extensionServiceName strin // when configuring external patches. Name: name, Annotations: map[string]string{ - // Note: this assumes the test extension get deployed in the default namespace defined in its own runtime-extensions-components.yaml + // Note: this assumes the test extension gets deployed in the default namespace defined in its own runtime-extensions-components.yaml runtimev1.InjectCAFromSecretAnnotation: fmt.Sprintf("%s/%s-cert", extensionServiceNamespace, extensionServiceName), }, }, @@ -530,7 +582,7 @@ func extensionConfig(name, extensionServiceNamespace, extensionServiceName strin ClientConfig: runtimev1.ClientConfig{ Service: runtimev1.ServiceReference{ Name: extensionServiceName, - // Note: this assumes the test extension get deployed in the default namespace defined in its own runtime-extensions-components.yaml + // Note: this assumes the test extension gets deployed in the default namespace defined in its own runtime-extensions-components.yaml Namespace: extensionServiceNamespace, }, }, @@ -556,33 +608,34 @@ func extensionConfig(name, extensionServiceNamespace, 
extensionServiceName strin } // Check that each hook in hooks has been called at least once by checking if its actualResponseStatus is in the hook response configmap. -// If the provided hooks have both keys and values check that the values match those in the hook response configmap. -func checkLifecycleHookResponses(ctx context.Context, c client.Client, cluster types.NamespacedName, extensionConfigName string, expectedHookResponses map[string]string) error { - responseData := getLifecycleHookResponsesFromConfigMap(ctx, c, cluster, extensionConfigName) - for hookName, expectedResponse := range expectedHookResponses { - actualResponse, ok := responseData[hookName+"-actualResponseStatus"] - if !ok { - return errors.Errorf("hook %s call not recorded in configMap %s", hookName, klog.KRef(cluster.Namespace, hookResponsesConfigMapName(cluster.Name, extensionConfigName))) - } - if expectedResponse != "" && expectedResponse != actualResponse { - return errors.Errorf("hook %s was expected to be %s in configMap got %s", hookName, expectedResponse, actualResponse) +// If the provided hooks have both keys and values, check that the values match those in the hook response configmap. 
+func checkLifecycleHookResponses(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, extensionConfigName string, expectedHookResponses map[string]string) { + Eventually(func() error { + responseData := getLifecycleHookResponsesFromConfigMap(ctx, c, cluster, extensionConfigName) + for hookName, expectedResponse := range expectedHookResponses { + actualResponse, ok := responseData[hookName+"-actualResponseStatus"] + if !ok { + return errors.Errorf("hook %s call not recorded in configMap %s", hookName, klog.KRef(cluster.Namespace, hookResponsesConfigMapName(cluster.Name, extensionConfigName))) + } + if expectedResponse != "" && expectedResponse != actualResponse { + return errors.Errorf("hook %s was expected to be %s in configMap got %s", hookName, expectedResponse, actualResponse) + } } - } - return nil + return nil + }, 30*time.Second).Should(Succeed(), "Lifecycle hook calls were not as expected") } // Check that each hook in expectedHooks has been called at least once by checking if its actualResponseStatus is in the hook response configmap. 
-func checkLifecycleHooksCalledAtLeastOnce(ctx context.Context, c client.Client, cluster types.NamespacedName, extensionConfigName string, expectedHooks []string) error { +func checkLifecycleHooksCalledAtLeastOnce(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, extensionConfigName string, hook string, attributes []string) error { responseData := getLifecycleHookResponsesFromConfigMap(ctx, c, cluster, extensionConfigName) - for _, hookName := range expectedHooks { - if _, ok := responseData[hookName+"-actualResponseStatus"]; !ok { - return errors.Errorf("hook %s call not recorded in configMap %s", hookName, klog.KRef(cluster.Namespace, hookResponsesConfigMapName(cluster.Name, extensionConfigName))) - } + hookName := computeHookName(hook, attributes) + if _, ok := responseData[hookName+"-actualResponseStatus"]; !ok { + return errors.Errorf("hook %s call not recorded in configMap %s", hookName, klog.KRef(cluster.Namespace, hookResponsesConfigMapName(cluster.Name, extensionConfigName))) } return nil } -func getLifecycleHookResponsesFromConfigMap(ctx context.Context, c client.Client, cluster types.NamespacedName, extensionConfigName string) map[string]string { +func getLifecycleHookResponsesFromConfigMap(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, extensionConfigName string) map[string]string { configMap := &corev1.ConfigMap{} Eventually(func() error { return c.Get(ctx, client.ObjectKey{Namespace: cluster.Namespace, Name: hookResponsesConfigMapName(cluster.Name, extensionConfigName)}, configMap) @@ -591,169 +644,268 @@ func getLifecycleHookResponsesFromConfigMap(ctx context.Context, c client.Client } // beforeClusterCreateTestHandler calls runtimeHookTestHandler with a blockedCondition function which returns false if -// the Cluster has entered ClusterPhaseProvisioned. 
-func beforeClusterCreateTestHandler(ctx context.Context, c client.Client, cluster types.NamespacedName, extensionConfigName string, intervals []interface{}) { +// the Cluster has a control plane or an infrastructure reference. +func beforeClusterCreateTestHandler(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, extensionConfigName string, intervals []interface{}) { hookName := "BeforeClusterCreate" - runtimeHookTestHandler(ctx, c, cluster, hookName, extensionConfigName, true, func() bool { + + isBlockingCreate := func() bool { blocked := true // This hook should block the Cluster from entering the "Provisioned" state. cluster := framework.GetClusterByName(ctx, framework.GetClusterByNameInput{Name: cluster.Name, Namespace: cluster.Namespace, Getter: c}) - if cluster.Status.Phase == string(clusterv1.ClusterPhaseProvisioned) { + if cluster.Spec.InfrastructureRef.IsDefined() || cluster.Spec.ControlPlaneRef.IsDefined() { blocked = false } return blocked - }, intervals) -} + } -// beforeClusterUpgradeAnnotationIsBlocking checks if the cluster is successfully blocking due to the given BeforeClusterUpgrade -// hook annotation by checking for the right condition message and that none of the machines in the control plane has been -// updated to the target Kubernetes version. -func beforeClusterUpgradeAnnotationIsBlocking(ctx context.Context, c client.Client, clusterRef types.NamespacedName, toVersion, annotation string, intervals []interface{}) { - hookName := "BeforeClusterUpgrade" - log.Logf("Blocking with %s hook for 60 seconds with the annotation", hookName) + // Test the BeforeClusterCreate the hook. 
+ runtimeHookTestHandler(ctx, c, cluster, extensionConfigName, hookName, nil, isBlockingCreate, nil, intervals) - expectedBlockingMessage := fmt.Sprintf("hook %q is blocking: annotation [%s] is set", hookName, annotation) + Byf("ClusterCreate unblocked") +} - blockingConditionCheck := func() error { - cluster := framework.GetClusterByName(ctx, framework.GetClusterByNameInput{ - Name: clusterRef.Name, Namespace: clusterRef.Namespace, Getter: c}) +// beforeClusterUpgradeTestHandler calls runtimeHookTestHandler with a blocking function which returns false if +// any of the control plane, machine deployments or machine pools has been updated from the initial Kubernetes version. +func beforeClusterUpgradeTestHandler(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, extensionConfigName string, fromVersion, toVersion string, intervals []interface{}) { + hookName := "BeforeClusterUpgrade" + beforeClusterUpgradeAnnotation := clusterv1.BeforeClusterUpgradeHookAnnotationPrefix + "/upgrade-test" - if conditions.GetReason(cluster, clusterv1.ClusterTopologyReconciledCondition) != clusterv1.TopologyReconciledHookBlockingV1Beta1Reason { - return fmt.Errorf("hook %s (via annotation) should lead to LifecycleHookBlocking reason", hookName) + isBlockingUpgrade := func() bool { + controlPlane, err := external.GetObjectFromContractVersionedRef(ctx, c, cluster.Spec.ControlPlaneRef, cluster.Namespace) + if err != nil { + return false } - if !strings.Contains(conditions.GetMessage(cluster, clusterv1.ClusterTopologyReconciledCondition), expectedBlockingMessage) { - return fmt.Errorf("hook %[1]s (via annotation) should show hook %[1]s is blocking as message with: %[2]s", hookName, expectedBlockingMessage) + controlPlaneVersion, err := contract.ControlPlane().Version().Get(controlPlane) + if err != nil { + return false } - - controlPlaneMachines := framework.GetControlPlaneMachinesByCluster(ctx, - framework.GetControlPlaneMachinesByClusterInput{Lister: c, ClusterName: 
clusterRef.Name, Namespace: clusterRef.Namespace}) - for _, machine := range controlPlaneMachines { - if machine.Spec.Version == toVersion { - return errors.Errorf("Machine's %s version (%s) does match %s", klog.KObj(&machine), machine.Spec.Version, toVersion) - } + if *controlPlaneVersion != fromVersion { + return false } - return nil - } - - // Check that the LifecycleHook annotation is blocking at least once with the expected blocking reason, message and none of the CP machines being upgraded. - Eventually(blockingConditionCheck, 30*time.Second).Should(Succeed(), "%s (via annotation %s) did not block", hookName, annotation) - - // The check should consistently succeed. - Consistently(blockingConditionCheck, 60*time.Second).Should(Succeed(), - fmt.Sprintf("Cluster Topology reconciliation continued unexpectedly: hook %s (via annotation %s) is not blocking", hookName, annotation)) - - // Patch the Cluster to remove the LifecycleHook annotation hook and unblock. - cluster := framework.GetClusterByName(ctx, framework.GetClusterByNameInput{ - Name: clusterRef.Name, Namespace: clusterRef.Namespace, Getter: c}) - patchHelper, err := patch.NewHelper(cluster, c) - Expect(err).ToNot(HaveOccurred()) - delete(cluster.Annotations, annotation) - Expect(patchHelper.Patch(ctx, cluster)).To(Succeed()) - - // Expect the LifecycleHook annotation to not block anymore. 
- Eventually(func() error { - cluster := framework.GetClusterByName(ctx, framework.GetClusterByNameInput{ - Name: clusterRef.Name, Namespace: clusterRef.Namespace, Getter: c}) - - if strings.Contains(conditions.GetMessage(cluster, clusterv1.ClusterTopologyReconciledCondition), expectedBlockingMessage) { - return fmt.Errorf("hook %s (via annotation %s) should not be blocking anymore with message: %s", hookName, annotation, expectedBlockingMessage) + mds := framework.GetMachineDeploymentsByCluster(ctx, + framework.GetMachineDeploymentsByClusterInput{ClusterName: cluster.Name, Namespace: cluster.Namespace, Lister: c}) + for _, md := range mds { + if md.Spec.Template.Spec.Version != fromVersion { + return false + } } - return nil - }, intervals...).Should(Succeed(), - fmt.Sprintf("ClusterTopology reconcile did not proceed as expected when unblocking hook %s (via annotation %s)", hookName, annotation)) -} - -// beforeClusterUpgradeTestHandler calls runtimeHookTestHandler with a blocking function which returns false if -// any of the machines in the control plane has been updated to the target Kubernetes version. -func beforeClusterUpgradeTestHandler(ctx context.Context, c client.Client, cluster types.NamespacedName, extensionConfigName string, toVersion string, intervals []interface{}) { - hookName := "BeforeClusterUpgrade" - runtimeHookTestHandler(ctx, c, cluster, hookName, extensionConfigName, true, func() bool { - var blocked = true - controlPlaneMachines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{Lister: c, ClusterName: cluster.Name, Namespace: cluster.Namespace}) for _, machine := range controlPlaneMachines { if machine.Spec.Version == toVersion { - blocked = false + return false } } - return blocked - }, intervals) + return true + } + + // BeforeClusterUpgrade can be blocked via an annotation hook. 
Check it works + annotationHookTestHandler(ctx, c, cluster, hookName, beforeClusterUpgradeAnnotation, isBlockingUpgrade, intervals) + + // Test the BeforeClusterUpgrade the hook. + runtimeHookTestHandler(ctx, c, cluster, extensionConfigName, hookName, nil, isBlockingUpgrade, nil, intervals) + + Byf("ClusterUpgrade to %s unblocked", toVersion) } // afterControlPlaneUpgradeTestHandler calls runtimeHookTestHandler with a blocking function which returns false if any // MachineDeployment in the Cluster has upgraded to the target Kubernetes version. -func afterControlPlaneUpgradeTestHandler(ctx context.Context, c client.Client, cluster types.NamespacedName, extensionConfigName string, version string, intervals []interface{}) { +func afterControlPlaneUpgradeTestHandler(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, extensionConfigName string, controlPlaneVersion, workersVersion string, intervals []interface{}) { hookName := "AfterControlPlaneUpgrade" - runtimeHookTestHandler(ctx, c, cluster, hookName, extensionConfigName, true, func() bool { - var blocked = true + + isBlockingUpgrade := func() bool { + controlPlane, err := external.GetObjectFromContractVersionedRef(ctx, c, cluster.Spec.ControlPlaneRef, cluster.Namespace) + if err != nil { + return false + } + v, err := contract.ControlPlane().Version().Get(controlPlane) + if err != nil { + return false + } + if *v != controlPlaneVersion { + return false + } mds := framework.GetMachineDeploymentsByCluster(ctx, framework.GetMachineDeploymentsByClusterInput{ClusterName: cluster.Name, Namespace: cluster.Namespace, Lister: c}) - // If any of the MachineDeployments have the target Kubernetes Version, the hook is unblocked. 
for _, md := range mds { - if md.Spec.Template.Spec.Version == version { - blocked = false + if md.Spec.Template.Spec.Version != workersVersion { + return false } } - return blocked - }, intervals) + + mps := framework.GetMachinePoolsByCluster(ctx, + framework.GetMachinePoolsByClusterInput{ClusterName: cluster.Name, Namespace: cluster.Namespace, Lister: c}) + for _, mp := range mps { + if mp.Spec.Template.Spec.Version != workersVersion { + return false + } + } + return true + } + + beforeUnblockingUpgrade := func() { + machineSetPreflightChecksTest(ctx, c, cluster) + } + + // Test the AfterControlPlaneUpgrade hook and perform machine set preflight checks before unblocking. + runtimeHookTestHandler(ctx, c, cluster, extensionConfigName, hookName, []string{controlPlaneVersion}, isBlockingUpgrade, beforeUnblockingUpgrade, intervals) } // beforeClusterDeleteHandler calls runtimeHookTestHandler with a blocking function which returns false if the Cluster -// can not be found in the API server. -func beforeClusterDeleteHandler(ctx context.Context, c client.Client, cluster types.NamespacedName, extensionConfigName string, intervals []interface{}) { +// cannot be found in the API server. +func beforeClusterDeleteHandler(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, extensionConfigName string, intervals []interface{}) { hookName := "BeforeClusterDelete" - runtimeHookTestHandler(ctx, c, cluster, hookName, extensionConfigName, false, func() bool { + + isBlockingDelete := func() bool { var blocked = true - // If the Cluster is not found it has been deleted and the hook is unblocked. - if apierrors.IsNotFound(c.Get(ctx, cluster, &clusterv1.Cluster{})) { + // If the Cluster is not found, it has been deleted and the hook is unblocked. + if apierrors.IsNotFound(c.Get(ctx, client.ObjectKey{Name: cluster.Name, Namespace: cluster.Namespace}, &clusterv1.Cluster{})) { blocked = false } return blocked - }, intervals) + } + + // Test the BeforeClusterDelete the hook. 
+ runtimeHookTestHandler(ctx, c, cluster, extensionConfigName, hookName, nil, isBlockingDelete, nil, intervals) +} + +// annotationHookTestHandler runs a series of tests in sequence to check if the annotation hook can block. +// 1. Check if the annotation hook is blocking and if the TopologyReconciled condition reports if the annotation hook is blocking. +// 2. Remove the annotation hook. +// 3. Check if the TopologyReconciled condition stops reporting the annotation hook is blocking. +func annotationHookTestHandler(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, hook, annotation string, blockingCondition func() bool, intervals []interface{}) { + log.Logf("Blocking with the %s annotation hook for 60 seconds", hook) + + expectedBlockingMessage := fmt.Sprintf("hook %q is blocking: annotation [%s] is set", hook, annotation) + + // Check if TopologyReconciledCondition reports if the annotation hook is blocking + topologyConditionCheck := func() bool { + cluster = framework.GetClusterByName(ctx, framework.GetClusterByNameInput{ + Name: cluster.Name, Namespace: cluster.Namespace, Getter: c}) + + if conditions.GetReason(cluster, clusterv1.ClusterTopologyReconciledCondition) != clusterv1.ClusterTopologyReconciledHookBlockingReason { + return false + } + if !strings.Contains(conditions.GetMessage(cluster, clusterv1.ClusterTopologyReconciledCondition), expectedBlockingMessage) { + return false + } + + return true + } + + Byf("Waiting for %s hook (via annotation %s) to start blocking", hook, annotation) + + // Check if the annotation hook is blocking. + Eventually(func(_ Gomega) bool { + if !topologyConditionCheck() { + return false + } + if !blockingCondition() { + return false + } + return true + }, 30*time.Second).Should(BeTrue(), "%s (via annotation %s) did not block", hook, annotation) + + Byf("Validating %s hook (via annotation %s) consistently blocks progress in the reconciliation", hook, annotation) + + // Check if the annotation hook keeps blocking. 
+ Consistently(func(_ Gomega) bool { + if !topologyConditionCheck() { + return false + } + if !blockingCondition() { + return false + } + return true + }, 60*time.Second).Should(BeTrue(), + fmt.Sprintf("Cluster Topology reconciliation continued unexpectedly: hook %s (via annotation %s) is not blocking", hook, annotation)) + + // Patch the Cluster to remove the LifecycleHook annotation hook and unblock. + Byf("Removing the %s annotation", annotation) + + patchHelper, err := patch.NewHelper(cluster, c) + Expect(err).ToNot(HaveOccurred()) + delete(cluster.Annotations, annotation) + Expect(patchHelper.Patch(ctx, cluster)).To(Succeed()) + + // Expect the LifecycleHook annotation to not block anymore. + // NOTE: we check only the topology reconciled message and not that blockingCondition because a runtime hook will block progress on reconciliation. + + Byf("Waiting for %s hook (via annotation %s) to stop blocking", hook, annotation) + + Eventually(func() error { + cluster = framework.GetClusterByName(ctx, framework.GetClusterByNameInput{ + Name: cluster.Name, Namespace: cluster.Namespace, Getter: c}) + + if strings.Contains(conditions.GetMessage(cluster, clusterv1.ClusterTopologyReconciledCondition), expectedBlockingMessage) { + return fmt.Errorf("hook %s (via annotation %s) should not be blocking anymore with message: %s", hook, annotation, expectedBlockingMessage) + } + return nil + }, intervals...).Should(Succeed(), + fmt.Sprintf("ClusterTopology reconcile did not proceed as expected when unblocking hook %s (via annotation %s)", hook, annotation)) } -// runtimeHookTestHandler runs a series of tests in sequence to check if the runtimeHook passed to it succeeds. -// 1. Checks that the hook has been called at least once and, if withTopologyReconciledCondition is set, checks that the TopologyReconciled condition is a Failure. -// 2. Check that the hook's blockingCondition is consistently true. -// - At this point the function sets the hook's response to be non-blocking. 
-// 3. Check that the hook's blocking condition becomes false. +// runtimeHookTestHandler runs a series of tests in sequence to check if the runtimeHook passed in has been called, and it can block. +// 1. Check if the hook is actually called, it is blocking, and if the TopologyReconciled condition reports the hook is blocking. +// 2. Remove the block. +// 3. Check that hook is not blocking anymore. // // Note: runtimeHookTestHandler assumes that the hook passed to it is currently returning a blocking response. // Updating the response to be non-blocking happens inline in the function. -func runtimeHookTestHandler(ctx context.Context, c client.Client, cluster types.NamespacedName, hookName, extensionConfigName string, withTopologyReconciledCondition bool, blockingCondition func() bool, intervals []interface{}) { - log.Logf("Blocking with %s hook for 60 seconds after the hook has been called for the first time", hookName) +func runtimeHookTestHandler(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, extensionConfigName string, hook string, attributes []string, blockingCondition func() bool, beforeUnblocking func(), intervals []interface{}) { + hookName := computeHookName(hook, attributes) + log.Logf("Blocking with the %s hook for 60 seconds", hookName) - // Check that the LifecycleHook has been called at least once and - when required - that the TopologyReconciled condition is a Failure. - Eventually(func() error { - if err := checkLifecycleHooksCalledAtLeastOnce(ctx, c, cluster, extensionConfigName, []string{hookName}); err != nil { - return err - } + Byf("Waiting for %s hook to be called and start blocking", hookName) - // Check for the existence of the condition if withTopologyReconciledCondition is true. 
- if withTopologyReconciledCondition { + // Check if TopologyReconciledCondition reports if the hook is blocking + topologyConditionCheck := func() bool { + if hook != "BeforeClusterDelete" { cluster := framework.GetClusterByName(ctx, framework.GetClusterByNameInput{ Name: cluster.Name, Namespace: cluster.Namespace, Getter: c}) - if !clusterConditionShowsHookBlocking(cluster, hookName) { - return errors.Errorf("Blocking condition for %s not found on Cluster object", hookName) + if !clusterConditionShowsHookBlocking(cluster, hook) { + return false } } + return true + } + + // Check if the hook is actually called, it is blocking, and if the TopologyReconciled condition reports the hook is blocking. + Eventually(func(_ Gomega) error { + if err := checkLifecycleHooksCalledAtLeastOnce(ctx, c, cluster, extensionConfigName, hook, attributes); err != nil { + return err + } + if !topologyConditionCheck() { + return errors.Errorf("Blocking condition for %s not found on Cluster object", hookName) + } return nil }, 30*time.Second).Should(Succeed(), "%s has not been called", hookName) - // blockingCondition should consistently be true as the Runtime hook is returning "Failure". - Consistently(func() bool { - return blockingCondition() + Byf("Validating %s hook consistently blocks progress in the reconciliation", hookName) + + // Check if the hook keeps blocking. + Consistently(func(_ Gomega) bool { + if !topologyConditionCheck() { + return false + } + if !blockingCondition() { + return false + } + return true }, 60*time.Second).Should(BeTrue(), fmt.Sprintf("Cluster Topology reconciliation continued unexpectedly: hook %s not blocking", hookName)) + if beforeUnblocking != nil { + beforeUnblocking() + } + // Patch the ConfigMap to set the hook response to "Success". Byf("Setting %s response to Status:Success to unblock the reconciliation", hookName) @@ -761,25 +913,146 @@ func runtimeHookTestHandler(ctx context.Context, c client.Client, cluster types. 
Eventually(func() error { return c.Get(ctx, util.ObjectKey(configMap), configMap) }).Should(Succeed(), "Failed to get ConfigMap %s", klog.KObj(configMap)) - patch := client.RawPatch(types.MergePatchType, + patchData := client.RawPatch(types.MergePatchType, []byte(fmt.Sprintf(`{"data":{"%s-preloadedResponse":%s}}`, hookName, "\"{\\\"Status\\\": \\\"Success\\\"}\""))) Eventually(func() error { - return c.Patch(ctx, configMap, patch) + return c.Patch(ctx, configMap, patchData) }).Should(Succeed(), "Failed to set %s response to Status:Success to unblock the reconciliation", hookName) - // Expect the Hook to pass, setting the blockingCondition to false before the timeout ends. - Eventually(func() bool { - return blockingCondition() + // Check if the hook stops blocking and if the TopologyReconciled condition stops reporting the hook is blocking. + + Byf("Waiting for %s hook to stop blocking", hookName) + + Eventually(func(_ Gomega) bool { + if topologyConditionCheck() { + return false + } + if blockingCondition() { + return false + } + return true }, intervals...).Should(BeFalse(), fmt.Sprintf("ClusterTopology reconcile did not proceed as expected when calling %s", hookName)) } +func computeHookName(hook string, attributes []string) string { + return strings.Join(append([]string{hook}, attributes...), "-") +} + // clusterConditionShowsHookBlocking checks if the TopologyReconciled condition message contains both the hook name and hookFailedMessage. 
func clusterConditionShowsHookBlocking(cluster *clusterv1.Cluster, hookName string) bool { return conditions.GetReason(cluster, clusterv1.ClusterTopologyReconciledCondition) == clusterv1.ClusterTopologyReconciledHookBlockingReason && strings.Contains(conditions.GetMessage(cluster, clusterv1.ClusterTopologyReconciledCondition), hookName) } +func waitControlPlaneVersion(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, version string, intervals []interface{}) { + Byf("Waiting for control plane to have version %s", version) + controlPlaneVersion(ctx, c, cluster, version, intervals...) +} + +func checkControlPlaneVersion(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, version string) { + Byf("Checking control plane has version %s", version) + controlPlaneVersion(ctx, c, cluster, version, "10s", "2s") +} + +func controlPlaneVersion(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, version string, intervals ...interface{}) { + Eventually(func(_ Gomega) bool { + controlPlane, err := external.GetObjectFromContractVersionedRef(ctx, c, cluster.Spec.ControlPlaneRef, cluster.Namespace) + if err != nil { + return false + } + v, err := contract.ControlPlane().Version().Get(controlPlane) + if err != nil { + return false + } + if *v != version { + return false + } + + sv, err := contract.ControlPlane().StatusVersion().Get(controlPlane) + if err != nil { + return false + } + if *sv != version { + return false + } + + machineList := &clusterv1.MachineList{} + if err := c.List(ctx, machineList, client.InNamespace(cluster.Namespace), client.MatchingLabels{ + clusterv1.ClusterNameLabel: cluster.Name, + clusterv1.MachineControlPlaneLabel: "", + }); err != nil { + return false + } + + for i := range machineList.Items { + machine := &machineList.Items[i] + if machine.Spec.Version != version { + return false + } + + if !conditions.IsTrue(machine, clusterv1.MachineNodeHealthyCondition) { + return false + } + } + + return true + }, 
intervals...).Should(BeTrue(), fmt.Sprintf("Failed to wait for ControlPlane to reach version %s and Nodes to become healthy", version)) +} + +func waitWorkersVersions(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, workersVersion string, intervals []interface{}) { + Byf("Waiting for workers to have version %s", workersVersion) + workersVersions(ctx, c, cluster, workersVersion, intervals...) +} + +func checkWorkersVersions(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, workersVersion string) { + Byf("Checking workers have version %s", workersVersion) + workersVersions(ctx, c, cluster, workersVersion, "10s", "2s") +} + +func workersVersions(ctx context.Context, c client.Client, cluster *clusterv1.Cluster, workersVersion string, intervals ...interface{}) { + Eventually(func(_ Gomega) bool { + mds := framework.GetMachineDeploymentsByCluster(ctx, + framework.GetMachineDeploymentsByClusterInput{ClusterName: cluster.Name, Namespace: cluster.Namespace, Lister: c}) + for _, md := range mds { + if md.Spec.Template.Spec.Version != workersVersion { + return false + } + } + + mps := framework.GetMachinePoolsByCluster(ctx, + framework.GetMachinePoolsByClusterInput{ClusterName: cluster.Name, Namespace: cluster.Namespace, Lister: c}) + for _, mp := range mps { + if mp.Spec.Template.Spec.Version != workersVersion { + return false + } + } + + machineList := &clusterv1.MachineList{} + if err := c.List(ctx, machineList, client.InNamespace(cluster.Namespace), client.MatchingLabels{ + clusterv1.ClusterNameLabel: cluster.Name, + }); err != nil { + return false + } + + for i := range machineList.Items { + machine := &machineList.Items[i] + if util.IsControlPlaneMachine(machine) { + continue + } + + if machine.Spec.Version != workersVersion { + return false + } + + if !conditions.IsTrue(machine, clusterv1.MachineNodeHealthyCondition) { + return false + } + } + return true + }, intervals...).Should(BeTrue(), fmt.Sprintf("Failed to wait for workers to 
reach version %s and Nodes to become healthy", workersVersion)) +} + func dumpAndDeleteCluster(ctx context.Context, proxy framework.ClusterProxy, clusterctlConfigPath, namespace, clusterName, artifactFolder string) { By("Deleting the workload cluster") diff --git a/test/e2e/cluster_upgrade_runtimesdk_test.go b/test/e2e/cluster_upgrade_runtimesdk_test.go index cfe322ba7773..5ed4d3af430f 100644 --- a/test/e2e/cluster_upgrade_runtimesdk_test.go +++ b/test/e2e/cluster_upgrade_runtimesdk_test.go @@ -25,6 +25,7 @@ import ( clusterctlcluster "sigs.k8s.io/cluster-api/cmd/clusterctl/client/cluster" "sigs.k8s.io/cluster-api/test/framework" + "sigs.k8s.io/cluster-api/test/infrastructure/kind" ) var _ = Describe("When upgrading a workload cluster using ClusterClass with RuntimeSDK [ClusterClass]", Label("ClusterClass"), func() { @@ -79,3 +80,35 @@ var _ = Describe("When upgrading a workload cluster using ClusterClass in a diff } }) }) + +var _ = Describe("When performing chained upgrades for workload cluster using ClusterClass in a different NS with RuntimeSDK [ClusterClass]", Label("ClusterClass"), func() { + ClusterUpgradeWithRuntimeSDKSpec(ctx, func() ClusterUpgradeWithRuntimeSDKSpecInput { + return ClusterUpgradeWithRuntimeSDKSpecInput{ + E2EConfig: e2eConfig, + ClusterctlConfigPath: clusterctlConfigPath, + BootstrapClusterProxy: bootstrapClusterProxy, + ArtifactFolder: artifactFolder, + SkipCleanup: skipCleanup, + InfrastructureProvider: ptr.To("docker"), + PostUpgrade: func(proxy framework.ClusterProxy, namespace, clusterName string) { + // This check ensures that the resourceVersions are stable, i.e. it verifies there are no + // continuous reconciles when everything should be stable. + framework.ValidateResourceVersionStable(ctx, proxy, namespace, clusterctlcluster.FilterClusterObjectsWithNameFilter(clusterName)) + }, + // "upgrades" is the same as the "topology" flavor but with an additional MachinePool. 
+ Flavor: ptr.To("upgrades-runtimesdk"), + DeployClusterClassInSeparateNamespace: true, + // Setting Kubernetes version from + KubernetesVersionFrom: e2eConfig.MustGetVariable(KubernetesVersionChainedUpgradeFrom), + // use Kubernetes versions from the kind mapper. + KubernetesVersions: kind.GetKubernetesVersions(), + // The runtime extension gets deployed to the test-extension-system namespace and is exposed + // by the test-extension-webhook-service. + // The below values are used when creating the cluster-wide ExtensionConfig to refer + // the actual service. + ExtensionServiceNamespace: "test-extension-system", + ExtensionServiceName: "test-extension-webhook-service", + ExtensionConfigName: "k8s-chained-upgrade-with-runtimesdk-cross-ns", + } + }) +}) diff --git a/test/e2e/common.go b/test/e2e/common.go index 9ec9fd8d7d83..2b7566706ec6 100644 --- a/test/e2e/common.go +++ b/test/e2e/common.go @@ -34,17 +34,18 @@ import ( // Test suite constants for e2e config variables. const ( - KubernetesVersionManagement = "KUBERNETES_VERSION_MANAGEMENT" - KubernetesVersion = "KUBERNETES_VERSION" - CNIPath = "CNI" - CNIResources = "CNI_RESOURCES" - KubernetesVersionUpgradeFrom = "KUBERNETES_VERSION_UPGRADE_FROM" - KubernetesVersionUpgradeTo = "KUBERNETES_VERSION_UPGRADE_TO" - CPMachineTemplateUpgradeTo = "CONTROL_PLANE_MACHINE_TEMPLATE_UPGRADE_TO" - WorkersMachineTemplateUpgradeTo = "WORKERS_MACHINE_TEMPLATE_UPGRADE_TO" - EtcdVersionUpgradeTo = "ETCD_VERSION_UPGRADE_TO" - CoreDNSVersionUpgradeTo = "COREDNS_VERSION_UPGRADE_TO" - IPFamily = "IP_FAMILY" + KubernetesVersionManagement = "KUBERNETES_VERSION_MANAGEMENT" + KubernetesVersion = "KUBERNETES_VERSION" + CNIPath = "CNI" + CNIResources = "CNI_RESOURCES" + KubernetesVersionChainedUpgradeFrom = "KUBERNETES_VERSION_CHAINED_UPGRADE_FROM" + KubernetesVersionUpgradeFrom = "KUBERNETES_VERSION_UPGRADE_FROM" + KubernetesVersionUpgradeTo = "KUBERNETES_VERSION_UPGRADE_TO" + CPMachineTemplateUpgradeTo = 
"CONTROL_PLANE_MACHINE_TEMPLATE_UPGRADE_TO" + WorkersMachineTemplateUpgradeTo = "WORKERS_MACHINE_TEMPLATE_UPGRADE_TO" + EtcdVersionUpgradeTo = "ETCD_VERSION_UPGRADE_TO" + CoreDNSVersionUpgradeTo = "COREDNS_VERSION_UPGRADE_TO" + IPFamily = "IP_FAMILY" ) var stableReleaseMarkerPrefix = "go://sigs.k8s.io/cluster-api@v%s" diff --git a/test/e2e/config/docker.yaml b/test/e2e/config/docker.yaml index a613a54f2650..24b5f44f4dab 100644 --- a/test/e2e/config/docker.yaml +++ b/test/e2e/config/docker.yaml @@ -371,6 +371,7 @@ variables: # This avoids building node images in the default case which improves the test duration significantly. KUBERNETES_VERSION_MANAGEMENT: "v1.34.0" KUBERNETES_VERSION: "v1.34.0" + KUBERNETES_VERSION_CHAINED_UPGRADE_FROM: "v1.30.0" # Should always be KUBERNETES_VERSION_UPGRADE_TO - 4 minor KUBERNETES_VERSION_UPGRADE_FROM: "v1.33.4" KUBERNETES_VERSION_UPGRADE_TO: "v1.34.0" KUBERNETES_VERSION_LATEST_CI: "ci/latest-1.35" @@ -408,6 +409,8 @@ intervals: default/wait-machine-pool-nodes: ["5m", "10s"] default/wait-delete-cluster: ["3m", "10s"] default/wait-machine-upgrade: ["20m", "10s"] + default/wait-control-plane-upgrade: ["5m", "10s"] + default/wait-machine-deployment-upgrade: ["5m", "10s"] default/wait-machine-pool-upgrade: ["5m", "10s"] default/wait-nodes-ready: ["10m", "10s"] default/wait-machine-remediation: ["5m", "10s"] diff --git a/test/e2e/scale.go b/test/e2e/scale.go index 81843a76b313..365296aa3462 100644 --- a/test/e2e/scale.go +++ b/test/e2e/scale.go @@ -427,14 +427,16 @@ func ScaleSpec(ctx context.Context, inputGetter func() ScaleSpecInput) { By("Upgrade the workload clusters concurrently") // Get the upgrade function for upgrading the workload clusters. 
upgrader := getClusterUpgradeAndWaitFn(framework.UpgradeClusterTopologyAndWaitForUpgradeInput{ - ClusterProxy: input.BootstrapClusterProxy, - KubernetesUpgradeVersion: input.E2EConfig.MustGetVariable(KubernetesVersionUpgradeTo), - EtcdImageTag: input.E2EConfig.GetVariableOrEmpty(EtcdVersionUpgradeTo), - DNSImageTag: input.E2EConfig.GetVariableOrEmpty(CoreDNSVersionUpgradeTo), - WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), - WaitForKubeProxyUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), - WaitForDNSUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), - WaitForEtcdUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + ClusterProxy: input.BootstrapClusterProxy, + KubernetesUpgradeVersion: input.E2EConfig.MustGetVariable(KubernetesVersionUpgradeTo), + EtcdImageTag: input.E2EConfig.GetVariableOrEmpty(EtcdVersionUpgradeTo), + DNSImageTag: input.E2EConfig.GetVariableOrEmpty(CoreDNSVersionUpgradeTo), + WaitForControlPlaneToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-control-plane-upgrade"), + WaitForMachineDeploymentToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-deployment-upgrade"), + WaitForMachinePoolToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-pool-upgrade"), + WaitForKubeProxyUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + WaitForDNSUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + WaitForEtcdUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), }) clusterNamesToUpgrade := []string{} @@ -837,15 +839,17 @@ func getClusterUpgradeAndWaitFn(input framework.UpgradeClusterTopologyAndWaitFor // will be called multiple times and this closure will keep modifying the same `input` multiple // times. It is safer to pass the values explicitly into `UpgradeClusterTopologyAndWaitForUpgradeInput`. 
framework.UpgradeClusterTopologyAndWaitForUpgrade(ctx, framework.UpgradeClusterTopologyAndWaitForUpgradeInput{ - ClusterProxy: input.ClusterProxy, - Cluster: resources.cluster, - ControlPlane: resources.controlPlane, - MachineDeployments: resources.machineDeployments, - KubernetesUpgradeVersion: input.KubernetesUpgradeVersion, - WaitForMachinesToBeUpgraded: input.WaitForMachinesToBeUpgraded, - WaitForKubeProxyUpgrade: input.WaitForKubeProxyUpgrade, - WaitForDNSUpgrade: input.WaitForDNSUpgrade, - WaitForEtcdUpgrade: input.WaitForEtcdUpgrade, + ClusterProxy: input.ClusterProxy, + Cluster: resources.cluster, + ControlPlane: resources.controlPlane, + MachineDeployments: resources.machineDeployments, + KubernetesUpgradeVersion: input.KubernetesUpgradeVersion, + WaitForControlPlaneToBeUpgraded: input.WaitForControlPlaneToBeUpgraded, + WaitForMachineDeploymentToBeUpgraded: input.WaitForMachineDeploymentToBeUpgraded, + WaitForMachinePoolToBeUpgraded: input.WaitForMachinePoolToBeUpgraded, + WaitForKubeProxyUpgrade: input.WaitForKubeProxyUpgrade, + WaitForDNSUpgrade: input.WaitForDNSUpgrade, + WaitForEtcdUpgrade: input.WaitForEtcdUpgrade, // TODO: (killianmuldoon) Checking the kube-proxy, etcd and DNS version doesn't work as we can't access the control plane endpoint for the workload cluster // from the host. Need to figure out a way to route the calls to the workload Cluster correctly. EtcdImageTag: "", diff --git a/test/e2e/self_hosted.go b/test/e2e/self_hosted.go index 5cef4f93d664..c071207b9f9c 100644 --- a/test/e2e/self_hosted.go +++ b/test/e2e/self_hosted.go @@ -365,19 +365,20 @@ func SelfHostedSpec(ctx context.Context, inputGetter func() SelfHostedSpecInput) // Cluster is using ClusterClass, upgrade via topology. 
By("Upgrading the Cluster topology") framework.UpgradeClusterTopologyAndWaitForUpgrade(ctx, framework.UpgradeClusterTopologyAndWaitForUpgradeInput{ - ClusterProxy: selfHostedClusterProxy, - Cluster: clusterResources.Cluster, - ControlPlane: clusterResources.ControlPlane, - EtcdImageTag: etcdVersionUpgradeTo, - DNSImageTag: coreDNSVersionUpgradeTo, - MachineDeployments: clusterResources.MachineDeployments, - MachinePools: clusterResources.MachinePools, - KubernetesUpgradeVersion: input.E2EConfig.MustGetVariable(KubernetesVersionUpgradeTo), - WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), - WaitForMachinePoolToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-pool-upgrade"), - WaitForKubeProxyUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), - WaitForDNSUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), - WaitForEtcdUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + ClusterProxy: selfHostedClusterProxy, + Cluster: clusterResources.Cluster, + ControlPlane: clusterResources.ControlPlane, + EtcdImageTag: etcdVersionUpgradeTo, + DNSImageTag: coreDNSVersionUpgradeTo, + MachineDeployments: clusterResources.MachineDeployments, + MachinePools: clusterResources.MachinePools, + KubernetesUpgradeVersion: input.E2EConfig.MustGetVariable(KubernetesVersionUpgradeTo), + WaitForControlPlaneToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-control-plane-upgrade"), + WaitForMachineDeploymentToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-deployment-upgrade"), + WaitForMachinePoolToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-pool-upgrade"), + WaitForKubeProxyUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + WaitForDNSUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + WaitForEtcdUpgrade: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), }) } else { 
// Cluster is not using ClusterClass, upgrade via individual resources. diff --git a/test/extension/handlers/lifecycle/handlers.go b/test/extension/handlers/lifecycle/handlers.go index bdebc784a4f7..eedcf5926379 100644 --- a/test/extension/handlers/lifecycle/handlers.go +++ b/test/extension/handlers/lifecycle/handlers.go @@ -24,6 +24,7 @@ package lifecycle import ( "context" "fmt" + "strings" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -35,6 +36,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/yaml" + clusterv1beta1 "sigs.k8s.io/cluster-api/api/core/v1beta1" runtimehooksv1 "sigs.k8s.io/cluster-api/api/runtime/hooks/v1alpha1" runtimecatalog "sigs.k8s.io/cluster-api/exp/runtime/catalog" ) @@ -70,12 +72,12 @@ func (m *ExtensionHandlers) DoBeforeClusterCreate(ctx context.Context, request * settings := request.GetSettings() - if err := m.readResponseFromConfigMap(ctx, request.Cluster.Name, request.Cluster.Namespace, runtimehooksv1.BeforeClusterCreate, settings, response); err != nil { + if err := m.readResponseFromConfigMap(ctx, &request.Cluster, runtimehooksv1.BeforeClusterCreate, nil, settings, response); err != nil { response.Status = runtimehooksv1.ResponseStatusFailure response.Message = err.Error() return } - if err := m.recordCallInConfigMap(ctx, request.Cluster.Name, request.Cluster.Namespace, runtimehooksv1.BeforeClusterCreate, settings[extensionConfigNameKey], response); err != nil { + if err := m.recordCallInConfigMap(ctx, &request.Cluster, runtimehooksv1.BeforeClusterCreate, nil, settings, response); err != nil { response.Status = runtimehooksv1.ResponseStatusFailure response.Message = err.Error() } @@ -92,13 +94,13 @@ func (m *ExtensionHandlers) DoBeforeClusterUpgrade(ctx context.Context, request settings := request.GetSettings() - if err := m.readResponseFromConfigMap(ctx, request.Cluster.Name, request.Cluster.Namespace, runtimehooksv1.BeforeClusterUpgrade, settings, response); err != nil { + if err := 
m.readResponseFromConfigMap(ctx, &request.Cluster, runtimehooksv1.BeforeClusterUpgrade, nil, settings, response); err != nil { response.Status = runtimehooksv1.ResponseStatusFailure response.Message = err.Error() return } - if err := m.recordCallInConfigMap(ctx, request.Cluster.Name, request.Cluster.Namespace, runtimehooksv1.BeforeClusterUpgrade, settings[extensionConfigNameKey], response); err != nil { + if err := m.recordCallInConfigMap(ctx, &request.Cluster, runtimehooksv1.BeforeClusterUpgrade, nil, settings, response); err != nil { response.Status = runtimehooksv1.ResponseStatusFailure response.Message = err.Error() } @@ -115,13 +117,13 @@ func (m *ExtensionHandlers) DoAfterControlPlaneInitialized(ctx context.Context, settings := request.GetSettings() - if err := m.readResponseFromConfigMap(ctx, request.Cluster.Name, request.Cluster.Namespace, runtimehooksv1.AfterControlPlaneInitialized, settings, response); err != nil { + if err := m.readResponseFromConfigMap(ctx, &request.Cluster, runtimehooksv1.AfterControlPlaneInitialized, nil, settings, response); err != nil { response.Status = runtimehooksv1.ResponseStatusFailure response.Message = err.Error() return } - if err := m.recordCallInConfigMap(ctx, request.Cluster.Name, request.Cluster.Namespace, runtimehooksv1.AfterControlPlaneInitialized, settings[extensionConfigNameKey], response); err != nil { + if err := m.recordCallInConfigMap(ctx, &request.Cluster, runtimehooksv1.AfterControlPlaneInitialized, nil, settings, response); err != nil { response.Status = runtimehooksv1.ResponseStatusFailure response.Message = err.Error() } @@ -136,15 +138,16 @@ func (m *ExtensionHandlers) DoAfterControlPlaneUpgrade(ctx context.Context, requ ctx = ctrl.LoggerInto(ctx, log) log.Info("AfterControlPlaneUpgrade is called") + attributes := []string{request.KubernetesVersion} settings := request.GetSettings() - if err := m.readResponseFromConfigMap(ctx, request.Cluster.Name, request.Cluster.Namespace, 
runtimehooksv1.AfterControlPlaneUpgrade, settings, response); err != nil { + if err := m.readResponseFromConfigMap(ctx, &request.Cluster, runtimehooksv1.AfterControlPlaneUpgrade, attributes, settings, response); err != nil { response.Status = runtimehooksv1.ResponseStatusFailure response.Message = err.Error() return } - if err := m.recordCallInConfigMap(ctx, request.Cluster.Name, request.Cluster.Namespace, runtimehooksv1.AfterControlPlaneUpgrade, settings[extensionConfigNameKey], response); err != nil { + if err := m.recordCallInConfigMap(ctx, &request.Cluster, runtimehooksv1.AfterControlPlaneUpgrade, attributes, settings, response); err != nil { response.Status = runtimehooksv1.ResponseStatusFailure response.Message = err.Error() } @@ -161,13 +164,13 @@ func (m *ExtensionHandlers) DoAfterClusterUpgrade(ctx context.Context, request * settings := request.GetSettings() - if err := m.readResponseFromConfigMap(ctx, request.Cluster.Name, request.Cluster.Namespace, runtimehooksv1.AfterClusterUpgrade, settings, response); err != nil { + if err := m.readResponseFromConfigMap(ctx, &request.Cluster, runtimehooksv1.AfterClusterUpgrade, nil, settings, response); err != nil { response.Status = runtimehooksv1.ResponseStatusFailure response.Message = err.Error() return } - if err := m.recordCallInConfigMap(ctx, request.Cluster.Name, request.Cluster.Namespace, runtimehooksv1.AfterClusterUpgrade, settings[extensionConfigNameKey], response); err != nil { + if err := m.recordCallInConfigMap(ctx, &request.Cluster, runtimehooksv1.AfterClusterUpgrade, nil, settings, response); err != nil { response.Status = runtimehooksv1.ResponseStatusFailure response.Message = err.Error() } @@ -184,12 +187,12 @@ func (m *ExtensionHandlers) DoBeforeClusterDelete(ctx context.Context, request * settings := request.GetSettings() - if err := m.readResponseFromConfigMap(ctx, request.Cluster.Name, request.Cluster.Namespace, runtimehooksv1.BeforeClusterDelete, settings, response); err != nil { + if err := 
m.readResponseFromConfigMap(ctx, &request.Cluster, runtimehooksv1.BeforeClusterDelete, nil, settings, response); err != nil { response.Status = runtimehooksv1.ResponseStatusFailure response.Message = err.Error() return } - if err := m.recordCallInConfigMap(ctx, request.Cluster.Name, request.Cluster.Namespace, runtimehooksv1.BeforeClusterDelete, settings[extensionConfigNameKey], response); err != nil { + if err := m.recordCallInConfigMap(ctx, &request.Cluster, runtimehooksv1.BeforeClusterDelete, nil, settings, response); err != nil { response.Status = runtimehooksv1.ResponseStatusFailure response.Message = err.Error() } @@ -197,27 +200,60 @@ func (m *ExtensionHandlers) DoBeforeClusterDelete(ctx context.Context, request * // TODO: consider if to cleanup the ConfigMap after gating Cluster deletion. } -func (m *ExtensionHandlers) readResponseFromConfigMap(ctx context.Context, clusterName, clusterNamespace string, hook runtimecatalog.Hook, settings map[string]string, response runtimehooksv1.ResponseObject) error { - hookName := runtimecatalog.HookName(hook) +func (m *ExtensionHandlers) readResponseFromConfigMap(ctx context.Context, cluster *clusterv1beta1.Cluster, hook runtimecatalog.Hook, attributes []string, settings map[string]string, response runtimehooksv1.ResponseObject) error { + hookName := computeHookName(hook, attributes) configMap := &corev1.ConfigMap{} - configMapName := fmt.Sprintf("%s-%s-test-extension-hookresponses", clusterName, settings[extensionConfigNameKey]) - if err := m.client.Get(ctx, client.ObjectKey{Namespace: clusterNamespace, Name: configMapName}, configMap); err != nil { + if _, ok := settings[extensionConfigNameKey]; !ok { + return errors.New(extensionConfigNameKey + " must be set in runtime extension settings") + } + configMapName := configMapName(cluster.Name, settings[extensionConfigNameKey]) + log := ctrl.LoggerFrom(ctx) + if err := m.client.Get(ctx, client.ObjectKey{Namespace: cluster.Namespace, Name: configMapName}, configMap); err != nil { if
apierrors.IsNotFound(err) { // A ConfigMap of responses does not exist. Create one now. - // The ConfigMap is created with blocking responses if "defaultAllHandlersToBlocking" is set to "true" - // in the settings. - // This allows the test-extension to have non-blocking behavior by default but can be switched to blocking - // as needed, example: during E2E testing. - defaultAllHandlersToBlocking := settings["defaultAllHandlersToBlocking"] == "true" - configMap = responsesConfigMap(clusterName, clusterNamespace, settings[extensionConfigNameKey], defaultAllHandlersToBlocking) + configMap = &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: configMapName, + Namespace: cluster.Namespace, + }, + } if err := m.client.Create(ctx, configMap); err != nil { - return errors.Wrapf(err, "failed to create the ConfigMap %s", klog.KRef(clusterNamespace, configMapName)) + return errors.Wrapf(err, "failed to create the ConfigMap %s", klog.KRef(cluster.Namespace, configMapName)) } + log.Info(fmt.Sprintf("Created ConfigMap %s", configMapName)) } else { - return errors.Wrapf(err, "failed to read the ConfigMap %s", klog.KRef(clusterNamespace, configMapName)) + return errors.Wrapf(err, "failed to read the ConfigMap %s", klog.KRef(cluster.Namespace, configMapName)) + } + } + data, ok := configMap.Data[hookName+"-preloadedResponse"] + if !ok { + // If there is no preloadedResponse for the given hook, create one with blocking responses if "defaultAllHandlersToBlocking" is set to "true" in the settings. + // This allows the test-extension to have non-blocking behavior by default but can be switched to blocking as needed, example: during E2E testing. + retryAfterSeconds := 0 + if settings["defaultAllHandlersToBlocking"] == "true" { + retryAfterSeconds = 5 + } + + switch runtimecatalog.HookName(hook) { + // Blocking hooks are set to return RetryAfterSeconds initially. These will be changed during the test. 
+ case "BeforeClusterCreate": + data = fmt.Sprintf(`{"Status": "Success", "RetryAfterSeconds": %d}`, retryAfterSeconds) + case "BeforeClusterUpgrade": + data = fmt.Sprintf(`{"Status": "Success", "RetryAfterSeconds": %d}`, retryAfterSeconds) + case "AfterControlPlaneUpgrade": + data = fmt.Sprintf(`{"Status": "Success", "RetryAfterSeconds": %d}`, retryAfterSeconds) + case "BeforeClusterDelete": + data = fmt.Sprintf(`{"Status": "Success", "RetryAfterSeconds": %d}`, retryAfterSeconds) + + // Non-blocking hooks are set to Status:Success. + case "AfterControlPlaneInitialized": + data = `{"Status": "Success"}` + case "AfterClusterUpgrade": + data = `{"Status": "Success"}` } } - if err := yaml.Unmarshal([]byte(configMap.Data[hookName+"-preloadedResponse"]), response); err != nil { + + if err := yaml.Unmarshal([]byte(data), response); err != nil { return errors.Wrapf(err, "failed to read %q response information from ConfigMap", hook) } if r, ok := response.(runtimehooksv1.RetryResponseObject); ok { @@ -227,40 +263,15 @@ func (m *ExtensionHandlers) readResponseFromConfigMap(ctx context.Context, clust return nil } -// responsesConfigMap generates a ConfigMap with preloaded responses for the test extension. -// If defaultAllHandlersToBlocking is set to true, all the preloaded responses are set to blocking. -func responsesConfigMap(clusterName, clusterNamespace string, extensionConfigName string, defaultAllHandlersToBlocking bool) *corev1.ConfigMap { - retryAfterSeconds := 0 - if defaultAllHandlersToBlocking { - retryAfterSeconds = 5 - } - - return &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: configMapName(clusterName, extensionConfigName), - Namespace: clusterNamespace, - }, - // Set the initial preloadedResponses for each of the tested hooks. - Data: map[string]string{ - // Blocking hooks are set to return RetryAfterSeconds initially. These will be changed during the test. 
- "BeforeClusterCreate-preloadedResponse": fmt.Sprintf(`{"Status": "Success", "RetryAfterSeconds": %d}`, retryAfterSeconds), - "BeforeClusterUpgrade-preloadedResponse": fmt.Sprintf(`{"Status": "Success", "RetryAfterSeconds": %d}`, retryAfterSeconds), - "AfterControlPlaneUpgrade-preloadedResponse": fmt.Sprintf(`{"Status": "Success", "RetryAfterSeconds": %d}`, retryAfterSeconds), - "BeforeClusterDelete-preloadedResponse": fmt.Sprintf(`{"Status": "Success", "RetryAfterSeconds": %d}`, retryAfterSeconds), - - // Non-blocking hooks are set to Status:Success. - "AfterControlPlaneInitialized-preloadedResponse": `{"Status": "Success"}`, - "AfterClusterUpgrade-preloadedResponse": `{"Status": "Success"}`, - }, - } -} - -func (m *ExtensionHandlers) recordCallInConfigMap(ctx context.Context, clusterName, clusterNamespace string, hook runtimecatalog.Hook, extensionConfigName string, response runtimehooksv1.ResponseObject) error { - hookName := runtimecatalog.HookName(hook) +func (m *ExtensionHandlers) recordCallInConfigMap(ctx context.Context, cluster *clusterv1beta1.Cluster, hook runtimecatalog.Hook, attributes []string, settings map[string]string, response runtimehooksv1.ResponseObject) error { + hookName := computeHookName(hook, attributes) configMap := &corev1.ConfigMap{} - configMapName := configMapName(clusterName, extensionConfigName) - if err := m.client.Get(ctx, client.ObjectKey{Namespace: clusterNamespace, Name: configMapName}, configMap); err != nil { - return errors.Wrapf(err, "failed to read the ConfigMap %s", klog.KRef(clusterNamespace, configMapName)) + if _, ok := settings[extensionConfigNameKey]; !ok { + return errors.New(extensionConfigNameKey + " must be set in runtime extension settings") + } + configMapName := configMapName(cluster.Name, settings[extensionConfigNameKey]) + if err := m.client.Get(ctx, client.ObjectKey{Namespace: cluster.Namespace, Name: configMapName}, configMap); err != nil { + return errors.Wrapf(err, "failed to read the ConfigMap %s", 
klog.KRef(cluster.Namespace, configMapName)) } var patch client.Patch if r, ok := response.(runtimehooksv1.RetryResponseObject); ok { @@ -272,7 +283,7 @@ func (m *ExtensionHandlers) recordCallInConfigMap(ctx context.Context, clusterNa []byte(fmt.Sprintf(`{"data":{"%s-actualResponseStatus":"%s"}}`, hookName, response.GetStatus()))) //nolint:gocritic } if err := m.client.Patch(ctx, configMap, patch); err != nil { - return errors.Wrapf(err, "failed to update the ConfigMap %s", klog.KRef(clusterNamespace, configMapName)) + return errors.Wrapf(err, "failed to update the ConfigMap %s", klog.KRef(cluster.Namespace, configMapName)) } return nil } @@ -280,3 +291,7 @@ func (m *ExtensionHandlers) recordCallInConfigMap(ctx context.Context, clusterNa func configMapName(clusterName, extensionConfigName string) string { return fmt.Sprintf("%s-%s-test-extension-hookresponses", clusterName, extensionConfigName) } + +func computeHookName(hook runtimecatalog.Hook, attributes []string) string { + return strings.Join(append([]string{runtimecatalog.HookName(hook)}, attributes...), "-") +} diff --git a/test/framework/cluster_topology_helpers.go b/test/framework/cluster_topology_helpers.go index 11f7f386c884..37937843bca6 100644 --- a/test/framework/cluster_topology_helpers.go +++ b/test/framework/cluster_topology_helpers.go @@ -59,22 +59,23 @@ func GetClusterClassByName(ctx context.Context, input GetClusterClassByNameInput // UpgradeClusterTopologyAndWaitForUpgradeInput is the input type for UpgradeClusterTopologyAndWaitForUpgrade. 
type UpgradeClusterTopologyAndWaitForUpgradeInput struct { - ClusterProxy ClusterProxy - Cluster *clusterv1.Cluster - ControlPlane *controlplanev1.KubeadmControlPlane - EtcdImageTag string - DNSImageTag string - MachineDeployments []*clusterv1.MachineDeployment - MachinePools []*clusterv1.MachinePool - KubernetesUpgradeVersion string - WaitForMachinesToBeUpgraded []interface{} - WaitForMachinePoolToBeUpgraded []interface{} - WaitForKubeProxyUpgrade []interface{} - WaitForDNSUpgrade []interface{} - WaitForEtcdUpgrade []interface{} - PreWaitForControlPlaneToBeUpgraded func() - PreWaitForWorkersToBeUpgraded func() - SkipKubeProxyCheck bool + ClusterProxy ClusterProxy + Cluster *clusterv1.Cluster + ControlPlane *controlplanev1.KubeadmControlPlane + EtcdImageTag string + DNSImageTag string + MachineDeployments []*clusterv1.MachineDeployment + MachinePools []*clusterv1.MachinePool + KubernetesUpgradeVersion string + WaitForControlPlaneToBeUpgraded []interface{} + WaitForMachineDeploymentToBeUpgraded []interface{} + WaitForMachinePoolToBeUpgraded []interface{} + WaitForKubeProxyUpgrade []interface{} + WaitForDNSUpgrade []interface{} + WaitForEtcdUpgrade []interface{} + PreWaitForControlPlaneToBeUpgraded func() + PreWaitForWorkersToBeUpgraded func() + SkipKubeProxyCheck bool } // UpgradeClusterTopologyAndWaitForUpgrade upgrades a Cluster topology and waits for it to be upgraded. 
@@ -88,7 +89,7 @@ func UpgradeClusterTopologyAndWaitForUpgrade(ctx context.Context, input UpgradeC mgmtClient := input.ClusterProxy.GetClient() - log.Logf("Patching the new Kubernetes version to Cluster topology") + log.Logf("Patching the new Kubernetes version %s to Cluster topology", input.KubernetesUpgradeVersion) patchHelper, err := patch.NewHelper(input.Cluster, mgmtClient) Expect(err).ToNot(HaveOccurred()) @@ -121,7 +122,7 @@ func UpgradeClusterTopologyAndWaitForUpgrade(ctx context.Context, input UpgradeC Cluster: input.Cluster, MachineCount: int(*input.ControlPlane.Spec.Replicas), KubernetesUpgradeVersion: input.KubernetesUpgradeVersion, - }, input.WaitForMachinesToBeUpgraded...) + }, input.WaitForControlPlaneToBeUpgraded...) workloadCluster := input.ClusterProxy.GetWorkloadCluster(ctx, input.Cluster.Namespace, input.Cluster.Name) workloadClient := workloadCluster.GetClient() @@ -173,7 +174,7 @@ func UpgradeClusterTopologyAndWaitForUpgrade(ctx context.Context, input UpgradeC MachineCount: int(*deployment.Spec.Replicas), KubernetesUpgradeVersion: input.KubernetesUpgradeVersion, MachineDeployment: *deployment, - }, input.WaitForMachinesToBeUpgraded...) + }, input.WaitForMachineDeploymentToBeUpgraded...) } } diff --git a/test/infrastructure/docker/README.md b/test/infrastructure/docker/README.md index 41eec801f888..2aefd89d864d 100644 --- a/test/infrastructure/docker/README.md +++ b/test/infrastructure/docker/README.md @@ -17,8 +17,8 @@ For a complete overview, please refer to the documentation available [here](http ## Testing In order to test your local changes, go to the top level directory of this project, `cluster-api/` and run -`make test-docker-infrastructure` to run the unit tests. +`make test-infrastructure` to run the unit tests. **Note:** `make test-e2e` runs the CAPI E2E tests that are based on CAPD (CAPD does not have a separated e2e suite). -This make target will build an image based on the local source code and use that image during testing. 
\ No newline at end of file +This make target will build an image based on the local source code and use that image during testing. diff --git a/test/infrastructure/kind/mapper.go b/test/infrastructure/kind/mapper.go index 8c99c9cdbce3..dd9384aa47cb 100644 --- a/test/infrastructure/kind/mapper.go +++ b/test/infrastructure/kind/mapper.go @@ -36,6 +36,7 @@ package kind import ( "fmt" + "sort" "github.com/blang/semver/v4" @@ -625,3 +626,27 @@ func pickFirstNotEmpty(a, b string) string { } return b } + +// GetKubernetesVersions returns a sorted list with all the Kubernetes versions known by the kind mapper. +// Note: we are returning only Kubernetes versions for the latest kind mode. +func GetKubernetesVersions() []string { + versionMap := map[string]semver.Version{} + for _, m := range preBuiltMappings { + if m.Mode != latestMode { + continue + } + versionMap[m.KubernetesVersion.String()] = m.KubernetesVersion + } + + semVersions := []semver.Version{} + for _, v := range versionMap { + semVersions = append(semVersions, v) + } + sort.Slice(semVersions, func(i, j int) bool { return semVersions[i].LT(semVersions[j]) }) + + versions := make([]string, len(semVersions)) + for i, s := range semVersions { + versions[i] = fmt.Sprintf("v%s", s) + } + return versions +} diff --git a/test/infrastructure/kind/mapper_test.go b/test/infrastructure/kind/mapper_test.go index 952ebcf0594f..686568105af8 100644 --- a/test/infrastructure/kind/mapper_test.go +++ b/test/infrastructure/kind/mapper_test.go @@ -127,3 +127,26 @@ func TestGetMapping(t *testing.T) { }) } } + +func TestGetKubernetesVersion(t *testing.T) { + g := NewWithT(t) + + got := GetKubernetesVersions() + + g.Expect(got).To(Equal([]string{ + "v1.21.14", + "v1.22.17", + "v1.23.17", + "v1.24.15", "v1.24.17", + "v1.25.11", "v1.25.16", + "v1.26.6", "v1.26.13", "v1.26.14", "v1.26.15", + "v1.27.3", "v1.27.10", "v1.27.11", "v1.27.13", "v1.27.16", + "v1.28.0", "v1.28.6", "v1.28.7", "v1.28.9", "v1.28.12", "v1.28.13", "v1.28.15", 
"v1.29.0", "v1.29.1", "v1.29.2", "v1.29.4", "v1.29.7", "v1.29.8", "v1.29.10", "v1.29.12", "v1.29.14", + "v1.30.0", "v1.30.3", "v1.30.4", "v1.30.6", "v1.30.8", "v1.30.10", "v1.30.13", + "v1.31.0", "v1.31.2", "v1.31.4", "v1.31.6", "v1.31.9", "v1.31.12", + "v1.32.0", "v1.32.2", "v1.32.5", "v1.32.8", + "v1.33.0", "v1.33.1", "v1.33.4", + "v1.34.0", + })) +} diff --git a/util/test/builder/builders.go b/util/test/builder/builders.go index cd3df11864c2..28777cfc7f51 100644 --- a/util/test/builder/builders.go +++ b/util/test/builder/builders.go @@ -359,6 +359,7 @@ type ClusterClassBuilder struct { statusVariables []clusterv1.ClusterClassStatusVariable patches []clusterv1.ClusterClassPatch conditions []metav1.Condition + versions []string } // ClusterClass returns a ClusterClassBuilder with the given name and namespace. @@ -480,6 +481,12 @@ func (c *ClusterClassBuilder) WithWorkerMachinePoolClasses(mpcs ...clusterv1.Mac return c } +// WithVersions sets versions in the ClusterClass. +func (c *ClusterClassBuilder) WithVersions(versions ...string) *ClusterClassBuilder { + c.versions = versions + return c +} + // Build takes the objects and variables in the ClusterClass builder and uses them to create a ClusterClass object. func (c *ClusterClassBuilder) Build() *clusterv1.ClusterClass { obj := &clusterv1.ClusterClass{ @@ -534,6 +541,7 @@ func (c *ClusterClassBuilder) Build() *clusterv1.ClusterClass { obj.Spec.Workers.MachineDeployments = c.machineDeploymentClasses obj.Spec.Workers.MachinePools = c.machinePoolClasses + obj.Spec.KubernetesVersions = c.versions return obj } @@ -1424,6 +1432,18 @@ func ControlPlane(namespace, name string) *ControlPlaneBuilder { } } +// WithLabels adds the passed labels to the ControlPlaneBuilder. +func (c *ControlPlaneBuilder) WithLabels(labels map[string]string) *ControlPlaneBuilder { + c.obj.SetLabels(labels) + return c +} + +// WithAnnotations adds the passed annotations to the ControlPlaneBuilder. 
+func (c *ControlPlaneBuilder) WithAnnotations(annotations map[string]string) *ControlPlaneBuilder { + c.obj.SetAnnotations(annotations) + return c +} + // WithInfrastructureMachineTemplate adds the given unstructured object to the ControlPlaneBuilder as its InfrastructureMachineTemplate. func (c *ControlPlaneBuilder) WithInfrastructureMachineTemplate(t *unstructured.Unstructured, contractVersion string) *ControlPlaneBuilder { if contractVersion == "v1beta1" { @@ -1546,18 +1566,6 @@ func (c *TestControlPlaneBuilder) WithVersion(version string) *TestControlPlaneB return c } -// WithLabels adds the passed labels to the ControlPlaneBuilder. -func (c *ControlPlaneBuilder) WithLabels(labels map[string]string) *ControlPlaneBuilder { - c.obj.SetLabels(labels) - return c -} - -// WithAnnotations adds the passed annotations to the ControlPlaneBuilder. -func (c *ControlPlaneBuilder) WithAnnotations(annotations map[string]string) *ControlPlaneBuilder { - c.obj.SetAnnotations(annotations) - return c -} - // WithSpecFields sets a map of spec fields on the unstructured object. The keys in the map represent the path and the value corresponds // to the value of the spec field. // diff --git a/util/test/builder/zz_generated.deepcopy.go b/util/test/builder/zz_generated.deepcopy.go index 426f5dcbb2d2..3f5cf4a6c67f 100644 --- a/util/test/builder/zz_generated.deepcopy.go +++ b/util/test/builder/zz_generated.deepcopy.go @@ -201,6 +201,11 @@ func (in *ClusterClassBuilder) DeepCopyInto(out *ClusterClassBuilder) { (*in)[i].DeepCopyInto(&(*out)[i]) } } + if in.versions != nil { + in, out := &in.versions, &out.versions + *out = make([]string, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new ClusterClassBuilder.