diff --git a/internal/controller/etcdcluster_controller.go b/internal/controller/etcdcluster_controller.go
index 7c46339..ae6f0cf 100644
--- a/internal/controller/etcdcluster_controller.go
+++ b/internal/controller/etcdcluster_controller.go
@@ -48,6 +48,16 @@ type EtcdClusterReconciler struct {
 	ImageRegistry string
 }
 
+// reconcileState holds all transient data for a single reconciliation loop.
+// Every phase of Reconcile stores intermediate information here so that
+// subsequent phases can operate without additional lookups.
+type reconcileState struct {
+	cluster        *ecv1alpha1.EtcdCluster      // cluster custom resource currently being reconciled
+	sts            *appsv1.StatefulSet          // associated StatefulSet for the cluster
+	memberListResp *clientv3.MemberListResponse // member list fetched from the etcd cluster
+	memberHealth   []etcdutils.EpHealth         // health information for each etcd member
+}
+
 // +kubebuilder:rbac:groups=operator.etcd.io,resources=etcdclusters,verbs=get;list;watch;create;update;patch;delete
 // +kubebuilder:rbac:groups=operator.etcd.io,resources=etcdclusters/status,verbs=get;update;patch
 // +kubebuilder:rbac:groups=operator.etcd.io,resources=etcdclusters/finalizers,verbs=update
@@ -60,103 +70,151 @@ type EtcdClusterReconciler struct {
 // +kubebuilder:rbac:groups="cert-manager.io",resources=clusterissuers,verbs=get;list;watch
 // +kubebuilder:rbac:groups="cert-manager.io",resources=issuers,verbs=get;list;watch
 
-// Reconcile is part of the main kubernetes reconciliation loop which aims to
-// move the current state of the cluster closer to the desired state.
-// TODO(user): Modify the Reconcile function to compare the state specified by
-// the EtcdCluster object against the actual cluster state, and then
-// perform operations to make the cluster state reflect the state specified by
-// the user.
+// Reconcile orchestrates a single reconciliation cycle for an EtcdCluster:
+// it fetches resources, ensures the required Kubernetes objects exist, checks
+// the health of the etcd cluster, and then adjusts cluster state to match the
+// desired specification. Each phase is handled by a dedicated helper method.
 //
-// For more details, check Reconcile and its Result here:
-// - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.19.1/pkg/reconcile
+// For more details on the controller-runtime Reconcile contract see:
+// https://pkg.go.dev/sigs.k8s.io/controller-runtime/pkg/reconcile
 func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
-	logger := log.FromContext(ctx)
+	state, res, err := r.fetchAndValidateState(ctx, req)
+	if state == nil || err != nil {
+		return res, err
+	}
 
-	// Fetch the EtcdCluster resource
-	etcdCluster := &ecv1alpha1.EtcdCluster{}
+	if bootstrapRes, err := r.bootstrapStatefulSet(ctx, state); err != nil || !bootstrapRes.IsZero() {
+		return bootstrapRes, err
+	}
 
-	err := r.Get(ctx, req.NamespacedName, etcdCluster)
-	if err != nil {
+	if err = r.performHealthChecks(ctx, state); err != nil {
+		return ctrl.Result{}, err
+	}
+
+	return r.reconcileClusterState(ctx, state)
+}
+
+// fetchAndValidateState retrieves the EtcdCluster and its StatefulSet and ensures
+// the StatefulSet, if present, is owned by the cluster. It returns a populated
+// reconcileState for use in later phases. A non-empty ctrl.Result requests a
+// requeue when transient issues occur.
+func (r *EtcdClusterReconciler) fetchAndValidateState(ctx context.Context, req ctrl.Request) (*reconcileState, ctrl.Result, error) {
+	logger := log.FromContext(ctx)
+
+	ec := &ecv1alpha1.EtcdCluster{}
+	if err := r.Get(ctx, req.NamespacedName, ec); err != nil {
 		if errors.IsNotFound(err) {
 			logger.Info("EtcdCluster resource not found. Ignoring since object may have been deleted")
-			return ctrl.Result{}, nil
+			return nil, ctrl.Result{}, nil
 		}
-		return ctrl.Result{}, err
+		return nil, ctrl.Result{}, err
 	}
 
 	// Determine desired etcd image registry
-	if etcdCluster.Spec.ImageRegistry == "" {
-		etcdCluster.Spec.ImageRegistry = r.ImageRegistry
+	if ec.Spec.ImageRegistry == "" {
+		ec.Spec.ImageRegistry = r.ImageRegistry
 	}
 
-	// Create Client Certificate for etcd-operator to communicate with the etcdCluster
-	if etcdCluster.Spec.TLS != nil {
-		clientCertErr := createClientCertificate(ctx, etcdCluster, r.Client)
-		if clientCertErr != nil {
-			logger.Error(clientCertErr, "Failed to create Client Certificate.")
+	// Ensure the operator has TLS credentials when the cluster requests TLS.
+	if ec.Spec.TLS != nil {
+		if err := createClientCertificate(ctx, ec, r.Client); err != nil {
+			logger.Error(err, "Failed to create Client Certificate.")
 		}
 	} else {
 		// TODO: instead of logging error, set default autoConfig
-		logger.Error(nil, fmt.Sprintf("missing TLS config for %s, ", etcdCluster.Name+
-			"\n running etcd-cluster without TLS protection is NOT recommended for production."))
+		logger.Error(nil, fmt.Sprintf(
+			"missing TLS config for %s,\n running etcd-cluster without TLS protection is NOT recommended for production.",
+			ec.Name,
+		))
 	}
 
-	logger.Info("Reconciling EtcdCluster", "spec", etcdCluster.Spec)
+	logger.Info("Reconciling EtcdCluster", "spec", ec.Spec)
 
-	// Get the statefulsets which has the same name as the EtcdCluster resource
-	sts, err := getStatefulSet(ctx, r.Client, etcdCluster.Name, etcdCluster.Namespace)
+	sts, err := getStatefulSet(ctx, r.Client, ec.Name, ec.Namespace)
 	if err != nil {
 		if errors.IsNotFound(err) {
-			logger.Info("Creating StatefulSet with 0 replica", "expectedSize", etcdCluster.Spec.Size)
-			// Create a new StatefulSet
-
-			sts, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, 0, r.Scheme)
-			if err != nil {
-				return ctrl.Result{}, err
-			}
+			sts = nil
 		} else {
-			// If an error occurs during Get/Create, we'll requeue the item, so we can
-			// attempt processing again later. This could have been caused by a
-			// temporary network failure, or any other transient reason.
 			logger.Error(err, "Failed to get StatefulSet. Requesting requeue")
-			return ctrl.Result{RequeueAfter: requeueDuration}, nil
+			return nil, ctrl.Result{RequeueAfter: requeueDuration}, nil
 		}
 	}
 
-	// If the Statefulsets is not controlled by this EtcdCluster resource, we should log
-	// a warning to the event recorder and return error msg.
-	err = checkStatefulSetControlledByEtcdOperator(etcdCluster, sts)
-	if err != nil {
-		logger.Error(err, "StatefulSet is not controlled by this EtcdCluster resource")
-		return ctrl.Result{}, err
+	if sts != nil {
+		if err := checkStatefulSetControlledByEtcdOperator(ec, sts); err != nil {
+			logger.Error(err, "StatefulSet is not controlled by this EtcdCluster resource")
+			return nil, ctrl.Result{}, err
+		}
 	}
 
-	// If statefulset size is 0. try to instantiate the cluster with 1 member
-	if sts.Spec.Replicas != nil && *sts.Spec.Replicas == 0 {
-		logger.Info("StatefulSet has 0 replicas. Trying to create a new cluster with 1 member")
+	return &reconcileState{cluster: ec, sts: sts}, ctrl.Result{}, nil
+}
+
+// bootstrapStatefulSet ensures that the foundational Kubernetes objects for
+// a cluster exist and are correctly initialized. It creates the StatefulSet
+// (initially with 0 replicas) and the headless Service if necessary. When
+// either resource is created or the StatefulSet is scaled from zero to one
+// replica, the returned ctrl.Result requests a requeue so the next
+// reconciliation loop can observe the new state. The reconcileState is
+// updated with the current StatefulSet.
+func (r *EtcdClusterReconciler) bootstrapStatefulSet(ctx context.Context, s *reconcileState) (ctrl.Result, error) {
+	logger := log.FromContext(ctx)
+	requeue := false
+	var err error
+
+	switch {
+	case s.sts == nil:
+		logger.Info("Creating StatefulSet with 0 replicas", "expectedSize", s.cluster.Spec.Size)
+		s.sts, err = reconcileStatefulSet(ctx, logger, s.cluster, r.Client, 0, r.Scheme)
+		if err != nil {
+			return ctrl.Result{}, err
+		}
+		requeue = true
 
-		sts, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, 1, r.Scheme)
+	case s.sts.Spec.Replicas != nil && *s.sts.Spec.Replicas == 0:
+		logger.Info("StatefulSet has 0 replicas. Trying to create a new cluster with 1 member")
+		s.sts, err = reconcileStatefulSet(ctx, logger, s.cluster, r.Client, 1, r.Scheme)
 		if err != nil {
 			return ctrl.Result{}, err
 		}
+		requeue = true
 	}
 
-	err = createHeadlessServiceIfNotExist(ctx, logger, r.Client, etcdCluster, r.Scheme)
-	if err != nil {
+	if err = createHeadlessServiceIfNotExist(ctx, logger, r.Client, s.cluster, r.Scheme); err != nil {
 		return ctrl.Result{}, err
 	}
 
+	if requeue {
+		return ctrl.Result{RequeueAfter: requeueDuration}, nil
+	}
+	return ctrl.Result{}, nil
+}
+
+// performHealthChecks obtains the member list and health status from the etcd
+// cluster specified in the StatefulSet. Results are stored on the reconcileState
+// for later reconciliation steps.
+func (r *EtcdClusterReconciler) performHealthChecks(ctx context.Context, s *reconcileState) error {
+	logger := log.FromContext(ctx)
 	logger.Info("Now checking health of the cluster members")
-	memberListResp, healthInfos, err := healthCheck(sts, logger)
+	var err error
+	s.memberListResp, s.memberHealth, err = healthCheck(s.sts, logger)
 	if err != nil {
-		return ctrl.Result{}, fmt.Errorf("health check failed: %w", err)
+		return fmt.Errorf("health check failed: %w", err)
 	}
+	return nil
+}
 
+// reconcileClusterState compares the desired cluster size with the observed
+// etcd member list and StatefulSet replica count. It performs scaling actions
+// and handles learner promotion when needed. A ctrl.Result with a requeue
+// instructs the controller to retry after adjustments.
+func (r *EtcdClusterReconciler) reconcileClusterState(ctx context.Context, s *reconcileState) (ctrl.Result, error) {
+	logger := log.FromContext(ctx)
 	memberCnt := 0
-	if memberListResp != nil {
-		memberCnt = len(memberListResp.Members)
+	if s.memberListResp != nil {
+		memberCnt = len(s.memberListResp.Members)
 	}
-	targetReplica := *sts.Spec.Replicas // Start with the current size of the stateful set
+	targetReplica := *s.sts.Spec.Replicas
+	var err error
 
 	// The number of replicas in the StatefulSet doesn't match the number of etcd members in the cluster.
 	if int(targetReplica) != memberCnt {
@@ -165,16 +223,14 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 			logger.Info("An etcd member was added into the cluster, but the StatefulSet hasn't scaled out yet")
 			newReplicaCount := targetReplica + 1
 			logger.Info("Increasing StatefulSet replicas to match the etcd cluster member count", "oldReplicaCount", targetReplica, "newReplicaCount", newReplicaCount)
-			_, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, newReplicaCount, r.Scheme)
-			if err != nil {
+			if _, err := reconcileStatefulSet(ctx, logger, s.cluster, r.Client, newReplicaCount, r.Scheme); err != nil {
 				return ctrl.Result{}, err
 			}
 		} else {
 			logger.Info("An etcd member was removed from the cluster, but the StatefulSet hasn't scaled in yet")
 			newReplicaCount := targetReplica - 1
 			logger.Info("Decreasing StatefulSet replicas to remove the unneeded Pod.", "oldReplicaCount", targetReplica, "newReplicaCount", newReplicaCount)
-			_, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, newReplicaCount, r.Scheme)
-			if err != nil {
+			if _, err := reconcileStatefulSet(ctx, logger, s.cluster, r.Client, newReplicaCount, r.Scheme); err != nil {
 				return ctrl.Result{}, err
 			}
 		}
@@ -189,48 +245,45 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 	if memberCnt > 0 {
 		// Find the leader status
-		_, leaderStatus = etcdutils.FindLeaderStatus(healthInfos, logger)
+		_, leaderStatus = etcdutils.FindLeaderStatus(s.memberHealth, logger)
 		if leaderStatus == nil {
-			// If the leader is not available, let's wait for the leader to be elected
+			// If the leader is not available, wait for the leader to be elected
 			return ctrl.Result{}, fmt.Errorf("couldn't find leader, memberCnt: %d", memberCnt)
 		}
 
-		learner, learnerStatus = etcdutils.FindLearnerStatus(healthInfos, logger)
+		learner, learnerStatus = etcdutils.FindLearnerStatus(s.memberHealth, logger)
 		if learner > 0 {
-			// There is at least one learner. Let's try to promote it or wait
-			// Find the learner status
+			// There is at least one learner. Try to promote it if it's ready; otherwise requeue and wait.
 			logger.Info("Learner found", "learnedID", learner, "learnerStatus", learnerStatus)
 			if etcdutils.IsLearnerReady(leaderStatus, learnerStatus) {
 				logger.Info("Learner is ready to be promoted to voting member", "learnerID", learner)
 				logger.Info("Promoting the learner member", "learnerID", learner)
-				eps := clientEndpointsFromStatefulsets(sts)
+				eps := clientEndpointsFromStatefulsets(s.sts)
 				eps = eps[:(len(eps) - 1)]
-				err = etcdutils.PromoteLearner(eps, learner)
-				if err != nil {
-					// The member is not promoted yet, so we error out
+				if err := etcdutils.PromoteLearner(eps, learner); err != nil {
+					// The member is not promoted yet, so we error out and requeue via the caller.
 					return ctrl.Result{}, err
 				}
 			} else {
-				// Learner is not yet ready. We can't add another learner or proceed further until this one is promoted
-				// So let's requeue
+				// Learner is not yet ready. We can't add another learner or proceed further until this one is promoted.
 				logger.Info("The learner member isn't ready to be promoted yet", "learnerID", learner)
 				return ctrl.Result{RequeueAfter: requeueDuration}, nil
 			}
 		}
 	}
 
-	if targetReplica == int32(etcdCluster.Spec.Size) {
+	if targetReplica == int32(s.cluster.Spec.Size) {
 		logger.Info("EtcdCluster is already up-to-date")
 		return ctrl.Result{}, nil
 	}
 
-	eps := clientEndpointsFromStatefulsets(sts)
+	eps := clientEndpointsFromStatefulsets(s.sts)
 
-	// If there is no more learner, then we can proceed to scale the cluster further.
-	// If there is no more member to add, the control will not reach here after the requeue
-	if targetReplica < int32(etcdCluster.Spec.Size) {
+	// If there are no learners left, we can proceed to scale the cluster towards the desired size.
+	// When there are no members to add, the controller will requeue above and this block won't execute.
+	if targetReplica < int32(s.cluster.Spec.Size) {
 		// scale out
-		_, peerURL := peerEndpointForOrdinalIndex(etcdCluster, int(targetReplica)) // The index starts at 0, so we should do this before incrementing targetReplica
+		_, peerURL := peerEndpointForOrdinalIndex(s.cluster, int(targetReplica))
 		targetReplica++
 		logger.Info("[Scale out] adding a new learner member to etcd cluster", "peerURLs", peerURL)
 		if _, err := etcdutils.AddMember(eps, []string{peerURL}, true); err != nil {
@@ -238,39 +291,47 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request)
 		}
 		logger.Info("Learner member added successfully", "peerURLs", peerURL)
-	} else {
+
+		if s.sts, err = reconcileStatefulSet(ctx, logger, s.cluster, r.Client, targetReplica, r.Scheme); err != nil {
+			return ctrl.Result{}, err
+		}
+
+		return ctrl.Result{RequeueAfter: requeueDuration}, nil
+	}
+
+	if targetReplica > int32(s.cluster.Spec.Size) {
 		// scale in
 		targetReplica--
-		logger = logger.WithValues("targetReplica", targetReplica, "expectedSize", etcdCluster.Spec.Size)
+		logger = logger.WithValues("targetReplica", targetReplica, "expectedSize", s.cluster.Spec.Size)
 
-		memberID := healthInfos[memberCnt-1].Status.Header.MemberId
+		memberID := s.memberHealth[memberCnt-1].Status.Header.MemberId
 		logger.Info("[Scale in] removing one member", "memberID", memberID)
 		eps = eps[:targetReplica]
 		if err := etcdutils.RemoveMember(eps, memberID); err != nil {
 			return ctrl.Result{}, err
 		}
-	}
 
-	sts, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, targetReplica, r.Scheme)
-	if err != nil {
-		return ctrl.Result{}, err
+		if s.sts, err = reconcileStatefulSet(ctx, logger, s.cluster, r.Client, targetReplica, r.Scheme); err != nil {
+			return ctrl.Result{}, err
+		}
+
+		return ctrl.Result{RequeueAfter: requeueDuration}, nil
 	}
 
-	allMembersHealthy, err := areAllMembersHealthy(sts, logger)
+	// Ensure every etcd member reports itself healthy before declaring success.
+	allMembersHealthy, err := areAllMembersHealthy(s.sts, logger)
 	if err != nil {
 		return ctrl.Result{}, err
 	}
 
-	if *sts.Spec.Replicas != int32(etcdCluster.Spec.Size) || !allMembersHealthy {
-		// Requeue if the statefulset size is not equal to the expected size of ETCD cluster
-		// Or if all members of the cluster are not healthy
+	if !allMembersHealthy {
+		// Requeue until the StatefulSet settles and all members are healthy.
 		return ctrl.Result{RequeueAfter: requeueDuration}, nil
 	}
 
 	logger.Info("EtcdCluster reconciled successfully")
 	return ctrl.Result{}, nil
-
 }
 
 // SetupWithManager sets up the controller with the Manager.
diff --git a/internal/controller/etcdcluster_controller_test.go b/internal/controller/etcdcluster_controller_test.go
index 630726a..6e195de 100644
--- a/internal/controller/etcdcluster_controller_test.go
+++ b/internal/controller/etcdcluster_controller_test.go
@@ -19,91 +19,309 @@ package controller
 import (
 	"testing"
 
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
 	appsv1 "k8s.io/api/apps/v1"
-	"k8s.io/apimachinery/pkg/api/errors"
+	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
-	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/client/fake"
 
-	operatorv1alpha1 "go.etcd.io/etcd-operator/api/v1alpha1"
+	ecv1alpha1 "go.etcd.io/etcd-operator/api/v1alpha1"
 )
 
-func TestControllerReconcile(t *testing.T) {
-	ctx := t.Context()
+// TestFetchAndValidateState covers the scenarios handled by the fetchAndValidateState
+// helper. Each sub-test sets up a fake client with different existing
+// resources and asserts on the returned state, result, and error.
+func TestFetchAndValidateState(t *testing.T) {
+	scheme := runtime.NewScheme()
+	_ = corev1.AddToScheme(scheme)
+	_ = appsv1.AddToScheme(scheme)
+	_ = ecv1alpha1.AddToScheme(scheme)
 
-	const (
-		resourceName       = "test-etcd-cluster"
-		expectedSize int32 = 1
-	)
+	cases := []struct {
+		name   string
+		req    ctrl.Request
+		ec     *ecv1alpha1.EtcdCluster
+		sts    *appsv1.StatefulSet
+		assert func(t *testing.T, state *reconcileState, res ctrl.Result, err error, ec *ecv1alpha1.EtcdCluster, sts *appsv1.StatefulSet)
+	}{
+		{
+			name: "EtcdCluster Not Found",
+			req:  ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}},
+			assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error, _ *ecv1alpha1.EtcdCluster, _ *appsv1.StatefulSet) {
+				assert.Nil(t, state)
+				assert.NoError(t, err)
+				assert.Equal(t, ctrl.Result{}, res)
+			},
+		},
+		{
+			name: "StatefulSet Not Found",
+			ec: &ecv1alpha1.EtcdCluster{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "etcd",
+					Namespace: "default",
+					UID:       "1",
+				},
+				Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.5.17"},
+			},
+			req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}},
+			assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error, ec *ecv1alpha1.EtcdCluster, _ *appsv1.StatefulSet) {
+				require.NotNil(t, state)
+				assert.Equal(t, ec.Name, state.cluster.Name)
+				assert.Nil(t, state.sts)
+				assert.NoError(t, err)
+				assert.Equal(t, ctrl.Result{}, res)
+			},
+		},
+		{
+			name: "Resources Exist and Owned",
+			ec: &ecv1alpha1.EtcdCluster{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "etcd",
+					Namespace: "default",
+					UID:       "2",
+				},
+				Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.5.17"},
+			},
+			sts: &appsv1.StatefulSet{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "etcd",
+					Namespace: "default",
+					OwnerReferences: []metav1.OwnerReference{
+						{
+							APIVersion: ecv1alpha1.GroupVersion.String(),
+							Kind:       "EtcdCluster",
+							Name:       "etcd",
+							UID:        "2",
+							Controller: pointerToBool(true),
+						},
+					},
+				},
+			},
+			req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}},
+			assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error, ec *ecv1alpha1.EtcdCluster, sts *appsv1.StatefulSet) {
+				require.NotNil(t, state)
+				assert.Equal(t, ec.Name, state.cluster.Name)
+				require.NotNil(t, state.sts)
+				assert.Equal(t, sts.Name, state.sts.Name)
+				assert.NoError(t, err)
+				assert.Equal(t, ctrl.Result{}, res)
+			},
+		},
+		{
+			name: "StatefulSet Not Owned",
+			ec: &ecv1alpha1.EtcdCluster{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "etcd",
+					Namespace: "default",
+					UID:       "3",
+				},
+				Spec: ecv1alpha1.EtcdClusterSpec{Size: 1, Version: "3.5.17"},
+			},
+			sts: &appsv1.StatefulSet{
+				ObjectMeta: metav1.ObjectMeta{
+					Name:      "etcd",
+					Namespace: "default",
+				},
+			},
+			req: ctrl.Request{NamespacedName: types.NamespacedName{Name: "etcd", Namespace: "default"}},
+			assert: func(t *testing.T, state *reconcileState, res ctrl.Result, err error, _ *ecv1alpha1.EtcdCluster, _ *appsv1.StatefulSet) {
+				assert.Nil(t, state)
+				assert.Error(t, err)
+				assert.Contains(t, err.Error(), "not controlled")
+				assert.Equal(t, ctrl.Result{}, res)
+			},
+		},
+	}
 
-	typeNamespacedName := types.NamespacedName{
-		Name:      resourceName,
-		Namespace: "default",
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			ctx := t.Context()
+
+			objs := []client.Object{}
+			if tc.ec != nil {
+				objs = append(objs, tc.ec)
+			}
+			if tc.sts != nil {
+				objs = append(objs, tc.sts)
+			}
+
+			builder := fake.NewClientBuilder().WithScheme(scheme)
+			if len(objs) > 0 {
+				builder.WithObjects(objs...)
+			}
+			fakeClient := builder.Build()
+
+			r := &EtcdClusterReconciler{Client: fakeClient, Scheme: scheme}
+
+			state, res, err := r.fetchAndValidateState(ctx, tc.req)
+			tc.assert(t, state, res, err, tc.ec, tc.sts)
+		})
+	}
+}
+
+// TestBootstrapStatefulSet exercises StatefulSet and Service creation and the
+// bootstrap-from-zero logic.
+func TestBootstrapStatefulSet(t *testing.T) {
+	scheme := runtime.NewScheme()
+	_ = corev1.AddToScheme(scheme)
+	_ = appsv1.AddToScheme(scheme)
+	_ = ecv1alpha1.AddToScheme(scheme)
+
+	ec := &ecv1alpha1.EtcdCluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "etcd",
+			Namespace: "default",
+			UID:       "1",
+		},
+		Spec: ecv1alpha1.EtcdClusterSpec{
+			Size:    1,
+			Version: "3.5.17",
+		},
 	}
-	etcdcluster := &operatorv1alpha1.EtcdCluster{}
 
+	t.Run("Initial Creation", func(t *testing.T) {
+		ctx := t.Context()
+
+		fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(ec).Build()
+		r := &EtcdClusterReconciler{Client: fakeClient, Scheme: scheme}
+		state := &reconcileState{cluster: ec}
+
+		res, err := r.bootstrapStatefulSet(ctx, state)
+		assert.NoError(t, err)
+		assert.Equal(t, ctrl.Result{RequeueAfter: requeueDuration}, res)
+		require.NotNil(t, state.sts)
+		assert.NotNil(t, state.sts.Spec.Replicas)
+		assert.Equal(t, int32(0), *state.sts.Spec.Replicas)
+
+		sts := &appsv1.StatefulSet{}
+		err = fakeClient.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, sts)
+		assert.NoError(t, err)
+		assert.NotNil(t, sts.Spec.Replicas)
+		assert.Equal(t, int32(0), *sts.Spec.Replicas)
+
+		svc := &corev1.Service{}
+		err = fakeClient.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, svc)
+		assert.NoError(t, err)
+		assert.Equal(t, "None", svc.Spec.ClusterIP)
 
-	t.Log("Checking for EtcdCluster resource")
-	err := k8sClient.Get(ctx, typeNamespacedName, etcdcluster)
-	if err != nil && errors.IsNotFound(err) {
-		t.Log("Creating EtcdCluster resource")
-		resource := &operatorv1alpha1.EtcdCluster{
+		cm := &corev1.ConfigMap{}
+		err = fakeClient.Get(ctx, client.ObjectKey{Name: configMapNameForEtcdCluster(ec), Namespace: ec.Namespace}, cm)
+		assert.NoError(t, err)
+	})
+
+	t.Run("Bootstrap from Zero", func(t *testing.T) {
+		ctx := t.Context()
+
+		sts := &appsv1.StatefulSet{
 			ObjectMeta: metav1.ObjectMeta{
-				Name:      resourceName,
-				Namespace: "default",
+				Name:      ec.Name,
+				Namespace: ec.Namespace,
+				OwnerReferences: []metav1.OwnerReference{{
+					APIVersion: ecv1alpha1.GroupVersion.String(),
+					Kind:       "EtcdCluster",
+					Name:       ec.Name,
+					UID:        ec.UID,
+					Controller: pointerToBool(true),
+				}},
 			},
-			Spec: operatorv1alpha1.EtcdClusterSpec{
-				Size: int(expectedSize),
+			Spec: appsv1.StatefulSetSpec{
+				Replicas: pointerToInt32(0),
 			},
+			Status: appsv1.StatefulSetStatus{ReadyReplicas: 1},
 		}
-		if createErr := k8sClient.Create(ctx, resource); createErr != nil {
-			t.Fatalf("Failed to create EtcdCluster resource: %v", createErr)
+
+		cm := newEtcdClusterState(ec, 0)
+
+		fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(ec, sts, cm).Build()
+		r := &EtcdClusterReconciler{Client: fakeClient, Scheme: scheme}
+		state := &reconcileState{cluster: ec, sts: sts}
+
+		oldRV := sts.ResourceVersion
+		res, err := r.bootstrapStatefulSet(ctx, state)
+		assert.NoError(t, err)
+		assert.Equal(t, ctrl.Result{RequeueAfter: requeueDuration}, res)
+
+		updatedSTS := &appsv1.StatefulSet{}
+		err = fakeClient.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, updatedSTS)
+		assert.NoError(t, err)
+		assert.Equal(t, int32(1), *updatedSTS.Spec.Replicas)
+		assert.Equal(t, int32(1), updatedSTS.Status.ReadyReplicas)
+		assert.NotEqual(t, oldRV, updatedSTS.ResourceVersion)
+
+		require.NotNil(t, state.sts)
+		assert.Equal(t, int32(1), *state.sts.Spec.Replicas)
+
+		svc := &corev1.Service{}
+		err = fakeClient.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, svc)
+		assert.NoError(t, err)
+		assert.Equal(t, "None", svc.Spec.ClusterIP)
+
+		cmUpdated := &corev1.ConfigMap{}
+		err = fakeClient.Get(ctx, client.ObjectKey{Name: configMapNameForEtcdCluster(ec), Namespace: ec.Namespace}, cmUpdated)
+		assert.NoError(t, err)
+		assert.Equal(t, "new", cmUpdated.Data["ETCD_INITIAL_CLUSTER_STATE"])
+		assert.Contains(t, cmUpdated.Data["ETCD_INITIAL_CLUSTER"], "etcd-0=")
+	})
+
+	t.Run("Resources Already Exist", func(t *testing.T) {
+		ctx := t.Context()
+
+		sts := &appsv1.StatefulSet{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      ec.Name,
+				Namespace: ec.Namespace,
+				OwnerReferences: []metav1.OwnerReference{{
+					APIVersion: ecv1alpha1.GroupVersion.String(),
+					Kind:       "EtcdCluster",
+					Name:       ec.Name,
+					UID:        ec.UID,
+					Controller: pointerToBool(true),
+				}},
+			},
+			Spec:   appsv1.StatefulSetSpec{Replicas: pointerToInt32(1)},
+			Status: appsv1.StatefulSetStatus{ReadyReplicas: 1},
 		}
-	} else if err != nil {
-		t.Fatalf("Failed to get EtcdCluster resource: %v", err)
-	}
 
-	// Defer a cleanup function to remove the resource after the test finishes.
-	defer func() {
-		t.Log("Cleaning up the EtcdCluster resource")
-		resource := &operatorv1alpha1.EtcdCluster{}
-		if getErr := k8sClient.Get(ctx, typeNamespacedName, resource); getErr != nil {
-			t.Errorf("Failed to get EtcdCluster resource before deletion: %v", getErr)
-		} else {
-			if deleteErr := k8sClient.Delete(ctx, resource); deleteErr != nil {
-				t.Errorf("Failed to delete EtcdCluster resource: %v", deleteErr)
-			}
+		svc := &corev1.Service{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      ec.Name,
+				Namespace: ec.Namespace,
+			},
+			Spec: corev1.ServiceSpec{ClusterIP: "None"},
 		}
-	}()
 
-	reconciler := &EtcdClusterReconciler{
-		Client:        k8sClient,
-		Scheme:        k8sClient.Scheme(),
-		ImageRegistry: "gcr.io/etcd-development/etcd",
-	}
+		cm := newEtcdClusterState(ec, 1)
 
-	// Reconcile, as it is, returns err since StatefulSet status.ReadyReplicas cannot report its actual status.
-	// This is due to envtest limitation. Envtest does not include kubelet, making pods to never report its readiness .
-	_, _ = reconciler.Reconcile(ctx, reconcile.Request{
-		NamespacedName: typeNamespacedName,
-	})
+		fakeClient := fake.NewClientBuilder().WithScheme(scheme).WithObjects(ec, sts.DeepCopy(), svc.DeepCopy(), cm.DeepCopy()).Build()
+		r := &EtcdClusterReconciler{Client: fakeClient, Scheme: scheme}
+		state := &reconcileState{cluster: ec, sts: sts}
 
-	// Verify Statefulset has been created with expected replica size
-	etcdClusterSts := &appsv1.StatefulSet{}
-	err = k8sClient.Get(ctx, typeNamespacedName, etcdClusterSts)
-	if err != nil {
-		t.Fatalf("Failed to find corresponding Statefulset for %s: %v", resourceName, err)
-	}
+		// Capture current objects to verify no updates occur.
+		storedSTS := sts.DeepCopy()
+		storedSvc := svc.DeepCopy()
+		storedCM := cm.DeepCopy()
+		res, err := r.bootstrapStatefulSet(ctx, state)
+		assert.NoError(t, err)
+		assert.Equal(t, ctrl.Result{}, res)
 
-	if *etcdClusterSts.Spec.Replicas != expectedSize {
-		t.Fatalf("Unexpected StatefulSet Size: expected %d, got: %d", expectedSize, *etcdClusterSts.Spec.Replicas)
-	}
+		fetchedSTS := &appsv1.StatefulSet{}
+		err = fakeClient.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, fetchedSTS)
+		assert.NoError(t, err)
+		assert.Equal(t, storedSTS.Spec, fetchedSTS.Spec)
+
+		fetchedSvc := &corev1.Service{}
+		err = fakeClient.Get(ctx, client.ObjectKey{Name: ec.Name, Namespace: ec.Namespace}, fetchedSvc)
+		assert.NoError(t, err)
+		assert.Equal(t, storedSvc.Spec, fetchedSvc.Spec)
 
-	// TODO: Add more specific checks (e.g., verifying status conditions or created resources).
-	// For example:
-	// updated := &operatorv1alpha1.EtcdCluster{}
-	// if err := k8sClient.Get(ctx, typeNamespacedName, updated); err != nil {
-	//	t.Fatalf("Failed to retrieve updated resource: %v", err)
-	// }
-	// // Validate updated fields or status
+		fetchedCM := &corev1.ConfigMap{}
+		err = fakeClient.Get(ctx, client.ObjectKey{Name: configMapNameForEtcdCluster(ec), Namespace: ec.Namespace}, fetchedCM)
+		assert.NoError(t, err)
+		assert.Equal(t, storedCM.Data, fetchedCM.Data)
+	})
 }
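The tests rely on two pointer helpers, pointerToBool and pointerToInt32, that this diff does not add, so they presumably already exist elsewhere in the controller package. If not, a minimal sketch is enough (hypothetical definitions, named after the call sites above):

    // Assumed helpers; define only if the package doesn't already provide them.
    func pointerToBool(b bool) *bool    { return &b }
    func pointerToInt32(i int32) *int32 { return &i }

With a recent k8s.io/utils dependency these reduce to ptr.To(true) and ptr.To(int32(0)) from the k8s.io/utils/ptr package.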