From d28f5edc8e3f287e9d81eb52f285076c1f4be401 Mon Sep 17 00:00:00 2001 From: Wenxue Zhao Date: Sun, 4 May 2025 23:52:38 -0400 Subject: [PATCH 01/12] added EtcdClusterStatus, modified reconcilation loop for status tracking Signed-off-by: Wenxue Zhao --- api/v1alpha1/etcdcluster_types.go | 15 +++ api/v1alpha1/zz_generated.deepcopy.go | 10 +- .../bases/operator.etcd.io_etcdclusters.yaml | 76 +++++++++++++ internal/controller/etcdcluster_controller.go | 101 +++++++++++++++++- 4 files changed, 199 insertions(+), 3 deletions(-) diff --git a/api/v1alpha1/etcdcluster_types.go b/api/v1alpha1/etcdcluster_types.go index 3b19533..e47c80a 100644 --- a/api/v1alpha1/etcdcluster_types.go +++ b/api/v1alpha1/etcdcluster_types.go @@ -130,6 +130,21 @@ type ProviderCertManagerConfig struct { type EtcdClusterStatus struct { // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster // Important: Run "make" to regenerate code after modifying this file + + // ReadyReplicas is the number of pods targeted by this EtcdCluster with a Ready condition. + ReadyReplicas int32 `json:"readyReplicas,omitempty"` + // Members is the number of etcd members in the cluster reported by etcd API. + Members int32 `json:"members,omitempty"` + // CurrentVersion is the version of the etcd cluster. + CurrentVersion string `json:"currentVersion,omitempty"` + // Phase indicates the state of the EtcdCluster. + Phase string `json:"phase,omitempty"` + // Conditions represent the latest available observations of a replica set's state. + // +optional + // +patchMergeKey=type + // +patchStrategy=merge + // +listType=atomic + Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` } // +kubebuilder:object:root=true diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index e1088bd..946ea62 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -21,6 +21,7 @@ limitations under the License. package v1alpha1 import ( + "k8s.io/apimachinery/pkg/apis/meta/v1" runtime "k8s.io/apimachinery/pkg/runtime" netx "net" ) @@ -83,7 +84,7 @@ func (in *EtcdCluster) DeepCopyInto(out *EtcdCluster) { out.TypeMeta = in.TypeMeta in.ObjectMeta.DeepCopyInto(&out.ObjectMeta) in.Spec.DeepCopyInto(&out.Spec) - out.Status = in.Status + in.Status.DeepCopyInto(&out.Status) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EtcdCluster. @@ -174,6 +175,13 @@ func (in *EtcdClusterSpec) DeepCopy() *EtcdClusterSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EtcdClusterStatus) DeepCopyInto(out *EtcdClusterStatus) { *out = *in + if in.Conditions != nil { + in, out := &in.Conditions, &out.Conditions + *out = make([]v1.Condition, len(*in)) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EtcdClusterStatus. diff --git a/config/crd/bases/operator.etcd.io_etcdclusters.yaml b/config/crd/bases/operator.etcd.io_etcdclusters.yaml index aa67be6..77ccac3 100644 --- a/config/crd/bases/operator.etcd.io_etcdclusters.yaml +++ b/config/crd/bases/operator.etcd.io_etcdclusters.yaml @@ -221,6 +221,82 @@ spec: type: object status: description: EtcdClusterStatus defines the observed state of EtcdCluster. + properties: + conditions: + description: Conditions represent the latest available observations + of a replica set's state. 
+ items: + description: Condition contains details for one aspect of the current + state of this API Resource. + properties: + lastTransitionTime: + description: |- + lastTransitionTime is the last time the condition transitioned from one status to another. + This should be when the underlying condition changed. If that is not known, then using the time when the API field changed is acceptable. + format: date-time + type: string + message: + description: |- + message is a human readable message indicating details about the transition. + This may be an empty string. + maxLength: 32768 + type: string + observedGeneration: + description: |- + observedGeneration represents the .metadata.generation that the condition was set based upon. + For instance, if .metadata.generation is currently 12, but the .status.conditions[x].observedGeneration is 9, the condition is out of date + with respect to the current state of the instance. + format: int64 + minimum: 0 + type: integer + reason: + description: |- + reason contains a programmatic identifier indicating the reason for the condition's last transition. + Producers of specific condition types may define expected values and meanings for this field, + and whether the values are considered a guaranteed API. + The value should be a CamelCase string. + This field may not be empty. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-type: atomic + currentVersion: + description: CurrentVersion is the version of the etcd cluster. + type: string + members: + description: Members is the number of etcd members in the cluster + reported by etcd API. + format: int32 + type: integer + phase: + description: Phase indicates the state of the EtcdCluster. + type: string + readyReplicas: + description: ReadyReplicas is the number of pods targeted by this + EtcdCluster with a Ready condition. + format: int32 + type: integer type: object type: object served: true diff --git a/internal/controller/etcdcluster_controller.go b/internal/controller/etcdcluster_controller.go index 7c46339..a17e98b 100644 --- a/internal/controller/etcdcluster_controller.go +++ b/internal/controller/etcdcluster_controller.go @@ -24,6 +24,7 @@ import ( certv1 "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" @@ -84,11 +85,30 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{}, err } + // Keep a copy of the old status for patching later + oldEtcdCluster := etcdCluster.DeepCopy() + + // Defer the status update until the end of reconciliation + defer func() { + if err := r.updateStatusIfNeeded(ctx, etcdCluster, oldEtcdCluster); err != nil { + // Log any status update issue; controller-runtime will trigger a retry if needed. 
+ logger.Error(err, "Deferred status update failed") + } + }() + // Determine desired etcd image registry if etcdCluster.Spec.ImageRegistry == "" { etcdCluster.Spec.ImageRegistry = r.ImageRegistry } + if etcdCluster.Spec.Size == 0 { + logger.Info("EtcdCluster size is 0..Skipping next steps") + etcdCluster.Status.Phase = "Idle" // Example: Set a phase even for size 0 + etcdCluster.Status.ReadyReplicas = 0 + etcdCluster.Status.Members = 0 + return ctrl.Result{}, nil + } + // Create Client Certificate for etcd-operator to communicate with the etcdCluster if etcdCluster.Spec.TLS != nil { clientCertErr := createClientCertificate(ctx, etcdCluster, r.Client) @@ -109,9 +129,10 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) if errors.IsNotFound(err) { logger.Info("Creating StatefulSet with 0 replica", "expectedSize", etcdCluster.Spec.Size) // Create a new StatefulSet - sts, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, 0, r.Scheme) if err != nil { + logger.Error(err, "Failed to create StatefulSet") + etcdCluster.Status.Phase = "Failed" return ctrl.Result{}, err } } else { @@ -119,36 +140,59 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) // attempt processing again later. This could have been caused by a // temporary network failure, or any other transient reason. logger.Error(err, "Failed to get StatefulSet. Requesting requeue") + etcdCluster.Status.Phase = "Failed" return ctrl.Result{RequeueAfter: requeueDuration}, nil } } + // At this point, sts should exist (either found or created) + if sts == nil { + // This case should ideally not happen if error handling above is correct + err := fmt.Errorf("statefulSet is unexpectedly nil after get/create") + logger.Error(err, "Internal error") + etcdCluster.Status.Phase = "Failed" + return ctrl.Result{}, err // Return error, defer will update status + } + + // Update status based on STS before proceeding + etcdCluster.Status.ReadyReplicas = sts.Status.ReadyReplicas + // If the Statefulsets is not controlled by this EtcdCluster resource, we should log // a warning to the event recorder and return error msg. err = checkStatefulSetControlledByEtcdOperator(etcdCluster, sts) if err != nil { logger.Error(err, "StatefulSet is not controlled by this EtcdCluster resource") + etcdCluster.Status.Phase = "Failed" return ctrl.Result{}, err } // If statefulset size is 0. try to instantiate the cluster with 1 member if sts.Spec.Replicas != nil && *sts.Spec.Replicas == 0 { logger.Info("StatefulSet has 0 replicas. Trying to create a new cluster with 1 member") - sts, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, 1, r.Scheme) if err != nil { + logger.Error(err, "Failed to scale StatefulSet to 1 replica") + etcdCluster.Status.Phase = "Failed" return ctrl.Result{}, err } + // Successfully scaled to 1, update status fields and requeue to wait for pod readiness + etcdCluster.Status.ReadyReplicas = sts.Status.ReadyReplicas + etcdCluster.Status.Phase = "Initializing" + // return ctrl.Result{RequeueAfter: requeueDuration}, nil // Requeue to check readiness, should we do it? 
} err = createHeadlessServiceIfNotExist(ctx, logger, r.Client, etcdCluster, r.Scheme) if err != nil { + logger.Error(err, "Failed to create Headless Service") + etcdCluster.Status.Phase = "Failed" return ctrl.Result{}, err } logger.Info("Now checking health of the cluster members") memberListResp, healthInfos, err := healthCheck(sts, logger) if err != nil { + logger.Error(err, "Health check failed") + etcdCluster.Status.Phase = "Degraded" // Or "Unavailable"? return ctrl.Result{}, fmt.Errorf("health check failed: %w", err) } @@ -156,6 +200,10 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) if memberListResp != nil { memberCnt = len(memberListResp.Members) } + etcdCluster.Status.Members = int32(memberCnt) + // TODO: Update CurrentVersion from healthInfos + // TODO: Update Conditions based on healthInfos + targetReplica := *sts.Spec.Replicas // Start with the current size of the stateful set // The number of replicas in the StatefulSet doesn't match the number of etcd members in the cluster. @@ -167,6 +215,8 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) logger.Info("Increasing StatefulSet replicas to match the etcd cluster member count", "oldReplicaCount", targetReplica, "newReplicaCount", newReplicaCount) _, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, newReplicaCount, r.Scheme) if err != nil { + logger.Error(err, "Failed to adjust StatefulSet replicas to match member count") + etcdCluster.Status.Phase = "Failed" return ctrl.Result{}, err } } else { @@ -175,9 +225,14 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) logger.Info("Decreasing StatefulSet replicas to remove the unneeded Pod.", "oldReplicaCount", targetReplica, "newReplicaCount", newReplicaCount) _, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, newReplicaCount, r.Scheme) if err != nil { + logger.Error(err, "Failed to adjust StatefulSet replicas to match member count") + etcdCluster.Status.Phase = "Failed" return ctrl.Result{}, err } } + // Successfully adjusted STS, update status and requeue + etcdCluster.Status.ReadyReplicas = sts.Status.ReadyReplicas + etcdCluster.Status.Phase = "Scaling" return ctrl.Result{RequeueAfter: requeueDuration}, nil } @@ -191,6 +246,10 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Find the leader status _, leaderStatus = etcdutils.FindLeaderStatus(healthInfos, logger) if leaderStatus == nil { + err := fmt.Errorf("couldn't find leader, memberCnt: %d", memberCnt) + logger.Error(err, "Leader election might be in progress or cluster unhealthy") + etcdCluster.Status.Phase = "Degraded" // Or Unavailable + // TODO: Add Condition // If the leader is not available, let's wait for the leader to be elected return ctrl.Result{}, fmt.Errorf("couldn't find leader, memberCnt: %d", memberCnt) } @@ -207,13 +266,18 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) eps = eps[:(len(eps) - 1)] err = etcdutils.PromoteLearner(eps, learner) if err != nil { + logger.Error(err, "Failed to promote learner") + etcdCluster.Status.Phase = "Failed" // Promotion failed + // TODO: Add Condition // The member is not promoted yet, so we error out return ctrl.Result{}, err } + etcdCluster.Status.Phase = "PromotingLearner" // Indicate promotion happened } else { // Learner is not yet ready. 
We can't add another learner or proceed further until this one is promoted // So let's requeue logger.Info("The learner member isn't ready to be promoted yet", "learnerID", learner) + etcdCluster.Status.Phase = "PromotingLearner" // Still trying to promote return ctrl.Result{RequeueAfter: requeueDuration}, nil } } @@ -234,6 +298,9 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) targetReplica++ logger.Info("[Scale out] adding a new learner member to etcd cluster", "peerURLs", peerURL) if _, err := etcdutils.AddMember(eps, []string{peerURL}, true); err != nil { + logger.Error(err, "Failed to add learner member") + etcdCluster.Status.Phase = "Failed" // Scaling failed + // TODO: Add Condition return ctrl.Result{}, err } @@ -248,31 +315,61 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) logger.Info("[Scale in] removing one member", "memberID", memberID) eps = eps[:targetReplica] if err := etcdutils.RemoveMember(eps, memberID); err != nil { + logger.Error(err, "Failed to remove member") + etcdCluster.Status.Phase = "Failed" // Scaling failed + // TODO: Add Condition return ctrl.Result{}, err } } sts, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, targetReplica, r.Scheme) if err != nil { + logger.Error(err, "Failed to update StatefulSet during scaling") + etcdCluster.Status.Phase = "Failed" return ctrl.Result{}, err } allMembersHealthy, err := areAllMembersHealthy(sts, logger) if err != nil { + logger.Error(err, "Final health check failed") + etcdCluster.Status.Phase = "Degraded" + // TODO: Add Condition return ctrl.Result{}, err } if *sts.Spec.Replicas != int32(etcdCluster.Spec.Size) || !allMembersHealthy { // Requeue if the statefulset size is not equal to the expected size of ETCD cluster // Or if all members of the cluster are not healthy + etcdCluster.Status.Phase = "Degraded" + // TODO: Add Condition return ctrl.Result{RequeueAfter: requeueDuration}, nil } + etcdCluster.Status.Phase = "Running" // Final healthy state + // TODO: Set Available Condition to True logger.Info("EtcdCluster reconciled successfully") return ctrl.Result{}, nil } +// updateStatusIfNeeded compares the old and new status and patches if changed. +func (r *EtcdClusterReconciler) updateStatusIfNeeded(ctx context.Context, etcdCluster *ecv1alpha1.EtcdCluster, oldEtcdCluster *ecv1alpha1.EtcdCluster) error { + logger := log.FromContext(ctx) + // Compare the new status with the old status + if !equality.Semantic.DeepEqual(oldEtcdCluster.Status, etcdCluster.Status) { + logger.Info("Updating EtcdCluster status", "namespace", etcdCluster.Namespace, "name", etcdCluster.Name) + err := r.Status().Patch(ctx, etcdCluster, client.MergeFrom(oldEtcdCluster)) + if err != nil { + logger.Error(err, "Failed to update EtcdCluster status", "namespace", etcdCluster.Namespace, "name", etcdCluster.Name) + return err // Return the error so the Reconcile loop retries + } + logger.Info("Successfully updated EtcdCluster status", "namespace", etcdCluster.Namespace, "name", etcdCluster.Name) + } else { + logger.V(1).Info("EtcdCluster status is already up-to-date", "namespace", etcdCluster.Namespace, "name", etcdCluster.Name) // Use V(1) for less important info + } + return nil // No error occurred during status update itself +} + // SetupWithManager sets up the controller with the Manager. 
func (r *EtcdClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { r.Recorder = mgr.GetEventRecorderFor("etcdcluster-controller") From c56db22eb2bec6a664ed650dcff10a210d298091 Mon Sep 17 00:00:00 2001 From: Wenxue Zhao Date: Mon, 5 May 2025 00:05:32 -0400 Subject: [PATCH 02/12] added more phrases Signed-off-by: Wenxue Zhao --- internal/controller/etcdcluster_controller.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/internal/controller/etcdcluster_controller.go b/internal/controller/etcdcluster_controller.go index a17e98b..595d3ee 100644 --- a/internal/controller/etcdcluster_controller.go +++ b/internal/controller/etcdcluster_controller.go @@ -127,6 +127,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) sts, err := getStatefulSet(ctx, r.Client, etcdCluster.Name, etcdCluster.Namespace) if err != nil { if errors.IsNotFound(err) { + etcdCluster.Status.Phase = "Creating" logger.Info("Creating StatefulSet with 0 replica", "expectedSize", etcdCluster.Spec.Size) // Create a new StatefulSet sts, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, 0, r.Scheme) @@ -209,6 +210,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) // The number of replicas in the StatefulSet doesn't match the number of etcd members in the cluster. if int(targetReplica) != memberCnt { logger.Info("The expected number of replicas doesn't match the number of etcd members in the cluster", "targetReplica", targetReplica, "memberCnt", memberCnt) + etcdCluster.Status.Phase = "Scaling" if int(targetReplica) < memberCnt { logger.Info("An etcd member was added into the cluster, but the StatefulSet hasn't scaled out yet") newReplicaCount := targetReplica + 1 @@ -232,7 +234,6 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) } // Successfully adjusted STS, update status and requeue etcdCluster.Status.ReadyReplicas = sts.Status.ReadyReplicas - etcdCluster.Status.Phase = "Scaling" return ctrl.Result{RequeueAfter: requeueDuration}, nil } From 117f9d4c80dc328d467420dcfab0639a894b85da Mon Sep 17 00:00:00 2001 From: Wenxue Zhao Date: Mon, 12 May 2025 12:47:54 -0400 Subject: [PATCH 03/12] status refactor, added status.condition Signed-off-by: Wenxue Zhao --- api/v1alpha1/etcdcluster_types.go | 2 + internal/controller/etcdcluster_controller.go | 400 ++++++++++++------ pkg/status/manager.go | 150 +++++++ pkg/status/patch.go | 115 +++++ pkg/status/type.go | 64 +++ 5 files changed, 590 insertions(+), 141 deletions(-) create mode 100644 pkg/status/manager.go create mode 100644 pkg/status/patch.go create mode 100644 pkg/status/type.go diff --git a/api/v1alpha1/etcdcluster_types.go b/api/v1alpha1/etcdcluster_types.go index e47c80a..6d079e1 100644 --- a/api/v1alpha1/etcdcluster_types.go +++ b/api/v1alpha1/etcdcluster_types.go @@ -139,6 +139,8 @@ type EtcdClusterStatus struct { CurrentVersion string `json:"currentVersion,omitempty"` // Phase indicates the state of the EtcdCluster. Phase string `json:"phase,omitempty"` + // CurreLeaderId is the ID of etcd cluster leader. + LeaderId string `json:"leaderId,omitempty"` // Conditions represent the latest available observations of a replica set's state. 
// +optional // +patchMergeKey=type diff --git a/internal/controller/etcdcluster_controller.go b/internal/controller/etcdcluster_controller.go index 595d3ee..e59f0b6 100644 --- a/internal/controller/etcdcluster_controller.go +++ b/internal/controller/etcdcluster_controller.go @@ -22,10 +22,11 @@ import ( "time" certv1 "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" + "go.etcd.io/etcd-operator/pkg/status" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" @@ -70,29 +71,55 @@ type EtcdClusterReconciler struct { // // For more details, check Reconcile and its Result here: // - https://pkg.go.dev/sigs.k8s.io/controller-runtime@v0.19.1/pkg/reconcile -func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) { +func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) (res ctrl.Result, err error) { // Use named return values logger := log.FromContext(ctx) - // Fetch the EtcdCluster resource + // --------------------------------------------------------------------- + // 1. Fetch EtcdCluster resource + // --------------------------------------------------------------------- etcdCluster := &ecv1alpha1.EtcdCluster{} - - err := r.Get(ctx, req.NamespacedName, etcdCluster) - if err != nil { + if err = r.Get(ctx, req.NamespacedName, etcdCluster); err != nil { if errors.IsNotFound(err) { logger.Info("EtcdCluster resource not found. Ignoring since object may have been deleted") - return ctrl.Result{}, nil + return ctrl.Result{}, nil // Return nil error for IsNotFound } + logger.Error(err, "Failed to get EtcdCluster") + // Cannot update status if get fails, just return the error return ctrl.Result{}, err } - // Keep a copy of the old status for patching later - oldEtcdCluster := etcdCluster.DeepCopy() + // --------------------------------------------------------------------- + // 2. Initialize Status Patch Helper & Defer Status Update + // --------------------------------------------------------------------- + // We use PatchStatusMutate. It fetches the latest object inside its retry loop. + // We prepare the *desired* status modifications in a 'calculatedStatus' variable + // within the Reconcile scope, and the mutate function applies these calculations. + var calculatedStatus ecv1alpha1.EtcdClusterStatus = etcdCluster.Status // Start with current status - // Defer the status update until the end of reconciliation + // Initialize ConditionManager using the calculatedStatus *copy*. + // Modifications via cm will only affect this copy until the patch is applied. + observedGeneration := etcdCluster.Generation + cm := status.NewManager(&calculatedStatus.Conditions, observedGeneration) + + // Defer the actual patch operation. The mutate function will apply the final 'calculatedStatus'. defer func() { - if err := r.updateStatusIfNeeded(ctx, etcdCluster, oldEtcdCluster); err != nil { - // Log any status update issue; controller-runtime will trigger a retry if needed. - logger.Error(err, "Deferred status update failed") + // Call PatchStatusMutate. The mutate function copies the calculatedStatus + // onto the latest version fetched inside the patch retry loop. 
+ patchErr := status.PatchStatusMutate(ctx, r.Client, etcdCluster, func(latest *ecv1alpha1.EtcdCluster) error { + // Apply the status we calculated during *this* reconcile cycle + latest.Status = calculatedStatus + // Ensure Phase is derived *before* patching + r.derivePhaseFromConditions(latest) // Pass the object being patched + return nil + }) + if patchErr != nil { + logger.Error(patchErr, "Failed to patch EtcdCluster status") + // If the main reconcile logic didn't return an error, return the patch error + if err == nil { + err = patchErr + res = ctrl.Result{} // Reset result if only status patch failed + } + // If the main reconcile *did* return an error, we prefer that original error. } }() @@ -101,15 +128,28 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) etcdCluster.Spec.ImageRegistry = r.ImageRegistry } + // --------------------------------------------------------------------- + // 3. Initial Status Setup & Handle Size 0 + // --------------------------------------------------------------------- + // Set initial 'Unknown'/'False' conditions for this reconcile cycle + cm.SetAvailable(false, status.ReasonReconciling, "Reconciliation started") // Default to Not Available + cm.SetProgressing(true, status.ReasonReconciling, "Reconciliation started") // Default to Progressing + cm.SetDegraded(false, status.ReasonReconciling, "Reconciliation started") // Default to Not Degraded + if etcdCluster.Spec.Size == 0 { logger.Info("EtcdCluster size is 0..Skipping next steps") - etcdCluster.Status.Phase = "Idle" // Example: Set a phase even for size 0 - etcdCluster.Status.ReadyReplicas = 0 - etcdCluster.Status.Members = 0 + calculatedStatus.ReadyReplicas = 0 + calculatedStatus.Members = 0 + cm.SetAvailable(false, status.ReasonSizeIsZero, "Desired cluster size is 0") + cm.SetProgressing(false, status.ReasonSizeIsZero, "Desired cluster size is 0") return ctrl.Result{}, nil } - // Create Client Certificate for etcd-operator to communicate with the etcdCluster + // --------------------------------------------------------------------- + // 4. 
Reconcile Core Resources (STS, Service) & Handle Errors + // --------------------------------------------------------------------- + + // Create Client Certificate for etcd-operator to communicate with the EtcdCluster if etcdCluster.Spec.TLS != nil { clientCertErr := createClientCertificate(ctx, etcdCluster, r.Client) if clientCertErr != nil { @@ -117,31 +157,34 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) } } else { // TODO: instead of logging error, set default autoConfig - logger.Error(nil, fmt.Sprintf("missing TLS config for %s, ", etcdCluster.Name+ - "\n running etcd-cluster without TLS protection is NOT recommended for production.")) + logger.Error(nil, fmt.Sprintf("missing TLS config for %s,\n running etcd-cluster without TLS protection is NOT recommended for production.", etcdCluster.Name)) } - logger.Info("Reconciling EtcdCluster", "spec", etcdCluster.Spec) - // Get the statefulsets which has the same name as the EtcdCluster resource - sts, err := getStatefulSet(ctx, r.Client, etcdCluster.Name, etcdCluster.Namespace) + var sts *appsv1.StatefulSet + sts, err = getStatefulSet(ctx, r.Client, etcdCluster.Name, etcdCluster.Namespace) if err != nil { if errors.IsNotFound(err) { - etcdCluster.Status.Phase = "Creating" logger.Info("Creating StatefulSet with 0 replica", "expectedSize", etcdCluster.Spec.Size) - // Create a new StatefulSet + cm.SetProgressing(true, status.ReasonCreatingResources, "StatefulSet not found, creating...") + cm.SetAvailable(false, status.ReasonCreatingResources, "StatefulSet does not exist yet") + sts, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, 0, r.Scheme) if err != nil { logger.Error(err, "Failed to create StatefulSet") - etcdCluster.Status.Phase = "Failed" + cm.SetProgressing(false, status.ReasonResourceCreateFail, status.FormatError("Failed to create StatefulSet", err)) + cm.SetDegraded(true, status.ReasonResourceCreateFail, status.FormatError("Failed to create StatefulSet", err)) return ctrl.Result{}, err } + // STS created with 0 replicas, needs scaling to 1. Set Initializing. + cm.SetProgressing(true, status.ReasonInitializingCluster, "StatefulSet created with 0 replicas, requires scaling to 1") } else { // If an error occurs during Get/Create, we'll requeue the item, so we can // attempt processing again later. This could have been caused by a // temporary network failure, or any other transient reason. logger.Error(err, "Failed to get StatefulSet. 
Requesting requeue") - etcdCluster.Status.Phase = "Failed" + cm.SetProgressing(false, status.ReasonStatefulSetGetError, status.FormatError("Failed to get StatefulSet", err)) // Stop progressing on persistent get error + cm.SetDegraded(true, status.ReasonStatefulSetGetError, status.FormatError("Failed to get StatefulSet", err)) return ctrl.Result{RequeueAfter: requeueDuration}, nil } } @@ -149,226 +192,301 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) // At this point, sts should exist (either found or created) if sts == nil { // This case should ideally not happen if error handling above is correct - err := fmt.Errorf("statefulSet is unexpectedly nil after get/create") + err = fmt.Errorf("statefulSet is unexpectedly nil after get/create") logger.Error(err, "Internal error") - etcdCluster.Status.Phase = "Failed" - return ctrl.Result{}, err // Return error, defer will update status + cm.SetDegraded(true, "InternalError", err.Error()) + return ctrl.Result{}, err } - // Update status based on STS before proceeding - etcdCluster.Status.ReadyReplicas = sts.Status.ReadyReplicas + // Update ReadyReplicas based on current STS status + calculatedStatus.ReadyReplicas = sts.Status.ReadyReplicas // If the Statefulsets is not controlled by this EtcdCluster resource, we should log // a warning to the event recorder and return error msg. err = checkStatefulSetControlledByEtcdOperator(etcdCluster, sts) if err != nil { logger.Error(err, "StatefulSet is not controlled by this EtcdCluster resource") - etcdCluster.Status.Phase = "Failed" + cm.SetDegraded(true, status.ReasonNotOwnedResource, err.Error()) return ctrl.Result{}, err } // If statefulset size is 0. try to instantiate the cluster with 1 member if sts.Spec.Replicas != nil && *sts.Spec.Replicas == 0 { logger.Info("StatefulSet has 0 replicas. Trying to create a new cluster with 1 member") + cm.SetProgressing(true, status.ReasonInitializingCluster, "Scaling StatefulSet from 0 to 1 replica") + sts, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, 1, r.Scheme) if err != nil { logger.Error(err, "Failed to scale StatefulSet to 1 replica") - etcdCluster.Status.Phase = "Failed" + cm.SetProgressing(false, status.ReasonResourceUpdateFail, status.FormatError("Failed to scale StatefulSet to 1", err)) + cm.SetDegraded(true, status.ReasonResourceUpdateFail, status.FormatError("Failed to scale StatefulSet to 1", err)) return ctrl.Result{}, err } - // Successfully scaled to 1, update status fields and requeue to wait for pod readiness - etcdCluster.Status.ReadyReplicas = sts.Status.ReadyReplicas - etcdCluster.Status.Phase = "Initializing" // return ctrl.Result{RequeueAfter: requeueDuration}, nil // Requeue to check readiness, should we do it? } + // Create Headless Service err = createHeadlessServiceIfNotExist(ctx, logger, r.Client, etcdCluster, r.Scheme) if err != nil { logger.Error(err, "Failed to create Headless Service") - etcdCluster.Status.Phase = "Failed" + cm.SetProgressing(false, status.ReasonResourceCreateFail, status.FormatError("Failed to ensure Headless Service", err)) + cm.SetDegraded(true, status.ReasonResourceCreateFail, status.FormatError("Failed to ensure Headless Service", err)) return ctrl.Result{}, err } + // --------------------------------------------------------------------- + // 5. 
Etcd Cluster Health Check & Member Status + // --------------------------------------------------------------------- logger.Info("Now checking health of the cluster members") - memberListResp, healthInfos, err := healthCheck(sts, logger) + var memberListResp *clientv3.MemberListResponse // Declare other multi-return variables if needed + var healthInfos []etcdutils.EpHealth + memberListResp, healthInfos, err = healthCheck(sts, logger) if err != nil { logger.Error(err, "Health check failed") - etcdCluster.Status.Phase = "Degraded" // Or "Unavailable"? + cm.SetAvailable(false, status.ReasonHealthCheckError, status.FormatError("Health check failed", err)) + cm.SetDegraded(true, status.ReasonHealthCheckError, status.FormatError("Health check failed", err)) + // Keep Progressing True as we need to retry health check + cm.SetProgressing(true, status.ReasonHealthCheckError, "Retrying after health check failure") return ctrl.Result{}, fmt.Errorf("health check failed: %w", err) } + // calculatedStatus.LastHealthCheckTime = metav1.Now() // TODO: Add this field + // TODO: Update CurrentVersion from healthInfos (e.g., from leaderStatus.Version if available) + // TODO: Populate UnhealthyMembers list based on healthInfos + // Update member count memberCnt := 0 if memberListResp != nil { memberCnt = len(memberListResp.Members) } - etcdCluster.Status.Members = int32(memberCnt) - // TODO: Update CurrentVersion from healthInfos - // TODO: Update Conditions based on healthInfos + calculatedStatus.Members = int32(memberCnt) + // --------------------------------------------------------------------- + // 6. Handle Member/Replica Mismatch + // --------------------------------------------------------------------- targetReplica := *sts.Spec.Replicas // Start with the current size of the stateful set - // The number of replicas in the StatefulSet doesn't match the number of etcd members in the cluster. 
if int(targetReplica) != memberCnt { logger.Info("The expected number of replicas doesn't match the number of etcd members in the cluster", "targetReplica", targetReplica, "memberCnt", memberCnt) - etcdCluster.Status.Phase = "Scaling" + cm.SetProgressing(true, status.ReasonMembersMismatch, fmt.Sprintf("StatefulSet replicas (%d) differ from etcd members (%d), adjusting...", targetReplica, memberCnt)) + cm.SetAvailable(false, status.ReasonMembersMismatch, "Adjusting StatefulSet replicas to match etcd members") // Not fully available during adjustment + + var newReplicaCount int32 + var reason string if int(targetReplica) < memberCnt { logger.Info("An etcd member was added into the cluster, but the StatefulSet hasn't scaled out yet") - newReplicaCount := targetReplica + 1 + newReplicaCount = targetReplica + 1 + reason = status.ReasonScalingUp // More specific reason logger.Info("Increasing StatefulSet replicas to match the etcd cluster member count", "oldReplicaCount", targetReplica, "newReplicaCount", newReplicaCount) - _, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, newReplicaCount, r.Scheme) - if err != nil { - logger.Error(err, "Failed to adjust StatefulSet replicas to match member count") - etcdCluster.Status.Phase = "Failed" - return ctrl.Result{}, err - } } else { logger.Info("An etcd member was removed from the cluster, but the StatefulSet hasn't scaled in yet") - newReplicaCount := targetReplica - 1 + newReplicaCount = targetReplica - 1 + reason = status.ReasonScalingDown // More specific reason logger.Info("Decreasing StatefulSet replicas to remove the unneeded Pod.", "oldReplicaCount", targetReplica, "newReplicaCount", newReplicaCount) - _, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, newReplicaCount, r.Scheme) - if err != nil { - logger.Error(err, "Failed to adjust StatefulSet replicas to match member count") - etcdCluster.Status.Phase = "Failed" - return ctrl.Result{}, err - } } - // Successfully adjusted STS, update status and requeue - etcdCluster.Status.ReadyReplicas = sts.Status.ReadyReplicas + cm.SetProgressing(true, reason, fmt.Sprintf("Adjusting StatefulSet replicas to %d", newReplicaCount)) // Update reason + + sts, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, newReplicaCount, r.Scheme) + if err != nil { + logger.Error(err, "Failed to adjust StatefulSet replicas to match member count") + cm.SetProgressing(false, status.ReasonResourceUpdateFail, status.FormatError("Failed to adjust StatefulSet replicas to match member count", err)) + cm.SetDegraded(true, status.ReasonResourceUpdateFail, status.FormatError("Failed to adjust StatefulSet replicas to match member count", err)) + return ctrl.Result{}, err + } + // Requeue to check state after adjustment return ctrl.Result{RequeueAfter: requeueDuration}, nil } + // --------------------------------------------------------------------- + // 7. 
Handle Learners + // --------------------------------------------------------------------- var ( learnerStatus *clientv3.StatusResponse learner uint64 leaderStatus *clientv3.StatusResponse ) - if memberCnt > 0 { - // Find the leader status _, leaderStatus = etcdutils.FindLeaderStatus(healthInfos, logger) if leaderStatus == nil { - err := fmt.Errorf("couldn't find leader, memberCnt: %d", memberCnt) + err = fmt.Errorf("couldn't find leader, memberCnt: %d", memberCnt) logger.Error(err, "Leader election might be in progress or cluster unhealthy") - etcdCluster.Status.Phase = "Degraded" // Or Unavailable - // TODO: Add Condition + cm.SetAvailable(false, status.ReasonLeaderNotFound, err.Error()) + cm.SetDegraded(true, status.ReasonLeaderNotFound, err.Error()) + cm.SetProgressing(true, status.ReasonLeaderNotFound, "Waiting for leader election") // Still progressing towards stable state // If the leader is not available, let's wait for the leader to be elected - return ctrl.Result{}, fmt.Errorf("couldn't find leader, memberCnt: %d", memberCnt) + return ctrl.Result{RequeueAfter: requeueDuration}, err } + calculatedStatus.LeaderId = fmt.Sprintf("%x", leaderStatus.Header.MemberId) learner, learnerStatus = etcdutils.FindLearnerStatus(healthInfos, logger) if learner > 0 { // There is at least one learner. Let's try to promote it or wait // Find the learner status logger.Info("Learner found", "learnedID", learner, "learnerStatus", learnerStatus) + cm.SetProgressing(true, status.ReasonPromotingLearner, fmt.Sprintf("Learner member %x found", learner)) + cm.SetAvailable(false, status.ReasonPromotingLearner, "Cluster has learner member, not fully available") // Not fully available with learner if etcdutils.IsLearnerReady(leaderStatus, learnerStatus) { logger.Info("Learner is ready to be promoted to voting member", "learnerID", learner) logger.Info("Promoting the learner member", "learnerID", learner) + cm.SetProgressing(true, status.ReasonPromotingLearner, fmt.Sprintf("Promoting ready learner %x", learner)) + eps := clientEndpointsFromStatefulsets(sts) eps = eps[:(len(eps) - 1)] err = etcdutils.PromoteLearner(eps, learner) if err != nil { logger.Error(err, "Failed to promote learner") - etcdCluster.Status.Phase = "Failed" // Promotion failed - // TODO: Add Condition - // The member is not promoted yet, so we error out + cm.SetProgressing(false, status.ReasonEtcdClientError, status.FormatError(fmt.Sprintf("Failed to promote learner %x", learner), err)) + cm.SetDegraded(true, status.ReasonEtcdClientError, status.FormatError(fmt.Sprintf("Failed to promote learner %x", learner), err)) return ctrl.Result{}, err } - etcdCluster.Status.Phase = "PromotingLearner" // Indicate promotion happened + // CHANGED BEHAVIOR: Promotion initiated, requeue shortly + return ctrl.Result{RequeueAfter: requeueDuration}, nil } else { - // Learner is not yet ready. 
We can't add another learner or proceed further until this one is promoted - // So let's requeue logger.Info("The learner member isn't ready to be promoted yet", "learnerID", learner) - etcdCluster.Status.Phase = "PromotingLearner" // Still trying to promote + cm.SetProgressing(true, status.ReasonWaitingForLearner, fmt.Sprintf("Waiting for learner %x to become ready for promotion", learner)) return ctrl.Result{RequeueAfter: requeueDuration}, nil } } } - if targetReplica == int32(etcdCluster.Spec.Size) { - logger.Info("EtcdCluster is already up-to-date") - return ctrl.Result{}, nil - } - - eps := clientEndpointsFromStatefulsets(sts) - - // If there is no more learner, then we can proceed to scale the cluster further. - // If there is no more member to add, the control will not reach here after the requeue - if targetReplica < int32(etcdCluster.Spec.Size) { - // scale out - _, peerURL := peerEndpointForOrdinalIndex(etcdCluster, int(targetReplica)) // The index starts at 0, so we should do this before incrementing targetReplica - targetReplica++ - logger.Info("[Scale out] adding a new learner member to etcd cluster", "peerURLs", peerURL) - if _, err := etcdutils.AddMember(eps, []string{peerURL}, true); err != nil { - logger.Error(err, "Failed to add learner member") - etcdCluster.Status.Phase = "Failed" // Scaling failed - // TODO: Add Condition - return ctrl.Result{}, err + // --------------------------------------------------------------------- + // 8. Perform Scaling based on Spec.Size (if needed and no learner) + // --------------------------------------------------------------------- + if targetReplica != int32(etcdCluster.Spec.Size) { + eps := clientEndpointsFromStatefulsets(sts) + var scalingReason string + var scalingMessage string + + // If there is no more learner, then we can proceed to scale the cluster further. 
+ // If there is no more member to add, the control will not reach here after the requeue + if targetReplica < int32(etcdCluster.Spec.Size) { + // scale out + scalingReason = status.ReasonScalingUp + scalingMessage = fmt.Sprintf("Scaling up from %d to %d", targetReplica, targetReplica+1) + cm.SetProgressing(true, scalingReason, scalingMessage) + cm.SetAvailable(false, scalingReason, "Cluster is scaling up") + + _, peerURL := peerEndpointForOrdinalIndex(etcdCluster, int(targetReplica)) // The index starts at 0, so we should do this before incrementing targetReplica + targetReplica++ // This is the new desired STS replica count + logger.Info("[Scale out] adding a new learner member to etcd cluster", "peerURLs", peerURL) + if _, err = etcdutils.AddMember(eps, []string{peerURL}, true); err != nil { + logger.Error(err, "Failed to add learner member") + cm.SetProgressing(false, status.ReasonEtcdClientError, status.FormatError("Failed to add member", err)) + cm.SetDegraded(true, status.ReasonEtcdClientError, status.FormatError("Failed to add member", err)) + return ctrl.Result{}, err + } + logger.Info("Learner member added successfully", "peerURLs", peerURL) + } else { + // scale in + scalingReason = status.ReasonScalingDown + targetReplica-- + logger = logger.WithValues("targetReplica", targetReplica, "expectedSize", etcdCluster.Spec.Size) + scalingMessage = fmt.Sprintf("Scaling down from %d to %d", targetReplica+1, targetReplica) + cm.SetProgressing(true, scalingReason, scalingMessage) + cm.SetAvailable(false, scalingReason, "Cluster is scaling down") + + memberID := healthInfos[memberCnt-1].Status.Header.MemberId + logger.Info("[Scale in] removing one member", "memberID", memberID) + eps = eps[:targetReplica] + if err = etcdutils.RemoveMember(eps, memberID); err != nil { + logger.Error(err, "Failed to remove member") + cm.SetProgressing(false, status.ReasonEtcdClientError, status.FormatError(fmt.Sprintf("Failed to remove member %x", memberID), err)) + cm.SetDegraded(true, status.ReasonEtcdClientError, status.FormatError(fmt.Sprintf("Failed to remove member %x", memberID), err)) + return ctrl.Result{}, err + } + logger.Info("Member removed successfully", "memberID", memberID) } - logger.Info("Learner member added successfully", "peerURLs", peerURL) - } else { - // scale in - targetReplica-- - logger = logger.WithValues("targetReplica", targetReplica, "expectedSize", etcdCluster.Spec.Size) - - memberID := healthInfos[memberCnt-1].Status.Header.MemberId - - logger.Info("[Scale in] removing one member", "memberID", memberID) - eps = eps[:targetReplica] - if err := etcdutils.RemoveMember(eps, memberID); err != nil { - logger.Error(err, "Failed to remove member") - etcdCluster.Status.Phase = "Failed" // Scaling failed - // TODO: Add Condition + // Update StatefulSet to the new targetReplica count + sts, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, targetReplica, r.Scheme) + if err != nil { + logger.Error(err, "Failed to update StatefulSet during scaling") + cm.SetProgressing(false, status.ReasonResourceUpdateFail, status.FormatError("Failed to update STS during scaling", err)) + cm.SetDegraded(true, status.ReasonResourceUpdateFail, status.FormatError("Failed to update STS during scaling", err)) return ctrl.Result{}, err } - } - sts, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, targetReplica, r.Scheme) - if err != nil { - logger.Error(err, "Failed to update StatefulSet during scaling") - etcdCluster.Status.Phase = "Failed" - return ctrl.Result{}, err + // CHANGED 
BEHAVIOR: Scaling action initiated, requeue to wait for stabilization + return ctrl.Result{RequeueAfter: requeueDuration}, nil } - allMembersHealthy, err := areAllMembersHealthy(sts, logger) + // --------------------------------------------------------------------- + // 9. Final State Check (Desired size reached, no learners) + // --------------------------------------------------------------------- + logger.Info("EtcdCluster is at the desired size and has no learners pending promotion") + + var allMembersHealthy bool + allMembersHealthy, err = areAllMembersHealthy(sts, logger) // Re-check health if err != nil { logger.Error(err, "Final health check failed") - etcdCluster.Status.Phase = "Degraded" - // TODO: Add Condition - return ctrl.Result{}, err - } - - if *sts.Spec.Replicas != int32(etcdCluster.Spec.Size) || !allMembersHealthy { - // Requeue if the statefulset size is not equal to the expected size of ETCD cluster - // Or if all members of the cluster are not healthy - etcdCluster.Status.Phase = "Degraded" - // TODO: Add Condition + cm.SetAvailable(false, status.ReasonHealthCheckError, status.FormatError("Final health check failed", err)) + cm.SetDegraded(true, status.ReasonHealthCheckError, status.FormatError("Final health check failed", err)) + cm.SetProgressing(true, status.ReasonHealthCheckError, "Retrying after final health check failure") // Keep progressing? Or just degraded? return ctrl.Result{RequeueAfter: requeueDuration}, nil } - etcdCluster.Status.Phase = "Running" // Final healthy state - // TODO: Set Available Condition to True - logger.Info("EtcdCluster reconciled successfully") - return ctrl.Result{}, nil + if allMembersHealthy { + logger.Info("EtcdCluster reconciled successfully and is healthy") + cm.SetAvailable(true, status.ReasonClusterReady, "Cluster is fully available and healthy") + cm.SetProgressing(false, status.ReasonReconcileSuccess, "Cluster reconciled to desired state") + cm.SetDegraded(false, status.ReasonClusterReady, "Cluster is healthy") // Explicitly clear Degraded + return ctrl.Result{}, nil // Success! Defer will update status. + } else { + logger.Info("EtcdCluster reached desired size, but some members are unhealthy") + cm.SetAvailable(false, status.ReasonMembersUnhealthy, "Cluster reached size, but some members are unhealthy") + cm.SetProgressing(false, status.ReasonMembersUnhealthy, "Cluster reached size, reconciliation paused pending health") + cm.SetDegraded(true, status.ReasonMembersUnhealthy, "Some members are unhealthy") + return ctrl.Result{RequeueAfter: requeueDuration}, nil // Requeue to monitor health + } + // Defer function will execute here before returning } -// updateStatusIfNeeded compares the old and new status and patches if changed. -func (r *EtcdClusterReconciler) updateStatusIfNeeded(ctx context.Context, etcdCluster *ecv1alpha1.EtcdCluster, oldEtcdCluster *ecv1alpha1.EtcdCluster) error { - logger := log.FromContext(ctx) - // Compare the new status with the old status - if !equality.Semantic.DeepEqual(oldEtcdCluster.Status, etcdCluster.Status) { - logger.Info("Updating EtcdCluster status", "namespace", etcdCluster.Namespace, "name", etcdCluster.Name) - err := r.Status().Patch(ctx, etcdCluster, client.MergeFrom(oldEtcdCluster)) - if err != nil { - logger.Error(err, "Failed to update EtcdCluster status", "namespace", etcdCluster.Namespace, "name", etcdCluster.Name) - return err // Return the error so the Reconcile loop retries +// derivePhaseFromConditions determines the overall Phase based on the set Conditions. 
+// NOTE: This logic needs refinement based on the exact conditions being set. +func (r *EtcdClusterReconciler) derivePhaseFromConditions(cluster *ecv1alpha1.EtcdCluster) { + // Default to Pending if no other condition matches + phase := "Pending" + + if cluster.Spec.Size == 0 { + phase = "Idle" + } else if meta.IsStatusConditionTrue(cluster.Status.Conditions, status.ConditionDegraded) { + // Check for fatal reasons first + degradedCondition := meta.FindStatusCondition(cluster.Status.Conditions, status.ConditionDegraded) + if degradedCondition != nil { + switch degradedCondition.Reason { + case status.ReasonResourceCreateFail, status.ReasonResourceUpdateFail, status.ReasonEtcdClientError, status.ReasonNotOwnedResource, "InternalError": // Add more fatal reasons if needed + phase = "Failed" + default: + phase = "Degraded" + } + } else { + phase = "Degraded" // Should have reason if Degraded is True } - logger.Info("Successfully updated EtcdCluster status", "namespace", etcdCluster.Namespace, "name", etcdCluster.Name) - } else { - logger.V(1).Info("EtcdCluster status is already up-to-date", "namespace", etcdCluster.Namespace, "name", etcdCluster.Name) // Use V(1) for less important info + } else if meta.IsStatusConditionTrue(cluster.Status.Conditions, status.ConditionProgressing) { + // Determine specific progressing phase based on reason + progressingCondition := meta.FindStatusCondition(cluster.Status.Conditions, status.ConditionProgressing) + if progressingCondition != nil { + switch progressingCondition.Reason { + case status.ReasonCreatingResources: + phase = "Creating" + case status.ReasonInitializingCluster: + phase = "Initializing" + case status.ReasonScalingUp, status.ReasonScalingDown, status.ReasonMemberConfiguration, status.ReasonMembersMismatch: + phase = "Scaling" + case status.ReasonPromotingLearner, status.ReasonWaitingForLearner: + phase = "PromotingLearner" + default: + phase = "Progressing" // Generic progressing state if reason not specific + } + } else { + phase = "Progressing" // Should have reason if Progressing is True + } + } else if meta.IsStatusConditionTrue(cluster.Status.Conditions, status.ConditionAvailable) { + phase = "Running" } - return nil // No error occurred during status update itself + // Add logic for Terminating Phase if finalizers are implemented + + cluster.Status.Phase = phase } // SetupWithManager sets up the controller with the Manager. diff --git a/pkg/status/manager.go b/pkg/status/manager.go new file mode 100644 index 0000000..3db7747 --- /dev/null +++ b/pkg/status/manager.go @@ -0,0 +1,150 @@ +package status + +import ( + "fmt" + "time" + + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// Manager helps manage conditions in a Kubernetes resource status. +type Manager struct { + conditions *[]metav1.Condition + generation int64 // Store the generation +} + +// NewManager creates a new condition manager for the given slice of conditions and generation. +// The slice passed must be a pointer, as the manager will modify it directly. +func NewManager(conditions *[]metav1.Condition, generation int64) *Manager { + if conditions == nil { + // Initialize if nil to prevent panics, although the caller should ensure this. 
+ emptyConditions := make([]metav1.Condition, 0) + conditions = &emptyConditions + // Or return an error/panic based on desired strictness + // panic("conditions slice cannot be nil") + } + return &Manager{conditions: conditions, generation: generation} +} + +// set is a private helper to set or update a condition using meta.SetStatusCondition. +// meta.SetStatusCondition handles updating LastTransitionTime only when the status changes. +func (m *Manager) set(conditionType string, status metav1.ConditionStatus, reason, message string) { + newCondition := metav1.Condition{ + Type: conditionType, + Status: status, + ObservedGeneration: m.generation, // Use stored generation + LastTransitionTime: metav1.NewTime(time.Now()), + Reason: reason, + Message: message, + } + meta.SetStatusCondition(m.conditions, newCondition) +} + +// RemoveCondition removes a condition of the given type. +func (m *Manager) RemoveCondition(conditionType string) { + meta.RemoveStatusCondition(m.conditions, conditionType) +} + +// FindCondition retrieves a condition by type. Returns nil if not found. +func (m *Manager) FindCondition(conditionType string) *metav1.Condition { + return meta.FindStatusCondition(*m.conditions, conditionType) +} + +// IsConditionTrue checks if a condition of the given type is present and has Status=True. +func (m *Manager) IsConditionTrue(conditionType string) bool { + return meta.IsStatusConditionTrue(*m.conditions, conditionType) +} + +// IsConditionFalse checks if a condition of the given type is present and has Status=False. +func (m *Manager) IsConditionFalse(conditionType string) bool { + return meta.IsStatusConditionFalse(*m.conditions, conditionType) +} + +// IsConditionUnknown checks if a condition of the given type is present and has Status=Unknown, or if it's absent. +func (m *Manager) IsConditionUnknown(conditionType string) bool { + cond := m.FindCondition(conditionType) + if cond == nil { + return true // Absent often implies Unknown for readiness/availability + } + return cond.Status == metav1.ConditionUnknown +} + +// --- Semantic helpers --- +// These helpers make setting common conditions more readable in the reconciler. + +// SetAvailable sets the Available condition status. +func (m *Manager) SetAvailable(status bool, reason, message string) { + conditionStatus := metav1.ConditionFalse + if status { + conditionStatus = metav1.ConditionTrue + // Typically, if Available becomes True, Degraded should be False. + m.SetDegraded(false, reason, message) + } + m.set(ConditionAvailable, conditionStatus, reason, message) +} + +// SetProgressing sets the Progressing condition status. +func (m *Manager) SetProgressing(status bool, reason, message string) { + conditionStatus := metav1.ConditionFalse + if status { + conditionStatus = metav1.ConditionTrue + // If starting to progress, it's usually not fully Available yet. + // Ensure Available reflects this if not explicitly set later. + if !m.IsConditionTrue(ConditionAvailable) && m.IsConditionUnknown(ConditionAvailable) { + m.SetAvailable(false, reason, message) + } + } + m.set(ConditionProgressing, conditionStatus, reason, message) +} + +// SetDegraded sets the Degraded condition status. +func (m *Manager) SetDegraded(status bool, reason, message string) { + conditionStatus := metav1.ConditionFalse + if status { + conditionStatus = metav1.ConditionTrue + // If Degraded becomes True, Available should typically be False. 
+ m.SetAvailable(false, reason, message) + } + m.set(ConditionDegraded, conditionStatus, reason, message) +} + +// --- Optional more specific semantic helpers --- + +// SetScaling sets the Scaling condition status. +// func (m *Manager) SetScaling(status bool, reason, message string) { +// conditionStatus := metav1.ConditionFalse +// if status { +// conditionStatus = metav1.ConditionTrue +// m.SetProgressing(true, reason, message) // Scaling implies Progressing +// } +// m.set(ConditionScaling, conditionStatus, reason, message) +// } + +// SetPromoting sets the Promoting condition status. +// func (m *Manager) SetPromoting(status bool, reason, message string) { +// conditionStatus := metav1.ConditionFalse +// if status { +// conditionStatus = metav1.ConditionTrue +// m.SetProgressing(true, reason, message) // Promoting implies Progressing +// } +// m.set(ConditionPromoting, conditionStatus, reason, message) +// } + +// SetInitializing sets the Initializing condition status. +// func (m *Manager) SetInitializing(status bool, reason, message string) { +// conditionStatus := metav1.ConditionFalse +// if status { +// conditionStatus = metav1.ConditionTrue +// m.SetProgressing(true, reason, message) // Initializing implies Progressing +// } +// m.set(ConditionInitializing, conditionStatus, reason, message) +// } + +// Helper to format error messages concisely for Conditions +func FormatError(prefix string, err error) string { + if err == nil { + return prefix + } + return fmt.Sprintf("%s: %v", prefix, err) +} diff --git a/pkg/status/patch.go b/pkg/status/patch.go new file mode 100644 index 0000000..8bd4e7d --- /dev/null +++ b/pkg/status/patch.go @@ -0,0 +1,115 @@ +// Package status provides utilities for managing Kubernetes resource status, +// particularly focusing on Conditions and status patching. +package status + +import ( + "context" + "fmt" + + apiequality "k8s.io/apimachinery/pkg/api/equality" + apierrors "k8s.io/apimachinery/pkg/api/errors" // For GroupResource in conflict error + "k8s.io/client-go/util/retry" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" +) + +// DefaultRetry is the default backoff for retrying patch operations. +// Customize this if needed, e.g., by providing a different +// retry.Backoff instance to retry.RetryOnConflict. +var DefaultRetry = retry.DefaultRetry + +// PatchStatusMutate applies status changes using a mutate function. +// It fetches the latest version of the object, applies mutations via the mutate func, +// and then patches the status subresource if changes are detected. +// It uses optimistic locking and retries on conflict errors (like "the object has been modified"). +// +// Parameters: +// - ctx: The context for the operation. +// - c: The controller-runtime client. +// - originalObj: The original object instance fetched at the start of the reconcile loop. +// This is used to get the ObjectKey and the Generation at the start of the reconcile. +// It should not be modified by the reconciliation logic directly; status changes +// should be calculated and applied via the mutateFn on a fresh copy. +// - mutateFn: A function that takes the latest fetched object (as type T) and applies +// the calculated status changes *directly to that object's status field*. +// This function should return an error if the mutation itself fails for some reason, +// which will abort the patch attempt. 
+func PatchStatusMutate[T client.Object]( + ctx context.Context, + c client.Client, + originalObj T, // Original object from reconcile start + mutateFn func(latestFetchedObj T) error, // Mutate function now takes the latest object +) error { + logger := log.FromContext(ctx).WithValues( + "objectName", originalObj.GetName(), + "objectNamespace", originalObj.GetNamespace(), + "gvk", originalObj.GetObjectKind().GroupVersionKind().String(), + ) + key := client.ObjectKeyFromObject(originalObj) + startGeneration := originalObj.GetGeneration() // Generation at the start of reconcile + + return retry.RetryOnConflict(DefaultRetry, func() error { + // Fetch the latest version of the object in each retry iteration. + latestFetchedObj := originalObj.DeepCopyObject().(T) // Create a new instance of type T + getErr := c.Get(ctx, key, latestFetchedObj) + if getErr != nil { + if apierrors.IsNotFound(getErr) { + logger.Info("Object not found while attempting to patch status, skipping patch.") + return nil // Object deleted, no status to patch. + } + logger.Error(getErr, "Failed to get latest object for status patch.") + // Returning the error from Get will stop retries by default retry settings + // if it's not a conflict error. + return fmt.Errorf("failed to get latest object %v for status patch: %w", key, getErr) + } + + // Check if the generation changed during our reconcile. This might mean the spec changed + // and our calculated status is stale. + if startGeneration != latestFetchedObj.GetGeneration() { + logger.Info("Object generation changed during reconcile, status calculated based on old spec might be stale. Aborting this patch attempt.", + "key", key, "startGeneration", startGeneration, "currentGeneration", latestFetchedObj.GetGeneration()) + // Returning nil here means we acknowledge the generation change and decide *not* to patch stale status. + // The main Reconcile loop will requeue soon with the new generation. + // This avoids spamming patch retries for stale data. + return nil + } + + // Create a copy *before* mutation to compare against. This is the object state + // from the API server *before* we apply our calculated changes for this attempt. + beforeMutateStatusCopy := latestFetchedObj.DeepCopyObject().(T) + + // Apply the calculated status changes by calling the mutate function. + // The mutate function should modify the 'latestFetchedObj's status field directly. + if err := mutateFn(latestFetchedObj); err != nil { + // If the mutation logic itself fails, we likely want to stop retrying the patch. + logger.Error(err, "Mutation function failed during status patch attempt.") + return fmt.Errorf("mutate function failed during status patch: %w", err) // Stop retries by returning non-conflict error + } + + // Compare the object's status before and after mutation. + // We use Semantic.DeepEqual on the whole objects because `Status()` subresource patch + // still sends a patch based on the whole object typically. More accurately, + // we should compare just the status fields if we could extract them generically. + // However, comparing the whole object after mutation (which only touched status) + // against its state before mutation (but after GET) is correct. + if apiequality.Semantic.DeepEqual(beforeMutateStatusCopy, latestFetchedObj) { + logger.V(1).Info("No status change detected after applying mutation, skipping patch.") + return nil // No actual changes to status, no need to patch + } + + // Patch the status subresource using the mutated 'latestFetchedObj' object. 
+ // client.MergeFrom(beforeMutateStatusCopy) creates a strategic merge patch or JSON merge patch + // based on the difference between 'latestFetchedObj' and 'beforeMutateStatusCopy'. + logger.Info("Status change detected, attempting to patch status subresource.") + patchErr := c.Status().Patch(ctx, latestFetchedObj, client.MergeFrom(beforeMutateStatusCopy)) + if patchErr != nil { + // Log the patch error. RetryOnConflict will decide whether to retry based on the error type. + // Conflict errors will be retried. Other errors might not. + logger.Info("Failed to patch status, will retry if conflict error.", "error", patchErr.Error()) + return patchErr // Return the error to retry.RetryOnConflict + } + + logger.Info("Successfully patched status subresource.") + return nil // Patch successful + }) +} diff --git a/pkg/status/type.go b/pkg/status/type.go new file mode 100644 index 0000000..54c2bea --- /dev/null +++ b/pkg/status/type.go @@ -0,0 +1,64 @@ +// Package status provides utilities for managing Kubernetes resource status, +// particularly focusing on Conditions according to standard practices. +package status + +// Condition types used for EtcdCluster status. +// Adhering to Kubernetes API conventions as much as possible. +// See: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties +const ( + // ConditionAvailable indicates that the etcd cluster has reached its desired state, + // has quorum, and is ready to serve requests. All members are healthy. + ConditionAvailable string = "Available" + + // ConditionProgressing indicates that the operator is actively working + // to bring the etcd cluster towards the desired state (e.g., creating resources, + // scaling, promoting learners). It's True when reconciliation is in progress + // and False when the desired state is reached or a terminal state occurs. + ConditionProgressing string = "Progressing" + + // ConditionDegraded indicates that the etcd cluster is functional but + // operating with potential issues that might impact performance or fault tolerance + // (e.g., some members unhealthy but quorum maintained, leader missing temporarily). + // It requires attention but is not necessarily completely unavailable. + ConditionDegraded string = "Degraded" +) + +// Common reasons for EtcdCluster status conditions.// Reasons should be CamelCase and concise. +const ( + // General Reasons + ReasonReconciling string = "Reconciling" + ReasonReconcileSuccess string = "ReconcileSuccess" + ReasonReconcileError string = "ReconcileError" // Generic error + + // Available Reasons + ReasonClusterReady string = "ClusterReady" + ReasonPodsNotReady string = "PodsNotReady" + ReasonEtcdNotHealthy string = "EtcdNotHealthy" + ReasonQuorumLost string = "QuorumLost" + ReasonLeaderNotFound string = "LeaderNotFound" + ReasonWaitingForSafeInit string = "WaitingForSafeInit" // e.g., waiting for first pod + ReasonSizeIsZero string = "SizeIsZero" + + // Progressing Reasons + ReasonInitializingCluster string = "InitializingCluster" + ReasonCreatingResources string = "CreatingResources" // STS, Service etc. 
+ ReasonScalingUp string = "ScalingUp" + ReasonScalingDown string = "ScalingDown" + ReasonPromotingLearner string = "PromotingLearner" + ReasonWaitingForLearner string = "WaitingForLearner" + ReasonStatefulSetNotReady string = "StatefulSetNotReady" + ReasonPodsNotReadyYet string = "PodsNotReadyYet" // Different from PodsNotReady reason for Available=False + ReasonMemberConfiguration string = "MemberConfiguration" // Adding/removing member via etcd API + ReasonMembersMismatch string = "MembersMismatch" + ReasonTerminating string = "Terminating" // During finalization + + // Degraded Reasons + ReasonMembersUnhealthy string = "MembersUnhealthy" + ReasonLeaderMissing string = "LeaderMissing" // Could be transient + ReasonHealthCheckError string = "HealthCheckError" + ReasonEtcdClientError string = "EtcdClientError" // Persistent client errors leading to degraded state + ReasonResourceCreateFail string = "ResourceCreateFail" // Non-fatal resource creation failure + ReasonResourceUpdateFail string = "ResourceUpdateFail" // Non-fatal resource update failure + ReasonStatefulSetGetError string = "StatefulSetGetError" + ReasonNotOwnedResource string = "NotOwnedResource" +) From f44e3ee273f3255d7544889ac6cd6375527d90b5 Mon Sep 17 00:00:00 2001 From: Wenxue Zhao Date: Mon, 12 May 2025 19:12:05 -0400 Subject: [PATCH 04/12] added status.CurrentReplicas and update logic, changed status.Members to status.MembersCount Signed-off-by: Wenxue Zhao --- api/v1alpha1/etcdcluster_types.go | 28 +++++++++++++-- api/v1alpha1/zz_generated.deepcopy.go | 20 +++++++++++ .../bases/operator.etcd.io_etcdclusters.yaml | 3 ++ internal/controller/etcdcluster_controller.go | 35 +++++++++++++++++-- pkg/status/manager.go | 4 +-- 5 files changed, 83 insertions(+), 7 deletions(-) diff --git a/api/v1alpha1/etcdcluster_types.go b/api/v1alpha1/etcdcluster_types.go index 6d079e1..4f3e688 100644 --- a/api/v1alpha1/etcdcluster_types.go +++ b/api/v1alpha1/etcdcluster_types.go @@ -133,14 +133,20 @@ type EtcdClusterStatus struct { // ReadyReplicas is the number of pods targeted by this EtcdCluster with a Ready condition. ReadyReplicas int32 `json:"readyReplicas,omitempty"` - // Members is the number of etcd members in the cluster reported by etcd API. - Members int32 `json:"members,omitempty"` + // CurrentReplicas is the number of pods managed by this EtcdCluster (via StatefulSet). + CurrentReplicas int32 `json:"CurrentReplicas,omitempty"` + // MembersCount is the number of etcd members in the cluster reported by etcd API. + MembersCount int32 `json:"members,omitempty"` // CurrentVersion is the version of the etcd cluster. CurrentVersion string `json:"currentVersion,omitempty"` // Phase indicates the state of the EtcdCluster. Phase string `json:"phase,omitempty"` // CurreLeaderId is the ID of etcd cluster leader. LeaderId string `json:"leaderId,omitempty"` + + // Members provides the status of each individual etcd member. + // +optional + Members []MemberStatus `json:"members,omitempty"` // Conditions represent the latest available observations of a replica set's state. // +optional // +patchMergeKey=type @@ -149,6 +155,24 @@ type EtcdClusterStatus struct { Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` } +type MemberStatus struct { + // Name of the etcd member. + Name string `json:"name,omitempty"` + // ID of the etcd member as reported by etcd. + ID string `json:"id,omitempty"` + // Version of etcd running on this member. 
+ Version string `json:"version,omitempty"` + // IsHealthy indicates if the member is considered healthy by etcd. + IsHealthy bool `json:"isHealthy,omitempty"` + // IsLearner indicates if the member is a learner. + IsLearner bool `json:"isLearner,omitempty"` + // ClientURL is the client URL for this member. + ClientURL string `json:"clientURL,omitempty"` + // PeerURL is the peer URL for this member. + PeerURL string `json:"peerURL,omitempty"` + // TODO: Add more fields later like Alarms, DBSize, etc. +} + // +kubebuilder:object:root=true // +kubebuilder:subresource:status diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 946ea62..7150dc9 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -175,6 +175,11 @@ func (in *EtcdClusterSpec) DeepCopy() *EtcdClusterSpec { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *EtcdClusterStatus) DeepCopyInto(out *EtcdClusterStatus) { *out = *in + if in.Members != nil { + in, out := &in.Members, &out.Members + *out = make([]MemberStatus, len(*in)) + copy(*out, *in) + } if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions *out = make([]v1.Condition, len(*in)) @@ -194,6 +199,21 @@ func (in *EtcdClusterStatus) DeepCopy() *EtcdClusterStatus { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MemberStatus) DeepCopyInto(out *MemberStatus) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MemberStatus. +func (in *MemberStatus) DeepCopy() *MemberStatus { + if in == nil { + return nil + } + out := new(MemberStatus) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PodMetadata) DeepCopyInto(out *PodMetadata) { *out = *in diff --git a/config/crd/bases/operator.etcd.io_etcdclusters.yaml b/config/crd/bases/operator.etcd.io_etcdclusters.yaml index 77ccac3..85a8723 100644 --- a/config/crd/bases/operator.etcd.io_etcdclusters.yaml +++ b/config/crd/bases/operator.etcd.io_etcdclusters.yaml @@ -284,6 +284,9 @@ spec: currentVersion: description: CurrentVersion is the version of the etcd cluster. type: string + leaderId: + description: CurreLeaderId is the ID of etcd cluster leader. + type: string members: description: Members is the number of etcd members in the cluster reported by etcd API. 
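
The controller hunks below reference a local 'calculatedStatus' and a condition manager
'cm' whose initialization is not visible in this excerpt. Based on the
NewManager(conditions *[]metav1.Condition, generation int64) signature shown later in the
series, the wiring is presumably along these lines (a sketch only; the variable names are
taken from the hunks below, everything else is illustrative):

	// Work on a copy of the status; the deferred status patch applies it at the end.
	calculatedStatus := *etcdCluster.Status.DeepCopy()
	// Conditions are managed in place on the copy, stamped with the current generation.
	cm := status.NewManager(&calculatedStatus.Conditions, etcdCluster.Generation)
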
diff --git a/internal/controller/etcdcluster_controller.go b/internal/controller/etcdcluster_controller.go index e59f0b6..f46e3fb 100644 --- a/internal/controller/etcdcluster_controller.go +++ b/internal/controller/etcdcluster_controller.go @@ -139,7 +139,8 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) if etcdCluster.Spec.Size == 0 { logger.Info("EtcdCluster size is 0..Skipping next steps") calculatedStatus.ReadyReplicas = 0 - calculatedStatus.Members = 0 + calculatedStatus.CurrentReplicas = 0 + calculatedStatus.MembersCount = 0 cm.SetAvailable(false, status.ReasonSizeIsZero, "Desired cluster size is 0") cm.SetProgressing(false, status.ReasonSizeIsZero, "Desired cluster size is 0") return ctrl.Result{}, nil @@ -176,6 +177,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) cm.SetDegraded(true, status.ReasonResourceCreateFail, status.FormatError("Failed to create StatefulSet", err)) return ctrl.Result{}, err } + r.updateStatusFromStatefulSet(&calculatedStatus, sts) // Update after creation // STS created with 0 replicas, needs scaling to 1. Set Initializing. cm.SetProgressing(true, status.ReasonInitializingCluster, "StatefulSet created with 0 replicas, requires scaling to 1") } else { @@ -183,10 +185,14 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) // attempt processing again later. This could have been caused by a // temporary network failure, or any other transient reason. logger.Error(err, "Failed to get StatefulSet. Requesting requeue") + r.updateStatusFromStatefulSet(&calculatedStatus, nil) cm.SetProgressing(false, status.ReasonStatefulSetGetError, status.FormatError("Failed to get StatefulSet", err)) // Stop progressing on persistent get error cm.SetDegraded(true, status.ReasonStatefulSetGetError, status.FormatError("Failed to get StatefulSet", err)) + r.updateStatusFromStatefulSet(&calculatedStatus, nil) return ctrl.Result{RequeueAfter: requeueDuration}, nil } + } else { // StatefulSet was found + r.updateStatusFromStatefulSet(&calculatedStatus, sts) } // At this point, sts should exist (either found or created) @@ -195,6 +201,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) err = fmt.Errorf("statefulSet is unexpectedly nil after get/create") logger.Error(err, "Internal error") cm.SetDegraded(true, "InternalError", err.Error()) + r.updateStatusFromStatefulSet(&calculatedStatus, nil) return ctrl.Result{}, err } @@ -222,6 +229,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) cm.SetDegraded(true, status.ReasonResourceUpdateFail, status.FormatError("Failed to scale StatefulSet to 1", err)) return ctrl.Result{}, err } + r.updateStatusFromStatefulSet(&calculatedStatus, sts) // return ctrl.Result{RequeueAfter: requeueDuration}, nil // Requeue to check readiness, should we do it? } @@ -258,7 +266,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) if memberListResp != nil { memberCnt = len(memberListResp.Members) } - calculatedStatus.Members = int32(memberCnt) + calculatedStatus.MembersCount = int32(memberCnt) // --------------------------------------------------------------------- // 6. 
Handle Member/Replica Mismatch @@ -292,6 +300,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) cm.SetDegraded(true, status.ReasonResourceUpdateFail, status.FormatError("Failed to adjust StatefulSet replicas to match member count", err)) return ctrl.Result{}, err } + r.updateStatusFromStatefulSet(&calculatedStatus, sts) // Requeue to check state after adjustment return ctrl.Result{RequeueAfter: requeueDuration}, nil } @@ -404,6 +413,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) cm.SetDegraded(true, status.ReasonResourceUpdateFail, status.FormatError("Failed to update STS during scaling", err)) return ctrl.Result{}, err } + r.updateStatusFromStatefulSet(&calculatedStatus, sts) // CHANGED BEHAVIOR: Scaling action initiated, requeue to wait for stabilization return ctrl.Result{RequeueAfter: requeueDuration}, nil @@ -414,6 +424,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) // --------------------------------------------------------------------- logger.Info("EtcdCluster is at the desired size and has no learners pending promotion") + r.updateStatusFromStatefulSet(&calculatedStatus, sts) var allMembersHealthy bool allMembersHealthy, err = areAllMembersHealthy(sts, logger) // Re-check health if err != nil { @@ -489,6 +500,26 @@ func (r *EtcdClusterReconciler) derivePhaseFromConditions(cluster *ecv1alpha1.Et cluster.Status.Phase = phase } +func (r *EtcdClusterReconciler) updateStatusFromStatefulSet( + etcdClusterStatus *ecv1alpha1.EtcdClusterStatus, // Pass the status struct to modify + sts *appsv1.StatefulSet, +) { + if sts == nil { + // If sts is nil, perhaps after a failed creation attempt. + // Set to 0 or ensure prior logic handles default state. + etcdClusterStatus.CurrentReplicas = 0 + etcdClusterStatus.ReadyReplicas = 0 + return + } + + if sts.Spec.Replicas != nil { + etcdClusterStatus.CurrentReplicas = *sts.Spec.Replicas + } else { + etcdClusterStatus.CurrentReplicas = 0 // Should not occur with a valid STS spec + } + etcdClusterStatus.ReadyReplicas = sts.Status.ReadyReplicas +} + // SetupWithManager sets up the controller with the Manager. func (r *EtcdClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { r.Recorder = mgr.GetEventRecorderFor("etcdcluster-controller") diff --git a/pkg/status/manager.go b/pkg/status/manager.go index 3db7747..be8659e 100644 --- a/pkg/status/manager.go +++ b/pkg/status/manager.go @@ -18,11 +18,9 @@ type Manager struct { // The slice passed must be a pointer, as the manager will modify it directly. func NewManager(conditions *[]metav1.Condition, generation int64) *Manager { if conditions == nil { - // Initialize if nil to prevent panics, although the caller should ensure this. + // Initialize if nil to prevent panics. 
emptyConditions := make([]metav1.Condition, 0) conditions = &emptyConditions - // Or return an error/panic based on desired strictness - // panic("conditions slice cannot be nil") } return &Manager{conditions: conditions, generation: generation} } From 3f31e0f3b75a0d57081ce23d6a18fe3988d5710f Mon Sep 17 00:00:00 2001 From: Wenxue Zhao Date: Tue, 13 May 2025 13:38:47 -0400 Subject: [PATCH 05/12] update status.Members type Signed-off-by: Wenxue Zhao --- api/v1alpha1/etcdcluster_types.go | 27 +++++++++++++++++++++------ api/v1alpha1/zz_generated.deepcopy.go | 9 ++++++++- 2 files changed, 29 insertions(+), 7 deletions(-) diff --git a/api/v1alpha1/etcdcluster_types.go b/api/v1alpha1/etcdcluster_types.go index 4f3e688..bed0de2 100644 --- a/api/v1alpha1/etcdcluster_types.go +++ b/api/v1alpha1/etcdcluster_types.go @@ -147,30 +147,45 @@ type EtcdClusterStatus struct { // Members provides the status of each individual etcd member. // +optional Members []MemberStatus `json:"members,omitempty"` - // Conditions represent the latest available observations of a replica set's state. + // Conditions represent the latest available observations of the EtcdCluster's state. // +optional // +patchMergeKey=type // +patchStrategy=merge - // +listType=atomic + // +listType=map + // +listMapKey=type Conditions []metav1.Condition `json:"conditions,omitempty" patchStrategy:"merge" patchMergeKey:"type"` } +// MemberStatus defines the observed state of a single etcd member. type MemberStatus struct { // Name of the etcd member. + // +optional Name string `json:"name,omitempty"` // ID of the etcd member as reported by etcd. ID string `json:"id,omitempty"` // Version of etcd running on this member. + // +optional Version string `json:"version,omitempty"` // IsHealthy indicates if the member is considered healthy by etcd. - IsHealthy bool `json:"isHealthy,omitempty"` + IsHealthy bool `json:"isHealthy"` // No omitempty, always show health // IsLearner indicates if the member is a learner. IsLearner bool `json:"isLearner,omitempty"` - // ClientURL is the client URL for this member. + // ClientURL is one of the client URLs for this member. + // +optional ClientURL string `json:"clientURL,omitempty"` - // PeerURL is the peer URL for this member. + // PeerURL is one of the peer URLs for this member. + // +optional PeerURL string `json:"peerURL,omitempty"` - // TODO: Add more fields later like Alarms, DBSize, etc. + // DBSize is the current database size in bytes of this member. + // +optional + DBSize int64 `json:"dbSize,omitempty"` + // DBSizeInUse is the actual disk space actively used by the database. + // +optional + DBSizeInUse int64 `json:"dbSizeInUse,omitempty"` + // Alarms are active alarms on this member. + // +optional + Alarms []string `json:"alarms,omitempty"` + // TODO: Add other relevant fields like storageVersion, downgradeInfo if needed. 
} // +kubebuilder:object:root=true diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 7150dc9..e49b823 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -178,7 +178,9 @@ func (in *EtcdClusterStatus) DeepCopyInto(out *EtcdClusterStatus) { if in.Members != nil { in, out := &in.Members, &out.Members *out = make([]MemberStatus, len(*in)) - copy(*out, *in) + for i := range *in { + (*in)[i].DeepCopyInto(&(*out)[i]) + } } if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions @@ -202,6 +204,11 @@ func (in *EtcdClusterStatus) DeepCopy() *EtcdClusterStatus { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MemberStatus) DeepCopyInto(out *MemberStatus) { *out = *in + if in.Alarms != nil { + in, out := &in.Alarms, &out.Alarms + *out = make([]string, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MemberStatus. From 6723ce16fb60e03a8d7e5f8f1e76b8867a07fb0f Mon Sep 17 00:00:00 2001 From: Wenxue Zhao Date: Thu, 5 Jun 2025 17:19:11 -0400 Subject: [PATCH 06/12] Signed-off-by: Wenxue Zhao refactor(api, health): Update status API and etcd health check logic This commit lays the foundational work for detailed member status reporting in the EtcdCluster resource. It updates the status API definition and refactors the underlying health checking logic. The reconciler has not yet been updated to populate all the new status fields. This integration will be handled in a subsequent commit. Key changes include: 1. API (`api/v1alpha1/etcdcluster_types.go`): - The `EtcdClusterStatus` struct is updated to include `ObservedGeneration`. - The `MemberStatus` struct is introduced to hold individual member data, including fields like `ID`, `Version`, `IsHealthy`, and a new `ErrorMessage` field. - Adds `+listMapKey=id` to the `Members` slice for merge patch support. - Field names and comments are clarified for better readability. 2. Health Check Logic (`internal/etcdutils.go`): - The `ClusterHealth` function is refactored to ensure the `client.AlarmList()` API is called only once per execution, reducing redundant calls. - The logic is decomposed into smaller, private helper functions to improve modularity and readability. - The internal `EpHealth` struct now uses a `[]string` for its `Alarms` field to store pre-processed, member-specific alarm types. - Potential nil pointer panics in `FindLeaderStatus` and `FindLearnerStatus` are fixed. 3. Test Suite (`internal/etcdutils_test.go`): - `TestClusterHealth` is converted to a table-driven test to cover more scenarios, including mixed-health clusters and members with active alarms. - A positive test case is added for `TestRemoveMember`. - Tests are added to verify the nil pointer fixes. - Test state leakage is addressed using `t.Cleanup` to ensure test isolation. 
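
For illustration, ObservedGeneration follows the usual Kubernetes convention: a consumer
should treat the published status as current only when it matches metadata.generation.
A minimal sketch (the helper name and the ecv1alpha1 alias are illustrative, not part of
this change):

	// statusIsCurrent reports whether the controller has observed the spec
	// generation that produced the currently published status.
	func statusIsCurrent(ec *ecv1alpha1.EtcdCluster) bool {
		return ec.Status.ObservedGeneration == ec.Generation
	}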
--- api/v1alpha1/etcdcluster_types.go | 86 ++++- .../bases/operator.etcd.io_etcdclusters.yaml | 110 +++++- internal/controller/etcdcluster_controller.go | 4 +- internal/etcdutils/etcdutils.go | 249 ++++++++++--- internal/etcdutils/etcdutils_test.go | 352 ++++++++++++++++-- 5 files changed, 689 insertions(+), 112 deletions(-) diff --git a/api/v1alpha1/etcdcluster_types.go b/api/v1alpha1/etcdcluster_types.go index bed0de2..38ef923 100644 --- a/api/v1alpha1/etcdcluster_types.go +++ b/api/v1alpha1/etcdcluster_types.go @@ -131,22 +131,48 @@ type EtcdClusterStatus struct { // INSERT ADDITIONAL STATUS FIELD - define observed state of cluster // Important: Run "make" to regenerate code after modifying this file - // ReadyReplicas is the number of pods targeted by this EtcdCluster with a Ready condition. + // ObservedGeneration is the most recent generation observed for this EtcdCluster by the controller. + // +optional + ObservedGeneration int64 `json:"observedGeneration,omitempty"` + + // CurrentReplicas is the number of etcd pods managed by the StatefulSet for this cluster. + // This reflects the .spec.replicas of the underlying StatefulSet. + // +optional + CurrentReplicas int32 `json:"currentReplicas,omitempty"` + + // ReadyReplicas is the number of etcd pods managed by the StatefulSet that are currently ready. + // This reflects the .status.readyReplicas of the underlying StatefulSet. + // +optional ReadyReplicas int32 `json:"readyReplicas,omitempty"` - // CurrentReplicas is the number of pods managed by this EtcdCluster (via StatefulSet). - CurrentReplicas int32 `json:"CurrentReplicas,omitempty"` - // MembersCount is the number of etcd members in the cluster reported by etcd API. - MembersCount int32 `json:"members,omitempty"` - // CurrentVersion is the version of the etcd cluster. + + // MemberCount is the number of members currently registered in the etcd cluster, + // as reported by the etcd 'member list' API. This may differ from CurrentReplicas + // during scaling operations or if members are added/removed outside the operator's direct control. + // +optional + MemberCount int32 `json:"memberCount,omitempty"` + + // CurrentVersion is the observed etcd version of the cluster. + // This is typically derived from the version of the healthy leader or a consensus among healthy members. + // +optional CurrentVersion string `json:"currentVersion,omitempty"` - // Phase indicates the state of the EtcdCluster. - Phase string `json:"phase,omitempty"` - // CurreLeaderId is the ID of etcd cluster leader. + + // LeaderId is the hex-encoded ID of the current etcd cluster leader, if one exists and is known. + // +optional LeaderId string `json:"leaderId,omitempty"` + // Phase indicates the current lifecycle phase of the EtcdCluster. + // Examples: "Pending", "Initializing", "Running", "Scaling", "Upgrading", "Failed", "Idle". + // +optional + Phase string `json:"phase,omitempty"` + // Members provides the status of each individual etcd member. // +optional + // +listType=map + // +listMapKey=id + // Alternative listMapKey could be 'name' if 'id' is not always immediately available or stable during init. + // However, 'id' is more canonical once a member is part of the cluster. Members []MemberStatus `json:"members,omitempty"` + // Conditions represent the latest available observations of the EtcdCluster's state. // +optional // +patchMergeKey=type @@ -158,34 +184,58 @@ type EtcdClusterStatus struct { // MemberStatus defines the observed state of a single etcd member. 
type MemberStatus struct { - // Name of the etcd member. + // Name of the etcd member, typically the pod name (e.g., "etcd-cluster-example-0"). + // This can also be the name reported by etcd itself if set. // +optional Name string `json:"name,omitempty"` - // ID of the etcd member as reported by etcd. - ID string `json:"id,omitempty"` + + // ID is the hex-encoded member ID as reported by etcd. + // This is the canonical identifier for an etcd member. + ID string `json:"id"` // Made non-optional as it's key for identification + // Version of etcd running on this member. // +optional Version string `json:"version,omitempty"` - // IsHealthy indicates if the member is considered healthy by etcd. + + // IsHealthy indicates if the member is considered healthy. + // A member is healthy if its etcd /health endpoint is reachable and reports OK, + // and its Status endpoint does not report any 'Errors'. IsHealthy bool `json:"isHealthy"` // No omitempty, always show health - // IsLearner indicates if the member is a learner. + + // IsLearner indicates if the member is currently a learner in the etcd cluster. + // +optional IsLearner bool `json:"isLearner,omitempty"` + // ClientURL is one of the client URLs for this member. // +optional ClientURL string `json:"clientURL,omitempty"` + // PeerURL is one of the peer URLs for this member. // +optional PeerURL string `json:"peerURL,omitempty"` - // DBSize is the current database size in bytes of this member. + + // DBSize is the current database size in bytes of this member, as reported by etcd. + // This is the physically allocated size. // +optional DBSize int64 `json:"dbSize,omitempty"` - // DBSizeInUse is the actual disk space actively used by the database. + + // DBSizeInUse is the actual disk space logically in use by the database, in bytes, of this member. // +optional DBSizeInUse int64 `json:"dbSizeInUse,omitempty"` - // Alarms are active alarms on this member. + + // Alarms are active alarms on this member (e.g., "NOSPACE", "CORRUPT"). + // This lists specific alarm types. // +optional Alarms []string `json:"alarms,omitempty"` - // TODO: Add other relevant fields like storageVersion, downgradeInfo if needed. + + // ErrorMessage provides any error associated with fetching status for this member, + // or errors reported by the member itself via its StatusResponse.Errors field. + // +optional + ErrorMessage string `json:"errorMessage,omitempty"` + + // TODO: Consider adding these in future iterations if deemed valuable: + // StorageVersion string `json:"storageVersion,omitempty"` // Raft storage version + // DowngradeInfo string `json:"downgradeInfo,omitempty"` // Information about downgrade capabilities } // +kubebuilder:object:root=true diff --git a/config/crd/bases/operator.etcd.io_etcdclusters.yaml b/config/crd/bases/operator.etcd.io_etcdclusters.yaml index 85a8723..e6f2a22 100644 --- a/config/crd/bases/operator.etcd.io_etcdclusters.yaml +++ b/config/crd/bases/operator.etcd.io_etcdclusters.yaml @@ -224,7 +224,7 @@ spec: properties: conditions: description: Conditions represent the latest available observations - of a replica set's state. + of the EtcdCluster's state. items: description: Condition contains details for one aspect of the current state of this API Resource. 
@@ -280,24 +280,114 @@ spec: - type type: object type: array - x-kubernetes-list-type: atomic + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + currentReplicas: + description: |- + CurrentReplicas is the number of etcd pods managed by the StatefulSet for this cluster. + This reflects the .spec.replicas of the underlying StatefulSet. + format: int32 + type: integer currentVersion: - description: CurrentVersion is the version of the etcd cluster. + description: |- + CurrentVersion is the observed etcd version of the cluster. + This is typically derived from the version of the healthy leader or a consensus among healthy members. type: string leaderId: - description: CurreLeaderId is the ID of etcd cluster leader. + description: LeaderId is the hex-encoded ID of the current etcd cluster + leader, if one exists and is known. type: string - members: - description: Members is the number of etcd members in the cluster - reported by etcd API. + memberCount: + description: |- + MemberCount is the number of members currently registered in the etcd cluster, + as reported by the etcd 'member list' API. This may differ from CurrentReplicas + during scaling operations or if members are added/removed outside the operator's direct control. format: int32 type: integer + members: + description: |- + Members provides the status of each individual etcd member. + Alternative listMapKey could be 'name' if 'id' is not always immediately available or stable during init. + However, 'id' is more canonical once a member is part of the cluster. + items: + description: MemberStatus defines the observed state of a single + etcd member. + properties: + alarms: + description: |- + Alarms are active alarms on this member (e.g., "NOSPACE", "CORRUPT"). + This lists specific alarm types. + items: + type: string + type: array + clientURL: + description: ClientURL is one of the client URLs for this member. + type: string + dbSize: + description: |- + DBSize is the current database size in bytes of this member, as reported by etcd. + This is the physically allocated size. + format: int64 + type: integer + dbSizeInUse: + description: DBSizeInUse is the actual disk space logically + in use by the database, in bytes, of this member. + format: int64 + type: integer + errorMessage: + description: |- + ErrorMessage provides any error associated with fetching status for this member, + or errors reported by the member itself via its StatusResponse.Errors field. + type: string + id: + description: |- + ID is the hex-encoded member ID as reported by etcd. + This is the canonical identifier for an etcd member. + type: string + isHealthy: + description: |- + IsHealthy indicates if the member is considered healthy. + A member is healthy if its etcd /health endpoint is reachable and reports OK, + and its Status endpoint does not report any 'Errors'. + type: boolean + isLearner: + description: IsLearner indicates if the member is currently + a learner in the etcd cluster. + type: boolean + name: + description: |- + Name of the etcd member, typically the pod name (e.g., "etcd-cluster-example-0"). + This can also be the name reported by etcd itself if set. + type: string + peerURL: + description: PeerURL is one of the peer URLs for this member. + type: string + version: + description: Version of etcd running on this member. 
+ type: string + required: + - id + - isHealthy + type: object + type: array + x-kubernetes-list-map-keys: + - id + x-kubernetes-list-type: map + observedGeneration: + description: ObservedGeneration is the most recent generation observed + for this EtcdCluster by the controller. + format: int64 + type: integer phase: - description: Phase indicates the state of the EtcdCluster. + description: |- + Phase indicates the current lifecycle phase of the EtcdCluster. + Examples: "Pending", "Initializing", "Running", "Scaling", "Upgrading", "Failed", "Idle". type: string readyReplicas: - description: ReadyReplicas is the number of pods targeted by this - EtcdCluster with a Ready condition. + description: |- + ReadyReplicas is the number of etcd pods managed by the StatefulSet that are currently ready. + This reflects the .status.readyReplicas of the underlying StatefulSet. format: int32 type: integer type: object diff --git a/internal/controller/etcdcluster_controller.go b/internal/controller/etcdcluster_controller.go index f46e3fb..283baab 100644 --- a/internal/controller/etcdcluster_controller.go +++ b/internal/controller/etcdcluster_controller.go @@ -140,7 +140,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) logger.Info("EtcdCluster size is 0..Skipping next steps") calculatedStatus.ReadyReplicas = 0 calculatedStatus.CurrentReplicas = 0 - calculatedStatus.MembersCount = 0 + calculatedStatus.MemberCount = 0 cm.SetAvailable(false, status.ReasonSizeIsZero, "Desired cluster size is 0") cm.SetProgressing(false, status.ReasonSizeIsZero, "Desired cluster size is 0") return ctrl.Result{}, nil @@ -266,7 +266,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) if memberListResp != nil { memberCnt = len(memberListResp.Members) } - calculatedStatus.MembersCount = int32(memberCnt) + calculatedStatus.MemberCount = int32(memberCnt) // --------------------------------------------------------------------- // 6. 
Handle Member/Replica Mismatch diff --git a/internal/etcdutils/etcdutils.go b/internal/etcdutils/etcdutils.go index 7c894ca..0ee6a86 100644 --- a/internal/etcdutils/etcdutils.go +++ b/internal/etcdutils/etcdutils.go @@ -44,11 +44,12 @@ func MemberList(eps []string) (*clientv3.MemberListResponse, error) { } type EpHealth struct { - Ep string `json:"endpoint"` - Health bool `json:"health"` - Took string `json:"took"` - Status *clientv3.StatusResponse - Error string `json:"error,omitempty"` + Ep string `json:"endpoint"` + Health bool `json:"health"` + Took string `json:"took"` + Status *clientv3.StatusResponse // Contains MemberID via Status.Header.MemberId + Alarms []string `json:"alarms,omitempty"` // Stores string representations of alarm types specific to this member + Error string `json:"error,omitempty"` } type healthReport []EpHealth @@ -69,10 +70,19 @@ func (eh EpHealth) String() string { var sb strings.Builder sb.WriteString(fmt.Sprintf("endpoint: %s, health: %t, took: %s", eh.Ep, eh.Health, eh.Took)) if eh.Status != nil { + sb.WriteString(fmt.Sprintf(", version: %s", eh.Status.Version)) + sb.WriteString(fmt.Sprintf(", dbSize: %d", eh.Status.DbSize)) sb.WriteString(fmt.Sprintf(", isLearner: %t", eh.Status.IsLearner)) + if eh.Status.Header != nil { + sb.WriteString(fmt.Sprintf(", memberId: %x", eh.Status.Header.MemberId)) + sb.WriteString(fmt.Sprintf(", raftTerm: %d", eh.Status.Header.RaftTerm)) + } + } + if len(eh.Alarms) > 0 { + sb.WriteString(fmt.Sprintf(", alarms: [%s]", strings.Join(eh.Alarms, "; "))) } if len(eh.Error) > 0 { - sb.WriteString("error: ") + sb.WriteString(", error: ") sb.WriteString(eh.Error) } return sb.String() @@ -92,6 +102,9 @@ func FindLeaderStatus(healthInfos []EpHealth, logger logr.Logger) (uint64, *clie // Find the leader status for i := range healthInfos { status := healthInfos[i].Status + if status == nil { + continue + } if status.Leader == status.Header.MemberId { leader = status.Header.MemberId leaderStatus = status @@ -102,7 +115,6 @@ func FindLeaderStatus(healthInfos []EpHealth, logger logr.Logger) (uint64, *clie logger.Info("Leader found", "leaderID", leader) } return leader, leaderStatus - } func FindLearnerStatus(healthInfos []EpHealth, logger logr.Logger) (uint64, *clientv3.StatusResponse) { @@ -110,6 +122,10 @@ func FindLearnerStatus(healthInfos []EpHealth, logger logr.Logger) (uint64, *cli var learnerStatus *clientv3.StatusResponse logger.Info("Now checking if there is any pending learner member that needs to be promoted") for i := range healthInfos { + status := healthInfos[i].Status + if status == nil { + continue + } if healthInfos[i].Status.IsLearner { learner = healthInfos[i].Status.Header.MemberId learnerStatus = healthInfos[i].Status @@ -121,79 +137,218 @@ func FindLearnerStatus(healthInfos []EpHealth, logger logr.Logger) (uint64, *cli } func ClusterHealth(eps []string) ([]EpHealth, error) { - lg, err := logutil.CreateDefaultZapLogger(zap.InfoLevel) + if len(eps) == 0 { + return nil, nil + } + + // Overall context for the duration of ClusterHealth operation, can be passed down. + // For simplicity, Background is used here, but a cancellable context from caller is better. + // TODO: Change the method signature to support cancellable context in the future. + ctx := context.Background() + + lg, err := logutil.CreateDefaultZapLogger(zap.InfoLevel) // Or pass logger as an argument if err != nil { - return nil, err + // This error is about creating a logger, which is a prerequisite. 
+ return nil, fmt.Errorf("failed to create zap logger for ClusterHealth: %w", err) + } + lg = lg.Named("ClusterHealth") // Add a name to the logger for context + + // 1. Get a client for cluster-wide operations + clusterOpClient, clientErr := getClusterOperationClient(eps, lg) + if clientErr != nil { + // If we can't even create a client for cluster ops, log it. + // We might decide to proceed and try individual endpoint checks without alarm info, + // or return an error. For now, let's log and proceed, alarms will be empty. + lg.Error("Failed to create cluster operation client", zap.Error(clientErr), zap.Strings("endpoints", eps)) + // clusterOpClient will be nil + } + if clusterOpClient != nil { + defer clusterOpClient.Close() + } + + // 2. Fetch cluster-wide alarms ONCE + var clusterAlarmsResponse *clientv3.AlarmResponse + if clusterOpClient != nil { + clusterAlarmsResponse, err = fetchClusterAlarmsOnce(ctx, clusterOpClient, lg) + if err != nil { + lg.Error("Failed to fetch cluster-wide alarms, proceeding without alarm information.", zap.Error(err)) + // clusterAlarmsResponse remains nil, subsequent logic handles this. + } + } + + // 3. Index alarms by MemberID for quick lookup + alarmsMap := indexAlarmsByMemberID(clusterAlarmsResponse) + if len(alarmsMap) > 0 { + lg.Debug("Indexed cluster alarms", zap.Int("distinct_members_with_alarms", len(alarmsMap))) } - var cfgs = make([]*clientv3.Config, 0, len(eps)) + // 4. Prepare per-endpoint client configurations + var cfgs = make([]clientv3.Config, 0, len(eps)) // Store as value to ensure copies for goroutines for _, ep := range eps { - cfg := &clientv3.Config{ + cfg := clientv3.Config{ Endpoints: []string{ep}, DialTimeout: 2 * time.Second, DialKeepAliveTime: 2 * time.Second, DialKeepAliveTimeout: 6 * time.Second, + // Logger will be set per-goroutine for its client instance } - cfgs = append(cfgs, cfg) } - healthCh := make(chan EpHealth, len(eps)) - + // 5. Concurrently check each endpoint's health + healthCh := make(chan EpHealth, len(cfgs)) var wg sync.WaitGroup - for _, cfg := range cfgs { + + for _, endpointCfg := range cfgs { wg.Add(1) - go func(cfg *clientv3.Config) { + go func(cfg clientv3.Config) { // Pass cfg by value to ensure each goroutine gets its own copy defer wg.Done() - ep := cfg.Endpoints[0] - cfg.Logger = lg.Named("client") - cli, err := clientv3.New(*cfg) - if err != nil { - healthCh <- EpHealth{Ep: ep, Health: false, Error: err.Error()} - return - } - startTs := time.Now() - // get a random key. As long as we can get the response - // without an error, the endpoint is health. 
- ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) - _, err = cli.Get(ctx, "health", clientv3.WithSerializable()) - eh := EpHealth{Ep: ep, Health: false, Took: time.Since(startTs).String()} - if err == nil || errors.Is(err, rpctypes.ErrPermissionDenied) { - eh.Health = true - } else { - eh.Error = err.Error() - } + // Perform core health check for this single endpoint + epHealthResult := checkSingleEndpointCoreHealth(ctx, cfg, lg) // lg is the base ClusterHealth logger - if eh.Health { - epStatus, err := cli.Status(ctx, ep) - if err != nil { - eh.Health = false - eh.Error = "Unable to fetch the status" + // Populate alarms using the pre-fetched and indexed data + if epHealthResult.Status != nil && epHealthResult.Status.Header != nil { + memberID := epHealthResult.Status.Header.MemberId + if specificAlarms, found := alarmsMap[memberID]; found { + epHealthResult.Alarms = specificAlarms } else { - eh.Status = epStatus - if len(epStatus.Errors) > 0 { - eh.Health = false - eh.Error = strings.Join(epStatus.Errors, ",") - } + epHealthResult.Alarms = nil } + } else { + // No Status or Header, so cannot determine MemberID to look up alarms. + epHealthResult.Alarms = nil } - cancel() - healthCh <- eh - }(cfg) + healthCh <- epHealthResult + }(endpointCfg) } + wg.Wait() close(healthCh) + // 6. Collect and sort results var healthList = make([]EpHealth, 0, len(healthCh)) for h := range healthCh { healthList = append(healthList, h) } sort.Sort(healthReport(healthList)) + lg.Info("Cluster health check completed", zap.Int("endpoints_checked", len(eps)), zap.Int("results_collected", len(healthList))) return healthList, nil } +// appendError concatenates error strings. +func appendError(existingError string, newErrorString string) string { + if existingError == "" { + return newErrorString + } + return existingError + "; " + newErrorString +} + +// getClusterOperationClient tries to create a client suitable for cluster-wide operations. +func getClusterOperationClient(eps []string, lg *zap.Logger) (*clientv3.Client, error) { + if len(eps) == 0 { + return nil, errors.New("no endpoints provided to create cluster operation client") + } + // Use all provided endpoints to create a robust client for cluster operations. + cfg := clientv3.Config{ + Endpoints: eps, + DialTimeout: 5 * time.Second, // Reasonably short timeout for one-off ops + DialKeepAliveTime: 10 * time.Second, + DialKeepAliveTimeout: 5 * time.Second, + Logger: lg.Named("cluster-op-client"), + } + cli, err := clientv3.New(cfg) + if err != nil { + return nil, fmt.Errorf("failed to create cluster operation client with endpoints %v: %w", eps, err) + } + return cli, nil +} + +// fetchClusterAlarmsOnce calls client.AlarmList() once. +func fetchClusterAlarmsOnce(ctx context.Context, client *clientv3.Client, lg *zap.Logger) (*clientv3.AlarmResponse, error) { + // Use a specific timeout for this operation. + alarmCtx, cancel := context.WithTimeout(ctx, 5*time.Second) // 5-second timeout for AlarmList + defer cancel() + + alarmResp, err := client.AlarmList(alarmCtx) + if err != nil { + // Log the error at the source of the call if needed, or let the caller decide. + return nil, fmt.Errorf("client.AlarmList call failed: %w", err) + } + lg.Debug("Successfully fetched cluster alarms") + return alarmResp, nil +} + +// indexAlarmsByMemberID converts AlarmResponse to a map for easy lookup. 
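+// For example, an AlarmResponse carrying a single NOSPACE alarm for member 0x1234
+// yields map[uint64][]string{0x1234: {"NOSPACE"}} (member ID and alarm are illustrative).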
+func indexAlarmsByMemberID(alarmResp *clientv3.AlarmResponse) map[uint64][]string { + alarmsMap := make(map[uint64][]string) + if alarmResp != nil && len(alarmResp.Alarms) > 0 { + for _, alarmMember := range alarmResp.Alarms { + alarmsMap[alarmMember.MemberID] = append( + alarmsMap[alarmMember.MemberID], + alarmMember.GetAlarm().String(), // Store string representation of AlarmType + ) + } + } + return alarmsMap +} + +// checkSingleEndpointCoreHealth performs basic health checks for a single endpoint. +// It does NOT call AlarmList. +func checkSingleEndpointCoreHealth(ctx context.Context, cfg clientv3.Config, parentLogger *zap.Logger) EpHealth { + ep := cfg.Endpoints[0] + + // Set a specific logger for the etcd client instance. + // Since cfg is passed by value, modifying it here is safe and only affects this local copy. + cfg.Logger = parentLogger.Named(fmt.Sprintf("client-%s", strings.ReplaceAll(ep, "://", "_"))) + + epHealth := EpHealth{Ep: ep, Health: false} // Initialize with Health: false + + cli, err := clientv3.New(cfg) // Use the modified local cfg copy directly. + if err != nil { + epHealth.Error = err.Error() + // Took is not measured if client creation fails. + return epHealth + } + defer cli.Close() + + startTs := time.Now() + + // Context for this endpoint's operations + opCtx, cancel := context.WithTimeout(ctx, 10*time.Second) // 10-second timeout for Get + Status + defer cancel() + + // 1. Basic health check (e.g., Get a key) + _, getErr := cli.Get(opCtx, "health", clientv3.WithSerializable()) + if getErr == nil || errors.Is(getErr, rpctypes.ErrPermissionDenied) { + epHealth.Health = true // Initial health check passed + } else { + epHealth.Error = getErr.Error() + // If basic Get fails, we might still want to attempt Status for more info, + // but the endpoint is likely unhealthy. Health remains false. + } + + // 2. Get detailed status, regardless of initial Get result, to gather more info + epStatus, statusErr := cli.Status(opCtx, ep) // ep is this client's only endpoint + if statusErr != nil { + epHealth.Health = false // Explicitly set to false if Status fails + epHealth.Error = appendError(epHealth.Error, fmt.Sprintf("unable to fetch status: %v", statusErr)) + } else { + epHealth.Status = epStatus + if len(epStatus.Errors) > 0 { // Errors reported by the etcd member itself + epHealth.Health = false + epHealth.Error = appendError(epHealth.Error, fmt.Sprintf("etcd member reported errors: %s", strings.Join(epStatus.Errors, ", "))) + } else if !epHealth.Health { + // If Get failed but Status succeeded without internal errors, + // it's a mixed signal. For now, if Get failed, overall Health is false. 
+ } + } + + epHealth.Took = time.Since(startTs).String() + return epHealth +} + func AddMember(eps []string, peerURLs []string, learner bool) (*clientv3.MemberAddResponse, error) { cfg := clientv3.Config{ Endpoints: eps, diff --git a/internal/etcdutils/etcdutils_test.go b/internal/etcdutils/etcdutils_test.go index a7cee42..58d3c91 100644 --- a/internal/etcdutils/etcdutils_test.go +++ b/internal/etcdutils/etcdutils_test.go @@ -78,23 +78,121 @@ func TestClusterHealth(t *testing.T) { e := setupEtcdServer(t) defer e.Close() - t.Run("ReturnsHealthStatus", func(t *testing.T) { - eps := []string{"http://localhost:2379"} - health, err := ClusterHealth(eps) - assert.NoError(t, err) - assert.NotNil(t, health) - assert.Greater(t, len(health), 0) - assert.True(t, health[0].Health) - }) + // This is a valid, healthy endpoint for our tests + healthyEp := e.Clients[0].Addr().String() + + testCases := []struct { + name string + setupFunc func(t *testing.T, e *embed.Etcd) // Optional setup for specific cases + endpoints []string + assertions func(t *testing.T, health []EpHealth, err error) + }{ + { + name: "ReturnsHealthStatusForSingleNode", + endpoints: []string{healthyEp}, + assertions: func(t *testing.T, health []EpHealth, err error) { + assert.NoError(t, err) + assert.NotNil(t, health) + assert.Len(t, health, 1) + assert.True(t, health[0].Health) + assert.Equal(t, healthyEp, health[0].Ep) + assert.Empty(t, health[0].Error) + }, + }, + { + name: "HandlesInvalidEndpoint", + endpoints: []string{"http://invalid:2379"}, + assertions: func(t *testing.T, health []EpHealth, err error) { + assert.NoError(t, err) + assert.NotNil(t, health) + assert.Len(t, health, 1) + assert.False(t, health[0].Health) + assert.Equal(t, "http://invalid:2379", health[0].Ep) + assert.Contains(t, health[0].Error, "context deadline exceeded") + }, + }, + { + name: "HandlesMixedHealthEndpoints", + endpoints: []string{healthyEp, "http://invalid:2379"}, + assertions: func(t *testing.T, health []EpHealth, err error) { + assert.NoError(t, err) + assert.NotNil(t, health) + assert.Len(t, health, 2) // Should have results for both + + // Don't rely on sort order. Find each result and test it. 
+ var healthyResult, invalidResult EpHealth + var foundHealthy, foundInvalid bool + + for _, h := range health { + if h.Ep == healthyEp { + healthyResult = h + foundHealthy = true + } else if h.Ep == "http://invalid:2379" { + invalidResult = h + foundInvalid = true + } + } + + assert.True(t, foundHealthy, "Did not find result for healthy endpoint") + assert.True(t, foundInvalid, "Did not find result for invalid endpoint") + + // Check healthy endpoint's result + assert.True(t, healthyResult.Health) + assert.Empty(t, healthyResult.Error) + + // Check invalid endpoint's result + assert.False(t, invalidResult.Health) + assert.Contains(t, invalidResult.Error, "context deadline exceeded") + }, + }, + { + name: "DetectsMemberWithAlarms", + setupFunc: func(t *testing.T, e *embed.Etcd) { + // Activate a NOSPACE alarm on the server + _, err := e.Server.Alarm(context.Background(), &etcdserverpb.AlarmRequest{ + Action: etcdserverpb.AlarmRequest_ACTIVATE, + MemberID: uint64(e.Server.ID()), + Alarm: etcdserverpb.AlarmType_NOSPACE, + }) + assert.NoError(t, err) + }, + endpoints: []string{healthyEp}, + assertions: func(t *testing.T, health []EpHealth, err error) { + assert.NoError(t, err) + assert.NotNil(t, health) + assert.Len(t, health, 1) - t.Run("ReturnsErrorForInvalidEndpoint", func(t *testing.T) { - eps := []string{"http://invalid:2379"} - health, err := ClusterHealth(eps) - assert.NoError(t, err) - assert.Equal(t, "http://invalid:2379", health[0].Ep) - assert.Equal(t, false, health[0].Health) - assert.Equal(t, "context deadline exceeded", health[0].Error) - }) + // A member with a NOSPACE alarm is not considered fully healthy + assert.False(t, health[0].Health) + + // Check that the error reported by the member contains the alarm type. + assert.Contains(t, health[0].Error, "NOSPACE") + + // Verify that our alarm detection logic still works correctly. + assert.NotEmpty(t, health[0].Alarms) + assert.Contains(t, health[0].Alarms, "NOSPACE") + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + if tc.setupFunc != nil { + tc.setupFunc(t, e) + t.Cleanup(func() { + // Disarm the NOSPACE alarm to clean up the state for the next test. + _, _ = e.Server.Alarm(context.Background(), &etcdserverpb.AlarmRequest{ + Action: etcdserverpb.AlarmRequest_DEACTIVATE, + MemberID: uint64(e.Server.ID()), + Alarm: etcdserverpb.AlarmType_NOSPACE, + }) + }) + } + + health, err := ClusterHealth(tc.endpoints) + tc.assertions(t, health, err) + }) + } } func TestAddMember(t *testing.T) { @@ -145,6 +243,52 @@ func TestRemoveMember(t *testing.T) { e := setupEtcdServer(t) defer e.Close() + // Positive case: Successfully remove a member + t.Run("RemovesExistingMember", func(t *testing.T) { + // 1. Add a new member to be removed + // Note: The new member won't actually start, but it will exist in the member list. + newMemberPeerURL := "http://new-member:2380" + memberToAdd := membership.Member{ + RaftAttributes: membership.RaftAttributes{PeerURLs: []string{newMemberPeerURL}}, + } + + // The return value is the full list of members after addition. + updatedMemberList, err := e.Server.AddMember(context.Background(), memberToAdd) + assert.NoError(t, err) + + // Find the newly added member in the list to get its ID + var newMemberID uint64 + foundNewMember := false + for _, m := range updatedMemberList { + // Find the member by the unique PeerURL we assigned. 
+ if len(m.PeerURLs) > 0 && m.PeerURLs[0] == newMemberPeerURL { + newMemberID = uint64(m.ID) + foundNewMember = true + break + } + } + assert.True(t, foundNewMember, "Failed to find the newly added member in the member list") + + // 2. Use our utility function to remove it using the correct ID + eps := []string{e.Clients[0].Addr().String()} + err = RemoveMember(eps, newMemberID) + assert.NoError(t, err) + + // 3. Verify it's gone + membersResp, err := MemberList(eps) + assert.NoError(t, err) + + found := false + for _, member := range membersResp.Members { + if member.ID == newMemberID { + found = true + break + } + } + assert.False(t, found, "Removed member should not be in the member list") + }) + + // Negative case t.Run("ReturnsErrorForInvalidEndpoint", func(t *testing.T) { eps := []string{"http://invalid:2379"} err := RemoveMember(eps, 12345) @@ -231,6 +375,26 @@ func TestFindLeaderStatus(t *testing.T) { assert.Equal(t, uint64(0), leader) assert.Nil(t, leaderStatus) }) + + t.Run("HandlesNilStatus", func(t *testing.T) { + logger := logr.Discard() + healthInfos := []EpHealth{ + {Status: nil}, + { + Ep: "http://localhost:2380", + Health: true, + Status: &clientv3.StatusResponse{ + Header: &etcdserverpb.ResponseHeader{MemberId: 2}, + Leader: 2, + }, + }, + } + // We just want to ensure it doesn't panic. + // The result should be finding the leader from the valid entry. + leader, leaderStatus := FindLeaderStatus(healthInfos, logger) + assert.Equal(t, uint64(2), leader) + assert.NotNil(t, leaderStatus) + }) } func TestFindLearnerStatus(t *testing.T) { @@ -295,32 +459,150 @@ func TestFindLearnerStatus(t *testing.T) { assert.Equal(t, uint64(0), learner) assert.Nil(t, learnerStatus) }) + + t.Run("HandlesNilStatus", func(t *testing.T) { + logger := logr.Discard() + healthInfos := []EpHealth{ + {Status: nil}, + { + Ep: "http://localhost:2380", + Health: true, + Status: &clientv3.StatusResponse{ + Header: &etcdserverpb.ResponseHeader{MemberId: 2}, + IsLearner: false, + }, + }, + } + // We just want to ensure it doesn't panic. + // The result should be finding no learner. 
+ learner, learnerStatus := FindLearnerStatus(healthInfos, logger) + assert.Equal(t, uint64(0), learner) + assert.Nil(t, learnerStatus) + }) } func TestEpHealthString(t *testing.T) { - eh := EpHealth{ - Ep: "http://localhost:2379", - Health: true, - Took: "1s", - Status: &clientv3.StatusResponse{ - IsLearner: true, + // Define test cases in a table + testCases := []struct { + name string + input EpHealth + expected string + }{ + { + name: "Basic healthy with no status", + input: EpHealth{ + Ep: "http://ep1:2379", + Health: true, + Took: "50ms", + }, + expected: "endpoint: http://ep1:2379, health: true, took: 50ms", + }, + { + name: "Unhealthy with error and no status", + input: EpHealth{ + Ep: "http://ep2:2379", + Health: false, + Took: "5s", + Error: "connection refused", + }, + expected: "endpoint: http://ep2:2379, health: false, took: 5s, error: connection refused", + }, + { + name: "Healthy with Status but Header is nil (case that caused panic)", + input: EpHealth{ + Ep: "http://ep3:2379", + Health: true, + Took: "100ms", + Status: &clientv3.StatusResponse{ + Version: "3.5.0", + DbSize: 12345, + IsLearner: false, + Header: nil, // Important: Header is nil + }, + }, + expected: "endpoint: http://ep3:2379, health: true, took: 100ms, version: 3.5.0, dbSize: 12345, isLearner: false", + }, + { + name: "Healthy with full Status and Header", + input: EpHealth{ + Ep: "http://leader:2379", + Health: true, + Took: "20ms", + Status: &clientv3.StatusResponse{ + Version: "3.5.1", + DbSize: 54321, + IsLearner: false, + Header: &etcdserverpb.ResponseHeader{MemberId: 123, RaftTerm: 5}, + }, + }, + expected: "endpoint: http://leader:2379, health: true, took: 20ms, version: 3.5.1, dbSize: 54321, isLearner: false, memberId: 7b, raftTerm: 5", + }, + { + name: "Healthy with Alarms but no error", + input: EpHealth{ + Ep: "http://ep4:2379", + Health: true, + Took: "30ms", + Alarms: []string{"NOSPACE"}, + }, + expected: "endpoint: http://ep4:2379, health: true, took: 30ms, alarms: [NOSPACE]", + }, + { + name: "Healthy with multiple Alarms", + input: EpHealth{ + Ep: "http://ep5:2379", + Health: true, + Took: "35ms", + Alarms: []string{"NOSPACE", "CORRUPT"}, + }, + expected: "endpoint: http://ep5:2379, health: true, took: 35ms, alarms: [NOSPACE; CORRUPT]", + }, + { + name: "Kitchen sink - all fields populated", + input: EpHealth{ + Ep: "http://ep6:2379", + Health: true, + Took: "40ms", + Status: &clientv3.StatusResponse{ + Version: "3.5.2", + DbSize: 99999, + IsLearner: true, + Header: &etcdserverpb.ResponseHeader{MemberId: 456, RaftTerm: 6}, + }, + Alarms: []string{"NOSPACE"}, + Error: "internal warning", + }, + expected: "endpoint: http://ep6:2379, health: true, took: 40ms, version: 3.5.2, dbSize: 99999, isLearner: true, memberId: 1c8, raftTerm: 6, alarms: [NOSPACE], error: internal warning", + }, + { + name: "Error string", + input: EpHealth{ + Ep: "http://localhost:2379", + Health: true, + Took: "1s", + Status: &clientv3.StatusResponse{ + // Version is empty string, DbSize is 0 in a default StatusResponse + IsLearner: true, + }, + Error: "some error", + }, + // The new String() method adds ", version: , dbSize: 0" and a comma before the error + expected: "endpoint: http://localhost:2379, health: true, took: 1s, version: , dbSize: 0, isLearner: true, error: some error", + }, + { + name: "Empty struct", + input: EpHealth{}, + expected: "endpoint: , health: false, took: ", }, - Error: "some error", } - expected := "endpoint: http://localhost:2379, health: true, took: 1s, isLearner: trueerror: some error" - 
assert.Equal(t, expected, eh.String()) - - eh = EpHealth{ - Ep: "http://localhost:2379", - Health: false, - Took: "1s", - Status: nil, - Error: "", + // Iterate over the test cases + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + actual := tc.input.String() + assert.Equal(t, tc.expected, actual) + }) } - - expected = "endpoint: http://localhost:2379, health: false, took: 1s" - assert.Equal(t, expected, eh.String()) } func TestHealthReportSwap(t *testing.T) { From 781e1d3c0341d93a5d3a96a0895cdd03da9cb10c Mon Sep 17 00:00:00 2001 From: Wenxue Zhao Date: Thu, 5 Jun 2025 18:54:00 -0400 Subject: [PATCH 07/12] Signed-off-by: Wenxue Zhao feat(status): Populate detailed member status in reconciler This commit implements the logic within the main controller reconcile loop to populate the new, detailed status fields defined in the EtcdCluster API. It utilizes the refactored health checking utilities to translate raw etcd health information into the structured status format. Key implementation details: - A new `populateMemberStatuses` helper method is introduced to handle the translation from `etcdutils.EpHealth` to the `ecv1alpha1.MemberStatus` slice. - `Status.Members` is now fully populated with individual member data, including health, version, DB size, and active alarms. - `Status.CurrentVersion` is derived from the leader's status. - `Status.ObservedGeneration` is now correctly set at the end of every successful reconciliation cycle to reflect that the status is in sync with the spec. - When a health check fails, stale status fields (like `Members`, `LeaderId`) are now cleared to avoid presenting a confusing mix of old data and new error conditions. With this logic in place, the next step is to refactor the Reconcile function for better testability and add comprehensive controller-level tests. --- internal/controller/etcdcluster_controller.go | 97 +++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/internal/controller/etcdcluster_controller.go b/internal/controller/etcdcluster_controller.go index 283baab..a91a1d8 100644 --- a/internal/controller/etcdcluster_controller.go +++ b/internal/controller/etcdcluster_controller.go @@ -103,6 +103,15 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Defer the actual patch operation. The mutate function will apply the final 'calculatedStatus'. defer func() { + // Using the named return value `err`, we can determine if the reconciliation + // was successful before patching the status. + // We should only update ObservedGeneration if the reconciliation cycle completed without an error. + if err == nil { + // If we are about to exit without an error, it means we have successfully + // processed the current spec generation. + calculatedStatus.ObservedGeneration = etcdCluster.Generation + } + // Call PatchStatusMutate. The mutate function copies the calculatedStatus // onto the latest version fetched inside the patch retry loop. 
patchErr := status.PatchStatusMutate(ctx, r.Client, etcdCluster, func(latest *ecv1alpha1.EtcdCluster) error { @@ -255,6 +264,12 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) cm.SetDegraded(true, status.ReasonHealthCheckError, status.FormatError("Health check failed", err)) // Keep Progressing True as we need to retry health check cm.SetProgressing(true, status.ReasonHealthCheckError, "Retrying after health check failure") + + // Clear stale data on error to avoid using stale member status + calculatedStatus.Members = nil + calculatedStatus.MemberCount = 0 + calculatedStatus.LeaderId = "" + calculatedStatus.CurrentVersion = "" return ctrl.Result{}, fmt.Errorf("health check failed: %w", err) } // calculatedStatus.LastHealthCheckTime = metav1.Now() // TODO: Add this field @@ -267,6 +282,8 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) memberCnt = len(memberListResp.Members) } calculatedStatus.MemberCount = int32(memberCnt) + // Populate the detailed member status slice using our new helper function. + calculatedStatus.Members = r.populateMemberStatuses(ctx, memberListResp, healthInfos) // --------------------------------------------------------------------- // 6. Handle Member/Replica Mismatch @@ -325,6 +342,10 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) return ctrl.Result{RequeueAfter: requeueDuration}, err } calculatedStatus.LeaderId = fmt.Sprintf("%x", leaderStatus.Header.MemberId) + // Set the cluster's current version based on the leader's version. + if leaderStatus.Version != "" { + calculatedStatus.CurrentVersion = leaderStatus.Version + } learner, learnerStatus = etcdutils.FindLearnerStatus(healthInfos, logger) if learner > 0 { @@ -393,6 +414,14 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) cm.SetProgressing(true, scalingReason, scalingMessage) cm.SetAvailable(false, scalingReason, "Cluster is scaling down") + // TODO: The current logic for selecting a member to remove is not robust. + // It assumes the last member in `healthInfos` (sorted alphabetically by endpoint) + // is the member with the highest ordinal index. This can be incorrect under + // certain naming conventions (e.g., with more than 10 pods) and could lead + // to removing the wrong member. + // A robust implementation should find the member to remove by its pod name + // (e.g., "etcd-cluster-N") instead of relying on slice order. + // This will be addressed in a future PR to keep this change focused. memberID := healthInfos[memberCnt-1].Status.Header.MemberId logger.Info("[Scale in] removing one member", "memberID", memberID) eps = eps[:targetReplica] @@ -520,6 +549,74 @@ func (r *EtcdClusterReconciler) updateStatusFromStatefulSet( etcdClusterStatus.ReadyReplicas = sts.Status.ReadyReplicas } +// populateMemberStatuses translates the raw health and member info from etcd into the +// structured MemberStatus API type. +func (r *EtcdClusterReconciler) populateMemberStatuses( + ctx context.Context, + memberListResp *clientv3.MemberListResponse, + healthInfos []etcdutils.EpHealth, +) []ecv1alpha1.MemberStatus { + logger := log.FromContext(ctx) + // If there's no member list, there's nothing to populate. + if memberListResp == nil || len(memberListResp.Members) == 0 { + return nil + } + + // Create a map of health info keyed by member ID for efficient lookups. 
+ healthMap := make(map[uint64]etcdutils.EpHealth) + for _, hi := range healthInfos { + if hi.Status != nil && hi.Status.Header != nil { + healthMap[hi.Status.Header.MemberId] = hi + } + } + + // Allocate space for the resulting statuses. + statuses := make([]ecv1alpha1.MemberStatus, 0, len(memberListResp.Members)) + + // Iterate through the canonical member list from etcd. + for _, etcdMember := range memberListResp.Members { + // Start building the status for this specific member. + ms := ecv1alpha1.MemberStatus{ + ID: fmt.Sprintf("%x", etcdMember.ID), // Use hex representation for the uint64 ID. + Name: etcdMember.Name, + IsLearner: etcdMember.IsLearner, + } + + // Populate URLs, taking the first one if available. + if len(etcdMember.ClientURLs) > 0 { + ms.ClientURL = etcdMember.ClientURLs[0] + } + if len(etcdMember.PeerURLs) > 0 { + ms.PeerURL = etcdMember.PeerURLs[0] + } + + // Look up the detailed health status for this member in the map. + if healthInfo, found := healthMap[etcdMember.ID]; found { + // A health record was found for this member. + ms.IsHealthy = healthInfo.Health + ms.ErrorMessage = healthInfo.Error + ms.Alarms = healthInfo.Alarms // Directly use the pre-processed []string from EpHealth. + + // Populate fields from the detailed StatusResponse if it exists. + if healthInfo.Status != nil { + ms.Version = healthInfo.Status.Version + ms.DBSize = healthInfo.Status.DbSize + ms.DBSizeInUse = healthInfo.Status.DbSizeInUse + } + } else { + // A member was present in the member list but we couldn't get its + // individual health status (e.g., its endpoint was unreachable during the check). + ms.IsHealthy = false + ms.ErrorMessage = "Health status for this member could not be retrieved." + logger.Info("No detailed health status found for etcd member, marking as unhealthy by default", "memberID", ms.ID, "memberName", ms.Name) + } + + statuses = append(statuses, ms) + } + + return statuses +} + // SetupWithManager sets up the controller with the Manager. func (r *EtcdClusterReconciler) SetupWithManager(mgr ctrl.Manager) error { r.Recorder = mgr.GetEventRecorderFor("etcdcluster-controller") From aa59fe43736c8cd2fd691bd7b13659f11d2eadee Mon Sep 17 00:00:00 2001 From: Wenxue Zhao Date: Thu, 5 Jun 2025 19:18:15 -0400 Subject: [PATCH 08/12] fix(test): Use correct MemberID method in alarm test Signed-off-by: Wenxue Zhao --- internal/etcdutils/etcdutils_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/etcdutils/etcdutils_test.go b/internal/etcdutils/etcdutils_test.go index 58d3c91..402c306 100644 --- a/internal/etcdutils/etcdutils_test.go +++ b/internal/etcdutils/etcdutils_test.go @@ -151,7 +151,7 @@ func TestClusterHealth(t *testing.T) { // Activate a NOSPACE alarm on the server _, err := e.Server.Alarm(context.Background(), &etcdserverpb.AlarmRequest{ Action: etcdserverpb.AlarmRequest_ACTIVATE, - MemberID: uint64(e.Server.ID()), + MemberID: uint64(e.Server.MemberID()), Alarm: etcdserverpb.AlarmType_NOSPACE, }) assert.NoError(t, err) @@ -183,7 +183,7 @@ func TestClusterHealth(t *testing.T) { // Disarm the NOSPACE alarm to clean up the state for the next test. 
_, _ = e.Server.Alarm(context.Background(), &etcdserverpb.AlarmRequest{ Action: etcdserverpb.AlarmRequest_DEACTIVATE, - MemberID: uint64(e.Server.ID()), + MemberID: uint64(e.Server.MemberID()), Alarm: etcdserverpb.AlarmType_NOSPACE, }) }) From 1e9152a57301c9ee7957d03be975b3008138c48f Mon Sep 17 00:00:00 2001 From: Wenxue Zhao Date: Thu, 5 Jun 2025 19:51:54 -0400 Subject: [PATCH 09/12] fix(lint): Resolve lint errors Signed-off-by: Wenxue Zhao --- internal/etcdutils/etcdutils.go | 15 ++++++++++----- pkg/status/patch.go | 6 ++++-- pkg/status/type.go | 4 +++- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/internal/etcdutils/etcdutils.go b/internal/etcdutils/etcdutils.go index 0ee6a86..6250a15 100644 --- a/internal/etcdutils/etcdutils.go +++ b/internal/etcdutils/etcdutils.go @@ -163,7 +163,11 @@ func ClusterHealth(eps []string) ([]EpHealth, error) { // clusterOpClient will be nil } if clusterOpClient != nil { - defer clusterOpClient.Close() + defer func() { + if err := clusterOpClient.Close(); err != nil { + lg.Warn("Failed to close cluster operation client", zap.Error(err)) + } + }() } // 2. Fetch cluster-wide alarms ONCE @@ -311,7 +315,11 @@ func checkSingleEndpointCoreHealth(ctx context.Context, cfg clientv3.Config, par // Took is not measured if client creation fails. return epHealth } - defer cli.Close() + defer func() { + if err := cli.Close(); err != nil { + parentLogger.Warn("Failed to close etcd client for endpoint", zap.String("endpoint", ep), zap.Error(err)) + } + }() startTs := time.Now() @@ -339,9 +347,6 @@ func checkSingleEndpointCoreHealth(ctx context.Context, cfg clientv3.Config, par if len(epStatus.Errors) > 0 { // Errors reported by the etcd member itself epHealth.Health = false epHealth.Error = appendError(epHealth.Error, fmt.Sprintf("etcd member reported errors: %s", strings.Join(epStatus.Errors, ", "))) - } else if !epHealth.Health { - // If Get failed but Status succeeded without internal errors, - // it's a mixed signal. For now, if Get failed, overall Health is false. } } diff --git a/pkg/status/patch.go b/pkg/status/patch.go index 8bd4e7d..9aa6c26 100644 --- a/pkg/status/patch.go +++ b/pkg/status/patch.go @@ -66,7 +66,8 @@ func PatchStatusMutate[T client.Object]( // Check if the generation changed during our reconcile. This might mean the spec changed // and our calculated status is stale. if startGeneration != latestFetchedObj.GetGeneration() { - logger.Info("Object generation changed during reconcile, status calculated based on old spec might be stale. Aborting this patch attempt.", + logger.Info("Object generation changed during reconcile, status calculated based on old spec might be stale. "+ + "Aborting this patch attempt.", "key", key, "startGeneration", startGeneration, "currentGeneration", latestFetchedObj.GetGeneration()) // Returning nil here means we acknowledge the generation change and decide *not* to patch stale status. // The main Reconcile loop will requeue soon with the new generation. @@ -83,7 +84,8 @@ func PatchStatusMutate[T client.Object]( if err := mutateFn(latestFetchedObj); err != nil { // If the mutation logic itself fails, we likely want to stop retrying the patch. 
logger.Error(err, "Mutation function failed during status patch attempt.") - return fmt.Errorf("mutate function failed during status patch: %w", err) // Stop retries by returning non-conflict error + // Stop retries by returning non-conflict error + return fmt.Errorf("mutate function failed during status patch: %w", err) } // Compare the object's status before and after mutation. diff --git a/pkg/status/type.go b/pkg/status/type.go index 54c2bea..4d28888 100644 --- a/pkg/status/type.go +++ b/pkg/status/type.go @@ -4,7 +4,9 @@ package status // Condition types used for EtcdCluster status. // Adhering to Kubernetes API conventions as much as possible. -// See: https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#typical-status-properties +// See: +// github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md + const ( // ConditionAvailable indicates that the etcd cluster has reached its desired state, // has quorum, and is ready to serve requests. All members are healthy. From 1ea2d503fbc579d54bfb0917f2b27569ca741738 Mon Sep 17 00:00:00 2001 From: Wenxue Zhao Date: Mon, 29 Sep 2025 05:54:18 -0400 Subject: [PATCH 10/12] fix(test): added missing context package in etcdutils_test.go Signed-off-by: Wenxue Zhao --- internal/etcdutils/etcdutils_test.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/etcdutils/etcdutils_test.go b/internal/etcdutils/etcdutils_test.go index 402c306..1beb953 100644 --- a/internal/etcdutils/etcdutils_test.go +++ b/internal/etcdutils/etcdutils_test.go @@ -1,6 +1,7 @@ package etcdutils import ( + "context" "testing" "time" From 08ad17f04b3ab5e47f3dad726cf3c927e9e7cb18 Mon Sep 17 00:00:00 2001 From: Wenxue Zhao Date: Mon, 29 Sep 2025 06:12:14 -0400 Subject: [PATCH 11/12] Fix lint findings after rebase Signed-off-by: Wenxue Zhao --- internal/controller/etcdcluster_controller.go | 4 ++-- internal/etcdutils/etcdutils_test.go | 5 +++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/internal/controller/etcdcluster_controller.go b/internal/controller/etcdcluster_controller.go index a91a1d8..ff76b53 100644 --- a/internal/controller/etcdcluster_controller.go +++ b/internal/controller/etcdcluster_controller.go @@ -22,7 +22,6 @@ import ( "time" certv1 "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1" - "go.etcd.io/etcd-operator/pkg/status" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" @@ -35,6 +34,7 @@ import ( ecv1alpha1 "go.etcd.io/etcd-operator/api/v1alpha1" "go.etcd.io/etcd-operator/internal/etcdutils" + "go.etcd.io/etcd-operator/pkg/status" clientv3 "go.etcd.io/etcd/client/v3" ) @@ -94,7 +94,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) // We use PatchStatusMutate. It fetches the latest object inside its retry loop. // We prepare the *desired* status modifications in a 'calculatedStatus' variable // within the Reconcile scope, and the mutate function applies these calculations. - var calculatedStatus ecv1alpha1.EtcdClusterStatus = etcdCluster.Status // Start with current status + calculatedStatus := etcdCluster.Status // Start with current status // Initialize ConditionManager using the calculatedStatus *copy*. // Modifications via cm will only affect this copy until the patch is applied. 
diff --git a/internal/etcdutils/etcdutils_test.go b/internal/etcdutils/etcdutils_test.go index 1beb953..8157f2a 100644 --- a/internal/etcdutils/etcdutils_test.go +++ b/internal/etcdutils/etcdutils_test.go @@ -125,10 +125,11 @@ func TestClusterHealth(t *testing.T) { var foundHealthy, foundInvalid bool for _, h := range health { - if h.Ep == healthyEp { + switch h.Ep { + case healthyEp: healthyResult = h foundHealthy = true - } else if h.Ep == "http://invalid:2379" { + case "http://invalid:2379": invalidResult = h foundInvalid = true } From 1814dc114fc6f1ded4c3aa76ec90ab8db38e80d7 Mon Sep 17 00:00:00 2001 From: Wenxue Zhao Date: Tue, 30 Sep 2025 13:53:45 -0400 Subject: [PATCH 12/12] status: shrink CR status surface and tighten healthy-state reporting Signed-off-by: Wenxue Zhao --- api/v1alpha1/etcdcluster_types.go | 36 +----- api/v1alpha1/zz_generated.deepcopy.go | 9 +- internal/controller/etcdcluster_controller.go | 113 ++++++------------ pkg/status/manager.go | 9 -- pkg/status/type.go | 3 +- test/e2e/datapersistence_test.go | 2 + test/e2e/e2e_test.go | 6 + test/e2e/helpers_test.go | 60 ++++++++++ 8 files changed, 111 insertions(+), 127 deletions(-) diff --git a/api/v1alpha1/etcdcluster_types.go b/api/v1alpha1/etcdcluster_types.go index 38ef923..6d02f00 100644 --- a/api/v1alpha1/etcdcluster_types.go +++ b/api/v1alpha1/etcdcluster_types.go @@ -160,10 +160,7 @@ type EtcdClusterStatus struct { // +optional LeaderId string `json:"leaderId,omitempty"` - // Phase indicates the current lifecycle phase of the EtcdCluster. - // Examples: "Pending", "Initializing", "Running", "Scaling", "Upgrading", "Failed", "Idle". - // +optional - Phase string `json:"phase,omitempty"` + // TODO: expose LastDefragTime once the controller owns automated defragmentation. // Members provides the status of each individual etcd member. // +optional @@ -206,36 +203,9 @@ type MemberStatus struct { // +optional IsLearner bool `json:"isLearner,omitempty"` - // ClientURL is one of the client URLs for this member. - // +optional - ClientURL string `json:"clientURL,omitempty"` - - // PeerURL is one of the peer URLs for this member. - // +optional - PeerURL string `json:"peerURL,omitempty"` - - // DBSize is the current database size in bytes of this member, as reported by etcd. - // This is the physically allocated size. - // +optional - DBSize int64 `json:"dbSize,omitempty"` - - // DBSizeInUse is the actual disk space logically in use by the database, in bytes, of this member. - // +optional - DBSizeInUse int64 `json:"dbSizeInUse,omitempty"` - - // Alarms are active alarms on this member (e.g., "NOSPACE", "CORRUPT"). - // This lists specific alarm types. + // IsLeader indicates if this member is currently the cluster leader. // +optional - Alarms []string `json:"alarms,omitempty"` - - // ErrorMessage provides any error associated with fetching status for this member, - // or errors reported by the member itself via its StatusResponse.Errors field. 
- // +optional - ErrorMessage string `json:"errorMessage,omitempty"` - - // TODO: Consider adding these in future iterations if deemed valuable: - // StorageVersion string `json:"storageVersion,omitempty"` // Raft storage version - // DowngradeInfo string `json:"downgradeInfo,omitempty"` // Information about downgrade capabilities + IsLeader bool `json:"isLeader,omitempty"` } // +kubebuilder:object:root=true diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index e49b823..7150dc9 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -178,9 +178,7 @@ func (in *EtcdClusterStatus) DeepCopyInto(out *EtcdClusterStatus) { if in.Members != nil { in, out := &in.Members, &out.Members *out = make([]MemberStatus, len(*in)) - for i := range *in { - (*in)[i].DeepCopyInto(&(*out)[i]) - } + copy(*out, *in) } if in.Conditions != nil { in, out := &in.Conditions, &out.Conditions @@ -204,11 +202,6 @@ func (in *EtcdClusterStatus) DeepCopy() *EtcdClusterStatus { // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *MemberStatus) DeepCopyInto(out *MemberStatus) { *out = *in - if in.Alarms != nil { - in, out := &in.Alarms, &out.Alarms - *out = make([]string, len(*in)) - copy(*out, *in) - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MemberStatus. diff --git a/internal/controller/etcdcluster_controller.go b/internal/controller/etcdcluster_controller.go index ff76b53..3976b5e 100644 --- a/internal/controller/etcdcluster_controller.go +++ b/internal/controller/etcdcluster_controller.go @@ -25,7 +25,6 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" - "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/runtime" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" @@ -117,8 +116,6 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) patchErr := status.PatchStatusMutate(ctx, r.Client, etcdCluster, func(latest *ecv1alpha1.EtcdCluster) error { // Apply the status we calculated during *this* reconcile cycle latest.Status = calculatedStatus - // Ensure Phase is derived *before* patching - r.derivePhaseFromConditions(latest) // Pass the object being patched return nil }) if patchErr != nil { @@ -182,6 +179,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) sts, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, 0, r.Scheme) if err != nil { logger.Error(err, "Failed to create StatefulSet") + cm.SetAvailable(false, status.ReasonResourceCreateFail, status.FormatError("Failed to create StatefulSet", err)) cm.SetProgressing(false, status.ReasonResourceCreateFail, status.FormatError("Failed to create StatefulSet", err)) cm.SetDegraded(true, status.ReasonResourceCreateFail, status.FormatError("Failed to create StatefulSet", err)) return ctrl.Result{}, err @@ -195,6 +193,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) // temporary network failure, or any other transient reason. logger.Error(err, "Failed to get StatefulSet. 
Requesting requeue") r.updateStatusFromStatefulSet(&calculatedStatus, nil) + cm.SetAvailable(false, status.ReasonStatefulSetGetError, status.FormatError("Failed to get StatefulSet", err)) cm.SetProgressing(false, status.ReasonStatefulSetGetError, status.FormatError("Failed to get StatefulSet", err)) // Stop progressing on persistent get error cm.SetDegraded(true, status.ReasonStatefulSetGetError, status.FormatError("Failed to get StatefulSet", err)) r.updateStatusFromStatefulSet(&calculatedStatus, nil) @@ -209,6 +208,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) // This case should ideally not happen if error handling above is correct err = fmt.Errorf("statefulSet is unexpectedly nil after get/create") logger.Error(err, "Internal error") + cm.SetAvailable(false, status.ReasonReconcileError, err.Error()) cm.SetDegraded(true, "InternalError", err.Error()) r.updateStatusFromStatefulSet(&calculatedStatus, nil) return ctrl.Result{}, err @@ -222,6 +222,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) err = checkStatefulSetControlledByEtcdOperator(etcdCluster, sts) if err != nil { logger.Error(err, "StatefulSet is not controlled by this EtcdCluster resource") + cm.SetAvailable(false, status.ReasonNotOwnedResource, err.Error()) cm.SetDegraded(true, status.ReasonNotOwnedResource, err.Error()) return ctrl.Result{}, err } @@ -234,6 +235,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) sts, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, 1, r.Scheme) if err != nil { logger.Error(err, "Failed to scale StatefulSet to 1 replica") + cm.SetAvailable(false, status.ReasonResourceUpdateFail, status.FormatError("Failed to scale StatefulSet to 1", err)) cm.SetProgressing(false, status.ReasonResourceUpdateFail, status.FormatError("Failed to scale StatefulSet to 1", err)) cm.SetDegraded(true, status.ReasonResourceUpdateFail, status.FormatError("Failed to scale StatefulSet to 1", err)) return ctrl.Result{}, err @@ -246,6 +248,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) err = createHeadlessServiceIfNotExist(ctx, logger, r.Client, etcdCluster, r.Scheme) if err != nil { logger.Error(err, "Failed to create Headless Service") + cm.SetAvailable(false, status.ReasonResourceCreateFail, status.FormatError("Failed to ensure Headless Service", err)) cm.SetProgressing(false, status.ReasonResourceCreateFail, status.FormatError("Failed to ensure Headless Service", err)) cm.SetDegraded(true, status.ReasonResourceCreateFail, status.FormatError("Failed to ensure Headless Service", err)) return ctrl.Result{}, err @@ -272,9 +275,8 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) calculatedStatus.CurrentVersion = "" return ctrl.Result{}, fmt.Errorf("health check failed: %w", err) } - // calculatedStatus.LastHealthCheckTime = metav1.Now() // TODO: Add this field - // TODO: Update CurrentVersion from healthInfos (e.g., from leaderStatus.Version if available) - // TODO: Populate UnhealthyMembers list based on healthInfos + // calculatedStatus.LastHealthCheckTime = metav1.Now() // TODO: Add this field in a future iteration. + // TODO: Populate UnhealthyMembers list based on healthInfos if exposure becomes necessary. 
// Update member count memberCnt := 0 @@ -282,8 +284,28 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) memberCnt = len(memberListResp.Members) } calculatedStatus.MemberCount = int32(memberCnt) + + var ( + leaderStatus *clientv3.StatusResponse + leaderIDHex string + ) + // Default to empty leader identity/version. We'll overwrite once a leader is confirmed. + calculatedStatus.LeaderId = "" + calculatedStatus.CurrentVersion = "" + + if memberCnt > 0 { + _, leaderStatus = etcdutils.FindLeaderStatus(healthInfos, logger) + if leaderStatus != nil && leaderStatus.Header != nil { + leaderIDHex = fmt.Sprintf("%x", leaderStatus.Header.MemberId) + calculatedStatus.LeaderId = leaderIDHex + if leaderStatus.Version != "" { + calculatedStatus.CurrentVersion = leaderStatus.Version + } + } + } + // Populate the detailed member status slice using our new helper function. - calculatedStatus.Members = r.populateMemberStatuses(ctx, memberListResp, healthInfos) + calculatedStatus.Members = r.populateMemberStatuses(ctx, memberListResp, healthInfos, leaderIDHex) // --------------------------------------------------------------------- // 6. Handle Member/Replica Mismatch @@ -313,6 +335,7 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) sts, err = reconcileStatefulSet(ctx, logger, etcdCluster, r.Client, newReplicaCount, r.Scheme) if err != nil { logger.Error(err, "Failed to adjust StatefulSet replicas to match member count") + cm.SetAvailable(false, status.ReasonResourceUpdateFail, status.FormatError("Failed to adjust StatefulSet replicas to match member count", err)) cm.SetProgressing(false, status.ReasonResourceUpdateFail, status.FormatError("Failed to adjust StatefulSet replicas to match member count", err)) cm.SetDegraded(true, status.ReasonResourceUpdateFail, status.FormatError("Failed to adjust StatefulSet replicas to match member count", err)) return ctrl.Result{}, err @@ -328,10 +351,8 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) var ( learnerStatus *clientv3.StatusResponse learner uint64 - leaderStatus *clientv3.StatusResponse ) if memberCnt > 0 { - _, leaderStatus = etcdutils.FindLeaderStatus(healthInfos, logger) if leaderStatus == nil { err = fmt.Errorf("couldn't find leader, memberCnt: %d", memberCnt) logger.Error(err, "Leader election might be in progress or cluster unhealthy") @@ -341,11 +362,6 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) // If the leader is not available, let's wait for the leader to be elected return ctrl.Result{RequeueAfter: requeueDuration}, err } - calculatedStatus.LeaderId = fmt.Sprintf("%x", leaderStatus.Header.MemberId) - // Set the cluster's current version based on the leader's version. - if leaderStatus.Version != "" { - calculatedStatus.CurrentVersion = leaderStatus.Version - } learner, learnerStatus = etcdutils.FindLearnerStatus(healthInfos, logger) if learner > 0 { @@ -468,8 +484,8 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) logger.Info("EtcdCluster reconciled successfully and is healthy") cm.SetAvailable(true, status.ReasonClusterReady, "Cluster is fully available and healthy") cm.SetProgressing(false, status.ReasonReconcileSuccess, "Cluster reconciled to desired state") - cm.SetDegraded(false, status.ReasonClusterReady, "Cluster is healthy") // Explicitly clear Degraded - return ctrl.Result{}, nil // Success! Defer will update status. 
+ cm.SetDegraded(false, status.ReasonClusterHealthy, "Cluster is healthy") // Explicitly clear Degraded + return ctrl.Result{}, nil // Success! Defer will update status. } else { logger.Info("EtcdCluster reached desired size, but some members are unhealthy") cm.SetAvailable(false, status.ReasonMembersUnhealthy, "Cluster reached size, but some members are unhealthy") @@ -481,54 +497,6 @@ func (r *EtcdClusterReconciler) Reconcile(ctx context.Context, req ctrl.Request) // Defer function will execute here before returning } -// derivePhaseFromConditions determines the overall Phase based on the set Conditions. -// NOTE: This logic needs refinement based on the exact conditions being set. -func (r *EtcdClusterReconciler) derivePhaseFromConditions(cluster *ecv1alpha1.EtcdCluster) { - // Default to Pending if no other condition matches - phase := "Pending" - - if cluster.Spec.Size == 0 { - phase = "Idle" - } else if meta.IsStatusConditionTrue(cluster.Status.Conditions, status.ConditionDegraded) { - // Check for fatal reasons first - degradedCondition := meta.FindStatusCondition(cluster.Status.Conditions, status.ConditionDegraded) - if degradedCondition != nil { - switch degradedCondition.Reason { - case status.ReasonResourceCreateFail, status.ReasonResourceUpdateFail, status.ReasonEtcdClientError, status.ReasonNotOwnedResource, "InternalError": // Add more fatal reasons if needed - phase = "Failed" - default: - phase = "Degraded" - } - } else { - phase = "Degraded" // Should have reason if Degraded is True - } - } else if meta.IsStatusConditionTrue(cluster.Status.Conditions, status.ConditionProgressing) { - // Determine specific progressing phase based on reason - progressingCondition := meta.FindStatusCondition(cluster.Status.Conditions, status.ConditionProgressing) - if progressingCondition != nil { - switch progressingCondition.Reason { - case status.ReasonCreatingResources: - phase = "Creating" - case status.ReasonInitializingCluster: - phase = "Initializing" - case status.ReasonScalingUp, status.ReasonScalingDown, status.ReasonMemberConfiguration, status.ReasonMembersMismatch: - phase = "Scaling" - case status.ReasonPromotingLearner, status.ReasonWaitingForLearner: - phase = "PromotingLearner" - default: - phase = "Progressing" // Generic progressing state if reason not specific - } - } else { - phase = "Progressing" // Should have reason if Progressing is True - } - } else if meta.IsStatusConditionTrue(cluster.Status.Conditions, status.ConditionAvailable) { - phase = "Running" - } - // Add logic for Terminating Phase if finalizers are implemented - - cluster.Status.Phase = phase -} - func (r *EtcdClusterReconciler) updateStatusFromStatefulSet( etcdClusterStatus *ecv1alpha1.EtcdClusterStatus, // Pass the status struct to modify sts *appsv1.StatefulSet, @@ -555,6 +523,7 @@ func (r *EtcdClusterReconciler) populateMemberStatuses( ctx context.Context, memberListResp *clientv3.MemberListResponse, healthInfos []etcdutils.EpHealth, + leaderID string, ) []ecv1alpha1.MemberStatus { logger := log.FromContext(ctx) // If there's no member list, there's nothing to populate. @@ -576,38 +545,30 @@ func (r *EtcdClusterReconciler) populateMemberStatuses( // Iterate through the canonical member list from etcd. for _, etcdMember := range memberListResp.Members { // Start building the status for this specific member. + memberIDHex := fmt.Sprintf("%x", etcdMember.ID) ms := ecv1alpha1.MemberStatus{ - ID: fmt.Sprintf("%x", etcdMember.ID), // Use hex representation for the uint64 ID. 
+ ID: memberIDHex, Name: etcdMember.Name, IsLearner: etcdMember.IsLearner, } - // Populate URLs, taking the first one if available. - if len(etcdMember.ClientURLs) > 0 { - ms.ClientURL = etcdMember.ClientURLs[0] - } - if len(etcdMember.PeerURLs) > 0 { - ms.PeerURL = etcdMember.PeerURLs[0] + if leaderID != "" && memberIDHex == leaderID { + ms.IsLeader = true } // Look up the detailed health status for this member in the map. if healthInfo, found := healthMap[etcdMember.ID]; found { // A health record was found for this member. ms.IsHealthy = healthInfo.Health - ms.ErrorMessage = healthInfo.Error - ms.Alarms = healthInfo.Alarms // Directly use the pre-processed []string from EpHealth. // Populate fields from the detailed StatusResponse if it exists. if healthInfo.Status != nil { ms.Version = healthInfo.Status.Version - ms.DBSize = healthInfo.Status.DbSize - ms.DBSizeInUse = healthInfo.Status.DbSizeInUse } } else { // A member was present in the member list but we couldn't get its // individual health status (e.g., its endpoint was unreachable during the check). ms.IsHealthy = false - ms.ErrorMessage = "Health status for this member could not be retrieved." logger.Info("No detailed health status found for etcd member, marking as unhealthy by default", "memberID", ms.ID, "memberName", ms.Name) } diff --git a/pkg/status/manager.go b/pkg/status/manager.go index be8659e..af86b95 100644 --- a/pkg/status/manager.go +++ b/pkg/status/manager.go @@ -76,8 +76,6 @@ func (m *Manager) SetAvailable(status bool, reason, message string) { conditionStatus := metav1.ConditionFalse if status { conditionStatus = metav1.ConditionTrue - // Typically, if Available becomes True, Degraded should be False. - m.SetDegraded(false, reason, message) } m.set(ConditionAvailable, conditionStatus, reason, message) } @@ -87,11 +85,6 @@ func (m *Manager) SetProgressing(status bool, reason, message string) { conditionStatus := metav1.ConditionFalse if status { conditionStatus = metav1.ConditionTrue - // If starting to progress, it's usually not fully Available yet. - // Ensure Available reflects this if not explicitly set later. - if !m.IsConditionTrue(ConditionAvailable) && m.IsConditionUnknown(ConditionAvailable) { - m.SetAvailable(false, reason, message) - } } m.set(ConditionProgressing, conditionStatus, reason, message) } @@ -101,8 +94,6 @@ func (m *Manager) SetDegraded(status bool, reason, message string) { conditionStatus := metav1.ConditionFalse if status { conditionStatus = metav1.ConditionTrue - // If Degraded becomes True, Available should typically be False. - m.SetAvailable(false, reason, message) } m.set(ConditionDegraded, conditionStatus, reason, message) } diff --git a/pkg/status/type.go b/pkg/status/type.go index 4d28888..6bad6d4 100644 --- a/pkg/status/type.go +++ b/pkg/status/type.go @@ -25,12 +25,13 @@ const ( ConditionDegraded string = "Degraded" ) -// Common reasons for EtcdCluster status conditions.// Reasons should be CamelCase and concise. +// Common reasons for EtcdCluster status conditions. Reasons should be CamelCase and concise. 
const ( // General Reasons ReasonReconciling string = "Reconciling" ReasonReconcileSuccess string = "ReconcileSuccess" ReasonReconcileError string = "ReconcileError" // Generic error + ReasonClusterHealthy string = "ClusterHealthy" // Available Reasons ReasonClusterReady string = "ClusterReady" diff --git a/test/e2e/datapersistence_test.go b/test/e2e/datapersistence_test.go index 2937c8d..222e10a 100644 --- a/test/e2e/datapersistence_test.go +++ b/test/e2e/datapersistence_test.go @@ -114,6 +114,8 @@ func TestDataPersistence(t *testing.T) { t.Fatalf("unable to create sts with size 1: %s", err) } + waitForClusterHealthyStatus(t, c, etcdClusterName, size) + return ctx }, ) diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 8178a53..0a565b7 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -145,6 +145,7 @@ func TestScaling(t *testing.T) { feature.Setup(func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context { createEtcdClusterWithPVC(ctx, t, c, etcdClusterName, tc.initialSize) waitForSTSReadiness(t, c, etcdClusterName, tc.initialSize) + waitForClusterHealthyStatus(t, c, etcdClusterName, tc.initialSize) return ctx }) @@ -154,6 +155,7 @@ func TestScaling(t *testing.T) { scaleEtcdCluster(ctx, t, c, etcdClusterName, tc.scaleTo) // Wait until StatefulSet spec/status reflect the scaled size and are ready waitForSTSReadiness(t, c, etcdClusterName, tc.scaleTo) + waitForClusterHealthyStatus(t, c, etcdClusterName, tc.expectedMembers) return ctx }, ) @@ -193,6 +195,7 @@ func TestPodRecovery(t *testing.T) { feature.Setup(func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context { createEtcdClusterWithPVC(ctx, t, c, etcdClusterName, 3) waitForSTSReadiness(t, c, etcdClusterName, 3) + waitForClusterHealthyStatus(t, c, etcdClusterName, 3) return ctx }) @@ -238,6 +241,7 @@ func TestPodRecovery(t *testing.T) { // Wait for full StatefulSet readiness waitForSTSReadiness(t, c, etcdClusterName, int(initialReplicas)) + waitForClusterHealthyStatus(t, c, etcdClusterName, int(initialReplicas)) // Verify PVC usage after recovery verifyPodUsesPVC(t, c, targetPodName, "etcd-data-"+etcdClusterName) @@ -294,6 +298,7 @@ func TestEtcdClusterFunctionality(t *testing.T) { feature.Setup(func(ctx context.Context, t *testing.T, c *envconf.Config) context.Context { createEtcdClusterWithPVC(ctx, t, c, etcdClusterName, 3) waitForSTSReadiness(t, c, etcdClusterName, 3) + waitForClusterHealthyStatus(t, c, etcdClusterName, 3) return ctx }) @@ -303,6 +308,7 @@ func TestEtcdClusterFunctionality(t *testing.T) { // Wait until all members are promoted to voting members (no learners), // otherwise endpoint health will fail on learners. 
waitForNoLearners(t, c, podName, 3) + waitForClusterHealthyStatus(t, c, etcdClusterName, 3) // Check health for the whole cluster rather than a single member command := []string{"etcdctl", "endpoint", "health", "--cluster"} diff --git a/test/e2e/helpers_test.go b/test/e2e/helpers_test.go index b6c7c5b..d45a726 100644 --- a/test/e2e/helpers_test.go +++ b/test/e2e/helpers_test.go @@ -29,12 +29,14 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" storagev1 "k8s.io/api/storage/v1" + "k8s.io/apimachinery/pkg/api/meta" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "sigs.k8s.io/e2e-framework/klient/wait" "sigs.k8s.io/e2e-framework/pkg/envconf" ecv1alpha1 "go.etcd.io/etcd-operator/api/v1alpha1" + "go.etcd.io/etcd-operator/pkg/status" etcdserverpb "go.etcd.io/etcd/api/v3/etcdserverpb" ) @@ -296,3 +298,61 @@ func verifyDataOperations(t *testing.T, c *envconf.Config, etcdClusterName strin t.Errorf("Expected key-value pair [%s=%s], but got output: %s", testKey, testValue, stdout) } } + +func waitForClusterHealthyStatus(t *testing.T, c *envconf.Config, name string, expectedMembers int) ecv1alpha1.EtcdCluster { + t.Helper() + var cluster ecv1alpha1.EtcdCluster + err := wait.For(func(ctx context.Context) (bool, error) { + if err := c.Client().Resources().Get(ctx, name, namespace, &cluster); err != nil { + return false, err + } + + if cluster.Status.ObservedGeneration < cluster.Generation { + return false, nil + } + + if cluster.Status.CurrentReplicas != int32(expectedMembers) || + cluster.Status.ReadyReplicas != int32(expectedMembers) || + cluster.Status.MemberCount != int32(expectedMembers) || + len(cluster.Status.Members) != expectedMembers { + return false, nil + } + + leaderCount := 0 + for _, member := range cluster.Status.Members { + if !member.IsHealthy || member.IsLearner { + return false, nil + } + if member.IsLeader { + leaderCount++ + } + } + if leaderCount != 1 { + return false, nil + } + + if !conditionEquals(cluster.Status.Conditions, status.ConditionAvailable, metav1.ConditionTrue, status.ReasonClusterReady) { + return false, nil + } + if !conditionEquals(cluster.Status.Conditions, status.ConditionProgressing, metav1.ConditionFalse, status.ReasonReconcileSuccess) { + return false, nil + } + if !conditionEquals(cluster.Status.Conditions, status.ConditionDegraded, metav1.ConditionFalse, status.ReasonClusterHealthy) { + return false, nil + } + + return true, nil + }, wait.WithTimeout(5*time.Minute), wait.WithInterval(5*time.Second)) + if err != nil { + t.Fatalf("EtcdCluster %s did not reach healthy status for %d members: %v", name, expectedMembers, err) + } + return cluster +} + +func conditionEquals(conditions []metav1.Condition, condType string, condStatus metav1.ConditionStatus, condReason string) bool { + cond := meta.FindStatusCondition(conditions, condType) + if cond == nil { + return false + } + return cond.Status == condStatus && cond.Reason == condReason +}
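With the status surface trimmed down in this final patch, consumers are expected to read leader identity and per-member health directly from `Status.Members` and to gate on the same conditions that `waitForClusterHealthyStatus` checks. The short sketch below shows how a client of the API might do that; the helpers `clusterLeader` and `isAvailable` are hypothetical and exist only for illustration, but the fields and the `status.ConditionAvailable` constant they rely on are the ones defined in this series.

package sketch

import (
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	ecv1alpha1 "go.etcd.io/etcd-operator/api/v1alpha1"
	"go.etcd.io/etcd-operator/pkg/status"
)

// clusterLeader returns the member reported as leader, requiring it to also be
// healthy, mirroring the single-leader check in the e2e helper.
func clusterLeader(st ecv1alpha1.EtcdClusterStatus) (ecv1alpha1.MemberStatus, bool) {
	for _, m := range st.Members {
		if m.IsLeader && m.IsHealthy {
			return m, true
		}
	}
	return ecv1alpha1.MemberStatus{}, false
}

// isAvailable reports whether the Available condition is currently True.
func isAvailable(st ecv1alpha1.EtcdClusterStatus) bool {
	cond := meta.FindStatusCondition(st.Conditions, status.ConditionAvailable)
	return cond != nil && cond.Status == metav1.ConditionTrue
}

Keeping each member entry down to identity, version, health, learner, and leader flags is consistent with the stated goal of shrinking the CR status surface while still letting callers answer the common questions ("is the cluster available?", "who is the leader?") without opening an etcd client connection.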