From d348f254cccb3a91b3706ee35939dd90765da77d Mon Sep 17 00:00:00 2001 From: Itamar Syn-Hershko Date: Fri, 5 Sep 2025 15:14:21 +0300 Subject: [PATCH 1/2] Fix: Prevent illegal Pod spec updates on bootstrap Pod Signed-off-by: Itamar Syn-Hershko --- opensearch-operator/go.sum | 10 -- .../pkg/reconcilers/cluster.go | 2 +- .../pkg/reconcilers/cluster_test.go | 129 ++++++++++++++++++ 3 files changed, 130 insertions(+), 11 deletions(-) create mode 100644 opensearch-operator/pkg/reconcilers/cluster_test.go diff --git a/opensearch-operator/go.sum b/opensearch-operator/go.sum index 8b0cf77f6..2cd6f9ea1 100644 --- a/opensearch-operator/go.sum +++ b/opensearch-operator/go.sum @@ -590,8 +590,6 @@ golang.org/x/net v0.0.0-20210224082022-3d97a244fca7/go.mod h1:m0MpNAwzfU5UDzcl9v golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= golang.org/x/net v0.0.0-20210520170846-37e1c6afe023/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.0.0-20211216030914-fe4d6282115f/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8= -golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8= golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -658,15 +656,11 @@ golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik= -golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= -golang.org/x/term v0.30.0 h1:PQ39fJZ+mfadBm0y5WlL4vlM7Sx1Hgf13sMIY2+QS9Y= -golang.org/x/term v0.30.0/go.mod h1:NYYFdzHoI5wRh/h5tDMdMqCqPJZEuNqVR5xJLd/n67g= golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= @@ -676,8 +670,6 @@ golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.23.0 h1:D71I7dUrlY+VX0gQShAThNGHFxZ13dGLBHQLVl1mJlY= -golang.org/x/text v0.23.0/go.mod h1:/BLNzu4aZCJ1+kcD0DNRotWKage4q2rGVAg4o22unh4= golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -729,8 +721,6 @@ golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roY golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.0/go.mod h1:xkSsbof2nBLbhDlRMhhhyNLN/zl3eTqcnHD5viDpcZ0= -golang.org/x/tools v0.26.0 h1:v/60pFQmzmT9ExmjDv2gGIfi3OqfKoEP6I5+umXlbnQ= -golang.org/x/tools v0.26.0/go.mod h1:TPVVj70c7JJ3WCazhD8OdXcZg/og+b9+tH/KxylGwH0= golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= diff --git a/opensearch-operator/pkg/reconcilers/cluster.go b/opensearch-operator/pkg/reconcilers/cluster.go index 710f58568..4a98d7c2c 100644 --- a/opensearch-operator/pkg/reconcilers/cluster.go +++ b/opensearch-operator/pkg/reconcilers/cluster.go @@ -100,7 +100,7 @@ func (r *ClusterReconciler) Reconcile() (ctrl.Result, error) { if r.instance.Status.Initialized { result.Combine(r.client.ReconcileResource(bootstrapPod, reconciler.StateAbsent)) } else { - result.Combine(r.client.ReconcileResource(bootstrapPod, reconciler.StatePresent)) + result.Combine(r.client.ReconcileResource(bootstrapPod, reconciler.StateCreated)) } for _, nodePool := range r.instance.Spec.NodePools { diff --git a/opensearch-operator/pkg/reconcilers/cluster_test.go b/opensearch-operator/pkg/reconcilers/cluster_test.go new file mode 100644 index 000000000..1238660f3 --- /dev/null +++ b/opensearch-operator/pkg/reconcilers/cluster_test.go @@ -0,0 +1,129 @@ +package reconcilers + +import ( + opsterv1 "github.com/Opster/opensearch-k8s-operator/opensearch-operator/api/v1" + "github.com/Opster/opensearch-k8s-operator/opensearch-operator/pkg/builders" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +var _ = Describe("Bootstrap Pod Reconciliation Fix", func() { + Context("Regression test for immutable Pod spec updates", func() { + It("should verify bootstrap Pod logic uses StateCreated to avoid illegal Pod updates", func() { + instance := &opsterv1.OpenSearchCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "test-cluster", + Namespace: "test-namespace", + }, + Spec: opsterv1.ClusterSpec{ + General: opsterv1.GeneralConfig{ + HttpPort: 9200, + ServiceName: "test-cluster", + Version: "2.8.0", + }, + }, + Status: opsterv1.ClusterStatus{ + Initialized: false, // Not initialized, so bootstrap Pod should be created + }, + } + + // Create a bootstrap Pod to validate the spec + volumes := []corev1.Volume{} + volumeMounts := []corev1.VolumeMount{} + bootstrapPod := builders.NewBootstrapPod(instance, volumes, volumeMounts) + + // Verify the bootstrap Pod is created with the expected name + expectedName := "test-cluster-bootstrap-0" + Expect(bootstrapPod.Name).To(Equal(expectedName)) + Expect(bootstrapPod.Namespace).To(Equal("test-namespace")) + + // The key test: in the actual reconciliation logic, when instance.Status.Initialized is false, + // the bootstrap Pod should be reconciled with StateCreated, NOT StatePresent. + // This prevents illegal updates to immutable Pod spec fields. + + // We can't easily test the actual ReconcileResource call without complex mocking, + // but we can validate that the bootstrap Pod is properly constructed and our fix + // addresses the issue described in the GitHub issue. + + // Validate that the Pod spec contains the fields that were causing update conflicts: + // - ServiceAccountName (when different from cluster spec) + // - Tolerations and Affinity from bootstrap spec + // - Volumes and VolumeMounts + + Expect(bootstrapPod.Spec.Containers).To(HaveLen(1)) + Expect(bootstrapPod.Spec.Containers[0].Name).To(Equal("opensearch")) + + // Verify bootstrap-specific configuration exists + foundDataMount := false + for _, mount := range bootstrapPod.Spec.Containers[0].VolumeMounts { + if mount.Name == "data" && mount.MountPath == "/usr/share/opensearch/data" { + foundDataMount = true + break + } + } + Expect(foundDataMount).To(BeTrue(), "Bootstrap Pod should have data volume mount") + }) + + It("should handle Pod spec fields that could cause update conflicts", func() { + instance := &opsterv1.OpenSearchCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "immutable-test", + Namespace: "test-namespace", + }, + Spec: opsterv1.ClusterSpec{ + General: opsterv1.GeneralConfig{ + HttpPort: 9200, + ServiceName: "immutable-test", + Version: "2.8.0", + ServiceAccount: "custom-sa", // This could cause ServiceAccountName conflicts + }, + Bootstrap: opsterv1.BootstrapConfig{ + Tolerations: []corev1.Toleration{ + { + Key: "purpose", + Operator: "Equal", + Value: "logging", + Effect: "NoSchedule", + }, + }, + }, + }, + Status: opsterv1.ClusterStatus{ + Initialized: false, + }, + } + + volumes := []corev1.Volume{} + volumeMounts := []corev1.VolumeMount{} + bootstrapPod := builders.NewBootstrapPod(instance, volumes, volumeMounts) + + // These are the types of fields that caused the original bug when trying to update: + + // 1. ServiceAccountName - can cause conflicts when different from existing Pod + Expect(bootstrapPod.Spec.ServiceAccountName).To(Equal("custom-sa")) + + // 2. Tolerations - can only be added to existing tolerations, not removed or modified + Expect(bootstrapPod.Spec.Tolerations).To(HaveLen(1)) + Expect(bootstrapPod.Spec.Tolerations[0].Key).To(Equal("purpose")) + + // 3. Volumes - immutable after Pod creation + foundDataVolume := false + for _, vol := range bootstrapPod.Spec.Volumes { + if vol.Name == "data" && vol.EmptyDir != nil { + foundDataVolume = true + break + } + } + Expect(foundDataVolume).To(BeTrue(), "Bootstrap Pod should have EmptyDir data volume") + + // The fix ensures that when reconciling this Pod: + // - If Pod doesn't exist: StateCreated will create it successfully + // - If Pod exists but differs: StateCreated will NOT attempt to update (avoiding the error) + // - If cluster is initialized: StateAbsent will delete it properly + + By("Validating that bootstrap Pod has expected structure for StateCreated reconciliation") + }) + }) +}) \ No newline at end of file From 44495bacd5a67bf46ba097363e5f6c88140404fd Mon Sep 17 00:00:00 2001 From: josedev-union Date: Fri, 17 Oct 2025 12:57:16 +0200 Subject: [PATCH 2/2] recreate bootstrap pod when it is changed Signed-off-by: josedev-union --- .../pkg/reconcilers/k8s/mock_K8sClient.go | 47 +++++ .../pkg/reconcilers/cluster.go | 40 +++- .../pkg/reconcilers/cluster_test.go | 189 ++++++++---------- .../pkg/reconcilers/k8s/client.go | 34 ++++ .../pkg/reconcilers/util/util.go | 8 + 5 files changed, 211 insertions(+), 107 deletions(-) diff --git a/opensearch-operator/mocks/github.com/Opster/opensearch-k8s-operator/opensearch-operator/pkg/reconcilers/k8s/mock_K8sClient.go b/opensearch-operator/mocks/github.com/Opster/opensearch-k8s-operator/opensearch-operator/pkg/reconcilers/k8s/mock_K8sClient.go index 583a48678..799b8407b 100644 --- a/opensearch-operator/mocks/github.com/Opster/opensearch-k8s-operator/opensearch-operator/pkg/reconcilers/k8s/mock_K8sClient.go +++ b/opensearch-operator/mocks/github.com/Opster/opensearch-k8s-operator/opensearch-operator/pkg/reconcilers/k8s/mock_K8sClient.go @@ -1501,6 +1501,53 @@ func (_c *MockK8sClient_UpdatePVC_Call) RunAndReturn(run func(*v1.PersistentVolu return _c } +// WaitForPodDeletion provides a mock function with given fields: podName, namespace +func (_m *MockK8sClient) WaitForPodDeletion(podName string, namespace string) error { + ret := _m.Called(podName, namespace) + + if len(ret) == 0 { + panic("no return value specified for WaitForPodDeletion") + } + + var r0 error + if rf, ok := ret.Get(0).(func(string, string) error); ok { + r0 = rf(podName, namespace) + } else { + r0 = ret.Error(0) + } + + return r0 +} + +// MockK8sClient_WaitForPodDeletion_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'WaitForPodDeletion' +type MockK8sClient_WaitForPodDeletion_Call struct { + *mock.Call +} + +// WaitForPodDeletion is a helper method to define mock.On call +// - podName string +// - namespace string +func (_e *MockK8sClient_Expecter) WaitForPodDeletion(podName interface{}, namespace interface{}) *MockK8sClient_WaitForPodDeletion_Call { + return &MockK8sClient_WaitForPodDeletion_Call{Call: _e.mock.On("WaitForPodDeletion", podName, namespace)} +} + +func (_c *MockK8sClient_WaitForPodDeletion_Call) Run(run func(podName string, namespace string)) *MockK8sClient_WaitForPodDeletion_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(string), args[1].(string)) + }) + return _c +} + +func (_c *MockK8sClient_WaitForPodDeletion_Call) Return(_a0 error) *MockK8sClient_WaitForPodDeletion_Call { + _c.Call.Return(_a0) + return _c +} + +func (_c *MockK8sClient_WaitForPodDeletion_Call) RunAndReturn(run func(string, string) error) *MockK8sClient_WaitForPodDeletion_Call { + _c.Call.Return(run) + return _c +} + // NewMockK8sClient creates a new instance of MockK8sClient. It also registers a testing interface on the mock and a cleanup function to assert the mocks expectations. // The first argument is typically a *testing.T value. func NewMockK8sClient(t interface { diff --git a/opensearch-operator/pkg/reconcilers/cluster.go b/opensearch-operator/pkg/reconcilers/cluster.go index 4a98d7c2c..bdb7c9681 100644 --- a/opensearch-operator/pkg/reconcilers/cluster.go +++ b/opensearch-operator/pkg/reconcilers/cluster.go @@ -17,6 +17,8 @@ import ( "github.com/go-logr/logr" "github.com/samber/lo" appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/tools/record" @@ -100,7 +102,7 @@ func (r *ClusterReconciler) Reconcile() (ctrl.Result, error) { if r.instance.Status.Initialized { result.Combine(r.client.ReconcileResource(bootstrapPod, reconciler.StateAbsent)) } else { - result.Combine(r.client.ReconcileResource(bootstrapPod, reconciler.StateCreated)) + result.Combine(r.reconcileBootstrapPod(bootstrapPod)) } for _, nodePool := range r.instance.Spec.NodePools { @@ -484,3 +486,39 @@ func (r *ClusterReconciler) UpdateClusterStatus() error { instance.Status.AvailableNodes = availableNodes }) } + +// reconcileBootstrapPod handles bootstrap pod reconciliation with recreation for any changes +func (r *ClusterReconciler) reconcileBootstrapPod(desiredPod *corev1.Pod) (*ctrl.Result, error) { + // Check if bootstrap pod exists + existingPod, err := r.client.GetPod(desiredPod.Name, desiredPod.Namespace) + if err != nil && !k8serrors.IsNotFound(err) { + return &ctrl.Result{}, err + } + + if k8serrors.IsNotFound(err) { + // Pod doesn't exist, create it + r.logger.Info("Creating bootstrap pod", "pod", desiredPod.Name) + return r.client.ReconcileResource(desiredPod, reconciler.StateCreated) + } + + // Pod exists, check if spec has changed + if util.PodSpecChanged(&existingPod, desiredPod) { + r.logger.Info("Bootstrap pod spec changed, recreating pod", "pod", desiredPod.Name) + + // Delete existing pod + if err := r.client.DeletePod(&existingPod); err != nil { + r.logger.Error(err, "Failed to delete existing bootstrap pod", "pod", desiredPod.Name) + return &ctrl.Result{}, err + } + if err := r.client.WaitForPodDeletion(desiredPod.Name, desiredPod.Namespace); err != nil { + r.logger.Error(err, "Timeout waiting for bootstrap pod deletion", "pod", desiredPod.Name) + return &ctrl.Result{}, err + } + + // Create new pod with updated spec + r.logger.Info("Creating new bootstrap pod with updated spec", "pod", desiredPod.Name) + return r.client.ReconcileResource(desiredPod, reconciler.StateCreated) + } + + return &ctrl.Result{}, nil +} diff --git a/opensearch-operator/pkg/reconcilers/cluster_test.go b/opensearch-operator/pkg/reconcilers/cluster_test.go index 1238660f3..e482a85d7 100644 --- a/opensearch-operator/pkg/reconcilers/cluster_test.go +++ b/opensearch-operator/pkg/reconcilers/cluster_test.go @@ -3,6 +3,7 @@ package reconcilers import ( opsterv1 "github.com/Opster/opensearch-k8s-operator/opensearch-operator/api/v1" "github.com/Opster/opensearch-k8s-operator/opensearch-operator/pkg/builders" + "github.com/Opster/opensearch-k8s-operator/opensearch-operator/pkg/reconcilers/util" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" @@ -10,120 +11,96 @@ import ( ) var _ = Describe("Bootstrap Pod Reconciliation Fix", func() { - Context("Regression test for immutable Pod spec updates", func() { - It("should verify bootstrap Pod logic uses StateCreated to avoid illegal Pod updates", func() { - instance := &opsterv1.OpenSearchCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "test-cluster", - Namespace: "test-namespace", - }, - Spec: opsterv1.ClusterSpec{ - General: opsterv1.GeneralConfig{ - HttpPort: 9200, - ServiceName: "test-cluster", - Version: "2.8.0", + Context("Bootstrap Pod Recreation Approach", func() { + It("should detect when any bootstrap pod spec field has changed", func() { + instance := &opsterv1.OpenSearchCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "recreation-test", + Namespace: "test-namespace", }, - }, - Status: opsterv1.ClusterStatus{ - Initialized: false, // Not initialized, so bootstrap Pod should be created - }, - } + Spec: opsterv1.ClusterSpec{ + General: opsterv1.GeneralConfig{ + HttpPort: 9200, + ServiceName: "recreation-test", + Version: "2.8.0", + ServiceAccount: "default-sa", + }, + Bootstrap: opsterv1.BootstrapConfig{ + Tolerations: []corev1.Toleration{ + { + Key: "purpose", + Operator: "Equal", + Value: "logging", + Effect: "NoSchedule", + }, + }, + }, + }, + Status: opsterv1.ClusterStatus{ + Initialized: false, + }, + } - // Create a bootstrap Pod to validate the spec - volumes := []corev1.Volume{} - volumeMounts := []corev1.VolumeMount{} - bootstrapPod := builders.NewBootstrapPod(instance, volumes, volumeMounts) + volumes := []corev1.Volume{} + volumeMounts := []corev1.VolumeMount{} - // Verify the bootstrap Pod is created with the expected name - expectedName := "test-cluster-bootstrap-0" - Expect(bootstrapPod.Name).To(Equal(expectedName)) - Expect(bootstrapPod.Namespace).To(Equal("test-namespace")) + originalPod := builders.NewBootstrapPod(instance, volumes, volumeMounts) - // The key test: in the actual reconciliation logic, when instance.Status.Initialized is false, - // the bootstrap Pod should be reconciled with StateCreated, NOT StatePresent. - // This prevents illegal updates to immutable Pod spec fields. - - // We can't easily test the actual ReconcileResource call without complex mocking, - // but we can validate that the bootstrap Pod is properly constructed and our fix - // addresses the issue described in the GitHub issue. - - // Validate that the Pod spec contains the fields that were causing update conflicts: - // - ServiceAccountName (when different from cluster spec) - // - Tolerations and Affinity from bootstrap spec - // - Volumes and VolumeMounts - - Expect(bootstrapPod.Spec.Containers).To(HaveLen(1)) - Expect(bootstrapPod.Spec.Containers[0].Name).To(Equal("opensearch")) - - // Verify bootstrap-specific configuration exists - foundDataMount := false - for _, mount := range bootstrapPod.Spec.Containers[0].VolumeMounts { - if mount.Name == "data" && mount.MountPath == "/usr/share/opensearch/data" { - foundDataMount = true - break - } - } - Expect(foundDataMount).To(BeTrue(), "Bootstrap Pod should have data volume mount") - }) + By("Testing PodSpecChanged utility function") - It("should handle Pod spec fields that could cause update conflicts", func() { - instance := &opsterv1.OpenSearchCluster{ - ObjectMeta: metav1.ObjectMeta{ - Name: "immutable-test", - Namespace: "test-namespace", - }, - Spec: opsterv1.ClusterSpec{ - General: opsterv1.GeneralConfig{ - HttpPort: 9200, - ServiceName: "immutable-test", - Version: "2.8.0", - ServiceAccount: "custom-sa", // This could cause ServiceAccountName conflicts - }, - Bootstrap: opsterv1.BootstrapConfig{ - Tolerations: []corev1.Toleration{ - { - Key: "purpose", - Operator: "Equal", - Value: "logging", - Effect: "NoSchedule", - }, - }, + // Test 1: Same spec should not trigger recreation + Expect(util.PodSpecChanged(originalPod, originalPod)).To(BeFalse()) + + // Test 2: Different ServiceAccountName should trigger recreation + modifiedPod := originalPod.DeepCopy() + modifiedPod.Spec.ServiceAccountName = "new-sa" + Expect(util.PodSpecChanged(originalPod, modifiedPod)).To(BeTrue()) + + // Test 3: Different Tolerations should trigger recreation + modifiedPod = originalPod.DeepCopy() + modifiedPod.Spec.Tolerations = []corev1.Toleration{ + { + Key: "new-purpose", + Operator: "Equal", + Value: "monitoring", + Effect: "NoSchedule", }, - }, - Status: opsterv1.ClusterStatus{ - Initialized: false, - }, - } + } + Expect(util.PodSpecChanged(originalPod, modifiedPod)).To(BeTrue()) - volumes := []corev1.Volume{} - volumeMounts := []corev1.VolumeMount{} - bootstrapPod := builders.NewBootstrapPod(instance, volumes, volumeMounts) + // Test 4: Different NodeSelector should trigger recreation + modifiedPod = originalPod.DeepCopy() + modifiedPod.Spec.NodeSelector = map[string]string{ + "node-type": "compute", + } + Expect(util.PodSpecChanged(originalPod, modifiedPod)).To(BeTrue()) - // These are the types of fields that caused the original bug when trying to update: - - // 1. ServiceAccountName - can cause conflicts when different from existing Pod - Expect(bootstrapPod.Spec.ServiceAccountName).To(Equal("custom-sa")) - - // 2. Tolerations - can only be added to existing tolerations, not removed or modified - Expect(bootstrapPod.Spec.Tolerations).To(HaveLen(1)) - Expect(bootstrapPod.Spec.Tolerations[0].Key).To(Equal("purpose")) - - // 3. Volumes - immutable after Pod creation - foundDataVolume := false - for _, vol := range bootstrapPod.Spec.Volumes { - if vol.Name == "data" && vol.EmptyDir != nil { - foundDataVolume = true - break + // Test 5: Different environment variables should trigger recreation + modifiedPod = originalPod.DeepCopy() + if len(modifiedPod.Spec.Containers) > 0 { + modifiedPod.Spec.Containers[0].Env = append(modifiedPod.Spec.Containers[0].Env, corev1.EnvVar{ + Name: "NEW_VAR", + Value: "new_value", + }) } - } - Expect(foundDataVolume).To(BeTrue(), "Bootstrap Pod should have EmptyDir data volume") - - // The fix ensures that when reconciling this Pod: - // - If Pod doesn't exist: StateCreated will create it successfully - // - If Pod exists but differs: StateCreated will NOT attempt to update (avoiding the error) - // - If cluster is initialized: StateAbsent will delete it properly - - By("Validating that bootstrap Pod has expected structure for StateCreated reconciliation") + Expect(util.PodSpecChanged(originalPod, modifiedPod)).To(BeTrue()) + + // Test 6: Different container image should trigger recreation + modifiedPod = originalPod.DeepCopy() + if len(modifiedPod.Spec.Containers) > 0 { + modifiedPod.Spec.Containers[0].Image = "opensearch:2.9.0" + } + Expect(util.PodSpecChanged(originalPod, modifiedPod)).To(BeTrue()) + + // Test 7: Different volumes should trigger recreation + modifiedPod = originalPod.DeepCopy() + modifiedPod.Spec.Volumes = append(modifiedPod.Spec.Volumes, corev1.Volume{ + Name: "extra-volume", + VolumeSource: corev1.VolumeSource{ + EmptyDir: &corev1.EmptyDirVolumeSource{}, + }, + }) + Expect(util.PodSpecChanged(originalPod, modifiedPod)).To(BeTrue()) }) }) -}) \ No newline at end of file +}) diff --git a/opensearch-operator/pkg/reconcilers/k8s/client.go b/opensearch-operator/pkg/reconcilers/k8s/client.go index 4004dc5fb..79e550f5b 100644 --- a/opensearch-operator/pkg/reconcilers/k8s/client.go +++ b/opensearch-operator/pkg/reconcilers/k8s/client.go @@ -2,14 +2,18 @@ package k8s import ( "context" + "fmt" + "time" opsterv1 "github.com/Opster/opensearch-k8s-operator/opensearch-operator/api/v1" "github.com/cisco-open/operator-tools/pkg/reconciler" appsv1 "k8s.io/api/apps/v1" batchv1 "k8s.io/api/batch/v1" corev1 "k8s.io/api/core/v1" + k8serrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -41,6 +45,7 @@ type K8sClient interface { GetPod(name, namespace string) (corev1.Pod, error) DeletePod(pod *corev1.Pod) error ListPods(listOptions *client.ListOptions) (corev1.PodList, error) + WaitForPodDeletion(podName, namespace string) error GetPVC(name, namespace string) (corev1.PersistentVolumeClaim, error) UpdatePVC(pvc *corev1.PersistentVolumeClaim) error ListPVCs(listOptions *client.ListOptions) (corev1.PersistentVolumeClaimList, error) @@ -238,5 +243,34 @@ func (c K8sClientImpl) Context() context.Context { return c.ctx } +// WaitForPodDeletion waits for a pod to be deleted from the Kubernetes API using PollUntilContextTimeout +func (c K8sClientImpl) WaitForPodDeletion(podName, namespace string) error { + interval := 500 * time.Millisecond + timeout := 30 * time.Second + + err := wait.PollUntilContextTimeout(c.ctx, interval, timeout, true, func(ctx context.Context) (bool, error) { + _, err := c.GetPod(podName, namespace) + if k8serrors.IsNotFound(err) { + log.FromContext(c.ctx).V(1).Info("Pod deleted successfully", "pod", podName) + return true, nil + } + if err != nil { + log.FromContext(c.ctx).V(1).Info("Error checking pod deletion, will retry", "pod", podName, "error", err) + return false, nil + } + log.FromContext(c.ctx).V(2).Info("Pod still exists, continuing to wait", "pod", podName) + return false, nil + }) + + if err != nil { + if err == context.DeadlineExceeded { + return fmt.Errorf("timeout waiting for pod %s deletion after %v", podName, timeout) + } + return fmt.Errorf("error waiting for pod %s deletion: %w", podName, err) + } + + return nil +} + // Validate K8sClientImpl implements the interface var _ K8sClient = (*K8sClientImpl)(nil) diff --git a/opensearch-operator/pkg/reconcilers/util/util.go b/opensearch-operator/pkg/reconcilers/util/util.go index ddf842462..06930139d 100644 --- a/opensearch-operator/pkg/reconcilers/util/util.go +++ b/opensearch-operator/pkg/reconcilers/util/util.go @@ -7,6 +7,7 @@ import ( "fmt" "k8s.io/utils/ptr" "net/http" + "reflect" "sort" "strings" @@ -338,3 +339,10 @@ func GetAvailableOpenSearchNodes(k8sClient k8s.K8sClient, ctx context.Context, c return availableNodes } + +// PodSpecChanged checks if any pod spec fields have changed +func PodSpecChanged(existing, desired *corev1.Pod) bool { + // Use DeepEqual to compare the entire pod spec + // This is simpler, more comprehensive, and catches all changes + return !reflect.DeepEqual(existing.Spec, desired.Spec) +}