Skip to content

Commit 1d49d1c

Browse files
committed
add test coverage for bug OCPBUGS-63348
Signed-off-by: Huiran Wang <[email protected]>
1 parent 563a12f commit 1d49d1c

File tree

2 files changed

+214
-0
lines changed

2 files changed

+214
-0
lines changed

test/extended/networking/egressip.go

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ import (
1717
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1818
"k8s.io/client-go/kubernetes"
1919
"k8s.io/kubernetes/test/e2e/framework"
20+
e2enode "k8s.io/kubernetes/test/e2e/framework/node"
2021
"k8s.io/kubernetes/test/e2e/framework/skipper"
2122
admissionapi "k8s.io/pod-security-admission/api"
2223

@@ -564,6 +565,124 @@ var _ = g.Describe("[sig-network][Feature:EgressIP][apigroup:operator.openshift.
564565
spawnProberSendEgressIPTrafficCheckLogs(oc, externalNamespace, probePodName, routeName, targetProtocol, targetHost, targetPort, numberOfRequestsToSend, numberOfRequestsToSend, packetSnifferDaemonSet, egressIPSet)
565566
}
566567
})
568+
569+
g.It("Rebooting a node/Restarting CNCC pod should not change the EgressIPs capacity", func() {
	// Test coverage for bug OCPBUGS-63348: the EgressIP capacity reported for a node
	// must be the same before a graceful reboot, after the reboot, and after the CNCC
	// pod has been restarted, and no CloudPrivateIPConfig may end up with
	// CloudResponseError.
	ctx := context.TODO()

	g.By("Get one Egress node")
	egressNodeName := egressIPNodesOrderedNames[0]
	for _, node := range egressIPNodesOrderedNames[1:] {
		// Only keep one egress node, remove egress labels from other nodes.
		// NOTE(review): the result is discarded on purpose here (best effort);
		// confirm that a failed label removal should not fail the test.
		_, _ = runOcWithRetry(oc.AsAdmin(), "label", "node", node, "k8s.ovn.org/egress-assignable-")
	}

	g.By("Get capacity of one Egress node before reboot")
	egressNode, err := clientset.CoreV1().Nodes().Get(ctx, egressNodeName, metav1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())
	nodeEgressIPConfigs, err := getNodeEgressIPConfiguration(egressNode)
	o.Expect(err).NotTo(o.HaveOccurred())
	// Fail with a readable message instead of panicking on the [0] index below.
	o.Expect(nodeEgressIPConfigs).NotTo(o.BeEmpty(), "node %s has no EgressIP configuration", egressNodeName)
	capacityBeforeReboot := nodeEgressIPConfigs[0].Capacity
	framework.Logf("The capacity of node %s before reboot is %v", egressNodeName, capacityBeforeReboot)

	g.By("Getting a map of source nodes and potential Egress IPs for these nodes")
	var egressIPsPerNode int
	if capacityBeforeReboot.IPv4 == 0 {
		// On OpenStack and GCP, the capacity per node is the number of IPs per node, not the number of IPv4 addresses, like "capacity":{"ip":8}
		egressIPsPerNode = capacityBeforeReboot.IP
	} else {
		egressIPsPerNode = capacityBeforeReboot.IPv4
	}
	// Cap the number of EgressIP objects created by this test at 20.
	if egressIPsPerNode > 20 {
		egressIPsPerNode = 20
	}

	nodeEgressIPMap, err := findNodeEgressIPs(oc, clientset, cloudNetworkClientset, egressIPNodesOrderedNames, cloudType, egressIPsPerNode)
	o.Expect(err).NotTo(o.HaveOccurred())
	framework.Logf("%v", nodeEgressIPMap)
	// The creation loop below indexes this slice up to egressIPsPerNode-1.
	o.Expect(len(nodeEgressIPMap[egressNodeName])).To(o.BeNumerically(">=", egressIPsPerNode),
		"not enough candidate EgressIPs found for node %s", egressNodeName)

	g.By(fmt.Sprintf("Creating %d EgressIPs objects for the Egress node", egressIPsPerNode))
	defer func() {
		g.By("Deleting all EgressIPs objects")
		_, err := runOcWithRetry(oc.AsAdmin(), "delete", "egressIP", "--all")
		o.Expect(err).NotTo(o.HaveOccurred())
	}()
	for i := 0; i < egressIPsPerNode; i++ {
		// One EgressIP object per candidate IP, each pinned to the single egress node.
		egressIPSet := make(map[string]string)
		egressIPSet[nodeEgressIPMap[egressNodeName][i]] = egressNodeName

		g.By(fmt.Sprintf("Creating %dth EgressIP object ", i))
		egressIPObjectName := egressIPNamespace + fmt.Sprintf("-%d", i)
		egressIPYamlPath := tmpDirEgressIP + "/" + fmt.Sprintf("egressip-%d.yaml", i)
		createEgressIPObject(oc, egressIPYamlPath, egressIPObjectName, egressIPNamespace, "", egressIPSet)

		g.By(fmt.Sprintf("Applying %dth EgressIP object ", i))
		applyEgressIPObject(oc, cloudNetworkClientset, egressIPYamlPath, egressIPObjectName, egressIPSet, egressUpdateTimeout)
	}

	g.By("Rebooting the node gracefully")
	err = exutil.TriggerNodeRebootGraceful(clientset, egressNodeName)
	o.Expect(err).NotTo(o.HaveOccurred())

	g.By("Waiting for the node to become NotReady")
	isNotReady := e2enode.WaitForNodeToBeNotReady(ctx, clientset, egressNodeName, 10*time.Minute)
	o.Expect(isNotReady).To(o.BeTrue(), "Node should become NotReady after reboot trigger")

	g.By("Waiting for the node to become Ready again after reboot")
	isReady := e2enode.WaitForNodeToBeReady(ctx, clientset, egressNodeName, 15*time.Minute)
	o.Expect(isReady).To(o.BeTrue(), "Node should become Ready again after reboot")

	g.By("Waiting for all EgressIPs to be assigned")
	err = waitAllEgressIPsAssigned(oc, 15*time.Minute)
	o.Expect(err).NotTo(o.HaveOccurred())

	g.By("Get capacity of the Egress node after reboot")
	// Re-fetch the node: the object read before the reboot is a stale snapshot, and
	// deriving the capacity from it again would make the comparison below always pass.
	egressNode, err = clientset.CoreV1().Nodes().Get(ctx, egressNodeName, metav1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())
	nodeEgressIPConfigsAfterReboot, err := getNodeEgressIPConfiguration(egressNode)
	o.Expect(err).NotTo(o.HaveOccurred())
	o.Expect(nodeEgressIPConfigsAfterReboot).NotTo(o.BeEmpty(), "node %s has no EgressIP configuration after reboot", egressNodeName)
	capacityAfterReboot := nodeEgressIPConfigsAfterReboot[0].Capacity
	framework.Logf("The capacity of node %s after reboot is %v", egressNodeName, capacityAfterReboot)

	g.By("Checking CloudPrivateIPConfigs for CloudResponseError, should be 0, after node reboot")
	errorCount, err := countCloudPrivateIPConfigsByReason(oc, "CloudResponseError")
	o.Expect(err).NotTo(o.HaveOccurred())
	framework.Logf("Found %d CloudPrivateIPConfigs with CloudResponseError", errorCount)
	o.Expect(errorCount).To(o.Equal(0), "Expected no CloudPrivateIPConfigs with CloudResponseError, but found %d", errorCount)

	g.By("Checking CloudPrivateIPConfigs for CloudResponseSuccess")
	successCount, err := countCloudPrivateIPConfigsByReason(oc, "CloudResponseSuccess")
	o.Expect(err).NotTo(o.HaveOccurred())
	framework.Logf("Found %d CloudPrivateIPConfigs with CloudResponseSuccess", successCount)
	o.Expect(successCount).To(o.Equal(egressIPsPerNode), "Expected %d CloudPrivateIPConfigs with CloudResponseSuccess, but found %d", egressIPsPerNode, successCount)

	g.By("Comparing capacity before and after reboot")
	o.Expect(capacityAfterReboot).To(o.Equal(capacityBeforeReboot),
		"EgressIP capacity should remain the same after node reboot. Before: %v, After: %v",
		capacityBeforeReboot, capacityAfterReboot)

	g.By("Restarting the CNCC pod")
	restartCNCCPod(oc, clientset)

	g.By("Get capacity of the Egress node after CNCC restart")
	// Same reasoning as after the reboot: read a fresh node object.
	egressNode, err = clientset.CoreV1().Nodes().Get(ctx, egressNodeName, metav1.GetOptions{})
	o.Expect(err).NotTo(o.HaveOccurred())
	nodeEgressIPConfigsAfterCNCCRestart, err := getNodeEgressIPConfiguration(egressNode)
	o.Expect(err).NotTo(o.HaveOccurred())
	o.Expect(nodeEgressIPConfigsAfterCNCCRestart).NotTo(o.BeEmpty(), "node %s has no EgressIP configuration after CNCC restart", egressNodeName)
	capacityAfterCNCCRestart := nodeEgressIPConfigsAfterCNCCRestart[0].Capacity
	framework.Logf("The capacity of node %s after CNCC restart is %v", egressNodeName, capacityAfterCNCCRestart)

	g.By("Comparing capacity before and after CNCC restart")
	o.Expect(capacityAfterCNCCRestart).To(o.Equal(capacityBeforeReboot),
		"EgressIP capacity should remain the same after CNCC restart. Before: %v, After: %v",
		capacityBeforeReboot, capacityAfterCNCCRestart)

	g.By("Checking CloudPrivateIPConfigs for CloudResponseError,should be 0, after CNCC restart")
	errorCount, err = countCloudPrivateIPConfigsByReason(oc, "CloudResponseError")
	o.Expect(err).NotTo(o.HaveOccurred())
	framework.Logf("Found %d CloudPrivateIPConfigs with CloudResponseError", errorCount)
	o.Expect(errorCount).To(o.Equal(0), "Expected no CloudPrivateIPConfigs with CloudResponseError, but found %d", errorCount)

	g.By("Checking CloudPrivateIPConfigs for CloudResponseSuccess")
	successCount, err = countCloudPrivateIPConfigsByReason(oc, "CloudResponseSuccess")
	o.Expect(err).NotTo(o.HaveOccurred())
	framework.Logf("Found %d CloudPrivateIPConfigs with CloudResponseSuccess", successCount)
	o.Expect(successCount).To(o.Equal(egressIPsPerNode), "Expected %d CloudPrivateIPConfigs with CloudResponseSuccess, but found %d", egressIPsPerNode, successCount)
})
567686
}) // end testing to external targets
568687
})
569688

test/extended/networking/egressip_helpers.go

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ import (
1818
"time"
1919

2020
"github.com/google/uuid"
21+
o "github.com/onsi/gomega"
2122
appsv1 "k8s.io/api/apps/v1"
2223
corev1 "k8s.io/api/core/v1"
2324
v1 "k8s.io/api/core/v1"
@@ -1738,3 +1739,97 @@ func getEgressIP(oc *exutil.CLI, name string) (*EgressIP, error) {
17381739
}
17391740
return egressip, nil
17401741
}
1742+
1743+
// restartCNCCPod restarts the CNCC pod by deleting it and waiting for it to become Ready again.
1744+
func restartCNCCPod(oc *exutil.CLI, clientset kubernetes.Interface) {
1745+
framework.Logf("Restarting CNCC pod by deleting it")
1746+
cnccPodLabel := exutil.ParseLabelsOrDie("app=cloud-network-config-controller")
1747+
cnccNamespace := "openshift-cloud-network-config-controller"
1748+
cnccPodNames, err := exutil.GetPodNamesByFilter(clientset.CoreV1().Pods(cnccNamespace), cnccPodLabel, exutil.CheckPodIsRunning)
1749+
o.Expect(err).NotTo(o.HaveOccurred())
1750+
o.Expect(cnccPodNames).NotTo(o.BeEmpty())
1751+
_, err = runOcWithRetry(oc.AsAdmin(), "delete", "pod", cnccPodNames[0], "-n", cnccNamespace)
1752+
o.Expect(err).NotTo(o.HaveOccurred())
1753+
1754+
framework.Logf("Waiting for the CNCC pod to become Ready again after restart")
1755+
_, err = exutil.WaitForPods(
1756+
clientset.CoreV1().Pods(cnccNamespace),
1757+
cnccPodLabel,
1758+
exutil.CheckPodIsRunning,
1759+
1,
1760+
2*time.Minute)
1761+
o.Expect(err).NotTo(o.HaveOccurred())
1762+
}
1763+
1764+
// countCloudPrivateIPConfigsByReason counts the number of CloudPrivateIPConfigs that have
1765+
// a condition reason matching the specified reason (e.g., "CloudResponseError" or "CloudResponseSuccess").
1766+
func countCloudPrivateIPConfigsByReason(oc *exutil.CLI, reason string) (int, error) {
1767+
output, err := runOcWithRetry(oc.AsAdmin(), "get", "cloudprivateipconfigs",
1768+
"-o", "custom-columns=NAME:.metadata.name,NODE:.spec.node,STATE:.status.conditions[].reason",
1769+
"--no-headers")
1770+
if err != nil {
1771+
return 0, fmt.Errorf("failed to get cloudprivateipconfigs: %v", err)
1772+
}
1773+
1774+
trimmedOutput := strings.TrimSpace(output)
1775+
if trimmedOutput == "" {
1776+
return 0, nil
1777+
}
1778+
1779+
lines := strings.Split(trimmedOutput, "\n")
1780+
count := 0
1781+
for _, line := range lines {
1782+
trimmedLine := strings.TrimSpace(line)
1783+
if trimmedLine != "" && strings.Contains(trimmedLine, reason) {
1784+
count++
1785+
}
1786+
}
1787+
return count, nil
1788+
}
1789+
1790+
// waitAllEgressIPsAssigned waits for all EgressIPs to have status.Items populated.
1791+
func waitAllEgressIPsAssigned(oc *exutil.CLI, timeout time.Duration) error { // egressips
1792+
egressipList, err := listEgressIPs(oc)
1793+
if err != nil {
1794+
return err
1795+
}
1796+
for _, egressip := range egressipList.Items {
1797+
// Wait for status.Items to be populated
1798+
err = wait.PollUntilContextTimeout(context.TODO(), 10*time.Second, timeout, true, func(ctx context.Context) (bool, error) {
1799+
eip, err := getEgressIP(oc, egressip.Name)
1800+
if err != nil {
1801+
return false, fmt.Errorf("failed to get EgressIP %s: %v", egressip.Name, err)
1802+
}
1803+
// Check that status is not empty
1804+
if len(eip.Status.Items) == 0 {
1805+
framework.Logf("EgressIP %s has empty status.Items, waiting...", egressip.Name)
1806+
return false, nil
1807+
}
1808+
// Check that each IP in spec has a corresponding entry in status.Items with node assigned
1809+
for _, ip := range egressip.Spec.EgressIPs {
1810+
foundInStatus := false
1811+
for _, statusItem := range eip.Status.Items {
1812+
if statusItem.EgressIP == ip {
1813+
if statusItem.Node == "" {
1814+
framework.Logf("EgressIP %s found in status but node is empty, waiting...", ip)
1815+
return false, nil
1816+
}
1817+
foundInStatus = true
1818+
break
1819+
}
1820+
}
1821+
if !foundInStatus {
1822+
framework.Logf("EgressIP %s not found in status.Items, waiting...", ip)
1823+
return false, nil
1824+
}
1825+
}
1826+
framework.Logf("EgressIP %s has all IPs assigned in status.Items", egressip.Name)
1827+
return true, nil
1828+
})
1829+
if err != nil {
1830+
return fmt.Errorf("EgressIP %s failed to appear in status.Items: %v", egressip.Name, err)
1831+
}
1832+
}
1833+
1834+
return nil
1835+
}

0 commit comments

Comments
 (0)