Skip to content

Commit 1ce9ca3

Browse files
committed
feat: add composable/filterable default tests
Add a getCheck function and refactor checks to constants. Add a new function PreBootSequenceChecksFiltered to allow users to purposefully skip some checks. Signed-off-by: shellwhale <[email protected]>
1 parent c949f55 commit 1ce9ca3

File tree

1 file changed

+196
-119
lines changed

1 file changed

+196
-119
lines changed

pkg/cluster/check/default.go

Lines changed: 196 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -13,57 +13,171 @@ import (
1313
"github.com/siderolabs/talos/pkg/machinery/config/machine"
1414
)
1515

16+
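// Names of the checks returned by PreBootSequenceChecks. Pass any of them to
// PreBootSequenceChecksFiltered to leave that check out.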
17+
const (
18+
CheckEtcdHealthy = "etcd to be healthy"
19+
CheckEtcdConsistent = "etcd members to be consistent across nodes"
20+
CheckEtcdControlPlane = "etcd members to be control plane nodes"
21+
CheckApidReady = "apid to be ready"
22+
CheckAllNodesMemorySizes = "all nodes memory sizes"
23+
CheckAllNodesDiskSizes = "all nodes disk sizes"
24+
CheckNoDiagnostics = "no diagnostics"
25+
CheckKubeletHealthy = "kubelet to be healthy"
26+
CheckAllNodesBootSequenceFinished = "all nodes to finish boot sequence"
27+
)
28+
29+
// Names of the checks returned by K8sComponentsReadinessChecks.
30+
const (
31+
CheckK8sAllNodesReported = "all k8s nodes to report"
32+
CheckControlPlaneStaticPodsRunning = "all control plane static pods to be running"
33+
CheckControlPlaneComponentsReady = "all control plane components to be ready"
34+
)
35+
36+
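// Names of the additional checks which DefaultClusterChecks appends after the
// pre-boot and Kubernetes component checks.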
37+
const (
38+
CheckK8sAllNodesReady = "all k8s nodes to report ready"
39+
CheckKubeProxyReady = "kube-proxy to report ready"
40+
CheckCoreDNSReady = "coredns to report ready"
41+
CheckK8sNodesSchedulable = "all k8s nodes to report schedulable"
42+
)
43+
44+
func getCheck(name string) ClusterCheck {
45+
switch name {
46+
// PreBootSequenceChecks
47+
case CheckEtcdHealthy:
48+
return func(cluster ClusterInfo) conditions.Condition {
49+
return conditions.PollingCondition(CheckEtcdHealthy, func(ctx context.Context) error {
50+
return ServiceHealthAssertion(ctx, cluster, "etcd", WithNodeTypes(machine.TypeInit, machine.TypeControlPlane))
51+
}, 5*time.Minute, 5*time.Second)
52+
}
53+
case CheckEtcdConsistent:
54+
return func(cluster ClusterInfo) conditions.Condition {
55+
return conditions.PollingCondition(CheckEtcdConsistent, func(ctx context.Context) error {
56+
return EtcdConsistentAssertion(ctx, cluster)
57+
}, 5*time.Minute, 5*time.Second)
58+
}
59+
case CheckEtcdControlPlane:
60+
return func(cluster ClusterInfo) conditions.Condition {
61+
return conditions.PollingCondition(CheckEtcdControlPlane, func(ctx context.Context) error {
62+
return EtcdControlPlaneNodesAssertion(ctx, cluster)
63+
}, 5*time.Minute, 5*time.Second)
64+
}
65+
case CheckApidReady:
66+
return func(cluster ClusterInfo) conditions.Condition {
67+
return conditions.PollingCondition(CheckApidReady, func(ctx context.Context) error {
68+
return ApidReadyAssertion(ctx, cluster)
69+
}, 5*time.Minute, 5*time.Second)
70+
}
71+
case CheckAllNodesMemorySizes:
72+
return func(cluster ClusterInfo) conditions.Condition {
73+
return conditions.PollingCondition(CheckAllNodesMemorySizes, func(ctx context.Context) error {
74+
return AllNodesMemorySizes(ctx, cluster)
75+
}, 5*time.Minute, 5*time.Second)
76+
}
77+
case CheckAllNodesDiskSizes:
78+
return func(cluster ClusterInfo) conditions.Condition {
79+
return conditions.PollingCondition(CheckAllNodesDiskSizes, func(ctx context.Context) error {
80+
return AllNodesDiskSizes(ctx, cluster)
81+
}, 5*time.Minute, 5*time.Second)
82+
}
83+
case CheckNoDiagnostics:
84+
return func(cluster ClusterInfo) conditions.Condition {
85+
return conditions.PollingCondition(CheckNoDiagnostics, func(ctx context.Context) error {
86+
return NoDiagnostics(ctx, cluster)
87+
}, time.Minute, 5*time.Second)
88+
}
89+
case CheckKubeletHealthy:
90+
return func(cluster ClusterInfo) conditions.Condition {
91+
return conditions.PollingCondition(CheckKubeletHealthy, func(ctx context.Context) error {
92+
return ServiceHealthAssertion(ctx, cluster, "kubelet", WithNodeTypes(machine.TypeInit, machine.TypeControlPlane))
93+
}, 5*time.Minute, 5*time.Second)
94+
}
95+
case CheckAllNodesBootSequenceFinished:
96+
return func(cluster ClusterInfo) conditions.Condition {
97+
return conditions.PollingCondition(CheckAllNodesBootSequenceFinished, func(ctx context.Context) error {
98+
return AllNodesBootedAssertion(ctx, cluster)
99+
}, 5*time.Minute, 5*time.Second)
100+
}
101+
102+
// K8sComponentsReadinessChecks
103+
case CheckK8sAllNodesReported:
104+
return func(cluster ClusterInfo) conditions.Condition {
105+
return conditions.PollingCondition(CheckK8sAllNodesReported, func(ctx context.Context) error {
106+
return K8sAllNodesReportedAssertion(ctx, cluster)
107+
}, 5*time.Minute, 30*time.Second)
108+
}
109+
case CheckControlPlaneStaticPodsRunning:
110+
return func(cluster ClusterInfo) conditions.Condition {
111+
return conditions.PollingCondition(CheckControlPlaneStaticPodsRunning, func(ctx context.Context) error {
112+
return K8sControlPlaneStaticPods(ctx, cluster)
113+
}, 5*time.Minute, 5*time.Second)
114+
}
115+
case CheckControlPlaneComponentsReady:
116+
return func(cluster ClusterInfo) conditions.Condition {
117+
return conditions.PollingCondition(CheckControlPlaneComponentsReady, func(ctx context.Context) error {
118+
return K8sFullControlPlaneAssertion(ctx, cluster)
119+
}, 5*time.Minute, 5*time.Second)
120+
}
121+
122+
// Additional Checks for Default Cluster Checks
123+
case CheckK8sAllNodesReady:
124+
return func(cluster ClusterInfo) conditions.Condition {
125+
return conditions.PollingCondition(CheckK8sAllNodesReady, func(ctx context.Context) error {
126+
return K8sAllNodesReadyAssertion(ctx, cluster)
127+
}, 10*time.Minute, 5*time.Second)
128+
}
129+
case CheckKubeProxyReady:
130+
return func(cluster ClusterInfo) conditions.Condition {
131+
return conditions.PollingCondition(CheckKubeProxyReady, func(ctx context.Context) error {
132+
present, replicas, err := DaemonSetPresent(ctx, cluster, "kube-system", "k8s-app=kube-proxy")
133+
if err != nil {
134+
return err
135+
}
136+
if !present {
137+
return conditions.ErrSkipAssertion
138+
}
139+
return K8sPodReadyAssertion(ctx, cluster, replicas, "kube-system", "k8s-app=kube-proxy")
140+
}, 5*time.Minute, 5*time.Second)
141+
}
142+
case CheckCoreDNSReady:
143+
return func(cluster ClusterInfo) conditions.Condition {
144+
return conditions.PollingCondition(CheckCoreDNSReady, func(ctx context.Context) error {
145+
present, replicas, err := DeploymentPresent(ctx, cluster, "kube-system", "k8s-app=kube-dns")
146+
if err != nil {
147+
return err
148+
}
149+
if !present {
150+
return conditions.ErrSkipAssertion
151+
}
152+
return K8sPodReadyAssertion(ctx, cluster, replicas, "kube-system", "k8s-app=kube-dns")
153+
}, 5*time.Minute, 5*time.Second)
154+
}
155+
case CheckK8sNodesSchedulable:
156+
return func(cluster ClusterInfo) conditions.Condition {
157+
return conditions.PollingCondition(CheckK8sNodesSchedulable, func(ctx context.Context) error {
158+
return K8sAllNodesSchedulableAssertion(ctx, cluster)
159+
}, 5*time.Minute, 5*time.Second)
160+
}
161+
default:
162+
panic("unknown check name: " + name)
163+
}
164+
}
165+
16166
// DefaultClusterChecks returns a set of default Talos cluster readiness checks.
17167
func DefaultClusterChecks() []ClusterCheck {
168+
// Concatenate pre-boot, Kubernetes component, and additional checks.
18169
return slices.Concat(
19170
PreBootSequenceChecks(),
20171
K8sComponentsReadinessChecks(),
21172
[]ClusterCheck{
22173
// wait for all the nodes to report ready at k8s level
23-
func(cluster ClusterInfo) conditions.Condition {
24-
return conditions.PollingCondition("all k8s nodes to report ready", func(ctx context.Context) error {
25-
return K8sAllNodesReadyAssertion(ctx, cluster)
26-
}, 10*time.Minute, 5*time.Second)
27-
},
28-
174+
getCheck(CheckK8sAllNodesReady),
29175
// wait for kube-proxy to report ready
30-
func(cluster ClusterInfo) conditions.Condition {
31-
return conditions.PollingCondition("kube-proxy to report ready", func(ctx context.Context) error {
32-
present, replicas, err := DaemonSetPresent(ctx, cluster, "kube-system", "k8s-app=kube-proxy")
33-
if err != nil {
34-
return err
35-
}
36-
37-
if !present {
38-
return conditions.ErrSkipAssertion
39-
}
40-
41-
return K8sPodReadyAssertion(ctx, cluster, replicas, "kube-system", "k8s-app=kube-proxy")
42-
}, 5*time.Minute, 5*time.Second)
43-
},
44-
176+
getCheck(CheckKubeProxyReady),
45177
// wait for coredns to report ready
46-
func(cluster ClusterInfo) conditions.Condition {
47-
return conditions.PollingCondition("coredns to report ready", func(ctx context.Context) error {
48-
present, replicas, err := DeploymentPresent(ctx, cluster, "kube-system", "k8s-app=kube-dns")
49-
if err != nil {
50-
return err
51-
}
52-
53-
if !present {
54-
return conditions.ErrSkipAssertion
55-
}
56-
57-
return K8sPodReadyAssertion(ctx, cluster, replicas, "kube-system", "k8s-app=kube-dns")
58-
}, 5*time.Minute, 5*time.Second)
59-
},
60-
178+
getCheck(CheckCoreDNSReady),
61179
// wait for all the nodes to be schedulable
62-
func(cluster ClusterInfo) conditions.Condition {
63-
return conditions.PollingCondition("all k8s nodes to report schedulable", func(ctx context.Context) error {
64-
return K8sAllNodesSchedulableAssertion(ctx, cluster)
65-
}, 5*time.Minute, 5*time.Second)
66-
},
180+
getCheck(CheckK8sNodesSchedulable),
67181
},
68182
)
69183
}
@@ -74,25 +188,11 @@ func DefaultClusterChecks() []ClusterCheck {
74188
func K8sComponentsReadinessChecks() []ClusterCheck {
75189
return []ClusterCheck{
76190
// wait for all the nodes to report in at k8s level
77-
func(cluster ClusterInfo) conditions.Condition {
78-
return conditions.PollingCondition("all k8s nodes to report", func(ctx context.Context) error {
79-
return K8sAllNodesReportedAssertion(ctx, cluster)
80-
}, 5*time.Minute, 30*time.Second) // give more time per each attempt, as this check is going to build and cache kubeconfig
81-
},
82-
191+
getCheck(CheckK8sAllNodesReported),
83192
// wait for k8s control plane static pods
84-
func(cluster ClusterInfo) conditions.Condition {
85-
return conditions.PollingCondition("all control plane static pods to be running", func(ctx context.Context) error {
86-
return K8sControlPlaneStaticPods(ctx, cluster)
87-
}, 5*time.Minute, 5*time.Second)
88-
},
89-
193+
getCheck(CheckControlPlaneStaticPodsRunning),
90194
// wait for HA k8s control plane
91-
func(cluster ClusterInfo) conditions.Condition {
92-
return conditions.PollingCondition("all control plane components to be ready", func(ctx context.Context) error {
93-
return K8sFullControlPlaneAssertion(ctx, cluster)
94-
}, 5*time.Minute, 5*time.Second)
95-
},
195+
getCheck(CheckControlPlaneComponentsReady),
96196
}
97197
}
98198

@@ -103,70 +203,47 @@ func ExtraClusterChecks() []ClusterCheck {
103203
return []ClusterCheck{}
104204
}
105205

206+
// preBootSequenceCheckNames returns the list of pre-boot check names.
207+
func preBootSequenceCheckNames() []string {
208+
return []string{
209+
CheckEtcdHealthy,
210+
CheckEtcdConsistent,
211+
CheckEtcdControlPlane,
212+
CheckApidReady,
213+
CheckAllNodesMemorySizes,
214+
CheckAllNodesDiskSizes,
215+
CheckNoDiagnostics,
216+
CheckKubeletHealthy,
217+
CheckAllNodesBootSequenceFinished,
218+
}
219+
}
220+
106221
// PreBootSequenceChecks returns a set of Talos cluster readiness checks which are run before boot sequence.
107222
func PreBootSequenceChecks() []ClusterCheck {
108-
return []ClusterCheck{
109-
// wait for etcd to be healthy on all control plane nodes
110-
func(cluster ClusterInfo) conditions.Condition {
111-
return conditions.PollingCondition("etcd to be healthy", func(ctx context.Context) error {
112-
return ServiceHealthAssertion(ctx, cluster, "etcd", WithNodeTypes(machine.TypeInit, machine.TypeControlPlane))
113-
}, 5*time.Minute, 5*time.Second)
114-
},
115-
116-
// wait for etcd members to be consistent across nodes
117-
func(cluster ClusterInfo) conditions.Condition {
118-
return conditions.PollingCondition("etcd members to be consistent across nodes", func(ctx context.Context) error {
119-
return EtcdConsistentAssertion(ctx, cluster)
120-
}, 5*time.Minute, 5*time.Second)
121-
},
122-
123-
// wait for etcd members to be the control plane nodes
124-
func(cluster ClusterInfo) conditions.Condition {
125-
return conditions.PollingCondition("etcd members to be control plane nodes", func(ctx context.Context) error {
126-
return EtcdControlPlaneNodesAssertion(ctx, cluster)
127-
}, 5*time.Minute, 5*time.Second)
128-
},
129-
130-
// wait for apid to be ready on all the nodes
131-
func(cluster ClusterInfo) conditions.Condition {
132-
return conditions.PollingCondition("apid to be ready", func(ctx context.Context) error {
133-
return ApidReadyAssertion(ctx, cluster)
134-
}, 5*time.Minute, 5*time.Second)
135-
},
136-
137-
// wait for all nodes to report their memory size
138-
func(cluster ClusterInfo) conditions.Condition {
139-
return conditions.PollingCondition("all nodes memory sizes", func(ctx context.Context) error {
140-
return AllNodesMemorySizes(ctx, cluster)
141-
}, 5*time.Minute, 5*time.Second)
142-
},
143-
144-
// wait for all nodes to report their disk size
145-
func(cluster ClusterInfo) conditions.Condition {
146-
return conditions.PollingCondition("all nodes disk sizes", func(ctx context.Context) error {
147-
return AllNodesDiskSizes(ctx, cluster)
148-
}, 5*time.Minute, 5*time.Second)
149-
},
150-
151-
// check diagnostics
152-
func(cluster ClusterInfo) conditions.Condition {
153-
return conditions.PollingCondition("no diagnostics", func(ctx context.Context) error {
154-
return NoDiagnostics(ctx, cluster)
155-
}, time.Minute, 5*time.Second)
156-
},
223+
return PreBootSequenceChecksFiltered(nil)
224+
}
157225

158-
// wait for kubelet to be healthy on all
159-
func(cluster ClusterInfo) conditions.Condition {
160-
return conditions.PollingCondition("kubelet to be healthy", func(ctx context.Context) error {
161-
return ServiceHealthAssertion(ctx, cluster, "kubelet", WithNodeTypes(machine.TypeInit, machine.TypeControlPlane))
162-
}, 5*time.Minute, 5*time.Second)
163-
},
226+
// PreBootSequenceChecksFiltered returns a filtered version of the PreBootSequenceChecks,
227+
// removing any checks whose names appear in the provided 'skips' list.
228+
func PreBootSequenceChecksFiltered(skips []string) []ClusterCheck {
229+
checkNames := []string{
230+
CheckEtcdHealthy,
231+
CheckEtcdConsistent,
232+
CheckEtcdControlPlane,
233+
CheckApidReady,
234+
CheckAllNodesMemorySizes,
235+
CheckAllNodesDiskSizes,
236+
CheckNoDiagnostics,
237+
CheckKubeletHealthy,
238+
CheckAllNodesBootSequenceFinished,
239+
}
164240

165-
// wait for all nodes to finish booting
166-
func(cluster ClusterInfo) conditions.Condition {
167-
return conditions.PollingCondition("all nodes to finish boot sequence", func(ctx context.Context) error {
168-
return AllNodesBootedAssertion(ctx, cluster)
169-
}, 5*time.Minute, 5*time.Second)
170-
},
241+
var filtered []ClusterCheck
242+
for _, name := range checkNames {
243+
if slices.Contains(skips, name) {
244+
continue
245+
}
246+
filtered = append(filtered, getCheck(name))
171247
}
248+
return filtered
172249
}

0 commit comments

Comments
 (0)