@@ -13,57 +13,171 @@ import (
13
13
"github.com/siderolabs/talos/pkg/machinery/config/machine"
14
14
)
15
15
16
// Names of the individual cluster readiness checks.
//
// Each constant is the name that getCheck switches on, and it is also the
// description string passed to conditions.PollingCondition for that check.
const (
	// Checks that make up PreBootSequenceChecks.

	// CheckEtcdHealthy names the check that asserts the "etcd" service is healthy.
	CheckEtcdHealthy = "etcd to be healthy"
	// CheckEtcdConsistent names the check that runs EtcdConsistentAssertion.
	CheckEtcdConsistent = "etcd members to be consistent across nodes"
	// CheckEtcdControlPlane names the check that runs EtcdControlPlaneNodesAssertion.
	CheckEtcdControlPlane = "etcd members to be control plane nodes"
	// CheckApidReady names the check that runs ApidReadyAssertion.
	CheckApidReady = "apid to be ready"
	// CheckAllNodesMemorySizes names the check that runs AllNodesMemorySizes.
	CheckAllNodesMemorySizes = "all nodes memory sizes"
	// CheckAllNodesDiskSizes names the check that runs AllNodesDiskSizes.
	CheckAllNodesDiskSizes = "all nodes disk sizes"
	// CheckNoDiagnostics names the check that runs NoDiagnostics.
	CheckNoDiagnostics = "no diagnostics"
	// CheckKubeletHealthy names the check that asserts the "kubelet" service is healthy.
	CheckKubeletHealthy = "kubelet to be healthy"
	// CheckAllNodesBootSequenceFinished names the check that runs AllNodesBootedAssertion.
	CheckAllNodesBootSequenceFinished = "all nodes to finish boot sequence"

	// Checks that make up K8sComponentsReadinessChecks.

	// CheckK8sAllNodesReported names the check that runs K8sAllNodesReportedAssertion.
	CheckK8sAllNodesReported = "all k8s nodes to report"
	// CheckControlPlaneStaticPodsRunning names the check that runs K8sControlPlaneStaticPods.
	CheckControlPlaneStaticPodsRunning = "all control plane static pods to be running"
	// CheckControlPlaneComponentsReady names the check that runs K8sFullControlPlaneAssertion.
	CheckControlPlaneComponentsReady = "all control plane components to be ready"

	// Additional checks that DefaultClusterChecks appends after the two sets above.

	// CheckK8sAllNodesReady names the check that runs K8sAllNodesReadyAssertion.
	CheckK8sAllNodesReady = "all k8s nodes to report ready"
	// CheckKubeProxyReady names the check that waits for the kube-proxy pods to be ready.
	CheckKubeProxyReady = "kube-proxy to report ready"
	// CheckCoreDNSReady names the check that waits for the coredns pods to be ready.
	CheckCoreDNSReady = "coredns to report ready"
	// CheckK8sNodesSchedulable names the check that runs K8sAllNodesSchedulableAssertion.
	CheckK8sNodesSchedulable = "all k8s nodes to report schedulable"
)
43
+
44
+ func getCheck (name string ) ClusterCheck {
45
+ switch name {
46
+ // PreBootSequenceChecks
47
+ case CheckEtcdHealthy :
48
+ return func (cluster ClusterInfo ) conditions.Condition {
49
+ return conditions .PollingCondition (CheckEtcdHealthy , func (ctx context.Context ) error {
50
+ return ServiceHealthAssertion (ctx , cluster , "etcd" , WithNodeTypes (machine .TypeInit , machine .TypeControlPlane ))
51
+ }, 5 * time .Minute , 5 * time .Second )
52
+ }
53
+ case CheckEtcdConsistent :
54
+ return func (cluster ClusterInfo ) conditions.Condition {
55
+ return conditions .PollingCondition (CheckEtcdConsistent , func (ctx context.Context ) error {
56
+ return EtcdConsistentAssertion (ctx , cluster )
57
+ }, 5 * time .Minute , 5 * time .Second )
58
+ }
59
+ case CheckEtcdControlPlane :
60
+ return func (cluster ClusterInfo ) conditions.Condition {
61
+ return conditions .PollingCondition (CheckEtcdControlPlane , func (ctx context.Context ) error {
62
+ return EtcdControlPlaneNodesAssertion (ctx , cluster )
63
+ }, 5 * time .Minute , 5 * time .Second )
64
+ }
65
+ case CheckApidReady :
66
+ return func (cluster ClusterInfo ) conditions.Condition {
67
+ return conditions .PollingCondition (CheckApidReady , func (ctx context.Context ) error {
68
+ return ApidReadyAssertion (ctx , cluster )
69
+ }, 5 * time .Minute , 5 * time .Second )
70
+ }
71
+ case CheckAllNodesMemorySizes :
72
+ return func (cluster ClusterInfo ) conditions.Condition {
73
+ return conditions .PollingCondition (CheckAllNodesMemorySizes , func (ctx context.Context ) error {
74
+ return AllNodesMemorySizes (ctx , cluster )
75
+ }, 5 * time .Minute , 5 * time .Second )
76
+ }
77
+ case CheckAllNodesDiskSizes :
78
+ return func (cluster ClusterInfo ) conditions.Condition {
79
+ return conditions .PollingCondition (CheckAllNodesDiskSizes , func (ctx context.Context ) error {
80
+ return AllNodesDiskSizes (ctx , cluster )
81
+ }, 5 * time .Minute , 5 * time .Second )
82
+ }
83
+ case CheckNoDiagnostics :
84
+ return func (cluster ClusterInfo ) conditions.Condition {
85
+ return conditions .PollingCondition (CheckNoDiagnostics , func (ctx context.Context ) error {
86
+ return NoDiagnostics (ctx , cluster )
87
+ }, time .Minute , 5 * time .Second )
88
+ }
89
+ case CheckKubeletHealthy :
90
+ return func (cluster ClusterInfo ) conditions.Condition {
91
+ return conditions .PollingCondition (CheckKubeletHealthy , func (ctx context.Context ) error {
92
+ return ServiceHealthAssertion (ctx , cluster , "kubelet" , WithNodeTypes (machine .TypeInit , machine .TypeControlPlane ))
93
+ }, 5 * time .Minute , 5 * time .Second )
94
+ }
95
+ case CheckAllNodesBootSequenceFinished :
96
+ return func (cluster ClusterInfo ) conditions.Condition {
97
+ return conditions .PollingCondition (CheckAllNodesBootSequenceFinished , func (ctx context.Context ) error {
98
+ return AllNodesBootedAssertion (ctx , cluster )
99
+ }, 5 * time .Minute , 5 * time .Second )
100
+ }
101
+
102
+ // K8sComponentsReadinessChecks
103
+ case CheckK8sAllNodesReported :
104
+ return func (cluster ClusterInfo ) conditions.Condition {
105
+ return conditions .PollingCondition (CheckK8sAllNodesReported , func (ctx context.Context ) error {
106
+ return K8sAllNodesReportedAssertion (ctx , cluster )
107
+ }, 5 * time .Minute , 30 * time .Second )
108
+ }
109
+ case CheckControlPlaneStaticPodsRunning :
110
+ return func (cluster ClusterInfo ) conditions.Condition {
111
+ return conditions .PollingCondition (CheckControlPlaneStaticPodsRunning , func (ctx context.Context ) error {
112
+ return K8sControlPlaneStaticPods (ctx , cluster )
113
+ }, 5 * time .Minute , 5 * time .Second )
114
+ }
115
+ case CheckControlPlaneComponentsReady :
116
+ return func (cluster ClusterInfo ) conditions.Condition {
117
+ return conditions .PollingCondition (CheckControlPlaneComponentsReady , func (ctx context.Context ) error {
118
+ return K8sFullControlPlaneAssertion (ctx , cluster )
119
+ }, 5 * time .Minute , 5 * time .Second )
120
+ }
121
+
122
+ // Additional Checks for Default Cluster Checks
123
+ case CheckK8sAllNodesReady :
124
+ return func (cluster ClusterInfo ) conditions.Condition {
125
+ return conditions .PollingCondition (CheckK8sAllNodesReady , func (ctx context.Context ) error {
126
+ return K8sAllNodesReadyAssertion (ctx , cluster )
127
+ }, 10 * time .Minute , 5 * time .Second )
128
+ }
129
+ case CheckKubeProxyReady :
130
+ return func (cluster ClusterInfo ) conditions.Condition {
131
+ return conditions .PollingCondition (CheckKubeProxyReady , func (ctx context.Context ) error {
132
+ present , replicas , err := DaemonSetPresent (ctx , cluster , "kube-system" , "k8s-app=kube-proxy" )
133
+ if err != nil {
134
+ return err
135
+ }
136
+ if ! present {
137
+ return conditions .ErrSkipAssertion
138
+ }
139
+ return K8sPodReadyAssertion (ctx , cluster , replicas , "kube-system" , "k8s-app=kube-proxy" )
140
+ }, 5 * time .Minute , 5 * time .Second )
141
+ }
142
+ case CheckCoreDNSReady :
143
+ return func (cluster ClusterInfo ) conditions.Condition {
144
+ return conditions .PollingCondition (CheckCoreDNSReady , func (ctx context.Context ) error {
145
+ present , replicas , err := DeploymentPresent (ctx , cluster , "kube-system" , "k8s-app=kube-dns" )
146
+ if err != nil {
147
+ return err
148
+ }
149
+ if ! present {
150
+ return conditions .ErrSkipAssertion
151
+ }
152
+ return K8sPodReadyAssertion (ctx , cluster , replicas , "kube-system" , "k8s-app=kube-dns" )
153
+ }, 5 * time .Minute , 5 * time .Second )
154
+ }
155
+ case CheckK8sNodesSchedulable :
156
+ return func (cluster ClusterInfo ) conditions.Condition {
157
+ return conditions .PollingCondition (CheckK8sNodesSchedulable , func (ctx context.Context ) error {
158
+ return K8sAllNodesSchedulableAssertion (ctx , cluster )
159
+ }, 5 * time .Minute , 5 * time .Second )
160
+ }
161
+ default :
162
+ panic ("unknown check name: " + name )
163
+ }
164
+ }
165
+
16
166
// DefaultClusterChecks returns a set of default Talos cluster readiness checks.
17
167
func DefaultClusterChecks () []ClusterCheck {
168
+ // Concatenate pre-boot, Kubernetes component, and additional checks.
18
169
return slices .Concat (
19
170
PreBootSequenceChecks (),
20
171
K8sComponentsReadinessChecks (),
21
172
[]ClusterCheck {
22
173
// wait for all the nodes to report ready at k8s level
23
- func (cluster ClusterInfo ) conditions.Condition {
24
- return conditions .PollingCondition ("all k8s nodes to report ready" , func (ctx context.Context ) error {
25
- return K8sAllNodesReadyAssertion (ctx , cluster )
26
- }, 10 * time .Minute , 5 * time .Second )
27
- },
28
-
174
+ getCheck (CheckK8sAllNodesReady ),
29
175
// wait for kube-proxy to report ready
30
- func (cluster ClusterInfo ) conditions.Condition {
31
- return conditions .PollingCondition ("kube-proxy to report ready" , func (ctx context.Context ) error {
32
- present , replicas , err := DaemonSetPresent (ctx , cluster , "kube-system" , "k8s-app=kube-proxy" )
33
- if err != nil {
34
- return err
35
- }
36
-
37
- if ! present {
38
- return conditions .ErrSkipAssertion
39
- }
40
-
41
- return K8sPodReadyAssertion (ctx , cluster , replicas , "kube-system" , "k8s-app=kube-proxy" )
42
- }, 5 * time .Minute , 5 * time .Second )
43
- },
44
-
176
+ getCheck (CheckKubeProxyReady ),
45
177
// wait for coredns to report ready
46
- func (cluster ClusterInfo ) conditions.Condition {
47
- return conditions .PollingCondition ("coredns to report ready" , func (ctx context.Context ) error {
48
- present , replicas , err := DeploymentPresent (ctx , cluster , "kube-system" , "k8s-app=kube-dns" )
49
- if err != nil {
50
- return err
51
- }
52
-
53
- if ! present {
54
- return conditions .ErrSkipAssertion
55
- }
56
-
57
- return K8sPodReadyAssertion (ctx , cluster , replicas , "kube-system" , "k8s-app=kube-dns" )
58
- }, 5 * time .Minute , 5 * time .Second )
59
- },
60
-
178
+ getCheck (CheckCoreDNSReady ),
61
179
// wait for all the nodes to be schedulable
62
- func (cluster ClusterInfo ) conditions.Condition {
63
- return conditions .PollingCondition ("all k8s nodes to report schedulable" , func (ctx context.Context ) error {
64
- return K8sAllNodesSchedulableAssertion (ctx , cluster )
65
- }, 5 * time .Minute , 5 * time .Second )
66
- },
180
+ getCheck (CheckK8sNodesSchedulable ),
67
181
},
68
182
)
69
183
}
@@ -74,25 +188,11 @@ func DefaultClusterChecks() []ClusterCheck {
74
188
func K8sComponentsReadinessChecks () []ClusterCheck {
75
189
return []ClusterCheck {
76
190
// wait for all the nodes to report in at k8s level
77
- func (cluster ClusterInfo ) conditions.Condition {
78
- return conditions .PollingCondition ("all k8s nodes to report" , func (ctx context.Context ) error {
79
- return K8sAllNodesReportedAssertion (ctx , cluster )
80
- }, 5 * time .Minute , 30 * time .Second ) // give more time per each attempt, as this check is going to build and cache kubeconfig
81
- },
82
-
191
+ getCheck (CheckK8sAllNodesReported ),
83
192
// wait for k8s control plane static pods
84
- func (cluster ClusterInfo ) conditions.Condition {
85
- return conditions .PollingCondition ("all control plane static pods to be running" , func (ctx context.Context ) error {
86
- return K8sControlPlaneStaticPods (ctx , cluster )
87
- }, 5 * time .Minute , 5 * time .Second )
88
- },
89
-
193
+ getCheck (CheckControlPlaneStaticPodsRunning ),
90
194
// wait for HA k8s control plane
91
- func (cluster ClusterInfo ) conditions.Condition {
92
- return conditions .PollingCondition ("all control plane components to be ready" , func (ctx context.Context ) error {
93
- return K8sFullControlPlaneAssertion (ctx , cluster )
94
- }, 5 * time .Minute , 5 * time .Second )
95
- },
195
+ getCheck (CheckControlPlaneComponentsReady ),
96
196
}
97
197
}
98
198
@@ -103,70 +203,47 @@ func ExtraClusterChecks() []ClusterCheck {
103
203
return []ClusterCheck {}
104
204
}
105
205
206
+ // preBootSequenceCheckNames returns the list of pre-boot check names.
207
+ func preBootSequenceCheckNames () []string {
208
+ return []string {
209
+ CheckEtcdHealthy ,
210
+ CheckEtcdConsistent ,
211
+ CheckEtcdControlPlane ,
212
+ CheckApidReady ,
213
+ CheckAllNodesMemorySizes ,
214
+ CheckAllNodesDiskSizes ,
215
+ CheckNoDiagnostics ,
216
+ CheckKubeletHealthy ,
217
+ CheckAllNodesBootSequenceFinished ,
218
+ }
219
+ }
220
+
106
221
// PreBootSequenceChecks returns a set of Talos cluster readiness checks which are run before boot sequence.
107
222
func PreBootSequenceChecks () []ClusterCheck {
108
- return []ClusterCheck {
109
- // wait for etcd to be healthy on all control plane nodes
110
- func (cluster ClusterInfo ) conditions.Condition {
111
- return conditions .PollingCondition ("etcd to be healthy" , func (ctx context.Context ) error {
112
- return ServiceHealthAssertion (ctx , cluster , "etcd" , WithNodeTypes (machine .TypeInit , machine .TypeControlPlane ))
113
- }, 5 * time .Minute , 5 * time .Second )
114
- },
115
-
116
- // wait for etcd members to be consistent across nodes
117
- func (cluster ClusterInfo ) conditions.Condition {
118
- return conditions .PollingCondition ("etcd members to be consistent across nodes" , func (ctx context.Context ) error {
119
- return EtcdConsistentAssertion (ctx , cluster )
120
- }, 5 * time .Minute , 5 * time .Second )
121
- },
122
-
123
- // wait for etcd members to be the control plane nodes
124
- func (cluster ClusterInfo ) conditions.Condition {
125
- return conditions .PollingCondition ("etcd members to be control plane nodes" , func (ctx context.Context ) error {
126
- return EtcdControlPlaneNodesAssertion (ctx , cluster )
127
- }, 5 * time .Minute , 5 * time .Second )
128
- },
129
-
130
- // wait for apid to be ready on all the nodes
131
- func (cluster ClusterInfo ) conditions.Condition {
132
- return conditions .PollingCondition ("apid to be ready" , func (ctx context.Context ) error {
133
- return ApidReadyAssertion (ctx , cluster )
134
- }, 5 * time .Minute , 5 * time .Second )
135
- },
136
-
137
- // wait for all nodes to report their memory size
138
- func (cluster ClusterInfo ) conditions.Condition {
139
- return conditions .PollingCondition ("all nodes memory sizes" , func (ctx context.Context ) error {
140
- return AllNodesMemorySizes (ctx , cluster )
141
- }, 5 * time .Minute , 5 * time .Second )
142
- },
143
-
144
- // wait for all nodes to report their disk size
145
- func (cluster ClusterInfo ) conditions.Condition {
146
- return conditions .PollingCondition ("all nodes disk sizes" , func (ctx context.Context ) error {
147
- return AllNodesDiskSizes (ctx , cluster )
148
- }, 5 * time .Minute , 5 * time .Second )
149
- },
150
-
151
- // check diagnostics
152
- func (cluster ClusterInfo ) conditions.Condition {
153
- return conditions .PollingCondition ("no diagnostics" , func (ctx context.Context ) error {
154
- return NoDiagnostics (ctx , cluster )
155
- }, time .Minute , 5 * time .Second )
156
- },
223
+ return PreBootSequenceChecksFiltered (nil )
224
+ }
157
225
158
- // wait for kubelet to be healthy on all
159
- func (cluster ClusterInfo ) conditions.Condition {
160
- return conditions .PollingCondition ("kubelet to be healthy" , func (ctx context.Context ) error {
161
- return ServiceHealthAssertion (ctx , cluster , "kubelet" , WithNodeTypes (machine .TypeInit , machine .TypeControlPlane ))
162
- }, 5 * time .Minute , 5 * time .Second )
163
- },
226
+ // PreBootSequenceChecksFiltered returns a filtered version of the PreBootSequenceChecks,
227
+ // removing any checks whose names appear in the provided 'skips' list.
228
+ func PreBootSequenceChecksFiltered (skips []string ) []ClusterCheck {
229
+ checkNames := []string {
230
+ CheckEtcdHealthy ,
231
+ CheckEtcdConsistent ,
232
+ CheckEtcdControlPlane ,
233
+ CheckApidReady ,
234
+ CheckAllNodesMemorySizes ,
235
+ CheckAllNodesDiskSizes ,
236
+ CheckNoDiagnostics ,
237
+ CheckKubeletHealthy ,
238
+ CheckAllNodesBootSequenceFinished ,
239
+ }
164
240
165
- // wait for all nodes to finish booting
166
- func ( cluster ClusterInfo ) conditions. Condition {
167
- return conditions . PollingCondition ( "all nodes to finish boot sequence" , func ( ctx context. Context ) error {
168
- return AllNodesBootedAssertion ( ctx , cluster )
169
- }, 5 * time . Minute , 5 * time . Second )
170
- },
241
+ var filtered [] ClusterCheck
242
+ for _ , name := range checkNames {
243
+ if slices . Contains ( skips , name ) {
244
+ continue
245
+ }
246
+ filtered = append ( filtered , getCheck ( name ))
171
247
}
248
+ return filtered
172
249
}
0 commit comments