Skip to content

Commit 53b08d0

Browse files
Bug fix: failure to clean-up during CS Machine provisioning if VM provisioned to error state. (kubernetes-sigs#58)
* Deleting capi machine (instead of cs machine) upon VM detected in error state. * Added rbac permissions to enable the CloudStackMachine controller to delete/patch CAPI machines. Fixed race condition that occurs when VMs create in the error state by a) detecting this, and b) adding a finalizer when detected. Enabled the use of klog parameters on the command line. * Defensively narrowed the scope of VM looked up after provisioning error. * Fixed broken unit test setup, to accomodate new calls made in the last commit.
1 parent 66cfacf commit 53b08d0

File tree

5 files changed

+31
-8
lines changed

5 files changed

+31
-8
lines changed

config/rbac/role.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,8 +40,10 @@ rules:
4040
- machines
4141
- machines/status
4242
verbs:
43+
- delete
4344
- get
4445
- list
46+
- patch
4547
- watch
4648
- apiGroups:
4749
- cluster.x-k8s.io

controllers/cloudstackmachine_controller.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ const destoryVMRequeueInterval = 10 * time.Second
6161
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=cloudstackmachines,verbs=get;list;watch;create;update;patch;delete
6262
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=cloudstackmachines/status,verbs=get;update;patch
6363
// +kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=cloudstackmachines/finalizers,verbs=update
64-
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status,verbs=get;list;watch
64+
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machines;machines/status,verbs=get;list;watch;patch;delete
6565
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinesets,verbs=get;list;watch
6666
// +kubebuilder:rbac:groups=controlplane.cluster.x-k8s.io,resources=kubeadmcontrolplanes,verbs=get;list;watch
6767

@@ -71,7 +71,7 @@ const destoryVMRequeueInterval = 10 * time.Second
7171
// For more details, check Reconcile and its Result here:
7272
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/reconcile
7373
func (r *CloudStackMachineReconciler) Reconcile(ctx context.Context, req ctrl.Request) (retRes ctrl.Result, retErr error) {
74-
log := r.Log.WithValues("machine", req.Name, "namespace", req.Namespace)
74+
log := r.Log.WithValues("cloudstackmachine", req.Name, "namespace", req.Namespace)
7575
log.V(1).Info("Reconcile CloudStackMachine")
7676

7777
// Fetch the CloudStackMachine.
@@ -210,6 +210,10 @@ func (r *CloudStackMachineReconciler) reconcile(
210210
controllerutil.AddFinalizer(csMachine, infrav1.MachineFinalizer)
211211
}
212212
} else if err != nil {
213+
if csMachine.Spec.InstanceID != nil {
214+
// Despite the failed VM Creation call, a VM has been created, and will need to be properly destroyed.
215+
controllerutil.AddFinalizer(csMachine, infrav1.MachineFinalizer)
216+
}
213217
return ctrl.Result{}, err
214218
}
215219
} else {
@@ -221,11 +225,12 @@ func (r *CloudStackMachineReconciler) reconcile(
221225
log.Info("Machine instance is Running...")
222226
csMachine.Status.Ready = true
223227
} else if csMachine.Status.InstanceState == "Error" {
224-
log.Info("CloudStackMachine VM in error state. Deleting associated Machine.", "csMachine", csMachine)
225-
if err := r.Client.Delete(ctx, csMachine); err != nil {
228+
log.Info("CloudStackMachine VM in error state. Deleting associated Machine.", "capiMachine", capiMachine, "csMachine", csMachine)
229+
if err := r.Client.Delete(ctx, capiMachine); err != nil {
226230
return ctrl.Result{}, err
227231
}
228-
return ctrl.Result{RequeueAfter: requeueTimeout}, nil
232+
log.Info("CAPI machine deletion requested.")
233+
return ctrl.Result{}, nil // We're done. CAPI Machine will take it from here.
229234
} else {
230235
log.Info(fmt.Sprintf("Instance not ready, is %s.", csMachine.Status.InstanceState))
231236
return ctrl.Result{RequeueAfter: requeueTimeout}, nil

main.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
flag "github.com/spf13/pflag"
2828
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
2929

30+
goflag "flag"
3031
_ "k8s.io/client-go/plugin/pkg/client/auth"
3132
"k8s.io/klog"
3233
"k8s.io/klog/klogr"
@@ -112,8 +113,9 @@ func setFlags() *managerOpts {
112113
}
113114

114115
func main() {
115-
opts := setFlags() // Add our options to flag set.
116-
klog.InitFlags(nil) // Add klog options to flag set.
116+
opts := setFlags() // Add our options to flag set.
117+
klog.InitFlags(nil) // Add klog options to flag set.
118+
flag.CommandLine.AddGoFlagSet(goflag.CommandLine) // Merge klog's gofloag flags into the pflags.
117119
flag.Parse()
118120

119121
ctrl.SetLogger(klogr.New())

pkg/cloud/instance.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,19 @@ func (c *client) GetOrCreateVMInstance(
212212

213213
deployVMResp, err := c.cs.VirtualMachine.DeployVirtualMachine(p)
214214
if err != nil {
215+
// Just because an error was returned doesn't mean a (failed) VM wasn't created and will need to be dealt with.
216+
// Regretfully the deployVMResp may be nil, so we need to get the VM ID with a separate query, so we
217+
// can return it to the caller, so they can clean it up.
218+
listVirtualMachineParams := c.cs.VirtualMachine.NewListVirtualMachinesParams()
219+
listVirtualMachineParams.SetTemplateid(templateID)
220+
listVirtualMachineParams.SetZoneid(csMachine.Status.ZoneID)
221+
listVirtualMachineParams.SetNetworkid(zone.Network.ID)
222+
listVirtualMachineParams.SetName(csMachine.Name)
223+
setIfNotEmpty(csCluster.Status.DomainID, listVirtualMachineParams.SetDomainid)
224+
setIfNotEmpty(csCluster.Spec.Account, listVirtualMachineParams.SetAccount)
225+
if listVirtualMachinesResponse, err2 := c.cs.VirtualMachine.ListVirtualMachines(listVirtualMachineParams); err2 == nil && listVirtualMachinesResponse.Count > 0 {
226+
csMachine.Spec.InstanceID = pointer.StringPtr(listVirtualMachinesResponse.VirtualMachines[0].Id)
227+
}
215228
return err
216229
}
217230
csMachine.Spec.InstanceID = pointer.StringPtr(deployVMResp.Id)

pkg/cloud/instance_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,8 @@ var _ = Describe("Instance", func() {
174174
vms.EXPECT().NewDeployVirtualMachineParams(offeringFakeID, templateFakeID, dummies.Zone1.ID).
175175
Return(&cloudstack.DeployVirtualMachineParams{})
176176
vms.EXPECT().DeployVirtualMachine(gomock.Any()).Return(nil, unknownError)
177-
177+
vms.EXPECT().NewListVirtualMachinesParams().Return(&cloudstack.ListVirtualMachinesParams{})
178+
vms.EXPECT().ListVirtualMachines(gomock.Any()).Return(&cloudstack.ListVirtualMachinesResponse{}, nil)
178179
Ω(client.GetOrCreateVMInstance(dummies.CSMachine1, dummies.CAPIMachine, dummies.CSCluster, "")).
179180
Should(MatchError(unknownErrorMessage))
180181
})

0 commit comments

Comments
 (0)