From 31722afabfe5eaa5a86b14cf78ebd1c90b64ec68 Mon Sep 17 00:00:00 2001 From: melchiormoulin Date: Mon, 21 Apr 2025 09:23:31 +0200 Subject: [PATCH] Allow referencing a DatadogSLO from a DatadogMonitor of SLO alert type. The goal is to link a DatadogSLO to a DatadogMonitor of SLO alert type. Why? So that the user can create a DatadogMonitor of SLO alert type without needing to know the SLO ID. --- .../v1alpha1/datadogmonitor_types.go | 7 ++ .../v1alpha1/zz_generated.deepcopy.go | 20 +++++ .../v1alpha1/zz_generated.openapi.go | 8 +- .../v1/datadoghq.com_datadogmonitors.yaml | 11 +++ ...atadoghq.com_datadogmonitors_v1alpha1.json | 17 ++++ .../controller/datadogmonitor/controller.go | 35 ++++++++ .../datadogmonitor/controller_test.go | 83 ++++++++++++++++++- 7 files changed, 179 insertions(+), 2 deletions(-) diff --git a/api/datadoghq/v1alpha1/datadogmonitor_types.go b/api/datadoghq/v1alpha1/datadogmonitor_types.go index 337a7590e..ba4c0ceb0 100644 --- a/api/datadoghq/v1alpha1/datadogmonitor_types.go +++ b/api/datadoghq/v1alpha1/datadogmonitor_types.go @@ -45,6 +45,8 @@ type DatadogMonitorSpec struct { // ControllerOptions are the optional parameters in the DatadogMonitor controller ControllerOptions DatadogMonitorControllerOptions `json:"controllerOptions,omitempty"` + //SLORef is SLO reference when specifying slo type alert. 
+ SLORef *SLORef `json:"sloRef,omitempty"` } // DatadogMonitorType defines the type of monitor @@ -370,6 +372,11 @@ type DatadogMonitorList struct { Items []DatadogMonitor `json:"items"` } +type SLORef struct { + Name string `json:"name"` + Namespace string `json:"namespace"` +} + func init() { SchemeBuilder.Register(&DatadogMonitor{}, &DatadogMonitorList{}) } diff --git a/api/datadoghq/v1alpha1/zz_generated.deepcopy.go b/api/datadoghq/v1alpha1/zz_generated.deepcopy.go index e884e2fca..1c0d9c417 100644 --- a/api/datadoghq/v1alpha1/zz_generated.deepcopy.go +++ b/api/datadoghq/v1alpha1/zz_generated.deepcopy.go @@ -1003,6 +1003,11 @@ func (in *DatadogMonitorSpec) DeepCopyInto(out *DatadogMonitorSpec) { } in.Options.DeepCopyInto(&out.Options) in.ControllerOptions.DeepCopyInto(&out.ControllerOptions) + if in.SLORef != nil { + in, out := &in.SLORef, &out.SLORef + *out = new(SLORef) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new DatadogMonitorSpec. @@ -1453,3 +1458,18 @@ func (in *ProfileAffinity) DeepCopy() *ProfileAffinity { in.DeepCopyInto(out) return out } + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *SLORef) DeepCopyInto(out *SLORef) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SLORef. 
+func (in *SLORef) DeepCopy() *SLORef { + if in == nil { + return nil + } + out := new(SLORef) + in.DeepCopyInto(out) + return out +} diff --git a/api/datadoghq/v1alpha1/zz_generated.openapi.go b/api/datadoghq/v1alpha1/zz_generated.openapi.go index 732e72278..f534577e4 100644 --- a/api/datadoghq/v1alpha1/zz_generated.openapi.go +++ b/api/datadoghq/v1alpha1/zz_generated.openapi.go @@ -1457,11 +1457,17 @@ func schema_datadog_operator_api_datadoghq_v1alpha1_DatadogMonitorSpec(ref commo Ref: ref("github.com/DataDog/datadog-operator/api/datadoghq/v1alpha1.DatadogMonitorControllerOptions"), }, }, + "sloRef": { + SchemaProps: spec.SchemaProps{ + Description: "SLORef is SLO reference when specifying slo type alert.", + Ref: ref("github.com/DataDog/datadog-operator/api/datadoghq/v1alpha1.SLORef"), + }, + }, }, }, }, Dependencies: []string{ - "github.com/DataDog/datadog-operator/api/datadoghq/v1alpha1.DatadogMonitorControllerOptions", "github.com/DataDog/datadog-operator/api/datadoghq/v1alpha1.DatadogMonitorOptions"}, + "github.com/DataDog/datadog-operator/api/datadoghq/v1alpha1.DatadogMonitorControllerOptions", "github.com/DataDog/datadog-operator/api/datadoghq/v1alpha1.DatadogMonitorOptions", "github.com/DataDog/datadog-operator/api/datadoghq/v1alpha1.SLORef"}, } } diff --git a/config/crd/bases/v1/datadoghq.com_datadogmonitors.yaml b/config/crd/bases/v1/datadoghq.com_datadogmonitors.yaml index 8a250b205..ba8819e0c 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogmonitors.yaml +++ b/config/crd/bases/v1/datadoghq.com_datadogmonitors.yaml @@ -237,6 +237,17 @@ spec: type: string type: array x-kubernetes-list-type: set + sloRef: + description: SLORef is SLO reference when specifying slo type alert. 
+ properties: + name: + type: string + namespace: + type: string + required: + - name + - namespace + type: object tags: description: Tags is the monitor tags associated with your monitor items: diff --git a/config/crd/bases/v1/datadoghq.com_datadogmonitors_v1alpha1.json b/config/crd/bases/v1/datadoghq.com_datadogmonitors_v1alpha1.json index a68de249c..03d69f6fd 100644 --- a/config/crd/bases/v1/datadoghq.com_datadogmonitors_v1alpha1.json +++ b/config/crd/bases/v1/datadoghq.com_datadogmonitors_v1alpha1.json @@ -224,6 +224,23 @@ "type": "array", "x-kubernetes-list-type": "set" }, + "sloRef": { + "additionalProperties": false, + "description": "SLORef is SLO reference when specifying slo type alert.", + "properties": { + "name": { + "type": "string" + }, + "namespace": { + "type": "string" + } + }, + "required": [ + "name", + "namespace" + ], + "type": "object" + }, "tags": { "description": "Tags is the monitor tags associated with your monitor", "items": { diff --git a/internal/controller/datadogmonitor/controller.go b/internal/controller/datadogmonitor/controller.go index 749d3bf62..0e201833f 100644 --- a/internal/controller/datadogmonitor/controller.go +++ b/internal/controller/datadogmonitor/controller.go @@ -9,6 +9,7 @@ import ( "context" "fmt" "os" + "regexp" "sort" "strconv" "strings" @@ -21,6 +22,7 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/tools/record" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -133,6 +135,19 @@ func (r *Reconciler) internalReconcile(ctx context.Context, req reconcile.Reques return r.updateStatusIfNeeded(logger, instance, now, newStatus, err, result) } + if instance.Spec.SLORef != nil { + slo := &datadoghqv1alpha1.DatadogSLO{} + err := r.client.Get(ctx, types.NamespacedName{ + Name: instance.Spec.SLORef.Name, + Namespace: instance.Spec.SLORef.Namespace, + 
}, slo) + if err != nil { + logger.Error(err, "unable to fetch referenced SLO", "sloRef", instance.Spec.SLORef) + return result, err + } + instance.Spec.Query = replaceSLOPlaceholders(instance.Spec.Query, slo.Status.ID) + } + instanceSpecHash, err := comparison.GenerateMD5ForSpec(&instance.Spec) if err != nil { logger.Error(err, "error generating hash") @@ -219,6 +234,26 @@ func (r *Reconciler) internalReconcile(ctx context.Context, req reconcile.Reques return r.updateStatusIfNeeded(logger, instance, now, newStatus, err, result) } +func replaceSLOPlaceholders(query, sloID string) string { + // Matches error_budget("...").func1().func2() and burn_rate("...").func() + re := regexp.MustCompile(`(error_budget|burn_rate)\(".*?"\)((?:\.\w+\(\))*)?`) + result := re.ReplaceAllStringFunc(query, func(match string) string { + submatches := re.FindStringSubmatch(match) + if len(submatches) >= 3 { + functionName := submatches[1] + suffix := submatches[2] + return fmt.Sprintf(`%s("%s")%s`, functionName, sloID, suffix) + } + return query + }) + return result +} + +func (r *Reconciler) updateDatadogMonitorQueryFromSLORef(logger logr.Logger, ctx context.Context, instance *datadoghqv1alpha1.DatadogMonitor) error { + + return nil +} + func (r *Reconciler) create(logger logr.Logger, datadogMonitor *datadoghqv1alpha1.DatadogMonitor, status *datadoghqv1alpha1.DatadogMonitorStatus, now metav1.Time, instanceSpecHash string) error { // Validate monitor in Datadog if err := validateMonitor(r.datadogAuth, logger, r.datadogClient, datadogMonitor); err != nil { diff --git a/internal/controller/datadogmonitor/controller_test.go b/internal/controller/datadogmonitor/controller_test.go index 6b63b0be7..837bb5870 100644 --- a/internal/controller/datadogmonitor/controller_test.go +++ b/internal/controller/datadogmonitor/controller_test.go @@ -15,6 +15,7 @@ import ( "github.com/stretchr/testify/assert" corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/api/resource" metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/client-go/kubernetes/scheme" @@ -28,7 +29,9 @@ import ( datadogapi "github.com/DataDog/datadog-api-client-go/v2/api/datadog" datadogV1 "github.com/DataDog/datadog-api-client-go/v2/api/datadogV1" + "github.com/DataDog/datadog-operator/api/datadoghq/v1alpha1" datadoghqv1alpha1 "github.com/DataDog/datadog-operator/api/datadoghq/v1alpha1" + "github.com/DataDog/datadog-operator/internal/controller/utils" "github.com/DataDog/datadog-operator/pkg/controller/utils/comparison" ) @@ -44,7 +47,7 @@ func TestReconcileDatadogMonitor_Reconcile(t *testing.T) { logf.SetLogger(zap.New(zap.UseDevMode(true))) s := scheme.Scheme - s.AddKnownTypes(datadoghqv1alpha1.GroupVersion, &datadoghqv1alpha1.DatadogMonitor{}) + s.AddKnownTypes(datadoghqv1alpha1.GroupVersion, &datadoghqv1alpha1.DatadogMonitor{}, &datadoghqv1alpha1.DatadogSLO{}) type args struct { request reconcile.Request @@ -335,6 +338,30 @@ func TestReconcileDatadogMonitor_Reconcile(t *testing.T) { return nil }, }, + { + name: "DatadogMonitor, SLO alert with SLORef", + args: args{ + request: newRequest(resourcesNamespace, resourcesName), + firstAction: func(c client.Client) { + _ = c.Create(context.TODO(), testSLO()) + }, + firstReconcileCount: 10, + + secondAction: func(c client.Client) { + _ = c.Create(context.TODO(), testSLOMonitorWithSLORef()) + }, + secondReconcileCount: 10, + }, + wantErr: false, + wantFunc: func(c client.Client) error { + dm := &datadoghqv1alpha1.DatadogMonitor{} + if err := c.Get(context.TODO(), types.NamespacedName{Name: resourcesName, Namespace: resourcesNamespace}, dm); err != nil { + return err + } + assert.Equal(t, "error_budget(\"123\").over(\"7d\") > 10", dm.Spec.Query) + return nil + }, + }, { name: "DatadogMonitor, log alert", args: args{ @@ -885,6 +912,60 @@ func testSLOMonitor() *datadoghqv1alpha1.DatadogMonitor { }, } } +func testSLO() *v1alpha1.DatadogSLO { + return &v1alpha1.DatadogSLO{ + TypeMeta: 
metav1.TypeMeta{ + Kind: "DatadogMonitor", + APIVersion: fmt.Sprintf("%s/%s", v1alpha1.GroupVersion.Group, v1alpha1.GroupVersion.Version), + }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: resourcesNamespace, + Name: resourcesName, + }, + Spec: v1alpha1.DatadogSLOSpec{ + Name: "Test SLO", + Query: &v1alpha1.DatadogSLOQuery{ + Numerator: "sum:my.custom.count.metric{type:good_events}.as_count()", + Denominator: "sum:my.custom.count.metric{*}.as_count()", + }, + Type: v1alpha1.DatadogSLOTypeMetric, + TargetThreshold: resource.MustParse("99.0"), + Timeframe: v1alpha1.DatadogSLOTimeFrame30d, + Tags: utils.GetRequiredTags(), + }, + Status: v1alpha1.DatadogSLOStatus{ + ID: "123", + }, + } +} +func testSLOMonitorWithSLORef() *datadoghqv1alpha1.DatadogMonitor { + threshold := "10" + return &datadoghqv1alpha1.DatadogMonitor{ + TypeMeta: metav1.TypeMeta{ + Kind: "DatadogMonitor", + APIVersion: fmt.Sprintf("%s/%s", datadoghqv1alpha1.GroupVersion.Group, datadoghqv1alpha1.GroupVersion.Version), + }, + ObjectMeta: metav1.ObjectMeta{ + Namespace: resourcesNamespace, + Name: resourcesName, + }, + Spec: datadoghqv1alpha1.DatadogMonitorSpec{ + SLORef: &datadoghqv1alpha1.SLORef{ + Name: resourcesName, + Namespace: resourcesNamespace, + }, + Query: "error_budget(\"slo-hash-id\").over(\"7d\") > 10", + Options: datadoghqv1alpha1.DatadogMonitorOptions{ + Thresholds: &datadoghqv1alpha1.DatadogMonitorOptionsThresholds{ + Critical: &threshold, + }, + }, + Type: datadoghqv1alpha1.DatadogMonitorTypeSLO, + Name: "test SLO monitor", + Message: "something is wrong", + }, + } +} func testEventV2Monitor() *datadoghqv1alpha1.DatadogMonitor { return &datadoghqv1alpha1.DatadogMonitor{