diff --git a/pkg/defaultmonitortests/types.go b/pkg/defaultmonitortests/types.go index 7c252298709b..2b722747038e 100644 --- a/pkg/defaultmonitortests/types.go +++ b/pkg/defaultmonitortests/types.go @@ -46,6 +46,7 @@ import ( "github.com/openshift/origin/pkg/monitortests/testframework/disruptionexternalservicemonitoring" "github.com/openshift/origin/pkg/monitortests/testframework/disruptionserializer" "github.com/openshift/origin/pkg/monitortests/testframework/e2etestanalyzer" + "github.com/openshift/origin/pkg/monitortests/testframework/highcputestanalyzer" "github.com/openshift/origin/pkg/monitortests/testframework/intervalserializer" "github.com/openshift/origin/pkg/monitortests/testframework/knownimagechecker" @@ -200,6 +201,7 @@ func newUniversalMonitorTests(info monitortestframework.MonitorTestInitializatio monitorTestRegistry.AddMonitorTestOrDie("azure-metrics-collector", "Test Framework", azuremetrics.NewAzureMetricsCollector()) monitorTestRegistry.AddMonitorTestOrDie("watch-namespaces", "Test Framework", watchnamespaces.NewNamespaceWatcher()) + monitorTestRegistry.AddMonitorTestOrDie("high-cpu-test-analyzer", "Test Framework", highcputestanalyzer.NewHighCPUTestAnalyzer()) return monitorTestRegistry }
diff --git a/pkg/monitortests/testframework/highcputestanalyzer/monitortest.go b/pkg/monitortests/testframework/highcputestanalyzer/monitortest.go
new file mode 100644
index 000000000000..6b7d5182c9f2
--- /dev/null
+++ b/pkg/monitortests/testframework/highcputestanalyzer/monitortest.go
@@ -0,0 +1,159 @@
+package highcputestanalyzer
+
+import (
+	"context"
+	"fmt"
+	"path/filepath"
+	"time"
+
+	"github.com/openshift/origin/pkg/dataloader"
+	"github.com/openshift/origin/pkg/monitor/monitorapi"
+	"github.com/openshift/origin/pkg/monitortestframework"
+	"github.com/openshift/origin/pkg/test/ginkgo/junitapi"
+	"github.com/sirupsen/logrus"
+	"k8s.io/client-go/rest"
+)
+
+// Names of the alerts that indicate high control plane CPU usage.
+const (
+	alertExtremelyHighIndividualControlPlaneCPU = "ExtremelyHighIndividualControlPlaneCPU"
+	alertHighOverallControlPlaneCPU             = "HighOverallControlPlaneCPU"
+)
+
+// endOfTime stands in for the end of an interval whose To is the zero time, i.e. one that is treated as ongoing.
+var endOfTime = time.Date(9999, 12, 31, 23, 59, 59, 999999999, time.UTC)
+
+// highCPUTestAnalyzer looks for e2e tests that overlap with high CPU alerts and generates a data file with the results.
+// The data file uses the autodl framework and thus is ingested automatically into bigquery, where we can then search
+// for test failures that are correlated with high CPU (either failing because of it, or perhaps causing it).
+type highCPUTestAnalyzer struct {
+	adminRESTConfig *rest.Config
+}
+
+// NewHighCPUTestAnalyzer returns a monitor test that records which e2e tests ran while a high CPU alert was active.
+func NewHighCPUTestAnalyzer() monitortestframework.MonitorTest {
+	return &highCPUTestAnalyzer{}
+}
+
+// PrepareCollection is a no-op; this monitor test only post-processes the final intervals.
+func (w *highCPUTestAnalyzer) PrepareCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
+	return nil
+}
+
+// StartCollection remembers the admin REST config; nothing is collected at this stage.
+func (w *highCPUTestAnalyzer) StartCollection(ctx context.Context, adminRESTConfig *rest.Config, recorder monitorapi.RecorderWriter) error {
+	w.adminRESTConfig = adminRESTConfig
+	return nil
+}
+
+// CollectData is a no-op.
+func (w *highCPUTestAnalyzer) CollectData(ctx context.Context, storageDir string, beginning, end time.Time) (monitorapi.Intervals, []*junitapi.JUnitTestCase, error) {
+	return nil, nil, nil
+}
+
+// ConstructComputedIntervals is a no-op.
+func (*highCPUTestAnalyzer) ConstructComputedIntervals(ctx context.Context, startingIntervals monitorapi.Intervals, recordedResources monitorapi.ResourcesMap, beginning, end time.Time) (monitorapi.Intervals, error) {
+	return nil, nil
+}
+
+// EvaluateTestsFromConstructedIntervals is a no-op; this monitor test produces no junit results.
+func (*highCPUTestAnalyzer) EvaluateTestsFromConstructedIntervals(ctx context.Context, finalIntervals monitorapi.Intervals) ([]*junitapi.JUnitTestCase, error) {
+	return nil, nil
+}
+
+// WriteContentToStorage writes one row per e2e test interval that overlapped a high CPU alert to an autodl data file
+// in storageDir. Writing is best effort: a failure is logged as a warning and never returned as an error.
+func (*highCPUTestAnalyzer) WriteContentToStorage(ctx context.Context, storageDir, timeSuffix string, finalIntervals monitorapi.Intervals, finalResourceState monitorapi.ResourcesMap) error {
+	dataFile := dataloader.DataFile{
+		TableName: "high_cpu_e2e_tests",
+		Schema: map[string]dataloader.DataType{
+			"TestName": dataloader.DataTypeString,
+			"Success":  dataloader.DataTypeInteger,
+		},
+		Rows: findE2EIntervalsOverlappingHighCPU(finalIntervals),
+	}
+
+	fileName := filepath.Join(storageDir, fmt.Sprintf("high-cpu-e2etests%s-%s", timeSuffix, dataloader.AutoDataLoaderSuffix))
+	if err := dataloader.WriteDataFile(fileName, dataFile); err != nil {
+		logrus.WithError(err).Warnf("unable to write data file: %s", fileName)
+	}
+
+	return nil
+}
+
+// Cleanup is a no-op.
+func (*highCPUTestAnalyzer) Cleanup(ctx context.Context) error {
+	return nil
+}
+
+// findE2EIntervalsOverlappingHighCPU returns one row per E2E test interval that overlaps at least one high CPU alert
+// interval. A test interval that overlaps several alert intervals (for example both alerts firing at once) is
+// reported only once, so it is not over-counted. Each row holds the test name under "TestName" and, under "Success",
+// "1" when the interval's status annotation is "Passed" and "0" otherwise.
+func findE2EIntervalsOverlappingHighCPU(intervals monitorapi.Intervals) []map[string]string {
+	alertIntervals := intervals.Filter(isHighCPUAlert)
+	e2eTestIntervals := intervals.Filter(func(interval monitorapi.Interval) bool {
+		return interval.Source == monitorapi.SourceE2ETest
+	})
+
+	// Start from an empty, non-nil slice so callers always get a list, even when nothing overlaps.
+	rows := []map[string]string{}
+	for _, testInterval := range e2eTestIntervals {
+		testName, exists := testInterval.Locator.Keys[monitorapi.LocatorE2ETestKey]
+		if !exists || !overlapsAny(testInterval, alertIntervals) {
+			continue
+		}
+
+		// Anything other than an explicit "Passed" status, including a missing annotation, counts as not successful.
+		success := "0"
+		if testInterval.Message.Annotations[monitorapi.AnnotationStatus] == "Passed" {
+			success = "1"
+		}
+
+		rows = append(rows, map[string]string{
+			"TestName": testName,
+			"Success":  success,
+		})
+	}
+
+	return rows
+}
+
+// isHighCPUAlert reports whether interval is an alert interval for one of the high control plane CPU alerts.
+func isHighCPUAlert(interval monitorapi.Interval) bool {
+	if interval.Source != monitorapi.SourceAlert {
+		return false
+	}
+
+	switch interval.Locator.Keys["alert"] {
+	case alertExtremelyHighIndividualControlPlaneCPU, alertHighOverallControlPlaneCPU:
+		return true
+	default:
+		return false
+	}
+}
+
+// overlapsAny reports whether interval overlaps at least one of candidates.
+func overlapsAny(interval monitorapi.Interval, candidates []monitorapi.Interval) bool {
+	for _, candidate := range candidates {
+		if overlaps(candidate, interval) {
+			return true
+		}
+	}
+	return false
+}
+
+// overlaps checks if two intervals overlap in time. Both ends are inclusive, so intervals that merely touch count as
+// overlapping, and an interval with a zero To is treated as ongoing to the end of time.
+func overlaps(interval1, interval2 monitorapi.Interval) bool {
+	end1, end2 := effectiveEnd(interval1.To), effectiveEnd(interval2.To)
+	return !interval1.From.After(end2) && !interval2.From.After(end1)
+}
+
+// effectiveEnd returns to, or endOfTime when to is the zero time.
+func effectiveEnd(to time.Time) time.Time {
+	if to.IsZero() {
+		return endOfTime
+	}
+	return to
+}
diff --git a/pkg/monitortests/testframework/highcputestanalyzer/monitortest_test.go b/pkg/monitortests/testframework/highcputestanalyzer/monitortest_test.go
new file mode 100644
index 000000000000..baa746e21fb2
--- /dev/null
+++ b/pkg/monitortests/testframework/highcputestanalyzer/monitortest_test.go
@@ -0,0 +1,591 @@
+package highcputestanalyzer
+
+import (
+	"testing"
+	"time"
+
+	"github.com/openshift/origin/pkg/monitor/monitorapi"
+	"github.com/stretchr/testify/assert"
+)
+
+func TestFindE2EIntervalsOverlappingHighCPU(t *testing.T) {
+	now := time.Now()
+
+	testCases := []struct {
+		name      string
+		intervals monitorapi.Intervals
+		expected  []map[string]string
+	}{
+		{
+			name:      "no intervals",
+			intervals: monitorapi.Intervals{},
+			expected:  []map[string]string{},
+		},
+		{
+			name: "no alert intervals",
+			intervals: monitorapi.Intervals{
+				{
+					Source: monitorapi.SourceE2ETest,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								monitorapi.LocatorE2ETestKey: "test1",
+							},
+						},
+						Message: monitorapi.Message{
+							Annotations: map[monitorapi.AnnotationKey]string{
+								monitorapi.AnnotationStatus: "Passed",
+							},
+						},
+					},
+					From: now,
+					To:   now.Add(10 * time.Minute),
+				},
+			},
+			expected: []map[string]string{},
+		},
+		{
+			name: "no e2e test intervals",
+			intervals: monitorapi.Intervals{
+				{
+					Source: monitorapi.SourceAlert,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								"alert": "ExtremelyHighIndividualControlPlaneCPU",
+							},
+						},
+					},
+					From: now,
+					To:   now.Add(10 * time.Minute),
+				},
+			},
+			expected: []map[string]string{},
+		},
+		{
+			name: "e2e test overlaps with alert",
+			intervals: monitorapi.Intervals{
+				{
+					Source: monitorapi.SourceAlert,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								"alert": "ExtremelyHighIndividualControlPlaneCPU",
+							},
+						},
+					},
+					From: now,
+					To:   now.Add(10 * time.Minute),
+				},
+				{
+					Source: monitorapi.SourceE2ETest,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								monitorapi.LocatorE2ETestKey: "test1",
+							},
+						},
+						Message: monitorapi.Message{
+							Annotations: map[monitorapi.AnnotationKey]string{
+								monitorapi.AnnotationStatus: "Passed",
+							},
+						},
+					},
+					From: now.Add(5 * time.Minute),
+					To:   now.Add(15 * time.Minute),
+				},
+			},
+			expected: []map[string]string{
+				{
+					"TestName": "test1",
+					"Success":  "1",
+				},
+			},
+		},
+		{
+			name: "e2e test contained within alert",
+			intervals: monitorapi.Intervals{
+				{
+					Source: monitorapi.SourceAlert,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								"alert": "HighOverallControlPlaneCPU",
+							},
+						},
+					},
+					From: now,
+					To:   now.Add(20 * time.Minute),
+				},
+				{
+					Source: monitorapi.SourceE2ETest,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								monitorapi.LocatorE2ETestKey: "test2",
+							},
+						},
+						Message: monitorapi.Message{
+							Annotations: map[monitorapi.AnnotationKey]string{
+								monitorapi.AnnotationStatus: "Failed",
+							},
+						},
+					},
+					From: now.Add(5 * time.Minute),
+					To:   now.Add(15 * time.Minute),
+				},
+			},
+			expected: []map[string]string{
+				{
+					"TestName": "test2",
+					"Success":  "0",
+				},
+			},
+		},
+		{
+			name: "alert contained within e2e test",
+			intervals: monitorapi.Intervals{
+				{
+					Source: monitorapi.SourceAlert,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								"alert": "ExtremelyHighIndividualControlPlaneCPU",
+							},
+						},
+					},
+					From: now.Add(5 * time.Minute),
+					To:   now.Add(15 * time.Minute),
+				},
+				{
+					Source: monitorapi.SourceE2ETest,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								monitorapi.LocatorE2ETestKey: "test3",
+							},
+						},
+						Message: monitorapi.Message{
+							Annotations: map[monitorapi.AnnotationKey]string{
+								monitorapi.AnnotationStatus: "Passed",
+							},
+						},
+					},
+					From: now,
+					To:   now.Add(20 * time.Minute),
+				},
+			},
+			expected: []map[string]string{
+				{
+					"TestName": "test3",
+					"Success":  "1",
+				},
+			},
+		},
+		{
+			name: "e2e test touches alert start",
+			intervals: monitorapi.Intervals{
+				{
+					Source: monitorapi.SourceAlert,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								"alert": "ExtremelyHighIndividualControlPlaneCPU",
+							},
+						},
+					},
+					From: now.Add(10 * time.Minute),
+					To:   now.Add(20 * time.Minute),
+				},
+				{
+					Source: monitorapi.SourceE2ETest,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								monitorapi.LocatorE2ETestKey: "test4",
+							},
+						},
+						Message: monitorapi.Message{
+							Annotations: map[monitorapi.AnnotationKey]string{
+								monitorapi.AnnotationStatus: "Passed",
+							},
+						},
+					},
+					From: now,
+					To:   now.Add(10 * time.Minute),
+				},
+			},
+			expected: []map[string]string{
+				{
+					"TestName": "test4",
+					"Success":  "1",
+				},
+			},
+		},
+		{
+			name: "e2e test touches alert end",
+			intervals: monitorapi.Intervals{
+				{
+					Source: monitorapi.SourceAlert,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								"alert": "HighOverallControlPlaneCPU",
+							},
+						},
+					},
+					From: now,
+					To:   now.Add(10 * time.Minute),
+				},
+				{
+					Source: monitorapi.SourceE2ETest,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								monitorapi.LocatorE2ETestKey: "test5",
+							},
+						},
+						Message: monitorapi.Message{
+							Annotations: map[monitorapi.AnnotationKey]string{},
+						},
+					},
+					From: now.Add(10 * time.Minute),
+					To:   now.Add(20 * time.Minute),
+				},
+			},
+			expected: []map[string]string{
+				{
+					"TestName": "test5",
+					"Success":  "0",
+				},
+			},
+		},
+		{
+			name: "e2e test with zero end time",
+			intervals: monitorapi.Intervals{
+				{
+					Source: monitorapi.SourceAlert,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								"alert": "ExtremelyHighIndividualControlPlaneCPU",
+							},
+						},
+					},
+					From: now,
+					To:   now.Add(10 * time.Minute),
+				},
+				{
+					Source: monitorapi.SourceE2ETest,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								monitorapi.LocatorE2ETestKey: "test6",
+							},
+						},
+						Message: monitorapi.Message{
+							Annotations: map[monitorapi.AnnotationKey]string{
+								monitorapi.AnnotationStatus: "Passed",
+							},
+						},
+					},
+					From: now.Add(5 * time.Minute),
+					To:   time.Time{}, // Zero time
+				},
+			},
+			expected: []map[string]string{
+				{
+					"TestName": "test6",
+					"Success":  "1",
+				},
+			},
+		},
+		{
+			name: "alert with zero end time",
+			intervals: monitorapi.Intervals{
+				{
+					Source: monitorapi.SourceAlert,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								"alert": "HighOverallControlPlaneCPU",
+							},
+						},
+					},
+					From: now,
+					To:   time.Time{}, // Zero time
+				},
+				{
+					Source: monitorapi.SourceE2ETest,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								monitorapi.LocatorE2ETestKey: "test7",
+							},
+						},
+						Message: monitorapi.Message{
+							Annotations: map[monitorapi.AnnotationKey]string{
+								monitorapi.AnnotationStatus: "Failed",
+							},
+						},
+					},
+					From: now.Add(5 * time.Minute),
+					To:   now.Add(10 * time.Minute),
+				},
+			},
+			expected: []map[string]string{
+				{
+					"TestName": "test7",
+					"Success":  "0",
+				},
+			},
+		},
+		{
+			name: "multiple overlapping tests",
+			intervals: monitorapi.Intervals{
+				{
+					Source: monitorapi.SourceAlert,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								"alert": "ExtremelyHighIndividualControlPlaneCPU",
+							},
+						},
+					},
+					From: now,
+					To:   now.Add(30 * time.Minute),
+				},
+				{
+					Source: monitorapi.SourceE2ETest,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								monitorapi.LocatorE2ETestKey: "test8",
+							},
+						},
+						Message: monitorapi.Message{
+							Annotations: map[monitorapi.AnnotationKey]string{
+								monitorapi.AnnotationStatus: "Passed",
+							},
+						},
+					},
+					From: now.Add(5 * time.Minute),
+					To:   now.Add(15 * time.Minute),
+				},
+				{
+					Source: monitorapi.SourceE2ETest,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								monitorapi.LocatorE2ETestKey: "test9",
+							},
+						},
+						Message: monitorapi.Message{
+							Annotations: map[monitorapi.AnnotationKey]string{
+								monitorapi.AnnotationStatus: "Failed",
+							},
+						},
+					},
+					From: now.Add(10 * time.Minute),
+					To:   now.Add(20 * time.Minute),
+				},
+			},
+			expected: []map[string]string{
+				{
+					"TestName": "test8",
+					"Success":  "1",
+				},
+				{
+					"TestName": "test9",
+					"Success":  "0",
+				},
+			},
+		},
+		{
+			name: "test overlapping multiple alerts is reported once",
+			intervals: monitorapi.Intervals{
+				{
+					Source: monitorapi.SourceAlert,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								"alert": "ExtremelyHighIndividualControlPlaneCPU",
+							},
+						},
+					},
+					From: now,
+					To:   now.Add(10 * time.Minute),
+				},
+				{
+					Source: monitorapi.SourceAlert,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								"alert": "HighOverallControlPlaneCPU",
+							},
+						},
+					},
+					From: now.Add(5 * time.Minute),
+					To:   now.Add(15 * time.Minute),
+				},
+				{
+					Source: monitorapi.SourceE2ETest,
+					Condition: monitorapi.Condition{
+						Locator: monitorapi.Locator{
+							Keys: map[monitorapi.LocatorKey]string{
+								monitorapi.LocatorE2ETestKey: "test10",
+							},
+						},
+						Message: monitorapi.Message{
+							Annotations: map[monitorapi.AnnotationKey]string{
+								monitorapi.AnnotationStatus: "Passed",
+							},
+						},
+					},
+					From: now.Add(6 * time.Minute),
+					To:   now.Add(8 * time.Minute),
+				},
+			},
+			expected: []map[string]string{
+				{
+					"TestName": "test10",
+					"Success":  "1",
+				},
+			},
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			result := findE2EIntervalsOverlappingHighCPU(tc.intervals)
+			assert.ElementsMatch(t, tc.expected, result)
+		})
+	}
+}
+
+func TestOverlaps(t *testing.T) {
+	now := time.Now()
+
+	testCases := []struct {
+		name      string
+		interval1 monitorapi.Interval
+		interval2 monitorapi.Interval
+		expected  bool
+	}{
+		{
+			name: "intervals overlap",
+			interval1: monitorapi.Interval{
+				From: now,
+				To:   now.Add(10 * time.Minute),
+			},
+			interval2: monitorapi.Interval{
+				From: now.Add(5 * time.Minute),
+				To:   now.Add(15 * time.Minute),
+			},
+			expected: true,
+		},
+		{
+			name: "interval1 contains interval2",
+			interval1: monitorapi.Interval{
+				From: now,
+				To:   now.Add(20 * time.Minute),
+			},
+			interval2: monitorapi.Interval{
+				From: now.Add(5 * time.Minute),
+				To:   now.Add(15 * time.Minute),
+			},
+			expected: true,
+		},
+		{
+			name: "interval2 contains interval1",
+			interval1: monitorapi.Interval{
+				From: now.Add(5 * time.Minute),
+				To:   now.Add(15 * time.Minute),
+			},
+			interval2: monitorapi.Interval{
+				From: now,
+				To:   now.Add(20 * time.Minute),
+			},
+			expected: true,
+		},
+		{
+			name: "intervals touch at start",
+			interval1: monitorapi.Interval{
+				From: now,
+				To:   now.Add(10 * time.Minute),
+			},
+			interval2: monitorapi.Interval{
+				From: now.Add(10 * time.Minute),
+				To:   now.Add(20 * time.Minute),
+			},
+			expected: true,
+		},
+		{
+			name: "intervals touch at end",
+			interval1: monitorapi.Interval{
+				From: now.Add(10 * time.Minute),
+				To:   now.Add(20 * time.Minute),
+			},
+			interval2: monitorapi.Interval{
+				From: now,
+				To:   now.Add(10 * time.Minute),
+			},
+			expected: true,
+		},
+		{
+			name: "intervals don't overlap",
+			interval1: monitorapi.Interval{
+				From: now,
+				To:   now.Add(10 * time.Minute),
+			},
+			interval2: monitorapi.Interval{
+				From: now.Add(11 * time.Minute),
+				To:   now.Add(20 * time.Minute),
+			},
+			expected: false,
+		},
+		{
+			name: "interval1 has zero end time",
+			interval1: monitorapi.Interval{
+				From: now,
+				To:   time.Time{},
+			},
+			interval2: monitorapi.Interval{
+				From: now.Add(10 * time.Minute),
+				To:   now.Add(20 * time.Minute),
+			},
+			expected: true,
+		},
+		{
+			name: "interval2 has zero end time",
+			interval1: monitorapi.Interval{
+				From: now,
+				To:   now.Add(10 * time.Minute),
+			},
+			interval2: monitorapi.Interval{
+				From: now.Add(5 * time.Minute),
+				To:   time.Time{},
+			},
+			expected: true,
+		},
+		{
+			name: "both intervals have zero end time",
+			interval1: monitorapi.Interval{
+				From: now,
+				To:   time.Time{},
+			},
+			interval2: monitorapi.Interval{
+				From: now.Add(10 * time.Minute),
+				To:   time.Time{},
+			},
+			expected: true,
+		},
+	}
+
+	for _, tc := range testCases {
+		t.Run(tc.name, func(t *testing.T) {
+			result := overlaps(tc.interval1, tc.interval2)
+			assert.Equal(t, tc.expected, result)
+		})
+	}
+}