diff --git a/pkg/controller/collectors.go b/pkg/controller/collectors.go index a9265cec..3a59fe47 100644 --- a/pkg/controller/collectors.go +++ b/pkg/controller/collectors.go @@ -11,6 +11,7 @@ var ( testSuiteLabels = []string{"test_suite_name"} testCaseLabels = []string{"test_case_name", "test_case_classname"} statusesList = [...]string{"created", "waiting_for_resource", "preparing", "pending", "running", "success", "failed", "canceled", "skipped", "manual", "scheduled", "error"} + latencyHistogramBuckets = []float64{.1, .25, .5, 1, 2.5, 5, 10, 15, 20, 30, 40, 50, 60, 90, 150, 210, 270, 330, 390, 450, 500, 600, 1200, 1800, 2700, 3600} ) // NewInternalCollectorCurrentlyQueuedTasksCount returns a new collector for the gcpe_currently_queued_tasks_count metric. @@ -266,6 +267,18 @@ func NewCollectorJobDurationSeconds() prometheus.Collector { ) } +// NewCollectorJobDurationHistogram returns a new collector for the gitlab_ci_pipeline_job_histogram_duration_seconds metric. +func NewCollectorJobDurationHistogram() prometheus.Collector { + return prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "gitlab_ci_pipeline_job_histogram_duration_seconds", + Help: "Histogram of duration (seconds) of finished gitlab jobs", + Buckets: latencyHistogramBuckets, + }, + append(defaultLabels, jobLabels...), + ) +} + // NewCollectorJobID returns a new collector for the gitlab_ci_pipeline_job_id metric. 
func NewCollectorJobID() prometheus.Collector { return prometheus.NewGaugeVec( diff --git a/pkg/controller/garbage_collector.go b/pkg/controller/garbage_collector.go index c4a6e3ce..ddc64022 100644 --- a/pkg/controller/garbage_collector.go +++ b/pkg/controller/garbage_collector.go @@ -322,6 +322,7 @@ func (c *Controller) GarbageCollectMetrics(ctx context.Context) error { switch m.Kind { case schemas.MetricKindJobArtifactSizeBytes, schemas.MetricKindJobDurationSeconds, + schemas.MetricKindJobDurationHistogram, schemas.MetricKindJobID, schemas.MetricKindJobRunCount, schemas.MetricKindJobStatus, diff --git a/pkg/controller/garbage_collector_test.go b/pkg/controller/garbage_collector_test.go index 7a4be0ae..f972ed6b 100644 --- a/pkg/controller/garbage_collector_test.go +++ b/pkg/controller/garbage_collector_test.go @@ -141,6 +141,7 @@ func TestGarbageCollectMetrics(t *testing.T) { ref1m1 := schemas.Metric{Kind: schemas.MetricKindCoverage, Labels: prometheus.Labels{"project": "p1", "ref": "foo", "kind": "branch"}} ref1m2 := schemas.Metric{Kind: schemas.MetricKindStatus, Labels: prometheus.Labels{"project": "p1", "ref": "foo", "kind": "branch"}} ref1m3 := schemas.Metric{Kind: schemas.MetricKindJobDurationSeconds, Labels: prometheus.Labels{"project": "p1", "ref": "foo", "kind": "branch"}} + ref1m4 := schemas.Metric{Kind: schemas.MetricKindJobDurationHistogram, Labels: prometheus.Labels{"project": "p1", "ref": "foo", "kind": "branch"}} ref2m1 := schemas.Metric{Kind: schemas.MetricKindCoverage, Labels: prometheus.Labels{"project": "p2", "ref": "bar", "kind": "branch"}} ref3m1 := schemas.Metric{Kind: schemas.MetricKindCoverage, Labels: prometheus.Labels{"project": "foo", "kind": "branch"}} @@ -150,6 +151,7 @@ func TestGarbageCollectMetrics(t *testing.T) { c.Store.SetMetric(ctx, ref1m1) c.Store.SetMetric(ctx, ref1m2) c.Store.SetMetric(ctx, ref1m3) + c.Store.SetMetric(ctx, ref1m4) c.Store.SetMetric(ctx, ref2m1) c.Store.SetMetric(ctx, ref3m1) c.Store.SetMetric(ctx, ref4m1) @@ 
-161,6 +163,7 @@ func TestGarbageCollectMetrics(t *testing.T) { expectedMetrics := schemas.Metrics{ ref1m1.Key(): ref1m1, ref1m3.Key(): ref1m3, + ref1m4.Key(): ref1m4, } assert.Equal(t, expectedMetrics, storedMetrics) } diff --git a/pkg/controller/jobs.go b/pkg/controller/jobs.go index 36cf5ca2..fb57dae3 100644 --- a/pkg/controller/jobs.go +++ b/pkg/controller/jobs.go @@ -128,6 +128,12 @@ func (c *Controller) ProcessJobMetrics(ctx context.Context, ref schemas.Ref, job Value: job.DurationSeconds, }) + storeSetMetric(ctx, c.Store, schemas.Metric{ + Kind: schemas.MetricKindJobDurationHistogram, + Labels: labels, + Value: job.DurationSeconds, + }) + storeSetMetric(ctx, c.Store, schemas.Metric{ Kind: schemas.MetricKindJobQueuedDurationSeconds, Labels: labels, diff --git a/pkg/controller/jobs_test.go b/pkg/controller/jobs_test.go index 545a8eca..c14029fa 100644 --- a/pkg/controller/jobs_test.go +++ b/pkg/controller/jobs_test.go @@ -149,6 +149,14 @@ func TestProcessJobMetrics(t *testing.T) { } assert.Equal(t, lastRunJobDuration, metrics[lastRunJobDuration.Key()]) + jobDurationHistogram := schemas.Metric{ + Kind: schemas.MetricKindJobDurationHistogram, + Labels: labels, + Value: newJob.DurationSeconds, + } + + assert.Equal(t, jobDurationHistogram, metrics[jobDurationHistogram.Key()]) + jobRunCount := schemas.Metric{ Kind: schemas.MetricKindJobRunCount, Labels: labels, @@ -171,3 +179,106 @@ func TestProcessJobMetrics(t *testing.T) { } assert.Equal(t, status, metrics[status.Key()]) } + +func TestProcessJobHistogramMetrics(t *testing.T) { + ctx, c, _, srv := newTestController(config.Config{}) + srv.Close() + + oldJob := schemas.Job{ + ID: 1, + Name: "foo", + Timestamp: 1, + } + + job1 := schemas.Job{ + ID: 2, + Name: "foo", + Timestamp: 2, + DurationSeconds: 15, + Status: "failed", + Stage: "🚀", + TagList: "", + ArtifactSize: 150, + Runner: schemas.Runner{ + Description: "foo-123-bar", + }, + } + + job2 := schemas.Job{ + ID: 3, + Name: "foo", + Timestamp: 2, + 
DurationSeconds: 20, + Status: "failed", + Stage: "🚀", + TagList: "", + ArtifactSize: 150, + Runner: schemas.Runner{ + Description: "foo-123-bar", + }, + } + p := schemas.NewProject("foo") + p.Topics = "first,second" + p.Pull.Pipeline.Jobs.RunnerDescription.AggregationRegexp = `foo-(.*)-bar` + + ref := schemas.NewRef(p, schemas.RefKindBranch, "foo") + ref.LatestPipeline.ID = 1 + ref.LatestPipeline.Variables = "none" + ref.LatestJobs = schemas.Jobs{ + "foo": oldJob, + } + + c.Store.SetRef(ctx, ref) + + // If we run it against the same job, nothing should change in the store + c.ProcessJobMetrics(ctx, ref, oldJob) + refs, _ := c.Store.Refs(ctx) + assert.Equal(t, schemas.Jobs{ + "foo": oldJob, + }, refs[ref.Key()].LatestJobs) + + // Update the ref job 1 + c.ProcessJobMetrics(ctx, ref, job1) + refs, _ = c.Store.Refs(ctx) + assert.Equal(t, schemas.Jobs{ + "foo": job1, + }, refs[ref.Key()].LatestJobs) + + // Check if all the metrics exist + metrics, _ := c.Store.Metrics(ctx) + labels := map[string]string{ + "project": ref.Project.Name, + "topics": ref.Project.Topics, + "ref": ref.Name, + "kind": string(ref.Kind), + "variables": ref.LatestPipeline.Variables, + "source": ref.LatestPipeline.Source, + "stage": job1.Stage, + "tag_list": job1.TagList, + "failure_reason": job1.FailureReason, + "job_name": job1.Name, + "runner_description": ref.Project.Pull.Pipeline.Jobs.RunnerDescription.AggregationRegexp, + } + jobDurationHistogram := schemas.Metric{ + Kind: schemas.MetricKindJobDurationHistogram, + Labels: labels, + Value: job1.DurationSeconds, + } + + assert.Equal(t, jobDurationHistogram, metrics[jobDurationHistogram.Key()]) + + // Update the ref job 2 + c.ProcessJobMetrics(ctx, ref, job2) + refs, _ = c.Store.Refs(ctx) + assert.Equal(t, schemas.Jobs{ + "foo": job2, + }, refs[ref.Key()].LatestJobs) + jobDurationHistogram = schemas.Metric{ + Kind: schemas.MetricKindJobDurationHistogram, + Labels: labels, + Value: job2.DurationSeconds, + } + + assert.Equal(t, 
jobDurationHistogram, metrics[jobDurationHistogram.Key()]) + +} diff --git a/pkg/controller/metrics.go b/pkg/controller/metrics.go index a28ae040..8869fcc0 100644 --- a/pkg/controller/metrics.go +++ b/pkg/controller/metrics.go @@ -53,6 +53,7 @@ func NewRegistry(ctx context.Context) *Registry { schemas.MetricKindID: NewCollectorID(), schemas.MetricKindJobArtifactSizeBytes: NewCollectorJobArtifactSizeBytes(), schemas.MetricKindJobDurationSeconds: NewCollectorJobDurationSeconds(), + schemas.MetricKindJobDurationHistogram: NewCollectorJobDurationHistogram(), schemas.MetricKindJobID: NewCollectorJobID(), schemas.MetricKindJobQueuedDurationSeconds: NewCollectorJobQueuedDurationSeconds(), schemas.MetricKindJobRunCount: NewCollectorJobRunCount(), @@ -194,6 +195,8 @@ func (r *Registry) ExportMetrics(metrics schemas.Metrics) { c.With(m.Labels).Set(m.Value) case *prometheus.CounterVec: c.With(m.Labels).Add(m.Value) + case *prometheus.HistogramVec: + c.With(m.Labels).Observe(m.Value) default: log.Errorf("unsupported collector type : %v", reflect.TypeOf(c)) } diff --git a/pkg/schemas/metric.go b/pkg/schemas/metric.go index bbf19752..6d0165c0 100644 --- a/pkg/schemas/metric.go +++ b/pkg/schemas/metric.go @@ -48,6 +48,9 @@ const ( // MetricKindJobDurationSeconds .. MetricKindJobDurationSeconds + // MetricKindJobDurationHistogram .. + MetricKindJobDurationHistogram + // MetricKindJobID .. MetricKindJobID @@ -147,7 +150,7 @@ func (m Metric) Key() MetricKey { m.Labels["source"], }) - case MetricKindJobArtifactSizeBytes, MetricKindJobDurationSeconds, MetricKindJobID, MetricKindJobQueuedDurationSeconds, MetricKindJobRunCount, MetricKindJobStatus, MetricKindJobTimestamp: + case MetricKindJobArtifactSizeBytes, MetricKindJobDurationSeconds, MetricKindJobDurationHistogram, MetricKindJobID, MetricKindJobQueuedDurationSeconds, MetricKindJobRunCount, MetricKindJobStatus, MetricKindJobTimestamp: key += fmt.Sprintf("%v", []string{ m.Labels["project"], m.Labels["kind"],