Skip to content

Commit 4ee6553

Browse files
committed
Auto-create tracing spans for log groups
This will make it easy for CI users to trace and analyze their CI jobs without requiring additional work or scripting on their part. They can just look at tracing information on their tracing provider.
1 parent d3d0341 commit 4ee6553

File tree

6 files changed

+58
-4
lines changed

6 files changed

+58
-4
lines changed

agent/agent_configuration.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,4 +57,5 @@ type AgentConfiguration struct {
5757
AcquireJob string
5858
TracingBackend string
5959
TracingServiceName string
60+
TraceLogGroups bool
6061
}

clicommand/agent_start.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ type AgentStartConfig struct {
142142
MetricsDatadogDistributions bool `cli:"metrics-datadog-distributions"`
143143
TracingBackend string `cli:"tracing-backend"`
144144
TracingServiceName string `cli:"tracing-service-name"`
145+
TraceLogGroups bool `cli:"trace-log-groups"`
145146

146147
// Global flags
147148
Debug bool `cli:"debug"`
@@ -582,6 +583,11 @@ var AgentStartCommand = cli.Command{
582583
EnvVar: "BUILDKITE_TRACING_SERVICE_NAME",
583584
Value: "buildkite-agent",
584585
},
586+
cli.BoolFlag{
587+
Name: "trace-log-groups",
588+
Usage: "Automatically creates tracing spans for log groups when tracing is enabled.",
589+
EnvVar: "BUILDKITE_TRACE_LOG_GROUPS",
590+
},
585591
cli.StringFlag{
586592
Name: "job-verification-key-path",
587593
Usage: "Path to a file containing a verification key. Passing this flag enables job verification. For hmac-sha256, the raw file content is used as the shared key",
@@ -800,9 +806,13 @@ var AgentStartCommand = cli.Command{
800806
})
801807

802808
// Sense check supported tracing backends, we don't want bootstrapped jobs to silently have no tracing
803-
if _, has := tracetools.ValidTracingBackends[cfg.TracingBackend]; !has {
809+
_, tracingEnabled := tracetools.ValidTracingBackends[cfg.TracingBackend]
810+
if !tracingEnabled {
804811
l.Fatal("The given tracing backend %q is not supported. Valid backends are: %q", cfg.TracingBackend, maps.Keys(tracetools.ValidTracingBackends))
805812
}
813+
if !tracingEnabled && cfg.TraceLogGroups {
814+
l.Fatal("Log group tracing cannot be enabled without a tracing backend.")
815+
}
806816

807817
if experiments.IsEnabled(experiments.AgentAPI) {
808818
shutdown := runAgentAPI(ctx, l, cfg.SocketsPath)
@@ -864,6 +874,7 @@ var AgentStartCommand = cli.Command{
864874
AcquireJob: cfg.AcquireJob,
865875
TracingBackend: cfg.TracingBackend,
866876
TracingServiceName: cfg.TracingServiceName,
877+
TraceLogGroups: cfg.TraceLogGroups,
867878
JobVerificationNoSignatureBehavior: cfg.JobVerificationNoSignatureBehavior,
868879
JobVerificationInvalidSignatureBehavior: cfg.JobVerificationInvalidSignatureBehavior,
869880

clicommand/bootstrap.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ type BootstrapConfig struct {
9292
RedactedVars []string `cli:"redacted-vars" normalize:"list"`
9393
TracingBackend string `cli:"tracing-backend"`
9494
TracingServiceName string `cli:"tracing-service-name"`
95+
TraceLogGroups bool `cli:"trace-log-groups"`
9596
}
9697

9798
var BootstrapCommand = cli.Command{
@@ -353,6 +354,11 @@ var BootstrapCommand = cli.Command{
353354
EnvVar: "BUILDKITE_TRACING_SERVICE_NAME",
354355
Value: "buildkite-agent",
355356
},
357+
cli.BoolFlag{
358+
Name: "trace-log-groups",
359+
Usage: "Automatically creates tracing spans for log groups when tracing is enabled.",
360+
EnvVar: "BUILDKITE_TRACE_LOG_GROUPS",
361+
},
356362
DebugFlag,
357363
LogLevelFlag,
358364
ExperimentsFlag,
@@ -437,6 +443,7 @@ var BootstrapCommand = cli.Command{
437443
Tag: cfg.Tag,
438444
TracingBackend: cfg.TracingBackend,
439445
TracingServiceName: cfg.TracingServiceName,
446+
TraceLogGroups: cfg.TraceLogGroups,
440447
})
441448

442449
cctx, cancel := context.WithCancel(ctx)

internal/job/config.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,9 @@ type ExecutorConfig struct {
164164

165165
// Service name to use when reporting traces.
166166
TracingServiceName string
167+
168+
// Whether to auto-create tracing spans for log groups.
169+
TraceLogGroups bool
167170
}
168171

169172
// ReadFromEnvironment reads configuration from the Environment, returns a map

internal/job/executor.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ func (e *Executor) Run(ctx context.Context) (exitCode int) {
8282
e.shell.Debug = e.ExecutorConfig.Debug
8383
e.shell.InterruptSignal = e.ExecutorConfig.CancelSignal
8484
e.shell.SignalGracePeriod = e.ExecutorConfig.SignalGracePeriod
85+
e.shell.TraceLogGroups = e.ExecutorConfig.TraceLogGroups
8586
}
8687
if experiments.IsEnabled(experiments.KubernetesExec) {
8788
kubernetesClient := &kubernetes.Client{}

internal/job/shell/shell.go

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ type Shell struct {
7272

7373
// Amount of time to wait between sending the InterruptSignal and SIGKILL
7474
SignalGracePeriod time.Duration
75+
76+
// Whether to auto create tracing spans for log groups
77+
TraceLogGroups bool
7578
}
7679

7780
// New returns a new Shell
@@ -532,6 +535,32 @@ func round(d time.Duration) time.Duration {
532535
}
533536
}
534537

538+
// spanMakerWriter is an io.Writer shim that captures logs and automatically creates trace spans for the log group.
539+
type spanMakerWriter struct {
540+
w io.Writer
541+
ctx context.Context
542+
span opentracing.Span
543+
}
544+
545+
func (s *spanMakerWriter) Write(p []byte) (n int, err error) {
546+
if bytes.HasPrefix(p, []byte("~~~ ")) || bytes.HasPrefix(p, []byte("--- ")) || bytes.HasPrefix(p, []byte("+++ ")) {
547+
s.FinishIfActive()
548+
operationName, _, _ := strings.Cut(string(p[4:]), "\r\n")
549+
// We don't store the context bc we don't need to create child spans (yet). If we stored it, every log group would
550+
// look like a child of the previous log group, where they're all more like siblings under the same parent span,
551+
// since Buildkite itself has no concept of log group hierarchy.
552+
s.span, _ = opentracing.StartSpanFromContext(s.ctx, operationName)
553+
}
554+
return s.w.Write(p)
555+
}
556+
557+
func (s *spanMakerWriter) FinishIfActive() {
558+
if s.span != nil {
559+
s.span.Finish()
560+
s.span = nil
561+
}
562+
}
563+
535564
func (s *Shell) executeCommand(
536565
ctx context.Context,
537566
cmd *command,
@@ -542,6 +571,8 @@ func (s *Shell) executeCommand(
542571
tracedEnv := env.FromSlice(cmd.Env)
543572
s.injectTraceCtx(ctx, tracedEnv)
544573
cmd.Env = tracedEnv.ToSlice()
574+
logToSpanWriter := &spanMakerWriter{w: w, ctx: ctx, span: nil}
575+
defer func() { logToSpanWriter.FinishIfActive() }()
545576

546577
s.cmdLock.Lock()
547578
s.cmd = cmd
@@ -561,11 +592,11 @@ func (s *Shell) executeCommand(
561592
// Modify process config based on execution flags
562593
if flags.PTY {
563594
cfg.PTY = true
564-
cfg.Stdout = w
595+
cfg.Stdout = logToSpanWriter
565596
} else {
566597
// Show stdout if requested or via debug
567598
if flags.Stdout {
568-
cfg.Stdout = w
599+
cfg.Stdout = logToSpanWriter
569600
} else if s.Debug {
570601
stdOutStreamer := NewLoggerStreamer(s.Logger)
571602
defer stdOutStreamer.Close()
@@ -574,7 +605,7 @@ func (s *Shell) executeCommand(
574605

575606
// Show stderr if requested or via debug
576607
if flags.Stderr {
577-
cfg.Stderr = w
608+
cfg.Stderr = logToSpanWriter
578609
} else if s.Debug {
579610
stdErrStreamer := NewLoggerStreamer(s.Logger)
580611
defer stdErrStreamer.Close()

0 commit comments

Comments
 (0)