Skip to content

Commit 03e6123

Browse files
committed
Auto-create tracing spans for log groups
This will make it easy for CI users to trace and analyze their CI jobs without requiring additional work or scripting on their part. They can just look at tracing information on their tracing provider.
1 parent ace27c4 commit 03e6123

File tree

6 files changed

+58
-4
lines changed

6 files changed

+58
-4
lines changed

agent/agent_configuration.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,4 +57,5 @@ type AgentConfiguration struct {
5757
AcquireJob string
5858
TracingBackend string
5959
TracingServiceName string
60+
TraceLogGroups bool
6061
}

clicommand/agent_start.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,7 @@ type AgentStartConfig struct {
142142
MetricsDatadogDistributions bool `cli:"metrics-datadog-distributions"`
143143
TracingBackend string `cli:"tracing-backend"`
144144
TracingServiceName string `cli:"tracing-service-name"`
145+
TraceLogGroups bool `cli:"trace-log-groups"`
145146

146147
// Global flags
147148
Debug bool `cli:"debug"`
@@ -582,6 +583,11 @@ var AgentStartCommand = cli.Command{
582583
EnvVar: "BUILDKITE_TRACING_SERVICE_NAME",
583584
Value: "buildkite-agent",
584585
},
586+
cli.BoolFlag{
587+
Name: "trace-log-groups",
588+
Usage: "Automatically creates tracing spans for log groups when tracing is enabled.",
589+
EnvVar: "BUILDKITE_TRACE_LOG_GROUPS",
590+
},
585591
cli.StringFlag{
586592
Name: "job-verification-key-path",
587593
Usage: "Path to a file containing a verification key. Passing this flag enables job verification. For hmac-sha256, the raw file content is used as the shared key",
@@ -800,9 +806,13 @@ var AgentStartCommand = cli.Command{
800806
})
801807

802808
// Sense check supported tracing backends, we don't want bootstrapped jobs to silently have no tracing
803-
if _, has := tracetools.ValidTracingBackends[cfg.TracingBackend]; !has {
809+
_, tracingEnabled := tracetools.ValidTracingBackends[cfg.TracingBackend]
810+
if !tracingEnabled {
804811
l.Fatal("The given tracing backend %q is not supported. Valid backends are: %q", cfg.TracingBackend, maps.Keys(tracetools.ValidTracingBackends))
805812
}
813+
if !tracingEnabled && cfg.TraceLogGroups {
814+
l.Fatal("Log group tracing cannot be enabled without a tracing backend.")
815+
}
806816

807817
if experiments.IsEnabled(experiments.AgentAPI) {
808818
shutdown := runAgentAPI(ctx, l, cfg.SocketsPath)
@@ -864,6 +874,7 @@ var AgentStartCommand = cli.Command{
864874
AcquireJob: cfg.AcquireJob,
865875
TracingBackend: cfg.TracingBackend,
866876
TracingServiceName: cfg.TracingServiceName,
877+
TraceLogGroups: cfg.TraceLogGroups,
867878
JobVerificationNoSignatureBehavior: cfg.JobVerificationNoSignatureBehavior,
868879
JobVerificationInvalidSignatureBehavior: cfg.JobVerificationInvalidSignatureBehavior,
869880

clicommand/bootstrap.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ type BootstrapConfig struct {
9494
RedactedVars []string `cli:"redacted-vars" normalize:"list"`
9595
TracingBackend string `cli:"tracing-backend"`
9696
TracingServiceName string `cli:"tracing-service-name"`
97+
TraceLogGroups bool `cli:"trace-log-groups"`
9798
}
9899

99100
var BootstrapCommand = cli.Command{
@@ -355,6 +356,11 @@ var BootstrapCommand = cli.Command{
355356
EnvVar: "BUILDKITE_TRACING_SERVICE_NAME",
356357
Value: "buildkite-agent",
357358
},
359+
cli.BoolFlag{
360+
Name: "trace-log-groups",
361+
Usage: "Automatically creates tracing spans for log groups when tracing is enabled.",
362+
EnvVar: "BUILDKITE_TRACE_LOG_GROUPS",
363+
},
358364
DebugFlag,
359365
LogLevelFlag,
360366
ExperimentsFlag,
@@ -439,6 +445,7 @@ var BootstrapCommand = cli.Command{
439445
Tag: cfg.Tag,
440446
TracingBackend: cfg.TracingBackend,
441447
TracingServiceName: cfg.TracingServiceName,
448+
TraceLogGroups: cfg.TraceLogGroups,
442449
})
443450

444451
cctx, cancel := context.WithCancel(ctx)

internal/job/config.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -164,6 +164,9 @@ type ExecutorConfig struct {
164164

165165
// Service name to use when reporting traces.
166166
TracingServiceName string
167+
168+
// Whether to auto-create tracing spans for log groups.
169+
TraceLogGroups bool
167170
}
168171

169172
// ReadFromEnvironment reads configuration from the Environment, returns a map

internal/job/executor.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ func (e *Executor) Run(ctx context.Context) (exitCode int) {
8282
e.shell.Debug = e.ExecutorConfig.Debug
8383
e.shell.InterruptSignal = e.ExecutorConfig.CancelSignal
8484
e.shell.SignalGracePeriod = e.ExecutorConfig.SignalGracePeriod
85+
e.shell.TraceLogGroups = e.ExecutorConfig.TraceLogGroups
8586
}
8687
if experiments.IsEnabled(experiments.KubernetesExec) {
8788
kubernetesClient := &kubernetes.Client{}

internal/job/shell/shell.go

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,9 @@ type Shell struct {
7272

7373
// Amount of time to wait between sending the InterruptSignal and SIGKILL
7474
SignalGracePeriod time.Duration
75+
76+
// Whether to auto create tracing spans for log groups
77+
TraceLogGroups bool
7578
}
7679

7780
// New returns a new Shell
@@ -532,6 +535,32 @@ func round(d time.Duration) time.Duration {
532535
}
533536
}
534537

538+
// spanMakerWriter is an io.Writer shim that captures logs and automatically creates trace spans for the log group.
539+
type spanMakerWriter struct {
540+
w io.Writer
541+
ctx context.Context
542+
span opentracing.Span
543+
}
544+
545+
func (s *spanMakerWriter) Write(p []byte) (n int, err error) {
546+
if bytes.HasPrefix(p, []byte("~~~ ")) || bytes.HasPrefix(p, []byte("--- ")) || bytes.HasPrefix(p, []byte("+++ ")) {
547+
s.FinishIfActive()
548+
operationName, _, _ := strings.Cut(string(p[4:]), "\r\n")
549+
// We don't store the context bc we don't need to create child spans (yet). If we stored it, every log group would
550+
// look like a child of the previous log group, where they're all more like siblings under the same parent span,
551+
// since Buildkite itself has no concept of log group hierarchy.
552+
s.span, _ = opentracing.StartSpanFromContext(s.ctx, operationName)
553+
}
554+
return s.w.Write(p)
555+
}
556+
557+
func (s *spanMakerWriter) FinishIfActive() {
558+
if s.span != nil {
559+
s.span.Finish()
560+
s.span = nil
561+
}
562+
}
563+
535564
func (s *Shell) executeCommand(
536565
ctx context.Context,
537566
cmd *command,
@@ -542,6 +571,8 @@ func (s *Shell) executeCommand(
542571
tracedEnv := env.FromSlice(cmd.Env)
543572
s.injectTraceCtx(ctx, tracedEnv)
544573
cmd.Env = tracedEnv.ToSlice()
574+
logToSpanWriter := &spanMakerWriter{w: w, ctx: ctx, span: nil}
575+
defer func() { logToSpanWriter.FinishIfActive() }()
545576

546577
s.cmdLock.Lock()
547578
s.cmd = cmd
@@ -561,11 +592,11 @@ func (s *Shell) executeCommand(
561592
// Modify process config based on execution flags
562593
if flags.PTY {
563594
cfg.PTY = true
564-
cfg.Stdout = w
595+
cfg.Stdout = logToSpanWriter
565596
} else {
566597
// Show stdout if requested or via debug
567598
if flags.Stdout {
568-
cfg.Stdout = w
599+
cfg.Stdout = logToSpanWriter
569600
} else if s.Debug {
570601
stdOutStreamer := NewLoggerStreamer(s.Logger)
571602
defer stdOutStreamer.Close()
@@ -574,7 +605,7 @@ func (s *Shell) executeCommand(
574605

575606
// Show stderr if requested or via debug
576607
if flags.Stderr {
577-
cfg.Stderr = w
608+
cfg.Stderr = logToSpanWriter
578609
} else if s.Debug {
579610
stdErrStreamer := NewLoggerStreamer(s.Logger)
580611
defer stdErrStreamer.Close()

0 commit comments

Comments
 (0)