Skip to content

Commit 0fbcb9d

Browse files
authored
fix(go): display clear error message when logger fails (#3577)
1 parent 418b05c commit 0fbcb9d

File tree

3 files changed

+51
-110
lines changed

3 files changed

+51
-110
lines changed

go/plugins/googlecloud/googlecloud.go

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ import (
2525
"os"
2626
"os/signal"
2727
"strings"
28+
"sync"
2829
"syscall"
2930
"time"
3031

@@ -45,6 +46,9 @@ import (
4546
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
4647
)
4748

49+
// showLoggingInstructionsOnce ensures the logging setup instructions are printed at most once, across all recovery attempts.
50+
var showLoggingInstructionsOnce sync.Once
51+
4852
// EnableGoogleCloudTelemetry enables comprehensive telemetry export to Google Cloud Observability suite.
4953
// This directly initializes telemetry without requiring plugin registration.
5054
//
@@ -222,6 +226,10 @@ func FlushMetrics(ctx context.Context) error {
222226
}
223227

224228
func setLogHandler(projectID string, level slog.Leveler, credentials *google.Credentials) error {
229+
return setupGCPLogger(projectID, level, credentials)
230+
}
231+
232+
func setupGCPLogger(projectID string, level slog.Leveler, credentials *google.Credentials) error {
225233
var clientOpts []option.ClientOption
226234
if credentials != nil {
227235
clientOpts = append(clientOpts, option.WithCredentials(credentials))
@@ -231,6 +239,28 @@ func setLogHandler(projectID string, level slog.Leveler, credentials *google.Cre
231239
if err != nil {
232240
return fmt.Errorf("failed to create logging client: %w", err)
233241
}
242+
// Handle asynchronous logging failures: fall back to stderr logging, then try to rebuild the GCP logger by calling setupGCPLogger again.
243+
c.OnError = func(err error) {
244+
fallbackHandler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
245+
Level: level,
246+
})
247+
slog.SetDefault(slog.New(fallbackHandler))
248+
slog.Warn("Switched to stderr logging due to Google Cloud logging failure", "error", err)
249+
slog.Error("Unable to send logs to Google Cloud", "error", err)
250+
if loggingDenied(err) {
251+
showLoggingInstructionsOnce.Do(func() {
252+
fmt.Fprint(os.Stderr, loggingDeniedHelpText(projectID))
253+
})
254+
}
255+
256+
// Assume the current logger is compromised and must be replaced.
257+
// Reinitialize logging by creating a new logger instance with the same configuration.
258+
if setupErr := setupGCPLogger(projectID, level, credentials); setupErr == nil {
259+
slog.Info("Initialized a new GcpLogger")
260+
} else {
261+
slog.Error("Failed to reinitialize GCP logger", "error", setupErr)
262+
}
263+
}
234264
logger := c.Logger("genkit_log")
235265
slog.SetDefault(slog.New(newHandler(level, logger.Log, projectID)))
236266
return nil

go/plugins/googlecloud/slog_handler.go

Lines changed: 18 additions & 109 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,7 @@ import (
2222
"context"
2323
"fmt"
2424
"log/slog"
25-
"os"
2625
"strings"
27-
"time"
2826

2927
"cloud.google.com/go/logging"
3028
"github.com/jba/slog/withsupport"
@@ -37,29 +35,21 @@ const MetadataKey = "metadata"
3735

3836
// Enhanced handler with error handling
3937
type handler struct {
40-
level slog.Leveler
41-
handleEntry func(logging.Entry)
42-
goa *withsupport.GroupOrAttrs
43-
projectID string
44-
fallbackHandler slog.Handler
45-
instructionsLogged bool
38+
level slog.Leveler
39+
handleEntry func(logging.Entry)
40+
goa *withsupport.GroupOrAttrs
41+
projectID string
4642
}
4743

4844
func newHandler(level slog.Leveler, f func(logging.Entry), projectID string) *handler {
4945
if level == nil {
5046
level = slog.LevelInfo
5147
}
5248

53-
// Create fallback handler for when GCP logging fails
54-
fallbackHandler := slog.NewTextHandler(os.Stderr, &slog.HandlerOptions{
55-
Level: level,
56-
})
57-
5849
return &handler{
59-
level: level,
60-
handleEntry: f,
61-
projectID: projectID,
62-
fallbackHandler: fallbackHandler,
50+
level: level,
51+
handleEntry: f,
52+
projectID: projectID,
6353
}
6454
}
6555

@@ -69,21 +59,19 @@ func (h *handler) Enabled(ctx context.Context, level slog.Level) bool {
6959

7060
func (h *handler) WithAttrs(as []slog.Attr) slog.Handler {
7161
return &handler{
72-
level: h.level,
73-
handleEntry: h.handleEntry,
74-
goa: h.goa.WithAttrs(as),
75-
projectID: h.projectID,
76-
fallbackHandler: h.fallbackHandler,
62+
level: h.level,
63+
handleEntry: h.handleEntry,
64+
goa: h.goa.WithAttrs(as),
65+
projectID: h.projectID,
7766
}
7867
}
7968

8069
func (h *handler) WithGroup(name string) slog.Handler {
8170
return &handler{
82-
level: h.level,
83-
handleEntry: h.handleEntry,
84-
goa: h.goa.WithGroup(name),
85-
projectID: h.projectID,
86-
fallbackHandler: h.fallbackHandler,
71+
level: h.level,
72+
handleEntry: h.handleEntry,
73+
goa: h.goa.WithGroup(name),
74+
projectID: h.projectID,
8775
}
8876
}
8977

@@ -96,95 +84,16 @@ func (h *handler) Handle(ctx context.Context, r slog.Record) error {
9684
strings.Contains(message, "google.logging.v2.LoggingServiceV2")
9785

9886
if isInternalGoogleCloudLog {
99-
// Skip internal Google Cloud SDK logs, but send to fallback for debugging if needed
100-
return h.fallbackHandler.Handle(ctx, r)
87+
// Skip internal Google Cloud SDK logs; they are just noise.
88+
return nil
10189
}
10290

10391
entry := h.recordToEntry(ctx, r)
10492

105-
// Try to send to GCP with error handling and recovery
106-
if err := h.handleWithRecovery(entry); err != nil {
107-
// Fall back to local logging if GCP fails
108-
return h.fallbackHandler.Handle(ctx, r)
109-
}
110-
93+
h.handleEntry(entry)
11194
return nil
11295
}
11396

114-
// handleWithRecovery attempts to send the log entry to GCP
115-
func (h *handler) handleWithRecovery(entry logging.Entry) error {
116-
// Attempt to send the log entry
117-
defer func() {
118-
if r := recover(); r != nil {
119-
h.handleError(fmt.Errorf("panic in GCP logging: %v", r))
120-
}
121-
}()
122-
123-
// Create a channel to capture any errors from the async logging operation
124-
errChan := make(chan error, 1)
125-
126-
// Wrap the handleEntry function to capture errors
127-
wrappedHandleEntry := func(entry logging.Entry) {
128-
defer func() {
129-
if r := recover(); r != nil {
130-
errChan <- fmt.Errorf("logging operation panic: %v", r)
131-
} else {
132-
errChan <- nil
133-
}
134-
}()
135-
h.handleEntry(entry)
136-
}
137-
138-
go wrappedHandleEntry(entry)
139-
140-
// Wait for completion with timeout
141-
select {
142-
case err := <-errChan:
143-
if err != nil {
144-
h.handleError(err)
145-
return err
146-
}
147-
return nil
148-
case <-time.After(5 * time.Second):
149-
err := fmt.Errorf("GCP logging timeout")
150-
h.handleError(err)
151-
return err
152-
}
153-
}
154-
155-
// handleError processes logging errors and triggers immediate recovery
156-
func (h *handler) handleError(err error) {
157-
// Check if this is a permission denied error for helpful messaging
158-
if loggingDenied(err) {
159-
h.logPermissionError(err)
160-
} else {
161-
// Log generic error
162-
h.fallbackHandler.Handle(context.Background(), slog.NewRecord(
163-
time.Now(),
164-
slog.LevelError,
165-
fmt.Sprintf("Unable to send logs to Google Cloud: %v", err),
166-
0,
167-
))
168-
}
169-
170-
}
171-
172-
// logPermissionError logs helpful permission error messages (only once)
173-
func (h *handler) logPermissionError(err error) {
174-
if !h.instructionsLogged {
175-
h.instructionsLogged = true
176-
helpText := loggingDeniedHelpText(h.projectID)
177-
errorMsg := fmt.Sprintf("Unable to send logs to Google Cloud: %v\n\n%s\n", err, helpText)
178-
179-
h.fallbackHandler.Handle(context.Background(), slog.NewRecord(
180-
time.Now(),
181-
slog.LevelError,
182-
errorMsg,
183-
0,
184-
))
185-
}
186-
}
187-
18897
func (h *handler) recordToEntry(ctx context.Context, r slog.Record) logging.Entry {
18998
span := trace.SpanFromContext(ctx)
19099

go/plugins/googlecloud/utils.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,9 @@ gcloud projects add-iam-policy-binding %s \
121121
--member=serviceAccount:${SERVICE_ACCOUNT_EMAIL} \
122122
--role=%s
123123
124-
For more information, see: https://cloud.google.com/docs/authentication/getting-started`, role, projectID, role)
124+
For more information, see: https://cloud.google.com/docs/authentication/getting-started
125+
126+
`, role, projectID, role)
125127
}
126128

127129
// loggingDeniedHelpText provides specific help for logging permission errors

0 commit comments

Comments
 (0)