diff --git a/docs/examples/basic-price-oracle/main.go b/docs/examples/basic-price-oracle/main.go index 83609fc7c..4fde54c0c 100644 --- a/docs/examples/basic-price-oracle/main.go +++ b/docs/examples/basic-price-oracle/main.go @@ -29,6 +29,7 @@ import ( "github.com/sirupsen/logrus" "google.golang.org/grpc" "google.golang.org/grpc/credentials" + "google.golang.org/grpc/keepalive" ) const ( @@ -455,11 +456,44 @@ func main() { log.Fatalf("Failed to generate TLS certificate: %v", err) } - // Create the gRPC server with TLS + // Configure server-side keepalive parameters. These settings ensure the + // server actively probes client connection health and allows long-lived + // idle connections. + serverKeepalive := keepalive.ServerParameters{ + // Ping clients after 1 minute of inactivity. + Time: time.Minute, + + // Wait 20 seconds for ping response. + Timeout: 20 * time.Second, + + // Allow connections to stay idle for 24 hours. The active + // pinging mechanism (via Time parameter) handles health + // checking, so we don't need aggressive idle timeouts. + MaxConnectionIdle: time.Hour * 24, + } + + // Configure client keepalive enforcement policy. This tells the server + // how to handle client keepalive pings. + clientKeepalive := keepalive.EnforcementPolicy{ + // Allow client to ping even when there are no active RPCs. + // This is critical for long-lived connections with infrequent + // price queries. + PermitWithoutStream: true, + + // Prevent abusive clients from pinging too frequently (DoS + // protection). + MinTime: 5 * time.Second, + } + + // Create the gRPC server with TLS and keepalive configuration. 
transportCredentials := credentials.NewTLS(&tls.Config{ Certificates: []tls.Certificate{tlsCert}, }) - backendService := grpc.NewServer(grpc.Creds(transportCredentials)) + backendService := grpc.NewServer( + grpc.Creds(transportCredentials), + grpc.KeepaliveParams(serverKeepalive), + grpc.KeepaliveEnforcementPolicy(clientKeepalive), + ) err = startService(backendService) if err != nil { diff --git a/docs/release-notes/release-notes-0.7.0.md b/docs/release-notes/release-notes-0.7.0.md index 92eba5250..1710804fb 100644 --- a/docs/release-notes/release-notes-0.7.0.md +++ b/docs/release-notes/release-notes-0.7.0.md @@ -63,6 +63,18 @@ dirty. This issue has been resolved, and the behavior is now consistent across all database backend types. +- [Fixed "connection reset by peer" errors in RFQ price oracle + connections](https://github.com/lightninglabs/taproot-assets/pull/1834) by + implementing comprehensive bidirectional gRPC keepalive configuration. The + issue occurred when connections sat idle between RFQ price queries and were + silently closed by the network layer or server timeout, causing the first + payment attempt after an idle period to fail. The fix adds client-side + keepalive pings every 30 seconds and extends the server's idle connection + timeout from 2 minutes to 24 hours, while enabling active health checking on + both sides. This ensures connections remain alive during infrequent RFQ + operations and any network issues are detected promptly rather than + discovered only when the next RPC fails. 
+ # New Features ## Functional Enhancements diff --git a/rfq/oracle.go b/rfq/oracle.go index e7ee28694..96b3c565f 100644 --- a/rfq/oracle.go +++ b/rfq/oracle.go @@ -18,6 +18,7 @@ import ( "google.golang.org/grpc" "google.golang.org/grpc/credentials" "google.golang.org/grpc/credentials/insecure" + "google.golang.org/grpc/keepalive" ) // PriceQueryIntent is an enum that represents the intent of a price rate @@ -186,16 +187,38 @@ type RpcPriceOracle struct { rawConn *grpc.ClientConn } +// clientKeepaliveDialOption configures client-side keepalive pings so that idle +// RFQ connections are not silently dropped by network intermediaries (NATs, +// load balancers) and dead peers are detected promptly. Without active +// keepalive, the first price query after an idle period would fail with +// "connection reset by peer" and require a retry. +var clientKeepaliveDialOption = grpc.WithKeepaliveParams( + keepalive.ClientParameters{ + // Ping server after 30 seconds of inactivity. + Time: 30 * time.Second, + + // Wait 20 seconds for ping response. + Timeout: 20 * time.Second, + + // Permit keepalive pings even when there are no active + // streams. This is critical for long-lived connections with + // infrequent RFQ requests. + PermitWithoutStream: true, + }, +) + // serverDialOpts returns the set of server options needed to connect to the // price oracle RPC server using a TLS connection. func serverDialOpts() ([]grpc.DialOption, error) { var opts []grpc.DialOption - // Skip TLS certificate verification. 
tlsConfig := tls.Config{InsecureSkipVerify: true} transportCredentials := credentials.NewTLS(&tlsConfig) + opts = append(opts, grpc.WithTransportCredentials(transportCredentials)) + opts = append(opts, clientKeepaliveDialOption) + return opts, nil } @@ -209,6 +232,8 @@ func insecureServerDialOpts() ([]grpc.DialOption, error) { insecure.NewCredentials(), )) + opts = append(opts, clientKeepaliveDialOption) + return opts, nil } diff --git a/server.go b/server.go index 23698a4d7..ecabc76b5 100644 --- a/server.go +++ b/server.go @@ -370,11 +370,34 @@ func (s *Server) RunUntilShutdown(mainErrChan <-chan error) error { serverOpts = append(serverOpts, rpcServerOpts...) serverOpts = append(serverOpts, ServerMaxMsgReceiveSize) - keepAliveParams := keepalive.ServerParameters{ - MaxConnectionIdle: time.Minute * 2, - } - - serverOpts = append(serverOpts, grpc.KeepaliveParams(keepAliveParams)) + // Configure server-side keepalive parameters. These settings allow the + // server to actively probe the connection health and ensure connections + // stay alive during idle periods. + serverKeepalive := keepalive.ServerParameters{ + // Ping client after 1 minute of inactivity. + Time: time.Minute, + // Wait 20 seconds for ping response. + Timeout: 20 * time.Second, + // Allow connections to remain idle for extended periods. This + // is particularly important for RFQ operations where price + // oracle connections may be idle for long periods. + MaxConnectionIdle: time.Hour * 24, + } + + // Configure client enforcement policy. This allows clients to send + // keepalive pings even when there are no active streams, which is + // crucial for long-lived connections with infrequent activity. + clientKeepalive := keepalive.EnforcementPolicy{ + // Minimum time between client pings. + MinTime: 5 * time.Second, + // Allow pings without active RPCs. 
+ PermitWithoutStream: true, + } + + serverOpts = append( + serverOpts, grpc.KeepaliveParams(serverKeepalive), + grpc.KeepaliveEnforcementPolicy(clientKeepalive), + ) grpcServer := grpc.NewServer(serverOpts...) defer grpcServer.Stop()