Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 13 additions & 4 deletions client/v3/lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,12 @@ const (
// NoLease is a lease ID for the absence of a lease.
NoLease LeaseID = 0

// retryConnWait is how long to wait before retrying request due to an error
retryConnWait = 500 * time.Millisecond
// retryConnMinBackoff is the starting backoff when retrying a request due to an error
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How were these values chosen?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

They were chosen to be in line with the default exponential backoff parameters of other widely used client-side libraries. For example, the aws-sdk-go-v2 library has a default max backoff of 20 seconds: https://github.com/aws/aws-sdk-go-v2/blob/main/aws/retry/standard.go#L31

I kept the initial backoff at 500ms, since that is the current (fixed) retry wait time.

retryConnMinBackoff = 500 * time.Millisecond
// retryConnMaxBackoff is the max backoff when retrying a request due to an error
retryConnMaxBackoff = 15 * time.Second
// sendKeepaliveFrequency is how often to send keepalives
sendKeepaliveFrequency = 500 * time.Millisecond
)

// LeaseResponseChSize is the size of buffer to store unsent lease responses.
Expand Down Expand Up @@ -458,16 +462,19 @@ func (l *lessor) recvKeepAliveLoop() (gerr error) {
l.mu.Unlock()
}()

backoffGeneration := 0
for {
stream, err := l.resetRecv()
if err != nil {
backoffGeneration++
l.lg.Warn("error occurred during lease keep alive loop",
zap.Error(err),
)
if canceledByCaller(l.stopCtx, err) {
return err
}
} else {
backoffGeneration = 0
for {
resp, err := stream.Recv()
if err != nil {
Expand All @@ -485,8 +492,10 @@ func (l *lessor) recvKeepAliveLoop() (gerr error) {
}
}

backoff := jitterUp(expBackoff(backoffGeneration, retryConnMinBackoff, retryConnMaxBackoff), 0.5)

select {
case <-time.After(retryConnWait):
case <-time.After(backoff):
case <-l.stopCtx.Done():
return l.stopCtx.Err()
}
Expand Down Expand Up @@ -607,7 +616,7 @@ func (l *lessor) sendKeepAliveLoop(stream pb.Lease_LeaseKeepAliveClient) {
}

select {
case <-time.After(retryConnWait):
case <-time.After(sendKeepaliveFrequency):
case <-stream.Context().Done():
return
case <-l.donec:
Expand Down
9 changes: 9 additions & 0 deletions client/v3/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
package clientv3

import (
"math"
"math/rand"
"time"
)
Expand All @@ -29,3 +30,11 @@ func jitterUp(duration time.Duration, jitter float64) time.Duration {
multiplier := jitter * (rand.Float64()*2 - 1)
return time.Duration(float64(duration) * (1 + multiplier))
}

// expBackoff returns an exponential backoff duration.
//
// The delay is minDelay for generation 0 and doubles with every subsequent
// generation; the result is clamped to [minDelay, maxDelay].
//
// Edge cases:
//   - A negative generation is treated as generation 0, so the result never
//     drops below minDelay.
//   - If minDelay >= maxDelay, maxDelay is returned (the upper bound wins).
//   - A non-positive minDelay is returned unchanged, since doubling it cannot
//     produce a meaningful backoff.
func expBackoff(generation int, minDelay, maxDelay time.Duration) time.Duration {
	if minDelay >= maxDelay {
		return maxDelay
	}
	if generation <= 0 || minDelay <= 0 {
		return minDelay
	}

	// math.Pow saturates to +Inf for very large generations; the comparison
	// below handles that, and it also guarantees the float value fits in a
	// time.Duration before converting.
	delay := math.Pow(2, float64(generation)) * float64(minDelay)
	if delay >= float64(maxDelay) {
		return maxDelay
	}
	return time.Duration(delay)
}