Skip to content

Commit 5b04f71

Browse files
JohnRuskzezha-msft
authored andcommitted
Tidy retry delay jitter calculations
Jitter calc for trying secondary was wrong. For primary it was correct, so just tidied the formula up for consistency.
1 parent ecee659 commit 5b04f71

File tree

2 files changed

+10
-5
lines changed

2 files changed

+10
-5
lines changed

azblob/zc_policy_retry.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -120,7 +120,8 @@ func (o RetryOptions) calcDelay(try int32) time.Duration { // try is >=1; never
120120
}
121121

122122
// Introduce some jitter: [0.0, 1.0) / 2 = [0.0, 0.5) + 0.8 = [0.8, 1.3)
123-
delay = time.Duration(delay.Seconds() * (rand.Float64()/2 + 0.8) * float64(time.Second)) // NOTE: We want math/rand; not crypto/rand
123+
// For casts and rounding - be careful, as per https://github.com/golang/go/issues/20757
124+
delay = time.Duration(float32(delay) * (rand.Float32()/2 + 0.8)) // NOTE: We want math/rand; not crypto/rand
124125
if delay > o.MaxRetryDelay {
125126
delay = o.MaxRetryDelay
126127
}
@@ -157,7 +158,8 @@ func NewRetryPolicyFactory(o RetryOptions) pipeline.Factory {
157158
logf("Primary try=%d, Delay=%v\n", primaryTry, delay)
158159
time.Sleep(delay) // The 1st try returns 0 delay
159160
} else {
160-
delay := time.Second * time.Duration(rand.Float32()/2+0.8)
161+
// For casts and rounding - be careful, as per https://github.com/golang/go/issues/20757
162+
delay := time.Duration(float32(time.Second) * (rand.Float32()/2 + 0.8))
161163
logf("Secondary try=%d, Delay=%v\n", try-primaryTry, delay)
162164
time.Sleep(delay) // Delay with some jitter before trying secondary
163165
}

azblob/zt_policy_retry_test.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ func testRetryTestScenario(c *chk.C, scenario retryTestScenario) {
155155
MaxRetryDelay: 4 * time.Second,
156156
RetryReadsFromSecondaryHost: "SecondaryDC",
157157
}
158+
minExpectedTimeToMaxRetries := (retryOptions.MaxRetryDelay * time.Duration(retryOptions.MaxTries-3)) / 2 // a very rough approximation, of a lower bound, given assumption that we hit the cap early in the retry count, and pessimistically assuming that all get halved by random jitter calcs
158159
ctx := context.Background()
159160
ctx, cancel := context.WithTimeout(ctx, 64 /*2^MaxTries(6)*/ *retryOptions.TryTimeout)
160161
retrytestPolicyFactory := newRetryTestPolicyFactory(c, scenario, retryOptions.MaxTries, cancel)
@@ -164,16 +165,18 @@ func testRetryTestScenario(c *chk.C, scenario retryTestScenario) {
164165
}
165166
p := pipeline.NewPipeline(factories[:], pipeline.Options{})
166167
request, err := pipeline.NewRequest(http.MethodGet, *u, strings.NewReader("TestData"))
168+
start := time.Now()
167169
response, err := p.Do(ctx, nil, request)
168170
switch scenario {
169171
case retryTestScenarioRetryUntilSuccess:
170172
if err != nil || response == nil || response.Response() == nil || response.Response().StatusCode != http.StatusOK {
171173
c.Fail() // Operation didn't run to success
172174
}
173175
case retryTestScenarioRetryUntilMaxRetries:
174-
c.Assert(err, chk.NotNil) // Ensure we ended with an error
175-
c.Assert(response, chk.IsNil) // Ensure we ended without a valid response
176-
c.Assert(retrytestPolicyFactory.try, chk.Equals, retryOptions.MaxTries) // Ensure the operation ends with the exact right number of tries
176+
c.Assert(err, chk.NotNil) // Ensure we ended with an error
177+
c.Assert(response, chk.IsNil) // Ensure we ended without a valid response
178+
c.Assert(retrytestPolicyFactory.try, chk.Equals, retryOptions.MaxTries) // Ensure the operation ends with the exact right number of tries
179+
c.Assert(time.Since(start) > minExpectedTimeToMaxRetries, chk.Equals, true) // Ensure it took about as long to get here as we expect (bearing in mind randomness in the jitter), as a basic sanity check of our delay duration calculations
177180
case retryTestScenarioRetryUntilOperationCancel:
178181
c.Assert(err, chk.Equals, context.Canceled) // Ensure we ended due to cancellation
179182
c.Assert(response, chk.IsNil) // Ensure we ended without a valid response

0 commit comments

Comments
 (0)