Adding more logging to the retry.
This commit is contained in:
parent
2374e11b6d
commit
c89ff2b19f
1 changed file with 14 additions and 6 deletions
20
retry.go
20
retry.go
|
@@ -111,21 +111,26 @@ func (r *realisClient) ThriftCallWithRetries(thriftCall auroraThriftCall) (*auro
|
||||||
|
|
||||||
for i := 0; i < backoff.Steps; i++ {
|
for i := 0; i < backoff.Steps; i++ {
|
||||||
|
|
||||||
// If this isn't our first try, backoff before the next try
|
// If this isn't our first try, backoff before the next try.
|
||||||
if i != 0 {
|
if i != 0 {
|
||||||
adjusted := duration
|
adjusted := duration
|
||||||
if backoff.Jitter > 0.0 {
|
if backoff.Jitter > 0.0 {
|
||||||
adjusted = Jitter(duration, backoff.Jitter)
|
adjusted = Jitter(duration, backoff.Jitter)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
r.logger.Printf("An error occurred during thrift call, backing off for %v before retrying\n", adjusted)
|
||||||
|
|
||||||
time.Sleep(adjusted)
|
time.Sleep(adjusted)
|
||||||
duration = time.Duration(float64(duration) * backoff.Factor)
|
duration = time.Duration(float64(duration) * backoff.Factor)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Only allow one go-routine make use or modify the thrift client connection
|
// Only allow one go-routine make use or modify the thrift client connection.
|
||||||
r.lock.Lock()
|
r.lock.Lock()
|
||||||
resp, clientErr = thriftCall()
|
resp, clientErr = thriftCall()
|
||||||
r.lock.Unlock()
|
r.lock.Unlock()
|
||||||
|
|
||||||
|
// Check if our thrift call is returning an error. This is a retriable event as we don't know
|
||||||
|
// if it was caused by network issues.
|
||||||
if clientErr != nil {
|
if clientErr != nil {
|
||||||
r.ReestablishConn()
|
r.ReestablishConn()
|
||||||
|
|
||||||
|
@@ -134,11 +139,13 @@ func (r *realisClient) ThriftCallWithRetries(thriftCall auroraThriftCall) (*auro
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// If there was no client error, but the response is nil, something went wrong.
|
||||||
|
// Ideally, we'll never encounter this but we're placing a safeguard here.
|
||||||
if resp == nil {
|
if resp == nil {
|
||||||
return nil, errors.New("Response from aurora is nil")
|
return nil, errors.New("Response from aurora is nil")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check Response Code from thrift and make a decision to continue retrying or not
|
// Check Response Code from thrift and make a decision to continue retrying or not.
|
||||||
switch responseCode := resp.GetResponseCode(); responseCode {
|
switch responseCode := resp.GetResponseCode(); responseCode {
|
||||||
|
|
||||||
// If the thrift call succeeded, stop retrying
|
// If the thrift call succeeded, stop retrying
|
||||||
|
@@ -147,17 +154,18 @@ func (r *realisClient) ThriftCallWithRetries(thriftCall auroraThriftCall) (*auro
|
||||||
|
|
||||||
// If the response code is transient, continue retrying
|
// If the response code is transient, continue retrying
|
||||||
case aurora.ResponseCode_ERROR_TRANSIENT:
|
case aurora.ResponseCode_ERROR_TRANSIENT:
|
||||||
|
r.logger.Println("Aurora replied with Transient error code, retrying")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
// Failure scenarios, these indicate a bad payload or config. Stop retrying.
|
// Failure scenarios, these indicate a bad payload or a bad config. Stop retrying.
|
||||||
case aurora.ResponseCode_INVALID_REQUEST:
|
case aurora.ResponseCode_INVALID_REQUEST:
|
||||||
case aurora.ResponseCode_ERROR:
|
case aurora.ResponseCode_ERROR:
|
||||||
case aurora.ResponseCode_AUTH_FAILED:
|
case aurora.ResponseCode_AUTH_FAILED:
|
||||||
case aurora.ResponseCode_JOB_UPDATING_ERROR:
|
case aurora.ResponseCode_JOB_UPDATING_ERROR:
|
||||||
return nil, errors.New(response.CombineMessage(resp))
|
return nil, errors.New(response.CombineMessage(resp))
|
||||||
|
|
||||||
// The only case that should fall down to here is WARNING. It is currently not used
|
// The only case that should fall down to here is a WARNING response code.
|
||||||
// as a response in the scheduler.
|
// It is currently not used as a response in the scheduler so it is unknown how to handle it.
|
||||||
default:
|
default:
|
||||||
return nil, errors.Errorf("unhandled response code from Aurora %v", responseCode.String())
|
return nil, errors.Errorf("unhandled response code from Aurora %v", responseCode.String())
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue