Bugfix: switch cases were missing fallthrough statements, which caused non-retriable errors to be retried. The response is now propagated back up to the caller when it is available, in order to provide context if needed.
This commit is contained in:
parent
9ebf118e71
commit
01911e47dd
2 changed files with 10 additions and 8 deletions
|
@ -625,7 +625,7 @@ func (r *realisClient) CreateService(auroraJob Job, settings *aurora.JobUpdateSe
|
|||
|
||||
resp, err := r.StartJobUpdate(update, "")
|
||||
if err != nil {
|
||||
return nil, nil, errors.Wrap(err, "unable to create service")
|
||||
return resp, nil, errors.Wrap(err, "unable to create service")
|
||||
}
|
||||
|
||||
if resp != nil && resp.GetResult_() != nil {
|
||||
|
@ -734,7 +734,7 @@ func (r *realisClient) StartJobUpdate(updateJob *UpdateJob, message string) (*au
|
|||
})
|
||||
|
||||
if retryErr != nil {
|
||||
return nil, errors.Wrap(retryErr, "Error sending StartJobUpdate command to Aurora Scheduler")
|
||||
return resp, errors.Wrap(retryErr, "Error sending StartJobUpdate command to Aurora Scheduler")
|
||||
}
|
||||
return resp, nil
|
||||
}
|
||||
|
|
14
retry.go
14
retry.go
|
@ -15,9 +15,8 @@
|
|||
package realis
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"math/rand"
|
||||
"time"
|
||||
|
||||
"github.com/paypal/gorealis/gen-go/apache/aurora"
|
||||
"github.com/paypal/gorealis/response"
|
||||
|
@ -178,18 +177,21 @@ func (r *realisClient) thriftCallWithRetries(thriftCall auroraThriftCall) (*auro
|
|||
case aurora.ResponseCode_OK:
|
||||
return resp, nil
|
||||
|
||||
// If the response code is transient, continue retrying
|
||||
// If the response code is transient, continue retrying
|
||||
case aurora.ResponseCode_ERROR_TRANSIENT:
|
||||
r.logger.Println("Aurora replied with Transient error code, retrying")
|
||||
continue
|
||||
|
||||
// Failure scenarios, these indicate a bad payload or a bad config. Stop retrying.
|
||||
// Failure scenarios, these indicate a bad payload or a bad config. Stop retrying.
|
||||
case aurora.ResponseCode_INVALID_REQUEST:
|
||||
fallthrough
|
||||
case aurora.ResponseCode_ERROR:
|
||||
fallthrough
|
||||
case aurora.ResponseCode_AUTH_FAILED:
|
||||
fallthrough
|
||||
case aurora.ResponseCode_JOB_UPDATING_ERROR:
|
||||
r.logger.Println("Terminal bad reply from Aurora, won't retry")
|
||||
return nil, errors.New(response.CombineMessage(resp))
|
||||
r.logger.Printf("Terminal Response Code %v from Aurora, won't retry\n", resp.GetResponseCode().String())
|
||||
return resp, errors.New(response.CombineMessage(resp))
|
||||
|
||||
// The only case that should fall down to here is a WARNING response code.
|
||||
// It is currently not used as a response in the scheduler so it is unknown how to handle it.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue