Detecting if the transport error was not temporary in which case we stop retrying. Changed bug where get results was being called before we checked for an error.
This commit is contained in:
parent
6762c1784b
commit
dabd7418a9
2 changed files with 37 additions and 22 deletions
44
realis.go
44
realis.go
|
@ -555,14 +555,14 @@ func (r *realisClient) GetJobs(role string) (*aurora.Response, *aurora.GetJobsRe
|
|||
return r.readonlyClient.GetJobs(role)
|
||||
})
|
||||
|
||||
if resp.GetResult_() != nil {
|
||||
result = resp.GetResult_().GetJobsResult_
|
||||
}
|
||||
|
||||
if retryErr != nil {
|
||||
return nil, result, errors.Wrap(retryErr, "Error getting Jobs from Aurora Scheduler")
|
||||
}
|
||||
|
||||
if resp.GetResult_() != nil {
|
||||
result = resp.GetResult_().GetJobsResult_
|
||||
}
|
||||
|
||||
return resp, result, nil
|
||||
}
|
||||
|
||||
|
@ -635,7 +635,7 @@ func (r *realisClient) CreateService(auroraJob Job, settings *aurora.JobUpdateSe
|
|||
return resp, nil, errors.Wrap(err, "unable to create service")
|
||||
}
|
||||
|
||||
if resp != nil && resp.GetResult_() != nil {
|
||||
if resp.GetResult_() != nil {
|
||||
return resp, resp.GetResult_().GetStartJobUpdateResult_(), nil
|
||||
}
|
||||
|
||||
|
@ -879,7 +879,7 @@ func (r *realisClient) GetPendingReason(query *aurora.TaskQuery) (pendingReasons
|
|||
|
||||
var result map[*aurora.PendingReason]bool
|
||||
|
||||
if resp != nil && resp.GetResult_() != nil {
|
||||
if resp.GetResult_() != nil {
|
||||
result = resp.GetResult_().GetGetPendingReasonResult_().GetReasons()
|
||||
}
|
||||
for reason := range result {
|
||||
|
@ -999,14 +999,14 @@ func (r *realisClient) DrainHosts(hosts ...string) (*aurora.Response, *aurora.Dr
|
|||
return r.adminClient.DrainHosts(drainList)
|
||||
})
|
||||
|
||||
if resp != nil && resp.GetResult_() != nil {
|
||||
result = resp.GetResult_().GetDrainHostsResult_()
|
||||
}
|
||||
|
||||
if retryErr != nil {
|
||||
return resp, result, errors.Wrap(retryErr, "Unable to recover connection")
|
||||
}
|
||||
|
||||
if resp.GetResult_() != nil {
|
||||
result = resp.GetResult_().GetDrainHostsResult_()
|
||||
}
|
||||
|
||||
return resp, result, nil
|
||||
}
|
||||
|
||||
|
@ -1030,14 +1030,14 @@ func (r *realisClient) StartMaintenance(hosts ...string) (*aurora.Response, *aur
|
|||
return r.adminClient.StartMaintenance(hostList)
|
||||
})
|
||||
|
||||
if resp.GetResult_() != nil {
|
||||
result = resp.GetResult_().GetStartMaintenanceResult_()
|
||||
}
|
||||
|
||||
if retryErr != nil {
|
||||
return resp, result, errors.Wrap(retryErr, "Unable to recover connection")
|
||||
}
|
||||
|
||||
if resp.GetResult_() != nil {
|
||||
result = resp.GetResult_().GetStartMaintenanceResult_()
|
||||
}
|
||||
|
||||
return resp, result, nil
|
||||
}
|
||||
|
||||
|
@ -1061,14 +1061,14 @@ func (r *realisClient) EndMaintenance(hosts ...string) (*aurora.Response, *auror
|
|||
return r.adminClient.EndMaintenance(hostList)
|
||||
})
|
||||
|
||||
if resp.GetResult_() != nil {
|
||||
result = resp.GetResult_().GetEndMaintenanceResult_()
|
||||
}
|
||||
|
||||
if retryErr != nil {
|
||||
return resp, result, errors.Wrap(retryErr, "Unable to recover connection")
|
||||
}
|
||||
|
||||
if resp.GetResult_() != nil {
|
||||
result = resp.GetResult_().GetEndMaintenanceResult_()
|
||||
}
|
||||
|
||||
return resp, result, nil
|
||||
}
|
||||
|
||||
|
@ -1094,14 +1094,14 @@ func (r *realisClient) MaintenanceStatus(hosts ...string) (*aurora.Response, *au
|
|||
return r.adminClient.MaintenanceStatus(hostList)
|
||||
})
|
||||
|
||||
if resp.GetResult_() != nil {
|
||||
result = resp.GetResult_().GetMaintenanceStatusResult_()
|
||||
}
|
||||
|
||||
if retryErr != nil {
|
||||
return resp, result, errors.Wrap(retryErr, "Unable to recover connection")
|
||||
}
|
||||
|
||||
if resp.GetResult_() != nil {
|
||||
result = resp.GetResult_().GetMaintenanceStatusResult_()
|
||||
}
|
||||
|
||||
return resp, result, nil
|
||||
}
|
||||
|
||||
|
|
15
retry.go
15
retry.go
|
@ -16,11 +16,13 @@ package realis
|
|||
|
||||
import (
|
||||
"math/rand"
|
||||
"net/url"
|
||||
"time"
|
||||
|
||||
"github.com/paypal/gorealis/gen-go/apache/aurora"
|
||||
"github.com/paypal/gorealis/response"
|
||||
"github.com/pkg/errors"
|
||||
"github.com/rdelval/thrift/lib/go/thrift"
|
||||
)
|
||||
|
||||
type Backoff struct {
|
||||
|
@ -158,6 +160,19 @@ func (r *realisClient) thriftCallWithRetries(thriftCall auroraThriftCall) (*auro
|
|||
// Print out the error to the user
|
||||
r.logger.Printf("Client Error: %v\n", clientErr)
|
||||
|
||||
// Determine if error is a temporary URL error by going up the stack
|
||||
e, ok := clientErr.(thrift.TTransportException)
|
||||
if ok {
|
||||
r.logger.DebugPrint("Encountered a transport exception")
|
||||
|
||||
e, ok := e.Err().(*url.Error)
|
||||
if ok {
|
||||
if !IsTemporary(e) {
|
||||
return nil, errors.Wrap(clientErr, "Non-temporary connection error")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// In the future, reestablish connection should be able to check if it is actually possible
|
||||
// to make a thrift call to Aurora. For now, a reconnect should always lead to a retry.
|
||||
r.ReestablishConn()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue