Added an end maintenance API which allows DRAINED hosts to be transitioned back to ACTIVE. Fixed a bug where the payload error would never be returned if a call failed due to a bad payload.

Renan DelValle 2017-09-27 12:55:50 -07:00
parent f59f0bbdc3
commit 8fe3780949
2 changed files with 91 additions and 6 deletions
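For context, here is a rough caller-side sketch of how the new EndMaintenance call pairs with the existing DrainHosts call. The helper function, import path, and wiring below are illustrative assumptions, not part of this commit:

package example

import (
	"fmt"

	realis "github.com/rdelval/gorealis" // import path assumed for illustration
)

// recycleHosts is a hypothetical helper: it drains the given hosts and, once
// maintenance work is done, transitions them from DRAINED back to ACTIVE
// using the EndMaintenance call added in this commit.
func recycleHosts(r realis.Realis, hosts ...string) error {
	// Put the hosts into DRAINED state.
	if _, _, err := r.DrainHosts(hosts...); err != nil {
		return fmt.Errorf("draining hosts: %v", err)
	}

	// ... perform maintenance on the hosts here ...

	// Transition the DRAINED hosts back to ACTIVE.
	if _, _, err := r.EndMaintenance(hosts...); err != nil {
		return fmt.Errorf("ending maintenance: %v", err)
	}
	return nil
}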


@@ -59,6 +59,7 @@ type Realis interface {
// Admin functions
DrainHosts(hosts ...string) (*aurora.Response, *aurora.DrainHostsResult_, error)
EndMaintenance(hosts ...string) (*aurora.Response, *aurora.EndMaintenanceResult_, error)
}
type realisClient struct {
@@ -1164,7 +1165,7 @@ func (r *realisClient) DrainHosts(hosts ...string) (*aurora.Response, *aurora.Dr
var resp *aurora.Response
var result *aurora.DrainHostsResult_
var clientErr, payloadErr error
var returnErr, clientErr, payloadErr error
if len(hosts) == 0 {
return nil, nil, errors.New("no hosts provided to drain")
@@ -1206,11 +1207,81 @@ func (r *realisClient) DrainHosts(hosts ...string) (*aurora.Response, *aurora.Dr
result = resp.GetResult_().GetDrainHostsResult_()
}
// Prioritize returning a bad payload error over a client error as a bad payload error indicates
// a deeper issue
if payloadErr != nil {
returnErr = payloadErr
} else {
returnErr = clientErr
}
// Timed out on retries. *Note that when we fix the unexpected errors with a correct payload,
// this can become either a timeout error or a payload error
if retryErr != nil {
return resp, result, errors.Wrap(clientErr, "Unable to recover connection")
return resp, result, errors.Wrap(returnErr, "Unable to recover connection")
}
return resp, result, nil
}
func (r *realisClient) EndMaintenance(hosts ...string) (*aurora.Response, *aurora.EndMaintenanceResult_, error) {
var resp *aurora.Response
var result *aurora.EndMaintenanceResult_
var returnErr, clientErr, payloadErr error
if len(hosts) == 0 {
return nil, nil, errors.New("no hosts provided to drain")
}
hostList := aurora.NewHosts()
hostList.HostNames = make(map[string]bool)
for _, host := range hosts {
hostList.HostNames[host] = true
}
retryErr := ExponentialBackoff(defaultBackoff, func() (bool, error) {
// Send thrift call, if we have a thrift send error, attempt to reconnect
// and continue trying to resend command
if resp, clientErr = r.adminClient.EndMaintenance(hostList); clientErr != nil {
// Experienced a connection error
err1 := r.ReestablishConn()
if err1 != nil {
fmt.Println("error in re-establishing connection: ", err1)
}
return false, nil
}
// If error is NOT due to connection
if _, payloadErr = response.ResponseCodeCheck(resp); payloadErr != nil {
// TODO(rdelvalle): a leader election may cause the response to have
// failed when it should have succeeded. Retry everything for now until
// we figure out a more concrete fix.
return false, nil
}
// Successful call
return true, nil
})
if resp != nil && resp.GetResult_() != nil {
result = resp.GetResult_().GetEndMaintenanceResult_()
}
// Prioritize returning a bad payload error over a client error as a bad payload error indicates
// a deeper issue
if payloadErr != nil {
returnErr = payloadErr
} else {
returnErr = clientErr
}
// Timed out on retries. *Note that when we fix the unexpected errors with a correct payload,
// this can become either a timeout error or a payload error
if retryErr != nil {
return resp, result, errors.Wrap(returnErr, "Unable to recover connection")
}
return resp, result, nil
}
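To illustrate the payload-error bug this commit fixes: the old timeout path wrapped only clientErr, and errors.Wrap from github.com/pkg/errors returns nil when given a nil error, so a call that failed purely because of a bad payload surfaced no error at all to the caller. A minimal standalone sketch of the difference (variable names mirror the diff; the values are made up):

package main

import (
	"fmt"

	"github.com/pkg/errors"
)

func main() {
	var clientErr error                     // nil: the thrift call itself went through
	payloadErr := errors.New("bad payload") // the response failed the payload check

	// Old behaviour: only clientErr was wrapped. pkg/errors.Wrap returns nil
	// for a nil error, so the bad-payload failure was silently swallowed.
	fmt.Println(errors.Wrap(clientErr, "Unable to recover connection")) // <nil>

	// New behaviour: the payload error takes priority and is surfaced.
	returnErr := clientErr
	if payloadErr != nil {
		returnErr = payloadErr
	}
	fmt.Println(errors.Wrap(returnErr, "Unable to recover connection"))
	// Output: Unable to recover connection: bad payload
}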