Added an end-maintenance API that allows DRAINED hosts to be transitioned to ACTIVE. Fixed a bug where the payload error was never returned when a call failed due to a bad payload.
This commit is contained in:
parent
f59f0bbdc3
commit
8fe3780949
2 changed files with 91 additions and 6 deletions
|
@ -28,7 +28,7 @@ import (
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
var cmd, executor, url, clustersConfig, clusterName, updateId, username, password, zkUrl, drainCandidates string
|
var cmd, executor, url, clustersConfig, clusterName, updateId, username, password, zkUrl, hostList string
|
||||||
|
|
||||||
var CONNECTION_TIMEOUT = 20000
|
var CONNECTION_TIMEOUT = 20000
|
||||||
|
|
||||||
|
@ -42,7 +42,7 @@ func init() {
|
||||||
flag.StringVar(&username, "username", "aurora", "Username to use for authorization")
|
flag.StringVar(&username, "username", "aurora", "Username to use for authorization")
|
||||||
flag.StringVar(&password, "password", "secret", "Password to use for authorization")
|
flag.StringVar(&password, "password", "secret", "Password to use for authorization")
|
||||||
flag.StringVar(&zkUrl, "zkurl", "", "zookeeper url")
|
flag.StringVar(&zkUrl, "zkurl", "", "zookeeper url")
|
||||||
flag.StringVar(&drainCandidates, "drainCandidates", "", "Comma separated list of candidate hosts to drain")
|
flag.StringVar(&hostList, "hostList", "", "Comma separated list of hosts to operate on")
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -501,11 +501,11 @@ func main() {
|
||||||
|
|
||||||
case "drainHosts":
|
case "drainHosts":
|
||||||
fmt.Println("Setting hosts to DRAINING")
|
fmt.Println("Setting hosts to DRAINING")
|
||||||
if drainCandidates == "" {
|
if hostList == "" {
|
||||||
fmt.Println("No hosts specified to drain")
|
fmt.Println("No hosts specified to drain")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
hosts := strings.Split(drainCandidates, ",")
|
hosts := strings.Split(hostList, ",")
|
||||||
_, result, err := r.DrainHosts(hosts...)
|
_, result, err := r.DrainHosts(hosts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Printf("error: %+v\n", err.Error())
|
fmt.Printf("error: %+v\n", err.Error())
|
||||||
|
@ -513,6 +513,20 @@ func main() {
|
||||||
}
|
}
|
||||||
fmt.Print(result.String())
|
fmt.Print(result.String())
|
||||||
|
|
||||||
|
case "endMaintenance":
|
||||||
|
fmt.Println("Setting hosts to ACTIVE")
|
||||||
|
if hostList == "" {
|
||||||
|
fmt.Println("No hosts specified to drain")
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
hosts := strings.Split(hostList, ",")
|
||||||
|
_, result, err := r.EndMaintenance(hosts...)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("error: %+v\n", err.Error())
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
fmt.Print(result.String())
|
||||||
|
|
||||||
default:
|
default:
|
||||||
fmt.Println("Command not supported")
|
fmt.Println("Command not supported")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
|
|
75
realis.go
75
realis.go
|
@ -59,6 +59,7 @@ type Realis interface {
|
||||||
|
|
||||||
// Admin functions
|
// Admin functions
|
||||||
DrainHosts(hosts ...string) (*aurora.Response, *aurora.DrainHostsResult_, error)
|
DrainHosts(hosts ...string) (*aurora.Response, *aurora.DrainHostsResult_, error)
|
||||||
|
EndMaintenance(hosts ...string) (*aurora.Response, *aurora.EndMaintenanceResult_, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
type realisClient struct {
|
type realisClient struct {
|
||||||
|
@ -1164,7 +1165,7 @@ func (r *realisClient) DrainHosts(hosts ...string) (*aurora.Response, *aurora.Dr
|
||||||
|
|
||||||
var resp *aurora.Response
|
var resp *aurora.Response
|
||||||
var result *aurora.DrainHostsResult_
|
var result *aurora.DrainHostsResult_
|
||||||
var clientErr, payloadErr error
|
var returnErr, clientErr, payloadErr error
|
||||||
|
|
||||||
if len(hosts) == 0 {
|
if len(hosts) == 0 {
|
||||||
return nil, nil, errors.New("no hosts provided to drain")
|
return nil, nil, errors.New("no hosts provided to drain")
|
||||||
|
@ -1206,11 +1207,81 @@ func (r *realisClient) DrainHosts(hosts ...string) (*aurora.Response, *aurora.Dr
|
||||||
result = resp.GetResult_().GetDrainHostsResult_()
|
result = resp.GetResult_().GetDrainHostsResult_()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Prioritize returning a bad payload error over a client error as a bad payload error indicates
|
||||||
|
// a deeper issue
|
||||||
|
if payloadErr != nil {
|
||||||
|
returnErr = payloadErr
|
||||||
|
} else {
|
||||||
|
returnErr = clientErr
|
||||||
|
}
|
||||||
|
|
||||||
// Timed out on retries. *Note that when we fix the unexpected errors with a correct payload,
|
// Timed out on retries. *Note that when we fix the unexpected errors with a correct payload,
|
||||||
// this can become either a timeout error or a payload error
|
// this can become either a timeout error or a payload error
|
||||||
if retryErr != nil {
|
if retryErr != nil {
|
||||||
return resp, result, errors.Wrap(clientErr, "Unable to recover connection")
|
return resp, result, errors.Wrap(returnErr, "Unable to recover connection")
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp, result, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (r *realisClient) EndMaintenance(hosts ...string) (*aurora.Response, *aurora.EndMaintenanceResult_, error) {
|
||||||
|
|
||||||
|
var resp *aurora.Response
|
||||||
|
var result *aurora.EndMaintenanceResult_
|
||||||
|
var returnErr, clientErr, payloadErr error
|
||||||
|
|
||||||
|
if len(hosts) == 0 {
|
||||||
|
return nil, nil, errors.New("no hosts provided to drain")
|
||||||
|
}
|
||||||
|
|
||||||
|
hostList := aurora.NewHosts()
|
||||||
|
hostList.HostNames = make(map[string]bool)
|
||||||
|
for _, host := range hosts {
|
||||||
|
hostList.HostNames[host] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
retryErr := ExponentialBackoff(defaultBackoff, func() (bool, error) {
|
||||||
|
|
||||||
|
// Send thrift call, if we have a thrift send error, attempt to reconnect
|
||||||
|
// and continue trying to resend command
|
||||||
|
if resp, clientErr = r.adminClient.EndMaintenance(hostList); clientErr != nil {
|
||||||
|
// Experienced a connection error
|
||||||
|
err1 := r.ReestablishConn()
|
||||||
|
if err1 != nil {
|
||||||
|
fmt.Println("error in re-establishing connection: ", err1)
|
||||||
|
}
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// If error is NOT due to connection
|
||||||
|
if _, payloadErr = response.ResponseCodeCheck(resp); payloadErr != nil {
|
||||||
|
// TODO(rdelvalle): a leader election may cause the response to have
|
||||||
|
// failed when it should have succeeded. Retry everything for now until
|
||||||
|
// we figure out a more concrete fix.
|
||||||
|
return false, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Successful call
|
||||||
|
return true, nil
|
||||||
|
|
||||||
|
})
|
||||||
|
|
||||||
|
if resp != nil && resp.GetResult_() != nil {
|
||||||
|
result = resp.GetResult_().GetEndMaintenanceResult_()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Prioritize returning a bad payload error over a client error as a bad payload error indicates
|
||||||
|
// a deeper issue
|
||||||
|
if payloadErr != nil {
|
||||||
|
returnErr = payloadErr
|
||||||
|
} else {
|
||||||
|
returnErr = clientErr
|
||||||
|
}
|
||||||
|
|
||||||
|
// Timed out on retries. *Note that when we fix the unexpected errors with a correct payload,
|
||||||
|
// this can become either a timeout error or a payload error
|
||||||
|
if retryErr != nil {
|
||||||
|
return resp, result, errors.Wrap(returnErr, "Unable to recover connection")
|
||||||
}
|
}
|
||||||
|
|
||||||
return resp, result, nil
|
return resp, result, nil
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue