diff --git a/examples/client.go b/examples/client.go
index 5d7a01f..86972b1 100644
--- a/examples/client.go
+++ b/examples/client.go
@@ -28,7 +28,7 @@ import (
 	"strings"
 )
 
-var cmd, executor, url, clustersConfig, clusterName, updateId, username, password, zkUrl, drainCandidates string
+var cmd, executor, url, clustersConfig, clusterName, updateId, username, password, zkUrl, hostList string
 
 var CONNECTION_TIMEOUT = 20000
 
@@ -42,7 +42,7 @@ func init() {
 	flag.StringVar(&username, "username", "aurora", "Username to use for authorization")
 	flag.StringVar(&password, "password", "secret", "Password to use for authorization")
 	flag.StringVar(&zkUrl, "zkurl", "", "zookeeper url")
-	flag.StringVar(&drainCandidates, "drainCandidates", "", "Comma separated list of candidate hosts to drain")
+	flag.StringVar(&hostList, "hostList", "", "Comma separated list of hosts to operate on")
 	flag.Parse()
 }
 
@@ -501,11 +501,11 @@ func main() {
 
 	case "drainHosts":
 		fmt.Println("Setting hosts to DRAINING")
-		if drainCandidates == "" {
+		if hostList == "" {
 			fmt.Println("No hosts specified to drain")
 			os.Exit(1)
 		}
-		hosts := strings.Split(drainCandidates, ",")
+		hosts := strings.Split(hostList, ",")
 		_, result, err := r.DrainHosts(hosts...)
 		if err != nil {
 			fmt.Printf("error: %+v\n", err.Error())
@@ -513,6 +513,20 @@ func main() {
 		}
 		fmt.Print(result.String())
 
+	case "endMaintenance":
+		fmt.Println("Setting hosts to ACTIVE")
+		if hostList == "" {
+			fmt.Println("No hosts specified to end maintenance for")
+			os.Exit(1)
+		}
+		hosts := strings.Split(hostList, ",")
+		_, result, err := r.EndMaintenance(hosts...)
+		if err != nil {
+			fmt.Printf("error: %+v\n", err.Error())
+			os.Exit(1)
+		}
+		fmt.Print(result.String())
+
 	default:
 		fmt.Println("Command not supported")
 		os.Exit(1)
diff --git a/realis.go b/realis.go
index 37fb65a..d9a4c4d 100644
--- a/realis.go
+++ b/realis.go
@@ -59,6 +59,7 @@ type Realis interface {
 
 	// Admin functions
 	DrainHosts(hosts ...string) (*aurora.Response, *aurora.DrainHostsResult_, error)
+	EndMaintenance(hosts ...string) (*aurora.Response, *aurora.EndMaintenanceResult_, error)
 }
 
 type realisClient struct {
@@ -1164,7 +1165,7 @@ func (r *realisClient) DrainHosts(hosts ...string) (*aurora.Response, *aurora.Dr
 
 	var resp *aurora.Response
 	var result *aurora.DrainHostsResult_
-	var clientErr, payloadErr error
+	var returnErr, clientErr, payloadErr error
 
 	if len(hosts) == 0 {
 		return nil, nil, errors.New("no hosts provided to drain")
@@ -1206,11 +1207,87 @@ func (r *realisClient) DrainHosts(hosts ...string) (*aurora.Response, *aurora.Dr
 		result = resp.GetResult_().GetDrainHostsResult_()
 	}
 
+	// Prioritize returning a bad payload error over a client error as a bad payload error indicates
+	// a deeper issue
+	if payloadErr != nil {
+		returnErr = payloadErr
+	} else {
+		returnErr = clientErr
+	}
 
 	// Timed out on retries. *Note that when we fix the unexpected errors with a correct payload,
 	// this will can become either a timeout error or a payload error
 	if retryErr != nil {
-		return resp, result, errors.Wrap(clientErr, "Unable to recover connection")
+		return resp, result, errors.Wrap(returnErr, "Unable to recover connection")
+	}
+
+	return resp, result, nil
+}
+
+// EndMaintenance sends an EndMaintenance thrift call through the admin client for
+// the given hosts. The request is retried with ExponentialBackoff: a client error
+// triggers an attempt to re-establish the connection, and a failed response code
+// check is retried as well. An error is returned immediately when no hosts are
+// given. When the retries are exhausted, the recorded payload error (or, if there
+// is none, the client error) is returned wrapped.
+func (r *realisClient) EndMaintenance(hosts ...string) (*aurora.Response, *aurora.EndMaintenanceResult_, error) {
+
+	var resp *aurora.Response
+	var result *aurora.EndMaintenanceResult_
+	var returnErr, clientErr, payloadErr error
+
+	if len(hosts) == 0 {
+		return nil, nil, errors.New("no hosts provided to end maintenance for")
+	}
+
+	hostList := aurora.NewHosts()
+	hostList.HostNames = make(map[string]bool)
+	for _, host := range hosts {
+		hostList.HostNames[host] = true
+	}
+
+	retryErr := ExponentialBackoff(defaultBackoff, func() (bool, error) {
+
+		// Send thrift call, if we have a thrift send error, attempt to reconnect
+		// and continue trying to resend command
+		if resp, clientErr = r.adminClient.EndMaintenance(hostList); clientErr != nil {
+			// Experienced a connection error
+			err1 := r.ReestablishConn()
+			if err1 != nil {
+				fmt.Println("error in re-establishing connection: ", err1)
+			}
+			return false, nil
+		}
+
+		// If error is NOT due to connection
+		if _, payloadErr = response.ResponseCodeCheck(resp); payloadErr != nil {
+			// TODO(rdelvalle): a leader election may cause the response to have
+			// failed when it should have succeeded. Retry everything for now until
+			// we figure out a more concrete fix.
+			return false, nil
+		}
+
+		// Successful call
+		return true, nil
+
+	})
+
+	if resp != nil && resp.GetResult_() != nil {
+		result = resp.GetResult_().GetEndMaintenanceResult_()
+	}
+
+	// Prioritize returning a bad payload error over a client error as a bad payload error indicates
+	// a deeper issue
+	if payloadErr != nil {
+		returnErr = payloadErr
+	} else {
+		returnErr = clientErr
+	}
+
+	// Timed out on retries. *Note that when we fix the unexpected errors with a correct payload,
+	// this can become either a timeout error or a payload error
+	if retryErr != nil {
+		return resp, result, errors.Wrap(returnErr, "Unable to recover connection")
 	}
 
 	return resp, result, nil