Added an end maintenance API which allows DRAINED hosts to be transitioned back to ACTIVE. Fixed a bug where the payload error would never be returned if a call failed due to a bad payload.

Renan DelValle 2017-09-27 12:55:50 -07:00
parent f59f0bbdc3
commit 8fe3780949
2 changed files with 91 additions and 6 deletions
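For context, here is a rough caller-side sketch of how the new EndMaintenance call pairs with the existing DrainHosts call. The helper function, import path, and wiring below are illustrative assumptions, not part of this commit:

package example

import (
	"fmt"

	realis "github.com/rdelval/gorealis" // import path assumed for illustration
)

// recycleHosts is a hypothetical helper: it drains the given hosts and, once
// maintenance work is done, transitions them from DRAINED back to ACTIVE
// using the EndMaintenance call added in this commit.
func recycleHosts(r realis.Realis, hosts ...string) error {
	// Put the hosts into DRAINED state.
	if _, _, err := r.DrainHosts(hosts...); err != nil {
		return fmt.Errorf("draining hosts: %v", err)
	}

	// ... perform maintenance on the hosts here ...

	// Transition the DRAINED hosts back to ACTIVE.
	if _, _, err := r.EndMaintenance(hosts...); err != nil {
		return fmt.Errorf("ending maintenance: %v", err)
	}
	return nil
}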


@@ -59,6 +59,7 @@ type Realis interface {
// Admin functions
DrainHosts(hosts ...string) (*aurora.Response, *aurora.DrainHostsResult_, error)
EndMaintenance(hosts ...string) (*aurora.Response, *aurora.EndMaintenanceResult_, error)
}
type realisClient struct {
@@ -1164,7 +1165,7 @@ func (r *realisClient) DrainHosts(hosts ...string) (*aurora.Response, *aurora.Dr
var resp *aurora.Response
var result *aurora.DrainHostsResult_
var clientErr, payloadErr error
var returnErr, clientErr, payloadErr error
if len(hosts) == 0 {
return nil, nil, errors.New("no hosts provided to drain")
@@ -1206,11 +1207,81 @@ func (r *realisClient) DrainHosts(hosts ...string) (*aurora.Response, *aurora.Dr
result = resp.GetResult_().GetDrainHostsResult_()
}
// Prioritize returning a bad payload error over a client error as a bad payload error indicates
// a deeper issue
if payloadErr != nil {
returnErr = payloadErr
} else {
returnErr = clientErr
}
// Timed out on retries. *Note that when we fix the unexpected errors with a correct payload,
// this can become either a timeout error or a payload error
if retryErr != nil {
return resp, result, errors.Wrap(clientErr, "Unable to recover connection")
return resp, result, errors.Wrap(returnErr, "Unable to recover connection")
}
return resp, result, nil
}
func (r *realisClient) EndMaintenance(hosts ...string) (*aurora.Response, *aurora.EndMaintenanceResult_, error) {
var resp *aurora.Response
var result *aurora.EndMaintenanceResult_
var returnErr, clientErr, payloadErr error
if len(hosts) == 0 {
return nil, nil, errors.New("no hosts provided to drain")
}
hostList := aurora.NewHosts()
hostList.HostNames = make(map[string]bool)
for _, host := range hosts {
hostList.HostNames[host] = true
}
retryErr := ExponentialBackoff(defaultBackoff, func() (bool, error) {
// Send thrift call, if we have a thrift send error, attempt to reconnect
// and continue trying to resend command
if resp, clientErr = r.adminClient.EndMaintenance(hostList); clientErr != nil {
// Experienced a connection error
err1 := r.ReestablishConn()
if err1 != nil {
fmt.Println("error in re-establishing connection: ", err1)
}
return false, nil
}
// If error is NOT due to connection
if _, payloadErr = response.ResponseCodeCheck(resp); payloadErr != nil {
// TODO(rdelvalle): a leader election may cause the response to have
// failed when it should have succeeded. Retry everything for now until
// we figure out a more concrete fix.
return false, nil
}
// Successful call
return true, nil
})
if resp != nil && resp.GetResult_() != nil {
result = resp.GetResult_().GetEndMaintenanceResult_()
}
// Prioritize returning a bad payload error over a client error as a bad payload error indicates
// a deeper issue
if payloadErr != nil {
returnErr = payloadErr
} else {
returnErr = clientErr
}
// Timed out on retries. *Note that when we fix the unexpected errors with a correct payload,
// this can become either a timeout error or a payload error
if retryErr != nil {
return resp, result, errors.Wrap(returnErr, "Unable to recover connection")
}
return resp, result, nil
}
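To illustrate the payload-error bug this commit fixes: the old timeout path wrapped only clientErr, and errors.Wrap from github.com/pkg/errors returns nil when given a nil error, so a call that failed purely because of a bad payload surfaced no error at all to the caller. A minimal standalone sketch of the difference (variable names mirror the diff; the values are made up):

package main

import (
	"fmt"

	"github.com/pkg/errors"
)

func main() {
	var clientErr error                     // nil: the thrift call itself went through
	payloadErr := errors.New("bad payload") // the response failed the payload check

	// Old behaviour: only clientErr was wrapped. pkg/errors.Wrap returns nil
	// for a nil error, so the bad-payload failure was silently swallowed.
	fmt.Println(errors.Wrap(clientErr, "Unable to recover connection")) // <nil>

	// New behaviour: the payload error takes priority and is surfaced.
	returnErr := clientErr
	if payloadErr != nil {
		returnErr = payloadErr
	}
	fmt.Println(errors.Wrap(returnErr, "Unable to recover connection"))
	// Output: Unable to recover connection: bad payload
}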