diff --git a/examples/client.go b/examples/client.go index 8e173ca..b6f9df5 100644 --- a/examples/client.go +++ b/examples/client.go @@ -513,12 +513,21 @@ func main() { } // Monitor change to DRAINING and DRAINED mode - _, err = monitor.HostMaintenance( + nontransitioned, err := monitor.HostMaintenance( hosts, []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING}, 5, 10) if err != nil { + + // Check whether the call was partially successful + if len(nontransitioned) != 0 { + fmt.Println("Partial success:") + for host, _ := range nontransitioned { + fmt.Printf("Host %s did not transtion into desired mode(s)\n", host) + } + } + fmt.Printf("error: %+v\n", err.Error()) os.Exit(1) } @@ -539,15 +548,24 @@ func main() { } // Monitor change to DRAINING and DRAINED mode - _, err = monitor.HostMaintenance( + nontransitioned, err := monitor.HostMaintenance( hosts, []aurora.MaintenanceMode{aurora.MaintenanceMode_NONE}, 5, 10) if err != nil { + // Check whether the call was partially successful + if len(nontransitioned) != 0 { + fmt.Println("Partial success:") + for host, _ := range nontransitioned { + fmt.Printf("Host %s did not transtion into desired mode(s)\n", host) + } + } + fmt.Printf("error: %+v\n", err.Error()) os.Exit(1) } + fmt.Print(result.String()) default: diff --git a/monitors.go b/monitors.go index 73a4e48..6aab49b 100644 --- a/monitors.go +++ b/monitors.go @@ -153,48 +153,47 @@ func (m *Monitor) Instances(key *aurora.JobKey, instances int32, interval int, t return false, nil } -// Monitor host status until all hosts match the status provided -func (m *Monitor) HostMaintenance(hosts []string, modes []aurora.MaintenanceMode, sleepTime, steps int) (bool, error) { +// Monitor host status until all hosts match the status provided. May return an error along with a non-nil map which contains +// the hosts that did not transition to the desired mode(s). 
+func (m *Monitor) HostMaintenance(hosts []string, modes []aurora.MaintenanceMode, sleepTime, steps int) (map[string]struct{}, error) { - // Transform modes into a look up table + // Transform modes to monitor for into a set for easy lookup desiredMode := make(map[aurora.MaintenanceMode]struct{}) for _,mode := range modes { desiredMode[mode] = struct{}{} } - // Initial map has all hosts we're looking for. - // For each node we find in the correct mode, eliminate it from the map. If we reach 0 elements in the map, - // we found all hosts we we're monitoring. This avoids having to go through and check the list one by one each cycle. - hostMode := make(map[string]struct{}) + // Turn slice into a host set to eliminate duplicates. Delete hosts that have entered the desired mode from + // observed list. We are done when the number of observed hosts reaches zero. + // This avoids having to go through and check the list one by one each cycle. + observedHosts := make(map[string]struct{}) for _,host := range hosts { - hostMode[host] = struct{}{} + observedHosts[host] = struct{}{} } for step := 0; step < steps; step++ { - // Client may have multiple retries handle retries _, result, err := m.Client.MaintenanceStatus(hosts...) 
if err != nil { // Error is either a payload error or a severe connection error - return false, errors.Wrap(err,"client error") + return observedHosts, errors.Wrap(err,"client error") } - for stat := range result.GetStatuses() { - if _, ok := desiredMode[stat.GetMode()]; ok { - fmt.Printf("host %s entered %s state\n", stat.GetHost(), stat.GetMode()) - delete(hostMode, stat.GetHost()) + for status := range result.GetStatuses() { + if _, ok := desiredMode[status.GetMode()]; ok { + fmt.Printf("host %s entered %s state\n", status.GetHost(), status.GetMode()) + delete(observedHosts, status.GetHost()) } } - if len(hostMode) == 0 { - return true, nil + if len(observedHosts) == 0{ + return observedHosts, nil } else { - fmt.Printf("%d host(s) not in desired state\n", len(hostMode)) + fmt.Printf("%d host(s) not in desired state\n", len(observedHosts)) } time.Sleep(time.Duration(sleepTime) * time.Second) } - return false, errors.New("Timed out") - + return observedHosts, errors.New("Timed out") } diff --git a/realis_e2e_test.go b/realis_e2e_test.go index 89470e9..8c73b67 100644 --- a/realis_e2e_test.go +++ b/realis_e2e_test.go @@ -154,25 +154,36 @@ func TestRealisClient_ScheduleCronJob_Thermos(t *testing.T) { } func TestRealisClient_DrainHosts(t *testing.T) { hosts := []string{"192.168.33.7"} - _, _ , err := r.DrainHosts(hosts...) + _, _, err := r.DrainHosts(hosts...) 
if err != nil { fmt.Printf("error: %+v\n", err.Error()) os.Exit(1) } // Monitor change to DRAINING and DRAINED mode - _, err = monitor.HostMaintenance( + nontransitioned, err := monitor.HostMaintenance( hosts, []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING}, 5, 10) - if err != nil { - fmt.Printf("error: %+v\n", err.Error()) - os.Exit(1) - } + assert.Equal(t, nontransitioned, map[string]struct{}{}) + assert.NoError(t, err) + + t.Run("TestRealisClient_MonitorNontransitioned", func(t *testing.T) { + // Monitor change to DRAINING and DRAINED mode + nontransitioned, err := monitor.HostMaintenance( + append(hosts, "IMAGINARY_HOST"), + []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING}, + 1, + 1) + + // Assert monitor returned an error that was not nil, and also a list of the non-transitioned hosts + assert.Error(t, err) + assert.Equal(t, nontransitioned, map[string]struct{}{"IMAGINARY_HOST": {}}) + }) t.Run("TestRealisClient_EndMaintenance", func(t *testing.T) { - _, _ , err := r.EndMaintenance(hosts...) + _, _, err := r.EndMaintenance(hosts...) if err != nil { fmt.Printf("error: %+v\n", err.Error()) os.Exit(1)