Changing HostMaintenance to return a map[string]bool where true indicates the host transitioned to the desired state and false indicates it failed to do so.

Renan DelValle 2017-10-02 17:24:01 -07:00
parent 3111b358fc
commit 922e8d6b5a
2 changed files with 33 additions and 48 deletions
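
For context, a rough caller-side sketch of how the new return value might be consumed. This is not part of the commit; the import paths, the awaitDrain helper, and its wiring are assumptions for illustration, and the hosts are expected to have had draining requested already.

// Caller-side sketch (illustrative only): wait for hosts to drain and report per-host outcome.
// The import paths below are assumed to match the gorealis layout at the time of this commit.
package example

import (
	"fmt"

	realis "github.com/rdelval/gorealis"
	"github.com/rdelval/gorealis/gen-go/apache/aurora"
)

// awaitDrain is a hypothetical helper, not part of the library.
func awaitDrain(monitor *realis.Monitor, hosts []string) error {
	// Poll every 5 seconds, up to 10 times, for hosts to reach DRAINING or DRAINED.
	hostResults, err := monitor.HostMaintenance(
		hosts,
		[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
		5,
		10)

	// Even when err is non-nil, hostResults reports per-host progress:
	// true means the host reached one of the desired modes, false means it did not.
	for host, ok := range hostResults {
		if ok {
			fmt.Printf("host %s reached a desired maintenance mode\n", host)
		} else {
			fmt.Printf("host %s did not reach a desired maintenance mode\n", host)
		}
	}
	return err
}

Returning the full map, rather than only the hosts that failed to transition, lets callers and tests assert on both outcomes at once, which is what the updated assertions in the test file below rely on.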


@@ -153,22 +153,20 @@ func (m *Monitor) Instances(key *aurora.JobKey, instances int32, interval int, t
 	return false, nil
 }
 
-// Monitor host status until all hosts match the status provided. May return an error along with a non nil map which contains
-// the hosts that did not transition to the desired modes(s).
-func (m *Monitor) HostMaintenance(hosts []string, modes []aurora.MaintenanceMode, sleepTime, steps int) (map[string]struct{}, error) {
+// Monitor host status until all hosts match the status provided. Returns a map where the value is true if the host
+// is in one of the desired mode(s) or false if it is not.
+func (m *Monitor) HostMaintenance(hosts []string, modes []aurora.MaintenanceMode, sleepTime, steps int) (map[string]bool, error) {
 
 	// Transform modes to monitor for into a set for easy lookup
 	desiredMode := make(map[aurora.MaintenanceMode]struct{})
-	for _,mode := range modes {
+	for _, mode := range modes {
 		desiredMode[mode] = struct{}{}
 	}
 
-	// Turn slice into a host set to eliminate duplicates. Delete hosts that have entered the desired mode from
-	// observed list. We are done when the number of observed hosts reaches zero.
-	// This avoids having to go through and check the list one by one each cycle.
-	observedHosts := make(map[string]struct{})
-	for _,host := range hosts {
-		observedHosts[host] = struct{}{}
+	// Turn slice into a host set to eliminate duplicates.
+	observedHosts := make(map[string]bool)
+	for _, host := range hosts {
+		observedHosts[host] = false
 	}
 
 	for step := 0; step < steps; step++ {
@@ -176,20 +174,24 @@ func (m *Monitor) HostMaintenance(hosts []string, modes []aurora.MaintenanceMode
 		_, result, err := m.Client.MaintenanceStatus(hosts...)
 		if err != nil {
 			// Error is either a payload error or a severe connection error
-			return observedHosts, errors.Wrap(err,"client error")
+			return observedHosts, errors.Wrap(err, "client error")
 		}
 
 		for status := range result.GetStatuses() {
-			if _, ok := desiredMode[status.GetMode()]; ok {
-				fmt.Printf("host %s entered %s state\n", status.GetHost(), status.GetMode())
-				delete(observedHosts, status.GetHost())
-			}
-		}
+			if _, ok := desiredMode[status.GetMode()]; ok {
+				observedHosts[status.GetHost()] = true
 
-		if len(observedHosts) == 0{
-			return observedHosts, nil
-		} else {
-			fmt.Printf("%d host(s) not in desired state\n", len(observedHosts))
+				transitionedHosts := 0
+				for _, val := range observedHosts {
+					if val {
+						transitionedHosts++
+					}
+				}
+
+				if len(observedHosts) == transitionedHosts {
+					return observedHosts, nil
+				}
+			}
 		}
 
 		time.Sleep(time.Duration(sleepTime) * time.Second)
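
The new loop marks each host true as it reaches a desired mode and exits once every observed host has transitioned. A standalone sketch of that completion check (equivalent logic, not the committed code verbatim):

package example

// allTransitioned mirrors the check added above: the monitor flips a host's entry to
// true once it reaches a desired mode and is done only when every entry is true.
func allTransitioned(observedHosts map[string]bool) bool {
	transitioned := 0
	for _, val := range observedHosts {
		if val {
			transitioned++
		}
	}
	return transitioned == len(observedHosts)
}

If some host never enters a desired mode (for example the nonexistent IMAGINARY_HOST used in the tests below), the monitor runs out of steps and returns the partially-true map together with an error, which the updated test assertions check.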


@@ -73,10 +73,7 @@ func TestRealisClient_CreateJob_Thermos(t *testing.T) {
 	start := time.Now()
 	resp, err := r.CreateJob(job)
 	end := time.Now()
-	if err != nil {
-		fmt.Println(err)
-		os.Exit(1)
-	}
+	assert.NoError(t, err)
 	assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode)
 	fmt.Printf("Create call took %d ns\n", (end.UnixNano() - start.UnixNano()))
 
@@ -86,10 +83,7 @@ func TestRealisClient_CreateJob_Thermos(t *testing.T) {
 		start := time.Now()
 		resp, err := r.KillJob(job.JobKey())
 		end := time.Now()
-		if err != nil {
-			fmt.Println(err)
-			os.Exit(1)
-		}
+		assert.NoError(t, err)
 		assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode)
 		fmt.Printf("Kill call took %d ns\n", (end.UnixNano() - start.UnixNano()))
 	})
@@ -99,10 +93,7 @@ func TestRealisClient_CreateJob_Thermos(t *testing.T) {
 
 func TestRealisClient_ScheduleCronJob_Thermos(t *testing.T) {
 	thermosCronPayload, err := ioutil.ReadFile("examples/thermos_cron_payload.json")
-	if err != nil {
-		fmt.Println("Error reading thermos payload file: ", err)
-		os.Exit(1)
-	}
+	assert.NoError(t, err)
 
 	job := realis.NewJob().
 		Environment("prod").
@@ -131,10 +122,8 @@ func TestRealisClient_ScheduleCronJob_Thermos(t *testing.T) {
 		start := time.Now()
 		resp, err := r.StartCronJob(job.JobKey())
 		end := time.Now()
-		if err != nil {
-			fmt.Println(err)
-			os.Exit(1)
-		}
+
+		assert.NoError(t, err)
 		assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode)
 		fmt.Printf("Schedule cron call took %d ns\n", (end.UnixNano() - start.UnixNano()))
 	})
@@ -143,11 +132,8 @@ func TestRealisClient_ScheduleCronJob_Thermos(t *testing.T) {
 		start := time.Now()
 		resp, err := r.DescheduleCronJob(job.JobKey())
 		end := time.Now()
-		if err != nil {
-			fmt.Println(err)
-			os.Exit(1)
-		}
+		assert.NoError(t, err)
 		assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode)
 		fmt.Printf("Deschedule cron call took %d ns\n", (end.UnixNano() - start.UnixNano()))
 	})
 
@@ -161,17 +147,17 @@ func TestRealisClient_DrainHosts(t *testing.T) {
 	}
 
 	// Monitor change to DRAINING and DRAINED mode
-	nontransitioned, err := monitor.HostMaintenance(
+	hostResults, err := monitor.HostMaintenance(
 		hosts,
 		[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
 		5,
 		10)
-	assert.Equal(t, nontransitioned, map[string]struct{}{})
+	assert.Equal(t, map[string]bool{"192.168.33.7": true}, hostResults)
 	assert.NoError(t, err)
 
 	t.Run("TestRealisClient_MonitorNontransitioned", func(t *testing.T) {
 		// Monitor change to DRAINING and DRAINED mode
-		nontransitioned, err := monitor.HostMaintenance(
+		hostResults, err := monitor.HostMaintenance(
 			append(hosts, "IMAGINARY_HOST"),
 			[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
 			1,
@@ -179,7 +165,7 @@ func TestRealisClient_DrainHosts(t *testing.T) {
 
 		// Assert monitor returned an error that was not nil, and also a list of the non-transitioned hosts
 		assert.Error(t, err)
-		assert.Equal(t, nontransitioned, map[string]struct{}{"IMAGINARY_HOST": {}})
+		assert.Equal(t, map[string]bool{"192.168.33.7": true, "IMAGINARY_HOST": false}, hostResults)
 	})
 
 	t.Run("TestRealisClient_EndMaintenance", func(t *testing.T) {
@@ -195,10 +181,7 @@ func TestRealisClient_DrainHosts(t *testing.T) {
 			[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
 			5,
 			10)
-		if err != nil {
-			fmt.Printf("error: %+v\n", err.Error())
-			os.Exit(1)
-		}
+		assert.NoError(t, err)
 	})
 }