Avoided going through the entire list of monitored hosts by keeping a set of hosts that had transitioned to a desired mode.
This commit is contained in:
parent
fa7833a749
commit
1fd07b5007
1 changed file with 26 additions and 17 deletions
43
monitors.go
43
monitors.go
|
@ -154,7 +154,7 @@ func (m *Monitor) Instances(key *aurora.JobKey, instances int32, interval int, t
|
||||||
}
|
}
|
||||||
|
|
||||||
// Monitor host status until all hosts match the status provided. Returns a map where the value is true if the host
|
// Monitor host status until all hosts match the status provided. Returns a map where the value is true if the host
|
||||||
// is in one of the desired mode(s) or false if it is not.
|
// is in one of the desired mode(s) or false if it is not as of the time when the monitor exited.
|
||||||
func (m *Monitor) HostMaintenance(hosts []string, modes []aurora.MaintenanceMode, sleepTime, steps int) (map[string]bool, error) {
|
func (m *Monitor) HostMaintenance(hosts []string, modes []aurora.MaintenanceMode, sleepTime, steps int) (map[string]bool, error) {
|
||||||
|
|
||||||
// Transform modes to monitor for into a set for easy lookup
|
// Transform modes to monitor for into a set for easy lookup
|
||||||
|
@ -164,38 +164,47 @@ func (m *Monitor) HostMaintenance(hosts []string, modes []aurora.MaintenanceMode
|
||||||
}
|
}
|
||||||
|
|
||||||
// Turn slice into a host set to eliminate duplicates.
|
// Turn slice into a host set to eliminate duplicates.
|
||||||
observedHosts := make(map[string]bool)
|
// We also can't use a simple count because multiple modes means we can have multiple matches for a single host.
|
||||||
|
// I.e. host A transitions from ACTIVE to DRAINING to DRAINED while monitored
|
||||||
|
remainingHosts := make(map[string]struct{})
|
||||||
for _, host := range hosts {
|
for _, host := range hosts {
|
||||||
observedHosts[host] = false
|
remainingHosts[host] = struct{}{}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
hostResult := make(map[string]bool)
|
||||||
|
|
||||||
for step := 0; step < steps; step++ {
|
for step := 0; step < steps; step++ {
|
||||||
// Client may have multiple retries handle retries
|
if step != 0 {
|
||||||
|
time.Sleep(time.Duration(sleepTime) * time.Second)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Client call has multiple retries internally
|
||||||
_, result, err := m.Client.MaintenanceStatus(hosts...)
|
_, result, err := m.Client.MaintenanceStatus(hosts...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
// Error is either a payload error or a severe connection error
|
// Error is either a payload error or a severe connection error
|
||||||
return observedHosts, errors.Wrap(err, "client error")
|
for host := range remainingHosts {
|
||||||
|
hostResult[host] = false
|
||||||
|
}
|
||||||
|
return hostResult, errors.Wrap(err, "client error in monitor")
|
||||||
}
|
}
|
||||||
|
|
||||||
for status := range result.GetStatuses() {
|
for status := range result.GetStatuses() {
|
||||||
|
|
||||||
if _, ok := desiredMode[status.GetMode()]; ok {
|
if _, ok := desiredMode[status.GetMode()]; ok {
|
||||||
observedHosts[status.GetHost()] = true
|
hostResult[status.GetHost()] = true
|
||||||
|
delete(remainingHosts, status.GetHost())
|
||||||
|
|
||||||
transitionedHosts := 0
|
if len(remainingHosts) == 0 {
|
||||||
for _, val := range observedHosts {
|
return hostResult, nil
|
||||||
if val {
|
|
||||||
transitionedHosts++
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if len(observedHosts) == transitionedHosts {
|
|
||||||
return observedHosts, nil
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
time.Sleep(time.Duration(sleepTime) * time.Second)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return observedHosts, errors.New("Timed out")
|
for host := range remainingHosts {
|
||||||
|
hostResult[host] = false
|
||||||
|
}
|
||||||
|
|
||||||
|
return hostResult, errors.New("Timed out")
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue