Retry temporary errors by default (#107)

* Added an Aurora URL validator to handle scenarios where incomplete information is passed to the client. The client will do its best to guess missing information such as the protocol and port.

* Upgraded to testify 1.3.0.

* Added a configuration option to fail on a non-temporary error. This reverts to the original behavior of the retry mechanism; however, it allows the user to opt to fail on a non-temporary error.
This commit is contained in:
Renan DelValle 2019-06-11 11:47:14 -07:00 committed by GitHub
parent 4ffb509939
commit 6dc4bf93b9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
37 changed files with 2795 additions and 1009 deletions

View file

@ -27,7 +27,10 @@ type Monitor struct {
}
// Polls the scheduler every certain amount of time to see if the update has succeeded
func (m *Monitor) JobUpdate(updateKey aurora.JobUpdateKey, interval int, timeout int) (bool, error) {
func (m *Monitor) JobUpdate(
updateKey aurora.JobUpdateKey,
interval int,
timeout int) (bool, error) {
updateQ := aurora.JobUpdateQuery{
Key: &updateKey,
@ -40,7 +43,10 @@ func (m *Monitor) JobUpdate(updateKey aurora.JobUpdateKey, interval int, timeout
aurora.JobUpdateStatus_FAILED,
},
}
updateSummaries, err := m.JobUpdateQuery(updateQ, time.Duration(interval)*time.Second, time.Duration(timeout)*time.Second)
updateSummaries, err := m.JobUpdateQuery(
updateQ,
time.Duration(interval)*time.Second,
time.Duration(timeout)*time.Second)
status := updateSummaries[0].State.Status
@ -119,7 +125,10 @@ func (m *Monitor) JobUpdateQuery(
}
// Monitor a Job until all instances enter one of the LIVE_STATES
func (m *Monitor) Instances(key *aurora.JobKey, instances int32, interval, timeout int) (bool, error) {
func (m *Monitor) Instances(
key *aurora.JobKey,
instances int32,
interval, timeout int) (bool, error) {
return m.ScheduleStatus(key, instances, LiveStates, interval, timeout)
}
@ -164,9 +173,13 @@ func (m *Monitor) ScheduleStatus(
}
}
// Monitor host status until all hosts match the status provided. Returns a map where the value is true if the host
// Monitor host status until all hosts match the status provided.
// Returns a map where the value is true if the host
// is in one of the desired mode(s) or false if it is not as of the time when the monitor exited.
func (m *Monitor) HostMaintenance(hosts []string, modes []aurora.MaintenanceMode, interval, timeout int) (map[string]bool, error) {
func (m *Monitor) HostMaintenance(
hosts []string,
modes []aurora.MaintenanceMode,
interval, timeout int) (map[string]bool, error) {
// Transform modes to monitor for into a set for easy lookup
desiredMode := make(map[aurora.MaintenanceMode]struct{})
@ -175,7 +188,8 @@ func (m *Monitor) HostMaintenance(hosts []string, modes []aurora.MaintenanceMode
}
// Turn slice into a host set to eliminate duplicates.
// We also can't use a simple count because multiple modes means we can have multiple matches for a single host.
// We also can't use a simple count because multiple modes means
// we can have multiple matches for a single host.
// I.e. host A transitions from ACTIVE to DRAINING to DRAINED while monitored
remainingHosts := make(map[string]struct{})
for _, host := range hosts {