Adding JobUpdateStatus monitor as well as renaming all monitor functions to be Monitor + <subject>

Renan DelValle 2019-09-10 17:42:51 -07:00 committed by Renan DelValle
parent 04471c6918
commit 6f20f5b62f
4 changed files with 71 additions and 29 deletions
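For quick reference, the renames introduced by this commit follow the Monitor + <subject> convention (summarized from the diffs below):

InstancesMonitor       -> MonitorInstances
JobUpdateMonitor       -> MonitorJobUpdate
ScheduleStatusMonitor  -> MonitorScheduleStatus
HostMaintenanceMonitor -> MonitorHostMaintenance

MonitorJobUpdateStatus is new: it polls an update until its status matches one of a caller-supplied set of aurora.JobUpdateStatus values (see monitors.go below).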


@ -166,7 +166,7 @@ func main() {
log.Fatalln(err)
}
if ok, mErr := r.InstancesMonitor(job.JobKey(), job.GetInstanceCount(), 5*time.Second, 50*time.Second); !ok || mErr != nil {
if ok, mErr := r.MonitorInstances(job.JobKey(), job.GetInstanceCount(), 5*time.Second, 50*time.Second); !ok || mErr != nil {
err := r.KillJob(job.JobKey())
if err != nil {
log.Fatalln(err)
@ -185,7 +185,7 @@ func main() {
}
fmt.Println(result.String())
if ok, mErr := r.JobUpdateMonitor(*result.GetKey(), 5*time.Second, 180*time.Second); !ok || mErr != nil {
if ok, mErr := r.MonitorJobUpdate(*result.GetKey(), 5*time.Second, 180*time.Second); !ok || mErr != nil {
err := r.AbortJobUpdate(*result.GetKey(), "Monitor timed out")
err = r.KillJob(job.JobKey())
if err != nil {
@ -203,7 +203,7 @@ func main() {
log.Fatal(err)
}
if ok, err := r.InstancesMonitor(job.JobKey(), job.GetInstanceCount(), 10*time.Second, 300*time.Second); !ok || err != nil {
if ok, err := r.MonitorInstances(job.JobKey(), job.GetInstanceCount(), 10*time.Second, 300*time.Second); !ok || err != nil {
err := r.KillJob(job.JobKey())
if err != nil {
log.Fatal(err)
@ -219,7 +219,7 @@ func main() {
log.Fatal(err)
}
if ok, err := r.InstancesMonitor(job.JobKey(), job.GetInstanceCount(), 10*time.Second, 300*time.Second); !ok || err != nil {
if ok, err := r.MonitorInstances(job.JobKey(), job.GetInstanceCount(), 10*time.Second, 300*time.Second); !ok || err != nil {
err := r.KillJob(job.JobKey())
if err != nil {
log.Fatal(err)
@ -258,7 +258,7 @@ func main() {
log.Fatal(err)
}
if ok, err := r.InstancesMonitor(job.JobKey(), 0, 5*time.Second, 50*time.Second); !ok || err != nil {
if ok, err := r.MonitorInstances(job.JobKey(), 0, 5*time.Second, 50*time.Second); !ok || err != nil {
log.Fatal("Unable to kill all instances of job")
}
@ -312,7 +312,7 @@ func main() {
log.Fatal(err)
}
if ok, err := r.InstancesMonitor(job.JobKey(), int32(currInstances+numOfInstances), 5*time.Second, 50*time.Second); !ok || err != nil {
if ok, err := r.MonitorInstances(job.JobKey(), int32(currInstances+numOfInstances), 5*time.Second, 50*time.Second); !ok || err != nil {
fmt.Println("Flexing up failed")
}
@ -333,7 +333,7 @@ func main() {
log.Fatal(err)
}
if ok, err := r.InstancesMonitor(job.JobKey(), int32(currInstances-numOfInstances), 5*time.Second, 100*time.Second); !ok || err != nil {
if ok, err := r.MonitorInstances(job.JobKey(), int32(currInstances-numOfInstances), 5*time.Second, 100*time.Second); !ok || err != nil {
fmt.Println("flexDown failed")
}
@ -360,7 +360,7 @@ func main() {
}
jobUpdateKey := result.GetKey()
_, err = r.JobUpdateMonitor(*jobUpdateKey, 5*time.Second, 6*time.Minute)
_, err = r.MonitorJobUpdate(*jobUpdateKey, 5*time.Second, 6*time.Minute)
if err != nil {
log.Fatal(err)
}
@ -518,7 +518,7 @@ func main() {
}
// Monitor change to DRAINING and DRAINED mode
hostResult, err := r.HostMaintenanceMonitor(
hostResult, err := r.MonitorHostMaintenance(
hosts,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
5*time.Second,
@ -547,7 +547,7 @@ func main() {
}
// Monitor change to DRAINING and DRAINED mode
hostResult, err := r.HostMaintenanceMonitor(
hostResult, err := r.MonitorHostMaintenance(
hosts,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
5*time.Second,
@ -573,7 +573,7 @@ func main() {
}
// Monitor change back to NONE mode
hostResult, err := r.HostMaintenanceMonitor(
hostResult, err := r.MonitorHostMaintenance(
hosts,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
5*time.Second,


@ -208,7 +208,7 @@ func main() {
fmt.Println("Error creating Aurora job: ", jobCreationErr)
os.Exit(1)
} else {
if ok, monitorErr := r.InstancesMonitor(auroraJob.JobKey(), auroraJob.GetInstanceCount(), 5, 50); !ok || monitorErr != nil {
if ok, monitorErr := r.MonitorInstances(auroraJob.JobKey(), auroraJob.GetInstanceCount(), 5, 50); !ok || monitorErr != nil {
if jobErr := r.KillJob(auroraJob.JobKey()); jobErr !=
nil {
fmt.Println(jobErr)


@ -29,7 +29,7 @@ const (
)
// Polls the scheduler at the given interval to see if the update has succeeded
func (c *Client) JobUpdateMonitor(updateKey aurora.JobUpdateKey, interval, timeout time.Duration) (bool, error) {
func (c *Client) MonitorJobUpdate(updateKey aurora.JobUpdateKey, interval, timeout time.Duration) (bool, error) {
if interval < 1*time.Second {
interval = interval * time.Second
}
@ -87,15 +87,55 @@ func (c *Client) JobUpdateMonitor(updateKey aurora.JobUpdateKey, interval, timeo
}
}
func (c *Client) MonitorJobUpdateStatus(updateKey aurora.JobUpdateKey,
desiredStatuses map[aurora.JobUpdateStatus]bool,
interval, timeout time.Duration) (aurora.JobUpdateStatus, error) {
updateQ := aurora.JobUpdateQuery{
Key: &updateKey,
Limit: 1,
}
ticker := time.NewTicker(interval)
defer ticker.Stop()
timer := time.NewTimer(timeout)
defer timer.Stop()
for {
select {
case <-ticker.C:
updateDetails, cliErr := c.JobUpdateDetails(updateQ)
if cliErr != nil {
return aurora.JobUpdateStatus(-1), cliErr
}
if len(updateDetails) == 0 {
c.RealisConfig().logger.Println("No update found")
return aurora.JobUpdateStatus(-1), errors.New("No update found for " + updateKey.String())
}
status := updateDetails[0].Update.Summary.State.Status
if _, ok := desiredStatuses[status]; ok {
return status, nil
}
case <-timer.C:
return aurora.JobUpdateStatus(-1), newTimedoutError(errors.New("job update monitor timed out"))
}
}
}
// Monitor an AuroraJob until all instances enter one of the LiveStates
func (c *Client) InstancesMonitor(key aurora.JobKey, instances int32, interval, timeout time.Duration) (bool, error) {
return c.ScheduleStatusMonitor(key, instances, aurora.LIVE_STATES, interval, timeout)
func (c *Client) MonitorInstances(key aurora.JobKey, instances int32, interval, timeout time.Duration) (bool, error) {
return c.MonitorScheduleStatus(key, instances, aurora.LIVE_STATES, interval, timeout)
}
// Monitor an AuroraJob until all instances enter a desired status.
// Default sets of desired statuses provided by the thrift API include:
// ActiveStates, SlaveAssignedStates, LiveStates, and TerminalStates
func (c *Client) ScheduleStatusMonitor(key aurora.JobKey, instanceCount int32, desiredStatuses []aurora.ScheduleStatus, interval, timeout time.Duration) (bool, error) {
func (c *Client) MonitorScheduleStatus(key aurora.JobKey,
instanceCount int32,
desiredStatuses []aurora.ScheduleStatus,
interval, timeout time.Duration) (bool, error) {
if interval < 1*time.Second {
interval = interval * time.Second
}
@ -131,7 +171,9 @@ func (c *Client) ScheduleStatusMonitor(key aurora.JobKey, instanceCount int32, d
// Monitor host status until all hosts match the status provided. Returns a map where the value is true if the host
// is in one of the desired mode(s) or false if it is not as of the time when the monitor exited.
func (c *Client) HostMaintenanceMonitor(hosts []string, modes []aurora.MaintenanceMode, interval, timeout time.Duration) (map[string]bool, error) {
func (c *Client) MonitorHostMaintenance(hosts []string,
modes []aurora.MaintenanceMode,
interval, timeout time.Duration) (map[string]bool, error) {
if interval < 1*time.Second {
interval = interval * time.Second
}
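The new MonitorJobUpdateStatus is not exercised in the examples above, so here is a minimal usage sketch (not part of this commit). It assumes r is an existing client and that result.GetKey() returns the update's *aurora.JobUpdateKey, as in the CreateService example earlier:

// Poll every 5 seconds, for up to 10 minutes, until the update reaches
// one of the statuses we consider final.
status, err := r.MonitorJobUpdateStatus(
    *result.GetKey(),
    map[aurora.JobUpdateStatus]bool{
        aurora.JobUpdateStatus_ROLLED_FORWARD: true,
        aurora.JobUpdateStatus_ROLLED_BACK:    true,
        aurora.JobUpdateStatus_ABORTED:        true,
        aurora.JobUpdateStatus_FAILED:         true,
    },
    5*time.Second,
    10*time.Minute)
if err != nil {
    log.Fatal(err)
}
if status != aurora.JobUpdateStatus_ROLLED_FORWARD {
    log.Fatalf("update finished in status %v", status)
}

Unlike MonitorJobUpdate, which reports success or failure as a bool, this variant returns the status that ended the wait, so callers can distinguish, for example, ROLLED_BACK from ABORTED.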


@ -238,7 +238,7 @@ func TestRealisClient_CreateJob_Thermos(t *testing.T) {
assert.NoError(t, err)
// Test Instances Monitor
success, err := r.InstancesMonitor(job.JobKey(), job.GetInstanceCount(), 1*time.Second, 50*time.Second)
success, err := r.MonitorInstances(job.JobKey(), job.GetInstanceCount(), 1*time.Second, 50*time.Second)
assert.True(t, success)
assert.NoError(t, err)
@ -265,7 +265,7 @@ func TestRealisClient_CreateJob_Thermos(t *testing.T) {
err := r.KillJob(job.JobKey())
assert.NoError(t, err)
success, err := r.InstancesMonitor(job.JobKey(), 0, 1*time.Second, 60*time.Second)
success, err := r.MonitorInstances(job.JobKey(), 0, 1*time.Second, 60*time.Second)
assert.True(t, success)
assert.NoError(t, err)
})
@ -429,7 +429,7 @@ func TestRealisClient_CreateService(t *testing.T) {
var ok bool
var mErr error
if ok, mErr = r.JobUpdateMonitor(*result.GetKey(), 5*time.Second, 4*time.Minute); !ok || mErr != nil {
if ok, mErr = r.MonitorJobUpdate(*result.GetKey(), 5*time.Second, 4*time.Minute); !ok || mErr != nil {
// Update may already be in a terminal state so don't check for error
err := r.AbortJobUpdate(*result.GetKey(), "Monitor timed out.")
@ -507,7 +507,7 @@ func TestRealisClient_StartMaintenance(t *testing.T) {
assert.NoError(t, err)
// Monitor change to SCHEDULED mode
hostResults, err := r.HostMaintenanceMonitor(
hostResults, err := r.MonitorHostMaintenance(
hosts,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_SCHEDULED},
1*time.Second,
@ -519,7 +519,7 @@ func TestRealisClient_StartMaintenance(t *testing.T) {
assert.NoError(t, err)
// Monitor change back to NONE mode
_, err = r.HostMaintenanceMonitor(
_, err = r.MonitorHostMaintenance(
hosts,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
5*time.Second,
@ -533,7 +533,7 @@ func TestRealisClient_DrainHosts(t *testing.T) {
assert.NoError(t, err)
// Monitor change to DRAINING and DRAINED mode
hostResults, err := r.HostMaintenanceMonitor(
hostResults, err := r.MonitorHostMaintenance(
hosts,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
1*time.Second,
@ -543,7 +543,7 @@ func TestRealisClient_DrainHosts(t *testing.T) {
t.Run("TestRealisClient_MonitorNontransitioned", func(t *testing.T) {
// Monitor change to DRAINING and DRAINED mode
hostResults, err := r.HostMaintenanceMonitor(
hostResults, err := r.MonitorHostMaintenance(
append(hosts, "IMAGINARY_HOST"),
[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
1*time.Second,
@ -559,7 +559,7 @@ func TestRealisClient_DrainHosts(t *testing.T) {
assert.NoError(t, err)
// Monitor change back to NONE mode
_, err = r.HostMaintenanceMonitor(
_, err = r.MonitorHostMaintenance(
hosts,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
5*time.Second,
@ -580,7 +580,7 @@ func TestRealisClient_SLADrainHosts(t *testing.T) {
}
// Monitor change to DRAINING and DRAINED mode
hostResults, err := r.HostMaintenanceMonitor(
hostResults, err := r.MonitorHostMaintenance(
hosts,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
1*time.Second,
@ -592,7 +592,7 @@ func TestRealisClient_SLADrainHosts(t *testing.T) {
assert.NoError(t, err)
// Monitor change back to NONE mode
_, err = r.HostMaintenanceMonitor(
_, err = r.MonitorHostMaintenance(
hosts,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
5*time.Second,
@ -627,7 +627,7 @@ func TestRealisClient_SessionThreadSafety(t *testing.T) {
defer wg.Done()
// Test Schedule status monitor for terminal state and timing out after 30 seconds
success, err := r.ScheduleStatusMonitor(job.JobKey(), job.GetInstanceCount(), aurora.LIVE_STATES, 1, 30)
success, err := r.MonitorScheduleStatus(job.JobKey(), job.GetInstanceCount(), aurora.LIVE_STATES, 1, 30)
assert.False(t, success)
assert.Error(t, err)
@ -710,7 +710,7 @@ func TestRealisClient_PartitionPolicy(t *testing.T) {
var ok bool
var mErr error
if ok, mErr = r.JobUpdateMonitor(*result.GetKey(), 5*time.Second, 4*time.Minute); !ok || mErr != nil {
if ok, mErr = r.MonitorJobUpdate(*result.GetKey(), 5*time.Second, 4*time.Minute); !ok || mErr != nil {
// Update may already be in a terminal state so don't check for error
err := r.AbortJobUpdate(*result.GetKey(), "Monitor timed out.")
err = r.KillJob(job.JobKey())
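Finally, a sketch (also not part of this commit) of the renamed MonitorScheduleStatus with one of the other default status sets mentioned in monitors.go. It assumes r and job as in the tests above, and that aurora.TERMINAL_STATES is the generated constant for the thrift TerminalStates set:

// Block until every instance of the job is in a terminal state,
// polling every second and giving up after 30 seconds.
success, err := r.MonitorScheduleStatus(
    job.JobKey(),
    job.GetInstanceCount(),
    aurora.TERMINAL_STATES,
    1*time.Second,
    30*time.Second)
if err != nil {
    log.Fatal(err)
}
if !success {
    log.Fatal("instances did not reach a terminal state before the timeout")
}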