Making abort job synchronous (#95)

* Making abort job synchronous to avoid scenarios where kill is received before job update lock is released.
* Adding missing cases for terminal update statues to JobUpdate monitor.
* Monitors now return errors which provide context through behavior.
* Adding notes to the doc explaining what happens when AbortJob times out.
This commit is contained in:
Renan DelValle 2019-01-15 14:55:59 -08:00 committed by GitHub
parent 10c620de7b
commit 2b7eb3a852
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 66 additions and 32 deletions

View file

@ -775,6 +775,8 @@ func (r *realisClient) StartJobUpdate(updateJob *UpdateJob, message string) (*au
}
// Abort Job Update on Aurora. Requires the updateId which can be obtained on the Aurora web UI.
// This API is meant to be synchronous. It will attempt to wait until the update transitions to the aborted state.
// However, if the job update does not transition to the ABORT state an error will be returned.
func (r *realisClient) AbortJobUpdate(updateKey aurora.JobUpdateKey, message string) (*aurora.Response, error) {
r.logger.DebugPrintf("AbortJobUpdate Thrift Payload: %+v %v\n", updateKey, message)
@ -786,7 +788,12 @@ func (r *realisClient) AbortJobUpdate(updateKey aurora.JobUpdateKey, message str
if retryErr != nil {
return nil, errors.Wrap(retryErr, "Error sending AbortJobUpdate command to Aurora Scheduler")
}
return resp, nil
// Make this call synchronous by blocking until it job has successfully transitioned to aborted
m := Monitor{Client: r}
_, err := m.JobUpdateStatus(updateKey, map[aurora.JobUpdateStatus]bool{aurora.JobUpdateStatus_ABORTED: true}, time.Second*5, time.Minute)
return resp, err
}
//Pause Job Update. UpdateID is returned from StartJobUpdate or the Aurora web UI.