V1 CreateService and StartJobUpdate Timeout signal and cleanup (#105)
* Bumped up version to 1.21.1 * Moving admin functions to a new file. They are still part of the same pointer receiver type. * Removing dead code and fixing some comments to add space between backslash and comment. * Adding set up and tear down to run tests script. It sets up a pod, runs all tests, and then tears down the pod. * Added `--rm` to run tests Mac script. * Removing cookie jar from transport layer as it's not needed. * Changing all error messages to start with a lower case letter. Changing some messages around to be more descriptive. * Adding an argument to allow the retry mechanism to stop if a timeout has been encountered. This is useful for mutating API calls. Only StartUpdate and CreateService have enabled by default stop at timeout. * Added 2 tests for when a call goes through despite the client timing out. One is with a good payload, one is with a bad payload. * Updating changelog with information about the error type returned. * Adding test for duplicate metadata. * Refactored JobUpdateStatus monitor to use a new monitor called JobUpdateQuery. Update monitor will now still continue if it does not find an update to monitor. Furthermore, it has been optimized to reduce returning payloads from the scheduler as much as possible. This is through using the GetJobUpdateSummaries API instead of JobUpdateDetails and by including a the statuses we're searching for as part of the query. * Added documentation as to how to handle a timeout on an API request. * Optimized GetInstancesIds to create a copy of the JobKey being passed down in order to avoid unexpected behavior. Instead of setting every variable name separately, now a JobKey array is being created.
This commit is contained in:
parent
e16e390afe
commit
1a15c4a5aa
12 changed files with 661 additions and 475 deletions
80
monitors.go
80
monitors.go
|
@ -19,7 +19,6 @@ import (
|
|||
"time"
|
||||
|
||||
"github.com/paypal/gorealis/gen-go/apache/aurora"
|
||||
"github.com/paypal/gorealis/response"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
|
@ -30,16 +29,20 @@ type Monitor struct {
|
|||
// Polls the scheduler every certain amount of time to see if the update has succeeded
|
||||
func (m *Monitor) JobUpdate(updateKey aurora.JobUpdateKey, interval int, timeout int) (bool, error) {
|
||||
|
||||
status, err := m.JobUpdateStatus(updateKey,
|
||||
map[aurora.JobUpdateStatus]bool{
|
||||
aurora.JobUpdateStatus_ROLLED_FORWARD: true,
|
||||
aurora.JobUpdateStatus_ROLLED_BACK: true,
|
||||
aurora.JobUpdateStatus_ABORTED: true,
|
||||
aurora.JobUpdateStatus_ERROR: true,
|
||||
aurora.JobUpdateStatus_FAILED: true,
|
||||
updateQ := aurora.JobUpdateQuery{
|
||||
Key: &updateKey,
|
||||
Limit: 1,
|
||||
UpdateStatuses: []aurora.JobUpdateStatus{
|
||||
aurora.JobUpdateStatus_ROLLED_FORWARD,
|
||||
aurora.JobUpdateStatus_ROLLED_BACK,
|
||||
aurora.JobUpdateStatus_ABORTED,
|
||||
aurora.JobUpdateStatus_ERROR,
|
||||
aurora.JobUpdateStatus_FAILED,
|
||||
},
|
||||
time.Duration(interval)*time.Second,
|
||||
time.Duration(timeout)*time.Second)
|
||||
}
|
||||
updateSummaries, err := m.JobUpdateQuery(updateQ, time.Duration(interval)*time.Second, time.Duration(timeout)*time.Second)
|
||||
|
||||
status := updateSummaries[0].State.Status
|
||||
|
||||
if err != nil {
|
||||
return false, err
|
||||
|
@ -52,22 +55,43 @@ func (m *Monitor) JobUpdate(updateKey aurora.JobUpdateKey, interval int, timeout
|
|||
switch status {
|
||||
case aurora.JobUpdateStatus_ROLLED_FORWARD:
|
||||
return true, nil
|
||||
case aurora.JobUpdateStatus_ROLLED_BACK, aurora.JobUpdateStatus_ABORTED, aurora.JobUpdateStatus_ERROR, aurora.JobUpdateStatus_FAILED:
|
||||
case aurora.JobUpdateStatus_ROLLED_BACK,
|
||||
aurora.JobUpdateStatus_ABORTED,
|
||||
aurora.JobUpdateStatus_ERROR,
|
||||
aurora.JobUpdateStatus_FAILED:
|
||||
return false, errors.Errorf("bad terminal state for update: %v", status)
|
||||
default:
|
||||
return false, errors.Errorf("unexpected update state: %v", status)
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Monitor) JobUpdateStatus(updateKey aurora.JobUpdateKey,
|
||||
func (m *Monitor) JobUpdateStatus(
|
||||
updateKey aurora.JobUpdateKey,
|
||||
desiredStatuses map[aurora.JobUpdateStatus]bool,
|
||||
interval time.Duration,
|
||||
timeout time.Duration) (aurora.JobUpdateStatus, error) {
|
||||
|
||||
updateQ := aurora.JobUpdateQuery{
|
||||
Key: &updateKey,
|
||||
Limit: 1,
|
||||
desiredStatusesSlice := make([]aurora.JobUpdateStatus, 0)
|
||||
|
||||
for k := range desiredStatuses {
|
||||
desiredStatusesSlice = append(desiredStatusesSlice, k)
|
||||
}
|
||||
|
||||
updateQ := aurora.JobUpdateQuery{
|
||||
Key: &updateKey,
|
||||
Limit: 1,
|
||||
UpdateStatuses: desiredStatusesSlice,
|
||||
}
|
||||
summary, err := m.JobUpdateQuery(updateQ, interval, timeout)
|
||||
|
||||
return summary[0].State.Status, err
|
||||
}
|
||||
|
||||
func (m *Monitor) JobUpdateQuery(
|
||||
updateQuery aurora.JobUpdateQuery,
|
||||
interval time.Duration,
|
||||
timeout time.Duration) ([]*aurora.JobUpdateSummary, error) {
|
||||
|
||||
ticker := time.NewTicker(interval)
|
||||
defer ticker.Stop()
|
||||
timer := time.NewTimer(timeout)
|
||||
|
@ -78,25 +102,18 @@ func (m *Monitor) JobUpdateStatus(updateKey aurora.JobUpdateKey,
|
|||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
respDetail, cliErr = m.Client.JobUpdateDetails(updateQ)
|
||||
respDetail, cliErr = m.Client.GetJobUpdateSummaries(&updateQuery)
|
||||
if cliErr != nil {
|
||||
return aurora.JobUpdateStatus(-1), cliErr
|
||||
return nil, cliErr
|
||||
}
|
||||
|
||||
updateDetail := response.JobUpdateDetails(respDetail)
|
||||
|
||||
if len(updateDetail) == 0 {
|
||||
m.Client.RealisConfig().logger.Println("No update found")
|
||||
return aurora.JobUpdateStatus(-1), errors.New("No update found for " + updateKey.String())
|
||||
}
|
||||
status := updateDetail[0].Update.Summary.State.Status
|
||||
|
||||
if _, ok := desiredStatuses[status]; ok {
|
||||
return status, nil
|
||||
updateSummaries := respDetail.Result_.GetJobUpdateSummariesResult_.UpdateSummaries
|
||||
if len(updateSummaries) >= 1 {
|
||||
return updateSummaries, nil
|
||||
}
|
||||
|
||||
case <-timer.C:
|
||||
return aurora.JobUpdateStatus(-1), newTimedoutError(errors.New("job update monitor timed out"))
|
||||
return nil, newTimedoutError(errors.New("job update monitor timed out"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -109,7 +126,12 @@ func (m *Monitor) Instances(key *aurora.JobKey, instances int32, interval, timeo
|
|||
// Monitor a Job until all instances enter a desired status.
|
||||
// Defaults sets of desired statuses provided by the thrift API include:
|
||||
// ACTIVE_STATES, SLAVE_ASSIGNED_STATES, LIVE_STATES, and TERMINAL_STATES
|
||||
func (m *Monitor) ScheduleStatus(key *aurora.JobKey, instanceCount int32, desiredStatuses map[aurora.ScheduleStatus]bool, interval, timeout int) (bool, error) {
|
||||
func (m *Monitor) ScheduleStatus(
|
||||
key *aurora.JobKey,
|
||||
instanceCount int32,
|
||||
desiredStatuses map[aurora.ScheduleStatus]bool,
|
||||
interval int,
|
||||
timeout int) (bool, error) {
|
||||
|
||||
ticker := time.NewTicker(time.Second * time.Duration(interval))
|
||||
defer ticker.Stop()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue