V1 CreateService and StartJobUpdate Timeout signal and cleanup (#105)

* Bumped up version to 1.21.1

* Moving admin functions to a new file. They are still part of the same pointer receiver type.

* Removing dead code and fixing some comments to add space between backslash and comment.

* Adding set up and tear down to run tests script. It sets up a pod, runs all tests, and then tears down the pod.

* Added `--rm` to run tests Mac script.

* Removing cookie jar from transport layer as it's not needed.

* Changing all error messages to start with a lower case letter. Changing some messages around to be more descriptive.

* Adding an argument to allow the retry mechanism to stop if a timeout has been encountered. This is useful for mutating API calls. Only StartUpdate and CreateService have enabled by default stop at timeout.

* Added 2 tests for when a call goes through despite the client timing out. One is with a good payload, one is with a bad payload.

* Updating changelog with information about the error type returned.

* Adding test for duplicate metadata.

* Refactored JobUpdateStatus monitor to use a new monitor called JobUpdateQuery. Update monitor will now still continue if it does not find an update to monitor. Furthermore, it has been optimized to reduce returning payloads from the scheduler as much as possible. This is through using the GetJobUpdateSummaries API instead of JobUpdateDetails and by including a the statuses we're searching for as part of the query.


* Added documentation as to how to handle a timeout on an API request.

* Optimized GetInstancesIds to create a copy of the JobKey being passed down in order to avoid unexpected behavior. Instead of setting every variable name separately, now a JobKey array is being created.
This commit is contained in:
Renan DelValle 2019-05-05 11:46:22 -07:00 committed by GitHub
parent e16e390afe
commit 1a15c4a5aa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
12 changed files with 661 additions and 475 deletions

View file

@ -19,7 +19,6 @@ import (
"time"
"github.com/paypal/gorealis/gen-go/apache/aurora"
"github.com/paypal/gorealis/response"
"github.com/pkg/errors"
)
@ -30,16 +29,20 @@ type Monitor struct {
// Polls the scheduler every certain amount of time to see if the update has succeeded
func (m *Monitor) JobUpdate(updateKey aurora.JobUpdateKey, interval int, timeout int) (bool, error) {
status, err := m.JobUpdateStatus(updateKey,
map[aurora.JobUpdateStatus]bool{
aurora.JobUpdateStatus_ROLLED_FORWARD: true,
aurora.JobUpdateStatus_ROLLED_BACK: true,
aurora.JobUpdateStatus_ABORTED: true,
aurora.JobUpdateStatus_ERROR: true,
aurora.JobUpdateStatus_FAILED: true,
updateQ := aurora.JobUpdateQuery{
Key: &updateKey,
Limit: 1,
UpdateStatuses: []aurora.JobUpdateStatus{
aurora.JobUpdateStatus_ROLLED_FORWARD,
aurora.JobUpdateStatus_ROLLED_BACK,
aurora.JobUpdateStatus_ABORTED,
aurora.JobUpdateStatus_ERROR,
aurora.JobUpdateStatus_FAILED,
},
time.Duration(interval)*time.Second,
time.Duration(timeout)*time.Second)
}
updateSummaries, err := m.JobUpdateQuery(updateQ, time.Duration(interval)*time.Second, time.Duration(timeout)*time.Second)
status := updateSummaries[0].State.Status
if err != nil {
return false, err
@ -52,22 +55,43 @@ func (m *Monitor) JobUpdate(updateKey aurora.JobUpdateKey, interval int, timeout
switch status {
case aurora.JobUpdateStatus_ROLLED_FORWARD:
return true, nil
case aurora.JobUpdateStatus_ROLLED_BACK, aurora.JobUpdateStatus_ABORTED, aurora.JobUpdateStatus_ERROR, aurora.JobUpdateStatus_FAILED:
case aurora.JobUpdateStatus_ROLLED_BACK,
aurora.JobUpdateStatus_ABORTED,
aurora.JobUpdateStatus_ERROR,
aurora.JobUpdateStatus_FAILED:
return false, errors.Errorf("bad terminal state for update: %v", status)
default:
return false, errors.Errorf("unexpected update state: %v", status)
}
}
func (m *Monitor) JobUpdateStatus(updateKey aurora.JobUpdateKey,
func (m *Monitor) JobUpdateStatus(
updateKey aurora.JobUpdateKey,
desiredStatuses map[aurora.JobUpdateStatus]bool,
interval time.Duration,
timeout time.Duration) (aurora.JobUpdateStatus, error) {
updateQ := aurora.JobUpdateQuery{
Key: &updateKey,
Limit: 1,
desiredStatusesSlice := make([]aurora.JobUpdateStatus, 0)
for k := range desiredStatuses {
desiredStatusesSlice = append(desiredStatusesSlice, k)
}
updateQ := aurora.JobUpdateQuery{
Key: &updateKey,
Limit: 1,
UpdateStatuses: desiredStatusesSlice,
}
summary, err := m.JobUpdateQuery(updateQ, interval, timeout)
return summary[0].State.Status, err
}
func (m *Monitor) JobUpdateQuery(
updateQuery aurora.JobUpdateQuery,
interval time.Duration,
timeout time.Duration) ([]*aurora.JobUpdateSummary, error) {
ticker := time.NewTicker(interval)
defer ticker.Stop()
timer := time.NewTimer(timeout)
@ -78,25 +102,18 @@ func (m *Monitor) JobUpdateStatus(updateKey aurora.JobUpdateKey,
for {
select {
case <-ticker.C:
respDetail, cliErr = m.Client.JobUpdateDetails(updateQ)
respDetail, cliErr = m.Client.GetJobUpdateSummaries(&updateQuery)
if cliErr != nil {
return aurora.JobUpdateStatus(-1), cliErr
return nil, cliErr
}
updateDetail := response.JobUpdateDetails(respDetail)
if len(updateDetail) == 0 {
m.Client.RealisConfig().logger.Println("No update found")
return aurora.JobUpdateStatus(-1), errors.New("No update found for " + updateKey.String())
}
status := updateDetail[0].Update.Summary.State.Status
if _, ok := desiredStatuses[status]; ok {
return status, nil
updateSummaries := respDetail.Result_.GetJobUpdateSummariesResult_.UpdateSummaries
if len(updateSummaries) >= 1 {
return updateSummaries, nil
}
case <-timer.C:
return aurora.JobUpdateStatus(-1), newTimedoutError(errors.New("job update monitor timed out"))
return nil, newTimedoutError(errors.New("job update monitor timed out"))
}
}
}
@ -109,7 +126,12 @@ func (m *Monitor) Instances(key *aurora.JobKey, instances int32, interval, timeo
// Monitor a Job until all instances enter a desired status.
// Defaults sets of desired statuses provided by the thrift API include:
// ACTIVE_STATES, SLAVE_ASSIGNED_STATES, LIVE_STATES, and TERMINAL_STATES
func (m *Monitor) ScheduleStatus(key *aurora.JobKey, instanceCount int32, desiredStatuses map[aurora.ScheduleStatus]bool, interval, timeout int) (bool, error) {
func (m *Monitor) ScheduleStatus(
key *aurora.JobKey,
instanceCount int32,
desiredStatuses map[aurora.ScheduleStatus]bool,
interval int,
timeout int) (bool, error) {
ticker := time.NewTicker(time.Second * time.Duration(interval))
defer ticker.Stop()