API update to support staggered updates prototype.

This commit is contained in:
Renan DelValle 2018-05-09 15:33:18 -07:00
parent 40153d5cb1
commit 77bb78927e
No known key found for this signature in database
GPG key ID: C240AD6D6F443EC9
4 changed files with 191 additions and 73 deletions

View file

@ -680,9 +680,27 @@ struct JobUpdateKey {
2: string id 2: string id
} }
/** Job update thresholds and limits. */ /** Declaration of update strategy types available. **/
enum JobUpdateStrategyType {
/** An update strategy that will maintain a limited amount of updates running. */
QUEUE = 0,
/** An update strategy that will only add more work when the current active group is empty. */
BATCH = 1,
/**
* An update strategy that will only add more work when the current active group is empty.
* Unlike BatchUpdate, once an active group is empty, the size of the next active group
* is allowed to change using this strategy.
*/
VARIABLE_BATCH = 2
}
/** Job update thresholds and limits. **/
struct JobUpdateSettings { struct JobUpdateSettings {
/** Max number of instances being updated at any given moment. */ /**
* TODO(rdelvalle): determine if it's better to use updateGroupSizes for everything and capping
* updateGroupSizes at length=1 for BATCH and QUEUE.
* Max number of instances being updated at any given moment.
*/
1: i32 updateGroupSize 1: i32 updateGroupSize
/** Max number of instance failures to tolerate before marking instance as FAILED. */ /** Max number of instance failures to tolerate before marking instance as FAILED. */
@ -700,13 +718,13 @@ struct JobUpdateSettings {
/** Instance IDs to act on. All instances will be affected if this is not set. */ /** Instance IDs to act on. All instances will be affected if this is not set. */
7: set<Range> updateOnlyTheseInstances 7: set<Range> updateOnlyTheseInstances
/** /** TODO(rdelvalle): Deprecated, please set updateStrategyType to BATCH instead
* If true, use updateGroupSize as strict batching boundaries, and avoid proceeding to another * If true, use updateGroupSize as strict batching boundaries, and avoid proceeding to another
* batch until the preceding batch finishes updating. * batch until the preceding batch finishes updating.
*/ */
8: bool waitForBatchCompletion 8: bool waitForBatchCompletion
/** /**
* If set, requires external calls to pulseJobUpdate RPC within the specified rate for the * If set, requires external calls to pulseJobUpdate RPC within the specified rate for the
* update to make progress. If no pulses received within specified interval the update will * update to make progress. If no pulses received within specified interval the update will
* block. A blocked update is unable to continue but retains its current status. It may only get * block. A blocked update is unable to continue but retains its current status. It may only get
@ -715,16 +733,16 @@ struct JobUpdateSettings {
9: optional i32 blockIfNoPulsesAfterMs 9: optional i32 blockIfNoPulsesAfterMs
/** /**
* This list contains the number of instances that each batch will complete before moving on to * Explicitly state which Update strategy type to use.
* the next. This field can only be used with waitForBatchCompletion set as true. */
**/ 10: optional JobUpdateStrategyType updateStrategyType
10: optional list<i32> variableUpdateGroupSize
/** /**
* Pauses the deployment of further tasks after each batch completes * Limit for each update group during an update.
* until the user sends an resume call. * This field should always be length of 1 for QUEUE and BATCH.
**/ */
11: bool autoPause} 11: optional list<i32> groupsSize
}
/** Event marking a state transition in job update lifecycle. */ /** Event marking a state transition in job update lifecycle. */
struct JobUpdateEvent { struct JobUpdateEvent {

View file

@ -18,12 +18,9 @@ import (
"flag" "flag"
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"log"
"os" "os"
"time"
"strings" "strings"
"time"
"github.com/paypal/gorealis" "github.com/paypal/gorealis"
"github.com/paypal/gorealis/gen-go/apache/aurora" "github.com/paypal/gorealis/gen-go/apache/aurora"
@ -90,7 +87,6 @@ func main() {
Factor: 2.0, Factor: 2.0,
Jitter: 0.1, Jitter: 0.1,
}), }),
realis.SetLogger(log.New(os.Stdout, "realis-debug: ", log.Ldate)),
realis.Debug(), realis.Debug(),
} }
@ -191,7 +187,6 @@ func main() {
// Create a service with three instances using the update API instead of the createJob API // Create a service with three instances using the update API instead of the createJob API
fmt.Println("Creating service") fmt.Println("Creating service")
settings := realis.NewUpdateSettings() settings := realis.NewUpdateSettings()
settings.VariableUpdateGroupSize = []int32{1, 2, 3}
job.InstanceCount(6).RAM(16).CPU(.1) job.InstanceCount(6).RAM(16).CPU(.1)
resp, result, err := r.CreateService(job, settings) resp, result, err := r.CreateService(job, settings)
if err != nil { if err != nil {
@ -430,6 +425,40 @@ func main() {
monitor.JobUpdate(*jobUpdateKey, 5, 500) monitor.JobUpdate(*jobUpdateKey, 5, 500)
break break
case "staggeredUpdate":
fmt.Println("Updating a job with with less RAM and to 5 instances staggered")
live, err := r.GetInstanceIds(job.JobKey(), aurora.ACTIVE_STATES)
if err != nil {
fmt.Println(err)
os.Exit(1)
}
var instId int32
for k := range live {
instId = k
break
}
taskConfig, err := r.FetchTaskConfig(aurora.InstanceKey{
JobKey: job.JobKey(),
InstanceId: instId,
})
if err != nil {
fmt.Println(err)
os.Exit(1)
}
updateJob := realis.NewDefaultUpdateJob(taskConfig).
UpdateStrategy(aurora.JobUpdateStrategyType_VARIABLE_BATCH).
GroupsSize([]int32{1, 2})
updateJob.InstanceCount(3).RAM(8).CPU(.1)
resp, err := r.StartJobUpdate(updateJob, "")
if err != nil {
fmt.Println(err)
os.Exit(1)
}
jobUpdateKey := response.JobUpdateKey(resp)
monitor.JobUpdate(*jobUpdateKey, 5, 500)
break
case "pauseJobUpdate": case "pauseJobUpdate":
resp, err := r.PauseJobUpdate(&aurora.JobUpdateKey{ resp, err := r.PauseJobUpdate(&aurora.JobUpdateKey{
Job: job.JobKey(), Job: job.JobKey(),
@ -492,7 +521,7 @@ func main() {
fmt.Println(resp.String()) fmt.Println(resp.String())
break break
case "rollbackUpdate": case "rollbackUpdate":
fmt.Println("Abort update") fmt.Println("Rollback update")
resp, err := r.RollbackJobUpdate(aurora.JobUpdateKey{ resp, err := r.RollbackJobUpdate(aurora.JobUpdateKey{
Job: job.JobKey(), Job: job.JobKey(),
ID: updateId, ID: updateId,

View file

@ -523,6 +523,54 @@ func (p *JobUpdatePulseStatus) UnmarshalText(text []byte) error {
return nil return nil
} }
//Declaration of update strategy types available. *
type JobUpdateStrategyType int64
const (
JobUpdateStrategyType_QUEUE JobUpdateStrategyType = 0
JobUpdateStrategyType_BATCH JobUpdateStrategyType = 1
JobUpdateStrategyType_VARIABLE_BATCH JobUpdateStrategyType = 2
)
func (p JobUpdateStrategyType) String() string {
switch p {
case JobUpdateStrategyType_QUEUE:
return "QUEUE"
case JobUpdateStrategyType_BATCH:
return "BATCH"
case JobUpdateStrategyType_VARIABLE_BATCH:
return "VARIABLE_BATCH"
}
return "<UNSET>"
}
func JobUpdateStrategyTypeFromString(s string) (JobUpdateStrategyType, error) {
switch s {
case "QUEUE":
return JobUpdateStrategyType_QUEUE, nil
case "BATCH":
return JobUpdateStrategyType_BATCH, nil
case "VARIABLE_BATCH":
return JobUpdateStrategyType_VARIABLE_BATCH, nil
}
return JobUpdateStrategyType(0), fmt.Errorf("not a valid JobUpdateStrategyType string")
}
func JobUpdateStrategyTypePtr(v JobUpdateStrategyType) *JobUpdateStrategyType { return &v }
func (p JobUpdateStrategyType) MarshalText() ([]byte, error) {
return []byte(p.String()), nil
}
func (p *JobUpdateStrategyType) UnmarshalText(text []byte) error {
q, err := JobUpdateStrategyTypeFromString(string(text))
if err != nil {
return err
}
*p = q
return nil
}
// Attributes: // Attributes:
// - User // - User
type Identity struct { type Identity struct {
@ -9183,39 +9231,39 @@ func (p *JobUpdateKey) String() string {
return fmt.Sprintf("JobUpdateKey(%+v)", *p) return fmt.Sprintf("JobUpdateKey(%+v)", *p)
} }
// Job update thresholds and limits. // Job update thresholds and limits. *
// //
// Attributes: // Attributes:
// - UpdateGroupSize: Max number of instances being updated at any given moment. // - UpdateGroupSize: TODO(rdelvalle): determine if it's better to use updateGroupSizes for everything and capping
// updateGroupSizes at length=1 for BATCH and QUEUE.
// Max number of instances being updated at any given moment.
// - MaxPerInstanceFailures: Max number of instance failures to tolerate before marking instance as FAILED. // - MaxPerInstanceFailures: Max number of instance failures to tolerate before marking instance as FAILED.
// - MaxFailedInstances: Max number of FAILED instances to tolerate before terminating the update. // - MaxFailedInstances: Max number of FAILED instances to tolerate before terminating the update.
// - MinWaitInInstanceRunningMs: Min time to watch a RUNNING instance. // - MinWaitInInstanceRunningMs: Min time to watch a RUNNING instance.
// - RollbackOnFailure: If true, enables failed update rollback. // - RollbackOnFailure: If true, enables failed update rollback.
// - UpdateOnlyTheseInstances: Instance IDs to act on. All instances will be affected if this is not set. // - UpdateOnlyTheseInstances: Instance IDs to act on. All instances will be affected if this is not set.
// - WaitForBatchCompletion: If true, use updateGroupSize as strict batching boundaries, and avoid proceeding to another // - WaitForBatchCompletion: TODO(rdelvalle): Deprecated, please set updateStrategyType to BATCH instead
// If true, use updateGroupSize as strict batching boundaries, and avoid proceeding to another
// batch until the preceding batch finishes updating. // batch until the preceding batch finishes updating.
// - BlockIfNoPulsesAfterMs: If set, requires external calls to pulseJobUpdate RPC within the specified rate for the // - BlockIfNoPulsesAfterMs: If set, requires external calls to pulseJobUpdate RPC within the specified rate for the
// update to make progress. If no pulses received within specified interval the update will // update to make progress. If no pulses received within specified interval the update will
// block. A blocked update is unable to continue but retains its current status. It may only get // block. A blocked update is unable to continue but retains its current status. It may only get
// unblocked by a fresh pulseJobUpdate call. // unblocked by a fresh pulseJobUpdate call.
// - VariableUpdateGroupSize: * This list contains the number of instances that each batch will complete before moving on to // - UpdateStrategyType: Explicitly state which Update strategy type to use.
// * the next. This field can only be used with waitForBatchCompletion set as true. // - GroupsSize: Limit for each update group during an update.
// * // This field should always be length of 1 for QUEUE and BATCH.
// - AutoPause: Pauses the deployment of further tasks after each batch completes
// until the user sends an resume call.
//
type JobUpdateSettings struct { type JobUpdateSettings struct {
UpdateGroupSize int32 `thrift:"updateGroupSize,1" json:"updateGroupSize"` UpdateGroupSize int32 `thrift:"updateGroupSize,1" json:"updateGroupSize"`
MaxPerInstanceFailures int32 `thrift:"maxPerInstanceFailures,2" json:"maxPerInstanceFailures"` MaxPerInstanceFailures int32 `thrift:"maxPerInstanceFailures,2" json:"maxPerInstanceFailures"`
MaxFailedInstances int32 `thrift:"maxFailedInstances,3" json:"maxFailedInstances"` MaxFailedInstances int32 `thrift:"maxFailedInstances,3" json:"maxFailedInstances"`
// unused field # 4 // unused field # 4
MinWaitInInstanceRunningMs int32 `thrift:"minWaitInInstanceRunningMs,5" json:"minWaitInInstanceRunningMs"` MinWaitInInstanceRunningMs int32 `thrift:"minWaitInInstanceRunningMs,5" json:"minWaitInInstanceRunningMs"`
RollbackOnFailure bool `thrift:"rollbackOnFailure,6" json:"rollbackOnFailure"` RollbackOnFailure bool `thrift:"rollbackOnFailure,6" json:"rollbackOnFailure"`
UpdateOnlyTheseInstances map[*Range]bool `thrift:"updateOnlyTheseInstances,7" json:"updateOnlyTheseInstances"` UpdateOnlyTheseInstances map[*Range]bool `thrift:"updateOnlyTheseInstances,7" json:"updateOnlyTheseInstances"`
WaitForBatchCompletion bool `thrift:"waitForBatchCompletion,8" json:"waitForBatchCompletion"` WaitForBatchCompletion bool `thrift:"waitForBatchCompletion,8" json:"waitForBatchCompletion"`
BlockIfNoPulsesAfterMs *int32 `thrift:"blockIfNoPulsesAfterMs,9" json:"blockIfNoPulsesAfterMs,omitempty"` BlockIfNoPulsesAfterMs *int32 `thrift:"blockIfNoPulsesAfterMs,9" json:"blockIfNoPulsesAfterMs,omitempty"`
VariableUpdateGroupSize []int32 `thrift:"variableUpdateGroupSize,10" json:"variableUpdateGroupSize,omitempty"` UpdateStrategyType *JobUpdateStrategyType `thrift:"updateStrategyType,10" json:"updateStrategyType,omitempty"`
AutoPause bool `thrift:"autoPause,11" json:"autoPause"` GroupsSize []int32 `thrift:"groupsSize,11" json:"groupsSize,omitempty"`
} }
func NewJobUpdateSettings() *JobUpdateSettings { func NewJobUpdateSettings() *JobUpdateSettings {
@ -9259,21 +9307,30 @@ func (p *JobUpdateSettings) GetBlockIfNoPulsesAfterMs() int32 {
return *p.BlockIfNoPulsesAfterMs return *p.BlockIfNoPulsesAfterMs
} }
var JobUpdateSettings_VariableUpdateGroupSize_DEFAULT []int32 var JobUpdateSettings_UpdateStrategyType_DEFAULT JobUpdateStrategyType
func (p *JobUpdateSettings) GetVariableUpdateGroupSize() []int32 { func (p *JobUpdateSettings) GetUpdateStrategyType() JobUpdateStrategyType {
return p.VariableUpdateGroupSize if !p.IsSetUpdateStrategyType() {
return JobUpdateSettings_UpdateStrategyType_DEFAULT
}
return *p.UpdateStrategyType
} }
func (p *JobUpdateSettings) GetAutoPause() bool { var JobUpdateSettings_GroupsSize_DEFAULT []int32
return p.AutoPause
func (p *JobUpdateSettings) GetGroupsSize() []int32 {
return p.GroupsSize
} }
func (p *JobUpdateSettings) IsSetBlockIfNoPulsesAfterMs() bool { func (p *JobUpdateSettings) IsSetBlockIfNoPulsesAfterMs() bool {
return p.BlockIfNoPulsesAfterMs != nil return p.BlockIfNoPulsesAfterMs != nil
} }
func (p *JobUpdateSettings) IsSetVariableUpdateGroupSize() bool { func (p *JobUpdateSettings) IsSetUpdateStrategyType() bool {
return p.VariableUpdateGroupSize != nil return p.UpdateStrategyType != nil
}
func (p *JobUpdateSettings) IsSetGroupsSize() bool {
return p.GroupsSize != nil
} }
func (p *JobUpdateSettings) Read(iprot thrift.TProtocol) error { func (p *JobUpdateSettings) Read(iprot thrift.TProtocol) error {
@ -9429,12 +9486,22 @@ func (p *JobUpdateSettings) readField9(iprot thrift.TProtocol) error {
} }
func (p *JobUpdateSettings) readField10(iprot thrift.TProtocol) error { func (p *JobUpdateSettings) readField10(iprot thrift.TProtocol) error {
if v, err := iprot.ReadI32(); err != nil {
return thrift.PrependError("error reading field 10: ", err)
} else {
temp := JobUpdateStrategyType(v)
p.UpdateStrategyType = &temp
}
return nil
}
func (p *JobUpdateSettings) readField11(iprot thrift.TProtocol) error {
_, size, err := iprot.ReadListBegin() _, size, err := iprot.ReadListBegin()
if err != nil { if err != nil {
return thrift.PrependError("error reading list begin: ", err) return thrift.PrependError("error reading list begin: ", err)
} }
tSlice := make([]int32, 0, size) tSlice := make([]int32, 0, size)
p.VariableUpdateGroupSize = tSlice p.GroupsSize = tSlice
for i := 0; i < size; i++ { for i := 0; i < size; i++ {
var _elem27 int32 var _elem27 int32
if v, err := iprot.ReadI32(); err != nil { if v, err := iprot.ReadI32(); err != nil {
@ -9442,7 +9509,7 @@ func (p *JobUpdateSettings) readField10(iprot thrift.TProtocol) error {
} else { } else {
_elem27 = v _elem27 = v
} }
p.VariableUpdateGroupSize = append(p.VariableUpdateGroupSize, _elem27) p.GroupsSize = append(p.GroupsSize, _elem27)
} }
if err := iprot.ReadListEnd(); err != nil { if err := iprot.ReadListEnd(); err != nil {
return thrift.PrependError("error reading list end: ", err) return thrift.PrependError("error reading list end: ", err)
@ -9450,15 +9517,6 @@ func (p *JobUpdateSettings) readField10(iprot thrift.TProtocol) error {
return nil return nil
} }
func (p *JobUpdateSettings) readField11(iprot thrift.TProtocol) error {
if v, err := iprot.ReadBool(); err != nil {
return thrift.PrependError("error reading field 11: ", err)
} else {
p.AutoPause = v
}
return nil
}
func (p *JobUpdateSettings) Write(oprot thrift.TProtocol) error { func (p *JobUpdateSettings) Write(oprot thrift.TProtocol) error {
if err := oprot.WriteStructBegin("JobUpdateSettings"); err != nil { if err := oprot.WriteStructBegin("JobUpdateSettings"); err != nil {
return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err) return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
@ -9617,14 +9675,29 @@ func (p *JobUpdateSettings) writeField9(oprot thrift.TProtocol) (err error) {
} }
func (p *JobUpdateSettings) writeField10(oprot thrift.TProtocol) (err error) { func (p *JobUpdateSettings) writeField10(oprot thrift.TProtocol) (err error) {
if p.IsSetVariableUpdateGroupSize() { if p.IsSetUpdateStrategyType() {
if err := oprot.WriteFieldBegin("variableUpdateGroupSize", thrift.LIST, 10); err != nil { if err := oprot.WriteFieldBegin("updateStrategyType", thrift.I32, 10); err != nil {
return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:variableUpdateGroupSize: ", p), err) return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:updateStrategyType: ", p), err)
} }
if err := oprot.WriteListBegin(thrift.I32, len(p.VariableUpdateGroupSize)); err != nil { if err := oprot.WriteI32(int32(*p.UpdateStrategyType)); err != nil {
return thrift.PrependError(fmt.Sprintf("%T.updateStrategyType (10) field write error: ", p), err)
}
if err := oprot.WriteFieldEnd(); err != nil {
return thrift.PrependError(fmt.Sprintf("%T write field end error 10:updateStrategyType: ", p), err)
}
}
return err
}
func (p *JobUpdateSettings) writeField11(oprot thrift.TProtocol) (err error) {
if p.IsSetGroupsSize() {
if err := oprot.WriteFieldBegin("groupsSize", thrift.LIST, 11); err != nil {
return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:groupsSize: ", p), err)
}
if err := oprot.WriteListBegin(thrift.I32, len(p.GroupsSize)); err != nil {
return thrift.PrependError("error writing list begin: ", err) return thrift.PrependError("error writing list begin: ", err)
} }
for _, v := range p.VariableUpdateGroupSize { for _, v := range p.GroupsSize {
if err := oprot.WriteI32(int32(v)); err != nil { if err := oprot.WriteI32(int32(v)); err != nil {
return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err) return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err)
} }
@ -9633,25 +9706,12 @@ func (p *JobUpdateSettings) writeField10(oprot thrift.TProtocol) (err error) {
return thrift.PrependError("error writing list end: ", err) return thrift.PrependError("error writing list end: ", err)
} }
if err := oprot.WriteFieldEnd(); err != nil { if err := oprot.WriteFieldEnd(); err != nil {
return thrift.PrependError(fmt.Sprintf("%T write field end error 10:variableUpdateGroupSize: ", p), err) return thrift.PrependError(fmt.Sprintf("%T write field end error 11:groupsSize: ", p), err)
} }
} }
return err return err
} }
func (p *JobUpdateSettings) writeField11(oprot thrift.TProtocol) (err error) {
if err := oprot.WriteFieldBegin("autoPause", thrift.BOOL, 11); err != nil {
return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:autoPause: ", p), err)
}
if err := oprot.WriteBool(bool(p.AutoPause)); err != nil {
return thrift.PrependError(fmt.Sprintf("%T.autoPause (11) field write error: ", p), err)
}
if err := oprot.WriteFieldEnd(); err != nil {
return thrift.PrependError(fmt.Sprintf("%T write field end error 11:autoPause: ", p), err)
}
return err
}
func (p *JobUpdateSettings) String() string { func (p *JobUpdateSettings) String() string {
if p == nil { if p == nil {
return "<nil>" return "<nil>"

View file

@ -60,7 +60,6 @@ func NewDefaultUpdateJob(config *aurora.TaskConfig) *UpdateJob {
req.Settings.MaxPerInstanceFailures = 0 req.Settings.MaxPerInstanceFailures = 0
req.Settings.MaxFailedInstances = 0 req.Settings.MaxFailedInstances = 0
req.Settings.RollbackOnFailure = true req.Settings.RollbackOnFailure = true
req.Settings.VariableUpdateGroupSize = []int32{1, 2, 3}
//TODO(rdelvalle): Deep copy job struct to avoid unexpected behavior //TODO(rdelvalle): Deep copy job struct to avoid unexpected behavior
return &UpdateJob{Job: job, req: req} return &UpdateJob{Job: job, req: req}
@ -139,6 +138,18 @@ func (u *UpdateJob) RollbackOnFail(rollback bool) *UpdateJob {
return u return u
} }
func (u *UpdateJob) UpdateStrategy(strategy aurora.JobUpdateStrategyType) *UpdateJob {
u.req.Settings.UpdateStrategyType = &strategy
return u
}
func (u *UpdateJob) GroupsSize(groupSizes []int32) *UpdateJob {
u.req.Settings.GroupsSize = make([]int32, len(groupSizes))
copy(u.req.Settings.GroupsSize, groupSizes)
return u
}
func NewUpdateSettings() *aurora.JobUpdateSettings { func NewUpdateSettings() *aurora.JobUpdateSettings {
us := new(aurora.JobUpdateSettings) us := new(aurora.JobUpdateSettings)