API update to support staggered updates prototype.

This commit is contained in:
Renan DelValle 2018-05-09 15:33:18 -07:00
parent 40153d5cb1
commit 77bb78927e
No known key found for this signature in database
GPG key ID: C240AD6D6F443EC9
4 changed files with 191 additions and 73 deletions

View file

@ -680,9 +680,27 @@ struct JobUpdateKey {
2: string id
}
/** Job update thresholds and limits. */
/** Declaration of update strategy types available. **/
enum JobUpdateStrategyType {
/** An update strategy that will maintain a limited number of updates running. */
QUEUE = 0,
/** An update strategy that will only add more work when the current active group is empty. */
BATCH = 1,
/**
* An update strategy that will only add more work when the current active group is empty.
* Unlike BATCH, once an active group is empty, the size of the next active group
* is allowed to change using this strategy.
*/
VARIABLE_BATCH = 2
}
/** Job update thresholds and limits. **/
struct JobUpdateSettings {
/** Max number of instances being updated at any given moment. */
/**
* TODO(rdelvalle): determine if it's better to use updateGroupSizes for everything and capping
* updateGroupSizes at length=1 for BATCH and QUEUE.
* Max number of instances being updated at any given moment.
*/
1: i32 updateGroupSize
/** Max number of instance failures to tolerate before marking instance as FAILED. */
@ -700,13 +718,13 @@ struct JobUpdateSettings {
/** Instance IDs to act on. All instances will be affected if this is not set. */
7: set<Range> updateOnlyTheseInstances
/**
/** TODO(rdelvalle): Deprecated, please set updateStrategyType to BATCH instead
* If true, use updateGroupSize as strict batching boundaries, and avoid proceeding to another
* batch until the preceding batch finishes updating.
*/
8: bool waitForBatchCompletion
/**
/**
* If set, requires external calls to pulseJobUpdate RPC within the specified rate for the
* update to make progress. If no pulses received within specified interval the update will
* block. A blocked update is unable to continue but retains its current status. It may only get
@ -715,16 +733,16 @@ struct JobUpdateSettings {
9: optional i32 blockIfNoPulsesAfterMs
/**
* This list contains the number of instances that each batch will complete before moving on to
* the next. This field can only be used with waitForBatchCompletion set as true.
**/
10: optional list<i32> variableUpdateGroupSize
* Explicitly state which Update strategy type to use.
*/
10: optional JobUpdateStrategyType updateStrategyType
/**
* Pauses the deployment of further tasks after each batch completes
* until the user sends an resume call.
**/
11: bool autoPause}
* Limit for each update group during an update.
* This list should always have a length of 1 for QUEUE and BATCH.
*/
11: optional list<i32> groupsSize
}
/** Event marking a state transition in job update lifecycle. */
struct JobUpdateEvent {

View file

@ -18,12 +18,9 @@ import (
"flag"
"fmt"
"io/ioutil"
"log"
"os"
"time"
"strings"
"time"
"github.com/paypal/gorealis"
"github.com/paypal/gorealis/gen-go/apache/aurora"
@ -90,7 +87,6 @@ func main() {
Factor: 2.0,
Jitter: 0.1,
}),
realis.SetLogger(log.New(os.Stdout, "realis-debug: ", log.Ldate)),
realis.Debug(),
}
@ -191,7 +187,6 @@ func main() {
// Create a service with three instances using the update API instead of the createJob API
fmt.Println("Creating service")
settings := realis.NewUpdateSettings()
settings.VariableUpdateGroupSize = []int32{1, 2, 3}
job.InstanceCount(6).RAM(16).CPU(.1)
resp, result, err := r.CreateService(job, settings)
if err != nil {
@ -430,6 +425,40 @@ func main() {
monitor.JobUpdate(*jobUpdateKey, 5, 500)
break
case "staggeredUpdate":
fmt.Println("Updating a job with with less RAM and to 5 instances staggered")
live, err := r.GetInstanceIds(job.JobKey(), aurora.ACTIVE_STATES)
if err != nil {
fmt.Println(err)
os.Exit(1)
}
var instId int32
for k := range live {
instId = k
break
}
taskConfig, err := r.FetchTaskConfig(aurora.InstanceKey{
JobKey: job.JobKey(),
InstanceId: instId,
})
if err != nil {
fmt.Println(err)
os.Exit(1)
}
updateJob := realis.NewDefaultUpdateJob(taskConfig).
UpdateStrategy(aurora.JobUpdateStrategyType_VARIABLE_BATCH).
GroupsSize([]int32{1, 2})
updateJob.InstanceCount(3).RAM(8).CPU(.1)
resp, err := r.StartJobUpdate(updateJob, "")
if err != nil {
fmt.Println(err)
os.Exit(1)
}
jobUpdateKey := response.JobUpdateKey(resp)
monitor.JobUpdate(*jobUpdateKey, 5, 500)
break
case "pauseJobUpdate":
resp, err := r.PauseJobUpdate(&aurora.JobUpdateKey{
Job: job.JobKey(),
@ -492,7 +521,7 @@ func main() {
fmt.Println(resp.String())
break
case "rollbackUpdate":
fmt.Println("Abort update")
fmt.Println("Rollback update")
resp, err := r.RollbackJobUpdate(aurora.JobUpdateKey{
Job: job.JobKey(),
ID: updateId,

View file

@ -523,6 +523,54 @@ func (p *JobUpdatePulseStatus) UnmarshalText(text []byte) error {
return nil
}
// JobUpdateStrategyType declares the update strategy types available.
type JobUpdateStrategyType int64

const (
	// JobUpdateStrategyType_QUEUE maintains a limited number of updates running.
	JobUpdateStrategyType_QUEUE JobUpdateStrategyType = 0
	// JobUpdateStrategyType_BATCH only adds more work when the current active
	// group is empty.
	JobUpdateStrategyType_BATCH JobUpdateStrategyType = 1
	// JobUpdateStrategyType_VARIABLE_BATCH behaves like BATCH, but the size of
	// the next active group is allowed to change once a group is empty.
	JobUpdateStrategyType_VARIABLE_BATCH JobUpdateStrategyType = 2
)

// String returns the enum name of p, or "<UNSET>" when p is not one of the
// declared constants.
func (p JobUpdateStrategyType) String() string {
	name := "<UNSET>"
	switch p {
	case JobUpdateStrategyType_QUEUE:
		name = "QUEUE"
	case JobUpdateStrategyType_BATCH:
		name = "BATCH"
	case JobUpdateStrategyType_VARIABLE_BATCH:
		name = "VARIABLE_BATCH"
	}
	return name
}

// JobUpdateStrategyTypeFromString maps an enum name back to its value. The
// match is exact; any other input yields the zero value and an error.
func JobUpdateStrategyTypeFromString(s string) (JobUpdateStrategyType, error) {
	known := [...]JobUpdateStrategyType{
		JobUpdateStrategyType_QUEUE,
		JobUpdateStrategyType_BATCH,
		JobUpdateStrategyType_VARIABLE_BATCH,
	}
	for _, candidate := range known {
		if candidate.String() == s {
			return candidate, nil
		}
	}
	return JobUpdateStrategyType(0), fmt.Errorf("not a valid JobUpdateStrategyType string")
}

// JobUpdateStrategyTypePtr returns a pointer to a copy of v, which is handy
// for populating optional (pointer-typed) fields.
func JobUpdateStrategyTypePtr(v JobUpdateStrategyType) *JobUpdateStrategyType {
	return &v
}

// MarshalText encodes p as its enum name. It never returns an error.
func (p JobUpdateStrategyType) MarshalText() ([]byte, error) {
	name := p.String()
	return []byte(name), nil
}

// UnmarshalText decodes an enum name into p. On an unrecognized name the
// error is returned and p is left unchanged.
func (p *JobUpdateStrategyType) UnmarshalText(text []byte) error {
	parsed, err := JobUpdateStrategyTypeFromString(string(text))
	if err == nil {
		*p = parsed
	}
	return err
}
// Attributes:
// - User
type Identity struct {
@ -9183,39 +9231,39 @@ func (p *JobUpdateKey) String() string {
return fmt.Sprintf("JobUpdateKey(%+v)", *p)
}
// Job update thresholds and limits.
// Job update thresholds and limits. *
//
// Attributes:
// - UpdateGroupSize: Max number of instances being updated at any given moment.
// - UpdateGroupSize: TODO(rdelvalle): determine if it's better to use updateGroupSizes for everything and capping
// updateGroupSizes at length=1 for BATCH and QUEUE.
// Max number of instances being updated at any given moment.
// - MaxPerInstanceFailures: Max number of instance failures to tolerate before marking instance as FAILED.
// - MaxFailedInstances: Max number of FAILED instances to tolerate before terminating the update.
// - MinWaitInInstanceRunningMs: Min time to watch a RUNNING instance.
// - RollbackOnFailure: If true, enables failed update rollback.
// - UpdateOnlyTheseInstances: Instance IDs to act on. All instances will be affected if this is not set.
// - WaitForBatchCompletion: If true, use updateGroupSize as strict batching boundaries, and avoid proceeding to another
// - WaitForBatchCompletion: TODO(rdelvalle): Deprecated, please set updateStrategyType to BATCH instead
// If true, use updateGroupSize as strict batching boundaries, and avoid proceeding to another
// batch until the preceding batch finishes updating.
// - BlockIfNoPulsesAfterMs: If set, requires external calls to pulseJobUpdate RPC within the specified rate for the
// update to make progress. If no pulses received within specified interval the update will
// block. A blocked update is unable to continue but retains its current status. It may only get
// unblocked by a fresh pulseJobUpdate call.
// - VariableUpdateGroupSize: * This list contains the number of instances that each batch will complete before moving on to
// * the next. This field can only be used with waitForBatchCompletion set as true.
// *
// - AutoPause: Pauses the deployment of further tasks after each batch completes
// until the user sends an resume call.
//
// - UpdateStrategyType: Explicitly state which Update strategy type to use.
// - GroupsSize: Limit for each update group during an update.
// This field should always be length of 1 for QUEUE and BATCH.
type JobUpdateSettings struct {
UpdateGroupSize int32 `thrift:"updateGroupSize,1" json:"updateGroupSize"`
MaxPerInstanceFailures int32 `thrift:"maxPerInstanceFailures,2" json:"maxPerInstanceFailures"`
MaxFailedInstances int32 `thrift:"maxFailedInstances,3" json:"maxFailedInstances"`
// unused field # 4
MinWaitInInstanceRunningMs int32 `thrift:"minWaitInInstanceRunningMs,5" json:"minWaitInInstanceRunningMs"`
RollbackOnFailure bool `thrift:"rollbackOnFailure,6" json:"rollbackOnFailure"`
UpdateOnlyTheseInstances map[*Range]bool `thrift:"updateOnlyTheseInstances,7" json:"updateOnlyTheseInstances"`
WaitForBatchCompletion bool `thrift:"waitForBatchCompletion,8" json:"waitForBatchCompletion"`
BlockIfNoPulsesAfterMs *int32 `thrift:"blockIfNoPulsesAfterMs,9" json:"blockIfNoPulsesAfterMs,omitempty"`
VariableUpdateGroupSize []int32 `thrift:"variableUpdateGroupSize,10" json:"variableUpdateGroupSize,omitempty"`
AutoPause bool `thrift:"autoPause,11" json:"autoPause"`
MinWaitInInstanceRunningMs int32 `thrift:"minWaitInInstanceRunningMs,5" json:"minWaitInInstanceRunningMs"`
RollbackOnFailure bool `thrift:"rollbackOnFailure,6" json:"rollbackOnFailure"`
UpdateOnlyTheseInstances map[*Range]bool `thrift:"updateOnlyTheseInstances,7" json:"updateOnlyTheseInstances"`
WaitForBatchCompletion bool `thrift:"waitForBatchCompletion,8" json:"waitForBatchCompletion"`
BlockIfNoPulsesAfterMs *int32 `thrift:"blockIfNoPulsesAfterMs,9" json:"blockIfNoPulsesAfterMs,omitempty"`
UpdateStrategyType *JobUpdateStrategyType `thrift:"updateStrategyType,10" json:"updateStrategyType,omitempty"`
GroupsSize []int32 `thrift:"groupsSize,11" json:"groupsSize,omitempty"`
}
func NewJobUpdateSettings() *JobUpdateSettings {
@ -9259,21 +9307,30 @@ func (p *JobUpdateSettings) GetBlockIfNoPulsesAfterMs() int32 {
return *p.BlockIfNoPulsesAfterMs
}
var JobUpdateSettings_VariableUpdateGroupSize_DEFAULT []int32
var JobUpdateSettings_UpdateStrategyType_DEFAULT JobUpdateStrategyType
func (p *JobUpdateSettings) GetVariableUpdateGroupSize() []int32 {
return p.VariableUpdateGroupSize
// GetUpdateStrategyType returns the strategy stored in the optional
// UpdateStrategyType field, falling back to
// JobUpdateSettings_UpdateStrategyType_DEFAULT when the field is unset.
func (p *JobUpdateSettings) GetUpdateStrategyType() JobUpdateStrategyType {
	if p.IsSetUpdateStrategyType() {
		return *p.UpdateStrategyType
	}
	return JobUpdateSettings_UpdateStrategyType_DEFAULT
}
func (p *JobUpdateSettings) GetAutoPause() bool {
return p.AutoPause
var JobUpdateSettings_GroupsSize_DEFAULT []int32
// GetGroupsSize returns the GroupsSize slice exactly as stored (nil when the
// field has not been set); the slice is not copied.
func (p *JobUpdateSettings) GetGroupsSize() []int32 {
return p.GroupsSize
}
// IsSetBlockIfNoPulsesAfterMs reports whether the optional
// BlockIfNoPulsesAfterMs field holds a value (i.e. its pointer is non-nil).
func (p *JobUpdateSettings) IsSetBlockIfNoPulsesAfterMs() bool {
return p.BlockIfNoPulsesAfterMs != nil
}
func (p *JobUpdateSettings) IsSetVariableUpdateGroupSize() bool {
return p.VariableUpdateGroupSize != nil
// IsSetUpdateStrategyType reports whether the optional UpdateStrategyType
// field holds a value (i.e. its pointer is non-nil).
func (p *JobUpdateSettings) IsSetUpdateStrategyType() bool {
return p.UpdateStrategyType != nil
}
// IsSetGroupsSize reports whether the optional GroupsSize list is non-nil.
// An empty but non-nil slice therefore counts as set.
func (p *JobUpdateSettings) IsSetGroupsSize() bool {
return p.GroupsSize != nil
}
func (p *JobUpdateSettings) Read(iprot thrift.TProtocol) error {
@ -9429,12 +9486,22 @@ func (p *JobUpdateSettings) readField9(iprot thrift.TProtocol) error {
}
// readField10 decodes field 10 (updateStrategyType) from iprot as an i32 and
// stores it in p.UpdateStrategyType. A read failure is returned with an
// "error reading field 10: " prefix and leaves the field untouched.
func (p *JobUpdateSettings) readField10(iprot thrift.TProtocol) error {
	raw, err := iprot.ReadI32()
	if err != nil {
		return thrift.PrependError("error reading field 10: ", err)
	}
	// Take the address of a fresh local so the optional field owns its value.
	strategy := JobUpdateStrategyType(raw)
	p.UpdateStrategyType = &strategy
	return nil
}
func (p *JobUpdateSettings) readField11(iprot thrift.TProtocol) error {
_, size, err := iprot.ReadListBegin()
if err != nil {
return thrift.PrependError("error reading list begin: ", err)
}
tSlice := make([]int32, 0, size)
p.VariableUpdateGroupSize = tSlice
p.GroupsSize = tSlice
for i := 0; i < size; i++ {
var _elem27 int32
if v, err := iprot.ReadI32(); err != nil {
@ -9442,7 +9509,7 @@ func (p *JobUpdateSettings) readField10(iprot thrift.TProtocol) error {
} else {
_elem27 = v
}
p.VariableUpdateGroupSize = append(p.VariableUpdateGroupSize, _elem27)
p.GroupsSize = append(p.GroupsSize, _elem27)
}
if err := iprot.ReadListEnd(); err != nil {
return thrift.PrependError("error reading list end: ", err)
@ -9450,15 +9517,6 @@ func (p *JobUpdateSettings) readField10(iprot thrift.TProtocol) error {
return nil
}
func (p *JobUpdateSettings) readField11(iprot thrift.TProtocol) error {
if v, err := iprot.ReadBool(); err != nil {
return thrift.PrependError("error reading field 11: ", err)
} else {
p.AutoPause = v
}
return nil
}
func (p *JobUpdateSettings) Write(oprot thrift.TProtocol) error {
if err := oprot.WriteStructBegin("JobUpdateSettings"); err != nil {
return thrift.PrependError(fmt.Sprintf("%T write struct begin error: ", p), err)
@ -9617,14 +9675,29 @@ func (p *JobUpdateSettings) writeField9(oprot thrift.TProtocol) (err error) {
}
func (p *JobUpdateSettings) writeField10(oprot thrift.TProtocol) (err error) {
if p.IsSetVariableUpdateGroupSize() {
if err := oprot.WriteFieldBegin("variableUpdateGroupSize", thrift.LIST, 10); err != nil {
return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:variableUpdateGroupSize: ", p), err)
if p.IsSetUpdateStrategyType() {
if err := oprot.WriteFieldBegin("updateStrategyType", thrift.I32, 10); err != nil {
return thrift.PrependError(fmt.Sprintf("%T write field begin error 10:updateStrategyType: ", p), err)
}
if err := oprot.WriteListBegin(thrift.I32, len(p.VariableUpdateGroupSize)); err != nil {
if err := oprot.WriteI32(int32(*p.UpdateStrategyType)); err != nil {
return thrift.PrependError(fmt.Sprintf("%T.updateStrategyType (10) field write error: ", p), err)
}
if err := oprot.WriteFieldEnd(); err != nil {
return thrift.PrependError(fmt.Sprintf("%T write field end error 10:updateStrategyType: ", p), err)
}
}
return err
}
func (p *JobUpdateSettings) writeField11(oprot thrift.TProtocol) (err error) {
if p.IsSetGroupsSize() {
if err := oprot.WriteFieldBegin("groupsSize", thrift.LIST, 11); err != nil {
return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:groupsSize: ", p), err)
}
if err := oprot.WriteListBegin(thrift.I32, len(p.GroupsSize)); err != nil {
return thrift.PrependError("error writing list begin: ", err)
}
for _, v := range p.VariableUpdateGroupSize {
for _, v := range p.GroupsSize {
if err := oprot.WriteI32(int32(v)); err != nil {
return thrift.PrependError(fmt.Sprintf("%T. (0) field write error: ", p), err)
}
@ -9633,25 +9706,12 @@ func (p *JobUpdateSettings) writeField10(oprot thrift.TProtocol) (err error) {
return thrift.PrependError("error writing list end: ", err)
}
if err := oprot.WriteFieldEnd(); err != nil {
return thrift.PrependError(fmt.Sprintf("%T write field end error 10:variableUpdateGroupSize: ", p), err)
return thrift.PrependError(fmt.Sprintf("%T write field end error 11:groupsSize: ", p), err)
}
}
return err
}
func (p *JobUpdateSettings) writeField11(oprot thrift.TProtocol) (err error) {
if err := oprot.WriteFieldBegin("autoPause", thrift.BOOL, 11); err != nil {
return thrift.PrependError(fmt.Sprintf("%T write field begin error 11:autoPause: ", p), err)
}
if err := oprot.WriteBool(bool(p.AutoPause)); err != nil {
return thrift.PrependError(fmt.Sprintf("%T.autoPause (11) field write error: ", p), err)
}
if err := oprot.WriteFieldEnd(); err != nil {
return thrift.PrependError(fmt.Sprintf("%T write field end error 11:autoPause: ", p), err)
}
return err
}
func (p *JobUpdateSettings) String() string {
if p == nil {
return "<nil>"

View file

@ -60,7 +60,6 @@ func NewDefaultUpdateJob(config *aurora.TaskConfig) *UpdateJob {
req.Settings.MaxPerInstanceFailures = 0
req.Settings.MaxFailedInstances = 0
req.Settings.RollbackOnFailure = true
req.Settings.VariableUpdateGroupSize = []int32{1, 2, 3}
//TODO(rdelvalle): Deep copy job struct to avoid unexpected behavior
return &UpdateJob{Job: job, req: req}
@ -139,6 +138,18 @@ func (u *UpdateJob) RollbackOnFail(rollback bool) *UpdateJob {
return u
}
// UpdateStrategy sets the update strategy type on the update request's
// settings and returns the receiver so that calls can be chained.
func (u *UpdateJob) UpdateStrategy(strategy aurora.JobUpdateStrategyType) *UpdateJob {
	settings := u.req.Settings
	settings.UpdateStrategyType = &strategy
	return u
}
// GroupsSize stores a copy of groupSizes in the update request's settings, so
// later changes to the caller's slice do not affect the request. The stored
// slice is always non-nil, even for empty input. It returns the receiver so
// that calls can be chained.
func (u *UpdateJob) GroupsSize(groupSizes []int32) *UpdateJob {
	// Start from an allocated (non-nil) empty slice so the field counts as set
	// even when groupSizes has no elements.
	sizes := make([]int32, 0, len(groupSizes))
	sizes = append(sizes, groupSizes...)
	u.req.Settings.GroupsSize = sizes
	return u
}
func NewUpdateSettings() *aurora.JobUpdateSettings {
us := new(aurora.JobUpdateSettings)