* Changing generateBinding.sh check to check for thrift 0.12.0 and adding support for Variable Batch updates. * Adding update strategies change to changelog, changed docker-compose to point to aurora 0.22.0 snapshot. Added test coverage for update strategies.
188 lines
5.6 KiB
Go
188 lines
5.6 KiB
Go
/**
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package realis
|
|
|
|
import (
|
|
"github.com/paypal/gorealis/gen-go/apache/aurora"
|
|
)
|
|
|
|
// UpdateJob is a structure to collect all information required to create job update.
|
|
type UpdateJob struct {
|
|
Job // SetInstanceCount for job is hidden, access via full qualifier
|
|
req *aurora.JobUpdateRequest
|
|
}
|
|
|
|
// NewDefaultUpdateJob creates an UpdateJob object with opinionated default settings.
|
|
func NewDefaultUpdateJob(config *aurora.TaskConfig) *UpdateJob {
|
|
|
|
req := aurora.NewJobUpdateRequest()
|
|
req.TaskConfig = config
|
|
req.Settings = NewUpdateSettings()
|
|
|
|
job, ok := NewJob().(*AuroraJob)
|
|
if !ok {
|
|
// This should never happen but it is here as a safeguard
|
|
return nil
|
|
}
|
|
|
|
job.jobConfig.TaskConfig = config
|
|
|
|
// Rebuild resource map from TaskConfig
|
|
for _, ptr := range config.Resources {
|
|
if ptr.NumCpus != nil {
|
|
job.resources[CPU].NumCpus = ptr.NumCpus
|
|
continue // Guard against Union violations that Go won't enforce
|
|
}
|
|
|
|
if ptr.RamMb != nil {
|
|
job.resources[RAM].RamMb = ptr.RamMb
|
|
continue
|
|
}
|
|
|
|
if ptr.DiskMb != nil {
|
|
job.resources[DISK].DiskMb = ptr.DiskMb
|
|
continue
|
|
}
|
|
|
|
if ptr.NumGpus != nil {
|
|
job.resources[GPU] = &aurora.Resource{NumGpus: ptr.NumGpus}
|
|
continue
|
|
}
|
|
}
|
|
|
|
// Mirrors defaults set by Pystachio
|
|
req.Settings.UpdateGroupSize = 1
|
|
req.Settings.WaitForBatchCompletion = false
|
|
req.Settings.MinWaitInInstanceRunningMs = 45000
|
|
req.Settings.MaxPerInstanceFailures = 0
|
|
req.Settings.MaxFailedInstances = 0
|
|
req.Settings.RollbackOnFailure = true
|
|
|
|
//TODO(rdelvalle): Deep copy job struct to avoid unexpected behavior
|
|
return &UpdateJob{Job: job, req: req}
|
|
}
|
|
|
|
// NewUpdateJob creates an UpdateJob object wihtout default settings.
|
|
func NewUpdateJob(config *aurora.TaskConfig, settings *aurora.JobUpdateSettings) *UpdateJob {
|
|
|
|
req := aurora.NewJobUpdateRequest()
|
|
req.TaskConfig = config
|
|
req.Settings = settings
|
|
|
|
job, ok := NewJob().(*AuroraJob)
|
|
if !ok {
|
|
// This should never happen but it is here as a safeguard
|
|
return nil
|
|
}
|
|
job.jobConfig.TaskConfig = config
|
|
|
|
// Rebuild resource map from TaskConfig
|
|
for _, ptr := range config.Resources {
|
|
if ptr.NumCpus != nil {
|
|
job.resources[CPU].NumCpus = ptr.NumCpus
|
|
continue // Guard against Union violations that Go won't enforce
|
|
}
|
|
|
|
if ptr.RamMb != nil {
|
|
job.resources[RAM].RamMb = ptr.RamMb
|
|
continue
|
|
}
|
|
|
|
if ptr.DiskMb != nil {
|
|
job.resources[DISK].DiskMb = ptr.DiskMb
|
|
continue
|
|
}
|
|
|
|
if ptr.NumGpus != nil {
|
|
job.resources[GPU] = &aurora.Resource{}
|
|
job.resources[GPU].NumGpus = ptr.NumGpus
|
|
continue // Guard against Union violations that Go won't enforce
|
|
}
|
|
}
|
|
|
|
//TODO(rdelvalle): Deep copy job struct to avoid unexpected behavior
|
|
return &UpdateJob{Job: job, req: req}
|
|
}
|
|
|
|
// InstanceCount sets instance count the job will have after the update.
|
|
func (u *UpdateJob) InstanceCount(inst int32) *UpdateJob {
|
|
u.req.InstanceCount = inst
|
|
return u
|
|
}
|
|
|
|
// BatchSize sets the max number of instances being updated at any given moment.
|
|
func (u *UpdateJob) BatchSize(size int32) *UpdateJob {
|
|
u.req.Settings.UpdateGroupSize = size
|
|
return u
|
|
}
|
|
|
|
// WatchTime sets the minimum number of seconds a shard must remain in RUNNING state before considered a success.
|
|
func (u *UpdateJob) WatchTime(ms int32) *UpdateJob {
|
|
u.req.Settings.MinWaitInInstanceRunningMs = ms
|
|
return u
|
|
}
|
|
|
|
// WaitForBatchCompletion configures the job update to wait for all instances in a group to be done before moving on.
|
|
func (u *UpdateJob) WaitForBatchCompletion(batchWait bool) *UpdateJob {
|
|
u.req.Settings.WaitForBatchCompletion = batchWait
|
|
return u
|
|
}
|
|
|
|
// MaxPerInstanceFailures sets the max number of instance failures to tolerate before marking instance as FAILED.
|
|
func (u *UpdateJob) MaxPerInstanceFailures(inst int32) *UpdateJob {
|
|
u.req.Settings.MaxPerInstanceFailures = inst
|
|
return u
|
|
}
|
|
|
|
// MaxFailedInstances sets the max number of FAILED instances to tolerate before terminating the update.
|
|
func (u *UpdateJob) MaxFailedInstances(inst int32) *UpdateJob {
|
|
u.req.Settings.MaxFailedInstances = inst
|
|
return u
|
|
}
|
|
|
|
// RollbackOnFail configure the job to rollback automatically after a job update fails.
|
|
func (u *UpdateJob) RollbackOnFail(rollback bool) *UpdateJob {
|
|
u.req.Settings.RollbackOnFailure = rollback
|
|
return u
|
|
}
|
|
|
|
// NewUpdateSettings return an opinionated set of job update settings.
|
|
func (u *UpdateJob) BatchUpdateStrategy(strategy aurora.BatchJobUpdateStrategy) *UpdateJob {
|
|
u.req.Settings.UpdateStrategy = &aurora.JobUpdateStrategy{BatchStrategy: &strategy}
|
|
return u
|
|
}
|
|
|
|
func (u *UpdateJob) QueueUpdateStrategy(strategy aurora.QueueJobUpdateStrategy) *UpdateJob {
|
|
u.req.Settings.UpdateStrategy = &aurora.JobUpdateStrategy{QueueStrategy: &strategy}
|
|
return u
|
|
}
|
|
|
|
func (u *UpdateJob) VariableBatchStrategy(strategy aurora.VariableBatchJobUpdateStrategy) *UpdateJob {
|
|
u.req.Settings.UpdateStrategy = &aurora.JobUpdateStrategy{VarBatchStrategy: &strategy}
|
|
return u
|
|
}
|
|
|
|
func NewUpdateSettings() *aurora.JobUpdateSettings {
|
|
us := new(aurora.JobUpdateSettings)
|
|
// Mirrors defaults set by Pystachio
|
|
us.UpdateGroupSize = 1
|
|
us.WaitForBatchCompletion = false
|
|
us.MinWaitInInstanceRunningMs = 45000
|
|
us.MaxPerInstanceFailures = 0
|
|
us.MaxFailedInstances = 0
|
|
us.RollbackOnFailure = true
|
|
|
|
return us
|
|
}
|