gorealis/updatejob.go
Renan DelValle fe692040aa Variable Batch Update Support (#100)
* Changing generateBinding.sh check to check for thrift 0.12.0 and adding support for Variable Batch updates.

* Adding update strategies change to changelog, changed docker-compose to point to aurora 0.22.0 snapshot. Added test coverage for update strategies.
2020-01-14 15:50:10 -08:00

188 lines
5.6 KiB
Go

/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package realis
import (
"github.com/paypal/gorealis/gen-go/apache/aurora"
)
// UpdateJob is a structure to collect all information required to create job update.
type UpdateJob struct {
Job // SetInstanceCount for job is hidden, access via full qualifier
req *aurora.JobUpdateRequest
}
// NewDefaultUpdateJob creates an UpdateJob object with opinionated default settings.
func NewDefaultUpdateJob(config *aurora.TaskConfig) *UpdateJob {
req := aurora.NewJobUpdateRequest()
req.TaskConfig = config
req.Settings = NewUpdateSettings()
job, ok := NewJob().(*AuroraJob)
if !ok {
// This should never happen but it is here as a safeguard
return nil
}
job.jobConfig.TaskConfig = config
// Rebuild resource map from TaskConfig
for _, ptr := range config.Resources {
if ptr.NumCpus != nil {
job.resources[CPU].NumCpus = ptr.NumCpus
continue // Guard against Union violations that Go won't enforce
}
if ptr.RamMb != nil {
job.resources[RAM].RamMb = ptr.RamMb
continue
}
if ptr.DiskMb != nil {
job.resources[DISK].DiskMb = ptr.DiskMb
continue
}
if ptr.NumGpus != nil {
job.resources[GPU] = &aurora.Resource{NumGpus: ptr.NumGpus}
continue
}
}
// Mirrors defaults set by Pystachio
req.Settings.UpdateGroupSize = 1
req.Settings.WaitForBatchCompletion = false
req.Settings.MinWaitInInstanceRunningMs = 45000
req.Settings.MaxPerInstanceFailures = 0
req.Settings.MaxFailedInstances = 0
req.Settings.RollbackOnFailure = true
//TODO(rdelvalle): Deep copy job struct to avoid unexpected behavior
return &UpdateJob{Job: job, req: req}
}
// NewUpdateJob creates an UpdateJob object wihtout default settings.
func NewUpdateJob(config *aurora.TaskConfig, settings *aurora.JobUpdateSettings) *UpdateJob {
req := aurora.NewJobUpdateRequest()
req.TaskConfig = config
req.Settings = settings
job, ok := NewJob().(*AuroraJob)
if !ok {
// This should never happen but it is here as a safeguard
return nil
}
job.jobConfig.TaskConfig = config
// Rebuild resource map from TaskConfig
for _, ptr := range config.Resources {
if ptr.NumCpus != nil {
job.resources[CPU].NumCpus = ptr.NumCpus
continue // Guard against Union violations that Go won't enforce
}
if ptr.RamMb != nil {
job.resources[RAM].RamMb = ptr.RamMb
continue
}
if ptr.DiskMb != nil {
job.resources[DISK].DiskMb = ptr.DiskMb
continue
}
if ptr.NumGpus != nil {
job.resources[GPU] = &aurora.Resource{}
job.resources[GPU].NumGpus = ptr.NumGpus
continue // Guard against Union violations that Go won't enforce
}
}
//TODO(rdelvalle): Deep copy job struct to avoid unexpected behavior
return &UpdateJob{Job: job, req: req}
}
// InstanceCount sets instance count the job will have after the update.
func (u *UpdateJob) InstanceCount(inst int32) *UpdateJob {
u.req.InstanceCount = inst
return u
}
// BatchSize sets the max number of instances being updated at any given moment.
func (u *UpdateJob) BatchSize(size int32) *UpdateJob {
u.req.Settings.UpdateGroupSize = size
return u
}
// WatchTime sets the minimum number of seconds a shard must remain in RUNNING state before considered a success.
func (u *UpdateJob) WatchTime(ms int32) *UpdateJob {
u.req.Settings.MinWaitInInstanceRunningMs = ms
return u
}
// WaitForBatchCompletion configures the job update to wait for all instances in a group to be done before moving on.
func (u *UpdateJob) WaitForBatchCompletion(batchWait bool) *UpdateJob {
u.req.Settings.WaitForBatchCompletion = batchWait
return u
}
// MaxPerInstanceFailures sets the max number of instance failures to tolerate before marking instance as FAILED.
func (u *UpdateJob) MaxPerInstanceFailures(inst int32) *UpdateJob {
u.req.Settings.MaxPerInstanceFailures = inst
return u
}
// MaxFailedInstances sets the max number of FAILED instances to tolerate before terminating the update.
func (u *UpdateJob) MaxFailedInstances(inst int32) *UpdateJob {
u.req.Settings.MaxFailedInstances = inst
return u
}
// RollbackOnFail configure the job to rollback automatically after a job update fails.
func (u *UpdateJob) RollbackOnFail(rollback bool) *UpdateJob {
u.req.Settings.RollbackOnFailure = rollback
return u
}
// NewUpdateSettings return an opinionated set of job update settings.
func (u *UpdateJob) BatchUpdateStrategy(strategy aurora.BatchJobUpdateStrategy) *UpdateJob {
u.req.Settings.UpdateStrategy = &aurora.JobUpdateStrategy{BatchStrategy: &strategy}
return u
}
func (u *UpdateJob) QueueUpdateStrategy(strategy aurora.QueueJobUpdateStrategy) *UpdateJob {
u.req.Settings.UpdateStrategy = &aurora.JobUpdateStrategy{QueueStrategy: &strategy}
return u
}
func (u *UpdateJob) VariableBatchStrategy(strategy aurora.VariableBatchJobUpdateStrategy) *UpdateJob {
u.req.Settings.UpdateStrategy = &aurora.JobUpdateStrategy{VarBatchStrategy: &strategy}
return u
}
func NewUpdateSettings() *aurora.JobUpdateSettings {
us := new(aurora.JobUpdateSettings)
// Mirrors defaults set by Pystachio
us.UpdateGroupSize = 1
us.WaitForBatchCompletion = false
us.MinWaitInInstanceRunningMs = 45000
us.MaxPerInstanceFailures = 0
us.MaxFailedInstances = 0
us.RollbackOnFailure = true
return us
}