2016-08-02 11:42:00 -07:00
|
|
|
/**
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
2016-08-09 16:18:30 -07:00
|
|
|
|
2016-08-02 11:42:00 -07:00
|
|
|
package realis
|
|
|
|
|
2016-08-24 15:38:44 -07:00
|
|
|
import (
|
2017-10-12 17:07:43 -07:00
|
|
|
"github.com/paypal/gorealis/gen-go/apache/aurora"
|
2016-08-24 15:38:44 -07:00
|
|
|
)
|
2016-08-02 11:42:00 -07:00
|
|
|
|
2019-06-12 11:22:59 -07:00
|
|
|
// UpdateJob is a structure to collect all information required to create job update.
// It pairs a Job with the aurora.JobUpdateRequest that the constructors and the
// chained setter methods on *UpdateJob fill in.
type UpdateJob struct {
	// Job is embedded, so its methods are promoted onto UpdateJob. UpdateJob
	// declares its own InstanceCount method, which hides any promoted method of
	// the same name; reach the embedded one through the full qualifier (u.Job).
	Job // SetInstanceCount for job is hidden, access via full qualifier
	// req is the job update request being assembled.
	req *aurora.JobUpdateRequest
}
|
|
|
|
|
2019-06-12 11:22:59 -07:00
|
|
|
// NewDefaultUpdateJob creates an UpdateJob object with opinionated default settings.
|
2016-11-14 23:16:36 -08:00
|
|
|
func NewDefaultUpdateJob(config *aurora.TaskConfig) *UpdateJob {
|
2016-08-02 11:42:00 -07:00
|
|
|
|
|
|
|
req := aurora.NewJobUpdateRequest()
|
2016-08-24 11:59:01 -07:00
|
|
|
req.TaskConfig = config
|
2018-02-06 12:39:02 -08:00
|
|
|
req.Settings = NewUpdateSettings()
|
2016-08-02 11:42:00 -07:00
|
|
|
|
2019-06-11 11:47:14 -07:00
|
|
|
job, ok := NewJob().(*AuroraJob)
|
|
|
|
if !ok {
|
|
|
|
// This should never happen but it is here as a safeguard
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2016-08-24 11:59:01 -07:00
|
|
|
job.jobConfig.TaskConfig = config
|
|
|
|
|
|
|
|
// Rebuild resource map from TaskConfig
|
2019-02-20 11:11:46 -08:00
|
|
|
for _, ptr := range config.Resources {
|
2016-08-24 17:21:59 -07:00
|
|
|
if ptr.NumCpus != nil {
|
2019-01-08 15:11:52 -08:00
|
|
|
job.resources[CPU].NumCpus = ptr.NumCpus
|
2016-08-24 11:59:01 -07:00
|
|
|
continue // Guard against Union violations that Go won't enforce
|
|
|
|
}
|
|
|
|
|
2016-08-24 17:21:59 -07:00
|
|
|
if ptr.RamMb != nil {
|
2019-01-08 15:11:52 -08:00
|
|
|
job.resources[RAM].RamMb = ptr.RamMb
|
2016-08-24 11:59:01 -07:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2016-08-24 17:21:59 -07:00
|
|
|
if ptr.DiskMb != nil {
|
2019-01-08 15:11:52 -08:00
|
|
|
job.resources[DISK].DiskMb = ptr.DiskMb
|
2016-08-24 11:59:01 -07:00
|
|
|
continue
|
|
|
|
}
|
2019-02-20 11:11:46 -08:00
|
|
|
|
|
|
|
if ptr.NumGpus != nil {
|
|
|
|
job.resources[GPU] = &aurora.Resource{NumGpus: ptr.NumGpus}
|
|
|
|
continue
|
|
|
|
}
|
2016-08-24 11:59:01 -07:00
|
|
|
}
|
|
|
|
|
2016-08-02 11:42:00 -07:00
|
|
|
// Mirrors defaults set by Pystachio
|
|
|
|
req.Settings.UpdateGroupSize = 1
|
|
|
|
req.Settings.WaitForBatchCompletion = false
|
2017-07-11 12:37:34 -07:00
|
|
|
req.Settings.MinWaitInInstanceRunningMs = 45000
|
2016-08-02 11:42:00 -07:00
|
|
|
req.Settings.MaxPerInstanceFailures = 0
|
|
|
|
req.Settings.MaxFailedInstances = 0
|
|
|
|
req.Settings.RollbackOnFailure = true
|
|
|
|
|
2016-08-09 13:31:15 -07:00
|
|
|
//TODO(rdelvalle): Deep copy job struct to avoid unexpected behavior
|
2017-12-23 10:33:42 -08:00
|
|
|
return &UpdateJob{Job: job, req: req}
|
2016-08-02 11:42:00 -07:00
|
|
|
}
|
|
|
|
|
2019-06-12 11:22:59 -07:00
|
|
|
// NewUpdateJob creates an UpdateJob object wihtout default settings.
|
2016-11-14 23:16:36 -08:00
|
|
|
func NewUpdateJob(config *aurora.TaskConfig, settings *aurora.JobUpdateSettings) *UpdateJob {
|
|
|
|
|
|
|
|
req := aurora.NewJobUpdateRequest()
|
|
|
|
req.TaskConfig = config
|
|
|
|
req.Settings = settings
|
|
|
|
|
2019-06-11 11:47:14 -07:00
|
|
|
job, ok := NewJob().(*AuroraJob)
|
|
|
|
if !ok {
|
|
|
|
// This should never happen but it is here as a safeguard
|
|
|
|
return nil
|
|
|
|
}
|
2016-11-14 23:16:36 -08:00
|
|
|
job.jobConfig.TaskConfig = config
|
|
|
|
|
|
|
|
// Rebuild resource map from TaskConfig
|
2019-02-20 11:11:46 -08:00
|
|
|
for _, ptr := range config.Resources {
|
2016-11-14 23:16:36 -08:00
|
|
|
if ptr.NumCpus != nil {
|
2019-01-08 15:11:52 -08:00
|
|
|
job.resources[CPU].NumCpus = ptr.NumCpus
|
2016-11-14 23:16:36 -08:00
|
|
|
continue // Guard against Union violations that Go won't enforce
|
|
|
|
}
|
|
|
|
|
|
|
|
if ptr.RamMb != nil {
|
2019-01-08 15:11:52 -08:00
|
|
|
job.resources[RAM].RamMb = ptr.RamMb
|
2016-11-14 23:16:36 -08:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if ptr.DiskMb != nil {
|
2019-01-08 15:11:52 -08:00
|
|
|
job.resources[DISK].DiskMb = ptr.DiskMb
|
2016-11-14 23:16:36 -08:00
|
|
|
continue
|
|
|
|
}
|
2019-01-08 15:11:52 -08:00
|
|
|
|
|
|
|
if ptr.NumGpus != nil {
|
2019-03-15 15:10:31 -07:00
|
|
|
job.resources[GPU] = &aurora.Resource{}
|
2019-01-08 15:11:52 -08:00
|
|
|
job.resources[GPU].NumGpus = ptr.NumGpus
|
|
|
|
continue // Guard against Union violations that Go won't enforce
|
|
|
|
}
|
2016-11-14 23:16:36 -08:00
|
|
|
}
|
|
|
|
|
|
|
|
//TODO(rdelvalle): Deep copy job struct to avoid unexpected behavior
|
2017-12-23 10:33:42 -08:00
|
|
|
return &UpdateJob{Job: job, req: req}
|
2016-11-14 23:16:36 -08:00
|
|
|
}
|
|
|
|
|
2019-06-12 11:22:59 -07:00
|
|
|
// InstanceCount sets instance count the job will have after the update.
|
2016-08-02 11:53:44 -07:00
|
|
|
func (u *UpdateJob) InstanceCount(inst int32) *UpdateJob {
|
2016-08-02 11:42:00 -07:00
|
|
|
u.req.InstanceCount = inst
|
|
|
|
return u
|
|
|
|
}
|
|
|
|
|
2019-06-12 11:22:59 -07:00
|
|
|
// BatchSize sets the max number of instances being updated at any given moment.
|
2016-08-02 11:53:44 -07:00
|
|
|
func (u *UpdateJob) BatchSize(size int32) *UpdateJob {
|
2016-08-02 11:42:00 -07:00
|
|
|
u.req.Settings.UpdateGroupSize = size
|
|
|
|
return u
|
|
|
|
}
|
|
|
|
|
2019-06-12 11:22:59 -07:00
|
|
|
// WatchTime sets the minimum number of seconds a shard must remain in RUNNING state before considered a success.
|
2017-07-11 12:37:34 -07:00
|
|
|
func (u *UpdateJob) WatchTime(ms int32) *UpdateJob {
|
|
|
|
u.req.Settings.MinWaitInInstanceRunningMs = ms
|
2016-08-02 11:42:00 -07:00
|
|
|
return u
|
|
|
|
}
|
|
|
|
|
2019-06-12 11:22:59 -07:00
|
|
|
// WaitForBatchCompletion configures the job update to wait for all instances in a group to be done before moving on.
|
2016-08-02 11:53:44 -07:00
|
|
|
func (u *UpdateJob) WaitForBatchCompletion(batchWait bool) *UpdateJob {
|
2016-08-02 11:42:00 -07:00
|
|
|
u.req.Settings.WaitForBatchCompletion = batchWait
|
|
|
|
return u
|
|
|
|
}
|
|
|
|
|
2019-06-12 11:22:59 -07:00
|
|
|
// MaxPerInstanceFailures sets the max number of instance failures to tolerate before marking instance as FAILED.
|
2016-08-02 11:53:44 -07:00
|
|
|
func (u *UpdateJob) MaxPerInstanceFailures(inst int32) *UpdateJob {
|
2016-08-02 11:42:00 -07:00
|
|
|
u.req.Settings.MaxPerInstanceFailures = inst
|
|
|
|
return u
|
|
|
|
}
|
|
|
|
|
2019-06-12 11:22:59 -07:00
|
|
|
// MaxFailedInstances sets the max number of FAILED instances to tolerate before terminating the update.
|
2016-08-02 11:53:44 -07:00
|
|
|
func (u *UpdateJob) MaxFailedInstances(inst int32) *UpdateJob {
|
2016-08-02 11:42:00 -07:00
|
|
|
u.req.Settings.MaxFailedInstances = inst
|
|
|
|
return u
|
|
|
|
}
|
|
|
|
|
2019-06-12 11:22:59 -07:00
|
|
|
// RollbackOnFail configure the job to rollback automatically after a job update fails.
|
2016-08-02 11:53:44 -07:00
|
|
|
func (u *UpdateJob) RollbackOnFail(rollback bool) *UpdateJob {
|
2016-08-02 11:42:00 -07:00
|
|
|
u.req.Settings.RollbackOnFailure = rollback
|
|
|
|
return u
|
|
|
|
}
|
2017-11-30 12:02:50 -08:00
|
|
|
|
2019-06-12 11:22:59 -07:00
|
|
|
// NewUpdateSettings return an opinionated set of job update settings.
|
2019-03-14 13:42:47 -07:00
|
|
|
func (u *UpdateJob) BatchUpdateStrategy(strategy aurora.BatchJobUpdateStrategy) *UpdateJob {
|
|
|
|
u.req.Settings.UpdateStrategy = &aurora.JobUpdateStrategy{BatchStrategy: &strategy}
|
|
|
|
return u
|
|
|
|
}
|
|
|
|
|
|
|
|
func (u *UpdateJob) QueueUpdateStrategy(strategy aurora.QueueJobUpdateStrategy) *UpdateJob {
|
|
|
|
u.req.Settings.UpdateStrategy = &aurora.JobUpdateStrategy{QueueStrategy: &strategy}
|
|
|
|
return u
|
|
|
|
}
|
|
|
|
|
|
|
|
func (u *UpdateJob) VariableBatchStrategy(strategy aurora.VariableBatchJobUpdateStrategy) *UpdateJob {
|
|
|
|
u.req.Settings.UpdateStrategy = &aurora.JobUpdateStrategy{VarBatchStrategy: &strategy}
|
|
|
|
return u
|
|
|
|
}
|
|
|
|
|
2018-02-06 12:39:02 -08:00
|
|
|
func NewUpdateSettings() *aurora.JobUpdateSettings {
|
|
|
|
us := new(aurora.JobUpdateSettings)
|
2017-11-30 12:02:50 -08:00
|
|
|
// Mirrors defaults set by Pystachio
|
2018-02-06 12:39:02 -08:00
|
|
|
us.UpdateGroupSize = 1
|
|
|
|
us.WaitForBatchCompletion = false
|
|
|
|
us.MinWaitInInstanceRunningMs = 45000
|
|
|
|
us.MaxPerInstanceFailures = 0
|
|
|
|
us.MaxFailedInstances = 0
|
|
|
|
us.RollbackOnFailure = true
|
2017-11-30 12:02:50 -08:00
|
|
|
|
|
|
|
return us
|
|
|
|
}
|