* Adding Aurora URL validator in order to handle scenarios where incomplete information is passed to the client. The client will do its best to guess the missing information such as protocol and port. * Upgraded to testify 1.3.0. * Added configuration to fail on a non-temporary error. This is reverting to the original behavior of the retry mechanism. However, this allows the user to opt to fail in a non-temporary error.
172 lines
4.7 KiB
Go
172 lines
4.7 KiB
Go
/**
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package realis
|
|
|
|
import (
|
|
"github.com/paypal/gorealis/gen-go/apache/aurora"
|
|
)
|
|
|
|
// Structure to collect all information required to create job update
|
|
type UpdateJob struct {
|
|
Job // SetInstanceCount for job is hidden, access via full qualifier
|
|
req *aurora.JobUpdateRequest
|
|
}
|
|
|
|
// Create a default UpdateJob object.
|
|
func NewDefaultUpdateJob(config *aurora.TaskConfig) *UpdateJob {
|
|
|
|
req := aurora.NewJobUpdateRequest()
|
|
req.TaskConfig = config
|
|
req.Settings = NewUpdateSettings()
|
|
|
|
job, ok := NewJob().(*AuroraJob)
|
|
if !ok {
|
|
// This should never happen but it is here as a safeguard
|
|
return nil
|
|
}
|
|
|
|
job.jobConfig.TaskConfig = config
|
|
|
|
// Rebuild resource map from TaskConfig
|
|
for _, ptr := range config.Resources {
|
|
if ptr.NumCpus != nil {
|
|
job.resources[CPU].NumCpus = ptr.NumCpus
|
|
continue // Guard against Union violations that Go won't enforce
|
|
}
|
|
|
|
if ptr.RamMb != nil {
|
|
job.resources[RAM].RamMb = ptr.RamMb
|
|
continue
|
|
}
|
|
|
|
if ptr.DiskMb != nil {
|
|
job.resources[DISK].DiskMb = ptr.DiskMb
|
|
continue
|
|
}
|
|
|
|
if ptr.NumGpus != nil {
|
|
job.resources[GPU] = &aurora.Resource{NumGpus: ptr.NumGpus}
|
|
continue
|
|
}
|
|
}
|
|
|
|
// Mirrors defaults set by Pystachio
|
|
req.Settings.UpdateGroupSize = 1
|
|
req.Settings.WaitForBatchCompletion = false
|
|
req.Settings.MinWaitInInstanceRunningMs = 45000
|
|
req.Settings.MaxPerInstanceFailures = 0
|
|
req.Settings.MaxFailedInstances = 0
|
|
req.Settings.RollbackOnFailure = true
|
|
|
|
//TODO(rdelvalle): Deep copy job struct to avoid unexpected behavior
|
|
return &UpdateJob{Job: job, req: req}
|
|
}
|
|
|
|
func NewUpdateJob(config *aurora.TaskConfig, settings *aurora.JobUpdateSettings) *UpdateJob {
|
|
|
|
req := aurora.NewJobUpdateRequest()
|
|
req.TaskConfig = config
|
|
req.Settings = settings
|
|
|
|
job, ok := NewJob().(*AuroraJob)
|
|
if !ok {
|
|
// This should never happen but it is here as a safeguard
|
|
return nil
|
|
}
|
|
job.jobConfig.TaskConfig = config
|
|
|
|
// Rebuild resource map from TaskConfig
|
|
for _, ptr := range config.Resources {
|
|
if ptr.NumCpus != nil {
|
|
job.resources[CPU].NumCpus = ptr.NumCpus
|
|
continue // Guard against Union violations that Go won't enforce
|
|
}
|
|
|
|
if ptr.RamMb != nil {
|
|
job.resources[RAM].RamMb = ptr.RamMb
|
|
continue
|
|
}
|
|
|
|
if ptr.DiskMb != nil {
|
|
job.resources[DISK].DiskMb = ptr.DiskMb
|
|
continue
|
|
}
|
|
|
|
if ptr.NumGpus != nil {
|
|
job.resources[GPU] = &aurora.Resource{}
|
|
job.resources[GPU].NumGpus = ptr.NumGpus
|
|
continue // Guard against Union violations that Go won't enforce
|
|
}
|
|
}
|
|
|
|
//TODO(rdelvalle): Deep copy job struct to avoid unexpected behavior
|
|
return &UpdateJob{Job: job, req: req}
|
|
}
|
|
|
|
// Set instance count the job will have after the update.
|
|
func (u *UpdateJob) InstanceCount(inst int32) *UpdateJob {
|
|
u.req.InstanceCount = inst
|
|
return u
|
|
}
|
|
|
|
// Max number of instances being updated at any given moment.
|
|
func (u *UpdateJob) BatchSize(size int32) *UpdateJob {
|
|
u.req.Settings.UpdateGroupSize = size
|
|
return u
|
|
}
|
|
|
|
// Minimum number of seconds a shard must remain in RUNNING state before considered a success.
|
|
func (u *UpdateJob) WatchTime(ms int32) *UpdateJob {
|
|
u.req.Settings.MinWaitInInstanceRunningMs = ms
|
|
return u
|
|
}
|
|
|
|
// Wait for all instances in a group to be done before moving on.
|
|
func (u *UpdateJob) WaitForBatchCompletion(batchWait bool) *UpdateJob {
|
|
u.req.Settings.WaitForBatchCompletion = batchWait
|
|
return u
|
|
}
|
|
|
|
// Max number of instance failures to tolerate before marking instance as FAILED.
|
|
func (u *UpdateJob) MaxPerInstanceFailures(inst int32) *UpdateJob {
|
|
u.req.Settings.MaxPerInstanceFailures = inst
|
|
return u
|
|
}
|
|
|
|
// Max number of FAILED instances to tolerate before terminating the update.
|
|
func (u *UpdateJob) MaxFailedInstances(inst int32) *UpdateJob {
|
|
u.req.Settings.MaxFailedInstances = inst
|
|
return u
|
|
}
|
|
|
|
// When False, prevents auto rollback of a failed update.
|
|
func (u *UpdateJob) RollbackOnFail(rollback bool) *UpdateJob {
|
|
u.req.Settings.RollbackOnFailure = rollback
|
|
return u
|
|
}
|
|
|
|
func NewUpdateSettings() *aurora.JobUpdateSettings {
|
|
|
|
us := new(aurora.JobUpdateSettings)
|
|
// Mirrors defaults set by Pystachio
|
|
us.UpdateGroupSize = 1
|
|
us.WaitForBatchCompletion = false
|
|
us.MinWaitInInstanceRunningMs = 45000
|
|
us.MaxPerInstanceFailures = 0
|
|
us.MaxFailedInstances = 0
|
|
us.RollbackOnFailure = true
|
|
|
|
return us
|
|
}
|