gorealis/job.go

/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package realis
import (
"strconv"
"github.com/paypal/gorealis/gen-go/apache/aurora"
)
// Job interface is used to define a set of functions an Aurora Job object
// must implement.
// TODO(rdelvalle): Consider getting rid of the Job interface
type Job interface {
// Set Job Key environment.
Environment(env string) Job
Role(role string) Job
Name(name string) Job
CronSchedule(cron string) Job
CronCollisionPolicy(policy aurora.CronCollisionPolicy) Job
CPU(cpus float64) Job
Disk(disk int64) Job
RAM(ram int64) Job
GPU(gpu int64) Job
ExecutorName(name string) Job
ExecutorData(data string) Job
AddPorts(num int) Job
AddLabel(key string, value string) Job
AddNamedPorts(names ...string) Job
AddLimitConstraint(name string, limit int32) Job
AddValueConstraint(name string, negated bool, values ...string) Job
// From Aurora Docs:
// dedicated attribute. Aurora treats this specially, and only allows matching jobs
// to run on these machines, and will only schedule matching jobs on these machines.
// When a job is created, the scheduler requires that the $role component matches
// the role field in the job configuration, and will reject the job creation otherwise.
// A wildcard (*) may be used for the role portion of the dedicated attribute, which
// will allow any owner to elect for a job to run on the host(s)
AddDedicatedConstraint(role, name string) Job
AddURIs(extract bool, cache bool, values ...string) Job
JobKey() *aurora.JobKey
JobConfig() *aurora.JobConfiguration
TaskConfig() *aurora.TaskConfig
IsService(isService bool) Job
InstanceCount(instCount int32) Job
GetInstanceCount() int32
MaxFailure(maxFail int32) Job
Container(container Container) Job
PartitionPolicy(policy *aurora.PartitionPolicy) Job
Tier(tier string) Job
SlaPolicy(policy *aurora.SlaPolicy) Job
Priority(priority int32) Job
}
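// A minimal usage sketch (illustrative only, all values are placeholders): the
// interface is intended to be used as a fluent builder, typically starting
// from NewJob.
//
//	job := realis.NewJob().
//		Environment("prod").
//		Role("vagrant").
//		Name("hello_world").
//		ExecutorName("AuroraExecutor").
//		ExecutorData("{}"). // placeholder; real jobs pass a serialized executor config
//		CPU(0.25).
//		RAM(64).
//		Disk(100).
//		IsService(true).
//		InstanceCount(1)
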
type resourceType int
const (
CPU resourceType = iota
RAM
DISK
GPU
)
const portNamePrefix = "org.apache.aurora.port."
// AuroraJob is a structure to collect all information pertaining to an Aurora job.
type AuroraJob struct {
jobConfig *aurora.JobConfiguration
resources map[resourceType]*aurora.Resource
metadata map[string]*aurora.Metadata
constraints map[string]*aurora.Constraint
portCount int
}
// NewJob is used to create a Job object with everything initialized.
func NewJob() Job {
jobConfig := aurora.NewJobConfiguration()
taskConfig := aurora.NewTaskConfig()
jobKey := aurora.NewJobKey()
// Job Config
jobConfig.Key = jobKey
jobConfig.TaskConfig = taskConfig
// Task Config
taskConfig.Job = jobKey
taskConfig.Container = aurora.NewContainer()
taskConfig.Container.Mesos = aurora.NewMesosContainer()
// Resources
numCpus := aurora.NewResource()
ramMb := aurora.NewResource()
diskMb := aurora.NewResource()
resources := map[resourceType]*aurora.Resource{CPU: numCpus, RAM: ramMb, DISK: diskMb}
taskConfig.Resources = []*aurora.Resource{numCpus, ramMb, diskMb}
numCpus.NumCpus = new(float64)
ramMb.RamMb = new(int64)
diskMb.DiskMb = new(int64)
return &AuroraJob{
jobConfig: jobConfig,
resources: resources,
metadata: make(map[string]*aurora.Metadata),
constraints: make(map[string]*aurora.Constraint),
portCount: 0,
}
}
// Environment sets the Job Key environment.
func (j *AuroraJob) Environment(env string) Job {
j.jobConfig.Key.Environment = env
return j
}
// Role sets the Job Key role.
func (j *AuroraJob) Role(role string) Job {
j.jobConfig.Key.Role = role
// Will be deprecated
identity := &aurora.Identity{User: role}
j.jobConfig.Owner = identity
j.jobConfig.TaskConfig.Owner = identity
return j
}
// Name sets the Job Key Name.
func (j *AuroraJob) Name(name string) Job {
j.jobConfig.Key.Name = name
return j
}
// ExecutorName sets the name of the executor that the task will be configured to use.
func (j *AuroraJob) ExecutorName(name string) Job {
if j.jobConfig.TaskConfig.ExecutorConfig == nil {
j.jobConfig.TaskConfig.ExecutorConfig = aurora.NewExecutorConfig()
}
j.jobConfig.TaskConfig.ExecutorConfig.Name = name
return j
}
// ExecutorData sets the data blob that will be passed to the Mesos executor.
func (j *AuroraJob) ExecutorData(data string) Job {
if j.jobConfig.TaskConfig.ExecutorConfig == nil {
j.jobConfig.TaskConfig.ExecutorConfig = aurora.NewExecutorConfig()
}
j.jobConfig.TaskConfig.ExecutorConfig.Data = data
return j
}
// CPU sets the amount of CPU each task will use in an Aurora Job.
func (j *AuroraJob) CPU(cpus float64) Job {
*j.resources[CPU].NumCpus = cpus
return j
}
// RAM sets the amount of RAM each task will use in an Aurora Job.
func (j *AuroraJob) RAM(ram int64) Job {
*j.resources[RAM].RamMb = ram
return j
}
// Disk sets the amount of Disk each task will use in an Aurora Job.
func (j *AuroraJob) Disk(disk int64) Job {
*j.resources[DISK].DiskMb = disk
return j
}
// GPU sets the amount of GPU each task will use in an Aurora Job.
func (j *AuroraJob) GPU(gpu int64) Job {
// The GPU resource must be set explicitly since the scheduler, by default,
// rejects jobs that have GPU resources attached to them.
if _, ok := j.resources[GPU]; !ok {
j.resources[GPU] = &aurora.Resource{}
j.JobConfig().GetTaskConfig().Resources = append(
j.JobConfig().GetTaskConfig().Resources,
j.resources[GPU])
}
j.resources[GPU].NumGpus = &gpu
return j
}
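// Sketch of requesting a single GPU per task (the count is illustrative).
// Unlike CPU, RAM, and Disk, the GPU resource is only appended to the task's
// resource list on the first call to GPU(), so jobs that never request GPUs
// keep the default resource set.
//
//	job = job.GPU(1)
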
// MaxFailure sets how many failures to tolerate before giving up per Job.
func (j *AuroraJob) MaxFailure(maxFail int32) Job {
j.jobConfig.TaskConfig.MaxTaskFailures = maxFail
return j
}
// InstanceCount sets how many instances of the task to run for this Job.
func (j *AuroraJob) InstanceCount(instCount int32) Job {
j.jobConfig.InstanceCount = instCount
return j
}
// CronSchedule allows the user to configure a cron schedule on which this job will run.
func (j *AuroraJob) CronSchedule(cron string) Job {
j.jobConfig.CronSchedule = &cron
return j
}
// CronCollisionPolicy allows the user to decide what happens if two or more instances
// of the same Cron job need to run.
func (j *AuroraJob) CronCollisionPolicy(policy aurora.CronCollisionPolicy) Job {
j.jobConfig.CronCollisionPolicy = policy
return j
}
// GetInstanceCount returns how many tasks this Job contains.
func (j *AuroraJob) GetInstanceCount() int32 {
return j.jobConfig.InstanceCount
}
// IsService configures the job to run as a long-running service (true) or as an ad-hoc job (false).
func (j *AuroraJob) IsService(isService bool) Job {
j.jobConfig.TaskConfig.IsService = isService
return j
}
// JobKey returns the job's configuration key.
func (j *AuroraJob) JobKey() *aurora.JobKey {
return j.jobConfig.Key
}
// JobConfig returns the job's configuration.
func (j *AuroraJob) JobConfig() *aurora.JobConfiguration {
return j.jobConfig
}
// TaskConfig returns the job's task (shard) configuration.
func (j *AuroraJob) TaskConfig() *aurora.TaskConfig {
return j.jobConfig.TaskConfig
}
// AddURIs adds a list of URIs with the same extract and cache configuration. Scheduler must have
// --enable_mesos_fetcher flag enabled. Currently there is no duplicate detection.
func (j *AuroraJob) AddURIs(extract bool, cache bool, values ...string) Job {
for _, value := range values {
j.jobConfig.TaskConfig.MesosFetcherUris = append(j.jobConfig.TaskConfig.MesosFetcherUris,
&aurora.MesosFetcherURI{Value: value, Extract: &extract, Cache: &cache})
}
return j
}
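// Sketch of fetching an artifact onto the agent before the task launches
// (the URL is a placeholder). extract=true unpacks archives and cache=true
// allows the Mesos fetcher to reuse a previously downloaded copy.
//
//	job = job.AddURIs(true, true, "http://example.com/app.tar.gz")
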
// AddLabel adds a Mesos label to the job. Note that Aurora will add the
// prefix "org.apache.aurora.metadata." to the beginning of each key.
func (j *AuroraJob) AddLabel(key string, value string) Job {
if _, ok := j.metadata[key]; !ok {
j.metadata[key] = &aurora.Metadata{Key: key}
j.jobConfig.TaskConfig.Metadata = append(j.jobConfig.TaskConfig.Metadata, j.metadata[key])
}
j.metadata[key].Value = value
return j
}
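// Sketch of attaching metadata (key and value are placeholders). Given the
// prefix noted above, the label below is surfaced by Aurora as
// "org.apache.aurora.metadata.owner".
//
//	job = job.AddLabel("owner", "team-infra")
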
// AddNamedPorts adds named ports to the job configuration. These are randomly
// assigned ports, as it's not currently possible to request specific port numbers
// through Aurora.
func (j *AuroraJob) AddNamedPorts(names ...string) Job {
j.portCount += len(names)
for _, name := range names {
j.jobConfig.TaskConfig.Resources = append(
j.jobConfig.TaskConfig.Resources,
&aurora.Resource{NamedPort: &name})
}
return j
}
// AddPorts adds a request for a number of ports to the job configuration. The names chosen for these ports
// will be org.apache.aurora.port.X, where X is the current port count for the job configuration
// starting at 0. These are randomly assigned ports, as it's not currently possible to request
// specific port numbers through Aurora.
func (j *AuroraJob) AddPorts(num int) Job {
start := j.portCount
j.portCount += num
for i := start; i < j.portCount; i++ {
portName := portNamePrefix + strconv.Itoa(i)
j.jobConfig.TaskConfig.Resources = append(
j.jobConfig.TaskConfig.Resources,
&aurora.Resource{NamedPort: &portName})
}
return j
}
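// Sketch combining the two port helpers. Assuming no ports were requested
// beforehand, the call below yields resources named "org.apache.aurora.port.0",
// "org.apache.aurora.port.1", and "http" ("http" being an arbitrary example name).
//
//	job = job.AddPorts(2).AddNamedPorts("http")
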
// AddValueConstraint allows the user to add a value constraint to the job to limit which agents the job's
// tasks can be run on. If the name matches a constraint that was previously set, the previous value will be
// overwritten. In case the previous constraint attached to the name was of type Limit, the constraint will be
// clobbered by this new Value constraint.
// From Aurora Docs:
// Add a Value constraint
// name - Mesos slave attribute that the constraint is matched against.
// If negated = true, treat this as a 'not' - to avoid specific values.
// Values - list of values we look for in attribute name
func (j *AuroraJob) AddValueConstraint(name string, negated bool, values ...string) Job {
if _, ok := j.constraints[name]; !ok {
j.constraints[name] = &aurora.Constraint{Name: name}
j.jobConfig.TaskConfig.Constraints = append(j.jobConfig.TaskConfig.Constraints, j.constraints[name])
}
j.constraints[name].Constraint = &aurora.TaskConstraint{
Value: &aurora.ValueConstraint{
Negated: negated,
Values: values,
},
Limit: nil,
}
return j
}
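// Sketch of a value constraint ("zone" and its values are illustrative
// attribute data): only schedule on agents whose "zone" attribute matches one
// of the listed values; set negated to true to avoid them instead.
//
//	job = job.AddValueConstraint("zone", false, "us-east-1a", "us-east-1b")
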
// AddLimitConstraint allows the user to limit how many tasks from the same Job are run on a single host.
// If the name matches a constraint that was previously set, the previous value will be
// overwritten. In case the previous constraint attached to the name was of type Value, the constraint will be
// clobbered by this new Limit constraint.
// From Aurora Docs:
// A constraint that specifies the maximum number of active tasks on a host with
// a matching attribute that may be scheduled simultaneously.
func (j *AuroraJob) AddLimitConstraint(name string, limit int32) Job {
if _, ok := j.constraints[name]; !ok {
j.constraints[name] = &aurora.Constraint{Name: name}
j.jobConfig.TaskConfig.Constraints = append(j.jobConfig.TaskConfig.Constraints, j.constraints[name])
}
j.constraints[name].Constraint = &aurora.TaskConstraint{
Value: nil,
Limit: &aurora.LimitConstraint{Limit: limit},
}
return j
}
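// Sketch of spreading instances out by allowing at most one active task of
// this job per matching attribute, here the commonly used "host" attribute.
//
//	job = job.AddLimitConstraint("host", 1)
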
// AddDedicatedConstraint is a convenience function that allows the user to
// add a dedicated constraint to a Job configuration.
// In case a previous dedicated constraint was set, it will be clobbered by this new value.
func (j *AuroraJob) AddDedicatedConstraint(role, name string) Job {
j.AddValueConstraint("dedicated", false, role+"/"+name)
return j
}
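// Sketch (role and name are placeholders): the call below is equivalent to
// AddValueConstraint("dedicated", false, "www-data/prod"), so the job's tasks
// will only be scheduled on machines whose dedicated attribute is "www-data/prod".
//
//	job = job.AddDedicatedConstraint("www-data", "prod")
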
// Container sets the container in which the job's tasks will run.
func (j *AuroraJob) Container(container Container) Job {
j.jobConfig.TaskConfig.Container = container.Build()
return j
}
// PartitionPolicy sets a partition policy for the job configuration to use.
func (j *AuroraJob) PartitionPolicy(policy *aurora.PartitionPolicy) Job {
j.jobConfig.TaskConfig.PartitionPolicy = policy
return j
}
// Tier sets the Tier for the Job.
func (j *AuroraJob) Tier(tier string) Job {
j.jobConfig.TaskConfig.Tier = &tier
return j
}
// SlaPolicy sets an SlaPolicy for the Job.
func (j *AuroraJob) SlaPolicy(policy *aurora.SlaPolicy) Job {
j.jobConfig.TaskConfig.SlaPolicy = policy
return j
}
// Priority sets the scheduling priority of the Job's tasks.
func (j *AuroraJob) Priority(priority int32) Job {
j.jobConfig.TaskConfig.Priority = priority
return j
}