Adds support for Tier and SlaPolicy to the Job interface (#99)

* Adding a parameter for Aurora so that we're able to run SLA-aware updates with fewer than 20 instances. Also lowered the time it takes to run the test by reducing the watch time per instance.

* Reducing the number of instances and time for SLA-aware instances in the docker-compose setup.

* Adding another Mesos agent to the docker-compose setup.

* Huge thanks to @zircote for this contribution.
This commit is contained in:
Renan DelValle 2019-02-20 16:36:50 -08:00 committed by GitHub
parent 79fa7ba16d
commit 1f459dd56a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 114 additions and 1 deletions

View file

@ -55,6 +55,30 @@ services:
depends_on:
- zk
# Second Mesos agent for the docker-compose cluster. It listens on port 5061
# and is given the static address 192.168.33.5 on the aurora_cluster network.
agent-two:
image: rdelvalle/mesos-agent:1.5.1
pid: host
restart: on-failure
ports:
- "5061:5061"
environment:
MESOS_MASTER: zk://192.168.33.2:2181/mesos
MESOS_CONTAINERIZERS: docker,mesos
MESOS_HOSTNAME: localhost
MESOS_PORT: 5061
MESOS_RESOURCES: ports(*):[11000-11999]
MESOS_SYSTEMD_ENABLE_SUPPORT: 'false'
MESOS_WORK_DIR: /tmp/mesos
networks:
aurora_cluster:
ipv4_address: 192.168.33.5
volumes:
- /sys/fs/cgroup:/sys/fs/cgroup
- /var/run/docker.sock:/var/run/docker.sock
depends_on:
- zk
aurora-one:
image: rdelvalle/aurora:0.21.0
pid: host
@ -65,6 +89,7 @@ services:
CLUSTER_NAME: test-cluster
ZK_ENDPOINTS: "192.168.33.2:2181"
MESOS_MASTER: "zk://192.168.33.2:2181/mesos"
EXTRA_SCHEDULER_ARGS: "-min_required_instances_for_sla_check=1"
networks:
aurora_cluster:
ipv4_address: 192.168.33.7

15
job.go
View file

@ -56,6 +56,8 @@ type Job interface {
MaxFailure(maxFail int32) Job
Container(container Container) Job
PartitionPolicy(policy *aurora.PartitionPolicy) Job
Tier(tier string) Job
SlaPolicy(policy *aurora.SlaPolicy) Job
}
type ResourceType int
@ -323,6 +325,19 @@ func (j *AuroraJob) Container(container Container) Job {
// PartitionPolicy sets the partition policy on the job's task configuration
// and returns the job so that further builder calls can be chained.
func (j *AuroraJob) PartitionPolicy(policy *aurora.PartitionPolicy) Job {
j.jobConfig.TaskConfig.PartitionPolicy = policy
return j
}
// Tier sets the tier on the job's task configuration and returns the job so
// that further builder calls can be chained.
func (j *AuroraJob) Tier(tier string) Job {
// The task configuration stores a pointer; &tier points at this call's own
// copy of the argument, so the caller's string variable is not aliased.
j.jobConfig.TaskConfig.Tier = &tier
return j
}
// SlaPolicy sets the SLA policy on the job's task configuration and returns
// the job so that further builder calls can be chained. The pointer is stored
// as given, without copying the policy it points to.
func (j *AuroraJob) SlaPolicy(policy *aurora.SlaPolicy) Job {
j.jobConfig.TaskConfig.SlaPolicy = policy
return j
}

View file

@ -750,3 +750,76 @@ func TestRealisClient_PartitionPolicy(t *testing.T) {
// Clean up after finishing test
_, err = r.KillJob(job.JobKey())
}
// TestAuroraJob_UpdateSlaPolicy creates one service per SlaPolicy flavor
// (count, percentage and coordinator based), waits for the resulting job
// update to finish, and then kills the job.
func TestAuroraJob_UpdateSlaPolicy(t *testing.T) {
	tests := []struct {
		name string
		args aurora.SlaPolicy
	}{
		{
			"create_service_with_sla_count_policy_test",
			aurora.SlaPolicy{CountSlaPolicy: &aurora.CountSlaPolicy{Count: 1, DurationSecs: 15}},
		},
		{
			"create_service_with_sla_percentage_policy_test",
			aurora.SlaPolicy{PercentageSlaPolicy: &aurora.PercentageSlaPolicy{Percentage: 0.25, DurationSecs: 15}},
		},
		{
			"create_service_with_sla_coordinator_policy_test",
			aurora.SlaPolicy{CoordinatorSlaPolicy: &aurora.CoordinatorSlaPolicy{
				CoordinatorUrl: "http://localhost/endpoint", StatusKey: "aurora_test"}},
		},
	}

	role := "vagrant"

	_, err := r.SetQuota(role, thrift.Float64Ptr(6.0), thrift.Int64Ptr(1024), thrift.Int64Ptr(1024))
	assert.NoError(t, err)

	for _, tt := range tests {
		// Per-iteration copy: the address of tt.args is handed to SlaPolicy
		// below, and on toolchains older than Go 1.22 the range variable is
		// shared by every iteration.
		tt := tt

		t.Run(tt.name, func(t *testing.T) {
			// Create a single job
			job := realis.NewJob().
				Environment("prod").
				Role(role).
				Name(tt.name).
				ExecutorName(aurora.AURORA_EXECUTOR_NAME).
				ExecutorData(string(thermosPayload)).
				CPU(.01).
				RAM(2).
				Disk(5).
				InstanceCount(4).
				IsService(true).
				SlaPolicy(&tt.args).
				Tier("preferred")

			settings := realis.NewUpdateSettings()
			settings.UpdateGroupSize = 2
			settings.MinWaitInInstanceRunningMs = 5 * 1000

			_, result, err := r.CreateService(job, settings)
			// assert does not stop the subtest on failure, and result is
			// dereferenced below, so bail out instead of panicking on nil.
			if !assert.NoError(t, err) || !assert.NotNil(t, result) {
				return
			}

			ok, mErr := monitor.JobUpdate(*result.GetKey(), 5, 240)
			if !ok || mErr != nil {
				// Update may already be in a terminal state so don't check for error
				_, _ = r.AbortJobUpdate(*result.GetKey(), "Monitor timed out.")
			}
			assert.True(t, ok)
			assert.NoError(t, mErr)

			// Kill the test job exactly once, whether or not the update succeeded.
			_, err = r.KillJob(job.JobKey())
			assert.NoError(t, err)
		})
	}
}

View file

@ -1,4 +1,4 @@
#!/bin/bash
# Since we run our docker compose setup in bridge mode to be able to run on MacOS, we have to launch a Docker container within the bridge network in order to avoid any routing issues.
docker run -t -v $(pwd):/go/src/github.com/paypal/gorealis --network gorealis_aurora_cluster golang:1.10.3-stretch go test -v github.com/paypal/gorealis $@
# "$(pwd)" and "$@" are quoted so that a checkout path or test arguments containing spaces are passed through unsplit.
docker run -t -v "$(pwd)":/go/src/github.com/paypal/gorealis --network gorealis_aurora_cluster golang:1.10-stretch go test -v github.com/paypal/gorealis "$@"