1117 lines
29 KiB
Go
1117 lines
29 KiB
Go
/**
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package realis_test
|
|
|
|
import (
|
|
"fmt"
|
|
"io/ioutil"
|
|
"os"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/apache/thrift/lib/go/thrift"
|
|
realis "github.com/paypal/gorealis"
|
|
"github.com/paypal/gorealis/gen-go/apache/aurora"
|
|
"github.com/paypal/gorealis/response"
|
|
"github.com/pkg/errors"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
)
|
|
|
|
var r realis.Realis
|
|
var monitor *realis.Monitor
|
|
var thermosPayload []byte
|
|
|
|
const auroraURL = "http://192.168.33.7:8081"
|
|
|
|
func TestMain(m *testing.M) {
|
|
var err error
|
|
|
|
// New configuration to connect to docker container
|
|
r, err = realis.NewRealisClient(realis.SchedulerUrl(auroraURL),
|
|
realis.BasicAuth("aurora", "secret"),
|
|
realis.TimeoutMS(20000))
|
|
|
|
if err != nil {
|
|
fmt.Println("Please run docker-compose up -d before running test suite")
|
|
os.Exit(1)
|
|
}
|
|
|
|
defer r.Close()
|
|
|
|
// Create monitor
|
|
monitor = &realis.Monitor{Client: r}
|
|
|
|
thermosPayload, err = ioutil.ReadFile("examples/thermos_payload.json")
|
|
if err != nil {
|
|
fmt.Println("Error reading thermos payload file: ", err)
|
|
os.Exit(1)
|
|
}
|
|
|
|
os.Exit(m.Run())
|
|
}
|
|
|
|
func TestNonExistentEndpoint(t *testing.T) {
|
|
// Reduce penalties for this test to make it quick
|
|
backoff := realis.Backoff{
|
|
Steps: 3,
|
|
Duration: 200 * time.Millisecond,
|
|
Factor: 1.0,
|
|
Jitter: 0.1}
|
|
|
|
taskQ := &aurora.TaskQuery{}
|
|
badEndpoint := "http://idontexist.local:8081/api"
|
|
|
|
t.Run("WithRetries", func(t *testing.T) {
|
|
// Attempt to connect to a bad endpoint
|
|
badClient, err := realis.NewRealisClient(
|
|
realis.SchedulerUrl(badEndpoint),
|
|
realis.TimeoutMS(200000),
|
|
realis.BackOff(backoff),
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
require.NotNil(t, badClient)
|
|
defer badClient.Close()
|
|
|
|
_, err = badClient.GetTasksWithoutConfigs(taskQ)
|
|
|
|
// Check that we do error out of retrying
|
|
require.Error(t, err)
|
|
// Check that the error before this one was a a retry error
|
|
// TODO: Consider bubbling up timeout behaving error all the way up to the user.
|
|
retryErr := realis.ToRetryCount(errors.Cause(err))
|
|
require.NotNil(t, retryErr, "error passed in is not a retry error")
|
|
|
|
assert.Equal(t, backoff.Steps, retryErr.RetryCount(), "retry count is incorrect")
|
|
})
|
|
|
|
t.Run("FailOnLookup", func(t *testing.T) {
|
|
// Attempt to connect to a bad endpoint
|
|
badClient, err := realis.NewRealisClient(
|
|
realis.SchedulerUrl(badEndpoint),
|
|
realis.TimeoutMS(200000),
|
|
realis.BackOff(backoff),
|
|
realis.FailOnPermanentErrors(),
|
|
)
|
|
|
|
require.NoError(t, err)
|
|
require.NotNil(t, badClient)
|
|
defer badClient.Close()
|
|
|
|
_, err = badClient.GetTasksWithoutConfigs(taskQ)
|
|
|
|
// Check that we do error out of retrying
|
|
require.Error(t, err)
|
|
})
|
|
|
|
}
|
|
|
|
func TestThriftBinary(t *testing.T) {
|
|
r, err := realis.NewRealisClient(realis.SchedulerUrl(auroraURL),
|
|
realis.BasicAuth("aurora", "secret"),
|
|
realis.TimeoutMS(20000),
|
|
realis.ThriftBinary())
|
|
|
|
require.NoError(t, err)
|
|
defer r.Close()
|
|
|
|
role := "all"
|
|
taskQ := &aurora.TaskQuery{
|
|
Role: &role,
|
|
}
|
|
|
|
// Perform a simple API call to test Thrift Binary
|
|
_, err = r.GetTasksWithoutConfigs(taskQ)
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
func TestThriftJSON(t *testing.T) {
|
|
r, err := realis.NewRealisClient(realis.SchedulerUrl(auroraURL),
|
|
realis.BasicAuth("aurora", "secret"),
|
|
realis.TimeoutMS(20000),
|
|
realis.ThriftJSON())
|
|
|
|
require.NoError(t, err)
|
|
defer r.Close()
|
|
|
|
role := "all"
|
|
taskQ := &aurora.TaskQuery{
|
|
Role: &role,
|
|
}
|
|
|
|
// Perform a simple API call to test Thrift Binary
|
|
_, err = r.GetTasksWithoutConfigs(taskQ)
|
|
|
|
assert.NoError(t, err)
|
|
|
|
}
|
|
|
|
func TestNoopLogger(t *testing.T) {
|
|
r, err := realis.NewRealisClient(realis.SchedulerUrl(auroraURL),
|
|
realis.BasicAuth("aurora", "secret"),
|
|
realis.SetLogger(realis.NoopLogger{}))
|
|
|
|
require.NoError(t, err)
|
|
defer r.Close()
|
|
|
|
role := "all"
|
|
taskQ := &aurora.TaskQuery{
|
|
Role: &role,
|
|
}
|
|
|
|
// Perform a simple API call to test Thrift Binary
|
|
_, err = r.GetTasksWithoutConfigs(taskQ)
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
func TestLeaderFromZK(t *testing.T) {
|
|
cluster := realis.GetDefaultClusterFromZKUrl("192.168.33.2:2181")
|
|
url, err := realis.LeaderFromZK(*cluster)
|
|
|
|
require.NoError(t, err)
|
|
|
|
// Address stored inside of ZK might be different than the one we connect to in our tests.
|
|
assert.Equal(t, "http://192.168.33.7:8081", url)
|
|
}
|
|
|
|
func TestRealisClient_ReestablishConn(t *testing.T) {
|
|
// Test that we're able to tear down the old connection and create a new one.
|
|
err := r.ReestablishConn()
|
|
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
func TestRealisClient_CreateJob_Thermos(t *testing.T) {
|
|
|
|
role := "vagrant"
|
|
job := realis.NewJob().
|
|
Environment("prod").
|
|
Role(role).
|
|
Name("create_thermos_job_test").
|
|
ExecutorName(aurora.AURORA_EXECUTOR_NAME).
|
|
ExecutorData(string(thermosPayload)).
|
|
CPU(.1).
|
|
RAM(16).
|
|
Disk(50).
|
|
IsService(true).
|
|
InstanceCount(2).
|
|
AddPorts(1)
|
|
|
|
_, err := r.CreateJob(job)
|
|
require.NoError(t, err)
|
|
|
|
// Test Instances Monitor
|
|
success, err := monitor.Instances(job.JobKey(), job.GetInstanceCount(), 1, 50)
|
|
assert.True(t, success)
|
|
assert.NoError(t, err)
|
|
|
|
// Fetch all Jobs
|
|
_, result, err := r.GetJobs(role)
|
|
assert.Len(t, result.Configs, 1)
|
|
assert.NoError(t, err)
|
|
|
|
// Test asking the scheduler to perform a Snpshot
|
|
t.Run("Snapshot", func(t *testing.T) {
|
|
err := r.Snapshot()
|
|
assert.NoError(t, err)
|
|
})
|
|
|
|
// Test asking the scheduler to backup a Snapshot
|
|
t.Run("PerformBackup", func(t *testing.T) {
|
|
err := r.PerformBackup()
|
|
assert.NoError(t, err)
|
|
})
|
|
|
|
t.Run("GetTaskStatus", func(t *testing.T) {
|
|
status, err := r.GetTaskStatus(&aurora.TaskQuery{
|
|
JobKeys: []*aurora.JobKey{job.JobKey()},
|
|
Statuses: []aurora.ScheduleStatus{aurora.ScheduleStatus_RUNNING}})
|
|
require.NoError(t, err)
|
|
assert.NotNil(t, status)
|
|
assert.Len(t, status, 2)
|
|
|
|
// TODO: Assert that assigned task matches the configuration of the task scheduled
|
|
})
|
|
|
|
t.Run("AddInstances", func(t *testing.T) {
|
|
_, err := r.AddInstances(aurora.InstanceKey{JobKey: job.JobKey(), InstanceId: 0}, 2)
|
|
require.NoError(t, err)
|
|
success, err := monitor.Instances(job.JobKey(), 4, 1, 50)
|
|
assert.True(t, success)
|
|
assert.NoError(t, err)
|
|
})
|
|
|
|
t.Run("KillInstances", func(t *testing.T) {
|
|
_, err := r.KillInstances(job.JobKey(), 2, 3)
|
|
require.NoError(t, err)
|
|
success, err := monitor.Instances(job.JobKey(), 2, 1, 50)
|
|
assert.True(t, success)
|
|
assert.NoError(t, err)
|
|
})
|
|
|
|
t.Run("RestartInstances", func(t *testing.T) {
|
|
_, err := r.RestartInstances(job.JobKey(), 0)
|
|
require.NoError(t, err)
|
|
success, err := monitor.Instances(job.JobKey(), 2, 1, 50)
|
|
assert.True(t, success)
|
|
assert.NoError(t, err)
|
|
})
|
|
|
|
t.Run("RestartJobs", func(t *testing.T) {
|
|
_, err := r.RestartJob(job.JobKey())
|
|
require.NoError(t, err)
|
|
success, err := monitor.Instances(job.JobKey(), 2, 1, 50)
|
|
assert.True(t, success)
|
|
assert.NoError(t, err)
|
|
})
|
|
|
|
// Tasks must exist for it to, be killed
|
|
t.Run("KillJob", func(t *testing.T) {
|
|
_, err := r.KillJob(job.JobKey())
|
|
require.NoError(t, err)
|
|
success, err := monitor.Instances(job.JobKey(), 0, 1, 50)
|
|
assert.True(t, success)
|
|
assert.NoError(t, err)
|
|
})
|
|
|
|
t.Run("Duplicate_Metadata", func(t *testing.T) {
|
|
job.Name("thermos_duplicate_metadata").
|
|
AddLabel("hostname", "cookie").
|
|
AddLabel("hostname", "candy").
|
|
AddLabel("hostname", "popcorn").
|
|
AddLabel("hostname", "chips").
|
|
AddLabel("chips", "chips")
|
|
|
|
_, err := r.CreateJob(job)
|
|
require.NoError(t, err)
|
|
|
|
success, err := monitor.Instances(job.JobKey(), 2, 1, 50)
|
|
assert.True(t, success)
|
|
assert.NoError(t, err)
|
|
|
|
_, err = r.KillJob(job.JobKey())
|
|
assert.NoError(t, err)
|
|
})
|
|
|
|
t.Run("Duplicate_constraints", func(t *testing.T) {
|
|
job.Name("thermos_duplicate_constraints").
|
|
AddValueConstraint("zone", false, "east", "west").
|
|
AddValueConstraint("zone", false, "east").
|
|
AddValueConstraint("zone", true, "west")
|
|
|
|
_, err := r.CreateJob(job)
|
|
require.NoError(t, err)
|
|
|
|
success, err := monitor.Instances(job.JobKey(), 2, 1, 50)
|
|
assert.True(t, success)
|
|
assert.NoError(t, err)
|
|
|
|
_, err = r.KillJob(job.JobKey())
|
|
assert.NoError(t, err)
|
|
})
|
|
|
|
t.Run("Overwrite_constraints", func(t *testing.T) {
|
|
job.Name("thermos_overwrite_constraints").
|
|
AddLimitConstraint("zone", 1).
|
|
AddValueConstraint("zone", true, "west", "east").
|
|
AddLimitConstraint("zone", 2)
|
|
|
|
_, err := r.CreateJob(job)
|
|
require.NoError(t, err)
|
|
|
|
success, err := monitor.Instances(job.JobKey(), 2, 1, 50)
|
|
assert.True(t, success)
|
|
assert.NoError(t, err)
|
|
|
|
_, err = r.KillJob(job.JobKey())
|
|
assert.NoError(t, err)
|
|
})
|
|
}
|
|
|
|
// Test configuring an executor that doesn't exist for CreateJob API
|
|
func TestRealisClient_CreateJob_ExecutorDoesNotExist(t *testing.T) {
|
|
// Create a single job
|
|
job := realis.NewJob().
|
|
Environment("prod").
|
|
Role("vagrant").
|
|
Name("executordoesntexist").
|
|
ExecutorName("idontexist").
|
|
ExecutorData("").
|
|
CPU(.25).
|
|
RAM(4).
|
|
Disk(10).
|
|
InstanceCount(1)
|
|
|
|
resp, err := r.CreateJob(job)
|
|
assert.Error(t, err)
|
|
assert.Equal(t, aurora.ResponseCode_INVALID_REQUEST, resp.GetResponseCode())
|
|
}
|
|
|
|
// Test configuring an executor that doesn't exist for CreateJob API
|
|
func TestRealisClient_GetPendingReason(t *testing.T) {
|
|
|
|
env := "prod"
|
|
role := "vagrant"
|
|
name := "pending_reason_test"
|
|
|
|
// Create a single job
|
|
job := realis.NewJob().
|
|
Environment(env).
|
|
Role(role).
|
|
Name(name).
|
|
ExecutorName(aurora.AURORA_EXECUTOR_NAME).
|
|
ExecutorData(string(thermosPayload)).
|
|
CPU(1000).
|
|
RAM(64).
|
|
Disk(100).
|
|
InstanceCount(1)
|
|
|
|
resp, err := r.CreateJob(job)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode)
|
|
|
|
taskQ := &aurora.TaskQuery{
|
|
Role: &role,
|
|
Environment: &env,
|
|
JobName: &name,
|
|
}
|
|
|
|
reasons, err := r.GetPendingReason(taskQ)
|
|
assert.NoError(t, err)
|
|
assert.Len(t, reasons, 1)
|
|
|
|
_, err = r.KillJob(job.JobKey())
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
func TestRealisClient_CreateService_WithPulse_Thermos(t *testing.T) {
|
|
|
|
fmt.Println("Creating service")
|
|
role := "vagrant"
|
|
job := realis.NewJob().
|
|
Environment("prod").
|
|
Role(role).
|
|
Name("create_thermos_job_test").
|
|
ExecutorName(aurora.AURORA_EXECUTOR_NAME).
|
|
ExecutorData(string(thermosPayload)).
|
|
CPU(.5).
|
|
RAM(64).
|
|
Disk(100).
|
|
IsService(true).
|
|
InstanceCount(1).
|
|
AddPorts(1).
|
|
AddLabel("currentTime", time.Now().String())
|
|
|
|
settings := realis.NewUpdateSettings()
|
|
settings.BlockIfNoPulsesAfterMs = thrift.Int32Ptr(500)
|
|
settings.UpdateGroupSize = 1
|
|
settings.MinWaitInInstanceRunningMs = 5000
|
|
settings.WaitForBatchCompletion = true
|
|
job.InstanceCount(2)
|
|
|
|
_, result, err := r.CreateService(job, settings)
|
|
require.NoError(t, err)
|
|
|
|
updateQ := aurora.JobUpdateQuery{
|
|
Key: result.GetKey(),
|
|
Limit: 1,
|
|
}
|
|
|
|
var updateDetails []*aurora.JobUpdateDetails
|
|
|
|
ticker := time.NewTicker(time.Second * 1)
|
|
timer := time.NewTimer(time.Minute * 6)
|
|
defer ticker.Stop()
|
|
defer timer.Stop()
|
|
|
|
pulseLoop:
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
|
|
_, err = r.PulseJobUpdate(result.GetKey())
|
|
assert.NoError(t, err, "unable to pulse job update")
|
|
|
|
respDetail, err := r.JobUpdateDetails(updateQ)
|
|
assert.NoError(t, err)
|
|
|
|
updateDetails = response.JobUpdateDetails(respDetail)
|
|
require.Len(t, updateDetails, 1, "No update found")
|
|
|
|
status := updateDetails[0].Update.Summary.State.Status
|
|
if _, ok := realis.ActiveJobUpdateStates[status]; !ok {
|
|
|
|
// Rolled forward is the only state in which an update has been successfully updated
|
|
// if we encounter an inactive state and it is not at rolled forward, update failed
|
|
if status == aurora.JobUpdateStatus_ROLLED_FORWARD {
|
|
fmt.Println("update succeed")
|
|
break pulseLoop
|
|
} else {
|
|
fmt.Println("update failed")
|
|
break pulseLoop
|
|
}
|
|
}
|
|
case <-timer.C:
|
|
_, err := r.AbortJobUpdate(*updateDetails[0].GetUpdate().GetSummary().GetKey(), "")
|
|
assert.NoError(t, err)
|
|
_, err = r.KillJob(job.JobKey())
|
|
assert.NoError(t, err, "timed out during pulse update test")
|
|
t.FailNow()
|
|
}
|
|
}
|
|
|
|
_, err = r.KillJob(job.JobKey())
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
// Test configuring an executor that doesn't exist for CreateJob API
|
|
func TestRealisClient_CreateService(t *testing.T) {
|
|
|
|
// Create a single job
|
|
job := realis.NewJob().
|
|
Environment("prod").
|
|
Role("vagrant").
|
|
Name("create_service_test").
|
|
ExecutorName(aurora.AURORA_EXECUTOR_NAME).
|
|
ExecutorData(string(thermosPayload)).
|
|
CPU(.25).
|
|
RAM(4).
|
|
Disk(10).
|
|
InstanceCount(3).
|
|
IsService(true)
|
|
|
|
settings := realis.NewUpdateSettings()
|
|
settings.UpdateGroupSize = 2
|
|
settings.MinWaitInInstanceRunningMs = 5000
|
|
job.InstanceCount(3)
|
|
|
|
_, result, err := r.CreateService(job, settings)
|
|
require.NoError(t, err)
|
|
assert.NotNil(t, result)
|
|
|
|
// Test asking the scheduler to backup a Snapshot
|
|
t.Run("PauseJobUpdate", func(t *testing.T) {
|
|
_, err = r.PauseJobUpdate(result.GetKey(), "")
|
|
assert.NoError(t, err)
|
|
})
|
|
|
|
t.Run("ResumeJobUpdate", func(t *testing.T) {
|
|
_, err = r.ResumeJobUpdate(result.GetKey(), "")
|
|
assert.NoError(t, err)
|
|
})
|
|
|
|
var ok bool
|
|
var mErr error
|
|
|
|
if ok, mErr = monitor.JobUpdate(*result.GetKey(), 5, 240); !ok || mErr != nil {
|
|
// Update may already be in a terminal state so don't check for error
|
|
_, err := r.AbortJobUpdate(*result.GetKey(), "Monitor timed out.")
|
|
_, err = r.KillJob(job.JobKey())
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
require.NoError(t, mErr)
|
|
assert.True(t, ok)
|
|
|
|
// Kill task test task after confirming it came up fine
|
|
_, err = r.KillJob(job.JobKey())
|
|
assert.NoError(t, err)
|
|
|
|
success, err := monitor.Instances(job.JobKey(), 0, 1, 50)
|
|
require.NoError(t, mErr)
|
|
assert.True(t, success)
|
|
|
|
// Create a client which will timeout and close the connection before receiving an answer
|
|
timeoutClient, err := realis.NewRealisClient(
|
|
realis.SchedulerUrl(auroraURL),
|
|
realis.BasicAuth("aurora", "secret"),
|
|
realis.TimeoutMS(5),
|
|
)
|
|
require.NoError(t, err)
|
|
defer timeoutClient.Close()
|
|
|
|
// Test case where http connection timeouts out.
|
|
t.Run("TimeoutError", func(t *testing.T) {
|
|
job.Name("createService_timeout")
|
|
|
|
// Make sure a timedout error was returned
|
|
_, _, err = timeoutClient.CreateService(job, settings)
|
|
require.Error(t, err)
|
|
assert.True(t, realis.IsTimeout(err))
|
|
|
|
updateReceivedQuery := aurora.JobUpdateQuery{
|
|
Role: &job.JobKey().Role,
|
|
JobKey: job.JobKey(),
|
|
UpdateStatuses: aurora.ACTIVE_JOB_UPDATE_STATES,
|
|
Limit: 1}
|
|
|
|
updateSummaries, err := monitor.JobUpdateQuery(updateReceivedQuery, time.Second*1, time.Second*50)
|
|
require.NoError(t, err)
|
|
|
|
require.Len(t, updateSummaries, 1)
|
|
|
|
r.AbortJobUpdate(*updateSummaries[0].Key, "Cleaning up")
|
|
_, err = r.KillJob(job.JobKey())
|
|
assert.NoError(t, err)
|
|
|
|
})
|
|
|
|
// Test case where http connection timeouts out.
|
|
t.Run("TimeoutError_BadPayload", func(t *testing.T) {
|
|
// Illegal payload
|
|
job.InstanceCount(-1)
|
|
job.Name("createService_timeout_bad_payload")
|
|
|
|
// Make sure a timedout error was returned
|
|
_, _, err = timeoutClient.CreateService(job, settings)
|
|
require.Error(t, err)
|
|
assert.True(t, realis.IsTimeout(err))
|
|
|
|
summary, err := r.GetJobUpdateSummaries(
|
|
&aurora.JobUpdateQuery{
|
|
Role: &job.JobKey().Role,
|
|
JobKey: job.JobKey(),
|
|
UpdateStatuses: aurora.ACTIVE_JOB_UPDATE_STATES})
|
|
assert.NoError(t, err)
|
|
|
|
// Payload should have been rejected, no update should exist
|
|
require.Len(t, summary.GetResult_().GetGetJobUpdateSummariesResult_().GetUpdateSummaries(), 0)
|
|
})
|
|
}
|
|
|
|
// Test configuring an executor that doesn't exist for CreateJob API
|
|
func TestRealisClient_CreateService_ExecutorDoesNotExist(t *testing.T) {
|
|
|
|
// Create a single job
|
|
job := realis.NewJob().
|
|
Environment("prod").
|
|
Role("vagrant").
|
|
Name("executordoesntexist").
|
|
ExecutorName("idontexist").
|
|
ExecutorData("").
|
|
CPU(.25).
|
|
RAM(4).
|
|
Disk(10).
|
|
InstanceCount(1)
|
|
|
|
settings := realis.NewUpdateSettings()
|
|
job.InstanceCount(3)
|
|
|
|
resp, result, err := r.CreateService(job, settings)
|
|
require.Error(t, err)
|
|
assert.Nil(t, result)
|
|
require.Equal(t, aurora.ResponseCode_INVALID_REQUEST, resp.GetResponseCode())
|
|
}
|
|
|
|
func TestRealisClient_ScheduleCronJob_Thermos(t *testing.T) {
|
|
|
|
thermosCronPayload, err := ioutil.ReadFile("examples/thermos_cron_payload.json")
|
|
assert.NoError(t, err)
|
|
|
|
job := realis.NewJob().
|
|
Environment("prod").
|
|
Role("vagrant").
|
|
Name("cronsched_job_test").
|
|
ExecutorName(aurora.AURORA_EXECUTOR_NAME).
|
|
ExecutorData(string(thermosCronPayload)).
|
|
CPU(1).
|
|
RAM(64).
|
|
Disk(100).
|
|
IsService(true).
|
|
InstanceCount(1).
|
|
AddPorts(1).
|
|
CronSchedule("* * * * *").
|
|
IsService(false)
|
|
|
|
_, err = r.ScheduleCronJob(job)
|
|
require.NoError(t, err)
|
|
|
|
t.Run("Start", func(t *testing.T) {
|
|
_, err := r.StartCronJob(job.JobKey())
|
|
require.NoError(t, err)
|
|
})
|
|
|
|
t.Run("Deschedule", func(t *testing.T) {
|
|
_, err := r.DescheduleCronJob(job.JobKey())
|
|
require.NoError(t, err)
|
|
})
|
|
}
|
|
func TestRealisClient_StartMaintenance(t *testing.T) {
|
|
hosts := []string{"localhost"}
|
|
|
|
_, _, err := r.StartMaintenance(hosts...)
|
|
require.NoError(t, err, "unable to start maintenance")
|
|
|
|
// Monitor change to DRAINING and DRAINED mode
|
|
hostResults, err := monitor.HostMaintenance(
|
|
hosts,
|
|
[]aurora.MaintenanceMode{aurora.MaintenanceMode_SCHEDULED},
|
|
1,
|
|
50)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, map[string]bool{"localhost": true}, hostResults)
|
|
|
|
_, _, err = r.EndMaintenance(hosts...)
|
|
require.NoError(t, err)
|
|
|
|
// Monitor change to DRAINING and DRAINED mode
|
|
_, err = monitor.HostMaintenance(
|
|
hosts,
|
|
[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
|
|
5,
|
|
10)
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
func TestRealisClient_DrainHosts(t *testing.T) {
|
|
hosts := []string{"localhost"}
|
|
|
|
t.Run("DrainHosts", func(t *testing.T) {
|
|
_, _, err := r.DrainHosts(hosts...)
|
|
assert.NoError(t, err, "unable to drain host")
|
|
})
|
|
|
|
t.Run("MonitorTransitionToDrained", func(t *testing.T) {
|
|
// Monitor change to DRAINING and DRAINED mode
|
|
hostResults, err := monitor.HostMaintenance(
|
|
hosts,
|
|
[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
|
|
1,
|
|
50)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, map[string]bool{"localhost": true}, hostResults)
|
|
})
|
|
|
|
t.Run("MonitorNonExistentHost", func(t *testing.T) {
|
|
// Monitor change to DRAINING and DRAINED mode
|
|
hostResults, err := monitor.HostMaintenance(
|
|
append(hosts, "IMAGINARY_HOST"),
|
|
[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
|
|
1,
|
|
1)
|
|
|
|
// Assert monitor returned an error that was not nil, and also a list of the non-transitioned hosts
|
|
require.Error(t, err)
|
|
assert.Equal(t, map[string]bool{"localhost": true, "IMAGINARY_HOST": false}, hostResults)
|
|
})
|
|
|
|
t.Run("EndMaintenance", func(t *testing.T) {
|
|
_, _, err := r.EndMaintenance(hosts...)
|
|
require.NoError(t, err)
|
|
|
|
// Monitor change to DRAINING and DRAINED mode
|
|
_, err = monitor.HostMaintenance(
|
|
hosts,
|
|
[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
|
|
5,
|
|
10)
|
|
assert.NoError(t, err)
|
|
})
|
|
|
|
}
|
|
|
|
func TestRealisClient_SLADrainHosts(t *testing.T) {
|
|
hosts := []string{"localhost"}
|
|
policy := aurora.SlaPolicy{PercentageSlaPolicy: &aurora.PercentageSlaPolicy{Percentage: 50.0}}
|
|
|
|
_, err := r.SLADrainHosts(&policy, 30, hosts...)
|
|
require.NoError(t, err, "unable to drain host with SLA policy")
|
|
|
|
// Monitor change to DRAINING and DRAINED mode
|
|
hostResults, err := monitor.HostMaintenance(
|
|
hosts,
|
|
[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
|
|
1,
|
|
50)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, map[string]bool{"localhost": true}, hostResults)
|
|
|
|
_, _, err = r.EndMaintenance(hosts...)
|
|
require.NoError(t, err)
|
|
|
|
// Monitor change to DRAINING and DRAINED mode
|
|
_, err = monitor.HostMaintenance(
|
|
hosts,
|
|
[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
|
|
5,
|
|
10)
|
|
assert.NoError(t, err)
|
|
|
|
// slaDrainHosts goes with default policy if no policy is specified
|
|
_, err = r.SLADrainHosts(nil, 30, hosts...)
|
|
require.NoError(t, err, "unable to drain host with SLA policy")
|
|
|
|
// Monitor change to DRAINING and DRAINED mode
|
|
hostResults, err = monitor.HostMaintenance(
|
|
hosts,
|
|
[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
|
|
1,
|
|
50)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, map[string]bool{"localhost": true}, hostResults)
|
|
|
|
_, _, err = r.EndMaintenance(hosts...)
|
|
require.NoError(t, err)
|
|
|
|
// Monitor change to DRAINING and DRAINED mode
|
|
_, err = monitor.HostMaintenance(
|
|
hosts,
|
|
[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
|
|
5,
|
|
10)
|
|
assert.NoError(t, err)
|
|
|
|
_, err = r.SLADrainHosts(&aurora.SlaPolicy{}, 30, hosts...)
|
|
require.NoError(t, err, "unable to drain host with SLA policy")
|
|
|
|
// Monitor change to DRAINING and DRAINED mode
|
|
hostResults, err = monitor.HostMaintenance(
|
|
hosts,
|
|
[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
|
|
1,
|
|
50)
|
|
assert.NoError(t, err)
|
|
assert.Equal(t, map[string]bool{"localhost": true}, hostResults)
|
|
|
|
_, _, err = r.EndMaintenance(hosts...)
|
|
require.NoError(t, err)
|
|
|
|
// Monitor change to DRAINING and DRAINED mode
|
|
_, err = monitor.HostMaintenance(
|
|
hosts,
|
|
[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
|
|
5,
|
|
10)
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
// Test multiple go routines using a single connection
|
|
func TestRealisClient_SessionThreadSafety(t *testing.T) {
|
|
|
|
// Create a single job
|
|
job := realis.NewJob().
|
|
Environment("prod").
|
|
Role("vagrant").
|
|
Name("create_thermos_job_test_multi").
|
|
ExecutorName(aurora.AURORA_EXECUTOR_NAME).
|
|
ExecutorData(string(thermosPayload)).
|
|
CPU(.25).
|
|
RAM(4).
|
|
Disk(10).
|
|
InstanceCount(1000) // Impossible amount to go live in any sane machine
|
|
|
|
_, err := r.CreateJob(job)
|
|
assert.NoError(t, err)
|
|
|
|
wg := sync.WaitGroup{}
|
|
threadCount := 20
|
|
wg.Add(threadCount)
|
|
for i := 0; i < threadCount; i++ {
|
|
|
|
// Launch multiple monitors that will poll every second
|
|
go func() {
|
|
defer wg.Done()
|
|
|
|
// Test Schedule status monitor for terminal state and timing out after 30 seconds
|
|
success, err := monitor.ScheduleStatus(job.JobKey(), job.GetInstanceCount(), realis.LiveStates, 1, 30)
|
|
assert.False(t, success)
|
|
assert.Error(t, err)
|
|
|
|
_, err = r.KillJob(job.JobKey())
|
|
assert.NoError(t, err)
|
|
}()
|
|
}
|
|
|
|
wg.Wait()
|
|
}
|
|
|
|
// Test setting and getting the quota
|
|
func TestRealisClient_Quota(t *testing.T) {
|
|
var resp *aurora.Response
|
|
var err error
|
|
|
|
cpu := 3.5
|
|
ram := int64(20480)
|
|
disk := int64(10240)
|
|
|
|
t.Run("Set", func(t *testing.T) {
|
|
resp, err = r.SetQuota("vagrant", &cpu, &ram, &disk)
|
|
assert.NoError(t, err)
|
|
})
|
|
|
|
t.Run("Get", func(t *testing.T) {
|
|
// Test GetQuota based on previously set values
|
|
var result *aurora.GetQuotaResult_
|
|
resp, err = r.GetQuota("vagrant")
|
|
if resp.GetResult_() != nil {
|
|
result = resp.GetResult_().GetQuotaResult_
|
|
}
|
|
assert.NoError(t, err)
|
|
|
|
for _, res := range result.Quota.GetResources() {
|
|
switch true {
|
|
case res.DiskMb != nil:
|
|
assert.Equal(t, disk, *res.DiskMb)
|
|
case res.NumCpus != nil:
|
|
assert.Equal(t, cpu, *res.NumCpus)
|
|
case res.RamMb != nil:
|
|
assert.Equal(t, ram, *res.RamMb)
|
|
}
|
|
}
|
|
})
|
|
}
|
|
|
|
func TestRealisClient_ForceImplicitTaskReconciliation(t *testing.T) {
|
|
err := r.ForceImplicitTaskReconciliation()
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
func TestRealisClient_ForceExplicitTaskReconciliation(t *testing.T) {
|
|
// Default value
|
|
err := r.ForceExplicitTaskReconciliation(nil)
|
|
assert.NoError(t, err)
|
|
|
|
// Custom batch value
|
|
err = r.ForceExplicitTaskReconciliation(thrift.Int32Ptr(32))
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
func TestRealisClient_PartitionPolicy(t *testing.T) {
|
|
|
|
role := "vagrant"
|
|
job := realis.NewJob().
|
|
Environment("prod").
|
|
Role(role).
|
|
Name("create_thermos_job_partition_policy_test").
|
|
ExecutorName(aurora.AURORA_EXECUTOR_NAME).
|
|
ExecutorData(string(thermosPayload)).
|
|
CPU(.5).
|
|
RAM(64).
|
|
Disk(100).
|
|
IsService(true).
|
|
InstanceCount(2).
|
|
PartitionPolicy(&aurora.PartitionPolicy{Reschedule: true, DelaySecs: thrift.Int64Ptr(30)})
|
|
|
|
settings := realis.NewUpdateSettings()
|
|
settings.UpdateGroupSize = 2
|
|
|
|
_, result, err := r.CreateService(job, settings)
|
|
assert.NoError(t, err)
|
|
|
|
var ok bool
|
|
var mErr error
|
|
|
|
if ok, mErr = monitor.JobUpdate(*result.GetKey(), 5, 180); !ok || mErr != nil {
|
|
// Update may already be in a terminal state so don't check for error
|
|
_, err := r.AbortJobUpdate(*result.GetKey(), "Monitor timed out.")
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
// Clean up after finishing test
|
|
r.KillJob(job.JobKey())
|
|
}
|
|
|
|
func TestAuroraJob_UpdateSlaPolicy(t *testing.T) {
|
|
|
|
tests := []struct {
|
|
name string
|
|
args aurora.SlaPolicy
|
|
}{
|
|
{
|
|
"create_service_with_sla_count_policy_test",
|
|
aurora.SlaPolicy{CountSlaPolicy: &aurora.CountSlaPolicy{Count: 1, DurationSecs: 15}},
|
|
},
|
|
{
|
|
"create_service_with_sla_percentage_policy_test",
|
|
aurora.SlaPolicy{PercentageSlaPolicy: &aurora.PercentageSlaPolicy{Percentage: 0.25, DurationSecs: 15}},
|
|
},
|
|
{
|
|
"create_service_with_sla_coordinator_policy_test",
|
|
aurora.SlaPolicy{CoordinatorSlaPolicy: &aurora.CoordinatorSlaPolicy{
|
|
CoordinatorUrl: "http://localhost/endpoint", StatusKey: "aurora_test"}},
|
|
},
|
|
}
|
|
role := "vagrant"
|
|
|
|
_, err := r.SetQuota(role, thrift.Float64Ptr(6.0), thrift.Int64Ptr(1024), thrift.Int64Ptr(1024))
|
|
assert.NoError(t, err)
|
|
|
|
for _, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
|
|
// Create a single job
|
|
job := realis.NewJob().
|
|
Environment("prod").
|
|
Role(role).
|
|
Name(tt.name).
|
|
ExecutorName(aurora.AURORA_EXECUTOR_NAME).
|
|
ExecutorData(string(thermosPayload)).
|
|
CPU(.01).
|
|
RAM(2).
|
|
Disk(5).
|
|
InstanceCount(4).
|
|
IsService(true).
|
|
SlaPolicy(&tt.args).
|
|
Tier("preferred")
|
|
|
|
settings := realis.NewUpdateSettings()
|
|
settings.UpdateGroupSize = 2
|
|
settings.MinWaitInInstanceRunningMs = 5 * 1000
|
|
|
|
_, result, err := r.CreateService(job, settings)
|
|
require.NoError(t, err)
|
|
assert.NotNil(t, result)
|
|
|
|
var ok bool
|
|
var mErr error
|
|
|
|
if ok, mErr = monitor.JobUpdate(*result.GetKey(), 5, 240); !ok || mErr != nil {
|
|
// Update may already be in a terminal state so don't check for error
|
|
_, err := r.AbortJobUpdate(*result.GetKey(), "Monitor timed out.")
|
|
|
|
_, err = r.KillJob(job.JobKey())
|
|
|
|
assert.NoError(t, err)
|
|
}
|
|
assert.NoError(t, mErr)
|
|
assert.True(t, ok)
|
|
|
|
// Kill task test task after confirming it came up fine
|
|
_, err = r.KillJob(job.JobKey())
|
|
|
|
assert.NoError(t, err)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestRealisClient_UpdateStrategies(t *testing.T) {
|
|
|
|
// Create a single job
|
|
job := realis.NewJob().
|
|
Environment("prod").
|
|
Role("vagrant").
|
|
ExecutorName(aurora.AURORA_EXECUTOR_NAME).
|
|
ExecutorData(string(thermosPayload)).
|
|
CPU(.01).
|
|
RAM(4).
|
|
Disk(10).
|
|
InstanceCount(6).
|
|
IsService(true)
|
|
|
|
strategies := []struct {
|
|
UpdateJob *realis.UpdateJob
|
|
Name string
|
|
}{
|
|
{
|
|
UpdateJob: realis.NewDefaultUpdateJob(job.TaskConfig()).
|
|
QueueUpdateStrategy(aurora.QueueJobUpdateStrategy{GroupSize: 2}).
|
|
InstanceCount(6).
|
|
WatchTime(1000),
|
|
Name: "Queue",
|
|
},
|
|
{
|
|
UpdateJob: realis.NewDefaultUpdateJob(job.TaskConfig()).
|
|
BatchUpdateStrategy(aurora.BatchJobUpdateStrategy{GroupSize: 2}).
|
|
InstanceCount(6).
|
|
WatchTime(1000),
|
|
Name: "Batch",
|
|
},
|
|
{
|
|
UpdateJob: realis.NewDefaultUpdateJob(job.TaskConfig()).
|
|
VariableBatchStrategy(aurora.VariableBatchJobUpdateStrategy{GroupSizes: []int32{1, 2, 3}}).
|
|
InstanceCount(6).
|
|
WatchTime(1000),
|
|
Name: "VarBatch",
|
|
},
|
|
}
|
|
|
|
for _, strategy := range strategies {
|
|
t.Run("TestRealisClient_UpdateStrategies_"+strategy.Name, func(t *testing.T) {
|
|
job.Name("update_strategies_" + strategy.Name)
|
|
resp, err := r.StartJobUpdate(strategy.UpdateJob, "")
|
|
|
|
assert.NoError(t, err)
|
|
assert.NotNil(t, resp)
|
|
assert.NotNil(t, resp.GetResult_())
|
|
assert.NotNil(t, resp.GetResult_().GetStartJobUpdateResult_())
|
|
assert.NotNil(t, resp.GetResult_().GetStartJobUpdateResult_().GetKey())
|
|
|
|
var ok bool
|
|
var mErr error
|
|
key := *resp.GetResult_().GetStartJobUpdateResult_().GetKey()
|
|
|
|
if ok, mErr = monitor.JobUpdate(key, 5, 240); !ok || mErr != nil {
|
|
// Update may already be in a terminal state so don't check for error
|
|
_, err := r.AbortJobUpdate(key, "Monitor timed out.")
|
|
assert.NoError(t, err)
|
|
}
|
|
_, err = r.KillJob(job.JobKey())
|
|
assert.NoError(t, err)
|
|
})
|
|
}
|
|
}
|
|
|
|
func TestRealisClient_BatchAwareAutoPause(t *testing.T) {
|
|
// Create a single job
|
|
job := realis.NewJob().
|
|
Environment("prod").
|
|
Role("vagrant").
|
|
Name("BatchAwareAutoPauseTest").
|
|
ExecutorName(aurora.AURORA_EXECUTOR_NAME).
|
|
ExecutorData(string(thermosPayload)).
|
|
CPU(.01).
|
|
RAM(4).
|
|
Disk(10).
|
|
InstanceCount(6).
|
|
IsService(true)
|
|
updateGroups := []int32{1, 2, 3}
|
|
strategy := realis.NewDefaultUpdateJob(job.TaskConfig()).
|
|
VariableBatchStrategy(aurora.VariableBatchJobUpdateStrategy{
|
|
GroupSizes: updateGroups,
|
|
AutopauseAfterBatch: true,
|
|
}).
|
|
InstanceCount(6).
|
|
WatchTime(1000)
|
|
|
|
resp, err := r.StartJobUpdate(strategy, "")
|
|
require.NoError(t, err)
|
|
require.NotNil(t, resp)
|
|
require.NotNil(t, resp.GetResult_())
|
|
require.NotNil(t, resp.GetResult_().GetStartJobUpdateResult_())
|
|
require.NotNil(t, resp.GetResult_().GetStartJobUpdateResult_().GetKey())
|
|
|
|
key := *resp.GetResult_().GetStartJobUpdateResult_().GetKey()
|
|
|
|
for i := range updateGroups {
|
|
curStep, mErr := monitor.AutoPausedUpdateMonitor(key, time.Second*5, time.Second*240)
|
|
if mErr != nil {
|
|
// Update may already be in a terminal state so don't check for error
|
|
_, err := r.AbortJobUpdate(key, "Monitor timed out.")
|
|
assert.NoError(t, err)
|
|
}
|
|
|
|
assert.Equal(t, i, curStep)
|
|
|
|
if i != len(updateGroups)-1 {
|
|
_, err = r.ResumeJobUpdate(&key, "auto resuming test")
|
|
require.NoError(t, err)
|
|
}
|
|
}
|
|
|
|
_, err = r.KillJob(job.JobKey())
|
|
assert.NoError(t, err)
|
|
}
|