* Changed incorrect license headers in some source files.
* Changed CreateService to mimic CreateJob by setting the batch size to the instance count.
* Renamed Getcerts to GetCerts to match the style of the rest of the codebase.
* Overhauled error handling: Backoff now recognizes temporary errors and continues to retry when it finds one.
* Renamed the thrift function call wrapper to be more explicit and added more safety checks to it.
* Moved the Jitter function from realis to retry.
* Made the API code more uniform so that it follows a single template.
* Added a lock around every thrift call and around every modification to the connection (see the sketch after this list). Note that calling ReestablishConn externally may still result in race conditions; we plan to make this function private in the near future.
* Added a test for Realis session thread safety: it exercises the ScheduleStatus monitor and verifies that the monitor times out.
* Returned nil alongside every error return so that there are no ambiguities.
* Used defer to unlock so that the lock is still released if a panic occurs.
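
The locking and retry changes above can be summed up with a minimal sketch. This is not the gorealis implementation; the names (`safeClient`, `call`, `temporary`, `withRetry`) and the fixed backoff are assumptions made for illustration. The point is that every remote call takes a mutex and releases it with defer, so the lock survives a panic, and the retry loop only keeps going while the error looks temporary.

```go
package main

import (
	"errors"
	"fmt"
	"net"
	"sync"
	"time"
)

// safeClient illustrates the locking pattern: every remote call takes the
// mutex and releases it with defer, so the lock is freed even if the wrapped
// call panics. These names are illustrative only, not the gorealis types.
type safeClient struct {
	mu sync.Mutex
}

// call wraps a single "thrift" invocation behind the lock.
func (c *safeClient) call(fn func() error) error {
	c.mu.Lock()
	defer c.mu.Unlock() // released on normal return and on panic
	return fn()
}

// temporary reports whether an error is worth retrying, e.g. a network
// timeout. Real code would inspect richer error types.
func temporary(err error) bool {
	var netErr net.Error
	return errors.As(err, &netErr) && netErr.Timeout()
}

// withRetry keeps retrying fn with a fixed backoff while the error looks
// temporary, and gives up immediately on permanent errors.
func withRetry(attempts int, backoff time.Duration, fn func() error) error {
	var err error
	for i := 0; i < attempts; i++ {
		if err = fn(); err == nil || !temporary(err) {
			return err
		}
		time.Sleep(backoff)
	}
	return err
}

func main() {
	c := &safeClient{}
	err := withRetry(3, 100*time.Millisecond, func() error {
		return c.call(func() error {
			fmt.Println("pretend thrift call")
			return nil
		})
	})
	fmt.Println("result:", err)
}
```

A real retry policy would also apply jitter to the backoff, as noted in the Jitter item above.
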
/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package realis_test

import (
	"fmt"
	"io/ioutil"
	"os"
	"sync"
	"testing"
	"time"

	"github.com/paypal/gorealis"
	"github.com/paypal/gorealis/gen-go/apache/aurora"
	"github.com/stretchr/testify/assert"
)

var r realis.Realis
var monitor *realis.Monitor
var thermosPayload []byte

func TestMain(m *testing.M) {
	var err error

	// New configuration to connect to Vagrant image
	r, err = realis.NewRealisClient(realis.SchedulerUrl("http://192.168.33.7:8081"),
		realis.BasicAuth("aurora", "secret"),
		realis.TimeoutMS(20000))
	if err != nil {
		fmt.Println("Please run vagrant box before running test suite")
		os.Exit(1)
	}

	// Create monitor
	monitor = &realis.Monitor{Client: r}

	thermosPayload, err = ioutil.ReadFile("examples/thermos_payload.json")
	if err != nil {
		fmt.Println("Error reading thermos payload file: ", err)
		os.Exit(1)
	}

	// Close the client explicitly before exiting; deferred calls do not run
	// once os.Exit is called.
	code := m.Run()
	r.Close()
	os.Exit(code)
}

func TestLeaderFromZK(t *testing.T) {
	cluster := realis.GetDefaultClusterFromZKUrl("192.168.33.7:2181")
	url, err := realis.LeaderFromZK(*cluster)

	assert.NoError(t, err)
	assert.Equal(t, "http://aurora.local:8081", url)
}

func TestRealisClient_ReestablishConn(t *testing.T) {

	// Test that we're able to tear down the old connection and create a new one.
	err := r.ReestablishConn()

	assert.NoError(t, err)
}

func TestGetCACerts(t *testing.T) {
	certs, err := realis.GetCerts("./examples/certs")
	assert.NoError(t, err)
	assert.Equal(t, 2, len(certs.Subjects()))
}

func TestRealisClient_CreateJob_Thermos(t *testing.T) {

	job := realis.NewJob().
		Environment("prod").
		Role("vagrant").
		Name("create_thermos_job_test").
		ExecutorName(aurora.AURORA_EXECUTOR_NAME).
		ExecutorData(string(thermosPayload)).
		CPU(1).
		RAM(64).
		Disk(100).
		IsService(true).
		InstanceCount(1).
		AddPorts(1)

	start := time.Now()
	resp, err := r.CreateJob(job)
	end := time.Now()
	assert.NoError(t, err)

	assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode)
	fmt.Printf("Create call took %d ns\n", (end.UnixNano() - start.UnixNano()))

	// Test Instances Monitor
	success, err := monitor.Instances(job.JobKey(), job.GetInstanceCount(), 1, 50)
	assert.True(t, success)
	assert.NoError(t, err)

	// Tasks must exist before they can be killed
	t.Run("TestRealisClient_KillJob_Thermos", func(t *testing.T) {
		start := time.Now()
		resp, err := r.KillJob(job.JobKey())
		end := time.Now()
		assert.NoError(t, err)

		assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode)
		fmt.Printf("Kill call took %d ns\n", (end.UnixNano() - start.UnixNano()))
	})
}

func TestRealisClient_ScheduleCronJob_Thermos(t *testing.T) {

	thermosCronPayload, err := ioutil.ReadFile("examples/thermos_cron_payload.json")
	assert.NoError(t, err)

	job := realis.NewJob().
		Environment("prod").
		Role("vagrant").
		Name("cronsched_job_test").
		ExecutorName(aurora.AURORA_EXECUTOR_NAME).
		ExecutorData(string(thermosCronPayload)).
		CPU(1).
		RAM(64).
		Disk(100).
		IsService(true).
		InstanceCount(1).
		AddPorts(1).
		CronSchedule("* * * * *").
		IsService(false)

	resp, err := r.ScheduleCronJob(job)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode)

	t.Run("TestRealisClient_StartCronJob_Thermos", func(t *testing.T) {
		start := time.Now()
		resp, err := r.StartCronJob(job.JobKey())
		end := time.Now()

		assert.NoError(t, err)
		assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode)
		fmt.Printf("Start cron call took %d ns\n", (end.UnixNano() - start.UnixNano()))
	})

	t.Run("TestRealisClient_DescheduleCronJob_Thermos", func(t *testing.T) {
		start := time.Now()
		resp, err := r.DescheduleCronJob(job.JobKey())
		end := time.Now()

		assert.NoError(t, err)
		assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode)
		fmt.Printf("Deschedule cron call took %d ns\n", (end.UnixNano() - start.UnixNano()))
	})
}

func TestRealisClient_DrainHosts(t *testing.T) {
	hosts := []string{"192.168.33.7"}
	_, _, err := r.DrainHosts(hosts...)
	if err != nil {
		fmt.Printf("error: %+v\n", err.Error())
		os.Exit(1)
	}

	// Monitor change to DRAINING and DRAINED mode
	hostResults, err := monitor.HostMaintenance(
		hosts,
		[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
		1,
		50)
	assert.Equal(t, map[string]bool{"192.168.33.7": true}, hostResults)
	assert.NoError(t, err)

	t.Run("TestRealisClient_MonitorNontransitioned", func(t *testing.T) {
		// Monitor change to DRAINING and DRAINED mode
		hostResults, err := monitor.HostMaintenance(
			append(hosts, "IMAGINARY_HOST"),
			[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
			1,
			1)

		// Assert that the monitor returned a non-nil error along with a list of the non-transitioned hosts
		assert.Error(t, err)
		assert.Equal(t, map[string]bool{"192.168.33.7": true, "IMAGINARY_HOST": false}, hostResults)
	})

	t.Run("TestRealisClient_EndMaintenance", func(t *testing.T) {
		_, _, err := r.EndMaintenance(hosts...)
		if err != nil {
			fmt.Printf("error: %+v\n", err.Error())
			os.Exit(1)
		}

		// Monitor change back to NONE mode
		_, err = monitor.HostMaintenance(
			hosts,
			[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
			5,
			10)
		assert.NoError(t, err)
	})
}

// Test multiple goroutines using a single connection
func TestRealisClient_SessionThreadSafety(t *testing.T) {

	// Create a single job
	job := realis.NewJob().
		Environment("prod").
		Role("vagrant").
		Name("create_thermos_job_test_multi").
		ExecutorName(aurora.AURORA_EXECUTOR_NAME).
		ExecutorData(string(thermosPayload)).
		CPU(.25).
		RAM(4).
		Disk(10).
		InstanceCount(100) // An instance count that cannot fully go live with the default Vagrant resources

	resp, err := r.CreateJob(job)
	assert.NoError(t, err)

	assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode)

	wg := sync.WaitGroup{}
	for i := 0; i < 20; i++ {

		wg.Add(1)

		// Launch multiple monitors that will poll every second
		go func() {
			defer wg.Done()

			// Test that the ScheduleStatus monitor times out after 30 seconds,
			// since all 100 instances can never reach a LIVE state
			success, err := monitor.ScheduleStatus(job.JobKey(), job.GetInstanceCount(), aurora.LIVE_STATES, 1, 30)
			assert.False(t, success)
			assert.Error(t, err)

			resp, err := r.KillJob(job.JobKey())
			assert.NoError(t, err)

			assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode)
		}()
	}

	wg.Wait()
}