gorealis-v2/realis_e2e_test.go

/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package realis_test

import (
	"fmt"
	"io/ioutil"
	"os"
	"sync"
	"testing"
	"time"

	realis "github.com/aurora-scheduler/gorealis/v2"
	"github.com/aurora-scheduler/gorealis/v2/gen-go/apache/aurora"
	"github.com/pkg/errors"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

var r *realis.Client
var thermosExec realis.ThermosExecutor

func TestMain(m *testing.M) {
	var err error

	// New configuration to connect to docker container
	r, err = realis.NewClient(realis.SchedulerUrl("http://192.168.33.7:8081"),
		realis.BasicAuth("aurora", "secret"),
		realis.Timeout(20*time.Second))

	if err != nil {
		fmt.Println("Please run docker-compose up -d before running test suite")
		os.Exit(1)
	}

	defer r.Close()

	thermosExec.AddProcess(realis.NewThermosProcess("boostrap", "echo bootsrapping")).
		AddProcess(realis.NewThermosProcess("hello_gorealis", "while true; do echo hello world from gorealis; sleep 10; done"))

	os.Exit(m.Run())
}

func TestNonExistentEndpoint(t *testing.T) {
	backoff := realis.Backoff{ // Reduce penalties for this test to make it quick
		Steps:    5,
		Duration: 1 * time.Second,
		Factor:   1.0,
		Jitter:   0.1}

	// Attempt to connect to a bad endpoint
	r, err := realis.NewClient(realis.SchedulerUrl("http://doesntexist.com:8081/api"),
		realis.Timeout(200*time.Millisecond),
		realis.BackOff(backoff),
	)
	defer r.Close()

	taskQ := &aurora.TaskQuery{}

	_, err = r.GetTasksWithoutConfigs(taskQ)

	// Check that we do error out of retrying
	assert.Error(t, err)

	// Check that the error before this one was a a retry error
	// TODO: Consider bubbling up timeout behaving error all the way up to the user.
	retryErr := realis.ToRetryCount(errors.Cause(err))
	assert.NotNil(t, retryErr, "error passed in is not a retry error")

	assert.Equal(t, backoff.Steps, retryErr.RetryCount(), "retry count is incorrect")

}

func TestBadCredentials(t *testing.T) {
	r, err := realis.NewClient(realis.SchedulerUrl("http://192.168.33.7:8081"),
		realis.BasicAuth("incorrect", "password"),
		realis.Debug())
	defer r.Close()

	assert.NoError(t, err)

	job := realis.NewJob().
		Environment("prod").
		Role("vagrant").
		Name("create_thermos_job_bad_creds_test").
		ThermosExecutor(thermosExec).
		CPU(.5).
		RAM(64).
		Disk(100).
		IsService(true).
		Production(false).
		Tier("preemptible").
		Priority(0).
		InstanceCount(2).
		AddPorts(1)

	assert.Error(t, r.CreateJob(job))
}

func TestThriftBinary(t *testing.T) {
	r, err := realis.NewClient(realis.SchedulerUrl("http://192.168.33.7:8081"),
		realis.BasicAuth("aurora", "secret"),
		realis.Timeout(20*time.Second),
		realis.ThriftBinary())

	assert.NoError(t, err)
	defer r.Close()

	role := "all"
	taskQ := &aurora.TaskQuery{
		Role: &role,
	}

	// Perform a simple API call to test Thrift Binary
	_, err = r.GetTasksWithoutConfigs(taskQ)

	assert.NoError(t, err)

}

func TestThriftJSON(t *testing.T) {
	r, err := realis.NewClient(realis.SchedulerUrl("http://192.168.33.7:8081"),
		realis.BasicAuth("aurora", "secret"),
		realis.Timeout(20*time.Second),
		realis.ThriftJSON())

	assert.NoError(t, err)

	role := "all"
	taskQ := &aurora.TaskQuery{
		Role: &role,
	}

	// Perform a simple API call to test Thrift Binary
	_, err = r.GetTasksWithoutConfigs(taskQ)

	assert.NoError(t, err)

	r.Close()

}

func TestNoopLogger(t *testing.T) {
	r, err := realis.NewClient(realis.SchedulerUrl("http://192.168.33.7:8081"),
		realis.BasicAuth("aurora", "secret"),
		realis.SetLogger(realis.NoopLogger{}))

	assert.NoError(t, err)

	role := "all"
	taskQ := &aurora.TaskQuery{
		Role: &role,
	}

	// Perform a simple API call to test Thrift Binary
	_, err = r.GetTasksWithoutConfigs(taskQ)

	assert.NoError(t, err)

	r.Close()
}

func TestLeaderFromZK(t *testing.T) {
	cluster := realis.GetDefaultClusterFromZKUrl("192.168.33.2:2181")
	url, err := realis.LeaderFromZK(*cluster)

	assert.NoError(t, err)
	assert.Equal(t, "http://192.168.33.7:8081", url)

}
func TestInvalidAuroraURL(t *testing.T) {
	for _, url := range []string{
		"http://doesntexist.com:8081/apitest",
		"test://doesntexist.com:8081",
		"https://doesntexist.com:8081/testing/api",
	} {
		r, err := realis.NewClient(realis.SchedulerUrl(url))
		assert.Error(t, err)
		assert.Nil(t, r)
	}
}

func TestValidAuroraURL(t *testing.T) {
	for _, url := range []string{
		"http://domain.com:8081/api",
		"https://domain.com:8081/api",
		"domain.com:8081",
		"domain.com",
		"192.168.33.7",
		"192.168.33.7:8081",
		"192.168.33.7:8081/api",
	} {
		r, err := realis.NewClient(realis.SchedulerUrl(url))
		assert.NoError(t, err)
		assert.NotNil(t, r)
	}
}

func TestRealisClient_ReestablishConn(t *testing.T) {
	// Test that we're able to tear down the old connection and create a new one.
	err := r.ReestablishConn()

	assert.NoError(t, err)
}

func TestGetCACerts(t *testing.T) {
	certs, err := realis.GetCerts("./examples/certs")
	assert.NoError(t, err)
	assert.Equal(t, len(certs.Subjects()), 2)
}

func TestRealisClient_CreateJob_Thermos(t *testing.T) {
	role := "vagrant"
	job := realis.NewJob().
		Environment("prod").
		Role(role).
		Name("create_thermos_job_test").
		ThermosExecutor(thermosExec).
		CPU(.5).
		RAM(64).
		Disk(100).
		IsService(true).
		Production(false).
		Tier("preemptible").
		Priority(0).
		InstanceCount(2).
		AddPorts(1)

	err := r.CreateJob(job)
	assert.NoError(t, err)

	// Test Instances Monitor
	success, err := r.MonitorInstances(job.JobKey(), job.GetInstanceCount(), 1*time.Second, 50*time.Second)
	assert.True(t, success)
	assert.NoError(t, err)

	// Fetch all Jobs
	result, err := r.GetJobs(role)
	fmt.Println("GetJobs length: ", len(result.Configs))
	assert.Len(t, result.Configs, 1)
	assert.NoError(t, err)

	// Test asking the scheduler to perform a Snapshot
	t.Run("TestRealisClient_Snapshot", func(t *testing.T) {
		err := r.Snapshot()
		assert.NoError(t, err)
	})

	// Test asking the scheduler to backup a Snapshot
	t.Run("TestRealisClient_PerformBackup", func(t *testing.T) {
		err := r.PerformBackup()
		assert.NoError(t, err)
	})

	// Tasks must exist for it to, be killed
	t.Run("TestRealisClient_KillJob_Thermos", func(t *testing.T) {
		err := r.KillJob(job.JobKey())
		assert.NoError(t, err)

		success, err := r.MonitorInstances(job.JobKey(), 0, 1*time.Second, 90*time.Second)
		assert.True(t, success)
		assert.NoError(t, err)
	})
}

// Test configuring an executor that doesn't exist for CreateJob API
func TestRealisClient_CreateJob_ExecutorDoesNotExist(t *testing.T) {
	// Create a single job
	job := realis.NewJob().
		Environment("prod").
		Role("vagrant").
		Name("executordoesntexist").
		ExecutorName("idontexist").
		ExecutorData("").
		CPU(.25).
		RAM(4).
		Disk(10).
		InstanceCount(1)

	err := r.CreateJob(job)
	assert.Error(t, err)
}

// Test configuring an executor that doesn't exist for CreateJob API
func TestRealisClient_GetPendingReason(t *testing.T) {
	env := "prod"
	role := "vagrant"
	name := "pending_reason_test"

	// Create a single job
	job := realis.NewJob().
		Environment(env).
		Role(role).
		Name(name).
		ThermosExecutor(thermosExec).
		CPU(1000).
		RAM(64).
		Disk(100).
		InstanceCount(1)

	err := r.CreateJob(job)
	assert.NoError(t, err)

	taskQ := &aurora.TaskQuery{
		Role:        &role,
		Environment: &env,
		JobName:     &name,
	}

	reasons, err := r.GetPendingReason(taskQ)
	assert.NoError(t, err)
	assert.Len(t, reasons, 1)

	err = r.KillJob(job.JobKey())
	assert.NoError(t, err)
}

func TestRealisClient_CreateService_WithPulse_Thermos(t *testing.T) {
	fmt.Println("Creating service")
	role := "vagrant"
	job := realis.NewJobUpdate().
		Environment("prod").
		Role(role).
		Name("create_thermos_job_pulse_test").
		CPU(.5).
		RAM(64).
		Disk(100).
		ThermosExecutor(thermosExec).
		IsService(true).
		Production(false).
		Tier("preemptible").
		Priority(0).
		InstanceCount(2).
		AddPorts(1).
		AddLabel("currentTime", time.Now().String()).
		PulseIntervalTimeout(30 * time.Millisecond).
		BatchSize(1).WaitForBatchCompletion(true)

	result, err := r.CreateService(job)
	fmt.Println(result.String())

	assert.NoError(t, err)

	updateQ := aurora.JobUpdateQuery{
		Key:   result.GetKey(),
		Limit: 1,
	}

	var updateDetails []*aurora.JobUpdateDetails

	ticker := time.NewTicker(time.Second * 3)
	timer := time.NewTimer(time.Minute * 6)
	defer ticker.Stop()
	defer timer.Stop()

pulseLoop:
	for {
		select {
		case <-ticker.C:
			if result.GetKey() == nil {
				continue
			}

			pulseStatus, err := r.PulseJobUpdate(*result.GetKey())

			assert.Nil(t, err)
			if pulseStatus != aurora.JobUpdatePulseStatus_OK && pulseStatus != aurora.JobUpdatePulseStatus_FINISHED {
				assert.FailNow(t, "pulse update status received doesn't exist")
			}

			updateDetails, err = r.JobUpdateDetails(updateQ)
			assert.Nil(t, err)

			assert.Equal(t, len(updateDetails), 1, "no update matching query found")
			status := updateDetails[0].Update.Summary.State.Status

			if _, ok := realis.ActiveJobUpdateStates[status]; !ok {

				// Rolled forward is the only state in which an update has been successfully updated
				// if we encounter an inactive state and it is not at rolled forward, update failed
				if status == aurora.JobUpdateStatus_ROLLED_FORWARD {
					fmt.Println("Update succeeded")
					break pulseLoop
				} else {
					fmt.Println("Update failed")
					break pulseLoop
				}
			}

		case <-timer.C:
			err := r.AbortJobUpdate(*updateDetails[0].GetUpdate().GetSummary().GetKey(), "")
			assert.NoError(t, err)
			err = r.KillJob(job.JobKey())
			assert.NoError(t, err)
			assert.FailNow(t, "timed out during pulse update test")
		}
	}

	err = r.KillJob(job.JobKey())
	assert.NoError(t, err)
}

// Test configuring an executor that doesn't exist for CreateJob API
func TestRealisClient_CreateService(t *testing.T) {

	// Create a single job
	job := realis.NewJobUpdate().
		Environment("prod").
		Role("vagrant").
		Name("create_service_test").
		ThermosExecutor(thermosExec).
		CPU(.25).
		RAM(4).
		Disk(10).
		InstanceCount(3).
		WatchTime(20 * time.Second).
		IsService(true).
		Production(false).
		Tier("preemptible").
		Priority(0).
		BatchSize(2)

	result, err := r.CreateService(job)

	assert.NoError(t, err)
	assert.NotNil(t, result)

	var ok bool
	var mErr error

	if ok, mErr = r.MonitorJobUpdate(*result.GetKey(), 5*time.Second, 4*time.Minute); !ok || mErr != nil {
		// Update may already be in a terminal state so don't check for error
		err := r.AbortJobUpdate(*result.GetKey(), "Monitor timed out.")

		err = r.KillJob(job.JobKey())

		assert.NoError(t, err)
	}

	assert.True(t, ok)
	assert.NoError(t, mErr)

	// Kill task test task after confirming it came up fine
	err = r.KillJob(job.JobKey())

	assert.NoError(t, err)
}

// Test configuring an executor that doesn't exist for CreateJob API
func TestRealisClient_CreateService_ExecutorDoesNotExist(t *testing.T) {

	// Create a single job
	jobUpdate := realis.NewJobUpdate().
		Environment("prod").
		Role("vagrant").
		Name("executordoesntexist").
		ExecutorName("idontexist").
		ExecutorData("").
		CPU(.25).
		RAM(4).
		Disk(10).
		InstanceCount(3)

	result, err := r.CreateService(jobUpdate)

	assert.Error(t, err)
	assert.Nil(t, result)
}

func TestRealisClient_ScheduleCronJob_Thermos(t *testing.T) {

	thermosCronPayload, err := ioutil.ReadFile("examples/thermos_cron_payload.json")
	assert.NoError(t, err)

	job := realis.NewJob().
		Environment("prod").
		Role("vagrant").
		Name("cronsched_job_test").
		ExecutorName(aurora.AURORA_EXECUTOR_NAME).
		ExecutorData(string(thermosCronPayload)).
		CPU(1).
		RAM(64).
		Disk(100).
		IsService(true).
		Production(false).
		Tier("preemptible").
		Priority(0).
		InstanceCount(1).
		AddPorts(1).
		CronSchedule("* * * * *").
		IsService(false)

	err = r.ScheduleCronJob(job)
	assert.NoError(t, err)

	t.Run("TestRealisClient_StartCronJob_Thermos", func(t *testing.T) {
		err := r.StartCronJob(job.JobKey())
		assert.NoError(t, err)
	})

	t.Run("TestRealisClient_DeschedulerCronJob_Thermos", func(t *testing.T) {
		err := r.DescheduleCronJob(job.JobKey())
		assert.NoError(t, err)
	})
}
func TestRealisClient_StartMaintenance(t *testing.T) {
	hosts := []string{"localhost"}
	_, err := r.StartMaintenance(hosts...)
	assert.NoError(t, err)

	// Monitor change to DRAINING and DRAINED mode
	hostResults, err := r.MonitorHostMaintenance(
		hosts,
		[]aurora.MaintenanceMode{aurora.MaintenanceMode_SCHEDULED},
		1*time.Second,
		50*time.Second)
	assert.Equal(t, map[string]bool{"localhost": true}, hostResults)
	assert.NoError(t, err)

	_, err = r.EndMaintenance(hosts...)
	assert.NoError(t, err)

	// Monitor change to DRAINING and DRAINED mode
	_, err = r.MonitorHostMaintenance(
		hosts,
		[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
		5*time.Second,
		10*time.Second)
	assert.NoError(t, err)
}

func TestRealisClient_DrainHosts(t *testing.T) {
	hosts := []string{"localhost"}
	_, err := r.DrainHosts(hosts...)
	assert.NoError(t, err)

	// Monitor change to DRAINING and DRAINED mode
	hostResults, err := r.MonitorHostMaintenance(
		hosts,
		[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
		1*time.Second,
		50*time.Second)
	assert.Equal(t, map[string]bool{"localhost": true}, hostResults)
	assert.NoError(t, err)

	t.Run("TestRealisClient_MonitorNontransitioned", func(t *testing.T) {
		// Monitor change to DRAINING and DRAINED mode
		hostResults, err := r.MonitorHostMaintenance(
			append(hosts, "IMAGINARY_HOST"),
			[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
			1*time.Second,
			1*time.Second)

		// Assert monitor returned an error that was not nil, and also a list of the non-transitioned hosts
		assert.Error(t, err)
		assert.Equal(t, map[string]bool{"localhost": true, "IMAGINARY_HOST": false}, hostResults)
	})

	t.Run("TestRealisClient_EndMaintenance", func(t *testing.T) {
		_, err := r.EndMaintenance(hosts...)
		assert.NoError(t, err)

		// Monitor change to DRAINING and DRAINED mode
		_, err = r.MonitorHostMaintenance(
			hosts,
			[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
			5*time.Second,
			10*time.Second)
		assert.NoError(t, err)
	})

}

func TestRealisClient_SLADrainHosts(t *testing.T) {
	hosts := []string{"localhost"}
	policy := aurora.SlaPolicy{PercentageSlaPolicy: &aurora.PercentageSlaPolicy{Percentage: 50.0}}

	_, err := r.SLADrainHosts(&policy, 30, hosts...)
	if err != nil {
		fmt.Printf("error: %+v\n", err.Error())
		os.Exit(1)
	}

	// Monitor change to DRAINING and DRAINED mode
	hostResults, err := r.MonitorHostMaintenance(
		hosts,
		[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
		1*time.Second,
		50*time.Second)
	assert.Equal(t, map[string]bool{"localhost": true}, hostResults)
	assert.NoError(t, err)

	_, err = r.EndMaintenance(hosts...)
	assert.NoError(t, err)

	// Monitor change to DRAINING and DRAINED mode
	_, err = r.MonitorHostMaintenance(
		hosts,
		[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
		5*time.Second,
		10*time.Second)
	assert.NoError(t, err)

	// slaDrainHosts goes with default policy if no policy is specified
	_, err = r.SLADrainHosts(nil, 30, hosts...)
	if err != nil {
		fmt.Printf("error: %+v\n", err.Error())
		os.Exit(1)
	}
	hostResults, err = r.MonitorHostMaintenance(
		hosts,
		[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
		1*time.Second,
		50*time.Second)
	assert.Equal(t, map[string]bool{"localhost": true}, hostResults)
	assert.NoError(t, err)

	_, err = r.EndMaintenance(hosts...)
	assert.NoError(t, err)

	_, err = r.SLADrainHosts(&aurora.SlaPolicy{}, 30, hosts...)
	if err != nil {
		fmt.Printf("error: %+v\n", err.Error())
		os.Exit(1)
	}
	hostResults, err = r.MonitorHostMaintenance(
		hosts,
		[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
		1*time.Second,
		50*time.Second)
	assert.Equal(t, map[string]bool{"localhost": true}, hostResults)
	assert.NoError(t, err)

	_, err = r.EndMaintenance(hosts...)
	assert.NoError(t, err)
}

// Test multiple go routines using a single connection
func TestRealisClient_SessionThreadSafety(t *testing.T) {

	// Create a single job
	job := realis.NewJob().
		Environment("prod").
		Role("vagrant").
		Name("create_thermos_job_test_multi").
		ThermosExecutor(thermosExec).
		CPU(.25).
		RAM(4).
		Disk(10).
		InstanceCount(1000) // Impossible amount to go live in any sane machine

	err := r.CreateJob(job)
	assert.NoError(t, err)

	wg := sync.WaitGroup{}
	for i := 0; i < 20; i++ {

		wg.Add(1)

		// Launch multiple monitors that will poll every second
		go func() {
			defer wg.Done()

			// Test Schedule status monitor for terminal state and timing out after 30 seconds
			success, err := r.MonitorScheduleStatus(job.JobKey(), job.GetInstanceCount(), aurora.LIVE_STATES, 1, 30)
			assert.False(t, success)
			assert.Error(t, err)

			err = r.KillJob(job.JobKey())
			assert.NoError(t, err)

		}()
	}

	wg.Wait()
}

// Test setting and getting the quota
func TestRealisClient_SetQuota(t *testing.T) {
	var cpu = 3.5
	var ram int64 = 20480
	var disk int64 = 10240
	err := r.SetQuota("vagrant", &cpu, &ram, &disk)
	assert.NoError(t, err)
	t.Run("TestRealisClient_GetQuota", func(t *testing.T) {
		// Test GetQuota based on previously set values
		var result *aurora.GetQuotaResult_
		quotaResult, err := r.GetQuota("vagrant")

		assert.NoError(t, err)
		for _, res := range quotaResult.GetQuota().GetResources() {
			switch true {
			case res.DiskMb != nil:
				assert.Equal(t, disk, *res.DiskMb)
				break
			case res.NumCpus != nil:
				assert.Equal(t, cpu, *res.NumCpus)
				break
			case res.RamMb != nil:
				assert.Equal(t, ram, *res.RamMb)
				break
			}
		}
		fmt.Print("GetQuota Result", result.String())
	})
}

func TestRealisClient_ForceImplicitTaskReconciliation(t *testing.T) {
	err := r.ForceImplicitTaskReconciliation()
	assert.NoError(t, err)
}

func TestRealisClient_ForceExplicitTaskReconciliation(t *testing.T) {
	// Default value
	err := r.ForceExplicitTaskReconciliation(nil)
	assert.NoError(t, err)

	// Custom batch value
	var batchSize int32 = 32
	err = r.ForceExplicitTaskReconciliation(&batchSize)
	assert.NoError(t, err)
}

func TestRealisClient_PartitionPolicy(t *testing.T) {

	role := "vagrant"
	var partitionDelay int64 = 30
	job := realis.NewJobUpdate().
		Environment("prod").
		Role(role).
		Name("create_thermos_job_partition_policy_test").
		ThermosExecutor(thermosExec).
		CPU(.5).
		RAM(64).
		Disk(100).
		IsService(true).
		Production(false).
		Tier("preemptible").
		Priority(0).
		InstanceCount(2).
		BatchSize(2).
		PartitionPolicy(true, partitionDelay)

	result, err := r.CreateService(job)
	assert.NoError(t, err)

	var ok bool
	var mErr error

	if ok, mErr = r.MonitorJobUpdate(*result.GetKey(), 5*time.Second, 4*time.Minute); !ok || mErr != nil {
		// Update may already be in a terminal state so don't check for error
		err := r.AbortJobUpdate(*result.GetKey(), "Monitor timed out.")
		err = r.KillJob(job.JobKey())

		assert.NoError(t, err)
	}

}

func TestRealisClient_UpdateStrategies(t *testing.T) {
	// Create a single job
	job := realis.NewJob().
		Environment("prod").
		Role("vagrant").
		ThermosExecutor(thermosExec).
		CPU(.01).
		RAM(4).
		Disk(10).
		InstanceCount(6).
		IsService(true).
		Production(false).
		Tier("preemptible").
		Priority(0)

	// Needed to populate the task config correctly
	assert.NoError(t, job.BuildThermosPayload())

	strategies := []struct {
		jobUpdate *realis.JobUpdate
		Name      string
	}{
		{
			jobUpdate: realis.JobUpdateFromAuroraTask(job.AuroraTask()).
				QueueUpdateStrategy(2).
				InstanceCount(6).
				WatchTime(1000),
			Name: "Queue",
		},
		{
			jobUpdate: realis.JobUpdateFromAuroraTask(job.AuroraTask()).
				BatchUpdateStrategy(false, 2).
				InstanceCount(6).
				WatchTime(1000),
			Name: "Batch",
		},
		{
			jobUpdate: realis.JobUpdateFromAuroraTask(job.AuroraTask()).
				VariableBatchStrategy(false, 1, 2, 3).
				InstanceCount(6).
				WatchTime(1000),
			Name: "VarBatch",
		},
	}

	for _, strategy := range strategies {
		t.Run("TestRealisClient_UpdateStrategies_"+strategy.Name, func(t *testing.T) {
			strategy.jobUpdate.Name("update_strategies_" + strategy.Name)
			result, err := r.StartJobUpdate(strategy.jobUpdate, "")

			require.NoError(t, err)
			assert.NotNil(t, result)

			var ok bool
			var mErr error
			key := *result.GetKey()

			if ok, mErr = r.MonitorJobUpdate(key, 5, 240); !ok || mErr != nil {
				// Update may already be in a terminal state so don't check for error
				assert.NoError(t, r.AbortJobUpdate(key, "Monitor timed out."))
			}
			assert.NoError(t, r.KillJob(strategy.jobUpdate.JobKey()))
		})
	}
}

func TestRealisClient_BatchAwareAutoPause(t *testing.T) {
	// Create a single job
	job := realis.NewJob().
		Environment("prod").
		Role("vagrant").
		Name("batch_aware_auto_pause_test").
		ThermosExecutor(thermosExec).
		CPU(.01).
		RAM(4).
		Disk(10).
		InstanceCount(6).
		IsService(true)

	updateGroups := []int32{1, 3}
	strategy := realis.JobUpdateFromAuroraTask(job.AuroraTask()).
		VariableBatchStrategy(true, updateGroups...).
		InstanceCount(6).
		WatchTime(1000)

	result, err := r.StartJobUpdate(strategy, "")
	require.NoError(t, err)
	require.NotNil(t, result)

	key := *result.GetKey()

	for i := range updateGroups {
		curStep, mErr := r.MonitorAutoPausedUpdate(key, time.Second*5, time.Minute*5)
		if mErr != nil {
			fmt.Println(mErr)
			// Update may already be in a terminal state so don't check for error
			_ = r.AbortJobUpdate(key, "Monitor timed out.")
		}

		assert.Equal(t, i, curStep)

		if i != len(updateGroups)-1 {
			require.NoError(t, err)
			require.NoError(t, r.ResumeJobUpdate(key, "auto resuming test"))
		}
	}
	assert.NoError(t, r.AbortJobUpdate(key, ""))
	assert.NoError(t, r.KillJob(strategy.JobKey()))
}

func TestRealisClient_GetJobSummary(t *testing.T) {
	role := "vagrant"
	env := "prod"
	name := "test_get_job_summary"
	// Create a single job
	job := realis.NewJob().
		Environment(env).
		Role(role).
		Name(name).
		ThermosExecutor(thermosExec).
		CPU(.25).
		RAM(4).
		Disk(10).
		InstanceCount(3).
		IsService(true).
		Production(false).
		Tier("preemptible").
		Priority(0)

	err := r.CreateJob(job)
	assert.NoError(t, err)

	success, err := r.MonitorScheduleStatus(job.JobKey(),
		job.GetInstanceCount(),
		aurora.ACTIVE_STATES,
		1*time.Second,
		150*time.Second)
	assert.True(t, success)
	assert.NoError(t, err)

	// get job summaries of the role
	summary, err := r.GetJobSummary(role)
	assert.NoError(t, err)
	assert.NotNil(t, summary)
	jobCount := 0
	for _, s := range summary.Summaries {
		jobKey := s.Job.TaskConfig.Job
		if jobKey.Environment == env && jobKey.Name == name {
			jobCount++
		}
	}
	assert.Equal(t, 1, jobCount)

	err = r.KillJob(job.JobKey())
	assert.NoError(t, err)
}