/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package realis_test

import (
	"fmt"
	"io/ioutil"
	"os"
	"sync"
	"testing"
	"time"

	"github.com/apache/thrift/lib/go/thrift"
	realis "github.com/paypal/gorealis"
	"github.com/paypal/gorealis/gen-go/apache/aurora"
	"github.com/paypal/gorealis/response"
	"github.com/pkg/errors"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
)

var r realis.Realis
var monitor *realis.Monitor
var thermosPayload []byte

const auroraURL = "http://192.168.33.7:8081"

func TestMain(m *testing.M) {
	var err error

	// New configuration to connect to the docker container
	r, err = realis.NewRealisClient(realis.SchedulerUrl(auroraURL),
		realis.BasicAuth("aurora", "secret"),
		realis.TimeoutMS(20000))
	if err != nil {
		fmt.Println("Please run docker-compose up -d before running the test suite")
		os.Exit(1)
	}

	// Create monitor
	monitor = &realis.Monitor{Client: r}

	thermosPayload, err = ioutil.ReadFile("examples/thermos_payload.json")
	if err != nil {
		fmt.Println("Error reading thermos payload file: ", err)
		os.Exit(1)
	}

	// os.Exit does not run deferred calls, so close the client explicitly before exiting.
	code := m.Run()
	r.Close()
	os.Exit(code)
}

func TestNonExistentEndpoint(t *testing.T) {
	// Reduce penalties for this test to make it quick
	backoff := realis.Backoff{
		Steps:    3,
		Duration: 200 * time.Millisecond,
		Factor:   1.0,
		Jitter:   0.1}

	taskQ := &aurora.TaskQuery{}
	badEndpoint := "http://idontexist.local:8081/api"

	t.Run("WithRetries", func(t *testing.T) {
		// Attempt to connect to a bad endpoint
		badClient, err := realis.NewRealisClient(
			realis.SchedulerUrl(badEndpoint),
			realis.TimeoutMS(200000),
			realis.BackOff(backoff),
		)

		require.NoError(t, err)
		require.NotNil(t, badClient)
		defer badClient.Close()

		_, err = badClient.GetTasksWithoutConfigs(taskQ)

		// Check that we do error out of retrying
		require.Error(t, err)

		// Check that the error before this one was a retry error
		// TODO: Consider bubbling the timeout-related error all the way up to the user.
		retryErr := realis.ToRetryCount(errors.Cause(err))
		require.NotNil(t, retryErr, "error passed in is not a retry error")
		assert.Equal(t, backoff.Steps, retryErr.RetryCount(), "retry count is incorrect")
	})

	t.Run("FailOnLookup", func(t *testing.T) {
		// Attempt to connect to a bad endpoint
		badClient, err := realis.NewRealisClient(
			realis.SchedulerUrl(badEndpoint),
			realis.TimeoutMS(200000),
			realis.BackOff(backoff),
			realis.FailOnPermanentErrors(),
		)

		require.NoError(t, err)
		require.NotNil(t, badClient)
		defer badClient.Close()

		_, err = badClient.GetTasksWithoutConfigs(taskQ)

		// Check that we do error out of retrying
		require.Error(t, err)
	})
}

func TestThriftBinary(t *testing.T) {
	r, err := realis.NewRealisClient(realis.SchedulerUrl(auroraURL),
		realis.BasicAuth("aurora", "secret"),
		realis.TimeoutMS(20000),
		realis.ThriftBinary())
	require.NoError(t, err)
	defer r.Close()

	role := "all"
	taskQ := &aurora.TaskQuery{
		Role: &role,
	}

	// Perform a simple API call to test the Thrift binary protocol
	_, err = r.GetTasksWithoutConfigs(taskQ)
	assert.NoError(t, err)
}

func TestThriftJSON(t *testing.T) {
	r, err := realis.NewRealisClient(realis.SchedulerUrl(auroraURL),
		realis.BasicAuth("aurora", "secret"),
		realis.TimeoutMS(20000),
		realis.ThriftJSON())
	require.NoError(t, err)
	defer r.Close()

	role := "all"
	taskQ := &aurora.TaskQuery{
		Role: &role,
	}

	// Perform a simple API call to test the Thrift JSON protocol
	_, err = r.GetTasksWithoutConfigs(taskQ)
	assert.NoError(t, err)
}

func TestNoopLogger(t *testing.T) {
	r, err := realis.NewRealisClient(realis.SchedulerUrl(auroraURL),
		realis.BasicAuth("aurora", "secret"),
		realis.SetLogger(realis.NoopLogger{}))
	require.NoError(t, err)
	defer r.Close()

	role := "all"
	taskQ := &aurora.TaskQuery{
		Role: &role,
	}

	// Perform a simple API call to test the no-op logger
	_, err = r.GetTasksWithoutConfigs(taskQ)
	assert.NoError(t, err)
}

func TestLeaderFromZK(t *testing.T) {
	cluster := realis.GetDefaultClusterFromZKUrl("192.168.33.2:2181")
	url, err := realis.LeaderFromZK(*cluster)

	require.NoError(t, err)

	// Address stored inside of ZK might be different than the one we connect to in our tests.
	assert.Equal(t, "http://192.168.33.7:8081", url)
}

func TestRealisClient_ReestablishConn(t *testing.T) {
	// Test that we're able to tear down the old connection and create a new one.
	err := r.ReestablishConn()

	assert.NoError(t, err)
}

func TestRealisClient_CreateJob_Thermos(t *testing.T) {
	role := "vagrant"
	job := realis.NewJob().
		Environment("prod").
		Role(role).
		Name("create_thermos_job_test").
		ExecutorName(aurora.AURORA_EXECUTOR_NAME).
		ExecutorData(string(thermosPayload)).
		CPU(.1).
		RAM(16).
		Disk(50).
		IsService(true).
		InstanceCount(2).
		AddPorts(1)

	_, err := r.CreateJob(job)
	require.NoError(t, err)

	// Test Instances Monitor
	success, err := monitor.Instances(job.JobKey(), job.GetInstanceCount(), 1, 50)
	assert.True(t, success)
	assert.NoError(t, err)

	// Fetch all Jobs
	_, result, err := r.GetJobs(role)
	assert.Len(t, result.Configs, 1)
	assert.NoError(t, err)

	// Test asking the scheduler to perform a Snapshot
	t.Run("Snapshot", func(t *testing.T) {
		err := r.Snapshot()
		assert.NoError(t, err)
	})

	// Test asking the scheduler to perform a backup
	t.Run("PerformBackup", func(t *testing.T) {
		err := r.PerformBackup()
		assert.NoError(t, err)
	})

	t.Run("GetTaskStatus", func(t *testing.T) {
		status, err := r.GetTaskStatus(&aurora.TaskQuery{
			JobKeys:  []*aurora.JobKey{job.JobKey()},
			Statuses: []aurora.ScheduleStatus{aurora.ScheduleStatus_RUNNING}})

		require.NoError(t, err)
		assert.NotNil(t, status)
		assert.Len(t, status, 2)

		// TODO: Assert that the assigned task matches the configuration of the task scheduled
	})

	t.Run("AddInstances", func(t *testing.T) {
		_, err := r.AddInstances(aurora.InstanceKey{JobKey: job.JobKey(), InstanceId: 0}, 2)
		require.NoError(t, err)

		success, err := monitor.Instances(job.JobKey(), 4, 1, 50)
		assert.True(t, success)
		assert.NoError(t, err)
	})

	t.Run("KillInstances", func(t *testing.T) {
		_, err := r.KillInstances(job.JobKey(), 2, 3)
		require.NoError(t, err)

		success, err := monitor.Instances(job.JobKey(), 2, 1, 50)
		assert.True(t, success)
		assert.NoError(t, err)
	})

	t.Run("RestartInstances", func(t *testing.T) {
		_, err := r.RestartInstances(job.JobKey(), 0)
		require.NoError(t, err)

		success, err := monitor.Instances(job.JobKey(), 2, 1, 50)
		assert.True(t, success)
		assert.NoError(t, err)
	})

	t.Run("RestartJobs", func(t *testing.T) {
		_, err := r.RestartJob(job.JobKey())
		require.NoError(t, err)

		success, err := monitor.Instances(job.JobKey(), 2, 1, 50)
		assert.True(t, success)
		assert.NoError(t, err)
	})

	// Tasks must exist for the job to be killed
	t.Run("KillJob", func(t *testing.T) {
		_, err := r.KillJob(job.JobKey())
		require.NoError(t, err)

		success, err := monitor.Instances(job.JobKey(), 0, 1, 50)
		assert.True(t, success)
		assert.NoError(t, err)
	})

	t.Run("Duplicate_Metadata", func(t *testing.T) {
		job.Name("thermos_duplicate_metadata").
			AddLabel("hostname", "cookie").
			AddLabel("hostname", "candy").
			AddLabel("hostname", "popcorn").
			AddLabel("hostname", "chips").
			AddLabel("chips", "chips")

		_, err := r.CreateJob(job)
		require.NoError(t, err)

		success, err := monitor.Instances(job.JobKey(), 2, 1, 50)
		assert.True(t, success)
		assert.NoError(t, err)

		_, err = r.KillJob(job.JobKey())
		assert.NoError(t, err)
	})

	t.Run("Duplicate_constraints", func(t *testing.T) {
		job.Name("thermos_duplicate_constraints").
			AddValueConstraint("zone", false, "east", "west").
			AddValueConstraint("zone", false, "east").
			AddValueConstraint("zone", true, "west")

		_, err := r.CreateJob(job)
		require.NoError(t, err)

		success, err := monitor.Instances(job.JobKey(), 2, 1, 50)
		assert.True(t, success)
		assert.NoError(t, err)

		_, err = r.KillJob(job.JobKey())
		assert.NoError(t, err)
	})

	t.Run("Overwrite_constraints", func(t *testing.T) {
		job.Name("thermos_overwrite_constraints").
			AddLimitConstraint("zone", 1).
			AddValueConstraint("zone", true, "west", "east").
AddLimitConstraint("zone", 2) _, err := r.CreateJob(job) require.NoError(t, err) success, err := monitor.Instances(job.JobKey(), 2, 1, 50) assert.True(t, success) assert.NoError(t, err) _, err = r.KillJob(job.JobKey()) assert.NoError(t, err) }) } // Test configuring an executor that doesn't exist for CreateJob API func TestRealisClient_CreateJob_ExecutorDoesNotExist(t *testing.T) { // Create a single job job := realis.NewJob(). Environment("prod"). Role("vagrant"). Name("executordoesntexist"). ExecutorName("idontexist"). ExecutorData(""). CPU(.25). RAM(4). Disk(10). InstanceCount(1) resp, err := r.CreateJob(job) assert.Error(t, err) assert.Equal(t, aurora.ResponseCode_INVALID_REQUEST, resp.GetResponseCode()) } // Test configuring an executor that doesn't exist for CreateJob API func TestRealisClient_GetPendingReason(t *testing.T) { env := "prod" role := "vagrant" name := "pending_reason_test" // Create a single job job := realis.NewJob(). Environment(env). Role(role). Name(name). ExecutorName(aurora.AURORA_EXECUTOR_NAME). ExecutorData(string(thermosPayload)). CPU(1000). RAM(64). Disk(100). InstanceCount(1) resp, err := r.CreateJob(job) require.NoError(t, err) assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode) taskQ := &aurora.TaskQuery{ Role: &role, Environment: &env, JobName: &name, } reasons, err := r.GetPendingReason(taskQ) assert.NoError(t, err) assert.Len(t, reasons, 1) _, err = r.KillJob(job.JobKey()) assert.NoError(t, err) } func TestRealisClient_CreateService_WithPulse_Thermos(t *testing.T) { fmt.Println("Creating service") role := "vagrant" job := realis.NewJob(). Environment("prod"). Role(role). Name("create_thermos_job_test"). ExecutorName(aurora.AURORA_EXECUTOR_NAME). ExecutorData(string(thermosPayload)). CPU(.5). RAM(64). Disk(100). IsService(true). InstanceCount(1). AddPorts(1). 
AddLabel("currentTime", time.Now().String()) settings := realis.NewUpdateSettings() settings.BlockIfNoPulsesAfterMs = thrift.Int32Ptr(500) settings.UpdateGroupSize = 1 settings.MinWaitInInstanceRunningMs = 5000 settings.WaitForBatchCompletion = true job.InstanceCount(2) _, result, err := r.CreateService(job, settings) require.NoError(t, err) updateQ := aurora.JobUpdateQuery{ Key: result.GetKey(), Limit: 1, } var updateDetails []*aurora.JobUpdateDetails ticker := time.NewTicker(time.Second * 1) timer := time.NewTimer(time.Minute * 6) defer ticker.Stop() defer timer.Stop() pulseLoop: for { select { case <-ticker.C: _, err = r.PulseJobUpdate(result.GetKey()) assert.NoError(t, err, "unable to pulse job update") respDetail, err := r.JobUpdateDetails(updateQ) assert.NoError(t, err) updateDetails = response.JobUpdateDetails(respDetail) require.Len(t, updateDetails, 1, "No update found") status := updateDetails[0].Update.Summary.State.Status if _, ok := realis.ActiveJobUpdateStates[status]; !ok { // Rolled forward is the only state in which an update has been successfully updated // if we encounter an inactive state and it is not at rolled forward, update failed if status == aurora.JobUpdateStatus_ROLLED_FORWARD { fmt.Println("update succeed") break pulseLoop } else { fmt.Println("update failed") break pulseLoop } } case <-timer.C: _, err := r.AbortJobUpdate(*updateDetails[0].GetUpdate().GetSummary().GetKey(), "") assert.NoError(t, err) _, err = r.KillJob(job.JobKey()) assert.NoError(t, err, "timed out during pulse update test") t.FailNow() } } _, err = r.KillJob(job.JobKey()) assert.NoError(t, err) } // Test configuring an executor that doesn't exist for CreateJob API func TestRealisClient_CreateService(t *testing.T) { // Create a single job job := realis.NewJob(). Environment("prod"). Role("vagrant"). Name("create_service_test"). ExecutorName(aurora.AURORA_EXECUTOR_NAME). ExecutorData(string(thermosPayload)). CPU(.25). RAM(4). Disk(10). InstanceCount(3). IsService(true) settings := realis.NewUpdateSettings() settings.UpdateGroupSize = 2 settings.MinWaitInInstanceRunningMs = 5000 job.InstanceCount(3) _, result, err := r.CreateService(job, settings) require.NoError(t, err) assert.NotNil(t, result) // Test asking the scheduler to backup a Snapshot t.Run("PauseJobUpdate", func(t *testing.T) { _, err = r.PauseJobUpdate(result.GetKey(), "") assert.NoError(t, err) }) t.Run("ResumeJobUpdate", func(t *testing.T) { _, err = r.ResumeJobUpdate(result.GetKey(), "") assert.NoError(t, err) }) var ok bool var mErr error if ok, mErr = monitor.JobUpdate(*result.GetKey(), 5, 240); !ok || mErr != nil { // Update may already be in a terminal state so don't check for error _, err := r.AbortJobUpdate(*result.GetKey(), "Monitor timed out.") _, err = r.KillJob(job.JobKey()) assert.NoError(t, err) } require.NoError(t, mErr) assert.True(t, ok) // Kill task test task after confirming it came up fine _, err = r.KillJob(job.JobKey()) assert.NoError(t, err) success, err := monitor.Instances(job.JobKey(), 0, 1, 50) require.NoError(t, mErr) assert.True(t, success) // Create a client which will timeout and close the connection before receiving an answer timeoutClient, err := realis.NewRealisClient( realis.SchedulerUrl(auroraURL), realis.BasicAuth("aurora", "secret"), realis.TimeoutMS(5), ) require.NoError(t, err) defer timeoutClient.Close() // Test case where http connection timeouts out. 
t.Run("TimeoutError", func(t *testing.T) { job.Name("createService_timeout") // Make sure a timedout error was returned _, _, err = timeoutClient.CreateService(job, settings) require.Error(t, err) assert.True(t, realis.IsTimeout(err)) updateReceivedQuery := aurora.JobUpdateQuery{ Role: &job.JobKey().Role, JobKey: job.JobKey(), UpdateStatuses: aurora.ACTIVE_JOB_UPDATE_STATES, Limit: 1} updateSummaries, err := monitor.JobUpdateQuery(updateReceivedQuery, time.Second*1, time.Second*50) require.NoError(t, err) require.Len(t, updateSummaries, 1) r.AbortJobUpdate(*updateSummaries[0].Key, "Cleaning up") _, err = r.KillJob(job.JobKey()) assert.NoError(t, err) }) // Test case where http connection timeouts out. t.Run("TimeoutError_BadPayload", func(t *testing.T) { // Illegal payload job.InstanceCount(-1) job.Name("createService_timeout_bad_payload") // Make sure a timedout error was returned _, _, err = timeoutClient.CreateService(job, settings) require.Error(t, err) assert.True(t, realis.IsTimeout(err)) summary, err := r.GetJobUpdateSummaries( &aurora.JobUpdateQuery{ Role: &job.JobKey().Role, JobKey: job.JobKey(), UpdateStatuses: aurora.ACTIVE_JOB_UPDATE_STATES}) assert.NoError(t, err) // Payload should have been rejected, no update should exist require.Len(t, summary.GetResult_().GetGetJobUpdateSummariesResult_().GetUpdateSummaries(), 0) }) } // Test configuring an executor that doesn't exist for CreateJob API func TestRealisClient_CreateService_ExecutorDoesNotExist(t *testing.T) { // Create a single job job := realis.NewJob(). Environment("prod"). Role("vagrant"). Name("executordoesntexist"). ExecutorName("idontexist"). ExecutorData(""). CPU(.25). RAM(4). Disk(10). InstanceCount(1) settings := realis.NewUpdateSettings() job.InstanceCount(3) resp, result, err := r.CreateService(job, settings) require.Error(t, err) assert.Nil(t, result) require.Equal(t, aurora.ResponseCode_INVALID_REQUEST, resp.GetResponseCode()) } func TestRealisClient_ScheduleCronJob_Thermos(t *testing.T) { thermosCronPayload, err := ioutil.ReadFile("examples/thermos_cron_payload.json") assert.NoError(t, err) job := realis.NewJob(). Environment("prod"). Role("vagrant"). Name("cronsched_job_test"). ExecutorName(aurora.AURORA_EXECUTOR_NAME). ExecutorData(string(thermosCronPayload)). CPU(1). RAM(64). Disk(100). IsService(true). InstanceCount(1). AddPorts(1). CronSchedule("* * * * *"). IsService(false) _, err = r.ScheduleCronJob(job) require.NoError(t, err) t.Run("Start", func(t *testing.T) { _, err := r.StartCronJob(job.JobKey()) require.NoError(t, err) }) t.Run("Deschedule", func(t *testing.T) { _, err := r.DescheduleCronJob(job.JobKey()) require.NoError(t, err) }) } func TestRealisClient_StartMaintenance(t *testing.T) { hosts := []string{"localhost"} _, _, err := r.StartMaintenance(hosts...) require.NoError(t, err, "unable to start maintenance") // Monitor change to DRAINING and DRAINED mode hostResults, err := monitor.HostMaintenance( hosts, []aurora.MaintenanceMode{aurora.MaintenanceMode_SCHEDULED}, 1, 50) assert.NoError(t, err) assert.Equal(t, map[string]bool{"localhost": true}, hostResults) _, _, err = r.EndMaintenance(hosts...) require.NoError(t, err) // Monitor change to DRAINING and DRAINED mode _, err = monitor.HostMaintenance( hosts, []aurora.MaintenanceMode{aurora.MaintenanceMode_NONE}, 5, 10) assert.NoError(t, err) } func TestRealisClient_DrainHosts(t *testing.T) { hosts := []string{"localhost"} t.Run("DrainHosts", func(t *testing.T) { _, _, err := r.DrainHosts(hosts...) 
assert.NoError(t, err, "unable to drain host") }) t.Run("MonitorTransitionToDrained", func(t *testing.T) { // Monitor change to DRAINING and DRAINED mode hostResults, err := monitor.HostMaintenance( hosts, []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING}, 1, 50) require.NoError(t, err) assert.Equal(t, map[string]bool{"localhost": true}, hostResults) }) t.Run("MonitorNonExistentHost", func(t *testing.T) { // Monitor change to DRAINING and DRAINED mode hostResults, err := monitor.HostMaintenance( append(hosts, "IMAGINARY_HOST"), []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING}, 1, 1) // Assert monitor returned an error that was not nil, and also a list of the non-transitioned hosts require.Error(t, err) assert.Equal(t, map[string]bool{"localhost": true, "IMAGINARY_HOST": false}, hostResults) }) t.Run("EndMaintenance", func(t *testing.T) { _, _, err := r.EndMaintenance(hosts...) require.NoError(t, err) // Monitor change to DRAINING and DRAINED mode _, err = monitor.HostMaintenance( hosts, []aurora.MaintenanceMode{aurora.MaintenanceMode_NONE}, 5, 10) assert.NoError(t, err) }) } func TestRealisClient_SLADrainHosts(t *testing.T) { hosts := []string{"localhost"} policy := aurora.SlaPolicy{PercentageSlaPolicy: &aurora.PercentageSlaPolicy{Percentage: 50.0}} _, err := r.SLADrainHosts(&policy, 30, hosts...) require.NoError(t, err, "unable to drain host with SLA policy") // Monitor change to DRAINING and DRAINED mode hostResults, err := monitor.HostMaintenance( hosts, []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING}, 1, 50) assert.NoError(t, err) assert.Equal(t, map[string]bool{"localhost": true}, hostResults) _, _, err = r.EndMaintenance(hosts...) require.NoError(t, err) // Monitor change to DRAINING and DRAINED mode _, err = monitor.HostMaintenance( hosts, []aurora.MaintenanceMode{aurora.MaintenanceMode_NONE}, 5, 10) assert.NoError(t, err) // slaDrainHosts goes with default policy if no policy is specified _, err = r.SLADrainHosts(nil, 30, hosts...) require.NoError(t, err, "unable to drain host with SLA policy") // Monitor change to DRAINING and DRAINED mode hostResults, err = monitor.HostMaintenance( hosts, []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING}, 1, 50) assert.NoError(t, err) assert.Equal(t, map[string]bool{"localhost": true}, hostResults) _, _, err = r.EndMaintenance(hosts...) require.NoError(t, err) // Monitor change to DRAINING and DRAINED mode _, err = monitor.HostMaintenance( hosts, []aurora.MaintenanceMode{aurora.MaintenanceMode_NONE}, 5, 10) assert.NoError(t, err) _, err = r.SLADrainHosts(&aurora.SlaPolicy{}, 30, hosts...) require.NoError(t, err, "unable to drain host with SLA policy") // Monitor change to DRAINING and DRAINED mode hostResults, err = monitor.HostMaintenance( hosts, []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING}, 1, 50) assert.NoError(t, err) assert.Equal(t, map[string]bool{"localhost": true}, hostResults) _, _, err = r.EndMaintenance(hosts...) require.NoError(t, err) // Monitor change to DRAINING and DRAINED mode _, err = monitor.HostMaintenance( hosts, []aurora.MaintenanceMode{aurora.MaintenanceMode_NONE}, 5, 10) assert.NoError(t, err) } // Test multiple go routines using a single connection func TestRealisClient_SessionThreadSafety(t *testing.T) { // Create a single job job := realis.NewJob(). Environment("prod"). Role("vagrant"). 
Name("create_thermos_job_test_multi"). ExecutorName(aurora.AURORA_EXECUTOR_NAME). ExecutorData(string(thermosPayload)). CPU(.25). RAM(4). Disk(10). InstanceCount(1000) // Impossible amount to go live in any sane machine _, err := r.CreateJob(job) assert.NoError(t, err) wg := sync.WaitGroup{} threadCount := 20 wg.Add(threadCount) for i := 0; i < threadCount; i++ { // Launch multiple monitors that will poll every second go func() { defer wg.Done() // Test Schedule status monitor for terminal state and timing out after 30 seconds success, err := monitor.ScheduleStatus(job.JobKey(), job.GetInstanceCount(), realis.LiveStates, 1, 30) assert.False(t, success) assert.Error(t, err) _, err = r.KillJob(job.JobKey()) assert.NoError(t, err) }() } wg.Wait() } // Test setting and getting the quota func TestRealisClient_Quota(t *testing.T) { var resp *aurora.Response var err error cpu := 3.5 ram := int64(20480) disk := int64(10240) t.Run("Set", func(t *testing.T) { resp, err = r.SetQuota("vagrant", &cpu, &ram, &disk) assert.NoError(t, err) }) t.Run("Get", func(t *testing.T) { // Test GetQuota based on previously set values var result *aurora.GetQuotaResult_ resp, err = r.GetQuota("vagrant") if resp.GetResult_() != nil { result = resp.GetResult_().GetQuotaResult_ } assert.NoError(t, err) for _, res := range result.Quota.GetResources() { switch true { case res.DiskMb != nil: assert.Equal(t, disk, *res.DiskMb) case res.NumCpus != nil: assert.Equal(t, cpu, *res.NumCpus) case res.RamMb != nil: assert.Equal(t, ram, *res.RamMb) } } }) } func TestRealisClient_ForceImplicitTaskReconciliation(t *testing.T) { err := r.ForceImplicitTaskReconciliation() assert.NoError(t, err) } func TestRealisClient_ForceExplicitTaskReconciliation(t *testing.T) { // Default value err := r.ForceExplicitTaskReconciliation(nil) assert.NoError(t, err) // Custom batch value err = r.ForceExplicitTaskReconciliation(thrift.Int32Ptr(32)) assert.NoError(t, err) } func TestRealisClient_PartitionPolicy(t *testing.T) { role := "vagrant" job := realis.NewJob(). Environment("prod"). Role(role). Name("create_thermos_job_partition_policy_test"). ExecutorName(aurora.AURORA_EXECUTOR_NAME). ExecutorData(string(thermosPayload)). CPU(.5). RAM(64). Disk(100). IsService(true). InstanceCount(2). 
		PartitionPolicy(&aurora.PartitionPolicy{Reschedule: true, DelaySecs: thrift.Int64Ptr(30)})

	settings := realis.NewUpdateSettings()
	settings.UpdateGroupSize = 2

	_, result, err := r.CreateService(job, settings)
	assert.NoError(t, err)

	var ok bool
	var mErr error

	if ok, mErr = monitor.JobUpdate(*result.GetKey(), 5, 180); !ok || mErr != nil {
		// Update may already be in a terminal state so don't check for error
		_, err := r.AbortJobUpdate(*result.GetKey(), "Monitor timed out.")
		assert.NoError(t, err)
	}

	// Clean up after finishing the test
	r.KillJob(job.JobKey())
}

func TestAuroraJob_UpdateSlaPolicy(t *testing.T) {
	tests := []struct {
		name string
		args aurora.SlaPolicy
	}{
		{
			"create_service_with_sla_count_policy_test",
			aurora.SlaPolicy{CountSlaPolicy: &aurora.CountSlaPolicy{Count: 1, DurationSecs: 15}},
		},
		{
			"create_service_with_sla_percentage_policy_test",
			aurora.SlaPolicy{PercentageSlaPolicy: &aurora.PercentageSlaPolicy{Percentage: 0.25, DurationSecs: 15}},
		},
		{
			"create_service_with_sla_coordinator_policy_test",
			aurora.SlaPolicy{CoordinatorSlaPolicy: &aurora.CoordinatorSlaPolicy{
				CoordinatorUrl: "http://localhost/endpoint", StatusKey: "aurora_test"}},
		},
	}
	role := "vagrant"

	_, err := r.SetQuota(role, thrift.Float64Ptr(6.0), thrift.Int64Ptr(1024), thrift.Int64Ptr(1024))
	assert.NoError(t, err)

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Create a single job
			job := realis.NewJob().
				Environment("prod").
				Role(role).
				Name(tt.name).
				ExecutorName(aurora.AURORA_EXECUTOR_NAME).
				ExecutorData(string(thermosPayload)).
				CPU(.01).
				RAM(2).
				Disk(5).
				InstanceCount(4).
				IsService(true).
				SlaPolicy(&tt.args).
				Tier("preferred")

			settings := realis.NewUpdateSettings()
			settings.UpdateGroupSize = 2
			settings.MinWaitInInstanceRunningMs = 5 * 1000

			_, result, err := r.CreateService(job, settings)
			require.NoError(t, err)
			assert.NotNil(t, result)

			var ok bool
			var mErr error

			if ok, mErr = monitor.JobUpdate(*result.GetKey(), 5, 240); !ok || mErr != nil {
				// Update may already be in a terminal state so don't check for error
				_, err := r.AbortJobUpdate(*result.GetKey(), "Monitor timed out.")
				_, err = r.KillJob(job.JobKey())
				assert.NoError(t, err)
			}
			assert.NoError(t, mErr)
			assert.True(t, ok)

			// Kill the test task after confirming it came up fine
			_, err = r.KillJob(job.JobKey())
			assert.NoError(t, err)
		})
	}
}

func TestRealisClient_UpdateStrategies(t *testing.T) {
	// Create a single job
	job := realis.NewJob().
		Environment("prod").
		Role("vagrant").
		ExecutorName(aurora.AURORA_EXECUTOR_NAME).
		ExecutorData(string(thermosPayload)).
		CPU(.01).
		RAM(4).
		Disk(10).
		InstanceCount(6).
		IsService(true)

	strategies := []struct {
		UpdateJob *realis.UpdateJob
		Name      string
	}{
		{
			UpdateJob: realis.NewDefaultUpdateJob(job.TaskConfig()).
				QueueUpdateStrategy(aurora.QueueJobUpdateStrategy{GroupSize: 2}).
				InstanceCount(6).
				WatchTime(1000),
			Name: "Queue",
		},
		{
			UpdateJob: realis.NewDefaultUpdateJob(job.TaskConfig()).
				BatchUpdateStrategy(aurora.BatchJobUpdateStrategy{GroupSize: 2}).
				InstanceCount(6).
				WatchTime(1000),
			Name: "Batch",
		},
		{
			UpdateJob: realis.NewDefaultUpdateJob(job.TaskConfig()).
				VariableBatchStrategy(aurora.VariableBatchJobUpdateStrategy{GroupSizes: []int32{1, 2, 3}}).
				InstanceCount(6).
				WatchTime(1000),
			Name: "VarBatch",
		},
	}

	for _, strategy := range strategies {
		t.Run("TestRealisClient_UpdateStrategies_"+strategy.Name, func(t *testing.T) {
			job.Name("update_strategies_" + strategy.Name)
			resp, err := r.StartJobUpdate(strategy.UpdateJob, "")

			assert.NoError(t, err)
			assert.NotNil(t, resp)
			assert.NotNil(t, resp.GetResult_())
			assert.NotNil(t, resp.GetResult_().GetStartJobUpdateResult_())
			assert.NotNil(t, resp.GetResult_().GetStartJobUpdateResult_().GetKey())

			var ok bool
			var mErr error
			key := *resp.GetResult_().GetStartJobUpdateResult_().GetKey()
			if ok, mErr = monitor.JobUpdate(key, 5, 240); !ok || mErr != nil {
				// Update may already be in a terminal state so don't check for error
				_, err := r.AbortJobUpdate(key, "Monitor timed out.")
				assert.NoError(t, err)
			}

			_, err = r.KillJob(job.JobKey())
			assert.NoError(t, err)
		})
	}
}

func TestRealisClient_BatchAwareAutoPause(t *testing.T) {
	// Create a single job
	job := realis.NewJob().
		Environment("prod").
		Role("vagrant").
		Name("BatchAwareAutoPauseTest").
		ExecutorName(aurora.AURORA_EXECUTOR_NAME).
		ExecutorData(string(thermosPayload)).
		CPU(.01).
		RAM(4).
		Disk(10).
		InstanceCount(6).
		IsService(true)

	updateGroups := []int32{1, 2, 3}
	strategy := realis.NewDefaultUpdateJob(job.TaskConfig()).
		VariableBatchStrategy(aurora.VariableBatchJobUpdateStrategy{
			GroupSizes:          updateGroups,
			AutopauseAfterBatch: true,
		}).
		InstanceCount(6).
		WatchTime(1000)

	resp, err := r.StartJobUpdate(strategy, "")
	require.NoError(t, err)
	require.NotNil(t, resp)
	require.NotNil(t, resp.GetResult_())
	require.NotNil(t, resp.GetResult_().GetStartJobUpdateResult_())
	require.NotNil(t, resp.GetResult_().GetStartJobUpdateResult_().GetKey())
	key := *resp.GetResult_().GetStartJobUpdateResult_().GetKey()

	for i := range updateGroups {
		curStep, mErr := monitor.AutoPausedUpdateMonitor(key, time.Second*5, time.Second*240)
		if mErr != nil {
			// Update may already be in a terminal state so don't check for error
			_, err := r.AbortJobUpdate(key, "Monitor timed out.")
			assert.NoError(t, err)
		}
		assert.Equal(t, i, curStep)

		if i != len(updateGroups)-1 {
			_, err = r.ResumeJobUpdate(&key, "auto resuming test")
			require.NoError(t, err)
		}
	}

	_, err = r.KillJob(job.JobKey())
	assert.NoError(t, err)
}