From 037c636d6d21d317d5c36ea7cd300a238d2a2714 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Thu, 4 Oct 2018 10:47:08 -0700 Subject: [PATCH] Retry switch fallthrough fix and create multiple tests (#77) * Bugfix: switch statements were missing fallthrough statement thus making them retry non-retriable errors. Using a list to catch cases now. * Adding tests for CreateService, createService when the executor doesn't exist, and createJob when the executor doesn't exist. Renamed Pulse test to reflect that it's using CreateService instead of CreateJob. * Repsonse propagate back up to caller for context for CreateJob, CreateService, and StartJobUpdate. * Deleting PR template as Travis CI takes care of running tests and formatting tests now. --- .github/PULL_REQUEST_TEMPLATE.md | 16 ------ realis.go | 6 +- realis_e2e_test.go | 97 ++++++++++++++++++++++++++++++-- retry.go | 19 +++---- 4 files changed, 105 insertions(+), 33 deletions(-) delete mode 100644 .github/PULL_REQUEST_TEMPLATE.md diff --git a/.github/PULL_REQUEST_TEMPLATE.md b/.github/PULL_REQUEST_TEMPLATE.md deleted file mode 100644 index 797ed27..0000000 --- a/.github/PULL_REQUEST_TEMPLATE.md +++ /dev/null @@ -1,16 +0,0 @@ - - -* Have you run goformat on the project before submitting? - -* Have you run go test on the project before submitting? Do all tests pass? - -* Does the Pull Request require a test to be added to the end to end tests? If so, has it been added? \ No newline at end of file diff --git a/realis.go b/realis.go index f7fd081..80b9139 100644 --- a/realis.go +++ b/realis.go @@ -612,7 +612,7 @@ func (r *realisClient) CreateJob(auroraJob Job) (*aurora.Response, error) { }) if retryErr != nil { - return nil, errors.Wrap(retryErr, "Error sending Create command to Aurora Scheduler") + return resp, errors.Wrap(retryErr, "Error sending Create command to Aurora Scheduler") } return resp, nil } @@ -625,7 +625,7 @@ func (r *realisClient) CreateService(auroraJob Job, settings *aurora.JobUpdateSe resp, err := r.StartJobUpdate(update, "") if err != nil { - return nil, nil, errors.Wrap(err, "unable to create service") + return resp, nil, errors.Wrap(err, "unable to create service") } if resp != nil && resp.GetResult_() != nil { @@ -734,7 +734,7 @@ func (r *realisClient) StartJobUpdate(updateJob *UpdateJob, message string) (*au }) if retryErr != nil { - return nil, errors.Wrap(retryErr, "Error sending StartJobUpdate command to Aurora Scheduler") + return resp, errors.Wrap(retryErr, "Error sending StartJobUpdate command to Aurora Scheduler") } return resp, nil } diff --git a/realis_e2e_test.go b/realis_e2e_test.go index 633528d..382fa53 100644 --- a/realis_e2e_test.go +++ b/realis_e2e_test.go @@ -37,13 +37,13 @@ var thermosPayload []byte func TestMain(m *testing.M) { var err error - // New configuration to connect to Vagrant image + // New configuration to connect to docker container r, err = realis.NewRealisClient(realis.SchedulerUrl("http://192.168.33.7:8081"), realis.BasicAuth("aurora", "secret"), realis.TimeoutMS(20000)) if err != nil { - fmt.Println("Please run vagrant box before running test suite") + fmt.Println("Please run docker-compose up -d before running test suite") os.Exit(1) } @@ -173,7 +173,27 @@ func TestRealisClient_CreateJob_Thermos(t *testing.T) { }) } -func TestRealisClient_CreateJobWithPulse_Thermos(t *testing.T) { +// Test configuring an executor that doesn't exist for CreateJob API +func TestRealisClient_CreateJob_ExecutorDoesNotExist(t *testing.T) { + + // Create a single job + job := realis.NewJob(). + Environment("prod"). + Role("vagrant"). + Name("executordoesntexist"). + ExecutorName("idontexist"). + ExecutorData(""). + CPU(.25). + RAM(4). + Disk(10). + InstanceCount(1) + + resp, err := r.CreateJob(job) + assert.Error(t, err) + assert.Equal(t, aurora.ResponseCode_INVALID_REQUEST, resp.GetResponseCode()) +} + +func TestRealisClient_CreateService_WithPulse_Thermos(t *testing.T) { fmt.Println("Creating service") role := "vagrant" @@ -257,6 +277,75 @@ func TestRealisClient_CreateJobWithPulse_Thermos(t *testing.T) { } +// Test configuring an executor that doesn't exist for CreateJob API +func TestRealisClient_CreateService(t *testing.T) { + + // Create a single job + job := realis.NewJob(). + Environment("prod"). + Role("vagrant"). + Name("create_service_test"). + ExecutorName(aurora.AURORA_EXECUTOR_NAME). + ExecutorData(string(thermosPayload)). + CPU(.25). + RAM(4). + Disk(10). + InstanceCount(3). + IsService(true) + + settings := realis.NewUpdateSettings() + job.InstanceCount(3) + resp, result, err := r.CreateService(job, settings) + + assert.NoError(t, err) + assert.NotNil(t, result) + assert.Equal(t, aurora.ResponseCode_OK, resp.GetResponseCode()) + + var ok bool + var mErr error + + if ok, mErr = monitor.JobUpdate(*result.GetKey(), 5, 180); !ok || mErr != nil { + // Update may already be in a terminal state so don't check for error + _, err := r.AbortJobUpdate(*result.GetKey(), "Monitor timed out.") + + _, err = r.KillJob(job.JobKey()) + + assert.NoError(t, err) + } + + assert.True(t, ok) + assert.NoError(t, mErr) + + // Kill task test task after confirming it came up fine + _, err = r.KillJob(job.JobKey()) + + assert.NoError(t, err) +} + +// Test configuring an executor that doesn't exist for CreateJob API +func TestRealisClient_CreateService_ExecutorDoesNotExist(t *testing.T) { + + // Create a single job + job := realis.NewJob(). + Environment("prod"). + Role("vagrant"). + Name("executordoesntexist"). + ExecutorName("idontexist"). + ExecutorData(""). + CPU(.25). + RAM(4). + Disk(10). + InstanceCount(1) + + settings := realis.NewUpdateSettings() + job.InstanceCount(3) + resp, result, err := r.CreateService(job, settings) + + assert.Error(t, err) + assert.Nil(t, result) + assert.Equal(t, aurora.ResponseCode_INVALID_REQUEST, resp.GetResponseCode()) +} + func TestRealisClient_ScheduleCronJob_Thermos(t *testing.T) { thermosCronPayload, err := ioutil.ReadFile("examples/thermos_cron_payload.json") @@ -367,7 +456,7 @@ func TestRealisClient_SessionThreadSafety(t *testing.T) { CPU(.25). RAM(4). Disk(10). - InstanceCount(100) // Impossible amount to go live in the current vagrant default settings + InstanceCount(1000) // Impossible amount to go live in any sane machine resp, err := r.CreateJob(job) assert.NoError(t, err) diff --git a/retry.go b/retry.go index 39ad106..90cf55e 100644 --- a/retry.go +++ b/retry.go @@ -15,9 +15,8 @@ package realis import ( - "time" - "math/rand" + "time" "github.com/paypal/gorealis/gen-go/apache/aurora" "github.com/paypal/gorealis/response" @@ -178,18 +177,18 @@ func (r *realisClient) thriftCallWithRetries(thriftCall auroraThriftCall) (*auro case aurora.ResponseCode_OK: return resp, nil - // If the response code is transient, continue retrying + // If the response code is transient, continue retrying case aurora.ResponseCode_ERROR_TRANSIENT: r.logger.Println("Aurora replied with Transient error code, retrying") continue - // Failure scenarios, these indicate a bad payload or a bad config. Stop retrying. - case aurora.ResponseCode_INVALID_REQUEST: - case aurora.ResponseCode_ERROR: - case aurora.ResponseCode_AUTH_FAILED: - case aurora.ResponseCode_JOB_UPDATING_ERROR: - r.logger.Println("Terminal bad reply from Aurora, won't retry") - return nil, errors.New(response.CombineMessage(resp)) + // Failure scenarios, these indicate a bad payload or a bad config. Stop retrying. + case aurora.ResponseCode_INVALID_REQUEST, + aurora.ResponseCode_ERROR, + aurora.ResponseCode_AUTH_FAILED, + aurora.ResponseCode_JOB_UPDATING_ERROR: + r.logger.Printf("Terminal Response Code %v from Aurora, won't retry\n", resp.GetResponseCode().String()) + return resp, errors.New(response.CombineMessage(resp)) // The only case that should fall down to here is a WARNING response code. // It is currently not used as a response in the scheduler so it is unknown how to handle it.