Retry switch fallthrough fix and create multiple tests (#77)

* Bugfix: switch statements were missing fallthrough statement thus making them retry non-retriable errors. Using a list to catch cases now. * Adding tests for CreateService, createService when the executor doesn't exist, and createJob when the executor doesn't exist. Renamed Pulse test to reflect that it's using CreateService instead of CreateJob. * Repsonse propagate back up to caller for context for CreateJob, CreateService, and StartJobUpdate. * Deleting PR template as Travis CI takes care of running tests and formatting tests now.
2018-10-04 10:47:08 -07:00 · 2018-10-04 10:47:08 -07:00 · 037c636d6d
commit 037c636d6d
parent 9ebf118e71
4 changed files with 105 additions and 33 deletions
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@ -1,16 +0,0 @@
-<!--
-----------------------------------------
-## Please read instructions below ##
-Before submitting, please make sure you run a vagrant box running Aurora with the latest version shown in .auroraversion and run go test from the project root.
-
-To run an Aurora Vagrant image, follow the instructions here:
-http://aurora.apache.org/documentation/latest/getting-started/vagrant/
-
-## Answer the following questions: ##
-->
-
-* Have you run goformat on the project before submitting?
-
-* Have you run go test on the project before submitting? Do all tests pass?
-
-* Does the Pull Request require a test to be added to the end to end tests? If so, has it been added?
--- a/realis.go
+++ b/realis.go
@ -612,7 +612,7 @@ func (r *realisClient) CreateJob(auroraJob Job) (*aurora.Response, error) {
 	})

 	if retryErr != nil {
-		return nil, errors.Wrap(retryErr, "Error sending Create command to Aurora Scheduler")
+		return resp, errors.Wrap(retryErr, "Error sending Create command to Aurora Scheduler")
 	}
 	return resp, nil
 }
@ -625,7 +625,7 @@ func (r *realisClient) CreateService(auroraJob Job, settings *aurora.JobUpdateSe

 	resp, err := r.StartJobUpdate(update, "")
 	if err != nil {
-		return nil, nil, errors.Wrap(err, "unable to create service")
+		return resp, nil, errors.Wrap(err, "unable to create service")
 	}

 	if resp != nil && resp.GetResult_() != nil {
@ -734,7 +734,7 @@ func (r *realisClient) StartJobUpdate(updateJob *UpdateJob, message string) (*au
 	})

 	if retryErr != nil {
-		return nil, errors.Wrap(retryErr, "Error sending StartJobUpdate command to Aurora Scheduler")
+		return resp, errors.Wrap(retryErr, "Error sending StartJobUpdate command to Aurora Scheduler")
 	}
 	return resp, nil
 }
--- a/realis_e2e_test.go
+++ b/realis_e2e_test.go
@ -37,13 +37,13 @@ var thermosPayload []byte
 func TestMain(m *testing.M) {
 	var err error

-	// New configuration to connect to Vagrant image
+	// New configuration to connect to docker container
 	r, err = realis.NewRealisClient(realis.SchedulerUrl("http://192.168.33.7:8081"),
 		realis.BasicAuth("aurora", "secret"),
 		realis.TimeoutMS(20000))

 	if err != nil {
-		fmt.Println("Please run vagrant box before running test suite")
+		fmt.Println("Please run docker-compose up -d before running test suite")
 		os.Exit(1)
 	}

@ -173,7 +173,27 @@ func TestRealisClient_CreateJob_Thermos(t *testing.T) {
 	})
 }

-func TestRealisClient_CreateJobWithPulse_Thermos(t *testing.T) {
+// Test configuring an executor that doesn't exist for CreateJob API
+func TestRealisClient_CreateJob_ExecutorDoesNotExist(t *testing.T) {
+
+	// Create a single job
+	job := realis.NewJob().
+		Environment("prod").
+		Role("vagrant").
+		Name("executordoesntexist").
+		ExecutorName("idontexist").
+		ExecutorData("").
+		CPU(.25).
+		RAM(4).
+		Disk(10).
+		InstanceCount(1)
+
+	resp, err := r.CreateJob(job)
+	assert.Error(t, err)
+	assert.Equal(t, aurora.ResponseCode_INVALID_REQUEST, resp.GetResponseCode())
+}
+
+func TestRealisClient_CreateService_WithPulse_Thermos(t *testing.T) {

 	fmt.Println("Creating service")
 	role := "vagrant"
@ -257,6 +277,75 @@ func TestRealisClient_CreateJobWithPulse_Thermos(t *testing.T) {

 }

+// Test configuring an executor that doesn't exist for CreateJob API
+func TestRealisClient_CreateService(t *testing.T) {
+
+	// Create a single job
+	job := realis.NewJob().
+		Environment("prod").
+		Role("vagrant").
+		Name("create_service_test").
+		ExecutorName(aurora.AURORA_EXECUTOR_NAME).
+		ExecutorData(string(thermosPayload)).
+		CPU(.25).
+		RAM(4).
+		Disk(10).
+		InstanceCount(3).
+		IsService(true)
+
+	settings := realis.NewUpdateSettings()
+	job.InstanceCount(3)
+	resp, result, err := r.CreateService(job, settings)
+
+	assert.NoError(t, err)
+	assert.NotNil(t, result)
+	assert.Equal(t, aurora.ResponseCode_OK, resp.GetResponseCode())
+
+	var ok bool
+	var mErr error
+
+	if ok, mErr = monitor.JobUpdate(*result.GetKey(), 5, 180); !ok || mErr != nil {
+		// Update may already be in a terminal state so don't check for error
+		_, err := r.AbortJobUpdate(*result.GetKey(), "Monitor timed out.")
+
+		_, err = r.KillJob(job.JobKey())
+
+		assert.NoError(t, err)
+	}
+
+	assert.True(t, ok)
+	assert.NoError(t, mErr)
+
+	// Kill task test task after confirming it came up fine
+	_, err = r.KillJob(job.JobKey())
+
+	assert.NoError(t, err)
+}
+
+// Test configuring an executor that doesn't exist for CreateJob API
+func TestRealisClient_CreateService_ExecutorDoesNotExist(t *testing.T) {
+
+	// Create a single job
+	job := realis.NewJob().
+		Environment("prod").
+		Role("vagrant").
+		Name("executordoesntexist").
+		ExecutorName("idontexist").
+		ExecutorData("").
+		CPU(.25).
+		RAM(4).
+		Disk(10).
+		InstanceCount(1)
+
+	settings := realis.NewUpdateSettings()
+	job.InstanceCount(3)
+	resp, result, err := r.CreateService(job, settings)
+
+	assert.Error(t, err)
+	assert.Nil(t, result)
+	assert.Equal(t, aurora.ResponseCode_INVALID_REQUEST, resp.GetResponseCode())
+}
+
 func TestRealisClient_ScheduleCronJob_Thermos(t *testing.T) {

 	thermosCronPayload, err := ioutil.ReadFile("examples/thermos_cron_payload.json")
@ -367,7 +456,7 @@ func TestRealisClient_SessionThreadSafety(t *testing.T) {
 		CPU(.25).
 		RAM(4).
 		Disk(10).
-		InstanceCount(100) // Impossible amount to go live in the current vagrant default settings
+		InstanceCount(1000) // Impossible amount to go live in any sane machine

 	resp, err := r.CreateJob(job)
 	assert.NoError(t, err)
--- a/retry.go
+++ b/retry.go
@ -15,9 +15,8 @@
 package realis

 import (
-	"time"
-
 	"math/rand"
+	"time"

 	"github.com/paypal/gorealis/gen-go/apache/aurora"
 	"github.com/paypal/gorealis/response"
@ -178,18 +177,18 @@ func (r *realisClient) thriftCallWithRetries(thriftCall auroraThriftCall) (*auro
 			case aurora.ResponseCode_OK:
 				return resp, nil

-				// If the response code is transient, continue retrying
+			// If the response code is transient, continue retrying
 			case aurora.ResponseCode_ERROR_TRANSIENT:
 				r.logger.Println("Aurora replied with Transient error code, retrying")
 				continue

-				// Failure scenarios, these indicate a bad payload or a bad config. Stop retrying.
-			case aurora.ResponseCode_INVALID_REQUEST:
-			case aurora.ResponseCode_ERROR:
-			case aurora.ResponseCode_AUTH_FAILED:
-			case aurora.ResponseCode_JOB_UPDATING_ERROR:
-				r.logger.Println("Terminal bad reply from Aurora, won't retry")
-				return nil, errors.New(response.CombineMessage(resp))
+			// Failure scenarios, these indicate a bad payload or a bad config. Stop retrying.
+			case aurora.ResponseCode_INVALID_REQUEST,
+				aurora.ResponseCode_ERROR,
+				aurora.ResponseCode_AUTH_FAILED,
+				aurora.ResponseCode_JOB_UPDATING_ERROR:
+				r.logger.Printf("Terminal Response Code %v from Aurora, won't retry\n", resp.GetResponseCode().String())
+				return resp, errors.New(response.CombineMessage(resp))

 				// The only case that should fall down to here is a WARNING response code.
 				// It is currently not used as a response in the scheduler so it is unknown how to handle it.