Retry switch fallthrough fix and create multiple tests (#77)
* Bugfix: switch statements were missing fallthrough statement thus making them retry non-retriable errors. Using a list to catch cases now. * Adding tests for CreateService, createService when the executor doesn't exist, and createJob when the executor doesn't exist. Renamed Pulse test to reflect that it's using CreateService instead of CreateJob. * Repsonse propagate back up to caller for context for CreateJob, CreateService, and StartJobUpdate. * Deleting PR template as Travis CI takes care of running tests and formatting tests now.
This commit is contained in:
parent
9ebf118e71
commit
037c636d6d
4 changed files with 105 additions and 33 deletions
16
.github/PULL_REQUEST_TEMPLATE.md
vendored
16
.github/PULL_REQUEST_TEMPLATE.md
vendored
|
@ -1,16 +0,0 @@
|
||||||
<!--
|
|
||||||
-----------------------------------------
|
|
||||||
## Please read instructions below ##
|
|
||||||
Before submitting, please make sure you run a vagrant box running Aurora with the latest version shown in .auroraversion and run go test from the project root.
|
|
||||||
|
|
||||||
To run an Aurora Vagrant image, follow the instructions here:
|
|
||||||
http://aurora.apache.org/documentation/latest/getting-started/vagrant/
|
|
||||||
|
|
||||||
## Answer the following questions: ##
|
|
||||||
-->
|
|
||||||
|
|
||||||
* Have you run goformat on the project before submitting?
|
|
||||||
|
|
||||||
* Have you run go test on the project before submitting? Do all tests pass?
|
|
||||||
|
|
||||||
* Does the Pull Request require a test to be added to the end to end tests? If so, has it been added?
|
|
|
@ -612,7 +612,7 @@ func (r *realisClient) CreateJob(auroraJob Job) (*aurora.Response, error) {
|
||||||
})
|
})
|
||||||
|
|
||||||
if retryErr != nil {
|
if retryErr != nil {
|
||||||
return nil, errors.Wrap(retryErr, "Error sending Create command to Aurora Scheduler")
|
return resp, errors.Wrap(retryErr, "Error sending Create command to Aurora Scheduler")
|
||||||
}
|
}
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
@ -625,7 +625,7 @@ func (r *realisClient) CreateService(auroraJob Job, settings *aurora.JobUpdateSe
|
||||||
|
|
||||||
resp, err := r.StartJobUpdate(update, "")
|
resp, err := r.StartJobUpdate(update, "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, nil, errors.Wrap(err, "unable to create service")
|
return resp, nil, errors.Wrap(err, "unable to create service")
|
||||||
}
|
}
|
||||||
|
|
||||||
if resp != nil && resp.GetResult_() != nil {
|
if resp != nil && resp.GetResult_() != nil {
|
||||||
|
@ -734,7 +734,7 @@ func (r *realisClient) StartJobUpdate(updateJob *UpdateJob, message string) (*au
|
||||||
})
|
})
|
||||||
|
|
||||||
if retryErr != nil {
|
if retryErr != nil {
|
||||||
return nil, errors.Wrap(retryErr, "Error sending StartJobUpdate command to Aurora Scheduler")
|
return resp, errors.Wrap(retryErr, "Error sending StartJobUpdate command to Aurora Scheduler")
|
||||||
}
|
}
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,13 +37,13 @@ var thermosPayload []byte
|
||||||
func TestMain(m *testing.M) {
|
func TestMain(m *testing.M) {
|
||||||
var err error
|
var err error
|
||||||
|
|
||||||
// New configuration to connect to Vagrant image
|
// New configuration to connect to docker container
|
||||||
r, err = realis.NewRealisClient(realis.SchedulerUrl("http://192.168.33.7:8081"),
|
r, err = realis.NewRealisClient(realis.SchedulerUrl("http://192.168.33.7:8081"),
|
||||||
realis.BasicAuth("aurora", "secret"),
|
realis.BasicAuth("aurora", "secret"),
|
||||||
realis.TimeoutMS(20000))
|
realis.TimeoutMS(20000))
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println("Please run vagrant box before running test suite")
|
fmt.Println("Please run docker-compose up -d before running test suite")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -173,7 +173,27 @@ func TestRealisClient_CreateJob_Thermos(t *testing.T) {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestRealisClient_CreateJobWithPulse_Thermos(t *testing.T) {
|
// Test configuring an executor that doesn't exist for CreateJob API
|
||||||
|
func TestRealisClient_CreateJob_ExecutorDoesNotExist(t *testing.T) {
|
||||||
|
|
||||||
|
// Create a single job
|
||||||
|
job := realis.NewJob().
|
||||||
|
Environment("prod").
|
||||||
|
Role("vagrant").
|
||||||
|
Name("executordoesntexist").
|
||||||
|
ExecutorName("idontexist").
|
||||||
|
ExecutorData("").
|
||||||
|
CPU(.25).
|
||||||
|
RAM(4).
|
||||||
|
Disk(10).
|
||||||
|
InstanceCount(1)
|
||||||
|
|
||||||
|
resp, err := r.CreateJob(job)
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Equal(t, aurora.ResponseCode_INVALID_REQUEST, resp.GetResponseCode())
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRealisClient_CreateService_WithPulse_Thermos(t *testing.T) {
|
||||||
|
|
||||||
fmt.Println("Creating service")
|
fmt.Println("Creating service")
|
||||||
role := "vagrant"
|
role := "vagrant"
|
||||||
|
@ -257,6 +277,75 @@ func TestRealisClient_CreateJobWithPulse_Thermos(t *testing.T) {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test configuring an executor that doesn't exist for CreateJob API
|
||||||
|
func TestRealisClient_CreateService(t *testing.T) {
|
||||||
|
|
||||||
|
// Create a single job
|
||||||
|
job := realis.NewJob().
|
||||||
|
Environment("prod").
|
||||||
|
Role("vagrant").
|
||||||
|
Name("create_service_test").
|
||||||
|
ExecutorName(aurora.AURORA_EXECUTOR_NAME).
|
||||||
|
ExecutorData(string(thermosPayload)).
|
||||||
|
CPU(.25).
|
||||||
|
RAM(4).
|
||||||
|
Disk(10).
|
||||||
|
InstanceCount(3).
|
||||||
|
IsService(true)
|
||||||
|
|
||||||
|
settings := realis.NewUpdateSettings()
|
||||||
|
job.InstanceCount(3)
|
||||||
|
resp, result, err := r.CreateService(job, settings)
|
||||||
|
|
||||||
|
assert.NoError(t, err)
|
||||||
|
assert.NotNil(t, result)
|
||||||
|
assert.Equal(t, aurora.ResponseCode_OK, resp.GetResponseCode())
|
||||||
|
|
||||||
|
var ok bool
|
||||||
|
var mErr error
|
||||||
|
|
||||||
|
if ok, mErr = monitor.JobUpdate(*result.GetKey(), 5, 180); !ok || mErr != nil {
|
||||||
|
// Update may already be in a terminal state so don't check for error
|
||||||
|
_, err := r.AbortJobUpdate(*result.GetKey(), "Monitor timed out.")
|
||||||
|
|
||||||
|
_, err = r.KillJob(job.JobKey())
|
||||||
|
|
||||||
|
assert.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
assert.True(t, ok)
|
||||||
|
assert.NoError(t, mErr)
|
||||||
|
|
||||||
|
// Kill task test task after confirming it came up fine
|
||||||
|
_, err = r.KillJob(job.JobKey())
|
||||||
|
|
||||||
|
assert.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test configuring an executor that doesn't exist for CreateJob API
|
||||||
|
func TestRealisClient_CreateService_ExecutorDoesNotExist(t *testing.T) {
|
||||||
|
|
||||||
|
// Create a single job
|
||||||
|
job := realis.NewJob().
|
||||||
|
Environment("prod").
|
||||||
|
Role("vagrant").
|
||||||
|
Name("executordoesntexist").
|
||||||
|
ExecutorName("idontexist").
|
||||||
|
ExecutorData("").
|
||||||
|
CPU(.25).
|
||||||
|
RAM(4).
|
||||||
|
Disk(10).
|
||||||
|
InstanceCount(1)
|
||||||
|
|
||||||
|
settings := realis.NewUpdateSettings()
|
||||||
|
job.InstanceCount(3)
|
||||||
|
resp, result, err := r.CreateService(job, settings)
|
||||||
|
|
||||||
|
assert.Error(t, err)
|
||||||
|
assert.Nil(t, result)
|
||||||
|
assert.Equal(t, aurora.ResponseCode_INVALID_REQUEST, resp.GetResponseCode())
|
||||||
|
}
|
||||||
|
|
||||||
func TestRealisClient_ScheduleCronJob_Thermos(t *testing.T) {
|
func TestRealisClient_ScheduleCronJob_Thermos(t *testing.T) {
|
||||||
|
|
||||||
thermosCronPayload, err := ioutil.ReadFile("examples/thermos_cron_payload.json")
|
thermosCronPayload, err := ioutil.ReadFile("examples/thermos_cron_payload.json")
|
||||||
|
@ -367,7 +456,7 @@ func TestRealisClient_SessionThreadSafety(t *testing.T) {
|
||||||
CPU(.25).
|
CPU(.25).
|
||||||
RAM(4).
|
RAM(4).
|
||||||
Disk(10).
|
Disk(10).
|
||||||
InstanceCount(100) // Impossible amount to go live in the current vagrant default settings
|
InstanceCount(1000) // Impossible amount to go live in any sane machine
|
||||||
|
|
||||||
resp, err := r.CreateJob(job)
|
resp, err := r.CreateJob(job)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
19
retry.go
19
retry.go
|
@ -15,9 +15,8 @@
|
||||||
package realis
|
package realis
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"time"
|
|
||||||
|
|
||||||
"math/rand"
|
"math/rand"
|
||||||
|
"time"
|
||||||
|
|
||||||
"github.com/paypal/gorealis/gen-go/apache/aurora"
|
"github.com/paypal/gorealis/gen-go/apache/aurora"
|
||||||
"github.com/paypal/gorealis/response"
|
"github.com/paypal/gorealis/response"
|
||||||
|
@ -178,18 +177,18 @@ func (r *realisClient) thriftCallWithRetries(thriftCall auroraThriftCall) (*auro
|
||||||
case aurora.ResponseCode_OK:
|
case aurora.ResponseCode_OK:
|
||||||
return resp, nil
|
return resp, nil
|
||||||
|
|
||||||
// If the response code is transient, continue retrying
|
// If the response code is transient, continue retrying
|
||||||
case aurora.ResponseCode_ERROR_TRANSIENT:
|
case aurora.ResponseCode_ERROR_TRANSIENT:
|
||||||
r.logger.Println("Aurora replied with Transient error code, retrying")
|
r.logger.Println("Aurora replied with Transient error code, retrying")
|
||||||
continue
|
continue
|
||||||
|
|
||||||
// Failure scenarios, these indicate a bad payload or a bad config. Stop retrying.
|
// Failure scenarios, these indicate a bad payload or a bad config. Stop retrying.
|
||||||
case aurora.ResponseCode_INVALID_REQUEST:
|
case aurora.ResponseCode_INVALID_REQUEST,
|
||||||
case aurora.ResponseCode_ERROR:
|
aurora.ResponseCode_ERROR,
|
||||||
case aurora.ResponseCode_AUTH_FAILED:
|
aurora.ResponseCode_AUTH_FAILED,
|
||||||
case aurora.ResponseCode_JOB_UPDATING_ERROR:
|
aurora.ResponseCode_JOB_UPDATING_ERROR:
|
||||||
r.logger.Println("Terminal bad reply from Aurora, won't retry")
|
r.logger.Printf("Terminal Response Code %v from Aurora, won't retry\n", resp.GetResponseCode().String())
|
||||||
return nil, errors.New(response.CombineMessage(resp))
|
return resp, errors.New(response.CombineMessage(resp))
|
||||||
|
|
||||||
// The only case that should fall down to here is a WARNING response code.
|
// The only case that should fall down to here is a WARNING response code.
|
||||||
// It is currently not used as a response in the scheduler so it is unknown how to handle it.
|
// It is currently not used as a response in the scheduler so it is unknown how to handle it.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue