Upgrading dependencies to include logrus.

This commit is contained in:
Renan DelValle 2018-11-09 15:58:49 -08:00
parent bc28198c2d
commit c03901c0f1
379 changed files with 90030 additions and 47 deletions


@@ -582,6 +582,21 @@ func main() {
fmt.Print(result.String())
case "getPendingReasons":
fmt.Println("Getting pending reasons")
taskQ := &aurora.TaskQuery{
Role: &job.JobKey().Role,
Environment: &job.JobKey().Environment,
JobName: &job.JobKey().Name,
}
reasons, err := r.GetPendingReason(taskQ)
if err != nil {
log.Fatalf("error: %+v\n ", err)
}
fmt.Printf("length: %d\n ", len(reasons))
fmt.Printf("tasks: %+v\n", reasons)
case "getJobs":
fmt.Println("GetJobs...role: ", role)
_, result, err := r.GetJobs(role)


@@ -51,6 +51,7 @@ type Realis interface {
GetTaskStatus(query *aurora.TaskQuery) ([]*aurora.ScheduledTask, error)
GetTasksWithoutConfigs(query *aurora.TaskQuery) ([]*aurora.ScheduledTask, error)
GetJobs(role string) (*aurora.Response, *aurora.GetJobsResult_, error)
GetPendingReason(query *aurora.TaskQuery) (pendingReasons []*aurora.PendingReason, e error)
JobUpdateDetails(updateQuery aurora.JobUpdateQuery) (*aurora.Response, error)
KillJob(key *aurora.JobKey) (*aurora.Response, error)
KillInstances(key *aurora.JobKey, instances ...int32) (*aurora.Response, error)
@@ -72,6 +73,7 @@ type Realis interface {
// Admin functions
DrainHosts(hosts ...string) (*aurora.Response, *aurora.DrainHostsResult_, error)
StartMaintenance(hosts ...string) (*aurora.Response, *aurora.StartMaintenanceResult_, error)
EndMaintenance(hosts ...string) (*aurora.Response, *aurora.EndMaintenanceResult_, error)
MaintenanceStatus(hosts ...string) (*aurora.Response, *aurora.MaintenanceStatusResult_, error)
SetQuota(role string, cpu *float64, ram *int64, disk *int64) (*aurora.Response, error)
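
The hunks above add GetPendingReason and StartMaintenance to the public Realis interface. A minimal sketch of the agent-maintenance lifecycle these methods enable (the client r, the host name, and the GetStatuses getter are assumptions, not part of this diff):

```go
package main

import (
	"log"

	realis "github.com/paypal/gorealis"
)

// drainForUpgrade sketches the maintenance lifecycle: schedule maintenance
// on an agent, inspect its status, then end it. Placeholder names only.
func drainForUpgrade(r realis.Realis, host string) {
	if _, _, err := r.StartMaintenance(host); err != nil {
		log.Fatalf("start maintenance: %v", err)
	}
	if _, status, err := r.MaintenanceStatus(host); err == nil && status != nil {
		log.Printf("maintenance status: %+v", status.GetStatuses())
	}
	if _, _, err := r.EndMaintenance(host); err != nil {
		log.Fatalf("end maintenance: %v", err)
	}
}

func main() {}
```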
@@ -249,6 +251,10 @@ func newTBinTransport(url string, timeout int, config *RealisConfig) (thrift.TTr
return trans, err
}
// This client implementation of the realis interface uses a retry mechanism for all Thrift calls.
// It will retry any call that results in a temporary failure, as well as calls that fail due to an
// EOF being returned by the HTTP client. Most permanent failures are caught by the
// thriftCallWithRetries function and are not retried, but there may be corner cases not yet handled.
func NewRealisClient(options ...ClientOption) (Realis, error) {
config := &RealisConfig{}
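
The retry policy described in the comment above boils down to: retry temporary failures (including EOFs from the HTTP client), fail fast on permanent ones. A simplified, self-contained model of that policy, not the library's actual implementation:

```go
package main

import (
	"errors"
	"fmt"
	"time"
)

var errPermanent = errors.New("permanent failure")

// withRetries keeps retrying temporary failures with a fixed backoff and
// returns permanent failures immediately.
func withRetries(attempts int, backoff time.Duration, call func() error) error {
	var lastErr error
	for i := 0; i < attempts; i++ {
		if lastErr = call(); lastErr == nil {
			return nil
		}
		if lastErr == errPermanent {
			return lastErr // permanent: do not retry
		}
		time.Sleep(backoff) // temporary (e.g. EOF from the http client): retry
	}
	return fmt.Errorf("retries exhausted: %v", lastErr)
}

func main() {
	tries := 0
	err := withRetries(3, 10*time.Millisecond, func() error {
		tries++
		if tries < 3 {
			return errors.New("temporary: connection reset") // retried
		}
		return nil
	})
	fmt.Println(tries, err) // 3 <nil>
}
```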
@@ -441,7 +447,7 @@ func newTJSONConfig(url string, timeoutms int, config *RealisConfig) (*RealisCon
httpTrans := (trans).(*thrift.THttpClient)
httpTrans.SetHeader("Content-Type", "application/x-thrift")
httpTrans.SetHeader("User-Agent", "GoRealis v"+VERSION)
httpTrans.SetHeader("User-Agent", "gorealis v"+VERSION)
return &RealisConfig{transport: trans, protoFactory: thrift.NewTJSONProtocolFactory()}, nil
}
@@ -458,7 +464,7 @@ func newTBinaryConfig(url string, timeoutms int, config *RealisConfig) (*RealisC
httpTrans.SetHeader("Accept", "application/vnd.apache.thrift.binary")
httpTrans.SetHeader("Content-Type", "application/vnd.apache.thrift.binary")
httpTrans.SetHeader("User-Agent", "GoRealis v"+VERSION)
httpTrans.SetHeader("User-Agent", "gorealis v"+VERSION)
return &RealisConfig{transport: trans, protoFactory: thrift.NewTBinaryProtocolFactoryDefault()}, nil
@@ -474,6 +480,9 @@ func (r *realisClient) ReestablishConn() error {
r.logger.Println("Re-establishing Connection to Aurora")
r.Close()
r.lock.Lock()
defer r.lock.Unlock()
// Recreate connection from scratch using original options
newRealis, err := NewRealisClient(r.config.options...)
if err != nil {
@@ -496,6 +505,10 @@ func (r *realisClient) ReestablishConn() error {
// Releases resources associated with the realis client.
func (r *realisClient) Close() {
r.lock.Lock()
defer r.lock.Unlock()
r.client.Transport.Close()
r.readonlyClient.Transport.Close()
r.adminClient.Transport.Close()
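
ReestablishConn and Close now pair each Lock with a deferred Unlock, so the mutex is released on every exit path, including panics and any early returns added later. A minimal illustration of the pattern with placeholder types:

```go
package main

import "sync"

type conn struct {
	lock sync.Mutex
	open bool
}

func (c *conn) Close() {
	c.lock.Lock()
	defer c.lock.Unlock() // runs no matter how the function exits
	c.open = false
}

func main() {
	c := &conn{open: true}
	c.Close()
}
```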
@@ -553,14 +566,14 @@ func (r *realisClient) GetJobs(role string) (*aurora.Response, *aurora.GetJobsRe
return r.readonlyClient.GetJobs(role)
})
-if resp.GetResult_() != nil {
-result = resp.GetResult_().GetJobsResult_()
-}
if retryErr != nil {
return nil, result, errors.Wrap(retryErr, "Error getting Jobs from Aurora Scheduler")
}
+if resp.GetResult_() != nil {
+result = resp.GetResult_().GetJobsResult_()
+}
return resp, result, nil
}
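
This reordering is the recurring fix in this commit: when retryErr is non-nil, resp may be nil, so reading resp.GetResult_() before checking the error risks a nil-pointer panic. The safe shape, distilled into a sketch (the helper name is hypothetical):

```go
package main

import "github.com/paypal/gorealis/gen-go/apache/aurora"

// extractResult shows the generic shape of the fix applied throughout
// this commit: check retryErr before touching resp.
func extractResult(resp *aurora.Response, retryErr error) (*aurora.Result_, error) {
	if retryErr != nil {
		return nil, retryErr // resp may be nil here; do not dereference it
	}
	return resp.GetResult_(), nil
}

func main() {}
```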
@@ -633,7 +646,7 @@ func (r *realisClient) CreateService(auroraJob Job, settings *aurora.JobUpdateSe
return resp, nil, errors.Wrap(err, "unable to create service")
}
-if resp != nil && resp.GetResult_() != nil {
+if resp.GetResult_() != nil {
return resp, resp.GetResult_().GetStartJobUpdateResult_(), nil
}
@@ -862,6 +875,30 @@ func (r *realisClient) GetTaskStatus(query *aurora.TaskQuery) (tasks []*aurora.S
return response.ScheduleStatusResult(resp).GetTasks(), nil
}
// GetPendingReason returns the reasons why tasks are in a PENDING state.
func (r *realisClient) GetPendingReason(query *aurora.TaskQuery) (pendingReasons []*aurora.PendingReason, e error) {
r.logger.DebugPrintf("GetPendingReason Thrift Payload: %+v\n", query)
resp, retryErr := r.thriftCallWithRetries(func() (*aurora.Response, error) {
return r.client.GetPendingReason(query)
})
if retryErr != nil {
return nil, errors.Wrap(retryErr, "Error querying Aurora Scheduler for pending Reasons")
}
var result map[*aurora.PendingReason]bool
if resp.GetResult_() != nil {
result = resp.GetResult_().GetGetPendingReasonResult_().GetReasons()
}
for reason := range result {
pendingReasons = append(pendingReasons, reason)
}
return pendingReasons, nil
}
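
A caller of the new method might look like the following sketch; the job key values are placeholders, and GetTaskId/GetReason are assumed to be the usual thrift-generated getters on aurora.PendingReason:

```go
package main

import (
	"fmt"
	"log"

	realis "github.com/paypal/gorealis"
	"github.com/paypal/gorealis/gen-go/apache/aurora"
)

// printPendingReasons assumes an already-connected client r.
func printPendingReasons(r realis.Realis) {
	role, env, name := "vagrant", "prod", "hello_world" // placeholder job key
	taskQ := &aurora.TaskQuery{Role: &role, Environment: &env, JobName: &name}

	reasons, err := r.GetPendingReason(taskQ)
	if err != nil {
		log.Fatalf("unable to fetch pending reasons: %v", err)
	}
	for _, pr := range reasons {
		// GetTaskId/GetReason are the thrift-generated getters (assumption).
		fmt.Printf("task %s pending: %s\n", pr.GetTaskId(), pr.GetReason())
	}
}

func main() {}
```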
// GetTasksWithoutConfigs returns information about tasks, excluding their task configuration objects.
func (r *realisClient) GetTasksWithoutConfigs(query *aurora.TaskQuery) (tasks []*aurora.ScheduledTask, e error) {
@@ -973,14 +1010,45 @@ func (r *realisClient) DrainHosts(hosts ...string) (*aurora.Response, *aurora.Dr
return r.adminClient.DrainHosts(drainList)
})
-if resp != nil && resp.GetResult_() != nil {
+if retryErr != nil {
+return resp, result, errors.Wrap(retryErr, "Unable to recover connection")
+}
+if resp.GetResult_() != nil {
result = resp.GetResult_().GetDrainHostsResult_()
}
return resp, result, nil
}
func (r *realisClient) StartMaintenance(hosts ...string) (*aurora.Response, *aurora.StartMaintenanceResult_, error) {
var result *aurora.StartMaintenanceResult_
if len(hosts) == 0 {
return nil, nil, errors.New("no hosts provided to start maintenance on")
}
hostList := aurora.NewHosts()
hostList.HostNames = make(map[string]bool)
for _, host := range hosts {
hostList.HostNames[host] = true
}
r.logger.DebugPrintf("StartMaintenance Thrift Payload: %v\n", hostList)
resp, retryErr := r.thriftCallWithRetries(func() (*aurora.Response, error) {
return r.adminClient.StartMaintenance(hostList)
})
if retryErr != nil {
return resp, result, errors.Wrap(retryErr, "Unable to recover connection")
}
if resp.GetResult_() != nil {
result = resp.GetResult_().GetStartMaintenanceResult_()
}
return resp, result, nil
}
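
Note the map[string]bool behind HostNames: thrift's Go bindings model set<T> as map[T]bool, which is also why GetPendingReason above drains keys out of a map[*aurora.PendingReason]bool. A compact, runnable illustration of the convention:

```go
package main

import "fmt"

func main() {
	// thrift's Go bindings generate set<string> as map[string]bool, so
	// inserting a member means setting its key to true...
	hostNames := make(map[string]bool)
	for _, h := range []string{"agent-1", "agent-2"} {
		hostNames[h] = true
	}
	// ...and iterating a set means ranging over the keys, exactly as
	// GetPendingReason does with its map[*aurora.PendingReason]bool result.
	for h := range hostNames {
		fmt.Println(h)
	}
}
```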
@@ -1004,14 +1072,14 @@ func (r *realisClient) EndMaintenance(hosts ...string) (*aurora.Response, *auror
return r.adminClient.EndMaintenance(hostList)
})
-if resp.GetResult_() != nil {
-result = resp.GetResult_().GetEndMaintenanceResult_()
-}
if retryErr != nil {
return resp, result, errors.Wrap(retryErr, "Unable to recover connection")
}
+if resp.GetResult_() != nil {
+result = resp.GetResult_().GetEndMaintenanceResult_()
+}
return resp, result, nil
}
@@ -1037,14 +1105,14 @@ func (r *realisClient) MaintenanceStatus(hosts ...string) (*aurora.Response, *au
return r.adminClient.MaintenanceStatus(hostList)
})
-if resp.GetResult_() != nil {
-result = resp.GetResult_().GetMaintenanceStatusResult_()
-}
if retryErr != nil {
return resp, result, errors.Wrap(retryErr, "Unable to recover connection")
}
+if resp.GetResult_() != nil {
+result = resp.GetResult_().GetMaintenanceStatusResult_()
+}
return resp, result, nil
}
@@ -1063,13 +1131,12 @@ func (r *realisClient) SetQuota(role string, cpu *float64, ramMb *int64, diskMb
quota.Resources[c] = true
quota.Resources[d] = true
resp, retryErr := r.thriftCallWithRetries(func() (*aurora.Response, error) {
-resp, retryErr := r.adminClient.SetQuota(role, quota)
-if retryErr != nil {
-return nil, errors.Wrap(retryErr, "Unable to set role quota")
-}
-return resp, nil
+return r.adminClient.SetQuota(role, quota)
})
+if retryErr != nil {
+return resp, errors.Wrap(retryErr, "Unable to set role quota")
+}
return resp, retryErr
}
@@ -1078,14 +1145,12 @@ func (r *realisClient) SetQuota(role string, cpu *float64, ramMb *int64, diskMb
func (r *realisClient) GetQuota(role string) (*aurora.Response, error) {
resp, retryErr := r.thriftCallWithRetries(func() (*aurora.Response, error) {
-resp, retryErr := r.adminClient.GetQuota(role)
-if retryErr != nil {
-return nil, errors.Wrap(retryErr, "Unable to get role quota")
-}
-return resp, nil
+return r.adminClient.GetQuota(role)
})
+if retryErr != nil {
+return resp, errors.Wrap(retryErr, "Unable to get role quota")
+}
return resp, retryErr
}
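
A plausible motivation for moving errors.Wrap out of the retry closures in SetQuota and GetQuota: thriftCallWithRetries classifies failures by type-asserting on the returned error (see the retry.go hunk below), and wrapping with github.com/pkg/errors hides the concrete type from such an assertion. A self-contained demonstration of that effect:

```go
package main

import (
	"fmt"
	"io"
	"net/url"

	"github.com/pkg/errors"
)

func main() {
	var raw error = &url.Error{Op: "Post", URL: "http://example.com", Err: io.EOF}

	_, ok := raw.(*url.Error)
	fmt.Println("assertion on raw error:", ok) // true

	wrapped := errors.Wrap(raw, "extra context")
	_, ok = wrapped.(*url.Error)
	fmt.Println("assertion after errors.Wrap:", ok) // false: concrete type is hidden
}
```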


@@ -90,6 +90,70 @@ func TestNonExistentEndpoint(t *testing.T) {
}
func TestThriftBinary(t *testing.T) {
r, err := realis.NewRealisClient(realis.SchedulerUrl("http://192.168.33.7:8081"),
realis.BasicAuth("aurora", "secret"),
realis.TimeoutMS(20000),
realis.ThriftBinary())
assert.NoError(t, err)
role := "all"
taskQ := &aurora.TaskQuery{
Role: &role,
}
// Perform a simple API call to test Thrift Binary
_, err = r.GetTasksWithoutConfigs(taskQ)
assert.NoError(t, err)
r.Close()
}
func TestThriftJSON(t *testing.T) {
r, err := realis.NewRealisClient(realis.SchedulerUrl("http://192.168.33.7:8081"),
realis.BasicAuth("aurora", "secret"),
realis.TimeoutMS(20000),
realis.ThriftJSON())
assert.NoError(t, err)
role := "all"
taskQ := &aurora.TaskQuery{
Role: &role,
}
// Perform a simple API call to test Thrift JSON
_, err = r.GetTasksWithoutConfigs(taskQ)
assert.NoError(t, err)
r.Close()
}
func TestNoopLogger(t *testing.T) {
r, err := realis.NewRealisClient(realis.SchedulerUrl("http://192.168.33.7:8081"),
realis.BasicAuth("aurora", "secret"),
realis.SetLogger(realis.NoopLogger{}))
assert.NoError(t, err)
role := "all"
taskQ := &aurora.TaskQuery{
Role: &role,
}
// Perform a simple API call to exercise the NoopLogger option
_, err = r.GetTasksWithoutConfigs(taskQ)
assert.NoError(t, err)
r.Close()
}
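
The test above exercises the SetLogger option with realis.NoopLogger. A no-op logger presumably just satisfies the package's logging interface with empty methods; a sketch of the idea (the exact Logger interface is an assumption):

```go
package main

// Hypothetical shape of a do-nothing logger; gorealis's actual
// Logger interface may differ.
type Logger interface {
	Println(v ...interface{})
	Printf(format string, v ...interface{})
}

type NoopLogger struct{}

func (NoopLogger) Println(v ...interface{})               {}
func (NoopLogger) Printf(format string, v ...interface{}) {}

var _ Logger = NoopLogger{} // compile-time interface check

func main() {}
```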
func TestLeaderFromZK(t *testing.T) {
cluster := realis.GetDefaultClusterFromZKUrl("192.168.33.2:2181")
url, err := realis.LeaderFromZK(*cluster)
@@ -122,20 +186,17 @@ func TestRealisClient_CreateJob_Thermos(t *testing.T) {
Name("create_thermos_job_test").
ExecutorName(aurora.AURORA_EXECUTOR_NAME).
ExecutorData(string(thermosPayload)).
-CPU(1).
+CPU(.5).
RAM(64).
Disk(100).
IsService(true).
-InstanceCount(1).
+InstanceCount(2).
AddPorts(1)
-start := time.Now()
resp, err := r.CreateJob(job)
-end := time.Now()
assert.NoError(t, err)
assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode)
-fmt.Printf("Create call took %d ns\n", (end.UnixNano() - start.UnixNano()))
// Test Instances Monitor
success, err := monitor.Instances(job.JobKey(), job.GetInstanceCount(), 1, 50)
@@ -145,7 +206,7 @@ func TestRealisClient_CreateJob_Thermos(t *testing.T) {
//Fetch all Jobs
_, result, err := r.GetJobs(role)
fmt.Printf("GetJobs length: %+v \n", len(result.Configs))
-assert.Equal(t, len(result.Configs), 1)
+assert.Len(t, result.Configs, 1)
assert.NoError(t, err)
// Test asking the scheduler to perform a Snapshot
@@ -162,13 +223,14 @@ func TestRealisClient_CreateJob_Thermos(t *testing.T) {
// Tasks must exist for them to be killed
t.Run("TestRealisClient_KillJob_Thermos", func(t *testing.T) {
start := time.Now()
resp, err := r.KillJob(job.JobKey())
end := time.Now()
assert.NoError(t, err)
assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode)
fmt.Printf("Kill call took %d ns\n", (end.UnixNano() - start.UnixNano()))
success, err := monitor.Instances(job.JobKey(), 0, 1, 50)
assert.True(t, success)
assert.NoError(t, err)
})
}
@@ -192,6 +254,43 @@ func TestRealisClient_CreateJob_ExecutorDoesNotExist(t *testing.T) {
assert.Equal(t, aurora.ResponseCode_INVALID_REQUEST, resp.GetResponseCode())
}
// Test that GetPendingReason surfaces the reason a task is stuck in PENDING
func TestRealisClient_GetPendingReason(t *testing.T) {
env := "prod"
role := "vagrant"
name := "pending_reason_test"
// Create a single job
job := realis.NewJob().
Environment(env).
Role(role).
Name(name).
ExecutorName(aurora.AURORA_EXECUTOR_NAME).
ExecutorData(string(thermosPayload)).
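// Request far more CPU than the test cluster can offer so the task stays PENDING.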
CPU(1000).
RAM(64).
Disk(100).
InstanceCount(1)
resp, err := r.CreateJob(job)
assert.NoError(t, err)
assert.Equal(t, aurora.ResponseCode_OK, resp.ResponseCode)
taskQ := &aurora.TaskQuery{
Role: &role,
Environment: &env,
JobName: &name,
}
reasons, err := r.GetPendingReason(taskQ)
assert.NoError(t, err)
assert.Len(t, reasons, 1)
resp, err = r.KillJob(job.JobKey())
assert.NoError(t, err)
}
func TestRealisClient_CreateService_WithPulse_Thermos(t *testing.T) {
fmt.Println("Creating service")
@@ -394,6 +493,34 @@ func TestRealisClient_ScheduleCronJob_Thermos(t *testing.T) {
fmt.Printf("Deschedule cron call took %d ns\n", (end.UnixNano() - start.UnixNano()))
})
}
func TestRealisClient_StartMaintenance(t *testing.T) {
hosts := []string{"localhost"}
_, _, err := r.StartMaintenance(hosts...)
if err != nil {
fmt.Printf("error: %+v\n", err.Error())
os.Exit(1)
}
// Monitor change to SCHEDULED mode
hostResults, err := monitor.HostMaintenance(
hosts,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_SCHEDULED},
1,
50)
assert.Equal(t, map[string]bool{"localhost": true}, hostResults)
assert.NoError(t, err)
_, _, err = r.EndMaintenance(hosts...)
assert.NoError(t, err)
// Monitor change back to NONE mode
_, err = monitor.HostMaintenance(
hosts,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
5,
10)
assert.NoError(t, err)
}
func TestRealisClient_DrainHosts(t *testing.T) {
hosts := []string{"localhost"}
@@ -427,10 +554,7 @@ func TestRealisClient_DrainHosts(t *testing.T) {
t.Run("TestRealisClient_EndMaintenance", func(t *testing.T) {
_, _, err := r.EndMaintenance(hosts...)
-if err != nil {
-fmt.Printf("error: %+v\n", err.Error())
-os.Exit(1)
-}
+assert.NoError(t, err)
// Monitor change to DRAINING and DRAINED mode
_, err = monitor.HostMaintenance(


@@ -15,9 +15,12 @@
package realis
import (
"io"
"math/rand"
"net/url"
"time"
"git.apache.org/thrift.git/lib/go/thrift"
"github.com/paypal/gorealis/gen-go/apache/aurora"
"github.com/paypal/gorealis/response"
"github.com/pkg/errors"
@@ -88,7 +91,6 @@ func ExponentialBackoff(backoff Backoff, logger Logger, condition ConditionFunc)
}
if err != nil {
// If the error is temporary, continue retrying.
if !IsTemporary(err) {
return err
@@ -96,9 +98,7 @@ func ExponentialBackoff(backoff Backoff, logger Logger, condition ConditionFunc)
// Print out the temporary error we experienced.
logger.Println(err)
}
}
}
if curStep > 1 {
@@ -158,6 +158,22 @@ func (r *realisClient) thriftCallWithRetries(thriftCall auroraThriftCall) (*auro
// Print out the error to the user
r.logger.Printf("Client Error: %v\n", clientErr)
// Determine if error is a temporary URL error by going up the stack
e, ok := clientErr.(thrift.TTransportException)
if ok {
r.logger.DebugPrint("Encountered a transport exception")
e, ok := e.Err().(*url.Error)
if ok {
// An EOF error occurs when the server closes the client's read buffer, which is common
// when the server is overloaded; such calls should be retried. All other permanent
// errors will not be retried.
if e.Err != io.EOF && !e.Temporary() {
return nil, errors.Wrap(clientErr, "Permanent connection error")
}
}
}
// In the future, reestablish connection should be able to check if it is actually possible
// to make a thrift call to Aurora. For now, a reconnect should always lead to a retry.
r.ReestablishConn()
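
The classification above walks up the error chain: thrift.TTransportException, then *url.Error, then io.EOF or Temporary(). Restated as a standalone predicate for illustration (a sketch, not library code; Err() is the accessor used in the hunk above):

```go
package main

import (
	"io"
	"net/url"

	"git.apache.org/thrift.git/lib/go/thrift"
)

// isRetryable mirrors the classification thriftCallWithRetries performs above.
func isRetryable(clientErr error) bool {
	te, ok := clientErr.(thrift.TTransportException)
	if !ok {
		// Not a transport-level problem; defer to other checks (e.g. IsTemporary).
		return true
	}
	ue, ok := te.Err().(*url.Error)
	if !ok {
		return true
	}
	// EOF means the server closed the client's read side (commonly overload): retry.
	// Any other non-temporary URL error is treated as permanent.
	return ue.Err == io.EOF || ue.Temporary()
}

func main() {}
```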