Code cleanup, added the ability to attach a logger, added CreateService API

* Code cleanup: deleted multiple functions that had become stale. Removed the manual Cluster construction from the example now that creating a Cluster object by hand is no longer necessary.

* Cleaned up the ZK connection code by using the backoff function. Added an end-to-end test verifying that the host is retrieved correctly from ZK. Changed the clusters test to an external test package.

* Added a LeaderFromZK test to the end-to-end tests.

* Added a logger to RealisConfig so that users can attach their own loggers to the client. Logger is an interface that mirrors the most popular logging libraries; a type only needs Print, Println, and Printf to satisfy realis.Logger. The example client uses the standard library's log package; a minimal sketch follows this list.

* Redirected most fmt.Print* calls to the user-provided logger. By default, the logger is a no-op.

* Added CreateService to the Realis interface. It uses the StartJobUpdate API to create services instead of the createJob API.

* Bumped the version number inside the client in anticipation of a new release.
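For orientation before the diffs, here is a minimal sketch of attaching a logger through the new option; the scheduler address and credentials are placeholders, and everything else (NewRealisClient, SchedulerUrl, BasicAuth, SetLogger) is part of the API changed below:

    package main

    import (
        "log"
        "os"

        "github.com/paypal/gorealis"
    )

    func main() {
        // Any type with Print, Println, and Printf satisfies realis.Logger,
        // so *log.Logger from the standard library works directly.
        r, err := realis.NewRealisClient(
            realis.SchedulerUrl("http://192.168.33.7:8081"), // placeholder scheduler address
            realis.BasicAuth("aurora", "secret"),            // placeholder credentials
            realis.SetLogger(log.New(os.Stdout, "realis-debug: ", log.Ldate)),
        )
        if err != nil {
            log.Fatal(err)
        }
        defer r.Close()
    }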
Renan DelValle 2017-11-30 12:02:50 -08:00 committed by GitHub
parent 72b746e431
commit e614e04f27
8 changed files with 292 additions and 298 deletions


@@ -12,17 +12,18 @@
  * limitations under the License.
  */

-package realis
+package realis_test

 import (
 	"fmt"
 	"github.com/stretchr/testify/assert"
 	"testing"
+
+	"github.com/paypal/gorealis"
 )

 func TestLoadClusters(t *testing.T) {
-	clusters, err := LoadClusters("examples/clusters.json")
+	clusters, err := realis.LoadClusters("examples/clusters.json")
 	if err != nil {
 		fmt.Print(err)
 	}
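For context, the fragment below sketches how LoadClusters pairs with the ZKCluster client option; the file path and the "devcluster" entry name are illustrative, not fixed by the library:

    // Sketch: load cluster definitions from disk and connect via ZK discovery.
    clusters, err := realis.LoadClusters("examples/clusters.json")
    if err != nil {
        log.Fatal(err)
    }

    // "devcluster" is a hypothetical entry name inside clusters.json.
    cluster, ok := clusters["devcluster"]
    if !ok {
        log.Fatal("cluster not found in the file provided")
    }

    r, err := realis.NewRealisClient(
        realis.ZKCluster(&cluster),
        realis.BasicAuth(username, password), // credentials defined elsewhere
    )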


@@ -27,6 +27,7 @@ import (
 	"github.com/paypal/gorealis"
 	"github.com/paypal/gorealis/gen-go/apache/aurora"
 	"github.com/paypal/gorealis/response"
+	"log"
 )

 var cmd, executor, url, clustersConfig, clusterName, updateId, username, password, zkUrl, hostList string

@@ -45,11 +46,10 @@ func init() {
 	flag.StringVar(&zkUrl, "zkurl", "", "zookeeper url")
 	flag.StringVar(&hostList, "hostList", "", "Comma separated list of hosts to operate on")
 	flag.Parse()
-}
-
-func main() {

-	// Attempt to load leader from zookeeper
+	// Attempt to load leader from zookeeper using a
+	// cluster.json file used for the default aurora client if provided.
+	// This will override the provided url in the arguments
 	if clustersConfig != "" {
 		clusters, err := realis.LoadClusters(clustersConfig)
 		if err != nil {

@@ -59,7 +59,7 @@ func main() {
 		cluster, ok := clusters[clusterName]
 		if !ok {
-			fmt.Printf("Cluster %s chosen doesn't exist\n", clusterName)
+			fmt.Printf("Cluster %s doesn't exist in the file provided\n", clusterName)
 			os.Exit(1)
 		}

@@ -69,56 +69,42 @@ func main() {
 			os.Exit(1)
 		}
 	}
+}
+
+func main() {

 	var job realis.Job
 	var err error
 	var monitor *realis.Monitor
 	var r realis.Realis

-	var defaultBackoff = &realis.Backoff{
-		Steps:    2,
-		Duration: 10 * time.Second,
-		Factor:   2.0,
-		Jitter:   0.1,
+	clientOptions := []realis.ClientOption{
+		realis.BasicAuth(username, password),
+		realis.ThriftJSON(),
+		realis.TimeoutMS(CONNECTION_TIMEOUT),
+		realis.BackOff(&realis.Backoff{
+			Steps:    2,
+			Duration: 10 * time.Second,
+			Factor:   2.0,
+			Jitter:   0.1,
+		}),
+		realis.SetLogger(log.New(os.Stdout, "realis-debug: ", log.Ldate)),
 	}

 	//check if zkUrl is available.
 	if zkUrl != "" {
 		fmt.Println("zkUrl: ", zkUrl)
-		cluster := &realis.Cluster{Name: "example",
-			AuthMechanism: "UNAUTHENTICATED",
-			ZK:            zkUrl,
-			SchedZKPath:   "/aurora/scheduler",
-			AgentRunDir:   "latest",
-			AgentRoot:     "/var/lib/mesos",
-		}
-		fmt.Printf("cluster: %+v \n", cluster)
-
-		r, err = realis.NewRealisClient(realis.ZKUrl(zkUrl),
-			realis.BasicAuth(username, password),
-			realis.ThriftJSON(),
-			realis.TimeoutMS(CONNECTION_TIMEOUT),
-			realis.BackOff(defaultBackoff))
-		if err != nil {
-			fmt.Println(err)
-			os.Exit(1)
-		}
-		monitor = &realis.Monitor{r}
+		clientOptions = append(clientOptions, realis.ZKUrl(zkUrl))
 	} else {
-		r, err = realis.NewRealisClient(realis.SchedulerUrl(url),
-			realis.BasicAuth(username, password),
-			realis.ThriftJSON(),
-			realis.TimeoutMS(CONNECTION_TIMEOUT),
-			realis.BackOff(defaultBackoff))
-		if err != nil {
-			fmt.Println(err)
-			os.Exit(1)
-		}
-		monitor = &realis.Monitor{r}
+		clientOptions = append(clientOptions, realis.SchedulerUrl(url))
 	}

+	r, err = realis.NewRealisClient(clientOptions...)
+	if err != nil {
+		fmt.Println(err)
+		os.Exit(1)
+	}
+	monitor = &realis.Monitor{r}
 	defer r.Close()

 	switch executor {

@@ -197,6 +183,29 @@ func main() {
 			}
 		}
+		break
+
+	case "createService":
+		// Create a service with three instances using the update API instead of the createJob API
+		fmt.Println("Creating service")
+		settings := realis.NewUpdateSettings()
+		job.InstanceCount(3)
+		_, resp, err := r.CreateService(job, *settings)
+		if err != nil {
+			fmt.Println(err)
+			os.Exit(1)
+		}
+		fmt.Println(resp.String())
+
+		if ok, err := monitor.JobUpdate(*resp.GetKey(), 5, 50); !ok || err != nil {
+			_, err := r.KillJob(job.JobKey())
+			if err != nil {
+				fmt.Println(err)
+				os.Exit(1)
+			}
+			fmt.Println("ok: ", ok)
+			fmt.Println("err: ", err)
+		}
 		break
 	case "createDocker":
 		fmt.Println("Creating a docker based job")

@@ -352,7 +361,6 @@ func main() {
 		}
 		fmt.Println(resp.String())
 		break
-
 	case "flexDown":
 		fmt.Println("Flexing down job")

@@ -407,7 +415,6 @@ func main() {
 		jobUpdateKey := response.JobUpdateKey(resp)
 		monitor.JobUpdate(*jobUpdateKey, 5, 500)
 		break
-
 	case "updateDetails":
 		resp, err := r.JobUpdateDetails(aurora.JobUpdateQuery{

@@ -457,9 +464,7 @@ func main() {
 		}
 		print(config.String())
 		break
-
-
 	case "updatesummary":
 		fmt.Println("Getting job update summary")
 		jobquery := &aurora.JobUpdateQuery{
 			Role: &job.JobKey().Role,

@@ -471,7 +476,6 @@ func main() {
 			os.Exit(1)
 		}
 		fmt.Println(updatesummary)
-
 	case "taskStatus":
 		fmt.Println("Getting task status")
 		taskQ := &aurora.TaskQuery{Role: job.JobKey().Role,

@@ -485,7 +489,6 @@ func main() {
 		}
 		fmt.Printf("length: %d\n ", len(tasks))
 		fmt.Printf("tasks: %+v\n", tasks)
-
 	case "tasksWithoutConfig":
 		fmt.Println("Getting task status")
 		taskQ := &aurora.TaskQuery{Role: job.JobKey().Role,

@@ -499,7 +502,6 @@ func main() {
 		}
 		fmt.Printf("length: %d\n ", len(tasks))
 		fmt.Printf("tasks: %+v\n", tasks)
-
 	case "drainHosts":
 		fmt.Println("Setting hosts to DRAINING")
 		if hostList == "" {

@@ -531,7 +533,6 @@ func main() {
 		}
 		fmt.Print(result.String())
-
 	case "endMaintenance":
 		fmt.Println("Setting hosts to ACTIVE")
 		if hostList == "" {

@@ -563,7 +564,6 @@ func main() {
 		}
 		fmt.Print(result.String())
-
 	default:
 		fmt.Println("Command not supported")
 		os.Exit(1)

job.go

@@ -151,8 +151,6 @@ func (j *AuroraJob) RAM(ram int64) Job {
 	*j.resources["ram"].RamMb = ram
 	j.jobConfig.TaskConfig.RamMb = ram //Will be deprecated soon
-
 	return j
-
 }


@@ -16,7 +16,6 @@
 package realis

 import (
-	"fmt"
 	"time"

 	"github.com/paypal/gorealis/gen-go/apache/aurora"

@@ -59,7 +58,7 @@ func (m *Monitor) JobUpdate(updateKey aurora.JobUpdateKey, interval int, timeout
 	updateDetail := response.JobUpdateDetails(respDetail)

 	if len(updateDetail) == 0 {
-		fmt.Println("No update found")
+		m.Client.RealisConfig().logger.Println("No update found")
 		return false, errors.New("No update found for " + updateKey.String())
 	}
 	status := updateDetail[0].Update.Summary.State.Status

@@ -70,13 +69,13 @@ func (m *Monitor) JobUpdate(updateKey aurora.JobUpdateKey, interval int, timeout
 		// if we encounter an inactive state and it is not at rolled forward, update failed
 		switch status {
 		case aurora.JobUpdateStatus_ROLLED_FORWARD:
-			fmt.Println("Update succeded")
+			m.Client.RealisConfig().logger.Println("Update succeded")
 			return true, nil
 		case aurora.JobUpdateStatus_FAILED:
-			fmt.Println("Update failed")
+			m.Client.RealisConfig().logger.Println("Update failed")
 			return false, errors.New(UpdateFailed)
 		case aurora.JobUpdateStatus_ROLLED_BACK:
-			fmt.Println("rolled back")
+			m.Client.RealisConfig().logger.Println("rolled back")
 			return false, errors.New(RolledBack)
 		default:
 			return false, nil
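Driving this monitor from caller code looks roughly like the fragment below; r, job, and updateKey mirror names used in the example client, and the interval and timeout values (5 and 50, as the example client passes) are illustrative:

    // Poll the update identified by updateKey, and kill the job
    // if the update never rolls forward.
    monitor := &realis.Monitor{r}
    if ok, err := monitor.JobUpdate(updateKey, 5, 50); !ok || err != nil {
        if _, killErr := r.KillJob(job.JobKey()); killErr != nil {
            log.Fatal(killErr)
        }
    }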

realis.go

@@ -31,28 +31,29 @@ import (
 	"github.com/pkg/errors"
 )

-const VERSION = "1.0.4"
+const VERSION = "1.1.0"

 type Realis interface {
 	AbortJobUpdate(updateKey aurora.JobUpdateKey, message string) (*aurora.Response, error)
 	AddInstances(instKey aurora.InstanceKey, count int32) (*aurora.Response, error)
-	RemoveInstances(key *aurora.JobKey, count int32) (*aurora.Response, error)
 	CreateJob(auroraJob Job) (*aurora.Response, error)
+	CreateService(auroraJob Job, settings UpdateSettings) (*aurora.Response, *aurora.StartJobUpdateResult_, error)
 	DescheduleCronJob(key *aurora.JobKey) (*aurora.Response, error)
-	GetTaskStatus(query *aurora.TaskQuery) ([]*aurora.ScheduledTask, error)
-	GetTasksWithoutConfigs(query *aurora.TaskQuery) ([]*aurora.ScheduledTask, error)
 	FetchTaskConfig(instKey aurora.InstanceKey) (*aurora.TaskConfig, error)
 	GetInstanceIds(key *aurora.JobKey, states map[aurora.ScheduleStatus]bool) (map[int32]bool, error)
+	GetJobUpdateSummaries(jobUpdateQuery *aurora.JobUpdateQuery) (*aurora.Response, error)
+	GetTaskStatus(query *aurora.TaskQuery) ([]*aurora.ScheduledTask, error)
+	GetTasksWithoutConfigs(query *aurora.TaskQuery) ([]*aurora.ScheduledTask, error)
 	JobUpdateDetails(updateQuery aurora.JobUpdateQuery) (*aurora.Response, error)
 	KillJob(key *aurora.JobKey) (*aurora.Response, error)
 	KillInstances(key *aurora.JobKey, instances ...int32) (*aurora.Response, error)
+	RemoveInstances(key *aurora.JobKey, count int32) (*aurora.Response, error)
 	RestartInstances(key *aurora.JobKey, instances ...int32) (*aurora.Response, error)
 	RestartJob(key *aurora.JobKey) (*aurora.Response, error)
 	RollbackJobUpdate(key aurora.JobUpdateKey, message string) (*aurora.Response, error)
 	ScheduleCronJob(auroraJob Job) (*aurora.Response, error)
 	StartJobUpdate(updateJob *UpdateJob, message string) (*aurora.Response, error)
 	StartCronJob(key *aurora.JobKey) (*aurora.Response, error)
-	GetJobUpdateSummaries(jobUpdateQuery *aurora.JobUpdateQuery) (*aurora.Response, error)
 	ReestablishConn() error
 	RealisConfig() *RealisConfig
 	Close()

@@ -68,69 +69,101 @@ type realisClient struct {
 	client         *aurora.AuroraSchedulerManagerClient
 	readonlyClient *aurora.ReadOnlySchedulerClient
 	adminClient    *aurora.AuroraAdminClient
+	logger         Logger
 }

-type option func(*RealisConfig)
+type RealisConfig struct {
+	username, password          string
+	url                         string
+	timeoutms                   int
+	binTransport, jsonTransport bool
+	cluster                     *Cluster
+	backoff                     *Backoff
+	transport                   thrift.TTransport
+	protoFactory                thrift.TProtocolFactory
+	logger                      Logger
+}
+
+type Backoff struct {
+	Duration time.Duration // the base duration
+	Factor   float64       // Duration is multipled by factor each iteration
+	Jitter   float64       // The amount of jitter applied each iteration
+	Steps    int           // Exit with error after this many steps
+}
+
+var defaultBackoff = Backoff{
+	Steps:    3,
+	Duration: 10 * time.Second,
+	Factor:   5.0,
+	Jitter:   0.1,
+}
+
+type ClientOption func(*RealisConfig)

 //Config sets for options in RealisConfig.
-func BasicAuth(username, password string) option {
+func BasicAuth(username, password string) ClientOption {
 	return func(config *RealisConfig) {
 		config.username = username
 		config.password = password
 	}
 }

-func SchedulerUrl(url string) option {
+func SchedulerUrl(url string) ClientOption {
 	return func(config *RealisConfig) {
 		config.url = url
 	}
 }

-func TimeoutMS(timeout int) option {
+func TimeoutMS(timeout int) ClientOption {
 	return func(config *RealisConfig) {
 		config.timeoutms = timeout
 	}
 }

-func ZKCluster(cluster *Cluster) option {
+func ZKCluster(cluster *Cluster) ClientOption {
 	return func(config *RealisConfig) {
 		config.cluster = cluster
 	}
 }

-func ZKUrl(url string) option {
+func ZKUrl(url string) ClientOption {
 	return func(config *RealisConfig) {
 		config.cluster = GetDefaultClusterFromZKUrl(url)
 	}
 }

-func Retries(backoff *Backoff) option {
+func Retries(backoff *Backoff) ClientOption {
 	return func(config *RealisConfig) {
 		config.backoff = backoff
 	}
 }

-func ThriftJSON() option {
+func ThriftJSON() ClientOption {
 	return func(config *RealisConfig) {
 		config.jsonTransport = true
 	}
 }

-func ThriftBinary() option {
+func ThriftBinary() ClientOption {
 	return func(config *RealisConfig) {
 		config.binTransport = true
 	}
 }

-func BackOff(b *Backoff) option {
+func BackOff(b *Backoff) ClientOption {
 	return func(config *RealisConfig) {
 		config.backoff = b
 	}
 }

+// Using the word set to avoid name collision with Interface
+func SetLogger(l Logger) ClientOption {
+	return func(config *RealisConfig) {
+		config.logger = l
+	}
+}
+
 func newTJSONTransport(url string, timeout int) (thrift.TTransport, error) {
 	trans, err := defaultTTransport(url, timeout)
 	if err != nil {
 		return nil, errors.Wrap(err, "Error creating realis")

@@ -155,35 +188,41 @@ func newTBinTransport(url string, timeout int) (thrift.TTransport, error) {
 	return trans, err
 }

-func NewRealisClient(options ...option) (Realis, error) {
+func NewRealisClient(options ...ClientOption) (Realis, error) {
 	config := &RealisConfig{}
-	fmt.Println(" options length: ", len(options))
+
+	// Default configs
+	config.timeoutms = 10000
+	config.backoff = &defaultBackoff
+	config.logger = NoopLogger{}
+
+	// Override default configs where necessary
 	for _, opt := range options {
 		opt(config)
 	}
-	//Default timeout
-	if config.timeoutms == 0 {
-		config.timeoutms = 10000
-	}
+
+	config.logger.Println("Number of options applied to config: ", len(options))
+
 	//Set default Transport to JSON if needed.
 	if !config.jsonTransport && !config.binTransport {
 		config.jsonTransport = true
 	}

 	var url string
 	var err error

-	//Cluster or URL?
+	// Determine how to get information to connect to the scheduler.
+	// Prioritize getting leader from ZK over using a direct URL.
 	if config.cluster != nil {
 		url, err = LeaderFromZK(*config.cluster)
 		// If ZK is configured, throw an error if the leader is unable to be determined
 		if err != nil {
 			return nil, errors.Wrap(err, "LeaderFromZK error")
 		}
-		fmt.Println("schedURLFromZK: ", url)
+		config.logger.Println("Scheduler URL from ZK: ", url)
 	} else if config.url != "" {
-		fmt.Println("Scheduler URL: ", config.url)
 		url = config.url
+		config.logger.Println("Scheduler URL: ", url)
 	} else {
 		return nil, errors.New("Incomplete Options -- url or cluster required")
 	}

@@ -193,9 +232,9 @@ func NewRealisClient(options ...option) (Realis, error) {
 		if err != nil {
 			return nil, errors.Wrap(err, "Error creating realis")
 		}
+
 		config.transport = trans
 		config.protoFactory = thrift.NewTJSONProtocolFactory()
-
 	} else if config.binTransport {
 		trans, err := newTBinTransport(url, config.timeoutms)
 		if err != nil {

@@ -205,55 +244,22 @@ func NewRealisClient(options ...option) (Realis, error) {
 		config.protoFactory = thrift.NewTBinaryProtocolFactoryDefault()
 	}

+	config.logger.Printf("gorealis config url: %+v\n", config.url)
+
 	//Basic Authentication.
 	if config.username != "" && config.password != "" {
 		AddBasicAuth(config, config.username, config.password)
 	}

-	//Set defaultBackoff if required.
-	if config.backoff == nil {
-		config.backoff = &defaultBackoff
-	} else {
-		defaultBackoff = *config.backoff
-		fmt.Printf(" updating default backoff : %+v\n", *config.backoff)
-	}
-
-	fmt.Printf("gorealis config url: %+v\n", config.url)
-
 	return &realisClient{
 		config:         config,
 		client:         aurora.NewAuroraSchedulerManagerClientFactory(config.transport, config.protoFactory),
 		readonlyClient: aurora.NewReadOnlySchedulerClientFactory(config.transport, config.protoFactory),
-		adminClient:    aurora.NewAuroraAdminClientFactory(config.transport, config.protoFactory)}, nil
+		adminClient:    aurora.NewAuroraAdminClientFactory(config.transport, config.protoFactory),
+		logger:         config.logger}, nil
 }

-// Wrapper object to provide future flexibility
-type RealisConfig struct {
-	username, password          string
-	url                         string
-	timeoutms                   int
-	binTransport, jsonTransport bool
-	cluster                     *Cluster
-	backoff                     *Backoff
-	transport                   thrift.TTransport
-	protoFactory                thrift.TProtocolFactory
-}
-
-type Backoff struct {
-	Duration time.Duration // the base duration
-	Factor   float64       // Duration is multipled by factor each iteration
-	Jitter   float64       // The amount of jitter applied each iteration
-	Steps    int           // Exit with error after this many steps
-}
-
-var defaultBackoff = Backoff{
-	Steps:    3,
-	Duration: 10 * time.Second,
-	Factor:   5.0,
-	Jitter:   0.1,
-}
-
 // Jitter returns a time.Duration between duration and duration + maxFactor *
 // duration.
 //

@@ -267,33 +273,6 @@ func Jitter(duration time.Duration, maxFactor float64) time.Duration {
 	return wait
 }

-// Create a new Client with Cluster information and other details.
-func NewDefaultClientUsingCluster(cluster *Cluster, user, passwd string) (Realis, error) {
-	url, err := LeaderFromZK(*cluster)
-	if err != nil {
-		fmt.Println(err)
-		return nil, err
-	}
-	fmt.Printf(" url: %s\n", url)
-
-	//Create new configuration with default transport layer
-	config, err := newDefaultConfig(url, 10000)
-	if err != nil {
-		fmt.Println(err)
-		return nil, err
-	}
-	config.username = user
-	config.password = passwd
-	config.cluster = cluster
-	config.url = ""
-	// Configured for vagrant
-	AddBasicAuth(config, user, passwd)
-	r := newClient(config)
-	return r, nil
-}
-
 func GetDefaultClusterFromZKUrl(zkurl string) *Cluster {
 	return &Cluster{Name: "defaultCluster",
 		AuthMechanism: "UNAUTHENTICATED",

@@ -304,65 +283,6 @@ func GetDefaultClusterFromZKUrl(zkurl string) *Cluster {
 	}
 }

-//This api would create default cluster object..
-func NewDefaultClientUsingZKUrl(zkUrl, user, passwd string) (Realis, error) {
-	fmt.Printf(" zkUrl: %s\n", zkUrl)
-	cluster := GetDefaultClusterFromZKUrl(zkUrl)
-	url, err := LeaderFromZK(*cluster)
-	if err != nil {
-		fmt.Println(err)
-		return nil, err
-	}
-	fmt.Printf(" url: %s\n", url)
-
-	//Create new configuration with default transport layer
-	config, err := newDefaultConfig(url, 10000)
-	if err != nil {
-		fmt.Println(err)
-		return nil, err
-	}
-	config.username = user
-	config.password = passwd
-	config.cluster = cluster
-	config.url = ""
-	// Configured for vagrant
-	AddBasicAuth(config, user, passwd)
-	r := newClient(config)
-	return r, nil
-}
-
-func NewDefaultClientUsingUrl(url, user, passwd string) (Realis, error) {
-	fmt.Printf(" url: %s\n", url)
-	//Create new configuration with default transport layer
-	config, err := newDefaultConfig(url, 10000)
-	if err != nil {
-		fmt.Println(err)
-		return nil, err
-	}
-	config.username = user
-	config.password = passwd
-	config.url = url
-	config.cluster = nil
-	// Configured for vagrant
-	AddBasicAuth(config, user, passwd)
-	config.backoff = &Backoff{Steps: 2, Duration: 10 * time.Second, Factor: 2.0, Jitter: 0.1}
-	r := newClient(config)
-	return r, nil
-}
-
-// Create a new Client with a default transport layer
-func newClient(realisconfig *RealisConfig) Realis {
-	return &realisClient{
-		config:         realisconfig,
-		client:         aurora.NewAuroraSchedulerManagerClientFactory(realisconfig.transport, realisconfig.protoFactory),
-		readonlyClient: aurora.NewReadOnlySchedulerClientFactory(realisconfig.transport, realisconfig.protoFactory),
-		adminClient:    aurora.NewAuroraAdminClientFactory(realisconfig.transport, realisconfig.protoFactory)}
-}
-
 // Creates a default Thrift Transport object for communications in gorealis using an HTTP Post Client
 func defaultTTransport(urlstr string, timeoutms int) (thrift.TTransport, error) {
 	jar, err := cookiejar.New(nil)

@@ -439,7 +359,7 @@ func basicAuth(username, password string) string {
 func (r *realisClient) ReestablishConn() error {
 	//close existing connection..
-	fmt.Println("ReestablishConn begin ....")
+	r.logger.Println("ReestablishConn begin ....")
 	r.Close()
 	//First check cluster object for re-establish; if not available then try with scheduler url.
 	//var config *RealisConfig

@@ -452,7 +372,7 @@ func (r *realisClient) ReestablishConn() error {
 		if err != nil {
 			fmt.Errorf("LeaderFromZK error: %+v\n ", err)
 		}
-		fmt.Println("ReestablishConn url: ", url)
+		r.logger.Println("ReestablishConn url: ", url)
 		if r.config.jsonTransport {
 			trans, err := newTJSONTransport(url, r.config.timeoutms)
 			if err != nil {

@@ -469,7 +389,7 @@ func (r *realisClient) ReestablishConn() error {
 			r.config.protoFactory = thrift.NewTBinaryProtocolFactoryDefault()
 		}
 		if err != nil {
-			fmt.Println("error creating config: ", err)
+			r.logger.Println("error creating config: ", err)
 		}
 		// Configured for basic-auth
 		AddBasicAuth(r.config, r.config.username, r.config.password)

@@ -478,7 +398,7 @@ func (r *realisClient) ReestablishConn() error {
 		r.adminClient = aurora.NewAuroraAdminClientFactory(r.config.transport, r.config.protoFactory)
 	} else if r.config.url != "" && r.config.username != "" && r.config.password != "" {
 		//Re-establish using scheduler url.
-		fmt.Println("ReestablishConn url: ", r.config.url)
+		r.logger.Println("ReestablishConn url: ", r.config.url)
 		if r.config.jsonTransport {
 			trans, err := newTJSONTransport(url, r.config.timeoutms)
 			if err != nil {

@@ -499,14 +419,14 @@ func (r *realisClient) ReestablishConn() error {
 		r.readonlyClient = aurora.NewReadOnlySchedulerClientFactory(r.config.transport, r.config.protoFactory)
 		r.adminClient = aurora.NewAuroraAdminClientFactory(r.config.transport, r.config.protoFactory)
 	} else {
-		fmt.Println(" Missing Data for ReestablishConn ")
-		fmt.Println(" r.config.cluster: ", r.config.cluster)
-		fmt.Println(" r.config.username: ", r.config.username)
-		fmt.Println(" r.config.passwd: ", r.config.password)
-		fmt.Println(" r.config.url: ", r.config.url)
+		r.logger.Println(" Missing Data for ReestablishConn ")
+		r.logger.Println(" r.config.cluster: ", r.config.cluster)
+		r.logger.Println(" r.config.username: ", r.config.username)
+		r.logger.Println(" r.config.passwd: ", r.config.password)
+		r.logger.Println(" r.config.url: ", r.config.url)
 		return errors.New(" Missing Data for ReestablishConn ")
 	}
-	fmt.Printf(" config url before return: %+v\n", r.config.url)
+	r.logger.Printf(" config url before return: %+v\n", r.config.url)
 	return nil
 }

@@ -645,6 +565,9 @@ func (r *realisClient) KillJob(key *aurora.JobKey) (*aurora.Response, error) {
 }

 // Sends a create job message to the scheduler with a specific job configuration.
+// Although this API is able to create service jobs, it is better to use CreateService instead
+// as that API uses the update thrift call which has a few extra features available.
+// Use this API to create ad-hoc jobs.
 func (r *realisClient) CreateJob(auroraJob Job) (*aurora.Response, error) {
 	var resp *aurora.Response
 	var clientErr error

@@ -669,6 +592,24 @@ func (r *realisClient) CreateJob(auroraJob Job) (*aurora.Response, error) {
 }

+// This API uses an update thrift call to create the services giving a few more robust features.
+func (r *realisClient) CreateService(auroraJob Job, settings UpdateSettings) (*aurora.Response, *aurora.StartJobUpdateResult_, error) {
+	// Create a new job update object and ship it to the StartJobUpdate api
+	update := NewUpdateJob(auroraJob.TaskConfig(), &settings.settings)
+	update.InstanceCount(auroraJob.GetInstanceCount())
+
+	resp, err := r.StartJobUpdate(update, "")
+	if err != nil {
+		return resp, nil, errors.Wrap(err, "unable to create service")
+	}
+
+	if resp != nil && resp.GetResult_() != nil {
+		return resp, resp.GetResult_().GetStartJobUpdateResult_(), nil
+	}
+
+	return resp, nil, errors.New("results object is nil")
+}
+
 func (r *realisClient) ScheduleCronJob(auroraJob Job) (*aurora.Response, error) {
 	var resp *aurora.Response
 	var clientErr error

@@ -962,6 +903,7 @@ func (r *realisClient) GetTasksWithoutConfigs(query *aurora.TaskQuery) (tasks []
 }

+// Get the task configuration from the aurora scheduler for a job
 func (r *realisClient) FetchTaskConfig(instKey aurora.InstanceKey) (*aurora.TaskConfig, error) {

 	ids := make(map[int32]bool)

@@ -1166,7 +1108,7 @@ func (r *realisClient) MaintenanceStatus(hosts ...string) (*aurora.Response, *au
 			// Experienced an connection error
 			err1 := r.ReestablishConn()
 			if err1 != nil {
-				fmt.Println("error in re-establishing connection: ", err1)
+				r.logger.Println("error in re-establishing connection: ", err1)
 			}
 			return false, nil
 		}
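As a recap of the new realis.go API: CreateService wraps StartJobUpdate, so the caller receives both the raw response and the StartJobUpdateResult_, whose key feeds straight into the update monitor. A minimal fragment, reusing r, job, and monitor from the example client:

    // Create a three-instance service via the update path and watch it roll forward.
    settings := realis.NewUpdateSettings()
    job.InstanceCount(3)

    _, result, err := r.CreateService(job, *settings)
    if err != nil {
        log.Fatal(err)
    }

    // The update key identifies the rolling update started on our behalf.
    if ok, err := monitor.JobUpdate(*result.GetKey(), 5, 50); !ok || err != nil {
        // Clean up rather than leave a half-created service behind.
        r.KillJob(job.JobKey())
    }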


@@ -55,6 +55,14 @@ func TestMain(m *testing.M) {
 	os.Exit(m.Run())
 }

+func TestLeaderFromZK(t *testing.T) {
+	cluster := realis.GetDefaultClusterFromZKUrl("192.168.33.7:2181")
+	url, err := realis.LeaderFromZK(*cluster)
+
+	assert.NoError(t, err)
+	assert.Equal(t, url, "http://aurora.local:8081")
+}
+
 func TestRealisClient_CreateJob_Thermos(t *testing.T) {
 	job := realis.NewJob().


@@ -24,12 +24,15 @@ type UpdateJob struct {
 	req *aurora.JobUpdateRequest
 }

 // Create a default UpdateJob object.
 func NewDefaultUpdateJob(config *aurora.TaskConfig) *UpdateJob {
+
 	req := aurora.NewJobUpdateRequest()
 	req.TaskConfig = config
-	req.Settings = aurora.NewJobUpdateSettings()
+	s := NewUpdateSettings().Settings()
+	req.Settings = &s

 	job := NewJob().(*AuroraJob)
 	job.jobConfig.TaskConfig = config

@@ -60,7 +63,6 @@ func NewDefaultUpdateJob(config *aurora.TaskConfig) *UpdateJob {
 	req.Settings.MaxPerInstanceFailures = 0
 	req.Settings.MaxFailedInstances = 0
 	req.Settings.RollbackOnFailure = true
-	req.Settings.WaitForBatchCompletion = false

 	//TODO(rdelvalle): Deep copy job struct to avoid unexpected behavior
 	return &UpdateJob{job, req}

@@ -138,3 +140,66 @@ func (u *UpdateJob) RollbackOnFail(rollback bool) *UpdateJob {
 	u.req.Settings.RollbackOnFailure = rollback
 	return u
 }
+
+// TODO(rdelvalle): Integrate this struct with the JobUpdate struct so that we don't repeat code
+type UpdateSettings struct {
+	settings aurora.JobUpdateSettings
+}
+
+func NewUpdateSettings() *UpdateSettings {
+	us := new(UpdateSettings)
+
+	// Mirrors defaults set by Pystachio
+	us.settings.UpdateOnlyTheseInstances = make(map[*aurora.Range]bool)
+	us.settings.UpdateGroupSize = 1
+	us.settings.WaitForBatchCompletion = false
+	us.settings.MinWaitInInstanceRunningMs = 45000
+	us.settings.MaxPerInstanceFailures = 0
+	us.settings.MaxFailedInstances = 0
+	us.settings.RollbackOnFailure = true
+
+	return us
+}
+
+// Max number of instances being updated at any given moment.
+func (u *UpdateSettings) BatchSize(size int32) *UpdateSettings {
+	u.settings.UpdateGroupSize = size
+	return u
+}
+
+// Minimum number of seconds a shard must remain in RUNNING state before considered a success.
+func (u *UpdateSettings) WatchTime(ms int32) *UpdateSettings {
+	u.settings.MinWaitInInstanceRunningMs = ms
+	return u
+}
+
+// Wait for all instances in a group to be done before moving on.
+func (u *UpdateSettings) WaitForBatchCompletion(batchWait bool) *UpdateSettings {
+	u.settings.WaitForBatchCompletion = batchWait
+	return u
+}
+
+// Max number of instance failures to tolerate before marking instance as FAILED.
+func (u *UpdateSettings) MaxPerInstanceFailures(inst int32) *UpdateSettings {
+	u.settings.MaxPerInstanceFailures = inst
+	return u
+}
+
+// Max number of FAILED instances to tolerate before terminating the update.
+func (u *UpdateSettings) MaxFailedInstances(inst int32) *UpdateSettings {
+	u.settings.MaxFailedInstances = inst
+	return u
+}
+
+// When False, prevents auto rollback of a failed update.
+func (u *UpdateSettings) RollbackOnFail(rollback bool) *UpdateSettings {
+	u.settings.RollbackOnFailure = rollback
+	return u
+}
+
+// Return internal Thrift API structure
+func (u UpdateSettings) Settings() aurora.JobUpdateSettings {
+	return u.settings
+}
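Because every setter above returns *UpdateSettings, settings chain fluently before being handed to CreateService. A small sketch; the values are illustrative, not recommendations:

    settings := realis.NewUpdateSettings().
        BatchSize(2).                 // update two instances at a time
        WatchTime(60000).             // each instance must stay RUNNING this long (ms)
        WaitForBatchCompletion(true). // finish a batch before starting the next
        MaxPerInstanceFailures(1).
        MaxFailedInstances(1).
        RollbackOnFail(true)

    _, result, err := r.CreateService(job, *settings)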

zk.go

@@ -36,89 +36,70 @@ type ServiceInstance struct {
 	Status string `json:"status"`
 }

 type NoopLogger struct{}

 func (NoopLogger) Printf(format string, a ...interface{}) {
 }

 // Retrieves current Aurora leader from ZK.
 func LeaderFromZK(cluster Cluster) (string, error) {
-	var err error
 	var zkurl string

-	duration := defaultBackoff.Duration
-	for step := 0; step < defaultBackoff.Steps; step++ {
-		// Attempt to find leader
-		zkurl, err = leaderFromZK(cluster)
-		if err == nil {
-			return zkurl, err
-		}
-
-		// Backoff if we failed to determine leader
-		adjusted := duration
-		if defaultBackoff.Jitter > 0.0 {
-			adjusted = Jitter(duration, defaultBackoff.Jitter)
-		}
-
-		fmt.Printf("Error determining Aurora leader: %v; retrying in %v\n", err, adjusted)
-		time.Sleep(adjusted)
-		duration = time.Duration(float64(duration) * defaultBackoff.Factor)
-	}
-
-	return "", errors.Wrapf(err, "Failed to determine leader after %v attempts", defaultBackoff.Steps)
-}
-
-func leaderFromZK(cluster Cluster) (string, error) {
-
-	endpoints := strings.Split(cluster.ZK, ",")
-
-	//TODO (rdelvalle): When enabling debugging, change logger here
-	c, _, err := zk.Connect(endpoints, time.Second*10, func(c *zk.Conn) { c.SetLogger(NoopLogger{}) })
-	if err != nil {
-		return "", errors.Wrap(err, "Failed to connect to Zookeeper at "+cluster.ZK)
-	}
-
-	defer c.Close()
-
-	children, _, _, err := c.ChildrenW(cluster.SchedZKPath)
-	if err != nil {
-		return "", errors.Wrapf(err, "Path %s doesn't exist on Zookeeper ", cluster.SchedZKPath)
-	}
-
-	serviceInst := new(ServiceInstance)
-
-	for _, child := range children {
-		// Only the leader will start with member_
-		if strings.HasPrefix(child, "member_") {
-			data, _, err := c.Get(cluster.SchedZKPath + "/" + child)
-			if err != nil {
-				return "", errors.Wrap(err, "Error fetching contents of leader")
-			}
-
-			err = json.Unmarshal([]byte(data), serviceInst)
-			if err != nil {
-				return "", errors.Wrap(err, "Unable to unmarshall contents of leader")
-			}
-
-			// Should only be one endpoint
-			if len(serviceInst.AdditionalEndpoints) > 1 {
-				fmt.Errorf("Ambiguous end points schemes")
-			}
-
-			var scheme, host, port string
-			for k, v := range serviceInst.AdditionalEndpoints {
-				scheme = k
-				host = v.Host
-				port = strconv.Itoa(v.Port)
-			}
-
-			return scheme + "://" + host + ":" + port, nil
-		}
-	}
-
-	return "", errors.New("No leader found")
+	retryErr := ExponentialBackoff(defaultBackoff, func() (bool, error) {
+
+		endpoints := strings.Split(cluster.ZK, ",")
+
+		//TODO (rdelvalle): When enabling debugging, change logger here
+		c, _, err := zk.Connect(endpoints, time.Second*10, func(c *zk.Conn) { c.SetLogger(NoopLogger{}) })
+		if err != nil {
+			return false, errors.Wrap(err, "Failed to connect to Zookeeper at "+cluster.ZK)
+		}
+
+		defer c.Close()
+
+		// Open up descriptor for the ZK path given
+		children, _, _, err := c.ChildrenW(cluster.SchedZKPath)
+		if err != nil {
+			return false, errors.Wrapf(err, "Path %s doesn't exist on Zookeeper ", cluster.SchedZKPath)
+		}
+
+		// Search for the leader through all the children in the given path
+		serviceInst := new(ServiceInstance)
+		for _, child := range children {
+			// Only the leader will start with member_
+			if strings.HasPrefix(child, "member_") {
+				data, _, err := c.Get(cluster.SchedZKPath + "/" + child)
+				if err != nil {
+					return false, errors.Wrap(err, "Error fetching contents of leader")
+				}
+
+				err = json.Unmarshal([]byte(data), serviceInst)
+				if err != nil {
+					return false, errors.Wrap(err, "Unable to unmarshall contents of leader")
+				}
+
+				// Should only be one endpoint
+				if len(serviceInst.AdditionalEndpoints) > 1 {
+					fmt.Errorf("Ambiguous end points schemes")
+				}
+
+				var scheme, host, port string
+				for k, v := range serviceInst.AdditionalEndpoints {
+					scheme = k
+					host = v.Host
+					port = strconv.Itoa(v.Port)
+				}
+
+				zkurl = scheme + "://" + host + ":" + port
+				return true, nil
+			}
+		}
+
+		return false, errors.New("No leader found")
+	})
+
+	if retryErr != nil {
+		return "", errors.Wrapf(retryErr, "Failed to determine leader after %v attempts", defaultBackoff.Steps)
+	}
+
+	return zkurl, nil
 }
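Note that the ExponentialBackoff helper LeaderFromZK now delegates to is not shown in this diff. Below is a plausible sketch of its shape, inferred from the (bool, error) closure it is called with and from the Backoff struct and Jitter function in realis.go; the ConditionFunc name is an assumption, not the library's confirmed API:

    // Assumed signature: the closure returns (done, err); done == true ends
    // the retry loop successfully, anything else is retried until Steps runs out.
    type ConditionFunc func() (done bool, err error)

    func ExponentialBackoff(backoff Backoff, condition ConditionFunc) error {
        var err error
        var ok bool
        duration := backoff.Duration
        for i := 0; i < backoff.Steps; i++ {
            if i != 0 {
                // Wait between attempts, stretching the wait each round.
                adjusted := duration
                if backoff.Jitter > 0.0 {
                    adjusted = Jitter(duration, backoff.Jitter)
                }
                time.Sleep(adjusted)
                duration = time.Duration(float64(duration) * backoff.Factor)
            }
            if ok, err = condition(); ok {
                return nil
            }
        }
        if err != nil {
            return err
        }
        return errors.New("retry budget exhausted")
    }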