Code cleanup, added ability to attach logger, added CreateService API

* Code cleanup: deleted multiple functions that had become stale. Removed the cluster example, since the need to create a Cluster object by hand has been replaced.
* Cleaned up the ZK connection code by using the backoff function. Added an end-to-end test verifying that we get the host correctly from ZK. Changed the clusters test to be an outside package.
* Added a LeaderFromZKURL test to the end-to-end tests.
* Added a logger to RealisConfig so that users can attach their own loggers to the client. Logger is an interface that shadows the most popular logging libraries: only Print, Println, and Printf need to be implemented to satisfy the realis.Logger type. The example client uses the standard library's log package.
* Moved most fmt.Print* calls to be redirected to the user-provided logger. The logger defaults to a no-op logger.
* Added CreateService to the Realis interface. It uses the StartJobUpdate API to create services instead of the createJob API.
* Bumped the version number inside the client in anticipation of a new release.
parent 72b746e431
commit e614e04f27
8 changed files with 292 additions and 298 deletions
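Before the file-by-file diff, a note on the logger change: the Logger contract described above is small enough to show in full. The interface definition itself is outside the captured hunks, so the following is a sketch of what the commit message implies; the scheduler address and credentials are placeholders.

package main

import (
	"log"
	"os"

	"github.com/paypal/gorealis"
)

// Sketch of the three-method contract the commit message describes;
// the real definition lives outside the hunks shown below.
type Logger interface {
	Print(v ...interface{})
	Println(v ...interface{})
	Printf(format string, v ...interface{})
}

func main() {
	// *log.Logger has Print, Println, and Printf, so it satisfies
	// realis.Logger directly, as the example client demonstrates.
	debug := log.New(os.Stdout, "realis-debug: ", log.Ldate)

	r, err := realis.NewRealisClient(
		realis.SchedulerUrl("http://192.168.33.7:8081"), // placeholder address
		realis.SetLogger(debug),
	)
	if err != nil {
		log.Fatal(err)
	}
	defer r.Close()
}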
clusters_test.go

@@ -12,17 +12,18 @@
  * limitations under the License.
  */

-package realis
+package realis_test

 import (
 	"fmt"
 	"github.com/stretchr/testify/assert"
 	"testing"
+
+	"github.com/paypal/gorealis"
 )

 func TestLoadClusters(t *testing.T) {

-	clusters, err := LoadClusters("examples/clusters.json")
+	clusters, err := realis.LoadClusters("examples/clusters.json")
 	if err != nil {
 		fmt.Print(err)
 	}
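The file this test reads is the same clusters.json layout the default Aurora client uses. A sketch of consuming it from application code, assuming (as the test and the example client's indexing suggest) that LoadClusters returns clusters keyed by name and that a cluster named devcluster exists in the file:

package main

import (
	"fmt"
	"os"

	"github.com/paypal/gorealis"
)

func main() {
	// LoadClusters parses a clusters.json file; the example client indexes
	// the result by cluster name, so it behaves like a map keyed by name.
	clusters, err := realis.LoadClusters("examples/clusters.json")
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	cluster, ok := clusters["devcluster"] // hypothetical cluster name
	if !ok {
		fmt.Println("cluster not found in file")
		os.Exit(1)
	}

	// Hand the chosen cluster to the client via the ZKCluster option.
	r, err := realis.NewRealisClient(realis.ZKCluster(&cluster))
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	defer r.Close()
}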
examples/client.go

@@ -27,6 +27,7 @@ import (
 	"github.com/paypal/gorealis"
 	"github.com/paypal/gorealis/gen-go/apache/aurora"
 	"github.com/paypal/gorealis/response"
+	"log"
 )

 var cmd, executor, url, clustersConfig, clusterName, updateId, username, password, zkUrl, hostList string

@@ -45,11 +46,10 @@ func init() {
 	flag.StringVar(&zkUrl, "zkurl", "", "zookeeper url")
 	flag.StringVar(&hostList, "hostList", "", "Comma separated list of hosts to operate on")
 	flag.Parse()
-}

-func main() {
-	// Attempt to load leader from zookeeper
+	// Attempt to load leader from zookeeper using a
+	// cluster.json file used for the default aurora client if provided.
+	// This will override the provided url in the arguments
 	if clustersConfig != "" {
 		clusters, err := realis.LoadClusters(clustersConfig)
 		if err != nil {

@@ -59,7 +59,7 @@ func main() {

 		cluster, ok := clusters[clusterName]
 		if !ok {
-			fmt.Printf("Cluster %s chosen doesn't exist\n", clusterName)
+			fmt.Printf("Cluster %s doesn't exist in the file provided\n", clusterName)
 			os.Exit(1)
 		}

@@ -69,56 +69,42 @@ func main() {
 			os.Exit(1)
 		}
 	}
+}

+func main() {

 	var job realis.Job
 	var err error
 	var monitor *realis.Monitor
 	var r realis.Realis

-	var defaultBackoff = &realis.Backoff{
-		Steps:    2,
-		Duration: 10 * time.Second,
-		Factor:   2.0,
-		Jitter:   0.1,
+	clientOptions := []realis.ClientOption{
+		realis.BasicAuth(username, password),
+		realis.ThriftJSON(),
+		realis.TimeoutMS(CONNECTION_TIMEOUT),
+		realis.BackOff(&realis.Backoff{
+			Steps:    2,
+			Duration: 10 * time.Second,
+			Factor:   2.0,
+			Jitter:   0.1,
+		}),
+		realis.SetLogger(log.New(os.Stdout, "realis-debug: ", log.Ldate)),
 	}

 	//check if zkUrl is available.
 	if zkUrl != "" {
 		fmt.Println("zkUrl: ", zkUrl)
-		cluster := &realis.Cluster{Name: "example",
-			AuthMechanism: "UNAUTHENTICATED",
-			ZK:            zkUrl,
-			SchedZKPath:   "/aurora/scheduler",
-			AgentRunDir:   "latest",
-			AgentRoot:     "/var/lib/mesos",
-		}
-		fmt.Printf("cluster: %+v \n", cluster)
-
-		r, err = realis.NewRealisClient(realis.ZKUrl(zkUrl),
-			realis.BasicAuth(username, password),
-			realis.ThriftJSON(),
-			realis.TimeoutMS(CONNECTION_TIMEOUT),
-			realis.BackOff(defaultBackoff))
-
-		if err != nil {
-			fmt.Println(err)
-			os.Exit(1)
-		}
-		monitor = &realis.Monitor{r}
-
+		clientOptions = append(clientOptions, realis.ZKUrl(zkUrl))
 	} else {
-		r, err = realis.NewRealisClient(realis.SchedulerUrl(url),
-			realis.BasicAuth(username, password),
-			realis.ThriftJSON(),
-			realis.TimeoutMS(CONNECTION_TIMEOUT),
-			realis.BackOff(defaultBackoff))
-		if err != nil {
-			fmt.Println(err)
-			os.Exit(1)
-		}
-
-		monitor = &realis.Monitor{r}
+		clientOptions = append(clientOptions, realis.SchedulerUrl(url))
 	}

+	r, err = realis.NewRealisClient(clientOptions...)
+	if err != nil {
+		fmt.Println(err)
+		os.Exit(1)
+	}
+	monitor = &realis.Monitor{r}
 	defer r.Close()

 	switch executor {

@@ -197,6 +183,29 @@ func main() {
 		}
 	}
+	break
+case "createService":
+	// Create a service with three instances using the update API instead of the createJob API
+	fmt.Println("Creating service")
+	settings := realis.NewUpdateSettings()
+	job.InstanceCount(3)
+	_, resp, err := r.CreateService(job, *settings)
+	if err != nil {
+		fmt.Println(err)
+		os.Exit(1)
+	}
+	fmt.Println(resp.String())
+
+	if ok, err := monitor.JobUpdate(*resp.GetKey(), 5, 50); !ok || err != nil {
+		_, err := r.KillJob(job.JobKey())
+		if err != nil {
+			fmt.Println(err)
+			os.Exit(1)
+		}
+		fmt.Println("ok: ", ok)
+		fmt.Println("err: ", err)
+	}
+
 	break
 case "createDocker":
 	fmt.Println("Creating a docker based job")

@@ -352,7 +361,6 @@ func main() {
 	}
 	fmt.Println(resp.String())
 	break
-
 case "flexDown":
 	fmt.Println("Flexing down job")

@@ -407,7 +415,6 @@ func main() {

 	jobUpdateKey := response.JobUpdateKey(resp)
 	monitor.JobUpdate(*jobUpdateKey, 5, 500)
-
 	break
 case "updateDetails":
 	resp, err := r.JobUpdateDetails(aurora.JobUpdateQuery{

@@ -457,9 +464,7 @@ func main() {
 	}
 	print(config.String())
 	break
-
 case "updatesummary":
-
 	fmt.Println("Getting job update summary")
 	jobquery := &aurora.JobUpdateQuery{
 		Role: &job.JobKey().Role,

@@ -471,7 +476,6 @@ func main() {
 		os.Exit(1)
 	}
 	fmt.Println(updatesummary)
-
 case "taskStatus":
 	fmt.Println("Getting task status")
 	taskQ := &aurora.TaskQuery{Role: job.JobKey().Role,

@@ -485,7 +489,6 @@ func main() {
 	}
 	fmt.Printf("length: %d\n ", len(tasks))
 	fmt.Printf("tasks: %+v\n", tasks)
-
 case "tasksWithoutConfig":
 	fmt.Println("Getting task status")
 	taskQ := &aurora.TaskQuery{Role: job.JobKey().Role,

@@ -499,7 +502,6 @@ func main() {
 	}
 	fmt.Printf("length: %d\n ", len(tasks))
 	fmt.Printf("tasks: %+v\n", tasks)
-
 case "drainHosts":
 	fmt.Println("Setting hosts to DRAINING")
 	if hostList == "" {

@@ -531,7 +533,6 @@ func main() {
 	}

 	fmt.Print(result.String())
-
 case "endMaintenance":
 	fmt.Println("Setting hosts to ACTIVE")
 	if hostList == "" {

@@ -563,7 +564,6 @@ func main() {
 	}

 	fmt.Print(result.String())
-
 default:
 	fmt.Println("Command not supported")
 	os.Exit(1)
job.go (2 changed lines)

@@ -151,8 +151,6 @@ func (j *AuroraJob) RAM(ram int64) Job {
 	*j.resources["ram"].RamMb = ram
 	j.jobConfig.TaskConfig.RamMb = ram //Will be deprecated soon
-
-
 	return j
 }
monitors.go

@@ -16,7 +16,6 @@
 package realis

 import (
-	"fmt"
 	"time"

 	"github.com/paypal/gorealis/gen-go/apache/aurora"

@@ -59,7 +58,7 @@ func (m *Monitor) JobUpdate(updateKey aurora.JobUpdateKey, interval int, timeout
 	updateDetail := response.JobUpdateDetails(respDetail)

 	if len(updateDetail) == 0 {
-		fmt.Println("No update found")
+		m.Client.RealisConfig().logger.Println("No update found")
 		return false, errors.New("No update found for " + updateKey.String())
 	}
 	status := updateDetail[0].Update.Summary.State.Status

@@ -70,13 +69,13 @@ func (m *Monitor) JobUpdate(updateKey aurora.JobUpdateKey, interval int, timeout
 	// if we encounter an inactive state and it is not at rolled forward, update failed
 	switch status {
 	case aurora.JobUpdateStatus_ROLLED_FORWARD:
-		fmt.Println("Update succeded")
+		m.Client.RealisConfig().logger.Println("Update succeded")
 		return true, nil
 	case aurora.JobUpdateStatus_FAILED:
-		fmt.Println("Update failed")
+		m.Client.RealisConfig().logger.Println("Update failed")
 		return false, errors.New(UpdateFailed)
 	case aurora.JobUpdateStatus_ROLLED_BACK:
-		fmt.Println("rolled back")
+		m.Client.RealisConfig().logger.Println("rolled back")
 		return false, errors.New(RolledBack)
 	default:
 		return false, nil
realis.go (268 changed lines)

@@ -31,28 +31,29 @@ import (
 	"github.com/pkg/errors"
 )

-const VERSION = "1.0.4"
+const VERSION = "1.1.0"

 type Realis interface {
 	AbortJobUpdate(updateKey aurora.JobUpdateKey, message string) (*aurora.Response, error)
 	AddInstances(instKey aurora.InstanceKey, count int32) (*aurora.Response, error)
-	RemoveInstances(key *aurora.JobKey, count int32) (*aurora.Response, error)
 	CreateJob(auroraJob Job) (*aurora.Response, error)
+	CreateService(auroraJob Job, settings UpdateSettings) (*aurora.Response, *aurora.StartJobUpdateResult_, error)
 	DescheduleCronJob(key *aurora.JobKey) (*aurora.Response, error)
-	GetTaskStatus(query *aurora.TaskQuery) ([]*aurora.ScheduledTask, error)
-	GetTasksWithoutConfigs(query *aurora.TaskQuery) ([]*aurora.ScheduledTask, error)
 	FetchTaskConfig(instKey aurora.InstanceKey) (*aurora.TaskConfig, error)
 	GetInstanceIds(key *aurora.JobKey, states map[aurora.ScheduleStatus]bool) (map[int32]bool, error)
+	GetJobUpdateSummaries(jobUpdateQuery *aurora.JobUpdateQuery) (*aurora.Response, error)
+	GetTaskStatus(query *aurora.TaskQuery) ([]*aurora.ScheduledTask, error)
+	GetTasksWithoutConfigs(query *aurora.TaskQuery) ([]*aurora.ScheduledTask, error)
 	JobUpdateDetails(updateQuery aurora.JobUpdateQuery) (*aurora.Response, error)
 	KillJob(key *aurora.JobKey) (*aurora.Response, error)
 	KillInstances(key *aurora.JobKey, instances ...int32) (*aurora.Response, error)
+	RemoveInstances(key *aurora.JobKey, count int32) (*aurora.Response, error)
 	RestartInstances(key *aurora.JobKey, instances ...int32) (*aurora.Response, error)
 	RestartJob(key *aurora.JobKey) (*aurora.Response, error)
 	RollbackJobUpdate(key aurora.JobUpdateKey, message string) (*aurora.Response, error)
 	ScheduleCronJob(auroraJob Job) (*aurora.Response, error)
 	StartJobUpdate(updateJob *UpdateJob, message string) (*aurora.Response, error)
 	StartCronJob(key *aurora.JobKey) (*aurora.Response, error)
-	GetJobUpdateSummaries(jobUpdateQuery *aurora.JobUpdateQuery) (*aurora.Response, error)
 	ReestablishConn() error
 	RealisConfig() *RealisConfig
 	Close()
@@ -68,69 +69,101 @@ type realisClient struct {
 	client         *aurora.AuroraSchedulerManagerClient
 	readonlyClient *aurora.ReadOnlySchedulerClient
 	adminClient    *aurora.AuroraAdminClient
+	logger         Logger
 }

-type option func(*RealisConfig)
+type RealisConfig struct {
+	username, password          string
+	url                         string
+	timeoutms                   int
+	binTransport, jsonTransport bool
+	cluster                     *Cluster
+	backoff                     *Backoff
+	transport                   thrift.TTransport
+	protoFactory                thrift.TProtocolFactory
+	logger                      Logger
+}
+
+type Backoff struct {
+	Duration time.Duration // the base duration
+	Factor   float64       // Duration is multipled by factor each iteration
+	Jitter   float64       // The amount of jitter applied each iteration
+	Steps    int           // Exit with error after this many steps
+}
+
+var defaultBackoff = Backoff{
+	Steps:    3,
+	Duration: 10 * time.Second,
+	Factor:   5.0,
+	Jitter:   0.1,
+}
+
+type ClientOption func(*RealisConfig)

 //Config sets for options in RealisConfig.
-func BasicAuth(username, password string) option {
+func BasicAuth(username, password string) ClientOption {

 	return func(config *RealisConfig) {
 		config.username = username
 		config.password = password
 	}
 }

-func SchedulerUrl(url string) option {
+func SchedulerUrl(url string) ClientOption {
 	return func(config *RealisConfig) {
 		config.url = url
 	}
 }

-func TimeoutMS(timeout int) option {
+func TimeoutMS(timeout int) ClientOption {
 	return func(config *RealisConfig) {
 		config.timeoutms = timeout
 	}
 }

-func ZKCluster(cluster *Cluster) option {
+func ZKCluster(cluster *Cluster) ClientOption {
 	return func(config *RealisConfig) {
 		config.cluster = cluster
 	}
 }

-func ZKUrl(url string) option {
+func ZKUrl(url string) ClientOption {
 	return func(config *RealisConfig) {
 		config.cluster = GetDefaultClusterFromZKUrl(url)
 	}
 }

-func Retries(backoff *Backoff) option {
+func Retries(backoff *Backoff) ClientOption {
 	return func(config *RealisConfig) {
 		config.backoff = backoff
 	}
 }

-func ThriftJSON() option {
+func ThriftJSON() ClientOption {
 	return func(config *RealisConfig) {
 		config.jsonTransport = true
 	}
 }

-func ThriftBinary() option {
+func ThriftBinary() ClientOption {
 	return func(config *RealisConfig) {
 		config.binTransport = true
 	}
 }

-func BackOff(b *Backoff) option {
+func BackOff(b *Backoff) ClientOption {
 	return func(config *RealisConfig) {
 		config.backoff = b
 	}
 }

+// Using the word set to avoid name collision with Interface
+func SetLogger(l Logger) ClientOption {
+	return func(config *RealisConfig) {
+		config.logger = l
+	}
+}
+
 func newTJSONTransport(url string, timeout int) (thrift.TTransport, error) {
 	trans, err := defaultTTransport(url, timeout)
 	if err != nil {
 		return nil, errors.Wrap(err, "Error creating realis")
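For reference, the retry schedule encoded by the new defaultBackoff above (and by the hand-rolled loop this commit removes from zk.go) works out as follows. A standalone sketch of the arithmetic, with jitter omitted so the output is deterministic:

package main

import (
	"fmt"
	"time"
)

// The schedule implied by Backoff{Steps: 3, Duration: 10 * time.Second,
// Factor: 5.0, Jitter: 0.1}: each failed attempt is followed by a wait
// that grows by Factor, with up to Jitter*wait of randomness added by
// the Jitter helper (omitted here).
func main() {
	duration := 10 * time.Second
	factor := 5.0
	steps := 3

	for step := 0; step < steps; step++ {
		fmt.Printf("attempt %d fails -> back off up to ~%v\n", step+1, duration)
		duration = time.Duration(float64(duration) * factor)
	}
	// Prints ~10s, ~50s, ~250s: the waits separating the attempts before
	// the caller gives up and reports the failure.
}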
@@ -155,35 +188,41 @@ func newTBinTransport(url string, timeout int) (thrift.TTransport, error) {
 	return trans, err
 }

-func NewRealisClient(options ...option) (Realis, error) {
+func NewRealisClient(options ...ClientOption) (Realis, error) {
 	config := &RealisConfig{}
-	fmt.Println(" options length: ", len(options))
+
+	// Default configs
+	config.timeoutms = 10000
+	config.backoff = &defaultBackoff
+	config.logger = NoopLogger{}
+
+	// Override default configs where necessary
 	for _, opt := range options {
 		opt(config)
 	}
-	//Default timeout
-	if config.timeoutms == 0 {
-		config.timeoutms = 10000
-	}
+
+	config.logger.Println("Number of options applied to config: ", len(options))
+
 	//Set default Transport to JSON if needed.
 	if !config.jsonTransport && !config.binTransport {
 		config.jsonTransport = true
 	}

 	var url string
 	var err error
-	//Cluster or URL?
+
+	// Determine how to get information to connect to the scheduler.
+	// Prioritize getting leader from ZK over using a direct URL.
 	if config.cluster != nil {
 		url, err = LeaderFromZK(*config.cluster)

 		// If ZK is configured, throw an error if the leader is unable to be determined
 		if err != nil {
 			return nil, errors.Wrap(err, "LeaderFromZK error")
 		}
-		fmt.Println("schedURLFromZK: ", url)
+		config.logger.Println("Scheduler URL from ZK: ", url)
 	} else if config.url != "" {
-		fmt.Println("Scheduler URL: ", config.url)
 		url = config.url
+		config.logger.Println("Scheduler URL: ", url)
 	} else {
 		return nil, errors.New("Incomplete Options -- url or cluster required")
 	}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Wrap(err, "Error creating realis")
|
return nil, errors.Wrap(err, "Error creating realis")
|
||||||
}
|
}
|
||||||
|
|
||||||
config.transport = trans
|
config.transport = trans
|
||||||
config.protoFactory = thrift.NewTJSONProtocolFactory()
|
config.protoFactory = thrift.NewTJSONProtocolFactory()
|
||||||
|
|
||||||
} else if config.binTransport {
|
} else if config.binTransport {
|
||||||
trans, err := newTBinTransport(url, config.timeoutms)
|
trans, err := newTBinTransport(url, config.timeoutms)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -205,55 +244,22 @@ func NewRealisClient(options ...option) (Realis, error) {
|
||||||
config.protoFactory = thrift.NewTBinaryProtocolFactoryDefault()
|
config.protoFactory = thrift.NewTBinaryProtocolFactoryDefault()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
config.logger.Printf("gorealis config url: %+v\n", config.url)
|
||||||
|
|
||||||
//Basic Authentication.
|
//Basic Authentication.
|
||||||
if config.username != "" && config.password != "" {
|
if config.username != "" && config.password != "" {
|
||||||
AddBasicAuth(config, config.username, config.password)
|
AddBasicAuth(config, config.username, config.password)
|
||||||
}
|
}
|
||||||
|
|
||||||
//Set defaultBackoff if required.
|
|
||||||
if config.backoff == nil {
|
|
||||||
config.backoff = &defaultBackoff
|
|
||||||
} else {
|
|
||||||
defaultBackoff = *config.backoff
|
|
||||||
fmt.Printf(" updating default backoff : %+v\n", *config.backoff)
|
|
||||||
}
|
|
||||||
|
|
||||||
fmt.Printf("gorealis config url: %+v\n", config.url)
|
|
||||||
|
|
||||||
return &realisClient{
|
return &realisClient{
|
||||||
config: config,
|
config: config,
|
||||||
client: aurora.NewAuroraSchedulerManagerClientFactory(config.transport, config.protoFactory),
|
client: aurora.NewAuroraSchedulerManagerClientFactory(config.transport, config.protoFactory),
|
||||||
readonlyClient: aurora.NewReadOnlySchedulerClientFactory(config.transport, config.protoFactory),
|
readonlyClient: aurora.NewReadOnlySchedulerClientFactory(config.transport, config.protoFactory),
|
||||||
adminClient: aurora.NewAuroraAdminClientFactory(config.transport, config.protoFactory)}, nil
|
adminClient: aurora.NewAuroraAdminClientFactory(config.transport, config.protoFactory),
|
||||||
|
logger: config.logger}, nil
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wrapper object to provide future flexibility
|
|
||||||
type RealisConfig struct {
|
|
||||||
username, password string
|
|
||||||
url string
|
|
||||||
timeoutms int
|
|
||||||
binTransport, jsonTransport bool
|
|
||||||
cluster *Cluster
|
|
||||||
backoff *Backoff
|
|
||||||
transport thrift.TTransport
|
|
||||||
protoFactory thrift.TProtocolFactory
|
|
||||||
}
|
|
||||||
|
|
||||||
type Backoff struct {
|
|
||||||
Duration time.Duration // the base duration
|
|
||||||
Factor float64 // Duration is multipled by factor each iteration
|
|
||||||
Jitter float64 // The amount of jitter applied each iteration
|
|
||||||
Steps int // Exit with error after this many steps
|
|
||||||
}
|
|
||||||
|
|
||||||
var defaultBackoff = Backoff{
|
|
||||||
Steps: 3,
|
|
||||||
Duration: 10 * time.Second,
|
|
||||||
Factor: 5.0,
|
|
||||||
Jitter: 0.1,
|
|
||||||
}
|
|
||||||
|
|
||||||
// Jitter returns a time.Duration between duration and duration + maxFactor *
|
// Jitter returns a time.Duration between duration and duration + maxFactor *
|
||||||
// duration.
|
// duration.
|
||||||
//
|
//
|
||||||
|
@@ -267,33 +273,6 @@ func Jitter(duration time.Duration, maxFactor float64) time.Duration {
 	return wait
 }

-// Create a new Client with Cluster information and other details.
-
-func NewDefaultClientUsingCluster(cluster *Cluster, user, passwd string) (Realis, error) {
-
-	url, err := LeaderFromZK(*cluster)
-	if err != nil {
-		fmt.Println(err)
-		return nil, err
-	}
-	fmt.Printf(" url: %s\n", url)
-
-	//Create new configuration with default transport layer
-	config, err := newDefaultConfig(url, 10000)
-	if err != nil {
-		fmt.Println(err)
-		return nil, err
-	}
-	config.username = user
-	config.password = passwd
-	config.cluster = cluster
-	config.url = ""
-	// Configured for vagrant
-	AddBasicAuth(config, user, passwd)
-	r := newClient(config)
-	return r, nil
-}
-
 func GetDefaultClusterFromZKUrl(zkurl string) *Cluster {
 	return &Cluster{Name: "defaultCluster",
 		AuthMechanism: "UNAUTHENTICATED",

@@ -304,65 +283,6 @@ func GetDefaultClusterFromZKUrl(zkurl string) *Cluster {
 	}
 }

-//This api would create default cluster object..
-func NewDefaultClientUsingZKUrl(zkUrl, user, passwd string) (Realis, error) {
-
-	fmt.Printf(" zkUrl: %s\n", zkUrl)
-	cluster := GetDefaultClusterFromZKUrl(zkUrl)
-
-	url, err := LeaderFromZK(*cluster)
-	if err != nil {
-		fmt.Println(err)
-		return nil, err
-	}
-	fmt.Printf(" url: %s\n", url)
-
-	//Create new configuration with default transport layer
-	config, err := newDefaultConfig(url, 10000)
-	if err != nil {
-		fmt.Println(err)
-		return nil, err
-	}
-	config.username = user
-	config.password = passwd
-	config.cluster = cluster
-	config.url = ""
-	// Configured for vagrant
-	AddBasicAuth(config, user, passwd)
-	r := newClient(config)
-	return r, nil
-}
-
-func NewDefaultClientUsingUrl(url, user, passwd string) (Realis, error) {
-
-	fmt.Printf(" url: %s\n", url)
-	//Create new configuration with default transport layer
-	config, err := newDefaultConfig(url, 10000)
-	if err != nil {
-		fmt.Println(err)
-		return nil, err
-	}
-	config.username = user
-	config.password = passwd
-	config.url = url
-	config.cluster = nil
-	// Configured for vagrant
-	AddBasicAuth(config, user, passwd)
-	config.backoff = &Backoff{Steps: 2, Duration: 10 * time.Second, Factor: 2.0, Jitter: 0.1}
-	r := newClient(config)
-
-	return r, nil
-}
-
-// Create a new Client with a default transport layer
-func newClient(realisconfig *RealisConfig) Realis {
-	return &realisClient{
-		config:         realisconfig,
-		client:         aurora.NewAuroraSchedulerManagerClientFactory(realisconfig.transport, realisconfig.protoFactory),
-		readonlyClient: aurora.NewReadOnlySchedulerClientFactory(realisconfig.transport, realisconfig.protoFactory),
-		adminClient:    aurora.NewAuroraAdminClientFactory(realisconfig.transport, realisconfig.protoFactory)}
-}
-
 // Creates a default Thrift Transport object for communications in gorealis using an HTTP Post Client
 func defaultTTransport(urlstr string, timeoutms int) (thrift.TTransport, error) {
 	jar, err := cookiejar.New(nil)
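The convenience constructors deleted above collapse into the option-based constructor. A rough mapping, with placeholder arguments (NewDefaultClientUsingUrl's hard-coded Backoff{Steps: 2, Duration: 10s, Factor: 2.0, Jitter: 0.1} can be restored explicitly with realis.BackOff if that behavior matters):

package sketch

import "github.com/paypal/gorealis"

// Option-based equivalents for the removed helpers; argument values
// are placeholders supplied by the caller.
func migrated(zkUrl, url, user, passwd string) (realis.Realis, realis.Realis, error) {
	// Was: realis.NewDefaultClientUsingZKUrl(zkUrl, user, passwd)
	viaZK, err := realis.NewRealisClient(
		realis.ZKUrl(zkUrl),
		realis.BasicAuth(user, passwd),
	)
	if err != nil {
		return nil, nil, err
	}

	// Was: realis.NewDefaultClientUsingUrl(url, user, passwd)
	viaURL, err := realis.NewRealisClient(
		realis.SchedulerUrl(url),
		realis.BasicAuth(user, passwd),
	)
	return viaZK, viaURL, err
}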
@@ -439,7 +359,7 @@ func basicAuth(username, password string) string {

 func (r *realisClient) ReestablishConn() error {
 	//close existing connection..
-	fmt.Println("ReestablishConn begin ....")
+	r.logger.Println("ReestablishConn begin ....")
 	r.Close()
 	//First check cluster object for re-establish; if not available then try with scheduler url.
 	//var config *RealisConfig

@@ -452,7 +372,7 @@ func (r *realisClient) ReestablishConn() error {
 	if err != nil {
 		fmt.Errorf("LeaderFromZK error: %+v\n ", err)
 	}
-	fmt.Println("ReestablishConn url: ", url)
+	r.logger.Println("ReestablishConn url: ", url)
 	if r.config.jsonTransport {
 		trans, err := newTJSONTransport(url, r.config.timeoutms)
 		if err != nil {

@@ -469,7 +389,7 @@ func (r *realisClient) ReestablishConn() error {
 	r.config.protoFactory = thrift.NewTBinaryProtocolFactoryDefault()
 	}
 	if err != nil {
-		fmt.Println("error creating config: ", err)
+		r.logger.Println("error creating config: ", err)
 	}
 	// Configured for basic-auth
 	AddBasicAuth(r.config, r.config.username, r.config.password)

@@ -478,7 +398,7 @@ func (r *realisClient) ReestablishConn() error {
 	r.adminClient = aurora.NewAuroraAdminClientFactory(r.config.transport, r.config.protoFactory)
 	} else if r.config.url != "" && r.config.username != "" && r.config.password != "" {
 	//Re-establish using scheduler url.
-	fmt.Println("ReestablishConn url: ", r.config.url)
+	r.logger.Println("ReestablishConn url: ", r.config.url)
 	if r.config.jsonTransport {
 		trans, err := newTJSONTransport(url, r.config.timeoutms)
 		if err != nil {

@@ -499,14 +419,14 @@ func (r *realisClient) ReestablishConn() error {
 	r.readonlyClient = aurora.NewReadOnlySchedulerClientFactory(r.config.transport, r.config.protoFactory)
 	r.adminClient = aurora.NewAuroraAdminClientFactory(r.config.transport, r.config.protoFactory)
 	} else {
-		fmt.Println(" Missing Data for ReestablishConn ")
-		fmt.Println(" r.config.cluster: ", r.config.cluster)
-		fmt.Println(" r.config.username: ", r.config.username)
-		fmt.Println(" r.config.passwd: ", r.config.password)
-		fmt.Println(" r.config.url: ", r.config.url)
+		r.logger.Println(" Missing Data for ReestablishConn ")
+		r.logger.Println(" r.config.cluster: ", r.config.cluster)
+		r.logger.Println(" r.config.username: ", r.config.username)
+		r.logger.Println(" r.config.passwd: ", r.config.password)
+		r.logger.Println(" r.config.url: ", r.config.url)
 		return errors.New(" Missing Data for ReestablishConn ")
 	}
-	fmt.Printf(" config url before return: %+v\n", r.config.url)
+	r.logger.Printf(" config url before return: %+v\n", r.config.url)
 	return nil
 }
@@ -645,6 +565,9 @@ func (r *realisClient) KillJob(key *aurora.JobKey) (*aurora.Response, error) {
 }

 // Sends a create job message to the scheduler with a specific job configuration.
+// Although this API is able to create service jobs, it is better to use CreateService instead
+// as that API uses the update thrift call which has a few extra features available.
+// Use this API to create ad-hoc jobs.
 func (r *realisClient) CreateJob(auroraJob Job) (*aurora.Response, error) {
 	var resp *aurora.Response
 	var clientErr error

@@ -669,6 +592,24 @@ func (r *realisClient) CreateJob(auroraJob Job) (*aurora.Response, error) {

 }

+// This API uses an update thrift call to create the services giving a few more robust features.
+func (r *realisClient) CreateService(auroraJob Job, settings UpdateSettings) (*aurora.Response, *aurora.StartJobUpdateResult_, error) {
+	// Create a new job update object and ship it to the StartJobUpdate api
+	update := NewUpdateJob(auroraJob.TaskConfig(), &settings.settings)
+	update.InstanceCount(auroraJob.GetInstanceCount())
+
+	resp, err := r.StartJobUpdate(update, "")
+	if err != nil {
+		return resp, nil, errors.Wrap(err, "unable to create service")
+	}
+
+	if resp != nil && resp.GetResult_() != nil {
+		return resp, resp.GetResult_().GetStartJobUpdateResult_(), nil
+	}
+
+	return resp, nil, errors.New("results object is nil")
+}
+
 func (r *realisClient) ScheduleCronJob(auroraJob Job) (*aurora.Response, error) {
 	var resp *aurora.Response
 	var clientErr error
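CreateService deliberately returns the StartJobUpdateResult_ alongside the raw response, because that result carries the update key a caller needs in order to watch the rollout. A condensed restatement of the example client's createService flow, assuming r, monitor, and job are wired up as in examples/client.go:

package sketch

import (
	"fmt"
	"os"

	"github.com/paypal/gorealis"
)

// Create a service via the update API and watch it roll forward.
func createService(r realis.Realis, monitor *realis.Monitor, job realis.Job) {
	settings := realis.NewUpdateSettings()
	job.InstanceCount(3)

	_, result, err := r.CreateService(job, *settings)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	// The StartJobUpdateResult_ carries the update key the monitor polls on.
	if ok, err := monitor.JobUpdate(*result.GetKey(), 5, 50); !ok || err != nil {
		// The update never rolled forward: tear down the half-created service.
		if _, err := r.KillJob(job.JobKey()); err != nil {
			fmt.Println(err)
		}
		os.Exit(1)
	}
}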
@@ -962,6 +903,7 @@ func (r *realisClient) GetTasksWithoutConfigs(query *aurora.TaskQuery) (tasks []

 }

+// Get the task configuration from the aurora scheduler for a job
 func (r *realisClient) FetchTaskConfig(instKey aurora.InstanceKey) (*aurora.TaskConfig, error) {

 	ids := make(map[int32]bool)

@@ -1166,7 +1108,7 @@ func (r *realisClient) MaintenanceStatus(hosts ...string) (*aurora.Response, *aurora.MaintenanceStatusResult_,
 	// Experienced an connection error
 	err1 := r.ReestablishConn()
 	if err1 != nil {
-		fmt.Println("error in re-establishing connection: ", err1)
+		r.logger.Println("error in re-establishing connection: ", err1)
 	}
 	return false, nil
 }
realis_e2e_test.go

@@ -55,6 +55,14 @@ func TestMain(m *testing.M) {
 	os.Exit(m.Run())
 }

+func TestLeaderFromZK(t *testing.T) {
+	cluster := realis.GetDefaultClusterFromZKUrl("192.168.33.7:2181")
+	url, err := realis.LeaderFromZK(*cluster)
+
+	assert.NoError(t, err)
+	assert.Equal(t, url, "http://aurora.local:8081")
+}
+
 func TestRealisClient_CreateJob_Thermos(t *testing.T) {

 	job := realis.NewJob().
updatejob.go (69 changed lines)

@@ -24,12 +24,15 @@ type UpdateJob struct {
 	req *aurora.JobUpdateRequest
 }

 // Create a default UpdateJob object.
 func NewDefaultUpdateJob(config *aurora.TaskConfig) *UpdateJob {

 	req := aurora.NewJobUpdateRequest()
 	req.TaskConfig = config
-	req.Settings = aurora.NewJobUpdateSettings()
+	s := NewUpdateSettings().Settings()
+	req.Settings = &s

 	job := NewJob().(*AuroraJob)
 	job.jobConfig.TaskConfig = config

@@ -60,7 +63,6 @@ func NewDefaultUpdateJob(config *aurora.TaskConfig) *UpdateJob {
 	req.Settings.MaxPerInstanceFailures = 0
 	req.Settings.MaxFailedInstances = 0
 	req.Settings.RollbackOnFailure = true
-	req.Settings.WaitForBatchCompletion = false

 	//TODO(rdelvalle): Deep copy job struct to avoid unexpected behavior
 	return &UpdateJob{job, req}

@@ -138,3 +140,66 @@ func (u *UpdateJob) RollbackOnFail(rollback bool) *UpdateJob {
 	u.req.Settings.RollbackOnFailure = rollback
 	return u
 }
+
+// TODO(rdelvalle): Integrate this struct with the JobUpdate struct so that we don't repeat code
+type UpdateSettings struct {
+	settings aurora.JobUpdateSettings
+}
+
+func NewUpdateSettings() *UpdateSettings {
+
+	us := new(UpdateSettings)
+
+	// Mirrors defaults set by Pystachio
+	us.settings.UpdateOnlyTheseInstances = make(map[*aurora.Range]bool)
+	us.settings.UpdateGroupSize = 1
+	us.settings.WaitForBatchCompletion = false
+	us.settings.MinWaitInInstanceRunningMs = 45000
+	us.settings.MaxPerInstanceFailures = 0
+	us.settings.MaxFailedInstances = 0
+	us.settings.RollbackOnFailure = true
+
+	return us
+}
+
+// Max number of instances being updated at any given moment.
+func (u *UpdateSettings) BatchSize(size int32) *UpdateSettings {
+	u.settings.UpdateGroupSize = size
+	return u
+}
+
+// Minimum number of seconds a shard must remain in RUNNING state before considered a success.
+func (u *UpdateSettings) WatchTime(ms int32) *UpdateSettings {
+	u.settings.MinWaitInInstanceRunningMs = ms
+	return u
+}
+
+// Wait for all instances in a group to be done before moving on.
+func (u *UpdateSettings) WaitForBatchCompletion(batchWait bool) *UpdateSettings {
+	u.settings.WaitForBatchCompletion = batchWait
+	return u
+}
+
+// Max number of instance failures to tolerate before marking instance as FAILED.
+func (u *UpdateSettings) MaxPerInstanceFailures(inst int32) *UpdateSettings {
+	u.settings.MaxPerInstanceFailures = inst
+	return u
+}
+
+// Max number of FAILED instances to tolerate before terminating the update.
+func (u *UpdateSettings) MaxFailedInstances(inst int32) *UpdateSettings {
+	u.settings.MaxFailedInstances = inst
+	return u
+}
+
+// When False, prevents auto rollback of a failed update.
+func (u *UpdateSettings) RollbackOnFail(rollback bool) *UpdateSettings {
+	u.settings.RollbackOnFailure = rollback
+	return u
+}
+
+// Return internal Thrift API structure
+func (u UpdateSettings) Settings() aurora.JobUpdateSettings {
+	return u.settings
+}
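Every setter above returns *UpdateSettings, so the new type chains the same way the existing Job and UpdateJob builders do before being handed to CreateService. A sketch with example values:

package sketch

import "github.com/paypal/gorealis"

// Build update settings fluently; the values here are illustrative.
func buildSettings() *realis.UpdateSettings {
	return realis.NewUpdateSettings().
		BatchSize(2).                 // update two instances at a time
		WatchTime(60000).             // instance must stay RUNNING for 60s (value in ms)
		WaitForBatchCompletion(true). // finish each batch before starting the next
		MaxPerInstanceFailures(1).
		MaxFailedInstances(0).
		RollbackOnFail(true)
}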
zk.go (127 changed lines)

@@ -36,89 +36,70 @@ type ServiceInstance struct {
 	Status string `json:"status"`
 }

-type NoopLogger struct{}
-
-func (NoopLogger) Printf(format string, a ...interface{}) {
-}
-
 // Retrieves current Aurora leader from ZK.
 func LeaderFromZK(cluster Cluster) (string, error) {

-	var err error
 	var zkurl string

-	duration := defaultBackoff.Duration
-	for step := 0; step < defaultBackoff.Steps; step++ {
-
-		// Attempt to find leader
-		zkurl, err = leaderFromZK(cluster)
-		if err == nil {
-			return zkurl, err
+	retryErr := ExponentialBackoff(defaultBackoff, func() (bool, error) {
+
+		endpoints := strings.Split(cluster.ZK, ",")
+
+		//TODO (rdelvalle): When enabling debugging, change logger here
+		c, _, err := zk.Connect(endpoints, time.Second*10, func(c *zk.Conn) { c.SetLogger(NoopLogger{}) })
+		if err != nil {
+			return false, errors.Wrap(err, "Failed to connect to Zookeeper at "+cluster.ZK)
 		}

-		// Backoff if we failed to determine leader
-		adjusted := duration
-		if defaultBackoff.Jitter > 0.0 {
-			adjusted = Jitter(duration, defaultBackoff.Jitter)
+		defer c.Close()
+
+		// Open up descriptor for the ZK path given
+		children, _, _, err := c.ChildrenW(cluster.SchedZKPath)
+		if err != nil {
+			return false, errors.Wrapf(err, "Path %s doesn't exist on Zookeeper ", cluster.SchedZKPath)
 		}
-		fmt.Printf("Error determining Aurora leader: %v; retrying in %v\n", err, adjusted)
-		time.Sleep(adjusted)
-		duration = time.Duration(float64(duration) * defaultBackoff.Factor)
+
+		// Search for the leader through all the children in the given path
+		serviceInst := new(ServiceInstance)
+		for _, child := range children {
+
+			// Only the leader will start with member_
+			if strings.HasPrefix(child, "member_") {
+
+				data, _, err := c.Get(cluster.SchedZKPath + "/" + child)
+				if err != nil {
+					return false, errors.Wrap(err, "Error fetching contents of leader")
+				}
+
+				err = json.Unmarshal([]byte(data), serviceInst)
+				if err != nil {
+					return false, errors.Wrap(err, "Unable to unmarshall contents of leader")
+				}
+
+				// Should only be one endpoint
+				if len(serviceInst.AdditionalEndpoints) > 1 {
+					fmt.Errorf("Ambiguous end points schemes")
+				}
+
+				var scheme, host, port string
+				for k, v := range serviceInst.AdditionalEndpoints {
+					scheme = k
+					host = v.Host
+					port = strconv.Itoa(v.Port)
+				}
+
+				zkurl = scheme + "://" + host + ":" + port
+				return true, nil
+			}
+		}
+
+		return false, errors.New("No leader found")
+	})
+
+	if retryErr != nil {
+		return "", errors.Wrapf(retryErr, "Failed to determine leader after %v attempts", defaultBackoff.Steps)
 	}

-	return "", errors.Wrapf(err, "Failed to determine leader after %v attempts", defaultBackoff.Steps)
-}
-
-func leaderFromZK(cluster Cluster) (string, error) {
-
-	endpoints := strings.Split(cluster.ZK, ",")
-
-	//TODO (rdelvalle): When enabling debugging, change logger here
-	c, _, err := zk.Connect(endpoints, time.Second*10, func(c *zk.Conn) { c.SetLogger(NoopLogger{}) })
-	if err != nil {
-		return "", errors.Wrap(err, "Failed to connect to Zookeeper at "+cluster.ZK)
-	}
-
-	defer c.Close()
-
-	children, _, _, err := c.ChildrenW(cluster.SchedZKPath)
-	if err != nil {
-		return "", errors.Wrapf(err, "Path %s doesn't exist on Zookeeper ", cluster.SchedZKPath)
-	}
-
-	serviceInst := new(ServiceInstance)
-
-	for _, child := range children {
-
-		// Only the leader will start with member_
-		if strings.HasPrefix(child, "member_") {
-
-			data, _, err := c.Get(cluster.SchedZKPath + "/" + child)
-			if err != nil {
-				return "", errors.Wrap(err, "Error fetching contents of leader")
-			}
-
-			err = json.Unmarshal([]byte(data), serviceInst)
-			if err != nil {
-				return "", errors.Wrap(err, "Unable to unmarshall contents of leader")
-			}
-
-			// Should only be one endpoint
-			if len(serviceInst.AdditionalEndpoints) > 1 {
-				fmt.Errorf("Ambiguous end points schemes")
-			}
-
-			var scheme, host, port string
-			for k, v := range serviceInst.AdditionalEndpoints {
-				scheme = k
-				host = v.Host
-				port = strconv.Itoa(v.Port)
-			}
-
-			return scheme + "://" + host + ":" + port, nil
-		}
-	}
-
-	return "", errors.New("No leader found")
-}
+	return zkurl, nil
+}
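LeaderFromZK now hands its retry policy to ExponentialBackoff with a func() (bool, error) condition. The implementation of ExponentialBackoff is not part of this diff, so its exact error semantics are an assumption here; what the call site above does imply is that returning true stops the loop successfully and returning false asks for another attempt. An in-package sketch of driving the same helper:

package realis

import "errors"

// Sketch of the condition-function pattern the new LeaderFromZK uses;
// error handling between attempts is up to ExponentialBackoff, whose
// code lives outside this diff.
func fetchWithRetry() (string, error) {
	var result string

	retryErr := ExponentialBackoff(defaultBackoff, func() (bool, error) {
		value, err := tryOnce() // hypothetical single attempt
		if err != nil {
			return false, err // not done; report why this attempt failed
		}
		result = value
		return true, nil // done, stop retrying
	})

	if retryErr != nil {
		return "", retryErr
	}
	return result, nil
}

// tryOnce is a stand-in for one attempt against an external system.
func tryOnce() (string, error) { return "", errors.New("unavailable") }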