gorealis resiliency
This commit is contained in:
parent
d97e59b9e6
commit
3add32a585
3 changed files with 367 additions and 114 deletions
|
@ -60,7 +60,6 @@ func main() {
|
||||||
|
|
||||||
var job realis.Job
|
var job realis.Job
|
||||||
var err error
|
var err error
|
||||||
var config *realis.RealisConfig
|
|
||||||
var monitor *realis.Monitor
|
var monitor *realis.Monitor
|
||||||
var r realis.Realis
|
var r realis.Realis
|
||||||
|
|
||||||
|
@ -76,7 +75,7 @@ func main() {
|
||||||
}
|
}
|
||||||
fmt.Printf("cluster: %+v \n", cluster)
|
fmt.Printf("cluster: %+v \n", cluster)
|
||||||
|
|
||||||
r, err = realis.NewClientUsingCluster(cluster, *username, *password)
|
r, err = realis.NewDefaultClientUsingCluster(cluster, *username, *password)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
|
@ -84,17 +83,11 @@ func main() {
|
||||||
monitor = &realis.Monitor{r}
|
monitor = &realis.Monitor{r}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
//Create new configuration with default transport layer
|
r, err = realis.NewDefaultClientUsingUrl(*url, *username, *password)
|
||||||
config, err = realis.NewDefaultConfig(*url, 10000)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Configured for vagrant
|
|
||||||
realis.AddBasicAuth(config, *username, *password)
|
|
||||||
r = realis.NewClient(config)
|
|
||||||
|
|
||||||
monitor = &realis.Monitor{r}
|
monitor = &realis.Monitor{r}
|
||||||
}
|
}
|
||||||
defer r.Close()
|
defer r.Close()
|
||||||
|
@ -122,20 +115,16 @@ func main() {
|
||||||
break
|
break
|
||||||
case "compose":
|
case "compose":
|
||||||
job = realis.NewJob().
|
job = realis.NewJob().
|
||||||
//Environment("prod").
|
Environment("prod").
|
||||||
//Role("vagrant").
|
Role("vagrant").
|
||||||
//Name("docker-compose").
|
Name("docker-compose").
|
||||||
Role("gorealis").
|
ExecutorName("docker-compose-executor").
|
||||||
Environment("k2").
|
|
||||||
Name("testapp").
|
|
||||||
ExecutorName("sampleapp").
|
|
||||||
ExecutorName("dce-regular").
|
|
||||||
ExecutorData("{}").
|
ExecutorData("{}").
|
||||||
CPU(0.25).
|
CPU(0.25).
|
||||||
RAM(64).
|
RAM(64).
|
||||||
Disk(100).
|
Disk(100).
|
||||||
IsService(true).
|
IsService(true).
|
||||||
InstanceCount(4).
|
InstanceCount(2).
|
||||||
AddPorts(4).
|
AddPorts(4).
|
||||||
AddLabel("fileName", "sample-app/docker-compose.yml").
|
AddLabel("fileName", "sample-app/docker-compose.yml").
|
||||||
AddURIs(true, true, "https://github.com/mesos/docker-compose-executor/releases/download/0.1.0/sample-app.tar.gz")
|
AddURIs(true, true, "https://github.com/mesos/docker-compose-executor/releases/download/0.1.0/sample-app.tar.gz")
|
||||||
|
@ -307,7 +296,7 @@ func main() {
|
||||||
case "flexUp":
|
case "flexUp":
|
||||||
fmt.Println("Flexing up job")
|
fmt.Println("Flexing up job")
|
||||||
|
|
||||||
numOfInstances := int32(2)
|
numOfInstances := int32(4)
|
||||||
|
|
||||||
live, err := r.GetInstanceIds(job.JobKey(), aurora.ACTIVE_STATES)
|
live, err := r.GetInstanceIds(job.JobKey(), aurora.ACTIVE_STATES)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
|
12
monitors.go
12
monitors.go
|
@ -59,7 +59,7 @@ func (m *Monitor) JobUpdate(updateKey aurora.JobUpdateKey, interval int, timeout
|
||||||
fmt.Println("error in ReestablishConn: ", err1)
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// if error remains then return (false, err).
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return false, err
|
return false, err
|
||||||
}
|
}
|
||||||
|
@ -114,10 +114,14 @@ func (m *Monitor) Instances(key *aurora.JobKey, instances int32, interval int, t
|
||||||
fmt.Println(" live: ", live)
|
fmt.Println(" live: ", live)
|
||||||
break
|
break
|
||||||
}
|
}
|
||||||
err1 := m.Client.ReestablishConn()
|
|
||||||
if err1 != nil {
|
if err != nil {
|
||||||
fmt.Println("error in ReestablishConn: ", err1)
|
err1 := m.Client.ReestablishConn()
|
||||||
|
if err1 != nil {
|
||||||
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//live, err := m.Client.GetInstanceIds(key, aurora.LIVE_STATES)
|
//live, err := m.Client.GetInstanceIds(key, aurora.LIVE_STATES)
|
||||||
|
|
442
realis.go
442
realis.go
|
@ -63,6 +63,7 @@ type realisClient struct {
|
||||||
// Wrapper object to provide future flexibility
|
// Wrapper object to provide future flexibility
|
||||||
type RealisConfig struct {
|
type RealisConfig struct {
|
||||||
username, password string
|
username, password string
|
||||||
|
url string
|
||||||
cluster *Cluster
|
cluster *Cluster
|
||||||
transport thrift.TTransport
|
transport thrift.TTransport
|
||||||
protoFactory thrift.TProtocolFactory
|
protoFactory thrift.TProtocolFactory
|
||||||
|
@ -76,8 +77,8 @@ type Backoff struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
var defaultBackoff = Backoff{
|
var defaultBackoff = Backoff{
|
||||||
Steps: 10,
|
Steps: 3,
|
||||||
Duration: 5 * time.Second,
|
Duration: 10 * time.Second,
|
||||||
Factor: 5.0,
|
Factor: 5.0,
|
||||||
Jitter: 0.1,
|
Jitter: 0.1,
|
||||||
}
|
}
|
||||||
|
@ -97,7 +98,7 @@ func Jitter(duration time.Duration, maxFactor float64) time.Duration {
|
||||||
|
|
||||||
// Create a new Client with Cluster information and other details.
|
// Create a new Client with Cluster information and other details.
|
||||||
|
|
||||||
func NewClientUsingCluster(cluster *Cluster, user, passwd string) (Realis, error) {
|
func NewDefaultClientUsingCluster(cluster *Cluster, user, passwd string) (Realis, error) {
|
||||||
|
|
||||||
url, err := LeaderFromZK(*cluster)
|
url, err := LeaderFromZK(*cluster)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
@ -115,6 +116,61 @@ func NewClientUsingCluster(cluster *Cluster, user, passwd string) (Realis, error
|
||||||
config.username = user
|
config.username = user
|
||||||
config.password = passwd
|
config.password = passwd
|
||||||
config.cluster = cluster
|
config.cluster = cluster
|
||||||
|
config.url = ""
|
||||||
|
// Configured for vagrant
|
||||||
|
AddBasicAuth(config, user, passwd)
|
||||||
|
r := NewClient(config)
|
||||||
|
return r, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
//This API creates a default cluster object from the given ZooKeeper URL.
|
||||||
|
func NewDefaultClientUsingZKUrl(zkUrl, user, passwd string) (Realis, error) {
|
||||||
|
|
||||||
|
fmt.Println(" zkUrl: %s", zkUrl)
|
||||||
|
cluster := &Cluster{Name: "testCluster",
|
||||||
|
AuthMechanism: "UNAUTHENTICATED",
|
||||||
|
ZK: zkUrl,
|
||||||
|
SchedZKPath: "/aurora/scheduler",
|
||||||
|
AgentRunDir: "latest",
|
||||||
|
AgentRoot: "/var/lib/mesos",
|
||||||
|
}
|
||||||
|
|
||||||
|
url, err := LeaderFromZK(*cluster)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
fmt.Printf(" url: %s\n", url)
|
||||||
|
|
||||||
|
//Create new configuration with default transport layer
|
||||||
|
config, err := NewDefaultConfig("http://localhost:18000", 10000)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
config.username = user
|
||||||
|
config.password = passwd
|
||||||
|
config.cluster = cluster
|
||||||
|
config.url = ""
|
||||||
|
// Configured for vagrant
|
||||||
|
AddBasicAuth(config, user, passwd)
|
||||||
|
r := NewClient(config)
|
||||||
|
return r, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewDefaultClientUsingUrl(url, user, passwd string) (Realis, error) {
|
||||||
|
|
||||||
|
fmt.Printf(" url: %s\n", url)
|
||||||
|
//Create new configuration with default transport layer
|
||||||
|
config, err := NewDefaultConfig("http://localhost:18000", 10000)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
config.username = user
|
||||||
|
config.password = passwd
|
||||||
|
config.url = url
|
||||||
|
config.cluster = nil
|
||||||
// Configured for vagrant
|
// Configured for vagrant
|
||||||
AddBasicAuth(config, user, passwd)
|
AddBasicAuth(config, user, passwd)
|
||||||
r := NewClient(config)
|
r := NewClient(config)
|
||||||
|
@ -202,17 +258,17 @@ func (r *realisClient) ReestablishConn() error {
|
||||||
//close existing connection..
|
//close existing connection..
|
||||||
fmt.Println("ReestablishConn begin ....")
|
fmt.Println("ReestablishConn begin ....")
|
||||||
r.Close()
|
r.Close()
|
||||||
|
//First check cluster object for re-establish; if not available then try with scheduler url.
|
||||||
if r.config.cluster != nil && r.config.username != "" && r.config.password != "" {
|
if r.config.cluster != nil && r.config.username != "" && r.config.password != "" {
|
||||||
|
//Re-establish using cluster object.
|
||||||
url, err := LeaderFromZK(*r.config.cluster)
|
url, err := LeaderFromZK(*r.config.cluster)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println("LeaderFromZK error: ", err)
|
fmt.Errorf("LeaderFromZK error: %+v\n ", err)
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
fmt.Println("ReestablishConn url: ", url)
|
fmt.Println("ReestablishConn url: ", url)
|
||||||
config, err := NewDefaultConfig("http://localhost:18000", 10000)
|
config, err := NewDefaultConfig(url, 10000)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
// Configured for basic-auth
|
// Configured for basic-auth
|
||||||
AddBasicAuth(config, r.config.username, r.config.password)
|
AddBasicAuth(config, r.config.username, r.config.password)
|
||||||
|
@ -220,11 +276,23 @@ func (r *realisClient) ReestablishConn() error {
|
||||||
r.config = config
|
r.config = config
|
||||||
r.client = aurora.NewAuroraSchedulerManagerClientFactory(config.transport, config.protoFactory)
|
r.client = aurora.NewAuroraSchedulerManagerClientFactory(config.transport, config.protoFactory)
|
||||||
r.readonlyClient = aurora.NewReadOnlySchedulerClientFactory(config.transport, config.protoFactory)
|
r.readonlyClient = aurora.NewReadOnlySchedulerClientFactory(config.transport, config.protoFactory)
|
||||||
|
} else if r.config.url != "" && r.config.username != "" && r.config.password != "" {
|
||||||
|
//Re-establish using scheduler url.
|
||||||
|
//Create new configuration with default transport layer
|
||||||
|
config, err := NewDefaultConfig(r.config.url, 10000)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
}
|
||||||
|
AddBasicAuth(config, r.config.username, r.config.password)
|
||||||
|
r.config = config
|
||||||
|
r.client = aurora.NewAuroraSchedulerManagerClientFactory(config.transport, config.protoFactory)
|
||||||
|
r.readonlyClient = aurora.NewReadOnlySchedulerClientFactory(config.transport, config.protoFactory)
|
||||||
} else {
|
} else {
|
||||||
fmt.Println(" Missing Data for ReestablishConn ")
|
fmt.Println(" Missing Data for ReestablishConn ")
|
||||||
fmt.Println(" r.config.cluster: ", r.config.cluster)
|
fmt.Println(" r.config.cluster: ", r.config.cluster)
|
||||||
fmt.Println(" r.config.username: ", r.config.username)
|
fmt.Println(" r.config.username: ", r.config.username)
|
||||||
fmt.Println(" r.config.passwd: ", r.config.password)
|
fmt.Println(" r.config.passwd: ", r.config.password)
|
||||||
|
fmt.Println(" r.config.url: ", r.config.url)
|
||||||
return errors.New(" Missing Data for ReestablishConn ")
|
return errors.New(" Missing Data for ReestablishConn ")
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
@ -243,7 +311,30 @@ func (r *realisClient) GetInstanceIds(key *aurora.JobKey, states map[aurora.Sche
|
||||||
JobName: key.Name,
|
JobName: key.Name,
|
||||||
Statuses: states}
|
Statuses: states}
|
||||||
|
|
||||||
resp, err := r.client.GetTasksWithoutConfigs(taskQ)
|
var resp *aurora.Response
|
||||||
|
var err error
|
||||||
|
|
||||||
|
duration := defaultBackoff.Duration
|
||||||
|
for i := 0; i < defaultBackoff.Steps; i++ {
|
||||||
|
if i != 0 {
|
||||||
|
adjusted := duration
|
||||||
|
if defaultBackoff.Jitter > 0.0 {
|
||||||
|
adjusted = Jitter(duration, defaultBackoff.Jitter)
|
||||||
|
}
|
||||||
|
fmt.Println(" sleeping for: ", adjusted)
|
||||||
|
time.Sleep(adjusted)
|
||||||
|
duration = time.Duration(float64(duration) * defaultBackoff.Factor)
|
||||||
|
}
|
||||||
|
if resp, err = r.client.GetTasksWithoutConfigs(taskQ); err == nil {
|
||||||
|
fmt.Println("resp: ", resp)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
err1 := r.ReestablishConn()
|
||||||
|
if err1 != nil {
|
||||||
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Wrap(err, "Error querying Aurora Scheduler for active IDs")
|
return nil, errors.Wrap(err, "Error querying Aurora Scheduler for active IDs")
|
||||||
}
|
}
|
||||||
|
@ -259,36 +350,9 @@ func (r *realisClient) GetInstanceIds(key *aurora.JobKey, states map[aurora.Sche
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *realisClient) GetJobUpdateSummaries(jobUpdateQuery *aurora.JobUpdateQuery) (*aurora.Response, error) {
|
func (r *realisClient) GetJobUpdateSummaries(jobUpdateQuery *aurora.JobUpdateQuery) (*aurora.Response, error) {
|
||||||
resp, err := r.readonlyClient.GetJobUpdateSummaries(jobUpdateQuery)
|
|
||||||
if err != nil {
|
|
||||||
return nil, errors.Wrap(err, "Error getting job update summaries from Aurora Scheduler")
|
|
||||||
}
|
|
||||||
return response.ResponseCodeCheck(resp)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Kill specific instances of a job.
|
|
||||||
func (r *realisClient) KillInstances(key *aurora.JobKey, instances ...int32) (*aurora.Response, error) {
|
|
||||||
|
|
||||||
instanceIds := make(map[int32]bool)
|
|
||||||
|
|
||||||
for _, instId := range instances {
|
|
||||||
instanceIds[instId] = true
|
|
||||||
}
|
|
||||||
|
|
||||||
resp, err := r.client.KillTasks(key, instanceIds)
|
|
||||||
if err != nil {
|
|
||||||
return nil, errors.Wrap(err, "Error sending Kill command to Aurora Scheduler")
|
|
||||||
}
|
|
||||||
|
|
||||||
return response.ResponseCodeCheck(resp)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sends a kill message to the scheduler for all active tasks under a job.
|
|
||||||
func (r *realisClient) KillJob(key *aurora.JobKey) (*aurora.Response, error) {
|
|
||||||
|
|
||||||
var instanceIds map[int32]bool
|
|
||||||
var err error
|
|
||||||
var resp *aurora.Response
|
var resp *aurora.Response
|
||||||
|
var err error
|
||||||
|
|
||||||
duration := defaultBackoff.Duration
|
duration := defaultBackoff.Duration
|
||||||
for i := 0; i < defaultBackoff.Steps; i++ {
|
for i := 0; i < defaultBackoff.Steps; i++ {
|
||||||
if i != 0 {
|
if i != 0 {
|
||||||
|
@ -300,17 +364,61 @@ func (r *realisClient) KillJob(key *aurora.JobKey) (*aurora.Response, error) {
|
||||||
time.Sleep(adjusted)
|
time.Sleep(adjusted)
|
||||||
duration = time.Duration(float64(duration) * defaultBackoff.Factor)
|
duration = time.Duration(float64(duration) * defaultBackoff.Factor)
|
||||||
}
|
}
|
||||||
|
if resp, err = r.readonlyClient.GetJobUpdateSummaries(jobUpdateQuery); err == nil {
|
||||||
if instanceIds, err = r.GetInstanceIds(key, aurora.ACTIVE_STATES); err == nil {
|
fmt.Println("resp: ", resp)
|
||||||
fmt.Println("instanceIds: ", instanceIds)
|
return response.ResponseCodeCheck(resp)
|
||||||
break
|
|
||||||
}
|
}
|
||||||
err1 := r.ReestablishConn()
|
err1 := r.ReestablishConn()
|
||||||
if err1 != nil {
|
if err1 != nil {
|
||||||
fmt.Println("error in ReestablishConn: ", err1)
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return nil, errors.Wrap(err, "Error getting job update summaries from Aurora Scheduler")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Kill specific instances of a job.
|
||||||
|
func (r *realisClient) KillInstances(key *aurora.JobKey, instances ...int32) (*aurora.Response, error) {
|
||||||
|
|
||||||
|
instanceIds := make(map[int32]bool)
|
||||||
|
var resp *aurora.Response
|
||||||
|
var err error
|
||||||
|
duration := defaultBackoff.Duration
|
||||||
|
|
||||||
|
for _, instId := range instances {
|
||||||
|
instanceIds[instId] = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for i := 0; i < defaultBackoff.Steps; i++ {
|
||||||
|
if i != 0 {
|
||||||
|
adjusted := duration
|
||||||
|
if defaultBackoff.Jitter > 0.0 {
|
||||||
|
adjusted = Jitter(duration, defaultBackoff.Jitter)
|
||||||
|
}
|
||||||
|
fmt.Println(" sleeping for: ", adjusted)
|
||||||
|
time.Sleep(adjusted)
|
||||||
|
duration = time.Duration(float64(duration) * defaultBackoff.Factor)
|
||||||
|
}
|
||||||
|
if resp, err = r.client.KillTasks(key, instanceIds); err == nil {
|
||||||
|
fmt.Println("resp: ", resp)
|
||||||
|
return response.ResponseCodeCheck(resp)
|
||||||
|
}
|
||||||
|
err1 := r.ReestablishConn()
|
||||||
|
if err1 != nil {
|
||||||
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
return nil, errors.Wrap(err, "Error sending Kill command to Aurora Scheduler")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Sends a kill message to the scheduler for all active tasks under a job.
|
||||||
|
func (r *realisClient) KillJob(key *aurora.JobKey) (*aurora.Response, error) {
|
||||||
|
|
||||||
|
var instanceIds map[int32]bool
|
||||||
|
var err error
|
||||||
|
var resp *aurora.Response
|
||||||
|
instanceIds, err = r.GetInstanceIds(key, aurora.ACTIVE_STATES)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Wrap(err, "Could not retrieve relevant task instance IDs")
|
return nil, errors.Wrap(err, "Could not retrieve relevant task instance IDs")
|
||||||
}
|
}
|
||||||
|
@ -338,14 +446,11 @@ func (r *realisClient) KillJob(key *aurora.JobKey) (*aurora.Response, error) {
|
||||||
fmt.Println("error in ReestablishConn: ", err1)
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Wrap(err, "Error sending Kill command to Aurora Scheduler")
|
return nil, errors.Wrap(err, "Error sending Kill command to Aurora Scheduler")
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
return nil, errors.New("No tasks in the Active state")
|
return nil, errors.New("No tasks in the Active state")
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Sends a create job message to the scheduler with a specific job configuration.
|
// Sends a create job message to the scheduler with a specific job configuration.
|
||||||
|
@ -377,33 +482,87 @@ func (r *realisClient) CreateJob(auroraJob Job) (*aurora.Response, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *realisClient) ScheduleCronJob(auroraJob Job) (*aurora.Response, error) {
|
func (r *realisClient) ScheduleCronJob(auroraJob Job) (*aurora.Response, error) {
|
||||||
resp, err := r.client.ScheduleCronJob(auroraJob.JobConfig())
|
var resp *aurora.Response
|
||||||
|
var err error
|
||||||
|
|
||||||
if err != nil {
|
duration := defaultBackoff.Duration
|
||||||
return nil, errors.Wrap(err, "Error sending Cron Job Schedule message to Aurora Scheduler")
|
for i := 0; i < defaultBackoff.Steps; i++ {
|
||||||
|
if i != 0 {
|
||||||
|
adjusted := duration
|
||||||
|
if defaultBackoff.Jitter > 0.0 {
|
||||||
|
adjusted = Jitter(duration, defaultBackoff.Jitter)
|
||||||
|
}
|
||||||
|
fmt.Println(" sleeping for: ", adjusted)
|
||||||
|
time.Sleep(adjusted)
|
||||||
|
duration = time.Duration(float64(duration) * defaultBackoff.Factor)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp, err = r.client.ScheduleCronJob(auroraJob.JobConfig()); err == nil {
|
||||||
|
return response.ResponseCodeCheck(resp)
|
||||||
|
}
|
||||||
|
err1 := r.ReestablishConn()
|
||||||
|
if err1 != nil {
|
||||||
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return nil, errors.Wrap(err, "Error sending Cron Job Schedule message to Aurora Scheduler")
|
||||||
return response.ResponseCodeCheck(resp)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *realisClient) DescheduleCronJob(key *aurora.JobKey) (*aurora.Response, error) {
|
func (r *realisClient) DescheduleCronJob(key *aurora.JobKey) (*aurora.Response, error) {
|
||||||
resp, err := r.client.DescheduleCronJob(key)
|
|
||||||
|
|
||||||
if err != nil {
|
var resp *aurora.Response
|
||||||
return nil, errors.Wrap(err, "Error sending Cron Job De-schedule message to Aurora Scheduler")
|
var err error
|
||||||
|
|
||||||
|
duration := defaultBackoff.Duration
|
||||||
|
for i := 0; i < defaultBackoff.Steps; i++ {
|
||||||
|
if i != 0 {
|
||||||
|
adjusted := duration
|
||||||
|
if defaultBackoff.Jitter > 0.0 {
|
||||||
|
adjusted = Jitter(duration, defaultBackoff.Jitter)
|
||||||
|
}
|
||||||
|
fmt.Println(" sleeping for: ", adjusted)
|
||||||
|
time.Sleep(adjusted)
|
||||||
|
duration = time.Duration(float64(duration) * defaultBackoff.Factor)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp, err = r.client.DescheduleCronJob(key); err == nil {
|
||||||
|
return response.ResponseCodeCheck(resp)
|
||||||
|
}
|
||||||
|
err1 := r.ReestablishConn()
|
||||||
|
if err1 != nil {
|
||||||
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return response.ResponseCodeCheck(resp)
|
return nil, errors.Wrap(err, "Error sending Cron Job De-schedule message to Aurora Scheduler")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *realisClient) StartCronJob(key *aurora.JobKey) (*aurora.Response, error) {
|
func (r *realisClient) StartCronJob(key *aurora.JobKey) (*aurora.Response, error) {
|
||||||
resp, err := r.client.StartCronJob(key)
|
var resp *aurora.Response
|
||||||
|
var err error
|
||||||
|
|
||||||
if err != nil {
|
duration := defaultBackoff.Duration
|
||||||
return nil, errors.Wrap(err, "Error sending Start Cron Job message to Aurora Scheduler")
|
for i := 0; i < defaultBackoff.Steps; i++ {
|
||||||
|
if i != 0 {
|
||||||
|
adjusted := duration
|
||||||
|
if defaultBackoff.Jitter > 0.0 {
|
||||||
|
adjusted = Jitter(duration, defaultBackoff.Jitter)
|
||||||
|
}
|
||||||
|
fmt.Println(" sleeping for: ", adjusted)
|
||||||
|
time.Sleep(adjusted)
|
||||||
|
duration = time.Duration(float64(duration) * defaultBackoff.Factor)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp, err = r.client.StartCronJob(key); err == nil {
|
||||||
|
return response.ResponseCodeCheck(resp)
|
||||||
|
}
|
||||||
|
err1 := r.ReestablishConn()
|
||||||
|
if err1 != nil {
|
||||||
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return response.ResponseCodeCheck(resp)
|
return nil, errors.Wrap(err, "Error sending Start Cron Job message to Aurora Scheduler")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Restarts specific instances specified
|
// Restarts specific instances specified
|
||||||
|
@ -413,31 +572,66 @@ func (r *realisClient) RestartInstances(key *aurora.JobKey, instances ...int32)
|
||||||
for _, instId := range instances {
|
for _, instId := range instances {
|
||||||
instanceIds[instId] = true
|
instanceIds[instId] = true
|
||||||
}
|
}
|
||||||
|
var resp *aurora.Response
|
||||||
|
var err error
|
||||||
|
duration := defaultBackoff.Duration
|
||||||
|
for i := 0; i < defaultBackoff.Steps; i++ {
|
||||||
|
if i != 0 {
|
||||||
|
adjusted := duration
|
||||||
|
if defaultBackoff.Jitter > 0.0 {
|
||||||
|
adjusted = Jitter(duration, defaultBackoff.Jitter)
|
||||||
|
}
|
||||||
|
fmt.Println(" sleeping for: ", adjusted)
|
||||||
|
time.Sleep(adjusted)
|
||||||
|
duration = time.Duration(float64(duration) * defaultBackoff.Factor)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp, err = r.client.RestartShards(key, instanceIds); err == nil {
|
||||||
|
return response.ResponseCodeCheck(resp)
|
||||||
|
}
|
||||||
|
err1 := r.ReestablishConn()
|
||||||
|
if err1 != nil {
|
||||||
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
|
}
|
||||||
|
|
||||||
resp, err := r.client.RestartShards(key, instanceIds)
|
|
||||||
if err != nil {
|
|
||||||
return nil, errors.Wrap(err, "Error sending Restart command to Aurora Scheduler")
|
|
||||||
}
|
}
|
||||||
|
return nil, errors.Wrap(err, "Error sending Restart command to Aurora Scheduler")
|
||||||
return response.ResponseCodeCheck(resp)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Restarts all active tasks under a job configuration.
|
// Restarts all active tasks under a job configuration.
|
||||||
func (r *realisClient) RestartJob(key *aurora.JobKey) (*aurora.Response, error) {
|
func (r *realisClient) RestartJob(key *aurora.JobKey) (*aurora.Response, error) {
|
||||||
|
|
||||||
instanceIds, err := r.GetInstanceIds(key, aurora.ACTIVE_STATES)
|
instanceIds, err1 := r.GetInstanceIds(key, aurora.ACTIVE_STATES)
|
||||||
if err != nil {
|
if err1 != nil {
|
||||||
return nil, errors.Wrap(err, "Could not retrieve relevant task instance IDs")
|
return nil, errors.Wrap(err1, "Could not retrieve relevant task instance IDs")
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var resp *aurora.Response
|
||||||
|
var err error
|
||||||
if len(instanceIds) > 0 {
|
if len(instanceIds) > 0 {
|
||||||
resp, err := r.client.RestartShards(key, instanceIds)
|
duration := defaultBackoff.Duration
|
||||||
|
for i := 0; i < defaultBackoff.Steps; i++ {
|
||||||
|
if i != 0 {
|
||||||
|
adjusted := duration
|
||||||
|
if defaultBackoff.Jitter > 0.0 {
|
||||||
|
adjusted = Jitter(duration, defaultBackoff.Jitter)
|
||||||
|
}
|
||||||
|
fmt.Println(" sleeping for: ", adjusted)
|
||||||
|
time.Sleep(adjusted)
|
||||||
|
duration = time.Duration(float64(duration) * defaultBackoff.Factor)
|
||||||
|
}
|
||||||
|
|
||||||
|
if resp, err = r.client.RestartShards(key, instanceIds); err == nil {
|
||||||
|
return response.ResponseCodeCheck(resp)
|
||||||
|
}
|
||||||
|
err1 := r.ReestablishConn()
|
||||||
|
if err1 != nil {
|
||||||
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return nil, errors.Wrap(err, "Error sending Restart command to Aurora Scheduler")
|
|
||||||
}
|
}
|
||||||
|
return nil, errors.Wrap(err, "Error sending Restart command to Aurora Scheduler")
|
||||||
|
|
||||||
return response.ResponseCodeCheck(resp)
|
|
||||||
} else {
|
} else {
|
||||||
return nil, errors.New("No tasks in the Active state")
|
return nil, errors.New("No tasks in the Active state")
|
||||||
}
|
}
|
||||||
|
@ -468,10 +662,7 @@ func (r *realisClient) StartJobUpdate(updateJob *UpdateJob, message string) (*au
|
||||||
fmt.Println("error in ReestablishConn: ", err1)
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//resp, err = r.client.StartJobUpdate(updateJob.req, message)
|
|
||||||
return nil, errors.Wrap(err, "Error sending StartJobUpdate command to Aurora Scheduler")
|
return nil, errors.Wrap(err, "Error sending StartJobUpdate command to Aurora Scheduler")
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Abort Job Update on Aurora. Requires the updateId which can be obtained on the Aurora web UI.
|
// Abort Job Update on Aurora. Requires the updateId which can be obtained on the Aurora web UI.
|
||||||
|
@ -479,26 +670,57 @@ func (r *realisClient) AbortJobUpdate(
|
||||||
updateKey aurora.JobUpdateKey,
|
updateKey aurora.JobUpdateKey,
|
||||||
message string) (*aurora.Response, error) {
|
message string) (*aurora.Response, error) {
|
||||||
|
|
||||||
resp, err := r.client.AbortJobUpdate(&updateKey, message)
|
var resp *aurora.Response
|
||||||
|
var err error
|
||||||
if err != nil {
|
duration := defaultBackoff.Duration
|
||||||
return nil, errors.Wrap(err, "Error sending AbortJobUpdate command to Aurora Scheduler")
|
for i := 0; i < defaultBackoff.Steps; i++ {
|
||||||
|
if i != 0 {
|
||||||
|
adjusted := duration
|
||||||
|
if defaultBackoff.Jitter > 0.0 {
|
||||||
|
adjusted = Jitter(duration, defaultBackoff.Jitter)
|
||||||
|
}
|
||||||
|
fmt.Println(" sleeping for: ", adjusted)
|
||||||
|
time.Sleep(adjusted)
|
||||||
|
duration = time.Duration(float64(duration) * defaultBackoff.Factor)
|
||||||
|
}
|
||||||
|
if resp, err = r.client.AbortJobUpdate(&updateKey, message); err == nil {
|
||||||
|
return response.ResponseCodeCheck(resp)
|
||||||
|
}
|
||||||
|
err1 := r.ReestablishConn()
|
||||||
|
if err1 != nil {
|
||||||
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return response.ResponseCodeCheck(resp)
|
return nil, errors.Wrap(err, "Error sending AbortJobUpdate command to Aurora Scheduler")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scale up the number of instances under a job configuration using the configuration for specific
|
// Scale up the number of instances under a job configuration using the configuration for specific
|
||||||
// instance to scale up.
|
// instance to scale up.
|
||||||
func (r *realisClient) AddInstances(instKey aurora.InstanceKey, count int32) (*aurora.Response, error) {
|
func (r *realisClient) AddInstances(instKey aurora.InstanceKey, count int32) (*aurora.Response, error) {
|
||||||
|
|
||||||
resp, err := r.client.AddInstances(&instKey, count)
|
var resp *aurora.Response
|
||||||
|
var err error
|
||||||
if err != nil {
|
duration := defaultBackoff.Duration
|
||||||
return nil, errors.Wrap(err, "Error sending AddInstances command to Aurora Scheduler")
|
for i := 0; i < defaultBackoff.Steps; i++ {
|
||||||
|
if i != 0 {
|
||||||
|
adjusted := duration
|
||||||
|
if defaultBackoff.Jitter > 0.0 {
|
||||||
|
adjusted = Jitter(duration, defaultBackoff.Jitter)
|
||||||
|
}
|
||||||
|
fmt.Println(" sleeping for: ", adjusted)
|
||||||
|
time.Sleep(adjusted)
|
||||||
|
duration = time.Duration(float64(duration) * defaultBackoff.Factor)
|
||||||
|
}
|
||||||
|
if resp, err = r.client.AddInstances(&instKey, count); err == nil {
|
||||||
|
return response.ResponseCodeCheck(resp)
|
||||||
|
}
|
||||||
|
err1 := r.ReestablishConn()
|
||||||
|
if err1 != nil {
|
||||||
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
return nil, errors.Wrap(err, "Error sending AddInstances command to Aurora Scheduler")
|
||||||
return response.ResponseCodeCheck(resp)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//Scale down the number of instances under a job configuration using the configuratipn of a specific instance
|
//Scale down the number of instances under a job configuration using the configuratipn of a specific instance
|
||||||
|
@ -525,7 +747,28 @@ func (r *realisClient) RemoveInstances(key *aurora.JobKey, count int32) (*aurora
|
||||||
|
|
||||||
func (r *realisClient) GetTaskStatus(query *aurora.TaskQuery) (tasks []*aurora.ScheduledTask, e error) {
|
func (r *realisClient) GetTaskStatus(query *aurora.TaskQuery) (tasks []*aurora.ScheduledTask, e error) {
|
||||||
|
|
||||||
resp, err := r.client.GetTasksStatus(query)
|
var resp *aurora.Response
|
||||||
|
var err error
|
||||||
|
duration := defaultBackoff.Duration
|
||||||
|
for i := 0; i < defaultBackoff.Steps; i++ {
|
||||||
|
if i != 0 {
|
||||||
|
adjusted := duration
|
||||||
|
if defaultBackoff.Jitter > 0.0 {
|
||||||
|
adjusted = Jitter(duration, defaultBackoff.Jitter)
|
||||||
|
}
|
||||||
|
fmt.Println(" sleeping for: ", adjusted)
|
||||||
|
time.Sleep(adjusted)
|
||||||
|
duration = time.Duration(float64(duration) * defaultBackoff.Factor)
|
||||||
|
}
|
||||||
|
if resp, err = r.client.GetTasksStatus(query); err == nil {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
err1 := r.ReestablishConn()
|
||||||
|
if err1 != nil {
|
||||||
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Wrap(err, "Error querying Aurora Scheduler for task status")
|
return nil, errors.Wrap(err, "Error querying Aurora Scheduler for task status")
|
||||||
}
|
}
|
||||||
|
@ -598,13 +841,32 @@ func (r *realisClient) FetchTaskConfig(instKey aurora.InstanceKey) (*aurora.Task
|
||||||
|
|
||||||
func (r *realisClient) JobUpdateDetails(updateQuery aurora.JobUpdateQuery) (*aurora.Response, error) {
|
func (r *realisClient) JobUpdateDetails(updateQuery aurora.JobUpdateQuery) (*aurora.Response, error) {
|
||||||
|
|
||||||
resp, err := r.client.GetJobUpdateDetails(&updateQuery)
|
var resp *aurora.Response
|
||||||
if err != nil {
|
var err error
|
||||||
return nil, errors.Wrap(err, "Unable to get job update details")
|
|
||||||
}
|
|
||||||
|
|
||||||
return response.ResponseCodeCheck(resp)
|
duration := defaultBackoff.Duration
|
||||||
|
for i := 0; i < defaultBackoff.Steps; i++ {
|
||||||
|
if i != 0 {
|
||||||
|
adjusted := duration
|
||||||
|
if defaultBackoff.Jitter > 0.0 {
|
||||||
|
adjusted = Jitter(duration, defaultBackoff.Jitter)
|
||||||
|
}
|
||||||
|
fmt.Println(" sleeping for: ", adjusted)
|
||||||
|
time.Sleep(adjusted)
|
||||||
|
duration = time.Duration(float64(duration) * defaultBackoff.Factor)
|
||||||
|
}
|
||||||
|
if resp, err = r.client.GetJobUpdateDetails(&updateQuery); err == nil {
|
||||||
|
fmt.Println(" resp: ", resp)
|
||||||
|
return response.ResponseCodeCheck(resp)
|
||||||
|
}
|
||||||
|
err1 := r.ReestablishConn()
|
||||||
|
if err1 != nil {
|
||||||
|
fmt.Println("error in ReestablishConn: ", err1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil, errors.Wrap(err, "Unable to get job update details")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (r *realisClient) RollbackJobUpdate(key aurora.JobUpdateKey, message string) (*aurora.Response, error) {
|
func (r *realisClient) RollbackJobUpdate(key aurora.JobUpdateKey, message string) (*aurora.Response, error) {
|
||||||
var resp *aurora.Response
|
var resp *aurora.Response
|
||||||
var err error
|
var err error
|
||||||
|
@ -630,7 +892,5 @@ func (r *realisClient) RollbackJobUpdate(key aurora.JobUpdateKey, message string
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//resp, err = r.client.RollbackJobUpdate(&key, message)
|
|
||||||
return nil, errors.Wrap(err, "Unable to roll back job update")
|
return nil, errors.Wrap(err, "Unable to roll back job update")
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue