Added the ability to monitor job updates.
Added the ability to kill and restart specific instances. Fixed incorrect documentation on using-the-sample-client. Added helper functions under the response package to extract fields from aurora.Response.
This commit is contained in:
parent
d8958966db
commit
01b700554a
6 changed files with 165 additions and 27 deletions
|
@ -24,6 +24,7 @@ These commands are set to run on a vagrant box. To be able to runt he docker com
|
||||||
executor examples, the vagrant box must be configured properly to use the docker compose executor.
|
executor examples, the vagrant box must be configured properly to use the docker compose executor.
|
||||||
|
|
||||||
### Thermos
|
### Thermos
|
||||||
|
|
||||||
#### Creating a Thermos job
|
#### Creating a Thermos job
|
||||||
```
|
```
|
||||||
$ cd $GOPATH/src/github.com/rdelval/gorealis
|
$ cd $GOPATH/src/github.com/rdelval/gorealis
|
||||||
|
@ -35,11 +36,12 @@ $ go run $GOPATH/src/github.com/rdelval/gorealis.git/examples/client.go -executo
|
||||||
```
|
```
|
||||||
|
|
||||||
### Docker Compose executor (custom executor)
|
### Docker Compose executor (custom executor)
|
||||||
|
|
||||||
#### Creating Docker Compose executor job
|
#### Creating Docker Compose executor job
|
||||||
```
|
```
|
||||||
$ go run $GOPATH/src/github.com/rdelval/gorealis/examples/client.go -executor=compose -url=http://192.168.33.7:8081 -cmd=create
|
$ go run $GOPATH/src/github.com/rdelval/gorealis/examples/client.go -executor=compose -url=http://192.168.33.7:8081 -cmd=create
|
||||||
```
|
```
|
||||||
#### Kill a Docker Compose executor job
|
#### Kill a Docker Compose executor job
|
||||||
```
|
```
|
||||||
$ go run $GOPATH/src/github.com/rdelval/gorealis.git/examples/client.go -executor=compose -url=http://192.168.33.7:8081 -cmd=kill
|
$ go run $GOPATH/src/github.com/rdelval/gorealis/examples/client.go -executor=compose -url=http://192.168.33.7:8081 -cmd=kill
|
||||||
```
|
```
|
||||||
|
|
|
@ -21,6 +21,7 @@ import (
|
||||||
"github.com/rdelval/gorealis"
|
"github.com/rdelval/gorealis"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
|
"github.com/rdelval/gorealis/response"
|
||||||
)
|
)
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
|
@ -115,46 +116,46 @@ func main() {
|
||||||
switch *cmd {
|
switch *cmd {
|
||||||
case "create":
|
case "create":
|
||||||
fmt.Println("Creating job")
|
fmt.Println("Creating job")
|
||||||
response, err := r.CreateJob(job)
|
resp, err := r.CreateJob(job)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println(response.String())
|
fmt.Println(resp.String())
|
||||||
break
|
break
|
||||||
case "kill":
|
case "kill":
|
||||||
fmt.Println("Killing job")
|
fmt.Println("Killing job")
|
||||||
|
|
||||||
response, err := r.KillJob(job.JobKey())
|
resp, err := r.KillJob(job.JobKey())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println(response.String())
|
fmt.Println(resp.String())
|
||||||
break
|
break
|
||||||
case "restart":
|
case "restart":
|
||||||
fmt.Println("Restarting job")
|
fmt.Println("Restarting job")
|
||||||
response, err := r.RestartJob(job.JobKey())
|
resp, err := r.RestartJob(job.JobKey())
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Println(response.String())
|
fmt.Println(resp.String())
|
||||||
break
|
break
|
||||||
case "flexUp":
|
case "flexUp":
|
||||||
fmt.Println("Flexing up job")
|
fmt.Println("Flexing up job")
|
||||||
response, err := r.AddInstances(aurora.InstanceKey{job.JobKey(), 0}, 5)
|
resp, err := r.AddInstances(aurora.InstanceKey{job.JobKey(), 0}, 5)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
fmt.Println(response.String())
|
fmt.Println(resp.String())
|
||||||
break
|
break
|
||||||
case "update":
|
case "update":
|
||||||
fmt.Println("Updating a job with a new name")
|
fmt.Println("Updating a job with with more RAM and to 3 instances")
|
||||||
taskConfig, err := r.FetchTaskConfig(aurora.InstanceKey{job.JobKey(), 0})
|
taskConfig, err := r.FetchTaskConfig(aurora.InstanceKey{job.JobKey(), 0})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
|
@ -163,21 +164,32 @@ func main() {
|
||||||
updateJob := realis.NewUpdateJob(taskConfig)
|
updateJob := realis.NewUpdateJob(taskConfig)
|
||||||
updateJob.InstanceCount(3).RAM(128)
|
updateJob.InstanceCount(3).RAM(128)
|
||||||
|
|
||||||
resposne, err := r.StartJobUpdate(updateJob, "")
|
resp, err := r.StartJobUpdate(updateJob, "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
fmt.Println(resposne.String())
|
|
||||||
|
jobUpdateKey := response.JobUpdateKey(resp)
|
||||||
|
r.MonitorJobUpdate(*jobUpdateKey, 5, 100)
|
||||||
|
|
||||||
|
break
|
||||||
|
case "updateDetails":
|
||||||
|
resp, err := r.JobUpdateDetails(aurora.JobUpdateKey{job.JobKey(), *updateId})
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
response.JobUpdateDetails(resp)
|
||||||
break
|
break
|
||||||
case "abortUpdate":
|
case "abortUpdate":
|
||||||
fmt.Println("Abort update")
|
fmt.Println("Abort update")
|
||||||
response, err := r.AbortJobUpdate(job.JobKey(), *updateId, "")
|
resp, err := r.AbortJobUpdate(aurora.JobUpdateKey{job.JobKey(), *updateId}, "")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Println(err)
|
fmt.Println(err)
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
fmt.Println(response.String())
|
fmt.Println(resp.String())
|
||||||
break
|
break
|
||||||
case "taskConfig":
|
case "taskConfig":
|
||||||
fmt.Println("Getting job info")
|
fmt.Println("Getting job info")
|
||||||
|
@ -189,7 +201,7 @@ func main() {
|
||||||
print(config.String())
|
print(config.String())
|
||||||
break
|
break
|
||||||
default:
|
default:
|
||||||
fmt.Println("Only create, kill, restart, flexUp, update, and abortUpdate are supported now")
|
fmt.Println("Command not supported")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
61
monitors.go
Normal file
61
monitors.go
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
/**
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Collection of monitors to create synchronicity
|
||||||
|
package realis
|
||||||
|
|
||||||
|
import (
|
||||||
|
"gen-go/apache/aurora"
|
||||||
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"github.com/rdelval/gorealis/response"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Polls the scheduler every certain amount of time to see if the update has succeeded
|
||||||
|
func (r realisClient) MonitorJobUpdate(updateKey aurora.JobUpdateKey, interval int, timeout int) bool {
|
||||||
|
|
||||||
|
for i:= 0; i*interval <= timeout; i++ {
|
||||||
|
respDetail, err := r.JobUpdateDetails(updateKey)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println(err)
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
updateDetail := response.JobUpdateDetails(respDetail)
|
||||||
|
|
||||||
|
// Iterate through the history of the events available
|
||||||
|
for _, detail := range updateDetail.UpdateEvents {
|
||||||
|
if _, ok := aurora.ACTIVE_JOB_UPDATE_STATES[detail.Status]; !ok {
|
||||||
|
|
||||||
|
// Rolled forward is the only state in which an update has been successfully updated
|
||||||
|
// if we encounter an inactive state and it is not at rolled forward, update failed
|
||||||
|
if detail.Status == aurora.JobUpdateStatus_ROLLED_FORWARD {
|
||||||
|
fmt.Println("Update succeded")
|
||||||
|
return true
|
||||||
|
} else {
|
||||||
|
fmt.Println("Update failed")
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println("Polling, update still active...")
|
||||||
|
time.Sleep(time.Duration(interval) * time.Second)
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
fmt.Println("Timed out")
|
||||||
|
return false
|
||||||
|
}
|
51
realis.go
51
realis.go
|
@ -28,12 +28,15 @@ import (
|
||||||
)
|
)
|
||||||
|
|
||||||
type Realis interface {
|
type Realis interface {
|
||||||
AbortJobUpdate(key *aurora.JobKey, updateId string, message string) (*aurora.Response, error)
|
AbortJobUpdate(updateKey aurora.JobUpdateKey, message string) (*aurora.Response, error)
|
||||||
AddInstances(instKey aurora.InstanceKey, count int32) (*aurora.Response, error)
|
AddInstances(instKey aurora.InstanceKey, count int32) (*aurora.Response, error)
|
||||||
CreateJob(auroraJob Job) (*aurora.Response, error)
|
CreateJob(auroraJob Job) (*aurora.Response, error)
|
||||||
FetchTaskConfig(instKey aurora.InstanceKey) (*aurora.TaskConfig, error)
|
FetchTaskConfig(instKey aurora.InstanceKey) (*aurora.TaskConfig, error)
|
||||||
|
JobUpdateDetails(updateKey aurora.JobUpdateKey) (*aurora.Response, error)
|
||||||
KillJob(key *aurora.JobKey) (*aurora.Response, error)
|
KillJob(key *aurora.JobKey) (*aurora.Response, error)
|
||||||
KillInstance(key *aurora.JobKey, instanceId int32) (*aurora.Response, error)
|
KillInstances(key *aurora.JobKey, instances ...int32) (*aurora.Response, error)
|
||||||
|
MonitorJobUpdate(updateKey aurora.JobUpdateKey, interval int, timeout int) bool
|
||||||
|
RestartInstances(key *aurora.JobKey, instances ...int32) (*aurora.Response, error)
|
||||||
RestartJob(key *aurora.JobKey) (*aurora.Response, error)
|
RestartJob(key *aurora.JobKey) (*aurora.Response, error)
|
||||||
StartJobUpdate(updateJob *UpdateJob, message string) (*aurora.Response, error)
|
StartJobUpdate(updateJob *UpdateJob, message string) (*aurora.Response, error)
|
||||||
Close()
|
Close()
|
||||||
|
@ -56,7 +59,8 @@ func NewClient(config RealisConfig) Realis {
|
||||||
|
|
||||||
protocolFactory := thrift.NewTJSONProtocolFactory()
|
protocolFactory := thrift.NewTJSONProtocolFactory()
|
||||||
|
|
||||||
return realisClient{client: aurora.NewAuroraSchedulerManagerClientFactory(config.transport, protocolFactory)}
|
return realisClient{
|
||||||
|
client: aurora.NewAuroraSchedulerManagerClientFactory(config.transport, protocolFactory)}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create a default configuration of the transport layer, requires a URL to test connection with.
|
// Create a default configuration of the transport layer, requires a URL to test connection with.
|
||||||
|
@ -122,14 +126,16 @@ func (r realisClient) getActiveInstanceIds(key *aurora.JobKey) (map[int32]bool,
|
||||||
return jobInstanceIds, nil
|
return jobInstanceIds, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Kill a specific instance of a job.
|
// Kill specific instances of a job.
|
||||||
func (r realisClient) KillInstance(key *aurora.JobKey, instanceId int32) (*aurora.Response, error) {
|
func (r realisClient) KillInstances(key *aurora.JobKey, instances ...int32) (*aurora.Response, error) {
|
||||||
|
|
||||||
instanceIds := make(map[int32]bool)
|
instanceIds := make(map[int32]bool)
|
||||||
instanceIds[instanceId] = true
|
|
||||||
|
for _, instId := range instances {
|
||||||
|
instanceIds[instId] = true
|
||||||
|
}
|
||||||
|
|
||||||
response, err := r.client.KillTasks(key, instanceIds)
|
response, err := r.client.KillTasks(key, instanceIds)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Wrap(err, "Error sending Kill command to Aurora Scheduler")
|
return nil, errors.Wrap(err, "Error sending Kill command to Aurora Scheduler")
|
||||||
}
|
}
|
||||||
|
@ -169,6 +175,22 @@ func (r realisClient) CreateJob(auroraJob Job) (*aurora.Response, error) {
|
||||||
return response, nil
|
return response, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Restarts specific instances specified
|
||||||
|
func (r realisClient) RestartInstances(key *aurora.JobKey, instances ...int32) (*aurora.Response, error) {
|
||||||
|
instanceIds := make(map[int32]bool)
|
||||||
|
|
||||||
|
for _, instId := range instances {
|
||||||
|
instanceIds[instId] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
response, err := r.client.RestartShards(key, instanceIds)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.Wrap(err, "Error sending Restart command to Aurora Scheduler")
|
||||||
|
}
|
||||||
|
|
||||||
|
return response, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Restarts all active tasks under a job configuration.
|
// Restarts all active tasks under a job configuration.
|
||||||
func (r realisClient) RestartJob(key *aurora.JobKey) (*aurora.Response, error) {
|
func (r realisClient) RestartJob(key *aurora.JobKey) (*aurora.Response, error) {
|
||||||
|
|
||||||
|
@ -204,11 +226,10 @@ func (r realisClient) StartJobUpdate(updateJob *UpdateJob, message string) (*aur
|
||||||
|
|
||||||
// Abort Job Update on Aurora. Requires the updateId which can be obtained on the Aurora web UI.
|
// Abort Job Update on Aurora. Requires the updateId which can be obtained on the Aurora web UI.
|
||||||
func (r realisClient) AbortJobUpdate(
|
func (r realisClient) AbortJobUpdate(
|
||||||
key *aurora.JobKey,
|
updateKey aurora.JobUpdateKey,
|
||||||
updateId string,
|
|
||||||
message string) (*aurora.Response, error) {
|
message string) (*aurora.Response, error) {
|
||||||
|
|
||||||
response, err := r.client.AbortJobUpdate(&aurora.JobUpdateKey{key, updateId}, message)
|
response, err := r.client.AbortJobUpdate(&updateKey, message)
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, errors.Wrap(err, "Error sending AbortJobUpdate command to Aurora Scheduler")
|
return nil, errors.Wrap(err, "Error sending AbortJobUpdate command to Aurora Scheduler")
|
||||||
|
@ -258,3 +279,13 @@ func (r realisClient) FetchTaskConfig(instKey aurora.InstanceKey) (*aurora.TaskC
|
||||||
|
|
||||||
return tasks[0].AssignedTask.Task, nil
|
return tasks[0].AssignedTask.Task, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (r realisClient) JobUpdateDetails(updateKey aurora.JobUpdateKey) (*aurora.Response, error) {
|
||||||
|
|
||||||
|
resp, err := r.client.GetJobUpdateDetails(&updateKey)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.Wrap(err, "Unable to get job update details")
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp,nil
|
||||||
|
}
|
||||||
|
|
34
response/response.go
Normal file
34
response/response.go
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
/**
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Helper functions to process aurora.Response
|
||||||
|
package response
|
||||||
|
|
||||||
|
import (
|
||||||
|
"gen-go/apache/aurora"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Get key from a response created by a StartJobUpdate call
|
||||||
|
func JobUpdateKey(resp *aurora.Response) *aurora.JobUpdateKey {
|
||||||
|
return resp.Result_.StartJobUpdateResult_.GetKey()
|
||||||
|
}
|
||||||
|
|
||||||
|
func JobUpdateDetails(resp *aurora.Response) *aurora.JobUpdateDetails {
|
||||||
|
return resp.Result_.GetJobUpdateDetailsResult_.Details
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -98,14 +98,12 @@ func (u *UpdateJob) MaxPerInstanceFailures(inst int32) *UpdateJob {
|
||||||
|
|
||||||
// Max number of FAILED instances to tolerate before terminating the update.
|
// Max number of FAILED instances to tolerate before terminating the update.
|
||||||
func (u *UpdateJob) MaxFailedInstances(inst int32) *UpdateJob {
|
func (u *UpdateJob) MaxFailedInstances(inst int32) *UpdateJob {
|
||||||
|
|
||||||
u.req.Settings.MaxFailedInstances = inst
|
u.req.Settings.MaxFailedInstances = inst
|
||||||
return u
|
return u
|
||||||
}
|
}
|
||||||
|
|
||||||
// When False, prevents auto rollback of a failed update.
|
// When False, prevents auto rollback of a failed update.
|
||||||
func (u *UpdateJob) RollbackOnFail(rollback bool) *UpdateJob {
|
func (u *UpdateJob) RollbackOnFail(rollback bool) *UpdateJob {
|
||||||
|
|
||||||
u.req.Settings.RollbackOnFailure = rollback
|
u.req.Settings.RollbackOnFailure = rollback
|
||||||
return u
|
return u
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue