// gorealis/examples/client.go
/**
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package main

import (
"flag"
"fmt"
"io/ioutil"
"log"
"strings"
"time"

realis "github.com/paypal/gorealis"
"github.com/paypal/gorealis/gen-go/apache/aurora"
"github.com/paypal/gorealis/response"
)
var cmd, executor, url, clustersConfig, clusterName, updateId, username, password, zkUrl, hostList, role string
var caCertsPath string
var clientKey, clientCert string
// ConnectionTimeout is the Thrift connection timeout, in milliseconds, passed to realis.TimeoutMS.
var ConnectionTimeout = 20000
func init() {
flag.StringVar(&cmd, "cmd", "", "Job request type to send to Aurora Scheduler")
flag.StringVar(&executor, "executor", "thermos", "Executor to use")
flag.StringVar(&url, "url", "", "URL at which the Aurora Scheduler exists as [url]:[port]")
flag.StringVar(&clustersConfig, "clusters", "", "Location of the clusters.json file used by aurora.")
flag.StringVar(&clusterName, "cluster", "devcluster", "Name of cluster to run job on (only necessary if clusters is set)")
flag.StringVar(&updateId, "updateId", "", "Update ID to operate on")
flag.StringVar(&username, "username", "aurora", "Username to use for authorization")
flag.StringVar(&password, "password", "secret", "Password to use for authorization")
flag.StringVar(&zkUrl, "zkurl", "", "zookeeper url")
flag.StringVar(&hostList, "hostList", "", "Comma separated list of hosts to operate on")
flag.StringVar(&role, "role", "", "owner role to use")
flag.StringVar(&caCertsPath, "caCertsPath", "", "Path to CA certs on local machine.")
flag.StringVar(&clientCert, "clientCert", "", "Client certificate to use to connect to Aurora.")
flag.StringVar(&clientKey, "clientKey", "", "Client private key to use to connect to Aurora.")
flag.Parse()
// If a clusters.json file (as used by the default Aurora client) is provided,
// attempt to discover the leading scheduler through ZooKeeper.
// This overrides any URL passed via the -url flag.
if clustersConfig != "" {
clusters, err := realis.LoadClusters(clustersConfig)
if err != nil {
log.Fatalln(err)
}
cluster, ok := clusters[clusterName]
if !ok {
log.Fatalf("Cluster %s doesn't exist in the file provided\n", clusterName)
}
url, err = realis.LeaderFromZK(cluster)
if err != nil {
log.Fatalln(err)
}
}
}
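// main builds a client from the command line flags, constructs a sample job for the
// selected executor, and then dispatches on the -cmd flag to exercise one client API call.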
func main() {
var job realis.Job
var err error
var monitor *realis.Monitor
var r realis.Realis
clientOptions := []realis.ClientOption{
realis.BasicAuth(username, password),
realis.ThriftJSON(),
realis.TimeoutMS(ConnectionTimeout),
realis.BackOff(realis.Backoff{
Steps: 2,
Duration: 10 * time.Second,
Factor: 2.0,
Jitter: 0.1,
}),
realis.Debug(),
}
if zkUrl != "" {
fmt.Println("zkUrl: ", zkUrl)
clientOptions = append(clientOptions, realis.ZKUrl(zkUrl))
} else {
clientOptions = append(clientOptions, realis.SchedulerUrl(url))
}
if caCertsPath != "" {
clientOptions = append(clientOptions, realis.Certspath(caCertsPath))
}
if clientKey != "" && clientCert != "" {
clientOptions = append(clientOptions, realis.ClientCerts(clientKey, clientCert))
}
r, err = realis.NewRealisClient(clientOptions...)
if err != nil {
log.Fatalln(err)
}
monitor = &realis.Monitor{Client: r}
defer r.Close()
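// Build the job definition that the commands below operate on. The -executor flag
// selects between a Thermos job, a docker-compose executor job, and a plain (no executor) task.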
switch executor {
case "thermos":
payload, err := ioutil.ReadFile("examples/thermos_payload.json")
if err != nil {
log.Fatalln("Error reading json config file: ", err)
}
job = realis.NewJob().
Environment("prod").
Role("vagrant").
Name("hello_world_from_gorealis").
ExecutorName(aurora.AURORA_EXECUTOR_NAME).
ExecutorData(string(payload)).
CPU(1).
RAM(64).
Disk(100).
IsService(true).
InstanceCount(1).
AddPorts(1)
case "compose":
job = realis.NewJob().
Environment("prod").
Role("vagrant").
Name("docker-compose-test").
ExecutorName("docker-compose-executor").
ExecutorData("{}").
CPU(0.25).
RAM(512).
Disk(100).
IsService(true).
InstanceCount(1).
AddPorts(4).
AddLabel("fileName", "sample-app/docker-compose.yml").
AddURIs(true, true, "https://github.com/mesos/docker-compose-executor/releases/download/0.1.0/sample-app.tar.gz")
case "none":
job = realis.NewJob().
Environment("prod").
Role("vagrant").
Name("docker_as_task").
CPU(1).
RAM(64).
Disk(100).
IsService(true).
InstanceCount(1).
AddPorts(1)
default:
log.Fatalln("Only thermos, compose, and none are supported for now")
}
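// Dispatch on the requested command. Each case exercises a single gorealis client call
// and, where it makes sense, uses the monitor to wait for the scheduler to converge.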
switch cmd {
case "create":
fmt.Println("Creating job")
resp, err := r.CreateJob(job)
if err != nil {
log.Fatalln(err)
}
fmt.Println(resp.String())
if ok, mErr := monitor.Instances(job.JobKey(), job.GetInstanceCount(), 5, 50); !ok || mErr != nil {
_, err := r.KillJob(job.JobKey())
if err != nil {
log.Fatalln(err)
}
log.Fatalf("ok: %v\n err: %v", ok, mErr)
}
case "createService":
// Create a service with three instances using the update API instead of the createJob API
fmt.Println("Creating service")
settings := realis.NewUpdateSettings()
job.InstanceCount(3)
resp, result, err := r.CreateService(job, settings)
if err != nil {
log.Println("error: ", err)
log.Fatal("response: ", resp.String())
}
fmt.Println(result.String())
if ok, mErr := monitor.JobUpdate(*result.GetKey(), 5, 180); !ok || mErr != nil {
if _, err := r.AbortJobUpdate(*result.GetKey(), "Monitor timed out"); err != nil {
log.Println("unable to abort job update: ", err)
}
if _, err := r.KillJob(job.JobKey()); err != nil {
log.Fatal(err)
}
log.Fatalf("ok: %v\n err: %v", ok, mErr)
}
case "createDocker":
fmt.Println("Creating a docker based job")
container := realis.NewDockerContainer().Image("python:2.7").AddParameter("network", "host")
job.Container(container)
resp, err := r.CreateJob(job)
if err != nil {
log.Fatal(err)
}
fmt.Println(resp.String())
if ok, mErr := monitor.Instances(job.JobKey(), job.GetInstanceCount(), 10, 300); !ok || mErr != nil {
_, err := r.KillJob(job.JobKey())
if err != nil {
log.Fatal(err)
}
log.Fatalf("ok: %v\n err: %v", ok, mErr)
}
case "createMesosContainer":
fmt.Println("Creating a docker based job")
container := realis.NewMesosContainer().DockerImage("python", "2.7")
job.Container(container)
resp, err := r.CreateJob(job)
if err != nil {
log.Fatal(err)
}
fmt.Println(resp.String())
if ok, mErr := monitor.Instances(job.JobKey(), job.GetInstanceCount(), 10, 300); !ok || mErr != nil {
_, err := r.KillJob(job.JobKey())
if err != nil {
log.Fatal(err)
}
log.Fatalf("ok: %v\n err: %v", ok, mErr)
}
case "scheduleCron":
fmt.Println("Scheduling a Cron job")
// Cron config
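// "* * * * *" runs every minute. Cron jobs run to completion on a schedule,
// so the job must not be marked as a service.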
job.CronSchedule("* * * * *")
job.IsService(false)
resp, err := r.ScheduleCronJob(job)
if err != nil {
log.Fatal(err)
}
fmt.Println(resp.String())
case "startCron":
fmt.Println("Starting a Cron job")
resp, err := r.StartCronJob(job.JobKey())
if err != nil {
log.Fatal(err)
}
fmt.Println(resp.String())
case "descheduleCron":
fmt.Println("Descheduling a Cron job")
resp, err := r.DescheduleCronJob(job.JobKey())
if err != nil {
log.Fatal(err)
}
fmt.Println(resp.String())
case "kill":
fmt.Println("Killing job")
resp, err := r.KillJob(job.JobKey())
if err != nil {
log.Fatal(err)
}
if ok, err := monitor.Instances(job.JobKey(), 0, 5, 50); !ok || err != nil {
log.Fatal("Unable to kill all instances of job")
}
fmt.Println(resp.String())
case "restart":
fmt.Println("Restarting job")
resp, err := r.RestartJob(job.JobKey())
if err != nil {
log.Fatal(err)
}
fmt.Println(resp.String())
case "liveCount":
fmt.Println("Getting instance count")
live, err := r.GetInstanceIds(job.JobKey(), aurora.LIVE_STATES)
if err != nil {
log.Fatal(err)
}
fmt.Printf("Live instances: %+v\n", live)
case "activeCount":
fmt.Println("Getting instance count")
live, err := r.GetInstanceIds(job.JobKey(), aurora.ACTIVE_STATES)
if err != nil {
log.Fatal(err)
}
fmt.Println("Number of live instances: ", len(live))
case "flexUp":
fmt.Println("Flexing up job")
numOfInstances := int32(4)
live, err := r.GetInstanceIds(job.JobKey(), aurora.ACTIVE_STATES)
if err != nil {
log.Fatal(err)
}
currInstances := int32(len(live))
fmt.Println("Current num of instances: ", currInstances)
resp, err := r.AddInstances(aurora.InstanceKey{
JobKey: job.JobKey(),
InstanceId: live[0],
},
numOfInstances)
if err != nil {
log.Fatal(err)
}
if ok, err := monitor.Instances(job.JobKey(), currInstances+numOfInstances, 5, 50); !ok || err != nil {
fmt.Println("Flexing up failed")
}
fmt.Println(resp.String())
case "flexDown":
fmt.Println("Flexing down job")
numOfInstances := int32(2)
live, err := r.GetInstanceIds(job.JobKey(), aurora.ACTIVE_STATES)
if err != nil {
log.Fatal(err)
}
currInstances := int32(len(live))
fmt.Println("Current num of instances: ", currInstances)
resp, err := r.RemoveInstances(job.JobKey(), numOfInstances)
if err != nil {
log.Fatal(err)
}
if ok, err := monitor.Instances(job.JobKey(), currInstances-numOfInstances, 5, 100); !ok || err != nil {
fmt.Println("flexDown failed")
}
fmt.Println(resp.String())
case "update":
fmt.Println("Updating a job with with more RAM and to 5 instances")
live, err := r.GetInstanceIds(job.JobKey(), aurora.ACTIVE_STATES)
if err != nil {
log.Fatal(err)
}
taskConfig, err := r.FetchTaskConfig(aurora.InstanceKey{
JobKey: job.JobKey(),
InstanceId: live[0],
})
if err != nil {
log.Fatal(err)
}
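// Use the running task's configuration as the base for the update, then raise the
// instance count and memory; StartJobUpdate rolls the change out gradually.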
updateJob := realis.NewDefaultUpdateJob(taskConfig)
updateJob.InstanceCount(5).RAM(128)
resp, err := r.StartJobUpdate(updateJob, "")
if err != nil {
log.Fatal(err)
}
jobUpdateKey := response.JobUpdateKey(resp)
if ok, mErr := monitor.JobUpdate(*jobUpdateKey, 5, 500); !ok || mErr != nil {
log.Fatalf("ok: %v\n err: %v", ok, mErr)
}
case "pauseJobUpdate":
resp, err := r.PauseJobUpdate(&aurora.JobUpdateKey{
Job: job.JobKey(),
ID: updateId,
}, "")
if err != nil {
log.Fatal(err)
}
fmt.Println("PauseJobUpdate response: ", resp.String())
case "resumeJobUpdate":
resp, err := r.ResumeJobUpdate(&aurora.JobUpdateKey{
Job: job.JobKey(),
ID: updateId,
}, "")
if err != nil {
log.Fatal(err)
}
fmt.Println("ResumeJobUpdate response: ", resp.String())
case "pulseJobUpdate":
resp, err := r.PulseJobUpdate(&aurora.JobUpdateKey{
Job: job.JobKey(),
ID: updateId,
})
if err != nil {
log.Fatal(err)
}
fmt.Println("PulseJobUpdate response: ", resp.String())
case "updateDetails":
resp, err := r.JobUpdateDetails(aurora.JobUpdateQuery{
Key: &aurora.JobUpdateKey{
Job: job.JobKey(),
ID: updateId,
},
Limit: 1,
})
if err != nil {
log.Fatal(err)
}
fmt.Println(response.JobUpdateDetails(resp))
case "abortUpdate":
fmt.Println("Abort update")
resp, err := r.AbortJobUpdate(aurora.JobUpdateKey{
Job: job.JobKey(),
ID: updateId,
},
"")
if err != nil {
log.Fatal(err)
}
fmt.Println(resp.String())
case "rollbackUpdate":
fmt.Println("Abort update")
resp, err := r.RollbackJobUpdate(aurora.JobUpdateKey{
Job: job.JobKey(),
ID: updateId,
},
"")
if err != nil {
log.Fatal(err)
}
fmt.Println(resp.String())
case "taskConfig":
fmt.Println("Getting job info")
live, err := r.GetInstanceIds(job.JobKey(), aurora.ACTIVE_STATES)
if err != nil {
log.Fatal(err)
}
config, err := r.FetchTaskConfig(aurora.InstanceKey{
JobKey: job.JobKey(),
InstanceId: live[0],
})
if err != nil {
log.Fatal(err)
}
log.Println(config.String())
case "updatesummary":
fmt.Println("Getting job update summary")
jobquery := &aurora.JobUpdateQuery{
Role: &job.JobKey().Role,
JobKey: job.JobKey(),
}
updatesummary, err := r.GetJobUpdateSummaries(jobquery)
if err != nil {
log.Fatalf("error while getting update summary: %v", err)
}
fmt.Println(updatesummary)
case "taskStatus":
fmt.Println("Getting task status")
taskQ := &aurora.TaskQuery{
Role: &job.JobKey().Role,
Environment: &job.JobKey().Environment,
JobName: &job.JobKey().Name,
}
tasks, err := r.GetTaskStatus(taskQ)
if err != nil {
log.Fatalf("error: %+v\n ", err)
}
fmt.Printf("length: %d\n ", len(tasks))
fmt.Printf("tasks: %+v\n", tasks)
case "tasksWithoutConfig":
fmt.Println("Getting task status")
taskQ := &aurora.TaskQuery{
Role: &job.JobKey().Role,
Environment: &job.JobKey().Environment,
JobName: &job.JobKey().Name,
}
tasks, err := r.GetTasksWithoutConfigs(taskQ)
if err != nil {
log.Fatalf("error: %+v\n ", err)
}
fmt.Printf("length: %d\n ", len(tasks))
fmt.Printf("tasks: %+v\n", tasks)
case "drainHosts":
fmt.Println("Setting hosts to DRAINING")
if hostList == "" {
log.Fatal("No hosts specified to drain")
}
hosts := strings.Split(hostList, ",")
_, result, err := r.DrainHosts(hosts...)
if err != nil {
log.Fatalf("error: %+v\n", err.Error())
}
// Monitor change to DRAINING and DRAINED mode
hostResult, err := monitor.HostMaintenance(
hosts,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
5,
10)
if err != nil {
for host, ok := range hostResult {
if !ok {
fmt.Printf("Host %s did not transtion into desired mode(s)\n", host)
}
}
log.Fatalf("error: %+v\n", err.Error())
}
fmt.Print(result.String())
case "SLADrainHosts":
fmt.Println("Setting hosts to DRAINING using SLA aware draining")
if hostList == "" {
log.Fatal("No hosts specified to drain")
}
hosts := strings.Split(hostList, ",")
policy := aurora.SlaPolicy{PercentageSlaPolicy: &aurora.PercentageSlaPolicy{Percentage: 50.0}}
result, err := r.SLADrainHosts(&policy, 30, hosts...)
if err != nil {
log.Fatalf("error: %+v\n", err.Error())
}
// Monitor change to DRAINING and DRAINED mode
hostResult, err := monitor.HostMaintenance(
hosts,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED, aurora.MaintenanceMode_DRAINING},
5,
10)
if err != nil {
for host, ok := range hostResult {
if !ok {
fmt.Printf("Host %s did not transtion into desired mode(s)\n", host)
}
}
log.Fatalf("error: %+v\n", err.Error())
}
fmt.Print(result.String())
case "endMaintenance":
fmt.Println("Setting hosts to ACTIVE")
if hostList == "" {
log.Fatal("No hosts specified to drain")
}
hosts := strings.Split(hostList, ",")
_, result, err := r.EndMaintenance(hosts...)
if err != nil {
log.Fatalf("error: %+v\n", err.Error())
}
// Monitor change back to NONE (active) mode
hostResult, err := monitor.HostMaintenance(
hosts,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
5,
10)
if err != nil {
for host, ok := range hostResult {
if !ok {
fmt.Printf("Host %s did not transtion into desired mode(s)\n", host)
}
}
log.Fatalf("error: %+v\n", err.Error())
}
fmt.Print(result.String())
case "getPendingReasons":
fmt.Println("Getting pending reasons")
taskQ := &aurora.TaskQuery{
Role: &job.JobKey().Role,
Environment: &job.JobKey().Environment,
JobName: &job.JobKey().Name,
}
reasons, err := r.GetPendingReason(taskQ)
if err != nil {
log.Fatalf("error: %+v\n ", err)
}
fmt.Printf("length: %d\n ", len(reasons))
fmt.Printf("tasks: %+v\n", reasons)
case "getJobs":
fmt.Println("GetJobs...role: ", role)
_, result, err := r.GetJobs(role)
if err != nil {
log.Fatalf("error: %+v\n", err.Error())
}
fmt.Println("map size: ", len(result.Configs))
fmt.Println(result.String())
case "snapshot":
fmt.Println("Forcing scheduler to write snapshot to mesos replicated log")
err := r.Snapshot()
if err != nil {
log.Fatalf("error: %+v\n", err.Error())
}
case "performBackup":
fmt.Println("Writing Backup of Snapshot to file system")
err := r.PerformBackup()
if err != nil {
log.Fatalf("error: %+v\n", err.Error())
}
case "forceExplicitRecon":
fmt.Println("Force an explicit recon")
err := r.ForceExplicitTaskReconciliation(nil)
if err != nil {
log.Fatalf("error: %+v\n", err.Error())
}
case "forceImplicitRecon":
fmt.Println("Force an implicit recon")
err := r.ForceImplicitTaskReconciliation()
if err != nil {
log.Fatalf("error: %+v\n", err.Error())
}
default:
log.Fatal("Command not supported")
}
}