/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Package realis provides the ability to use Thrift API to communicate with Apache Aurora.
package realis

import (
	"crypto/tls"
	"crypto/x509"
	"encoding/base64"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"net/http/cookiejar"
	"os"
	"path/filepath"
	"sort"
	"strings"
	"sync"
	"time"

	"github.com/apache/thrift/lib/go/thrift"
	"github.com/paypal/gorealis/v2/gen-go/apache/aurora"
	"github.com/paypal/gorealis/v2/response"
	"github.com/pkg/errors"
)

const VERSION = "2.0.1"

type Client struct {
	config         *clientConfig
	client         *aurora.AuroraSchedulerManagerClient
	readonlyClient *aurora.ReadOnlySchedulerClient
	adminClient    *aurora.AuroraAdminClient
	logger         LevelLogger
	lock           *sync.Mutex
	debug          bool
	transport      thrift.TTransport
}

type clientConfig struct {
	username, password          string
	url                         string
	timeout                     time.Duration
	binTransport, jsonTransport bool
	cluster                     *Cluster
	backoff                     Backoff
	transport                   thrift.TTransport
	protoFactory                thrift.TProtocolFactory
	logger                      *LevelLogger
	InsecureSkipVerify          bool
	certsPath                   string
	clientKey, clientCert       string
	options                     []ClientOption
	debug                       bool
	trace                       bool
	zkOptions                   []ZKOpt
}

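// With the default backoff below, and assuming thriftCallWithRetries scales the
// delay by Factor on each step (a reading of this configuration, not a guarantee
// about the retry implementation), the waits between attempts come out to roughly:
//
//	step 1: ~10s  (Duration, ±10% Jitter)
//	step 2: ~50s  (10s * 5.0)
//	step 3: ~250s (10s * 5.0^2)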
var defaultBackoff = Backoff{
	Steps:    3,
	Duration: 10 * time.Second,
	Factor:   5.0,
	Jitter:   0.1,
}

// ClientOption is a function that sets an option on a clientConfig.
// Options are applied in order by NewClient.
type ClientOption func(*clientConfig)

// BasicAuth sets the username and password used for HTTP Basic Authentication.
func BasicAuth(username, password string) ClientOption {
	return func(config *clientConfig) {
		config.username = username
		config.password = password
	}
}

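// A minimal usage sketch of the functional options above (the endpoint and
// credentials are hypothetical placeholders, not values from this package):
//
//	client, err := realis.NewClient(
//		realis.SchedulerUrl("http://localhost:8081"),
//		realis.BasicAuth("aurora", "secret"),
//		realis.Timeout(20*time.Second),
//	)
//	if err != nil {
//		log.Fatal(err)
//	}
//	defer client.Close()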
func SchedulerUrl(url string) ClientOption {
	return func(config *clientConfig) {
		config.url = url
	}
}

func Timeout(timeout time.Duration) ClientOption {
	return func(config *clientConfig) {
		config.timeout = timeout
	}
}

func ZKCluster(cluster *Cluster) ClientOption {
	return func(config *clientConfig) {
		config.cluster = cluster
	}
}

func ZKUrl(url string) ClientOption {
	opts := []ZKOpt{ZKEndpoints(strings.Split(url, ",")...), ZKPath("/aurora/scheduler")}

	return func(config *clientConfig) {
		if config.zkOptions == nil {
			config.zkOptions = opts
		} else {
			config.zkOptions = append(config.zkOptions, opts...)
		}
	}
}

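// ZKUrl accepts a comma-separated list of ZooKeeper endpoints and assumes the
// default /aurora/scheduler path, e.g. (hypothetical hosts):
//
//	realis.ZKUrl("192.168.33.2:2181,192.168.33.3:2181,192.168.33.4:2181")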
func ThriftJSON() ClientOption {
	return func(config *clientConfig) {
		config.jsonTransport = true
	}
}

func ThriftBinary() ClientOption {
	return func(config *clientConfig) {
		config.binTransport = true
	}
}

func BackOff(b Backoff) ClientOption {
	return func(config *clientConfig) {
		config.backoff = b
	}
}

func InsecureSkipVerify(insecureSkipVerify bool) ClientOption {
	return func(config *clientConfig) {
		config.InsecureSkipVerify = insecureSkipVerify
	}
}

func CertsPath(certsPath string) ClientOption {
	return func(config *clientConfig) {
		config.certsPath = certsPath
	}
}

func ClientCerts(clientKey, clientCert string) ClientOption {
	return func(config *clientConfig) {
		config.clientKey, config.clientCert = clientKey, clientCert
	}
}

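// A sketch of wiring up TLS with the options above (paths are hypothetical;
// both halves of the client key pair must be supplied together, as enforced
// in defaultTTransport below):
//
//	realis.NewClient(
//		realis.SchedulerUrl("https://aurora.example.com:8081"),
//		realis.CertsPath("/etc/aurora/certs"),
//		realis.ClientCerts("/etc/aurora/client.key", "/etc/aurora/client.crt"),
//	)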
// Use this option if you'd like to override default settings for connecting to Zookeeper.
// See zk.go for what is possible to set as an option.
func ZookeeperOptions(opts ...ZKOpt) ClientOption {
	return func(config *clientConfig) {
		config.zkOptions = opts
	}
}

// Using the word Set to avoid a name collision with the Logger interface.
func SetLogger(l Logger) ClientOption {
	return func(config *clientConfig) {
		config.logger = &LevelLogger{Logger: l}
	}
}

// Enable debug statements.
func Debug() ClientOption {
	return func(config *clientConfig) {
		config.debug = true
	}
}

// Enable trace statements.
func Trace() ClientOption {
	return func(config *clientConfig) {
		config.trace = true
	}
}

func newTJSONTransport(url string, timeout time.Duration, config *clientConfig) (thrift.TTransport, error) {
	trans, err := defaultTTransport(url, timeout, config)
	if err != nil {
		return nil, errors.Wrap(err, "Error creating realis")
	}

	httpTrans := (trans).(*thrift.THttpClient)
	httpTrans.SetHeader("Content-Type", "application/x-thrift")
	httpTrans.SetHeader("User-Agent", "gorealis v"+VERSION)

	return trans, err
}

func newTBinTransport(url string, timeout time.Duration, config *clientConfig) (thrift.TTransport, error) {
	trans, err := defaultTTransport(url, timeout, config)
	if err != nil {
		return nil, errors.Wrap(err, "Error creating realis")
	}

	httpTrans := (trans).(*thrift.THttpClient)
	httpTrans.DelHeader("Content-Type") // Workaround for using thrift HttpPostClient
	httpTrans.SetHeader("Accept", "application/vnd.apache.thrift.binary")
	httpTrans.SetHeader("Content-Type", "application/vnd.apache.thrift.binary")
	httpTrans.SetHeader("User-Agent", "gorealis v"+VERSION)

	return trans, err
}

// This client implementation uses a retry mechanism for all Thrift calls.
// It will retry all calls which result in a temporary failure as well as calls that fail due to an EOF
// being returned by the HTTP client. Most permanent failures are caught by the thriftCallWithRetries
// function and are not retried, but there may be corner cases not yet handled.
func NewClient(options ...ClientOption) (*Client, error) {
	config := &clientConfig{}

	// Default configs
	config.timeout = 10 * time.Second
	config.backoff = defaultBackoff
	config.logger = &LevelLogger{Logger: log.New(os.Stdout, "realis: ", log.Ltime|log.Ldate|log.LUTC)}

	// Save options to recreate client if a connection error happens
	config.options = options

	// Override default configs where necessary
	for _, opt := range options {
		opt(config)
	}

	// TODO(rdelvalle): Move this logic to its own function to make initialization code easier to read.

	// Set a sane logger based upon configuration passed by the user
	if config.logger == nil {
		if config.debug || config.trace {
			config.logger = &LevelLogger{Logger: log.New(os.Stdout, "realis: ", log.Ltime|log.Ldate|log.LUTC)}
		} else {
			config.logger = &LevelLogger{Logger: NoopLogger{}}
		}
	}

	// Note, by this point, a LevelLogger should have been created.
	config.logger.EnableDebug(config.debug)
	config.logger.EnableTrace(config.trace)

	config.logger.DebugPrintln("Number of options applied to clientConfig: ", len(options))

	// Set default transport to JSON if needed.
	if !config.jsonTransport && !config.binTransport {
		config.jsonTransport = true
	}

	var url string
	var err error

	// Determine how to get information to connect to the scheduler.
	// Prioritize getting the leader from ZK over using a direct URL.
	if config.zkOptions != nil {
		// Find the leader using custom Zookeeper options if options are provided
		url, err = LeaderFromZKOpts(config.zkOptions...)
		if err != nil {
			return nil, NewTemporaryError(errors.Wrap(err, "LeaderFromZK error"))
		}
		config.logger.Println("Scheduler URL from ZK: ", url)
	} else if config.cluster != nil {
		url, err = LeaderFromZK(*config.cluster)
		// If ZK is configured, throw an error if the leader is unable to be determined
		if err != nil {
			return nil, NewTemporaryError(errors.Wrap(err, "LeaderFromZK error"))
		}
		config.logger.Println("Scheduler URL from ZK: ", url)
	} else if config.url != "" {
		url = config.url
		config.logger.Println("Scheduler URL: ", url)
	} else {
		return nil, errors.New("Incomplete Options -- url, cluster.json, or Zookeeper address required")
	}

	url, err = validateAndPopulateAuroraURL(url)
	if err != nil {
		return nil, errors.Wrap(err, "unable to create realis object, invalid url")
	}

	if config.jsonTransport {
		trans, err := newTJSONTransport(url, config.timeout, config)
		if err != nil {
			return nil, NewTemporaryError(errors.Wrap(err, "Error creating realis"))
		}
		config.transport = trans
		config.protoFactory = thrift.NewTJSONProtocolFactory()
	} else if config.binTransport {
		trans, err := newTBinTransport(url, config.timeout, config)
		if err != nil {
			return nil, NewTemporaryError(errors.Wrap(err, "Error creating realis"))
		}
		config.transport = trans
		config.protoFactory = thrift.NewTBinaryProtocolFactoryDefault()
	}

	config.logger.Printf("gorealis clientConfig url: %+v\n", url)

	// Adding Basic Authentication.
	if config.username != "" && config.password != "" {
		httpTrans := (config.transport).(*thrift.THttpClient)
		httpTrans.SetHeader("Authorization", "Basic "+basicAuth(config.username, config.password))
	}

	return &Client{
		config:         config,
		client:         aurora.NewAuroraSchedulerManagerClientFactory(config.transport, config.protoFactory),
		readonlyClient: aurora.NewReadOnlySchedulerClientFactory(config.transport, config.protoFactory),
		adminClient:    aurora.NewAuroraAdminClientFactory(config.transport, config.protoFactory),
		// We initialize logger this way to allow any logger which satisfies the Logger interface
		logger:    LevelLogger{Logger: config.logger, debug: config.debug, trace: config.trace},
		lock:      &sync.Mutex{},
		transport: config.transport,
	}, nil
}

func GetCerts(certPath string) (*x509.CertPool, error) {
	globalRootCAs := x509.NewCertPool()
	caFiles, err := ioutil.ReadDir(certPath)
	if err != nil {
		return nil, err
	}
	for _, cert := range caFiles {
		caPathFile := filepath.Join(certPath, cert.Name())
		caCert, err := ioutil.ReadFile(caPathFile)
		if err != nil {
			return nil, err
		}
		globalRootCAs.AppendCertsFromPEM(caCert)
	}
	return globalRootCAs, nil
}

// Creates a default Thrift Transport object for communications in gorealis using an HTTP Post Client
func defaultTTransport(url string, timeout time.Duration, config *clientConfig) (thrift.TTransport, error) {
	var transport http.Transport

	jar, err := cookiejar.New(nil)
	if err != nil {
		return nil, errors.Wrap(err, "Error creating Cookie Jar")
	}

	if config != nil {
		tlsConfig := &tls.Config{}
		if config.InsecureSkipVerify {
			tlsConfig.InsecureSkipVerify = true
		}
		if config.certsPath != "" {
			rootCAs, err := GetCerts(config.certsPath)
			if err != nil {
				config.logger.Println("error occurred, couldn't fetch certs")
				return nil, err
			}
			tlsConfig.RootCAs = rootCAs
		}
		if config.clientKey != "" && config.clientCert == "" {
			return nil, fmt.Errorf("both client key and cert must be provided; only client key was provided")
		}
		if config.clientKey == "" && config.clientCert != "" {
			return nil, fmt.Errorf("both client key and cert must be provided; only client cert was provided")
		}
		if config.clientKey != "" && config.clientCert != "" {
			cert, err := tls.LoadX509KeyPair(config.clientCert, config.clientKey)
			if err != nil {
				config.logger.Println("error occurred loading client certs and keys")
				return nil, err
			}
			tlsConfig.Certificates = []tls.Certificate{cert}
		}
		transport.TLSClientConfig = tlsConfig
	}

	trans, err := thrift.NewTHttpClientWithOptions(url,
		thrift.THttpClientOptions{Client: &http.Client{Timeout: timeout, Transport: &transport, Jar: jar}})
	if err != nil {
		return nil, errors.Wrap(err, "Error creating transport")
	}

	if err := trans.Open(); err != nil {
		return nil, errors.Wrapf(err, "Error opening connection to %s", url)
	}

	return trans, nil
}

// Create a default configuration of the transport layer; requires a URL to test the connection with.
// Uses HTTP Post as the transport layer and Thrift JSON as the wire protocol by default.
func newDefaultConfig(url string, timeout time.Duration, config *clientConfig) (*clientConfig, error) {
	return newTJSONConfig(url, timeout, config)
}

// Creates a realis clientConfig object using HTTP Post and Thrift JSON protocol to communicate with Aurora.
func newTJSONConfig(url string, timeout time.Duration, config *clientConfig) (*clientConfig, error) {
	trans, err := defaultTTransport(url, timeout, config)
	if err != nil {
		return nil, errors.Wrap(err, "Error creating realis clientConfig")
	}

	httpTrans := (trans).(*thrift.THttpClient)
	httpTrans.SetHeader("Content-Type", "application/x-thrift")
	httpTrans.SetHeader("User-Agent", "gorealis v"+VERSION)

	return &clientConfig{transport: trans, protoFactory: thrift.NewTJSONProtocolFactory()}, nil
}

// Creates a realis clientConfig object using HTTP Post and Thrift Binary protocol to communicate with Aurora.
func newTBinaryConfig(url string, timeout time.Duration, config *clientConfig) (*clientConfig, error) {
	trans, err := defaultTTransport(url, timeout, config)
	if err != nil {
		return nil, errors.Wrap(err, "Error creating realis clientConfig")
	}

	httpTrans := (trans).(*thrift.THttpClient)
	httpTrans.DelHeader("Content-Type") // Workaround for using thrift HttpPostClient

	httpTrans.SetHeader("Accept", "application/vnd.apache.thrift.binary")
	httpTrans.SetHeader("Content-Type", "application/vnd.apache.thrift.binary")
	httpTrans.SetHeader("User-Agent", "gorealis v"+VERSION)

	return &clientConfig{transport: trans, protoFactory: thrift.NewTBinaryProtocolFactoryDefault()}, nil
}

func basicAuth(username, password string) string {
	auth := username + ":" + password
	return base64.StdEncoding.EncodeToString([]byte(auth))
}

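// For example, basicAuth("aurora", "secret") encodes "aurora:secret" and
// returns "YXVyb3JhOnNlY3JldA==", which NewClient places in the
// "Authorization: Basic ..." header.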
// ReestablishConn closes the existing connection and recreates the client
// from scratch using the original options.
func (c *Client) ReestablishConn() error {
	// Close existing connection
	c.logger.Println("Re-establishing Connection to Aurora")
	c.Close()

	c.lock.Lock()
	defer c.lock.Unlock()

	// Recreate connection from scratch using original options
	newRealis, err := NewClient(c.config.options...)
	if err != nil {
		// This could be a temporary network hiccup
		return NewTemporaryError(err)
	}

	// If we are able to successfully re-connect, make receiver
	// point to newly established connections.
	c.config = newRealis.config
	c.client = newRealis.client
	c.readonlyClient = newRealis.readonlyClient
	c.adminClient = newRealis.adminClient
	c.logger = newRealis.logger

	return nil
}

// Releases resources associated with the realis client.
func (c *Client) Close() {
	c.lock.Lock()
	defer c.lock.Unlock()

	c.transport.Close()
}

// Uses a predefined set of states to retrieve the instance IDs of active tasks for a job in Apache Aurora.
func (c *Client) GetInstanceIds(key aurora.JobKey, states []aurora.ScheduleStatus) ([]int32, error) {
	taskQ := &aurora.TaskQuery{
		Role:        &key.Role,
		Environment: &key.Environment,
		JobName:     &key.Name,
		Statuses:    states,
	}

	c.logger.DebugPrintf("GetTasksWithoutConfigs Thrift Payload: %+v\n", taskQ)

	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.GetTasksWithoutConfigs(nil, taskQ)
	})

	// If we encountered an error we couldn't recover from by retrying, return an error to the user
	if retryErr != nil {
		return nil, errors.Wrap(retryErr, "Error querying Aurora Scheduler for active IDs")
	}

	// Construct instance id map to stay in line with thrift's representation of sets
	tasks := response.ScheduleStatusResult(resp).GetTasks()
	jobInstanceIds := make([]int32, 0, len(tasks))
	for _, task := range tasks {
		jobInstanceIds = append(jobInstanceIds, task.GetAssignedTask().GetInstanceId())
	}
	return jobInstanceIds, nil
}

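// A sketch of querying the active instance IDs of a job (job key values are
// hypothetical):
//
//	key := aurora.JobKey{Role: "vagrant", Environment: "prod", Name: "hello"}
//	ids, err := client.GetInstanceIds(key, aurora.ACTIVE_STATES)
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Println("active instances:", ids)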
func (c *Client) GetJobUpdateSummaries(jobUpdateQuery *aurora.JobUpdateQuery) (*aurora.GetJobUpdateSummariesResult_, error) {
	c.logger.DebugPrintf("GetJobUpdateSummaries Thrift Payload: %+v\n", jobUpdateQuery)

	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.readonlyClient.GetJobUpdateSummaries(nil, jobUpdateQuery)
	})

	if retryErr != nil {
		return nil, errors.Wrap(retryErr, "Error getting job update summaries from Aurora Scheduler")
	}

	return resp.GetResult_().GetGetJobUpdateSummariesResult_(), nil
}

func (c *Client) GetJobs(role string) (*aurora.GetJobsResult_, error) {
	var result *aurora.GetJobsResult_

	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.readonlyClient.GetJobs(nil, role)
	})

	if retryErr != nil {
		return result, errors.Wrap(retryErr, "Error getting Jobs from Aurora Scheduler")
	}

	if resp.GetResult_() != nil {
		result = resp.GetResult_().GetJobsResult_
	}

	return result, nil
}

// Kill specific instances of a job. Returns (true, nil) if a task was actually killed as a result of this API call.
// Returns (false, nil) if no tasks were killed as a result of this call but there was no error making the call.
func (c *Client) KillInstances(key aurora.JobKey, instances ...int32) (bool, error) {
	c.logger.DebugPrintf("KillTasks Thrift Payload: %+v %v\n", key, instances)

	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.KillTasks(nil, &key, instances, "")
	})

	if retryErr != nil {
		return false, errors.Wrap(retryErr, "Error sending Kill command to Aurora Scheduler")
	}

	if len(resp.GetDetails()) > 0 {
		c.logger.Println("KillTasks was called but no tasks killed as a result.")
		return false, nil
	}
	return true, nil
}

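// For example (hypothetical job key), killing instances 0 and 1 and checking
// whether anything was actually killed:
//
//	killed, err := client.KillInstances(key, 0, 1)
//	if err != nil {
//		log.Fatal(err)
//	}
//	if !killed {
//		log.Println("no tasks were in a killable state")
//	}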
func (c *Client) RealisConfig() *clientConfig {
	return c.config
}

// Sends a kill message to the scheduler for all active tasks under a job.
func (c *Client) KillJob(key aurora.JobKey) error {
	c.logger.DebugPrintf("KillTasks Thrift Payload: %+v\n", key)

	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		// Giving the KillTasks thrift call an empty set tells the Aurora scheduler to kill all active shards
		return c.client.KillTasks(nil, &key, nil, "")
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Error sending Kill command to Aurora Scheduler")
	}
	return nil
}

// Sends a create job message to the scheduler with a specific job configuration.
// Although this API is able to create service jobs, it is better to use CreateService instead
// as that API uses the update thrift call which has a few extra features available.
// Use this API to create ad-hoc jobs.
func (c *Client) CreateJob(auroraJob *AuroraJob) error {
	// If no thermos configuration has been set this will result in a NOOP
	err := auroraJob.BuildThermosPayload()

	c.logger.DebugPrintf("CreateJob Thrift Payload: %+v\n", auroraJob.JobConfig())

	if err != nil {
		return errors.Wrap(err, "Unable to create Thermos payload")
	}

	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.CreateJob(nil, auroraJob.JobConfig())
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Error sending Create command to Aurora Scheduler")
	}

	return nil
}

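// A sketch of creating an ad-hoc job. The *AuroraJob builder lives elsewhere
// in this package; the entry point and method names below are assumptions
// for illustration, not definitions from this file:
//
//	job := realis.NewJob(). // hypothetical builder entry point
//		Environment("prod").
//		Role("vagrant").
//		Name("hello_world").
//		CPU(0.5).
//		RAM(64).
//		Disk(128)
//	if err := client.CreateJob(job); err != nil {
//		log.Fatal(err)
//	}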
// CreateService uses the update thrift call to create services, which provides a few more robust features.
func (c *Client) CreateService(update *JobUpdate) (*aurora.StartJobUpdateResult_, error) {
	updateResult, err := c.StartJobUpdate(update, "")
	if err != nil {
		return nil, errors.Wrap(err, "unable to create service")
	}

	return updateResult, err
}

func (c *Client) ScheduleCronJob(auroraJob *AuroraJob) error {
	// If no thermos configuration has been set this will result in a NOOP
	err := auroraJob.BuildThermosPayload()

	c.logger.DebugPrintf("ScheduleCronJob Thrift Payload: %+v\n", auroraJob.JobConfig())

	if err != nil {
		return errors.Wrap(err, "Unable to create Thermos payload")
	}

	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.ScheduleCronJob(nil, auroraJob.JobConfig())
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Error sending Cron AuroraJob Schedule message to Aurora Scheduler")
	}
	return nil
}

func (c *Client) DescheduleCronJob(key aurora.JobKey) error {
	c.logger.DebugPrintf("DescheduleCronJob Thrift Payload: %+v\n", key)

	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.DescheduleCronJob(nil, &key)
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Error sending Cron AuroraJob De-schedule message to Aurora Scheduler")
	}
	return nil
}

func (c *Client) StartCronJob(key aurora.JobKey) error {
	c.logger.DebugPrintf("StartCronJob Thrift Payload: %+v\n", key)

	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.StartCronJob(nil, &key)
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Error sending Start Cron AuroraJob message to Aurora Scheduler")
	}
	return nil
}

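// A sketch of the cron lifecycle using the calls above (cronJob and key are
// hypothetical; the cron schedule itself is part of the job configuration):
//
//	if err := client.ScheduleCronJob(cronJob); err != nil { // register the cron job
//		log.Fatal(err)
//	}
//	_ = client.StartCronJob(key)      // trigger an immediate run
//	_ = client.DescheduleCronJob(key) // remove it from the cron scheduler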
// Restarts the specified instances of a job.
func (c *Client) RestartInstances(key aurora.JobKey, instances ...int32) error {
	c.logger.DebugPrintf("RestartShards Thrift Payload: %+v %v\n", key, instances)

	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.RestartShards(nil, &key, instances)
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Error sending Restart command to Aurora Scheduler")
	}
	return nil
}

// Restarts all active tasks under a job configuration.
func (c *Client) RestartJob(key aurora.JobKey) error {
	instanceIds, err := c.GetInstanceIds(key, aurora.ACTIVE_STATES)
	if err != nil {
		return errors.Wrap(err, "Could not retrieve relevant task instance IDs")
	}

	c.logger.DebugPrintf("RestartShards Thrift Payload: %+v %v\n", key, instanceIds)

	if len(instanceIds) == 0 {
		return errors.New("No tasks in the Active state")
	}

	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.RestartShards(nil, &key, instanceIds)
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Error sending Restart command to Aurora Scheduler")
	}

	return nil
}

// Update all tasks under a job configuration. gorealis does not currently support canary deployments.
func (c *Client) StartJobUpdate(updateJob *JobUpdate, message string) (*aurora.StartJobUpdateResult_, error) {
	if err := updateJob.BuildThermosPayload(); err != nil {
		return nil, errors.New("unable to generate the proper Thermos executor payload")
	}

	c.logger.DebugPrintf("StartJobUpdate Thrift Payload: %+v %v\n", updateJob, message)

	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.StartJobUpdate(nil, updateJob.request, message)
	})

	if retryErr != nil {
		return nil, errors.Wrap(retryErr, "Error sending StartJobUpdate command to Aurora Scheduler")
	}

	if resp.GetResult_() != nil && resp.GetResult_().GetStartJobUpdateResult_() != nil {
		return resp.GetResult_().GetStartJobUpdateResult_(), nil
	}

	return nil, errors.New("Thrift error: Field in response is nil unexpectedly.")
}

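// A sketch of rolling out a service update (the JobUpdate builder is defined
// elsewhere in this package and is assumed to have produced `update`):
//
//	result, err := client.StartJobUpdate(update, "rolling out v2")
//	if err != nil {
//		log.Fatal(err)
//	}
//	fmt.Println("update key:", result.GetKey())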
// Abort an AuroraJob update on Aurora. Requires the update key, which can be obtained
// from StartJobUpdate or the Aurora web UI.
func (c *Client) AbortJobUpdate(updateKey aurora.JobUpdateKey, message string) error {
	c.logger.DebugPrintf("AbortJobUpdate Thrift Payload: %+v %v\n", updateKey, message)

	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.AbortJobUpdate(nil, &updateKey, message)
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Error sending AbortJobUpdate command to Aurora Scheduler")
	}
	return nil
}

// Pause an AuroraJob update. The update key is returned from StartJobUpdate or the Aurora web UI.
func (c *Client) PauseJobUpdate(updateKey *aurora.JobUpdateKey, message string) error {
	c.logger.DebugPrintf("PauseJobUpdate Thrift Payload: %+v %v\n", updateKey, message)

	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.PauseJobUpdate(nil, updateKey, message)
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Error sending PauseJobUpdate command to Aurora Scheduler")
	}

	return nil
}

// Resume a paused AuroraJob update. The update key is returned from StartJobUpdate or the Aurora web UI.
func (c *Client) ResumeJobUpdate(updateKey *aurora.JobUpdateKey, message string) error {
	c.logger.DebugPrintf("ResumeJobUpdate Thrift Payload: %+v %v\n", updateKey, message)

	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.ResumeJobUpdate(nil, updateKey, message)
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Error sending ResumeJobUpdate command to Aurora Scheduler")
	}

	return nil
}

// Pulse an AuroraJob update on Aurora. The update key is returned from StartJobUpdate or the Aurora web UI.
func (c *Client) PulseJobUpdate(updateKey *aurora.JobUpdateKey) (aurora.JobUpdatePulseStatus, error) {
	c.logger.DebugPrintf("PulseJobUpdate Thrift Payload: %+v\n", updateKey)

	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.PulseJobUpdate(nil, updateKey)
	})

	if retryErr != nil {
		return aurora.JobUpdatePulseStatus(0), errors.Wrap(retryErr, "Error sending PulseJobUpdate command to Aurora Scheduler")
	}

	if resp.GetResult_() != nil && resp.GetResult_().GetPulseJobUpdateResult_() != nil {
		return resp.GetResult_().GetPulseJobUpdateResult_().GetStatus(), nil
	}

	return aurora.JobUpdatePulseStatus(0), errors.New("Thrift error, field was nil unexpectedly")
}

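// For coordinated (heartbeat) updates, a sketch of a pulse loop. The interval
// is hypothetical and should match the update's pulse settings; the enum
// constant follows the thrift-generated naming convention:
//
//	for range time.Tick(30 * time.Second) {
//		status, err := client.PulseJobUpdate(updateKey)
//		if err != nil || status != aurora.JobUpdatePulseStatus_OK {
//			break
//		}
//	}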
// Scale up the number of instances of a job, using the configuration of the
// specified instance as the template for the new instances.
func (c *Client) AddInstances(instKey aurora.InstanceKey, count int32) error {
	c.logger.DebugPrintf("AddInstances Thrift Payload: %+v %v\n", instKey, count)

	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.AddInstances(nil, &instKey, count)
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Error sending AddInstances command to Aurora Scheduler")
	}
	return nil
}

// Scale down the number of instances under a job configuration using the configuration of a specific instance.
// Instances with a higher instance ID will be removed first. For example, if our instance ID list is [0,1,2,3]
// and we want to remove 2 instances, 2 and 3 will always be picked.
func (c *Client) RemoveInstances(key aurora.JobKey, count int) error {
	instanceIds, err := c.GetInstanceIds(key, aurora.ACTIVE_STATES)
	if err != nil {
		return errors.Wrap(err, "RemoveInstances: Could not retrieve relevant instance IDs")
	}

	if len(instanceIds) < count {
		return errors.Errorf("insufficient active instances available for killing: "+
			"instances to be killed %d, active instances %d", count, len(instanceIds))
	}

	// Sort instanceIds in decreasing order
	sort.Slice(instanceIds, func(i, j int) bool {
		return instanceIds[i] > instanceIds[j]
	})

	// Get the last count instance ids to kill
	instanceIds = instanceIds[:count]
	killed, err := c.KillInstances(key, instanceIds...)
	if err != nil {
		return errors.Wrap(err, "RemoveInstances: Could not kill selected instances")
	}

	if !killed {
		return errors.New("flex down was not able to reduce the number of instances running")
	}

	return nil
}

// Get information about tasks, including a fully hydrated task configuration object.
func (c *Client) GetTaskStatus(query *aurora.TaskQuery) ([]*aurora.ScheduledTask, error) {
	c.logger.DebugPrintf("GetTasksStatus Thrift Payload: %+v\n", query)

	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.GetTasksStatus(nil, query)
	})

	if retryErr != nil {
		return nil, errors.Wrap(retryErr, "Error querying Aurora Scheduler for task status")
	}

	return response.ScheduleStatusResult(resp).GetTasks(), nil
}

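// A sketch of building a TaskQuery for the call above (values hypothetical);
// the string fields are pointers, mirroring the query construction in
// GetInstanceIds:
//
//	role, env, name := "vagrant", "prod", "hello"
//	tasks, err := client.GetTaskStatus(&aurora.TaskQuery{
//		Role:        &role,
//		Environment: &env,
//		JobName:     &name,
//		Statuses:    aurora.ACTIVE_STATES,
//	})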
// Get pending reason
func (c *Client) GetPendingReason(query *aurora.TaskQuery) ([]*aurora.PendingReason, error) {
	c.logger.DebugPrintf("GetPendingReason Thrift Payload: %+v\n", query)

	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.GetPendingReason(nil, query)
	})

	if retryErr != nil {
		return nil, errors.Wrap(retryErr, "Error querying Aurora Scheduler for pending Reasons")
	}

	var result []*aurora.PendingReason

	if resp.GetResult_() != nil {
		result = resp.GetResult_().GetGetPendingReasonResult_().GetReasons()
	}

	return result, nil
}

// Get information about tasks without the task configuration object.
func (c *Client) GetTasksWithoutConfigs(query *aurora.TaskQuery) ([]*aurora.ScheduledTask, error) {
	c.logger.DebugPrintf("GetTasksWithoutConfigs Thrift Payload: %+v\n", query)

	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.GetTasksWithoutConfigs(nil, query)
	})

	if retryErr != nil {
		return nil, errors.Wrap(retryErr, "Error querying Aurora Scheduler for task status without configs")
	}

	return response.ScheduleStatusResult(resp).GetTasks(), nil
}

// Get the task configuration from the aurora scheduler for a job.
func (c *Client) FetchTaskConfig(instKey aurora.InstanceKey) (*aurora.TaskConfig, error) {
	ids := []int32{instKey.GetInstanceId()}

	taskQ := &aurora.TaskQuery{
		Role:        &instKey.JobKey.Role,
		Environment: &instKey.JobKey.Environment,
		JobName:     &instKey.JobKey.Name,
		InstanceIds: ids,
		Statuses:    aurora.ACTIVE_STATES,
	}

	c.logger.DebugPrintf("GetTasksStatus Thrift Payload: %+v\n", taskQ)

	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.GetTasksStatus(nil, taskQ)
	})

	if retryErr != nil {
		return nil, errors.Wrap(retryErr, "Error querying Aurora Scheduler for task configuration")
	}

	tasks := response.ScheduleStatusResult(resp).GetTasks()

	if len(tasks) == 0 {
		return nil, errors.Errorf("Instance %d for jobkey %s/%s/%s doesn't exist",
			instKey.InstanceId,
			instKey.JobKey.Environment,
			instKey.JobKey.Role,
			instKey.JobKey.Name)
	}

	// Currently, the first task found is always picked.
	return tasks[0].AssignedTask.Task, nil
}

func (c *Client) JobUpdateDetails(updateQuery aurora.JobUpdateQuery) ([]*aurora.JobUpdateDetails, error) {
	c.logger.DebugPrintf("GetJobUpdateDetails Thrift Payload: %+v\n", updateQuery)

	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.GetJobUpdateDetails(nil, &updateQuery)
	})

	if retryErr != nil {
		return nil, errors.Wrap(retryErr, "Unable to get job update details")
	}

	if resp.GetResult_() != nil && resp.GetResult_().GetGetJobUpdateDetailsResult_() != nil {
		return resp.GetResult_().GetGetJobUpdateDetailsResult_().GetDetailsList(), nil
	}

	return nil, errors.New("Unknown Thrift error, field is nil.")
}

func (c *Client) RollbackJobUpdate(key aurora.JobUpdateKey, message string) error {
	c.logger.DebugPrintf("RollbackJobUpdate Thrift Payload: %+v %v\n", key, message)

	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.client.RollbackJobUpdate(nil, &key, message)
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Unable to roll back job update")
	}
	return nil
}

/* Admin functions */
// TODO(rdelvalle): Consider moving these functions to another interface. It would be a backwards incompatible change,
// but would add safety.

// Set a list of nodes to DRAINING. This means no new tasks will be scheduled on them and any existing
// tasks will be killed and re-scheduled elsewhere in the cluster. Tasks from DRAINING nodes are not guaranteed
// to return to running unless there is enough capacity in the cluster to run them.
func (c *Client) DrainHosts(hosts ...string) ([]*aurora.HostStatus, error) {
	if len(hosts) == 0 {
		return nil, errors.New("no hosts provided to drain")
	}

	drainList := aurora.NewHosts()
	drainList.HostNames = hosts

	c.logger.DebugPrintf("DrainHosts Thrift Payload: %v\n", drainList)

	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.adminClient.DrainHosts(nil, drainList)
	})

	if retryErr != nil {
		return nil, errors.Wrap(retryErr, "Unable to recover connection")
	}

	if resp.GetResult_() != nil && resp.GetResult_().GetDrainHostsResult_() != nil {
		return resp.GetResult_().GetDrainHostsResult_().GetStatuses(), nil
	}

	return nil, errors.New("Thrift error: Field in response is nil unexpectedly.")
}

// Start an SLA-aware drain.
// policy is the fallback SlaPolicy to use if a task does not have an SlaPolicy.
// After timeout seconds, tasks will be forcefully drained without checking SLA.
func (c *Client) SLADrainHosts(policy *aurora.SlaPolicy, timeout int64, hosts ...string) ([]*aurora.HostStatus, error) {
	if len(hosts) == 0 {
		return nil, errors.New("no hosts provided to drain")
	}

	drainList := aurora.NewHosts()
	drainList.HostNames = hosts

	c.logger.DebugPrintf("SLADrainHosts Thrift Payload: %v\n", drainList)

	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.adminClient.SlaDrainHosts(nil, drainList, policy, timeout)
	})

	if retryErr != nil {
		return nil, errors.Wrap(retryErr, "Unable to recover connection")
	}

	if resp.GetResult_() != nil && resp.GetResult_().GetDrainHostsResult_() != nil {
		return resp.GetResult_().GetDrainHostsResult_().GetStatuses(), nil
	}

	return nil, errors.New("Thrift error: Field in response is nil unexpectedly.")
}

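// A sketch of an SLA-aware drain with a count-based fallback policy (values
// and host name hypothetical; the policy shape follows the generated aurora
// thrift union types, which is an assumption of this example):
//
//	policy := &aurora.SlaPolicy{
//		CountSlaPolicy: &aurora.CountSlaPolicy{Count: 1, DurationSecs: 30},
//	}
//	statuses, err := client.SLADrainHosts(policy, 300, "agent-1.example.com")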
func (c *Client) StartMaintenance(hosts ...string) ([]*aurora.HostStatus, error) {
	if len(hosts) == 0 {
		return nil, errors.New("no hosts provided to start maintenance on")
	}

	hostList := aurora.NewHosts()
	hostList.HostNames = hosts

	c.logger.DebugPrintf("StartMaintenance Thrift Payload: %v\n", hostList)

	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.adminClient.StartMaintenance(nil, hostList)
	})

	if retryErr != nil {
		return nil, errors.Wrap(retryErr, "Unable to recover connection")
	}

	if resp.GetResult_() != nil && resp.GetResult_().GetStartMaintenanceResult_() != nil {
		return resp.GetResult_().GetStartMaintenanceResult_().GetStatuses(), nil
	}

	return nil, errors.New("Thrift error: Field in response is nil unexpectedly.")
}

func (c *Client) EndMaintenance(hosts ...string) ([]*aurora.HostStatus, error) {
	if len(hosts) == 0 {
		return nil, errors.New("no hosts provided to end maintenance on")
	}

	hostList := aurora.NewHosts()
	hostList.HostNames = hosts

	c.logger.DebugPrintf("EndMaintenance Thrift Payload: %v\n", hostList)

	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.adminClient.EndMaintenance(nil, hostList)
	})

	if retryErr != nil {
		return nil, errors.Wrap(retryErr, "Unable to recover connection")
	}

	if resp.GetResult_() != nil && resp.GetResult_().GetEndMaintenanceResult_() != nil {
		return resp.GetResult_().GetEndMaintenanceResult_().GetStatuses(), nil
	}

	return nil, errors.New("Thrift error: Field in response is nil unexpectedly.")
}

func (c *Client) MaintenanceStatus(hosts ...string) (*aurora.MaintenanceStatusResult_, error) {
	var result *aurora.MaintenanceStatusResult_

	if len(hosts) == 0 {
		return nil, errors.New("no hosts provided to get maintenance status from")
	}

	hostList := aurora.NewHosts()
	hostList.HostNames = hosts

	c.logger.DebugPrintf("MaintenanceStatus Thrift Payload: %v\n", hostList)

	// Make thrift call. If we encounter an error sending the call, attempt to reconnect
	// and continue trying to resend command until we run out of retries.
	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.adminClient.MaintenanceStatus(nil, hostList)
	})

	if retryErr != nil {
		return result, errors.Wrap(retryErr, "Unable to recover connection")
	}

	if resp.GetResult_() != nil {
		result = resp.GetResult_().GetMaintenanceStatusResult_()
	}

	return result, nil
}

// SetQuota sets a quota aggregate for the given role.
// TODO(zircote): Currently investigating an error that is returned from thrift calls that include resources for `NamedPort` and `NumGpu`
func (c *Client) SetQuota(role string, cpu *float64, ramMb *int64, diskMb *int64) error {
	ramResource := aurora.NewResource()
	ramResource.RamMb = ramMb
	cpuResource := aurora.NewResource()
	cpuResource.NumCpus = cpu
	diskResource := aurora.NewResource()
	diskResource.DiskMb = diskMb

	quota := aurora.NewResourceAggregate()
	quota.Resources = []*aurora.Resource{ramResource, cpuResource, diskResource}

	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.adminClient.SetQuota(nil, role, quota)
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Unable to set role quota")
	}
	return nil
}

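// A sketch of setting a role quota (values and role hypothetical): 6 CPUs,
// 8 GiB of RAM (8192 MiB), and 100 GiB of disk (102400 MiB):
//
//	cpu, ram, disk := 6.0, int64(8192), int64(102400)
//	if err := client.SetQuota("vagrant", &cpu, &ram, &disk); err != nil {
//		log.Fatal(err)
//	}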
// GetQuota returns the resource aggregate for the given role.
func (c *Client) GetQuota(role string) (*aurora.GetQuotaResult_, error) {
	resp, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.adminClient.GetQuota(nil, role)
	})

	if retryErr != nil {
		return nil, errors.Wrap(retryErr, "Unable to get role quota")
	}

	if resp.GetResult_() != nil {
		return resp.GetResult_().GetGetQuotaResult_(), nil
	}

	return nil, errors.New("Thrift error: Field in response is nil unexpectedly.")
}

// Force Aurora Scheduler to perform a snapshot and write to Mesos log.
func (c *Client) Snapshot() error {
	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.adminClient.Snapshot(nil)
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Unable to recover connection")
	}

	return nil
}

// Force Aurora Scheduler to write a backup file to the backup directory.
func (c *Client) PerformBackup() error {
	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.adminClient.PerformBackup(nil)
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Unable to recover connection")
	}

	return nil
}

// Force an Implicit reconciliation between Mesos and Aurora.
func (c *Client) ForceImplicitTaskReconciliation() error {
	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.adminClient.TriggerImplicitTaskReconciliation(nil)
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Unable to recover connection")
	}

	return nil
}

// Force an Explicit reconciliation between Mesos and Aurora.
func (c *Client) ForceExplicitTaskReconciliation(batchSize *int32) error {
	if batchSize != nil && *batchSize < 1 {
		return errors.New("Invalid batch size.")
	}

	settings := aurora.NewExplicitReconciliationSettings()
	settings.BatchSize = batchSize

	_, retryErr := c.thriftCallWithRetries(func() (*aurora.Response, error) {
		return c.adminClient.TriggerExplicitTaskReconciliation(nil, settings)
	})

	if retryErr != nil {
		return errors.Wrap(retryErr, "Unable to recover connection")
	}

	return nil
}