Adding force snapshot and force backup APIs (#73)

* Adding force snapshot and force backup APIs.
This commit is contained in:
Renan DelValle 2018-09-14 15:04:16 -07:00 committed by GitHub
parent 0f2ece10ac
commit 5099d7e6ec
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 64 additions and 2 deletions

View file

@ -644,12 +644,28 @@ func main() {
fmt.Println("GetJobs...role: ", role)
_, result, err := r.GetJobs(role)
if err != nil {
fmt.Print("error: %+v\n", err.Error())
fmt.Printf("error: %+v\n", err.Error())
os.Exit(1)
}
fmt.Println("map size: ", len(result.Configs))
fmt.Println(result.String())
case "snapshot":
fmt.Println("Forcing scheduler to write snapshot to mesos replicated log")
err := r.Snapshot()
if err != nil {
fmt.Printf("error: %+v\n", err.Error())
os.Exit(1)
}
case "performBackup":
fmt.Println("Writing Backup of Snapshot to file system")
err := r.PerformBackup()
if err != nil {
fmt.Printf("error: %+v\n", err.Error())
os.Exit(1)
}
default:
fmt.Println("Command not supported")
os.Exit(1)

View file

@ -75,6 +75,8 @@ type Realis interface {
MaintenanceStatus(hosts ...string) (*aurora.Response, *aurora.MaintenanceStatusResult_, error)
SetQuota(role string, cpu *float64, ram *int64, disk *int64) (*aurora.Response, error)
GetQuota(role string) (*aurora.Response, error)
Snapshot() error
PerformBackup() error
}
type realisClient struct {
@ -940,6 +942,10 @@ func (r *realisClient) RollbackJobUpdate(key aurora.JobUpdateKey, message string
return resp, nil
}
/* Admin functions */
// TODO(rdelvalle): Consider moving these functions to another interface. It would be a backwards incompatible change,
// but would add safety.
// Set a list of nodes to DRAINING. This means nothing will be able to be scheduled on them and any existing
// tasks will be killed and re-scheduled elsewhere in the cluster. Tasks from DRAINING nodes are not guaranteed
// to return to running unless there is enough capacity in the cluster to run them.
@ -1078,3 +1084,31 @@ func (r *realisClient) GetQuota(role string) (*aurora.Response, error) {
return resp, retryErr
}
// Force Aurora Scheduler to perform a snapshot and write to Mesos log
func (r *realisClient) Snapshot() error {
_, retryErr := r.thriftCallWithRetries(func() (*aurora.Response, error) {
return r.adminClient.Snapshot()
})
if retryErr != nil {
return errors.Wrap(retryErr, "Unable to recover connection")
}
return nil
}
// Force Aurora Scheduler to write backup file to a file in the backup directory
func (r *realisClient) PerformBackup() error {
_, retryErr := r.thriftCallWithRetries(func() (*aurora.Response, error) {
return r.adminClient.PerformBackup()
})
if retryErr != nil {
return errors.Wrap(retryErr, "Unable to recover connection")
}
return nil
}

View file

@ -143,12 +143,24 @@ func TestRealisClient_CreateJob_Thermos(t *testing.T) {
assert.True(t, success)
assert.NoError(t, err)
//Fetch all obs
//Fetch all Jobs
_, result, err := r.GetJobs(role)
fmt.Printf("GetJobs length: %+v \n", len(result.Configs))
assert.Equal(t, len(result.Configs), 1)
assert.NoError(t, err)
// Test asking the scheduler to perform a Snpshot
t.Run("TestRealisClient_Snapshot", func(t *testing.T) {
err := r.Snapshot()
assert.NoError(t, err)
})
// Test asking the scheduler to backup a Snapshot
t.Run("TestRealisClient_PerformBackup", func(t *testing.T) {
err := r.PerformBackup()
assert.NoError(t, err)
})
// Tasks must exist for it to, be killed
t.Run("TestRealisClient_KillJob_Thermos", func(t *testing.T) {
start := time.Now()