Adding force snapshot and force backup APIs.

This commit is contained in:
Renan DelValle 2018-09-14 13:02:15 -07:00
parent 0f2ece10ac
commit d6860e4ca4
No known key found for this signature in database
GPG key ID: C240AD6D6F443EC9
3 changed files with 66 additions and 2 deletions

View file

@ -644,12 +644,28 @@ func main() {
fmt.Println("GetJobs...role: ", role)
_, result, err := r.GetJobs(role)
if err != nil {
fmt.Print("error: %+v\n", err.Error())
fmt.Printf("error: %+v\n", err.Error())
os.Exit(1)
}
fmt.Println("map size: ", len(result.Configs))
fmt.Println(result.String())
case "snapshot":
fmt.Println("Forcing scheduler to write snapshot to mesos replicated log")
err := r.Snapshot()
if err != nil {
fmt.Printf("error: %+v\n", err.Error())
os.Exit(1)
}
case "performBackup":
fmt.Println("Writing Backup of Snapshot to file system")
err := r.PerformBackup()
if err != nil {
fmt.Printf("error: %+v\n", err.Error())
os.Exit(1)
}
default:
fmt.Println("Command not supported")
os.Exit(1)

View file

@ -75,6 +75,8 @@ type Realis interface {
MaintenanceStatus(hosts ...string) (*aurora.Response, *aurora.MaintenanceStatusResult_, error)
SetQuota(role string, cpu *float64, ram *int64, disk *int64) (*aurora.Response, error)
GetQuota(role string) (*aurora.Response, error)
Snapshot() (error)
PerformBackup() (error)
}
type realisClient struct {
@ -940,6 +942,10 @@ func (r *realisClient) RollbackJobUpdate(key aurora.JobUpdateKey, message string
return resp, nil
}
/* Admin functions */
// TODO(rdelvalle): Consider moving these functions to another interface. It would be a backwards incompatible change,
// but would add safety.
// Set a list of nodes to DRAINING. This means nothing will be able to be scheduled on them and any existing
// tasks will be killed and re-scheduled elsewhere in the cluster. Tasks from DRAINING nodes are not guaranteed
// to return to running unless there is enough capacity in the cluster to run them.
@ -1078,3 +1084,31 @@ func (r *realisClient) GetQuota(role string) (*aurora.Response, error) {
return resp, retryErr
}
// Force Aurora Scheduler to perform a snapshot and write to Mesos log
func (r *realisClient) Snapshot() (error) {
_, retryErr := r.thriftCallWithRetries(func() (*aurora.Response, error) {
return r.adminClient.Snapshot()
})
if retryErr != nil {
return errors.Wrap(retryErr, "Unable to recover connection")
}
return nil
}
// Force Aurora Scheduler to write backup file to a file in the backup directory
func (r *realisClient) PerformBackup() (error) {
_, retryErr := r.thriftCallWithRetries(func() (*aurora.Response, error) {
return r.adminClient.PerformBackup()
})
if retryErr != nil {
return errors.Wrap(retryErr, "Unable to recover connection")
}
return nil
}

View file

@ -143,12 +143,26 @@ func TestRealisClient_CreateJob_Thermos(t *testing.T) {
assert.True(t, success)
assert.NoError(t, err)
//Fetch all obs
//Fetch all Jobs
_, result, err := r.GetJobs(role)
fmt.Printf("GetJobs length: %+v \n", len(result.Configs))
assert.Equal(t, len(result.Configs), 1)
assert.NoError(t, err)
// Test asking the scheduler to perform a Snpshot
t.Run("TestRealisClient_Snapshot", func (t *testing.T) {
err := r.Snapshot()
assert.NoError(t, err)
})
// Test asking the scheduler to backup a Snapshot
t.Run("TestRealisClient_PerformBackup", func(t *testing.T) {
err := r.PerformBackup()
assert.NoError(t, err)
})
// Tasks must exist for it to, be killed
t.Run("TestRealisClient_KillJob_Thermos", func(t *testing.T) {
start := time.Now()