Adding force snapshot and force backup APIs (#73)
* Adding force snapshot and force backup APIs.
This commit is contained in:
parent
0f2ece10ac
commit
5099d7e6ec
3 changed files with 64 additions and 2 deletions
|
@ -644,12 +644,28 @@ func main() {
|
||||||
fmt.Println("GetJobs...role: ", role)
|
fmt.Println("GetJobs...role: ", role)
|
||||||
_, result, err := r.GetJobs(role)
|
_, result, err := r.GetJobs(role)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
fmt.Print("error: %+v\n", err.Error())
|
fmt.Printf("error: %+v\n", err.Error())
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
}
|
}
|
||||||
fmt.Println("map size: ", len(result.Configs))
|
fmt.Println("map size: ", len(result.Configs))
|
||||||
fmt.Println(result.String())
|
fmt.Println(result.String())
|
||||||
|
|
||||||
|
case "snapshot":
|
||||||
|
fmt.Println("Forcing scheduler to write snapshot to mesos replicated log")
|
||||||
|
err := r.Snapshot()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("error: %+v\n", err.Error())
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
|
case "performBackup":
|
||||||
|
fmt.Println("Writing Backup of Snapshot to file system")
|
||||||
|
err := r.PerformBackup()
|
||||||
|
if err != nil {
|
||||||
|
fmt.Printf("error: %+v\n", err.Error())
|
||||||
|
os.Exit(1)
|
||||||
|
}
|
||||||
|
|
||||||
default:
|
default:
|
||||||
fmt.Println("Command not supported")
|
fmt.Println("Command not supported")
|
||||||
os.Exit(1)
|
os.Exit(1)
|
||||||
|
|
34
realis.go
34
realis.go
|
@ -75,6 +75,8 @@ type Realis interface {
|
||||||
MaintenanceStatus(hosts ...string) (*aurora.Response, *aurora.MaintenanceStatusResult_, error)
|
MaintenanceStatus(hosts ...string) (*aurora.Response, *aurora.MaintenanceStatusResult_, error)
|
||||||
SetQuota(role string, cpu *float64, ram *int64, disk *int64) (*aurora.Response, error)
|
SetQuota(role string, cpu *float64, ram *int64, disk *int64) (*aurora.Response, error)
|
||||||
GetQuota(role string) (*aurora.Response, error)
|
GetQuota(role string) (*aurora.Response, error)
|
||||||
|
Snapshot() error
|
||||||
|
PerformBackup() error
|
||||||
}
|
}
|
||||||
|
|
||||||
type realisClient struct {
|
type realisClient struct {
|
||||||
|
@ -940,6 +942,10 @@ func (r *realisClient) RollbackJobUpdate(key aurora.JobUpdateKey, message string
|
||||||
return resp, nil
|
return resp, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Admin functions */
|
||||||
|
// TODO(rdelvalle): Consider moving these functions to another interface. It would be a backwards incompatible change,
|
||||||
|
// but would add safety.
|
||||||
|
|
||||||
// Set a list of nodes to DRAINING. This means nothing will be able to be scheduled on them and any existing
|
// Set a list of nodes to DRAINING. This means nothing will be able to be scheduled on them and any existing
|
||||||
// tasks will be killed and re-scheduled elsewhere in the cluster. Tasks from DRAINING nodes are not guaranteed
|
// tasks will be killed and re-scheduled elsewhere in the cluster. Tasks from DRAINING nodes are not guaranteed
|
||||||
// to return to running unless there is enough capacity in the cluster to run them.
|
// to return to running unless there is enough capacity in the cluster to run them.
|
||||||
|
@ -1078,3 +1084,31 @@ func (r *realisClient) GetQuota(role string) (*aurora.Response, error) {
|
||||||
|
|
||||||
return resp, retryErr
|
return resp, retryErr
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Force Aurora Scheduler to perform a snapshot and write to Mesos log
|
||||||
|
func (r *realisClient) Snapshot() error {
|
||||||
|
|
||||||
|
_, retryErr := r.thriftCallWithRetries(func() (*aurora.Response, error) {
|
||||||
|
return r.adminClient.Snapshot()
|
||||||
|
})
|
||||||
|
|
||||||
|
if retryErr != nil {
|
||||||
|
return errors.Wrap(retryErr, "Unable to recover connection")
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Force Aurora Scheduler to write backup file to a file in the backup directory
|
||||||
|
func (r *realisClient) PerformBackup() error {
|
||||||
|
|
||||||
|
_, retryErr := r.thriftCallWithRetries(func() (*aurora.Response, error) {
|
||||||
|
return r.adminClient.PerformBackup()
|
||||||
|
})
|
||||||
|
|
||||||
|
if retryErr != nil {
|
||||||
|
return errors.Wrap(retryErr, "Unable to recover connection")
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
|
@ -143,12 +143,24 @@ func TestRealisClient_CreateJob_Thermos(t *testing.T) {
|
||||||
assert.True(t, success)
|
assert.True(t, success)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
//Fetch all obs
|
//Fetch all Jobs
|
||||||
_, result, err := r.GetJobs(role)
|
_, result, err := r.GetJobs(role)
|
||||||
fmt.Printf("GetJobs length: %+v \n", len(result.Configs))
|
fmt.Printf("GetJobs length: %+v \n", len(result.Configs))
|
||||||
assert.Equal(t, len(result.Configs), 1)
|
assert.Equal(t, len(result.Configs), 1)
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
|
// Test asking the scheduler to perform a Snpshot
|
||||||
|
t.Run("TestRealisClient_Snapshot", func(t *testing.T) {
|
||||||
|
err := r.Snapshot()
|
||||||
|
assert.NoError(t, err)
|
||||||
|
})
|
||||||
|
|
||||||
|
// Test asking the scheduler to backup a Snapshot
|
||||||
|
t.Run("TestRealisClient_PerformBackup", func(t *testing.T) {
|
||||||
|
err := r.PerformBackup()
|
||||||
|
assert.NoError(t, err)
|
||||||
|
})
|
||||||
|
|
||||||
// Tasks must exist for it to, be killed
|
// Tasks must exist for it to, be killed
|
||||||
t.Run("TestRealisClient_KillJob_Thermos", func(t *testing.T) {
|
t.Run("TestRealisClient_KillJob_Thermos", func(t *testing.T) {
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue