Change timeouts to Duration types so users can more easily specify long spans of time. Add support for sla-drain count and percentage policies; the coordination policy is still to be implemented. Bump the version number and fix implicit recon calling the wrong API function.
parent 2de607464f
commit ef76abd73c
5 changed files with 84 additions and 35 deletions
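The main behavioral change is that the monitor flags now accept Go duration strings (for example --timeout 2m30s or --interval 10s) instead of bare integers interpreted as seconds, and the values are converted back to whole seconds with int(d.Seconds()) wherever the gorealis monitor still expects an int. Below is a minimal, self-contained sketch of that pattern using the standard library flag package, which the pflag DurationVar calls in this diff mirror; the program and its flag names are illustrative only, not part of this commit.

package main

import (
	"flag"
	"fmt"
	"time"
)

func main() {
	// Previously these were IntVar flags interpreted as seconds (e.g. --timeout 50).
	// Duration flags accept human-readable values such as "10s", "2m30s" or "1h".
	monitorInterval := flag.Duration("interval", 5*time.Second, "Interval at which to poll scheduler.")
	monitorTimeout := flag.Duration("timeout", 1*time.Minute, "Time after which the monitor will stop polling and throw an error.")
	flag.Parse()

	// Callers that still take whole seconds, like the host maintenance monitor
	// in this diff, get the value converted back with int(d.Seconds()).
	fmt.Printf("polling every %ds, giving up after %ds\n",
		int(monitorInterval.Seconds()), int(monitorTimeout.Seconds()))
}

Running it as go run . --interval 10s --timeout 2m prints "polling every 10s, giving up after 120s"; with the old integer flags the same invocation would have failed to parse.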
cmd/force.go (11 changed lines)
@@ -77,10 +77,10 @@ state for all currently known non-terminal tasks.
 }
 
 var forceExplicitReconCmd = &cobra.Command{
-    Use: "explicit",
+    Use: "explicit [batch_size]",
     Short: "Force the leading scheduler to perform an explicit recon.",
-    Long: `Aurora will send a list of non-terminal task IDs
-and the master responds with the latest state for each task, if possible.
+    Long: `Aurora will send a list of non-terminal task IDs and the master
+responds with the latest state for each task, if possible.
 `,
     Run: explicitRecon,
     Args: cobra.MaximumNArgs(1),
@@ -120,14 +120,15 @@ func explicitRecon(cmd *cobra.Command, args []string) {
 var forceImplicitReconCmd = &cobra.Command{
     Use: "implicit",
     Short: "Force the leading scheduler to perform an implicit recon.",
-    Long: `Force the `,
+    Long: `Forces leading scheduler to ask Mesos Master for a list of the latest state for
+all currently known non-terminal tasks being run by Aurora.`,
     Run: implicitRecon,
 }
 
 func implicitRecon(cmd *cobra.Command, args []string) {
 
     log.Println("Forcing scheduler to perform an implicit reconciliation with Mesos")
-    err := client.PerformBackup()
+    err := client.ForceImplicitTaskReconciliation()
     if err != nil {
         log.Fatalf("error: %+v\n", err)
     } else {
@@ -21,10 +21,13 @@ var clientKey, clientCert string
 var configFile string
 var toJson bool
 var logLevel string
+var duration time.Duration
+var percent float64
+var count int64
 
-const australisVer = "v0.0.6"
+const australisVer = "v0.0.7"
 
-var monitorInterval, monitorTimeout int
+var monitorInterval, monitorTimeout time.Duration
 
 func init() {
 
cmd/start.go (88 changed lines)
@@ -3,6 +3,7 @@ package cmd
 import (
     "github.com/paypal/gorealis/gen-go/apache/aurora"
     "github.com/spf13/cobra"
+    "time"
 
     log "github.com/sirupsen/logrus"
 )
@@ -15,20 +16,34 @@ func init() {
     startCmd.AddCommand(startDrainCmd)
 
     // Maintenance specific flags
-    startDrainCmd.Flags().IntVar(&monitorInterval,"interval", 5, "Interval at which to poll scheduler.")
-    startDrainCmd.Flags().IntVar(&monitorTimeout,"timeout", 50, "Time after which the monitor will stop polling and throw an error.")
+    startDrainCmd.Flags().DurationVar(&monitorInterval,"interval", time.Second * 5, "Interval at which to poll scheduler.")
+    startDrainCmd.Flags().DurationVar(&monitorTimeout,"timeout", time.Minute * 1, "Time after which the monitor will stop polling and throw an error.")
 
     startCmd.AddCommand(startSLADrainCmd)
 
+
+    /* SLA Aware commands */
+    startSLADrainCmd.AddCommand(startSLACountDrainCmd)
+
     // SLA Maintenance specific flags
-    startSLADrainCmd.Flags().IntVar(&monitorInterval,"interval", 5, "Interval at which to poll scheduler.")
-    startSLADrainCmd.Flags().IntVar(&monitorTimeout,"timeout", 50, "Time after which the monitor will stop polling and throw an error.")
+    startSLACountDrainCmd.Flags().DurationVar(&monitorInterval,"interval", time.Second * 5, "Interval at which to poll scheduler.")
+    startSLACountDrainCmd.Flags().DurationVar(&monitorTimeout,"timeout", time.Minute * 1, "Time after which the monitor will stop polling and throw an error.")
+    startSLACountDrainCmd.Flags().Int64Var(&count, "count", 5, "Instances count that should be running to meet SLA.")
+    startSLACountDrainCmd.Flags().DurationVar(&duration, "duration", time.Minute * 10, "Window of time from which we derive the SLA.")
+
+    startSLADrainCmd.AddCommand(startSLAPercentageDrainCmd)
+
+    // SLA Maintenance specific flags
+    startSLAPercentageDrainCmd.Flags().DurationVar(&monitorInterval,"interval", time.Second * 5, "Interval at which to poll scheduler.")
+    startSLAPercentageDrainCmd.Flags().DurationVar(&monitorTimeout,"timeout", time.Minute * 1, "Time after which the monitor will stop polling and throw an error.")
+    startSLAPercentageDrainCmd.Flags().Float64Var(&percent, "percent", 75.0, "Percentage of instances that should be running to meet SLA.")
+    startSLAPercentageDrainCmd.Flags().DurationVar(&duration, "duration", time.Minute * 10, "Window of time from which we derive the SLA.")
 
     startCmd.AddCommand(startMaintenanceCmd)
 
     // SLA Maintenance specific flags
-    startMaintenanceCmd.Flags().IntVar(&monitorInterval,"interval", 5, "Interval at which to poll scheduler.")
-    startMaintenanceCmd.Flags().IntVar(&monitorTimeout,"timeout", 50, "Time after which the monitor will stop polling and throw an error.")
+    startMaintenanceCmd.Flags().DurationVar(&monitorInterval,"interval", time.Second * 5, "Interval at which to poll scheduler.")
+    startMaintenanceCmd.Flags().DurationVar(&monitorTimeout,"timeout", time.Minute * 1, "Time after which the monitor will stop polling and throw an error.")
 }
 
 var startCmd = &cobra.Command{
@@ -48,14 +63,30 @@ expects a space separated list of hosts to place into maintenance mode.`,
 }
 
 var startSLADrainCmd = &cobra.Command{
-    Use: "sla-drain [space separated host list]",
-    Short: "Place a list of space separated Mesos Agents into maintenance mode using SLA awareness.",
+    Use: "sla-drain",
+    Short: "Place a list of space separated Mesos Agents into maintenance mode using SLA aware strategies.",
     Long: `Adds a Mesos Agent to Aurora's Drain list. Agents in this list
 are not allowed to schedule new tasks and any tasks already running on this Agent
 are killed and rescheduled in an Agent that is not in maintenance mode. Command
 expects a space separated list of hosts to place into maintenance mode.`,
+}
+
+var startSLACountDrainCmd = &cobra.Command{
+    Use: "count [space separated host list]",
+    Short: "Place a list of space separated Mesos Agents into maintenance mode using the count SLA aware policy as a fallback.",
+    Long: `Adds a Mesos Agent to Aurora's Drain list. Tasks will be drained using the count SLA policy as a fallback
+when a Job does not have a defined SLA policy.`,
     Args: cobra.MinimumNArgs(1),
-    Run: SLAdrain,
+    Run: SLACountDrain,
+}
+
+var startSLAPercentageDrainCmd = &cobra.Command{
+    Use: "percentage [space separated host list]",
+    Short: "Place a list of space separated Mesos Agents into maintenance mode using the percentage SLA aware policy as a fallback.",
+    Long: `Adds a Mesos Agent to Aurora's Drain list. Tasks will be drained using the percentage SLA policy as a fallback
+when a Job does not have a defined SLA policy.`,
+    Args: cobra.MinimumNArgs(1),
+    Run: SLAPercentageDrain,
 }
 
 var startMaintenanceCmd = &cobra.Command{
@@ -82,9 +113,8 @@ func drain(cmd *cobra.Command, args []string) {
     hostResult, err := monitor.HostMaintenance(
         args,
         []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED},
-        monitorInterval,
-        monitorTimeout)
-
+        int(monitorInterval.Seconds()),
+        int(monitorTimeout.Seconds()))
 
     maintenanceMonitorPrint(hostResult, []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED})
 
@@ -93,12 +123,9 @@ func drain(cmd *cobra.Command, args []string) {
     }
 }
 
-func SLAdrain(cmd *cobra.Command, args []string) {
-    log.Infoln("Setting hosts to DRAINING with SLA awareness")
-    log.Infoln(args)
+func slaDrain(policy *aurora.SlaPolicy, hosts ...string) {
 
-    policy := aurora.SlaPolicy{CountSlaPolicy: &aurora.CountSlaPolicy{Count: 1, DurationSecs: 60*30}}
-    result, err := client.SLADrainHosts(&policy, 60*60, args...)
+    result, err := client.SLADrainHosts(policy, 60*60, hosts...)
     if err != nil {
         log.Fatalf("error: %+v\n", err)
     }
@@ -107,17 +134,34 @@ func SLAdrain(cmd *cobra.Command, args []string) {
 
     // Monitor change to DRAINING and DRAINED mode
     hostResult, err := monitor.HostMaintenance(
-        args,
+        hosts,
         []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED},
-        monitorInterval,
-        monitorTimeout)
-
+        int(monitorInterval.Seconds()),
+        int(monitorTimeout.Seconds()))
 
     maintenanceMonitorPrint(hostResult, []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED})
 
     if err != nil {
         log.Fatalln("error: %+v", err)
     }
+}
+
+func SLACountDrain(cmd *cobra.Command, args []string) {
+    log.Infoln("Setting hosts to DRAINING with the Count SLA policy.")
+    log.Infoln(args)
+
+    slaDrain(&aurora.SlaPolicy{
+        CountSlaPolicy: &aurora.CountSlaPolicy{Count: count, DurationSecs: int64(duration.Seconds())}},
+        args...)
+}
+
+func SLAPercentageDrain(cmd *cobra.Command, args []string) {
+    log.Infoln("Setting hosts to DRAINING with the Percentage SLA policy.")
+    log.Infoln(args)
+
+    slaDrain(&aurora.SlaPolicy{
+        PercentageSlaPolicy: &aurora.PercentageSlaPolicy{Percentage: percent, DurationSecs: int64(duration.Seconds())}},
+        args...)
 }
 
 func maintenance(cmd *cobra.Command, args []string) {
@@ -134,8 +178,8 @@ func maintenance(cmd *cobra.Command, args []string) {
     hostResult, err := monitor.HostMaintenance(
         args,
         []aurora.MaintenanceMode{aurora.MaintenanceMode_SCHEDULED},
-        monitorInterval,
-        monitorTimeout)
+        int(monitorInterval.Seconds()),
+        int(monitorTimeout.Seconds()))
 
 
     maintenanceMonitorPrint(hostResult, []aurora.MaintenanceMode{aurora.MaintenanceMode_SCHEDULED})
@@ -4,6 +4,7 @@ import (
     "fmt"
     "github.com/paypal/gorealis/gen-go/apache/aurora"
     "github.com/spf13/cobra"
+    "time"
 
     log "github.com/sirupsen/logrus"
 )
@@ -13,8 +14,8 @@ func init() {
 
     // Stop subcommands
     stopCmd.AddCommand(stopMaintCmd)
-    stopMaintCmd.Flags().IntVar(&monitorInterval,"interval", 5, "Interval at which to poll scheduler.")
-    stopMaintCmd.Flags().IntVar(&monitorTimeout,"timeout", 50, "Time after which the monitor will stop polling and throw an error.")
+    stopMaintCmd.Flags().DurationVar(&monitorInterval,"interval", time.Second * 5, "Interval at which to poll scheduler.")
+    stopMaintCmd.Flags().DurationVar(&monitorTimeout,"timeout", time.Minute * 1, "Time after which the monitor will stop polling and throw an error.")
 
     // Stop update
 
@@ -58,8 +59,8 @@ func endMaintenance(cmd *cobra.Command, args []string) {
     hostResult, err := monitor.HostMaintenance(
         args,
         []aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
-        monitorInterval,
-        monitorTimeout)
+        int(monitorInterval.Seconds()),
+        int(monitorTimeout.Seconds()))
 
 
     maintenanceMonitorPrint(hostResult,[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE})
@@ -65,8 +65,8 @@ func maintenanceMonitorPrint(hostResult map[string]bool, desiredStates []aurora.MaintenanceMode) {
     if toJson {
         fmt.Println(toJSON(output))
     } else {
-        fmt.Printf("Entered %v status: %v", output.DesiredStates, output.Transitioned)
-        fmt.Printf("Did not enter %v status: %v", output.DesiredStates, output.NonTransitioned)
+        fmt.Printf("Entered %v status: %v\n", output.DesiredStates, output.Transitioned)
+        fmt.Printf("Did not enter %v status: %v\n", output.DesiredStates, output.NonTransitioned)
     }
 }
 }