diff --git a/cmd/monitor.go b/cmd/monitor.go index 5c9d30b..aa0fd85 100644 --- a/cmd/monitor.go +++ b/cmd/monitor.go @@ -11,10 +11,12 @@ import ( func init() { rootCmd.AddCommand(monitorCmd) - monitorCmd.AddCommand(monitorHostCmd) - monitorHostCmd.Flags().DurationVar(&monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.") - monitorHostCmd.Flags().DurationVar(&monitorTimeout, "timeout", time.Minute*10, "Time after which the monitor will stop polling and throw an error.") - monitorHostCmd.Flags().StringSliceVar(&statusList, "statuses", []string{aurora.MaintenanceMode_DRAINED.String()}, "List of acceptable statuses for a host to be in. (case-insensitive) [NONE, SCHEDULED, DRAINED, DRAINING]") + monitorCmd.AddCommand(monitorHostCmd.cmd) + + monitorHostCmd.cmd.Run = monitorHost + monitorHostCmd.cmd.Flags().DurationVar(&monitorHostCmd.monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.") + monitorHostCmd.cmd.Flags().DurationVar(&monitorHostCmd.monitorTimeout, "timeout", time.Minute*10, "Time after which the monitor will stop polling and throw an error.") + monitorHostCmd.cmd.Flags().StringSliceVar(&monitorHostCmd.statusList, "statuses", []string{aurora.MaintenanceMode_DRAINED.String()}, "List of acceptable statuses for a host to be in. (case-insensitive) [NONE, SCHEDULED, DRAINED, DRAINING]") } var monitorCmd = &cobra.Command{ @@ -22,24 +24,20 @@ var monitorCmd = &cobra.Command{ Short: "Watch for a specific state change", } -var monitorHostCmd = &cobra.Command{ - Use: "hosts", - Short: "Watch a host maintenance status until it enters one of the desired statuses.", - Long: `Provide a list of hosts to monitor for desired statuses. Statuses may be passed using the --statuses +var monitorHostCmd = MonitorCmdConfig{ + cmd: &cobra.Command{ + Use: "hosts", + Short: "Watch a host maintenance status until it enters one of the desired statuses.", + Long: `Provide a list of hosts to monitor for desired statuses. 
Statuses may be passed using the --statuses flag with a list of comma separated statuses. Statuses include [NONE, SCHEDULED, DRAINED, DRAINING]`, - PreRun: func(cmd *cobra.Command, args []string) { - // Manually initializing default values for this command as the default value for shared variables will - // be dependent on the order in which all commands were initialized - monitorTimeout = time.Minute * 10 - monitorInterval = time.Second * 5 }, - Run: monitorHost, + statusList: make([]string, 0), } func monitorHost(cmd *cobra.Command, args []string) { maintenanceModes := make([]aurora.MaintenanceMode, 0) - for _, status := range statusList { + for _, status := range monitorHostCmd.statusList { mode, err := aurora.MaintenanceModeFromString(strings.ToUpper(status)) if err != nil { log.Fatal(err) @@ -48,12 +46,12 @@ func monitorHost(cmd *cobra.Command, args []string) { maintenanceModes = append(maintenanceModes, mode) } - log.Println(monitorTimeout) - log.Println(monitorInterval) - hostResult, err := client.HostMaintenanceMonitor(args, maintenanceModes, monitorInterval, monitorTimeout) + log.Infof("Monitoring for %v at %v intervals", monitorHostCmd.monitorTimeout, monitorHostCmd.monitorInterval) + hostResult, err := client.HostMaintenanceMonitor(args, maintenanceModes, monitorHostCmd.monitorInterval, monitorHostCmd.monitorTimeout) maintenanceMonitorPrint(hostResult, maintenanceModes) + if err != nil { - log.Fatalf("error: %+v", err) + log.Fatal(err) } } diff --git a/cmd/rollback.go b/cmd/rollback.go index c46a819..846d10b 100644 --- a/cmd/rollback.go +++ b/cmd/rollback.go @@ -24,12 +24,12 @@ func init() { var rollbackCmd = &cobra.Command{ Use: "rollback", - Short: "rollback an operation such as an Update", + Short: "Rollback an operation such as an Update", } var rollbackUpdateCmd = &cobra.Command{ Use: "update", - Short: "rollback an update that is currently paused", + Short: "Rollback an update", Run: rollbackUpdate, } diff --git a/cmd/root.go b/cmd/root.go index 
99809ee..6be61da 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -34,7 +34,7 @@ var log = logrus.New() const australisVer = "v0.0.8" -var monitorInterval, monitorTimeout, forceDrainTimeout time.Duration +var forceDrainTimeout time.Duration func init() { diff --git a/cmd/start.go b/cmd/start.go index d87be05..6c9dcde 100644 --- a/cmd/start.go +++ b/cmd/start.go @@ -11,38 +11,42 @@ func init() { rootCmd.AddCommand(startCmd) // Sub-commands - startCmd.AddCommand(startDrainCmd) + startCmd.AddCommand(startDrainCmd.cmd) + startDrainCmd.cmd.Run = drain // Maintenance specific flags - startDrainCmd.Flags().DurationVar(&monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.") - startDrainCmd.Flags().DurationVar(&monitorTimeout, "timeout", time.Minute*10, "Time after which the monitor will stop polling and throw an error.") + startDrainCmd.cmd.Flags().DurationVar(&startDrainCmd.monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.") + startDrainCmd.cmd.Flags().DurationVar(&startDrainCmd.monitorTimeout, "timeout", time.Minute*10, "Time after which the monitor will stop polling and throw an error.") startCmd.AddCommand(startSLADrainCmd) /* SLA Aware commands */ - startSLADrainCmd.AddCommand(startSLACountDrainCmd) + startSLADrainCmd.AddCommand(startSLACountDrainCmd.cmd) + startSLACountDrainCmd.cmd.Run = SLACountDrain // SLA Maintenance specific flags - startSLACountDrainCmd.Flags().DurationVar(&monitorInterval, "interval", time.Second*10, "Interval at which to poll scheduler.") - startSLACountDrainCmd.Flags().DurationVar(&monitorTimeout, "timeout", time.Minute*20, "Time after which the monitor will stop polling and throw an error.") - startSLACountDrainCmd.Flags().Int64Var(&count, "count", 5, "Instances count that should be running to meet SLA.") - startSLACountDrainCmd.Flags().DurationVar(&duration, "duration", time.Second*45, "Minimum time duration a task needs to be `RUNNING` to be treated as active.") - 
startSLACountDrainCmd.Flags().DurationVar(&forceDrainTimeout, "sla-limit", time.Minute*60, "Time limit after which SLA-Aware drain sheds SLA Awareness.") + startSLACountDrainCmd.cmd.Flags().DurationVar(&startSLACountDrainCmd.monitorInterval, "interval", time.Second*10, "Interval at which to poll scheduler.") + startSLACountDrainCmd.cmd.Flags().DurationVar(&startSLACountDrainCmd.monitorTimeout, "timeout", time.Minute*20, "Time after which the monitor will stop polling and throw an error.") + startSLACountDrainCmd.cmd.Flags().Int64Var(&count, "count", 5, "Instances count that should be running to meet SLA.") + startSLACountDrainCmd.cmd.Flags().DurationVar(&duration, "duration", time.Second*45, "Minimum time duration a task needs to be `RUNNING` to be treated as active.") + startSLACountDrainCmd.cmd.Flags().DurationVar(&forceDrainTimeout, "sla-limit", time.Minute*60, "Time limit after which SLA-Aware drain sheds SLA Awareness.") - startSLADrainCmd.AddCommand(startSLAPercentageDrainCmd) + startSLADrainCmd.AddCommand(startSLAPercentageDrainCmd.cmd) + startSLAPercentageDrainCmd.cmd.Run = SLAPercentageDrain // SLA Maintenance specific flags - startSLAPercentageDrainCmd.Flags().DurationVar(&monitorInterval, "interval", time.Second*10, "Interval at which to poll scheduler.") - startSLAPercentageDrainCmd.Flags().DurationVar(&monitorTimeout, "timeout", time.Minute*20, "Time after which the monitor will stop polling and throw an error.") - startSLAPercentageDrainCmd.Flags().Float64Var(&percent, "percent", 75.0, "Percentage of instances that should be running to meet SLA.") - startSLAPercentageDrainCmd.Flags().DurationVar(&duration, "duration", time.Second*45, "Minimum time duration a task needs to be `RUNNING` to be treated as active.") - startSLAPercentageDrainCmd.Flags().DurationVar(&forceDrainTimeout, "sla-limit", time.Minute*60, "Time limit after which SLA-Aware drain sheds SLA Awareness.") + 
startSLAPercentageDrainCmd.cmd.Flags().DurationVar(&startSLAPercentageDrainCmd.monitorInterval, "interval", time.Second*10, "Interval at which to poll scheduler.") + startSLAPercentageDrainCmd.cmd.Flags().DurationVar(&startSLAPercentageDrainCmd.monitorTimeout, "timeout", time.Minute*20, "Time after which the monitor will stop polling and throw an error.") + startSLAPercentageDrainCmd.cmd.Flags().Float64Var(&percent, "percent", 75.0, "Percentage of instances that should be running to meet SLA.") + startSLAPercentageDrainCmd.cmd.Flags().DurationVar(&duration, "duration", time.Second*45, "Minimum time duration a task needs to be `RUNNING` to be treated as active.") + startSLAPercentageDrainCmd.cmd.Flags().DurationVar(&forceDrainTimeout, "sla-limit", time.Minute*60, "Time limit after which SLA-Aware drain sheds SLA Awareness.") - startCmd.AddCommand(startMaintenanceCmd) + startCmd.AddCommand(startMaintenanceCmd.cmd) + startMaintenanceCmd.cmd.Run = maintenance // SLA Maintenance specific flags - startMaintenanceCmd.Flags().DurationVar(&monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.") - startMaintenanceCmd.Flags().DurationVar(&monitorTimeout, "timeout", time.Minute*10, "Time after which the monitor will stop polling and throw an error.") + startMaintenanceCmd.cmd.Flags().DurationVar(&startMaintenanceCmd.monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.") + startMaintenanceCmd.cmd.Flags().DurationVar(&startMaintenanceCmd.monitorTimeout, "timeout", time.Minute*10, "Time after which the monitor will stop polling and throw an error.") } var startCmd = &cobra.Command{ @@ -50,21 +54,16 @@ var startCmd = &cobra.Command{ Short: "Start a service, maintenance on a host (DRAIN), a snapshot, or a backup.", } -var startDrainCmd = &cobra.Command{ - Use: "drain [space separated host list]", - Short: "Place a list of space separated Mesos Agents into draining mode.", - Long: `Adds a Mesos Agent to Aurora's Drain list. 
Agents in this list +var startDrainCmd = MonitorCmdConfig{ + cmd: &cobra.Command{ + Use: "drain [space separated host list]", + Short: "Place a list of space separated Mesos Agents into draining mode.", + Long: `Adds a Mesos Agent to Aurora's Drain list. Agents in this list are not allowed to schedule new tasks and any tasks already running on this Agent are killed and rescheduled in an Agent that is not in maintenance mode. Command expects a space separated list of hosts to place into maintenance mode.`, - Args: cobra.MinimumNArgs(1), - PreRun: func(cmd *cobra.Command, args []string) { - // Manually initializing default values for this command as the default value for shared variables will - // be dependent on the order in which all commands were initialized - monitorTimeout = time.Minute * 10 - monitorInterval = time.Second * 5 + Args: cobra.MinimumNArgs(1), }, - Run: drain, } var startSLADrainCmd = &cobra.Command{ @@ -76,50 +75,35 @@ are killed and rescheduled in an Agent that is not in maintenance mode. Command expects a space separated list of hosts to place into maintenance mode.`, } -var startSLACountDrainCmd = &cobra.Command{ - Use: "count [space separated host list]", - Short: "Place a list of space separated Mesos Agents into maintenance mode using the count SLA aware policy as a fallback.", - Long: `Adds a Mesos Agent to Aurora's Drain list. Tasks will be drained using the count SLA policy as a fallback +var startSLACountDrainCmd = MonitorCmdConfig{ + cmd: &cobra.Command{ + Use: "count [space separated host list]", + Short: "Place a list of space separated Mesos Agents into maintenance mode using the count SLA aware policy as a fallback.", + Long: `Adds a Mesos Agent to Aurora's Drain list. 
Tasks will be drained using the count SLA policy as a fallback when a Job does not have a defined SLA policy.`, - Args: cobra.MinimumNArgs(1), - PreRun: func(cmd *cobra.Command, args []string) { - // Manually initializing default values for this command as the default value for shared variables will - // be dependent on the order in which all commands were initialized - monitorTimeout = time.Minute * 20 - monitorInterval = time.Second * 10 + Args: cobra.MinimumNArgs(1), }, - Run: SLACountDrain, } -var startSLAPercentageDrainCmd = &cobra.Command{ - Use: "percentage [space separated host list]", - Short: "Place a list of space separated Mesos Agents into maintenance mode using the percentage SLA aware policy as a fallback.", - Long: `Adds a Mesos Agent to Aurora's Drain list. Tasks will be drained using the percentage SLA policy as a fallback +var startSLAPercentageDrainCmd = MonitorCmdConfig{ + cmd: &cobra.Command{ + Use: "percentage [space separated host list]", + Short: "Place a list of space separated Mesos Agents into maintenance mode using the percentage SLA aware policy as a fallback.", + Long: `Adds a Mesos Agent to Aurora's Drain list. Tasks will be drained using the percentage SLA policy as a fallback when a Job does not have a defined SLA policy.`, - Args: cobra.MinimumNArgs(1), - PreRun: func(cmd *cobra.Command, args []string) { - // Manually initializing default values for this command as the default value for shared variables will - // be dependent on the order in which all commands were initialized - monitorTimeout = time.Minute * 20 - monitorInterval = time.Second * 10 + Args: cobra.MinimumNArgs(1), }, - Run: SLAPercentageDrain, } -var startMaintenanceCmd = &cobra.Command{ - Use: "maintenance [space separated host list]", - Short: "Place a list of space separated Mesos Agents into maintenance mode.", - Long: `Places Mesos Agent into Maintenance mode. 
Agents in this list +var startMaintenanceCmd = MonitorCmdConfig{ + cmd: &cobra.Command{ + Use: "maintenance [space separated host list]", + Short: "Place a list of space separated Mesos Agents into maintenance mode.", + Long: `Places Mesos Agent into Maintenance mode. Agents in this list are de-prioritized for scheduling a task. Command expects a space separated list of hosts to place into maintenance mode.`, - Args: cobra.MinimumNArgs(1), - PreRun: func(cmd *cobra.Command, args []string) { - // Manually initializing default values for this command as the default value for shared variables will - // be dependent on the order in which all commands were initialized - monitorTimeout = time.Minute * 1 - monitorInterval = time.Second * 5 + Args: cobra.MinimumNArgs(1), }, - Run: maintenance, } func drain(cmd *cobra.Command, args []string) { @@ -132,12 +116,13 @@ func drain(cmd *cobra.Command, args []string) { log.Debugln(result) + log.Infof("Monitoring for %v at %v intervals", startDrainCmd.monitorTimeout, startDrainCmd.monitorInterval) // Monitor change to DRAINING and DRAINED mode hostResult, err := client.HostMaintenanceMonitor( args, []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED}, - monitorInterval, - monitorTimeout) + startDrainCmd.monitorInterval, + startDrainCmd.monitorTimeout) maintenanceMonitorPrint(hostResult, []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED}) @@ -146,7 +131,7 @@ func drain(cmd *cobra.Command, args []string) { } } -func slaDrain(policy *aurora.SlaPolicy, hosts ...string) { +func slaDrain(policy *aurora.SlaPolicy, interval, timeout time.Duration, hosts ...string) { result, err := client.SLADrainHosts(policy, int64(forceDrainTimeout.Seconds()), hosts...) 
if err != nil { @@ -155,12 +140,13 @@ func slaDrain(policy *aurora.SlaPolicy, hosts ...string) { log.Debugln(result) + log.Infof("Monitoring for %v at %v intervals", timeout, interval) // Monitor change to DRAINING and DRAINED mode hostResult, err := client.HostMaintenanceMonitor( hosts, []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED}, - monitorInterval, - monitorTimeout) + interval, + timeout) maintenanceMonitorPrint(hostResult, []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED}) @@ -175,6 +161,8 @@ func SLACountDrain(cmd *cobra.Command, args []string) { slaDrain(&aurora.SlaPolicy{ CountSlaPolicy: &aurora.CountSlaPolicy{Count: count, DurationSecs: int64(duration.Seconds())}}, + startSLACountDrainCmd.monitorInterval, + startSLACountDrainCmd.monitorTimeout, args...) } @@ -184,6 +172,8 @@ func SLAPercentageDrain(cmd *cobra.Command, args []string) { slaDrain(&aurora.SlaPolicy{ PercentageSlaPolicy: &aurora.PercentageSlaPolicy{Percentage: percent, DurationSecs: int64(duration.Seconds())}}, + startSLAPercentageDrainCmd.monitorInterval, + startSLAPercentageDrainCmd.monitorTimeout, args...) 
} @@ -197,12 +187,14 @@ func maintenance(cmd *cobra.Command, args []string) { log.Debugln(result) + log.Infof("Monitoring for %v at %v intervals", startMaintenanceCmd.monitorTimeout, startMaintenanceCmd.monitorInterval) + // Monitor change to DRAINING and DRAINED mode hostResult, err := client.HostMaintenanceMonitor( args, []aurora.MaintenanceMode{aurora.MaintenanceMode_SCHEDULED}, - monitorInterval, - monitorTimeout) + startMaintenanceCmd.monitorInterval, + startMaintenanceCmd.monitorTimeout) maintenanceMonitorPrint(hostResult, []aurora.MaintenanceMode{aurora.MaintenanceMode_SCHEDULED}) diff --git a/cmd/stop.go b/cmd/stop.go index 39b9889..fba06b5 100644 --- a/cmd/stop.go +++ b/cmd/stop.go @@ -7,13 +7,16 @@ import ( "github.com/spf13/cobra" ) +var stopMaintenanceConfig = MonitorCmdConfig{} + func init() { rootCmd.AddCommand(stopCmd) // Stop subcommands - stopCmd.AddCommand(stopMaintCmd) - stopMaintCmd.Flags().DurationVar(&monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.") - stopMaintCmd.Flags().DurationVar(&monitorTimeout, "timeout", time.Minute*1, "Time after which the monitor will stop polling and throw an error.") + stopCmd.AddCommand(stopMaintCmd.cmd) + stopMaintCmd.cmd.Run = endMaintenance + stopMaintCmd.cmd.Flags().DurationVar(&stopMaintenanceConfig.monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.") + stopMaintCmd.cmd.Flags().DurationVar(&stopMaintenanceConfig.monitorTimeout, "timeout", time.Minute*1, "Time after which the monitor will stop polling and throw an error.") // Stop update @@ -29,11 +32,12 @@ var stopCmd = &cobra.Command{ Short: "Stop a service or maintenance on a host (DRAIN).", } -var stopMaintCmd = &cobra.Command{ - Use: "drain [space separated host list]", - Short: "Stop maintenance on a host (move to NONE).", - Long: `Transition a list of hosts currently in a maintenance status out of it.`, - Run: endMaintenance, +var stopMaintCmd = MonitorCmdConfig{ + cmd: &cobra.Command{ + Use: "drain 
[space separated host list]", + Short: "Stop maintenance on a host (move to NONE).", + Long: `Transition a list of hosts currently in a maintenance status out of it.`, + }, } var stopUpdateCmd = &cobra.Command{ @@ -57,8 +61,8 @@ func endMaintenance(cmd *cobra.Command, args []string) { hostResult, err := client.HostMaintenanceMonitor( args, []aurora.MaintenanceMode{aurora.MaintenanceMode_NONE}, - monitorInterval, - monitorTimeout) + stopMaintenanceConfig.monitorInterval, + stopMaintenanceConfig.monitorTimeout) maintenanceMonitorPrint(hostResult, []aurora.MaintenanceMode{aurora.MaintenanceMode_NONE}) diff --git a/cmd/util.go b/cmd/util.go index 7eb99ea..679e992 100644 --- a/cmd/util.go +++ b/cmd/util.go @@ -4,11 +4,19 @@ import ( "bytes" "encoding/json" "fmt" + "time" "github.com/paypal/gorealis/v2/gen-go/apache/aurora" "github.com/sirupsen/logrus" + "github.com/spf13/cobra" ) +type MonitorCmdConfig struct { + cmd *cobra.Command + monitorInterval, monitorTimeout time.Duration + statusList []string +} + func toJSON(v interface{}) string { output, err := json.Marshal(v)