Workaround for intervals and timeout being dependent on the init execution order.

This commit is contained in:
Renan DelValle 2019-03-25 11:38:17 -07:00
parent ddc9bc408a
commit 3c817a7ffc
No known key found for this signature in database
GPG key ID: C240AD6D6F443EC9
6 changed files with 103 additions and 101 deletions

View file

@ -11,10 +11,12 @@ import (
func init() {
rootCmd.AddCommand(monitorCmd)
monitorCmd.AddCommand(monitorHostCmd)
monitorHostCmd.Flags().DurationVar(&monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.")
monitorHostCmd.Flags().DurationVar(&monitorTimeout, "timeout", time.Minute*10, "Time after which the monitor will stop polling and throw an error.")
monitorHostCmd.Flags().StringSliceVar(&statusList, "statuses", []string{aurora.MaintenanceMode_DRAINED.String()}, "List of acceptable statuses for a host to be in. (case-insensitive) [NONE, SCHEDULED, DRAINED, DRAINING]")
monitorCmd.AddCommand(monitorHostCmd.cmd)
monitorHostCmd.cmd.Run = monitorHost
monitorHostCmd.cmd.Flags().DurationVar(&monitorHostCmd.monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.")
monitorHostCmd.cmd.Flags().DurationVar(&monitorHostCmd.monitorTimeout, "timeout", time.Minute*10, "Time after which the monitor will stop polling and throw an error.")
monitorHostCmd.cmd.Flags().StringSliceVar(&monitorHostCmd.statusList, "statuses", []string{aurora.MaintenanceMode_DRAINED.String()}, "List of acceptable statuses for a host to be in. (case-insensitive) [NONE, SCHEDULED, DRAINED, DRAINING]")
}
var monitorCmd = &cobra.Command{
@ -22,24 +24,20 @@ var monitorCmd = &cobra.Command{
Short: "Watch for a specific state change",
}
var monitorHostCmd = &cobra.Command{
var monitorHostCmd = MonitorCmdConfig{
cmd: &cobra.Command{
Use: "hosts",
Short: "Watch a host maintenance status until it enters one of the desired statuses.",
Long: `Provide a list of hosts to monitor for desired statuses. Statuses may be passed using the --statuses
flag with a list of comma separated statuses. Statuses include [NONE, SCHEDULED, DRAINED, DRAINING]`,
PreRun: func(cmd *cobra.Command, args []string) {
// Manually initializing default values for this command as the default value for shared variables will
// be dependent on the order in which all commands were initialized
monitorTimeout = time.Minute * 10
monitorInterval = time.Second * 5
},
Run: monitorHost,
statusList: make([]string, 0),
}
func monitorHost(cmd *cobra.Command, args []string) {
maintenanceModes := make([]aurora.MaintenanceMode, 0)
for _, status := range statusList {
for _, status := range monitorHostCmd.statusList {
mode, err := aurora.MaintenanceModeFromString(strings.ToUpper(status))
if err != nil {
log.Fatal(err)
@ -48,12 +46,12 @@ func monitorHost(cmd *cobra.Command, args []string) {
maintenanceModes = append(maintenanceModes, mode)
}
log.Println(monitorTimeout)
log.Println(monitorInterval)
hostResult, err := client.HostMaintenanceMonitor(args, maintenanceModes, monitorInterval, monitorTimeout)
log.Infof("Monitoring for %v at %v intervals", monitorHostCmd.monitorTimeout, monitorHostCmd.monitorInterval)
hostResult, err := client.HostMaintenanceMonitor(args, maintenanceModes, monitorHostCmd.monitorInterval, monitorHostCmd.monitorTimeout)
maintenanceMonitorPrint(hostResult, maintenanceModes)
if err != nil {
log.Fatalf("error: %+v", err)
log.Fatal(err)
}
}

View file

@ -24,12 +24,12 @@ func init() {
var rollbackCmd = &cobra.Command{
Use: "rollback",
Short: "rollback an operation such as an Update",
Short: "Rollback an operation such as an Update",
}
var rollbackUpdateCmd = &cobra.Command{
Use: "update",
Short: "rollback an update that is currently paused",
Short: "Rollback an update",
Run: rollbackUpdate,
}

View file

@ -34,7 +34,7 @@ var log = logrus.New()
const australisVer = "v0.0.8"
var monitorInterval, monitorTimeout, forceDrainTimeout time.Duration
var forceDrainTimeout time.Duration
func init() {

View file

@ -11,38 +11,42 @@ func init() {
rootCmd.AddCommand(startCmd)
// Sub-commands
startCmd.AddCommand(startDrainCmd)
startCmd.AddCommand(startDrainCmd.cmd)
startDrainCmd.cmd.Run = drain
// Maintenance specific flags
startDrainCmd.Flags().DurationVar(&monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.")
startDrainCmd.Flags().DurationVar(&monitorTimeout, "timeout", time.Minute*10, "Time after which the monitor will stop polling and throw an error.")
startDrainCmd.cmd.Flags().DurationVar(&startDrainCmd.monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.")
startDrainCmd.cmd.Flags().DurationVar(&startDrainCmd.monitorTimeout, "timeout", time.Minute*10, "Time after which the monitor will stop polling and throw an error.")
startCmd.AddCommand(startSLADrainCmd)
/* SLA Aware commands */
startSLADrainCmd.AddCommand(startSLACountDrainCmd)
startSLADrainCmd.AddCommand(startSLACountDrainCmd.cmd)
startSLACountDrainCmd.cmd.Run = SLACountDrain
// SLA Maintenance specific flags
startSLACountDrainCmd.Flags().DurationVar(&monitorInterval, "interval", time.Second*10, "Interval at which to poll scheduler.")
startSLACountDrainCmd.Flags().DurationVar(&monitorTimeout, "timeout", time.Minute*20, "Time after which the monitor will stop polling and throw an error.")
startSLACountDrainCmd.Flags().Int64Var(&count, "count", 5, "Instances count that should be running to meet SLA.")
startSLACountDrainCmd.Flags().DurationVar(&duration, "duration", time.Second*45, "Minimum time duration a task needs to be `RUNNING` to be treated as active.")
startSLACountDrainCmd.Flags().DurationVar(&forceDrainTimeout, "sla-limit", time.Minute*60, "Time limit after which SLA-Aware drain sheds SLA Awareness.")
startSLACountDrainCmd.cmd.Flags().DurationVar(&startSLACountDrainCmd.monitorInterval, "interval", time.Second*10, "Interval at which to poll scheduler.")
startSLACountDrainCmd.cmd.Flags().DurationVar(&startSLACountDrainCmd.monitorTimeout, "timeout", time.Minute*20, "Time after which the monitor will stop polling and throw an error.")
startSLACountDrainCmd.cmd.Flags().Int64Var(&count, "count", 5, "Instances count that should be running to meet SLA.")
startSLACountDrainCmd.cmd.Flags().DurationVar(&duration, "duration", time.Second*45, "Minimum time duration a task needs to be `RUNNING` to be treated as active.")
startSLACountDrainCmd.cmd.Flags().DurationVar(&forceDrainTimeout, "sla-limit", time.Minute*60, "Time limit after which SLA-Aware drain sheds SLA Awareness.")
startSLADrainCmd.AddCommand(startSLAPercentageDrainCmd)
startSLADrainCmd.AddCommand(startSLAPercentageDrainCmd.cmd)
startSLAPercentageDrainCmd.cmd.Run = SLAPercentageDrain
// SLA Maintenance specific flags
startSLAPercentageDrainCmd.Flags().DurationVar(&monitorInterval, "interval", time.Second*10, "Interval at which to poll scheduler.")
startSLAPercentageDrainCmd.Flags().DurationVar(&monitorTimeout, "timeout", time.Minute*20, "Time after which the monitor will stop polling and throw an error.")
startSLAPercentageDrainCmd.Flags().Float64Var(&percent, "percent", 75.0, "Percentage of instances that should be running to meet SLA.")
startSLAPercentageDrainCmd.Flags().DurationVar(&duration, "duration", time.Second*45, "Minimum time duration a task needs to be `RUNNING` to be treated as active.")
startSLAPercentageDrainCmd.Flags().DurationVar(&forceDrainTimeout, "sla-limit", time.Minute*60, "Time limit after which SLA-Aware drain sheds SLA Awareness.")
// Bind interval/timeout to the percentage command's own config: SLAPercentageDrain reads
// startSLAPercentageDrainCmd.monitorInterval/monitorTimeout, so binding to the count command's
// fields would leave these at their zero value and clobber the count command's settings.
startSLAPercentageDrainCmd.cmd.Flags().DurationVar(&startSLAPercentageDrainCmd.monitorInterval, "interval", time.Second*10, "Interval at which to poll scheduler.")
startSLAPercentageDrainCmd.cmd.Flags().DurationVar(&startSLAPercentageDrainCmd.monitorTimeout, "timeout", time.Minute*20, "Time after which the monitor will stop polling and throw an error.")
startSLAPercentageDrainCmd.cmd.Flags().Float64Var(&percent, "percent", 75.0, "Percentage of instances that should be running to meet SLA.")
startSLAPercentageDrainCmd.cmd.Flags().DurationVar(&duration, "duration", time.Second*45, "Minimum time duration a task needs to be `RUNNING` to be treated as active.")
startSLAPercentageDrainCmd.cmd.Flags().DurationVar(&forceDrainTimeout, "sla-limit", time.Minute*60, "Time limit after which SLA-Aware drain sheds SLA Awareness.")
startCmd.AddCommand(startMaintenanceCmd)
startCmd.AddCommand(startMaintenanceCmd.cmd)
startMaintenanceCmd.cmd.Run = maintenance
// SLA Maintenance specific flags
startMaintenanceCmd.Flags().DurationVar(&monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.")
startMaintenanceCmd.Flags().DurationVar(&monitorTimeout, "timeout", time.Minute*10, "Time after which the monitor will stop polling and throw an error.")
startMaintenanceCmd.cmd.Flags().DurationVar(&startMaintenanceCmd.monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.")
startMaintenanceCmd.cmd.Flags().DurationVar(&startMaintenanceCmd.monitorTimeout, "timeout", time.Minute*10, "Time after which the monitor will stop polling and throw an error.")
}
var startCmd = &cobra.Command{
@ -50,7 +54,8 @@ var startCmd = &cobra.Command{
Short: "Start a service, maintenance on a host (DRAIN), a snapshot, or a backup.",
}
var startDrainCmd = &cobra.Command{
var startDrainCmd = MonitorCmdConfig{
cmd: &cobra.Command{
Use: "drain [space separated host list]",
Short: "Place a list of space separated Mesos Agents into draining mode.",
Long: `Adds a Mesos Agent to Aurora's Drain list. Agents in this list
@ -58,13 +63,7 @@ are not allowed to schedule new tasks and any tasks already running on this Agen
are killed and rescheduled in an Agent that is not in maintenance mode. Command
expects a space separated list of hosts to place into maintenance mode.`,
Args: cobra.MinimumNArgs(1),
PreRun: func(cmd *cobra.Command, args []string) {
// Manually initializing default values for this command as the default value for shared variables will
// be dependent on the order in which all commands were initialized
monitorTimeout = time.Minute * 10
monitorInterval = time.Second * 5
},
Run: drain,
}
var startSLADrainCmd = &cobra.Command{
@ -76,50 +75,35 @@ are killed and rescheduled in an Agent that is not in maintenance mode. Command
expects a space separated list of hosts to place into maintenance mode.`,
}
var startSLACountDrainCmd = &cobra.Command{
var startSLACountDrainCmd = MonitorCmdConfig{
cmd: &cobra.Command{
Use: "count [space separated host list]",
Short: "Place a list of space separated Mesos Agents into maintenance mode using the count SLA aware policy as a fallback.",
Long: `Adds a Mesos Agent to Aurora's Drain list. Tasks will be drained using the count SLA policy as a fallback
when a Job does not have a defined SLA policy.`,
Args: cobra.MinimumNArgs(1),
PreRun: func(cmd *cobra.Command, args []string) {
// Manually initializing default values for this command as the default value for shared variables will
// be dependent on the order in which all commands were initialized
monitorTimeout = time.Minute * 20
monitorInterval = time.Second * 10
},
Run: SLACountDrain,
}
var startSLAPercentageDrainCmd = &cobra.Command{
var startSLAPercentageDrainCmd = MonitorCmdConfig{
cmd: &cobra.Command{
Use: "percentage [space separated host list]",
Short: "Place a list of space separated Mesos Agents into maintenance mode using the percentage SLA aware policy as a fallback.",
Long: `Adds a Mesos Agent to Aurora's Drain list. Tasks will be drained using the percentage SLA policy as a fallback
when a Job does not have a defined SLA policy.`,
Args: cobra.MinimumNArgs(1),
PreRun: func(cmd *cobra.Command, args []string) {
// Manually initializing default values for this command as the default value for shared variables will
// be dependent on the order in which all commands were initialized
monitorTimeout = time.Minute * 20
monitorInterval = time.Second * 10
},
Run: SLAPercentageDrain,
}
var startMaintenanceCmd = &cobra.Command{
var startMaintenanceCmd = MonitorCmdConfig{
cmd: &cobra.Command{
Use: "maintenance [space separated host list]",
Short: "Place a list of space separated Mesos Agents into maintenance mode.",
Long: `Places Mesos Agent into Maintenance mode. Agents in this list
are de-prioritized for scheduling a task. Command
expects a space separated list of hosts to place into maintenance mode.`,
Args: cobra.MinimumNArgs(1),
PreRun: func(cmd *cobra.Command, args []string) {
// Manually initializing default values for this command as the default value for shared variables will
// be dependent on the order in which all commands were initialized
monitorTimeout = time.Minute * 1
monitorInterval = time.Second * 5
},
Run: maintenance,
}
func drain(cmd *cobra.Command, args []string) {
@ -132,12 +116,13 @@ func drain(cmd *cobra.Command, args []string) {
log.Debugln(result)
// Report the values this command actually passes to the monitor below.
log.Infof("Monitoring for %v at %v intervals", startDrainCmd.monitorTimeout, startDrainCmd.monitorInterval)
// Monitor change to DRAINING and DRAINED mode
hostResult, err := client.HostMaintenanceMonitor(
args,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED},
monitorInterval,
monitorTimeout)
startDrainCmd.monitorInterval,
startDrainCmd.monitorTimeout)
maintenanceMonitorPrint(hostResult, []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED})
@ -146,7 +131,7 @@ func drain(cmd *cobra.Command, args []string) {
}
}
func slaDrain(policy *aurora.SlaPolicy, hosts ...string) {
func slaDrain(policy *aurora.SlaPolicy, interval, timeout time.Duration, hosts ...string) {
result, err := client.SLADrainHosts(policy, int64(forceDrainTimeout.Seconds()), hosts...)
if err != nil {
@ -155,12 +140,13 @@ func slaDrain(policy *aurora.SlaPolicy, hosts ...string) {
log.Debugln(result)
// Report the caller-supplied timeout and interval that are passed to the monitor below.
log.Infof("Monitoring for %v at %v intervals", timeout, interval)
// Monitor change to DRAINING and DRAINED mode
hostResult, err := client.HostMaintenanceMonitor(
hosts,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED},
monitorInterval,
monitorTimeout)
interval,
timeout)
maintenanceMonitorPrint(hostResult, []aurora.MaintenanceMode{aurora.MaintenanceMode_DRAINED})
@ -175,6 +161,8 @@ func SLACountDrain(cmd *cobra.Command, args []string) {
slaDrain(&aurora.SlaPolicy{
CountSlaPolicy: &aurora.CountSlaPolicy{Count: count, DurationSecs: int64(duration.Seconds())}},
startSLACountDrainCmd.monitorInterval,
startSLACountDrainCmd.monitorTimeout,
args...)
}
@ -184,6 +172,8 @@ func SLAPercentageDrain(cmd *cobra.Command, args []string) {
slaDrain(&aurora.SlaPolicy{
PercentageSlaPolicy: &aurora.PercentageSlaPolicy{Percentage: percent, DurationSecs: int64(duration.Seconds())}},
startSLAPercentageDrainCmd.monitorInterval,
startSLAPercentageDrainCmd.monitorTimeout,
args...)
}
@ -197,12 +187,14 @@ func maintenance(cmd *cobra.Command, args []string) {
log.Debugln(result)
// Report the values this command actually passes to the monitor below.
log.Infof("Monitoring for %v at %v intervals", startMaintenanceCmd.monitorTimeout, startMaintenanceCmd.monitorInterval)
// Monitor change to SCHEDULED mode
hostResult, err := client.HostMaintenanceMonitor(
args,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_SCHEDULED},
monitorInterval,
monitorTimeout)
startMaintenanceCmd.monitorInterval,
startMaintenanceCmd.monitorTimeout)
maintenanceMonitorPrint(hostResult, []aurora.MaintenanceMode{aurora.MaintenanceMode_SCHEDULED})

View file

@ -7,13 +7,16 @@ import (
"github.com/spf13/cobra"
)
var stopMaintenanceConfig = MonitorCmdConfig{}
func init() {
rootCmd.AddCommand(stopCmd)
// Stop subcommands
stopCmd.AddCommand(stopMaintCmd)
stopMaintCmd.Flags().DurationVar(&monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.")
stopMaintCmd.Flags().DurationVar(&monitorTimeout, "timeout", time.Minute*1, "Time after which the monitor will stop polling and throw an error.")
stopCmd.AddCommand(stopMaintCmd.cmd)
stopMaintCmd.cmd.Run = endMaintenance
stopMaintCmd.cmd.Flags().DurationVar(&stopMaintenanceConfig.monitorInterval, "interval", time.Second*5, "Interval at which to poll scheduler.")
stopMaintCmd.cmd.Flags().DurationVar(&stopMaintenanceConfig.monitorTimeout, "timeout", time.Minute*1, "Time after which the monitor will stop polling and throw an error.")
// Stop update
@ -29,11 +32,12 @@ var stopCmd = &cobra.Command{
Short: "Stop a service or maintenance on a host (DRAIN).",
}
var stopMaintCmd = &cobra.Command{
var stopMaintCmd = MonitorCmdConfig{
cmd: &cobra.Command{
Use: "drain [space separated host list]",
Short: "Stop maintenance on a host (move to NONE).",
Long: `Transition a list of hosts currently in a maintenance status out of it.`,
Run: endMaintenance,
},
}
var stopUpdateCmd = &cobra.Command{
@ -57,8 +61,8 @@ func endMaintenance(cmd *cobra.Command, args []string) {
hostResult, err := client.HostMaintenanceMonitor(
args,
[]aurora.MaintenanceMode{aurora.MaintenanceMode_NONE},
monitorInterval,
monitorTimeout)
stopMaintenanceConfig.monitorInterval,
stopMaintenanceConfig.monitorTimeout)
maintenanceMonitorPrint(hostResult, []aurora.MaintenanceMode{aurora.MaintenanceMode_NONE})

View file

@ -4,11 +4,19 @@ import (
"bytes"
"encoding/json"
"fmt"
"time"
"github.com/paypal/gorealis/v2/gen-go/apache/aurora"
"github.com/sirupsen/logrus"
"github.com/spf13/cobra"
)
// MonitorCmdConfig pairs a cobra command with the monitoring settings that
// belong to that command, so each command holds its own values instead of
// sharing package-level variables.
type MonitorCmdConfig struct {
	// cmd is the cobra command these settings are attached to.
	cmd *cobra.Command
	// monitorInterval is the polling interval handed to the maintenance monitor.
	monitorInterval time.Duration
	// monitorTimeout is the duration after which the maintenance monitor gives up.
	monitorTimeout time.Duration
	// statusList holds the status names a command accepts (used by the
	// "statuses" flag of the host monitor command).
	statusList []string
}
func toJSON(v interface{}) string {
output, err := json.Marshal(v)