2019-03-25 17:33:21 -07:00
/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
2017-12-03 12:41:23 -08:00
package cmd
import (
2019-03-19 15:28:24 -07:00
"time"
2019-01-31 18:15:11 -08:00
"github.com/paypal/gorealis/v2/gen-go/apache/aurora"
2017-12-03 12:41:23 -08:00
"github.com/spf13/cobra"
)
func init ( ) {
rootCmd . AddCommand ( startCmd )
// Sub-commands
2019-03-25 11:38:17 -07:00
startCmd . AddCommand ( startDrainCmd . cmd )
startDrainCmd . cmd . Run = drain
2018-05-30 18:15:16 -07:00
2018-09-16 21:22:00 -07:00
// Maintenance specific flags
2019-03-25 11:38:17 -07:00
startDrainCmd . cmd . Flags ( ) . DurationVar ( & startDrainCmd . monitorInterval , "interval" , time . Second * 5 , "Interval at which to poll scheduler." )
startDrainCmd . cmd . Flags ( ) . DurationVar ( & startDrainCmd . monitorTimeout , "timeout" , time . Minute * 10 , "Time after which the monitor will stop polling and throw an error." )
2018-05-30 18:15:16 -07:00
2018-11-16 21:54:18 -08:00
startCmd . AddCommand ( startSLADrainCmd )
2018-11-20 20:11:21 -08:00
/* SLA Aware commands */
2019-03-25 11:38:17 -07:00
startSLADrainCmd . AddCommand ( startSLACountDrainCmd . cmd )
startSLACountDrainCmd . cmd . Run = SLACountDrain
2018-11-20 20:11:21 -08:00
// SLA Maintenance specific flags
2019-03-25 11:38:17 -07:00
startSLACountDrainCmd . cmd . Flags ( ) . DurationVar ( & startSLACountDrainCmd . monitorInterval , "interval" , time . Second * 10 , "Interval at which to poll scheduler." )
startSLACountDrainCmd . cmd . Flags ( ) . DurationVar ( & startSLACountDrainCmd . monitorTimeout , "timeout" , time . Minute * 20 , "Time after which the monitor will stop polling and throw an error." )
startSLACountDrainCmd . cmd . Flags ( ) . Int64Var ( & count , "count" , 5 , "Instances count that should be running to meet SLA." )
2019-03-25 12:09:41 -07:00
startSLACountDrainCmd . cmd . Flags ( ) . DurationVar ( & duration , "duration" , time . Minute * 1 , "Minimum time duration a task needs to be `RUNNING` to be treated as active." )
2019-03-25 11:38:17 -07:00
startSLACountDrainCmd . cmd . Flags ( ) . DurationVar ( & forceDrainTimeout , "sla-limit" , time . Minute * 60 , "Time limit after which SLA-Aware drain sheds SLA Awareness." )
2018-11-20 20:11:21 -08:00
2019-03-25 11:38:17 -07:00
startSLADrainCmd . AddCommand ( startSLAPercentageDrainCmd . cmd )
startSLAPercentageDrainCmd . cmd . Run = SLAPercentageDrain
2018-11-20 20:11:21 -08:00
2018-11-16 21:54:18 -08:00
// SLA Maintenance specific flags
2019-03-25 13:00:58 -07:00
startSLAPercentageDrainCmd . cmd . Flags ( ) . DurationVar ( & startSLAPercentageDrainCmd . monitorInterval , "interval" , time . Second * 10 , "Interval at which to poll scheduler." )
startSLAPercentageDrainCmd . cmd . Flags ( ) . DurationVar ( & startSLAPercentageDrainCmd . monitorTimeout , "timeout" , time . Minute * 20 , "Time after which the monitor will stop polling and throw an error." )
2019-03-25 12:09:41 -07:00
startSLAPercentageDrainCmd . cmd . Flags ( ) . Float64Var ( & percent , "percent" , 80.0 , "Percentage of instances that should be running to meet SLA." )
startSLAPercentageDrainCmd . cmd . Flags ( ) . DurationVar ( & duration , "duration" , time . Minute * 1 , "Minimum time duration a task needs to be `RUNNING` to be treated as active." )
2019-03-25 11:38:17 -07:00
startSLAPercentageDrainCmd . cmd . Flags ( ) . DurationVar ( & forceDrainTimeout , "sla-limit" , time . Minute * 60 , "Time limit after which SLA-Aware drain sheds SLA Awareness." )
2018-11-16 21:54:18 -08:00
2019-03-25 11:38:17 -07:00
startCmd . AddCommand ( startMaintenanceCmd . cmd )
startMaintenanceCmd . cmd . Run = maintenance
2018-11-16 21:54:18 -08:00
// SLA Maintenance specific flags
2019-03-25 11:38:17 -07:00
startMaintenanceCmd . cmd . Flags ( ) . DurationVar ( & startMaintenanceCmd . monitorInterval , "interval" , time . Second * 5 , "Interval at which to poll scheduler." )
startMaintenanceCmd . cmd . Flags ( ) . DurationVar ( & startMaintenanceCmd . monitorTimeout , "timeout" , time . Minute * 10 , "Time after which the monitor will stop polling and throw an error." )
2017-12-03 12:41:23 -08:00
}
var startCmd = & cobra . Command {
Use : "start" ,
2018-09-16 21:22:00 -07:00
Short : "Start a service, maintenance on a host (DRAIN), a snapshot, or a backup." ,
2017-12-03 12:41:23 -08:00
}
2019-03-25 11:38:17 -07:00
var startDrainCmd = MonitorCmdConfig {
cmd : & cobra . Command {
Use : "drain [space separated host list]" ,
Short : "Place a list of space separated Mesos Agents into draining mode." ,
Long : ` Adds a Mesos Agent to Aurora ' s Drain list . Agents in this list
2017-12-03 12:41:23 -08:00
are not allowed to schedule new tasks and any tasks already running on this Agent
are killed and rescheduled in an Agent that is not in maintenance mode . Command
expects a space separated list of hosts to place into maintenance mode . ` ,
2019-03-25 11:38:17 -07:00
Args : cobra . MinimumNArgs ( 1 ) ,
2019-03-22 20:47:42 -07:00
} ,
2017-12-03 12:41:23 -08:00
}
2018-11-16 21:54:18 -08:00
var startSLADrainCmd = & cobra . Command {
2018-11-20 20:11:21 -08:00
Use : "sla-drain" ,
Short : "Place a list of space separated Mesos Agents into maintenance mode using SLA aware strategies." ,
2018-11-16 21:54:18 -08:00
Long : ` Adds a Mesos Agent to Aurora ' s Drain list . Agents in this list
are not allowed to schedule new tasks and any tasks already running on this Agent
are killed and rescheduled in an Agent that is not in maintenance mode . Command
expects a space separated list of hosts to place into maintenance mode . ` ,
2018-11-20 20:11:21 -08:00
}
2019-03-25 11:38:17 -07:00
var startSLACountDrainCmd = MonitorCmdConfig {
cmd : & cobra . Command {
Use : "count [space separated host list]" ,
Short : "Place a list of space separated Mesos Agents into maintenance mode using the count SLA aware policy as a fallback." ,
Long : ` Adds a Mesos Agent to Aurora ' s Drain list . Tasks will be drained using the count SLA policy as a fallback
2018-11-20 20:11:21 -08:00
when a Job does not have a defined SLA policy . ` ,
2019-03-25 11:38:17 -07:00
Args : cobra . MinimumNArgs ( 1 ) ,
2019-03-22 20:47:42 -07:00
} ,
2018-11-20 20:11:21 -08:00
}
2019-03-25 11:38:17 -07:00
var startSLAPercentageDrainCmd = MonitorCmdConfig {
cmd : & cobra . Command {
Use : "percentage [space separated host list]" ,
Short : "Place a list of space separated Mesos Agents into maintenance mode using the percentage SLA aware policy as a fallback." ,
Long : ` Adds a Mesos Agent to Aurora ' s Drain list . Tasks will be drained using the percentage SLA policy as a fallback
2018-11-20 20:11:21 -08:00
when a Job does not have a defined SLA policy . ` ,
2019-03-25 11:38:17 -07:00
Args : cobra . MinimumNArgs ( 1 ) ,
2019-03-22 20:47:42 -07:00
} ,
2018-11-16 21:54:18 -08:00
}
2019-03-25 11:38:17 -07:00
var startMaintenanceCmd = MonitorCmdConfig {
cmd : & cobra . Command {
Use : "maintenance [space separated host list]" ,
Short : "Place a list of space separated Mesos Agents into maintenance mode." ,
Long : ` Places Mesos Agent into Maintenance mode . Agents in this list
2018-11-16 21:54:18 -08:00
are de - prioritized for scheduling a task . Command
expects a space separated list of hosts to place into maintenance mode . ` ,
2019-03-25 11:38:17 -07:00
Args : cobra . MinimumNArgs ( 1 ) ,
2019-03-22 20:47:42 -07:00
} ,
2018-11-16 21:54:18 -08:00
}
2017-12-03 12:41:23 -08:00
func drain ( cmd * cobra . Command , args [ ] string ) {
2018-11-09 15:58:26 -08:00
log . Infoln ( "Setting hosts to DRAINING" )
log . Infoln ( args )
2018-12-27 11:31:51 -08:00
result , err := client . DrainHosts ( args ... )
2017-12-03 12:41:23 -08:00
if err != nil {
2018-11-09 15:58:26 -08:00
log . Fatalf ( "error: %+v\n" , err )
2017-12-03 12:41:23 -08:00
}
2018-11-09 15:58:26 -08:00
log . Debugln ( result )
2019-03-25 11:38:17 -07:00
log . Infof ( "Monitoring for %v at %v intervals" , monitorHostCmd . monitorTimeout , monitorHostCmd . monitorInterval )
2017-12-03 12:41:23 -08:00
// Monitor change to DRAINING and DRAINED mode
2018-12-27 11:31:51 -08:00
hostResult , err := client . HostMaintenanceMonitor (
2017-12-03 12:41:23 -08:00
args ,
2018-05-30 17:50:53 -07:00
[ ] aurora . MaintenanceMode { aurora . MaintenanceMode_DRAINED } ,
2019-03-25 11:38:17 -07:00
startDrainCmd . monitorInterval ,
startDrainCmd . monitorTimeout )
2018-11-16 21:54:18 -08:00
maintenanceMonitorPrint ( hostResult , [ ] aurora . MaintenanceMode { aurora . MaintenanceMode_DRAINED } )
if err != nil {
2018-12-27 11:31:51 -08:00
log . Fatalln ( err )
2018-11-16 21:54:18 -08:00
}
}
2019-03-25 11:38:17 -07:00
func slaDrain ( policy * aurora . SlaPolicy , interval , timeout time . Duration , hosts ... string ) {
2018-11-16 21:54:18 -08:00
2019-03-22 19:01:10 -07:00
result , err := client . SLADrainHosts ( policy , int64 ( forceDrainTimeout . Seconds ( ) ) , hosts ... )
2018-11-16 21:54:18 -08:00
if err != nil {
log . Fatalf ( "error: %+v\n" , err )
}
log . Debugln ( result )
2019-03-25 13:00:58 -07:00
log . Infof ( "Monitoring for %v at %v intervals" , timeout , interval )
2018-11-16 21:54:18 -08:00
// Monitor change to DRAINING and DRAINED mode
2018-12-27 11:31:51 -08:00
hostResult , err := client . HostMaintenanceMonitor (
2018-11-20 20:11:21 -08:00
hosts ,
2018-11-16 21:54:18 -08:00
[ ] aurora . MaintenanceMode { aurora . MaintenanceMode_DRAINED } ,
2019-03-25 11:38:17 -07:00
interval ,
timeout )
2018-11-16 21:54:18 -08:00
maintenanceMonitorPrint ( hostResult , [ ] aurora . MaintenanceMode { aurora . MaintenanceMode_DRAINED } )
2017-12-03 12:41:23 -08:00
if err != nil {
2019-03-22 19:01:10 -07:00
log . Fatalf ( "error: %+v" , err )
2017-12-03 12:41:23 -08:00
}
2018-11-20 20:11:21 -08:00
}
func SLACountDrain ( cmd * cobra . Command , args [ ] string ) {
log . Infoln ( "Setting hosts to DRAINING with the Count SLA policy." )
log . Infoln ( args )
slaDrain ( & aurora . SlaPolicy {
CountSlaPolicy : & aurora . CountSlaPolicy { Count : count , DurationSecs : int64 ( duration . Seconds ( ) ) } } ,
2019-03-25 11:38:17 -07:00
startSLACountDrainCmd . monitorInterval ,
startSLACountDrainCmd . monitorTimeout ,
2019-03-19 15:28:24 -07:00
args ... )
2018-11-20 20:11:21 -08:00
}
func SLAPercentageDrain ( cmd * cobra . Command , args [ ] string ) {
log . Infoln ( "Setting hosts to DRAINING with the Percentage SLA policy." )
log . Infoln ( args )
2017-12-03 12:41:23 -08:00
2018-11-20 20:11:21 -08:00
slaDrain ( & aurora . SlaPolicy {
PercentageSlaPolicy : & aurora . PercentageSlaPolicy { Percentage : percent , DurationSecs : int64 ( duration . Seconds ( ) ) } } ,
2019-03-25 11:38:17 -07:00
startSLAPercentageDrainCmd . monitorInterval ,
startSLAPercentageDrainCmd . monitorTimeout ,
2019-03-19 15:28:24 -07:00
args ... )
2017-12-03 12:41:23 -08:00
}
2018-09-16 21:22:00 -07:00
2018-11-16 21:54:18 -08:00
func maintenance ( cmd * cobra . Command , args [ ] string ) {
log . Infoln ( "Setting hosts to Maintenance mode" )
log . Infoln ( args )
2018-12-27 11:31:51 -08:00
result , err := client . StartMaintenance ( args ... )
2018-11-16 21:54:18 -08:00
if err != nil {
log . Fatalf ( "error: %+v\n" , err )
}
log . Debugln ( result )
2019-03-25 11:38:17 -07:00
log . Infof ( "Monitoring for %v at %v intervals" , monitorHostCmd . monitorTimeout , monitorHostCmd . monitorInterval )
2018-11-16 21:54:18 -08:00
// Monitor change to DRAINING and DRAINED mode
2018-12-27 11:31:51 -08:00
hostResult , err := client . HostMaintenanceMonitor (
2018-11-16 21:54:18 -08:00
args ,
[ ] aurora . MaintenanceMode { aurora . MaintenanceMode_SCHEDULED } ,
2019-03-25 11:38:17 -07:00
startMaintenanceCmd . monitorInterval ,
startMaintenanceCmd . monitorTimeout )
2018-11-16 21:54:18 -08:00
maintenanceMonitorPrint ( hostResult , [ ] aurora . MaintenanceMode { aurora . MaintenanceMode_SCHEDULED } )
if err != nil {
log . Fatalln ( "error: %+v" , err )
}
}