2019-03-25 17:33:21 -07:00
/**
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
2017-12-03 12:41:23 -08:00
package cmd
import (
2020-02-07 18:41:59 -08:00
"encoding/json"
"errors"
"io/ioutil"
"os"
2019-03-19 15:28:24 -07:00
"time"
2020-02-19 12:18:50 -08:00
"github.com/aurora-scheduler/gorealis/v2/gen-go/apache/aurora"
2017-12-03 12:41:23 -08:00
"github.com/spf13/cobra"
)
2020-01-29 16:12:53 -08:00
// Names of the command line flags that the start sub-commands look up by
// name (via Flags().Changed) in addition to registering them.
const (
	// countFlag names the flag carrying the instance count for the count SLA policy.
	countFlag = "count"
	// percentageFlag names the flag carrying the percentage for the percentage SLA policy.
	percentageFlag = "percentage"
	// jsonFlag names the boolean flag requesting a JSON host list on STDIN.
	jsonFlag = "json"
	// jsonFileFlag names the flag carrying the path of a JSON host list file.
	jsonFileFlag = "json-file"
)
2020-01-29 16:12:53 -08:00
2017-12-03 12:41:23 -08:00
func init ( ) {
rootCmd . AddCommand ( startCmd )
// Sub-commands
2019-03-25 11:38:17 -07:00
startCmd . AddCommand ( startDrainCmd . cmd )
startDrainCmd . cmd . Run = drain
2018-05-30 18:15:16 -07:00
2018-09-16 21:22:00 -07:00
// Maintenance specific flags
2019-03-25 11:38:17 -07:00
startDrainCmd . cmd . Flags ( ) . DurationVar ( & startDrainCmd . monitorInterval , "interval" , time . Second * 5 , "Interval at which to poll scheduler." )
startDrainCmd . cmd . Flags ( ) . DurationVar ( & startDrainCmd . monitorTimeout , "timeout" , time . Minute * 10 , "Time after which the monitor will stop polling and throw an error." )
2020-02-07 18:41:59 -08:00
startDrainCmd . cmd . Flags ( ) . StringVar ( & fromJsonFile , jsonFileFlag , "" , "JSON file to read list of agents from." )
startDrainCmd . cmd . Flags ( ) . BoolVar ( & fromJson , jsonFlag , false , "Read JSON list of agents from the STDIN." )
2018-05-30 18:15:16 -07:00
2018-11-20 20:11:21 -08:00
/* SLA Aware commands */
2020-01-29 16:12:53 -08:00
startCmd . AddCommand ( startSLADrainCmd . cmd )
startSLADrainCmd . cmd . Run = slaDrain
2018-11-20 20:11:21 -08:00
// SLA Maintenance specific flags
2020-01-29 16:12:53 -08:00
startSLADrainCmd . cmd . Flags ( ) . Int64Var ( & count , countFlag , 5 , "Instances count that should be running to meet SLA." )
startSLADrainCmd . cmd . Flags ( ) . Float64Var ( & percent , percentageFlag , 80.0 , "Percentage of instances that should be running to meet SLA." )
startSLADrainCmd . cmd . Flags ( ) . DurationVar ( & duration , "duration" , time . Minute * 1 , "Minimum time duration a task needs to be `RUNNING` to be treated as active." )
startSLADrainCmd . cmd . Flags ( ) . DurationVar ( & forceDrainTimeout , "sla-limit" , time . Minute * 60 , "Time limit after which SLA-Aware drain sheds SLA Awareness." )
startSLADrainCmd . cmd . Flags ( ) . DurationVar ( & startSLADrainCmd . monitorInterval , "interval" , time . Second * 10 , "Interval at which to poll scheduler." )
startSLADrainCmd . cmd . Flags ( ) . DurationVar ( & startSLADrainCmd . monitorTimeout , "timeout" , time . Minute * 20 , "Time after which the monitor will stop polling and throw an error." )
2020-02-07 18:41:59 -08:00
startSLADrainCmd . cmd . Flags ( ) . StringVar ( & fromJsonFile , jsonFileFlag , "" , "JSON file to read list of agents from." )
startSLADrainCmd . cmd . Flags ( ) . BoolVar ( & fromJson , jsonFlag , false , "Read JSON list of agents from the STDIN." )
2018-11-16 21:54:18 -08:00
2019-03-25 11:38:17 -07:00
startCmd . AddCommand ( startMaintenanceCmd . cmd )
startMaintenanceCmd . cmd . Run = maintenance
2018-11-16 21:54:18 -08:00
// SLA Maintenance specific flags
2019-03-25 11:38:17 -07:00
startMaintenanceCmd . cmd . Flags ( ) . DurationVar ( & startMaintenanceCmd . monitorInterval , "interval" , time . Second * 5 , "Interval at which to poll scheduler." )
startMaintenanceCmd . cmd . Flags ( ) . DurationVar ( & startMaintenanceCmd . monitorTimeout , "timeout" , time . Minute * 10 , "Time after which the monitor will stop polling and throw an error." )
2020-02-07 18:41:59 -08:00
startMaintenanceCmd . cmd . Flags ( ) . StringVar ( & fromJsonFile , jsonFileFlag , "" , "JSON file to read list of agents from." )
startMaintenanceCmd . cmd . Flags ( ) . BoolVar ( & fromJson , jsonFlag , false , "Read JSON list of agents from the STDIN." )
2017-12-03 12:41:23 -08:00
}
var startCmd = & cobra . Command {
Use : "start" ,
2018-09-16 21:22:00 -07:00
Short : "Start a service, maintenance on a host (DRAIN), a snapshot, or a backup." ,
2017-12-03 12:41:23 -08:00
}
2019-06-28 10:21:51 -07:00
var startDrainCmd = monitorCmdConfig {
2019-03-25 11:38:17 -07:00
cmd : & cobra . Command {
2020-02-07 18:41:59 -08:00
Use : "drain [space separated host list or use JSON flags]" ,
2019-03-25 11:38:17 -07:00
Short : "Place a list of space separated Mesos Agents into draining mode." ,
Long : ` Adds a Mesos Agent to Aurora ' s Drain list . Agents in this list
2017-12-03 12:41:23 -08:00
are not allowed to schedule new tasks and any tasks already running on this Agent
are killed and rescheduled in an Agent that is not in maintenance mode . Command
expects a space separated list of hosts to place into maintenance mode . ` ,
2020-02-07 18:41:59 -08:00
Args : argsValidateJSONFlags ,
2019-03-22 20:47:42 -07:00
} ,
2017-12-03 12:41:23 -08:00
}
2020-01-29 16:12:53 -08:00
var startSLADrainCmd = monitorCmdConfig {
cmd : & cobra . Command {
2020-02-07 18:41:59 -08:00
Use : "sla-drain [space separated host list or use JSON flags]" ,
2020-01-29 16:12:53 -08:00
Short : "Place a list of space separated Mesos Agents into maintenance mode using SLA aware strategies." ,
Long : ` Adds a Mesos Agent to Aurora ' s Drain list . Agents in this list
2018-11-16 21:54:18 -08:00
are not allowed to schedule new tasks and any tasks already running on this Agent
are killed and rescheduled in an Agent that is not in maintenance mode . Command
2020-01-29 16:12:53 -08:00
expects a space separated list of hosts to place into maintenance mode .
If the -- count argument is passed , tasks will be drained using the count SLA policy as a fallback
when a Job does not have a defined SLA policy .
If the -- percentage argument is passed , tasks will be drained using the percentage SLA policy as a fallback
2018-11-20 20:11:21 -08:00
when a Job does not have a defined SLA policy . ` ,
2020-02-07 18:41:59 -08:00
Args : argsValidateJSONFlags ,
2019-03-22 20:47:42 -07:00
} ,
2018-11-16 21:54:18 -08:00
}
2019-06-28 10:21:51 -07:00
var startMaintenanceCmd = monitorCmdConfig {
2019-03-25 11:38:17 -07:00
cmd : & cobra . Command {
2020-02-07 18:41:59 -08:00
Use : "maintenance [space separated host list or use JSON flags]" ,
2019-03-25 11:38:17 -07:00
Short : "Place a list of space separated Mesos Agents into maintenance mode." ,
Long : ` Places Mesos Agent into Maintenance mode . Agents in this list
2018-11-16 21:54:18 -08:00
are de - prioritized for scheduling a task . Command
expects a space separated list of hosts to place into maintenance mode . ` ,
2020-02-07 18:41:59 -08:00
Args : argsValidateJSONFlags ,
2019-03-22 20:47:42 -07:00
} ,
2018-11-16 21:54:18 -08:00
}
2020-02-07 18:41:59 -08:00
// argsValidateJSONFlags is a positional-argument validator for commands
// that accept their host list either as arguments or through the JSON flags.
//
// It returns an error when both the json and json-file flags were set, nil
// when exactly one of them was set (positional arguments are not inspected
// in that case), and otherwise requires at least one positional argument.
func argsValidateJSONFlags(cmd *cobra.Command, args []string) error {
	flags := cmd.Flags()
	fromStdin := flags.Changed(jsonFlag)
	fromFile := flags.Changed(jsonFileFlag)

	switch {
	case fromStdin && fromFile:
		// These two flags are mutually exclusive.
		return errors.New("only json file or json stdin must be set")
	case fromStdin || fromFile:
		// Hosts will come from JSON, so no positional arguments are needed.
		return nil
	case len(args) < 1:
		return errors.New("at least one host must be specified")
	default:
		return nil
	}
}
// hostList resolves the hosts a command should operate on.
//
// When the json flag was set, a JSON array of strings is decoded from STDIN.
// Otherwise, when the json-file flag was set, the file named by fromJsonFile
// is read and unmarshalled as a JSON array of strings. With neither flag set
// the positional args are returned unchanged. Read and decode failures are
// reported through log.Fatal.
func hostList(cmd *cobra.Command, args []string) []string {
	flags := cmd.Flags()

	switch {
	case flags.Changed(jsonFlag):
		var decoded []string
		if err := json.NewDecoder(os.Stdin).Decode(&decoded); err != nil {
			log.Fatal(err)
		}
		return decoded

	case flags.Changed(jsonFileFlag):
		contents, err := ioutil.ReadFile(fromJsonFile)
		if err != nil {
			log.Fatal(err)
		}

		var decoded []string
		if err := json.Unmarshal(contents, &decoded); err != nil {
			log.Fatal(err)
		}
		return decoded

	default:
		return args
	}
}
2017-12-03 12:41:23 -08:00
func drain ( cmd * cobra . Command , args [ ] string ) {
2020-02-07 18:41:59 -08:00
hosts := hostList ( cmd , args )
2018-11-09 15:58:26 -08:00
log . Infoln ( "Setting hosts to DRAINING" )
2020-02-07 18:41:59 -08:00
log . Infoln ( hosts )
result , err := client . DrainHosts ( hosts ... )
2017-12-03 12:41:23 -08:00
if err != nil {
2020-01-29 16:12:53 -08:00
log . Fatalf ( "error: %+v" , err )
2017-12-03 12:41:23 -08:00
}
2018-11-09 15:58:26 -08:00
log . Debugln ( result )
2019-03-25 11:38:17 -07:00
log . Infof ( "Monitoring for %v at %v intervals" , monitorHostCmd . monitorTimeout , monitorHostCmd . monitorInterval )
2017-12-03 12:41:23 -08:00
// Monitor change to DRAINING and DRAINED mode
2020-02-07 18:41:59 -08:00
hostResult , err := client . MonitorHostMaintenance (
hosts ,
2018-05-30 17:50:53 -07:00
[ ] aurora . MaintenanceMode { aurora . MaintenanceMode_DRAINED } ,
2019-03-25 11:38:17 -07:00
startDrainCmd . monitorInterval ,
startDrainCmd . monitorTimeout )
2018-11-16 21:54:18 -08:00
maintenanceMonitorPrint ( hostResult , [ ] aurora . MaintenanceMode { aurora . MaintenanceMode_DRAINED } )
if err != nil {
2018-12-27 11:31:51 -08:00
log . Fatalln ( err )
2018-11-16 21:54:18 -08:00
}
}
2020-01-29 16:12:53 -08:00
func slaDrainHosts ( policy * aurora . SlaPolicy , interval , timeout time . Duration , hosts ... string ) {
2019-03-22 19:01:10 -07:00
result , err := client . SLADrainHosts ( policy , int64 ( forceDrainTimeout . Seconds ( ) ) , hosts ... )
2018-11-16 21:54:18 -08:00
if err != nil {
log . Fatalf ( "error: %+v\n" , err )
}
log . Debugln ( result )
2019-03-25 13:00:58 -07:00
log . Infof ( "Monitoring for %v at %v intervals" , timeout , interval )
2018-11-16 21:54:18 -08:00
// Monitor change to DRAINING and DRAINED mode
2020-02-07 18:41:59 -08:00
hostResult , err := client . MonitorHostMaintenance (
2018-11-20 20:11:21 -08:00
hosts ,
2018-11-16 21:54:18 -08:00
[ ] aurora . MaintenanceMode { aurora . MaintenanceMode_DRAINED } ,
2019-03-25 11:38:17 -07:00
interval ,
timeout )
2018-11-16 21:54:18 -08:00
maintenanceMonitorPrint ( hostResult , [ ] aurora . MaintenanceMode { aurora . MaintenanceMode_DRAINED } )
2017-12-03 12:41:23 -08:00
if err != nil {
2019-03-22 19:01:10 -07:00
log . Fatalf ( "error: %+v" , err )
2017-12-03 12:41:23 -08:00
}
2018-11-20 20:11:21 -08:00
}
2020-01-29 16:12:53 -08:00
func slaDrain ( cmd * cobra . Command , args [ ] string ) {
2020-02-07 18:41:59 -08:00
hosts := hostList ( cmd , args )
2020-01-29 16:12:53 -08:00
// This check makes sure only a single flag is set.
// If they're both set or both not set, the statement will evaluate to true.
if cmd . Flags ( ) . Changed ( percentageFlag ) == cmd . Flags ( ) . Changed ( countFlag ) {
log . Fatal ( "Either percentage or count must be set exclusively." )
}
2018-11-20 20:11:21 -08:00
2020-01-29 16:12:53 -08:00
policy := & aurora . SlaPolicy { }
2018-11-20 20:11:21 -08:00
2020-01-29 16:12:53 -08:00
if cmd . Flags ( ) . Changed ( percentageFlag ) {
log . Infoln ( "Setting hosts to DRAINING with the Percentage SLA policy." )
policy . PercentageSlaPolicy = & aurora . PercentageSlaPolicy {
Percentage : percent ,
DurationSecs : int64 ( duration . Seconds ( ) ) ,
}
}
2018-11-20 20:11:21 -08:00
2020-01-29 16:12:53 -08:00
if cmd . Flags ( ) . Changed ( countFlag ) {
log . Infoln ( "Setting hosts to DRAINING with the Count SLA policy." )
policy . CountSlaPolicy = & aurora . CountSlaPolicy { Count : count , DurationSecs : int64 ( duration . Seconds ( ) ) }
}
2017-12-03 12:41:23 -08:00
2020-01-29 16:12:53 -08:00
log . Infoln ( "Hosts affected: " , args )
2020-02-07 18:41:59 -08:00
slaDrainHosts ( policy , startDrainCmd . monitorInterval , startDrainCmd . monitorTimeout , hosts ... )
2017-12-03 12:41:23 -08:00
}
2018-09-16 21:22:00 -07:00
2018-11-16 21:54:18 -08:00
func maintenance ( cmd * cobra . Command , args [ ] string ) {
2020-02-07 18:41:59 -08:00
hosts := hostList ( cmd , args )
2018-11-16 21:54:18 -08:00
log . Infoln ( "Setting hosts to Maintenance mode" )
2020-02-07 18:41:59 -08:00
log . Infoln ( hosts )
result , err := client . StartMaintenance ( hosts ... )
2018-11-16 21:54:18 -08:00
if err != nil {
2020-02-07 18:41:59 -08:00
log . Fatalf ( "error: %+v" , err )
2018-11-16 21:54:18 -08:00
}
log . Debugln ( result )
2019-03-25 11:38:17 -07:00
log . Infof ( "Monitoring for %v at %v intervals" , monitorHostCmd . monitorTimeout , monitorHostCmd . monitorInterval )
2018-11-16 21:54:18 -08:00
// Monitor change to DRAINING and DRAINED mode
2020-02-07 18:41:59 -08:00
hostResult , err := client . MonitorHostMaintenance (
hosts ,
2018-11-16 21:54:18 -08:00
[ ] aurora . MaintenanceMode { aurora . MaintenanceMode_SCHEDULED } ,
2019-03-25 11:38:17 -07:00
startMaintenanceCmd . monitorInterval ,
startMaintenanceCmd . monitorTimeout )
2018-11-16 21:54:18 -08:00
maintenanceMonitorPrint ( hostResult , [ ] aurora . MaintenanceMode { aurora . MaintenanceMode_SCHEDULED } )
if err != nil {
log . Fatalln ( "error: %+v" , err )
}
}