package schedulers

import (
	"bitbucket.org/sunybingcloud/electron/constants"
	"bitbucket.org/sunybingcloud/electron/def"
	powCap "bitbucket.org/sunybingcloud/electron/powerCapping"
	"bitbucket.org/sunybingcloud/electron/rapl"
	"bitbucket.org/sunybingcloud/electron/utilities/mesosUtils"
	"bitbucket.org/sunybingcloud/electron/utilities/offerUtils"
	"fmt"
	"github.com/golang/protobuf/proto"
	mesos "github.com/mesos/mesos-go/mesosproto"
	"github.com/mesos/mesos-go/mesosutil"
	sched "github.com/mesos/mesos-go/scheduler"
	"log"
	"math"
	"os"
	"sync"
	"time"
)

// Decides whether to take an offer or not.
func (s *FirstFitProacCC) takeOffer(offer *mesos.Offer, task def.Task) bool {
	offer_cpu, offer_mem, offer_watts := offerUtils.OfferAgg(offer)

	wattsConsideration, err := def.WattsToConsider(task, s.classMapWatts, offer)
	if err != nil {
		// Error in determining wattsConsideration.
		log.Fatal(err)
	}
	if offer_cpu >= task.CPU && offer_mem >= task.RAM && (!s.wattsAsAResource || (offer_watts >= wattsConsideration)) {
		return true
	}
	return false
}

// FirstFitProacCC implements the Scheduler interface.
type FirstFitProacCC struct {
	base           // Type embedded to inherit common functions
	taskMonitor    map[string][]def.Task // store tasks that are currently running.
	availablePower map[string]float64    // available power for each node in the cluster.
	totalPower     map[string]float64    // total power for each node in the cluster.
	capper         *powCap.ClusterwideCapper
	ticker         *time.Ticker
	recapTicker    *time.Ticker
	isCapping      bool // indicates whether we are currently performing cluster-wide capping.
	isRecapping    bool // indicates whether we are currently performing cluster-wide re-capping.
}

// NewFirstFitProacCC returns a new electron scheduler that places tasks first-fit
// while proactively capping the cluster-wide power consumption.
func NewFirstFitProacCC(tasks []def.Task, wattsAsAResource bool, schedTracePrefix string,
	classMapWatts bool) *FirstFitProacCC {
	logFile, err := os.Create("./" + schedTracePrefix + "_schedTrace.log")
	if err != nil {
		log.Fatal(err)
	}

	s := &FirstFitProacCC{
		base: base{
			tasks:            tasks,
			wattsAsAResource: wattsAsAResource,
			classMapWatts:    classMapWatts,
			Shutdown:         make(chan struct{}),
			Done:             make(chan struct{}),
			PCPLog:           make(chan struct{}),
			running:          make(map[string]map[string]bool),
			RecordPCP:        false,
			schedTrace:       log.New(logFile, "", log.LstdFlags),
		},
		taskMonitor:    make(map[string][]def.Task),
		availablePower: make(map[string]float64),
		totalPower:     make(map[string]float64),
		capper:         powCap.GetClusterwideCapperInstance(),
		ticker:         time.NewTicker(10 * time.Second),
		recapTicker:    time.NewTicker(20 * time.Second),
		isCapping:      false,
		isRecapping:    false,
	}
	return s
}

// Mutex to guard the shared capping state (cap values, capping flags, and the running-task count).
var fcfsMutex sync.Mutex
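
// newTask builds the mesos.TaskInfo for the given task on the given offer, turning on PCP
// logging before the first launch and recording the task in the running and taskMonitor maps.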
func (s *FirstFitProacCC) newTask(offer *mesos.Offer, task def.Task) *mesos.TaskInfo {
	taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances)
	s.tasksCreated++

	if !s.RecordPCP {
		// Turn on logging.
		s.RecordPCP = true
		time.Sleep(1 * time.Second) // Make sure we're recording by the time the first task starts
	}

	// If this is our first time running into this Agent
	if _, ok := s.running[offer.GetSlaveId().GoString()]; !ok {
		s.running[offer.GetSlaveId().GoString()] = make(map[string]bool)
	}

	// Setting the task ID to the task. This is done so that we can consider each task to be different,
	// even though they have the same parameters.
	task.SetTaskID(*proto.String("electron-" + taskName))
	// Add task to the list of tasks running on the node.
	s.running[offer.GetSlaveId().GoString()][taskName] = true
	if len(s.taskMonitor[*offer.Hostname]) == 0 {
		s.taskMonitor[*offer.Hostname] = []def.Task{task}
	} else {
		s.taskMonitor[*offer.Hostname] = append(s.taskMonitor[*offer.Hostname], task)
	}

	resources := []*mesos.Resource{
		mesosutil.NewScalarResource("cpus", task.CPU),
		mesosutil.NewScalarResource("mem", task.RAM),
	}

	if s.wattsAsAResource {
		if wattsToConsider, err := def.WattsToConsider(task, s.classMapWatts, offer); err == nil {
			log.Printf("Watts considered for host[%s] and task[%s] = %f", *offer.Hostname, task.Name, wattsToConsider)
			resources = append(resources, mesosutil.NewScalarResource("watts", wattsToConsider))
		} else {
			// Error in determining wattsConsideration.
			log.Fatal(err)
		}
	}

	return &mesos.TaskInfo{
		Name: proto.String(taskName),
		TaskId: &mesos.TaskID{
			Value: proto.String("electron-" + taskName),
		},
		SlaveId:   offer.SlaveId,
		Resources: resources,
		Command: &mesos.CommandInfo{
			Value: proto.String(task.CMD),
		},
		Container: &mesos.ContainerInfo{
			Type: mesos.ContainerInfo_DOCKER.Enum(),
			Docker: &mesos.ContainerInfo_DockerInfo{
				Image:   proto.String(task.Image),
				Network: mesos.ContainerInfo_DockerInfo_BRIDGE.Enum(), // Run everything isolated
			},
		},
	}
}
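
// Disconnected is invoked when the scheduler loses its connection to the Mesos master;
// the capping and re-capping tickers are stopped and capping is marked as off.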
func (s *FirstFitProacCC) Disconnected(sched.SchedulerDriver) {
	// Need to stop the capping process.
	s.ticker.Stop()
	s.recapTicker.Stop()
	fcfsMutex.Lock()
	s.isCapping = false
	fcfsMutex.Unlock()
	log.Println("Framework disconnected from the master")
}

// goroutine to cap the entire cluster at regular intervals of time.
var fcfsCurrentCapValue = 0.0 // initial value to indicate that we haven't capped the cluster yet.

func (s *FirstFitProacCC) startCapping() {
	go func() {
		for {
			select {
			case <-s.ticker.C:
				// Need to cap the cluster to the fcfsCurrentCapValue.
				fcfsMutex.Lock()
				if fcfsCurrentCapValue > 0.0 {
					for host := range constants.Hosts {
						// Rounding fcfsCurrentCapValue to the nearest int.
						if err := rapl.Cap(host, "rapl", float64(int(math.Floor(fcfsCurrentCapValue+0.5)))); err != nil {
							log.Println(err)
						}
					}
					log.Printf("Capped the cluster to %d", int(math.Floor(fcfsCurrentCapValue+0.5)))
				}
				fcfsMutex.Unlock()
			}
		}
	}()
}

// goroutine to re-cap the entire cluster at regular intervals of time.
var fcfsRecapValue = 0.0 // The cluster-wide cap value when re-capping.

func (s *FirstFitProacCC) startRecapping() {
	go func() {
		for {
			select {
			case <-s.recapTicker.C:
				fcfsMutex.Lock()
				// If we stopped performing cluster-wide capping, then we need to explicitly cap the entire cluster.
				if s.isRecapping && fcfsRecapValue > 0.0 {
					for host := range constants.Hosts {
						// Rounding fcfsRecapValue to the nearest int.
						if err := rapl.Cap(host, "rapl", float64(int(math.Floor(fcfsRecapValue+0.5)))); err != nil {
							log.Println(err)
						}
					}
					log.Printf("Recapped the cluster to %d", int(math.Floor(fcfsRecapValue+0.5)))
				}
				// Setting recapping to false.
				s.isRecapping = false
				fcfsMutex.Unlock()
			}
		}
	}()
}

// Stop cluster-wide capping.
func (s *FirstFitProacCC) stopCapping() {
	if s.isCapping {
		log.Println("Stopping the cluster wide capping.")
		s.ticker.Stop()
		fcfsMutex.Lock()
		s.isCapping = false
		s.isRecapping = true
		fcfsMutex.Unlock()
	}
}

// Stop cluster-wide re-capping.
func (s *FirstFitProacCC) stopRecapping() {
	// If not capping, then definitely recapping.
	if !s.isCapping && s.isRecapping {
		log.Println("Stopping the cluster wide re-capping.")
		s.recapTicker.Stop()
		fcfsMutex.Lock()
		s.isRecapping = false
		fcfsMutex.Unlock()
	}
}
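
// ResourceOffers records the available and total power advertised in each offer, then
// schedules tasks first-fit: the first pending task that fits an offer is launched on it,
// the cluster-wide cap is recomputed for that placement, and offers with no fitting task
// are declined.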
func (s *FirstFitProacCC) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	log.Printf("Received %d resource offers", len(offers))

	// Retrieving the available power for all the hosts in the offers.
	for _, offer := range offers {
		offerUtils.UpdateEnvironment(offer)
		_, _, offer_watts := offerUtils.OfferAgg(offer)
		s.availablePower[*offer.Hostname] = offer_watts
		// Setting the total power if this is the first time we're seeing this host.
		if _, ok := s.totalPower[*offer.Hostname]; !ok {
			s.totalPower[*offer.Hostname] = offer_watts
		}
	}

	for host, tpower := range s.totalPower {
		log.Printf("TotalPower[%s] = %f", host, tpower)
	}

	for _, offer := range offers {
		select {
		case <-s.Shutdown:
			log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]")
			driver.DeclineOffer(offer.Id, mesosUtils.LongFilter)
			log.Println("Number of tasks still running: ", s.tasksRunning)
			continue
		default:
		}

		/*
			Cluster-wide capping strategy

			For each task in s.tasks,
			  1. Check whether the offer can be taken or not (based on CPU, RAM and, if enabled, watts requirements).
			  2. If the task fits the offer, determine the cluster-wide cap.
			  3. fcfsCurrentCapValue is updated with the determined cluster-wide cap.

			Cluster-wide capping is currently performed at regular intervals of time.
		*/
		offerTaken := false
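
		// First fit: launch the first task in s.tasks that fits this offer; the remaining
		// tasks wait for subsequent offers.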
		for i := 0; i < len(s.tasks); i++ {
			task := s.tasks[i]
			// Don't take offer if it doesn't match our task's host requirement.
			if offerUtils.HostMismatch(*offer.Hostname, task.Host) {
				continue
			}

			// Does the task fit the offer?
			if s.takeOffer(offer, task) {
				// Start cluster-wide capping if we haven't already.
				if !s.isCapping {
					fcfsMutex.Lock()
					s.isCapping = true
					fcfsMutex.Unlock()
					s.startCapping()
				}

				offerTaken = true
				tempCap, err := s.capper.FCFSDeterminedCap(s.totalPower, &task)

				if err == nil {
					fcfsMutex.Lock()
					fcfsCurrentCapValue = tempCap
					fcfsMutex.Unlock()
				} else {
					log.Println("Failed to determine new cluster wide cap: ")
					log.Println(err)
				}

				log.Printf("Starting on [%s]\n", offer.GetHostname())
				taskToSchedule := s.newTask(offer, task)
				toSchedule := []*mesos.TaskInfo{taskToSchedule}
				driver.LaunchTasks([]*mesos.OfferID{offer.Id}, toSchedule, mesosUtils.DefaultFilter)
				log.Printf("Inst: %d", *task.Instances)
				s.schedTrace.Print(offer.GetHostname() + ":" + taskToSchedule.GetTaskId().GetValue())
				*task.Instances--

				if *task.Instances <= 0 {
					// All instances of the task have been scheduled. Need to remove it from the list of tasks to schedule.
					s.tasks[i] = s.tasks[len(s.tasks)-1]
					s.tasks = s.tasks[:len(s.tasks)-1]

					if len(s.tasks) <= 0 {
						log.Println("Done scheduling all tasks")
						// Need to stop the cluster-wide capping as there aren't any more tasks to schedule.
						s.stopCapping()
						s.startRecapping() // Load changes after every task finishes and hence we need to change the capping of the cluster.
						close(s.Shutdown)
					}
				}
				break // Offer taken, move on.
			} else {
				// Task doesn't fit the offer. Move on to the next task.
			}
		}

		// If no task fits the offer, decline the offer.
		if !offerTaken {
			log.Printf("There are not enough resources to launch a task on Host: %s\n", offer.GetHostname())
			cpus, mem, watts := offerUtils.OfferAgg(offer)

			log.Printf("<CPU: %f, RAM: %f, Watts: %f>\n", cpus, mem, watts)
			driver.DeclineOffer(offer.Id, mesosUtils.DefaultFilter)
		}
	}
}
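
// StatusUpdate tracks the number of running tasks and, when a task reaches a terminal state,
// removes it from the capper's window of tasks and recomputes the cluster-wide re-cap value.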
func (s *FirstFitProacCC) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.Printf("Received task status [%s] for task [%s]\n", NameFor(status.State), *status.TaskId.Value)

	if *status.State == mesos.TaskState_TASK_RUNNING {
		fcfsMutex.Lock()
		s.tasksRunning++
		fcfsMutex.Unlock()
	} else if IsTerminal(status.State) {
		delete(s.running[status.GetSlaveId().GoString()], *status.TaskId.Value)
		// Need to remove the task from the window of tasks.
		s.capper.TaskFinished(*status.TaskId.Value)
		// Determining the new cluster-wide cap.
		//tempCap, err := s.capper.NaiveRecap(s.totalPower, s.taskMonitor, *status.TaskId.Value)
		tempCap, err := s.capper.CleverRecap(s.totalPower, s.taskMonitor, *status.TaskId.Value)
		if err == nil {
			// If the newly determined cap value is different from the current re-cap value, then we need to re-cap.
			if int(math.Floor(tempCap+0.5)) != int(math.Floor(fcfsRecapValue+0.5)) {
				// Update the re-cap value and flag under the lock.
				fcfsMutex.Lock()
				fcfsRecapValue = tempCap
				s.isRecapping = true
				fcfsMutex.Unlock()
				log.Printf("Determined re-cap value: %f\n", fcfsRecapValue)
			} else {
				fcfsMutex.Lock()
				s.isRecapping = false
				fcfsMutex.Unlock()
			}
		} else {
			// Not updating fcfsCurrentCapValue.
			log.Println(err)
		}

		fcfsMutex.Lock()
		s.tasksRunning--
		fcfsMutex.Unlock()
		if s.tasksRunning == 0 {
			select {
			case <-s.Shutdown:
				// Need to stop the recapping process.
				s.stopRecapping()
				close(s.Done)
			default:
			}
		}
	}
	log.Printf("DONE: Task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value)
}