package schedulers

import (
	"bitbucket.org/sunybingcloud/electron/constants"
	"bitbucket.org/sunybingcloud/electron/def"
	"bitbucket.org/sunybingcloud/electron/utilities/mesosUtils"
	"bitbucket.org/sunybingcloud/electron/utilities/offerUtils"
	"fmt"
	"github.com/golang/protobuf/proto"
	mesos "github.com/mesos/mesos-go/mesosproto"
	"github.com/mesos/mesos-go/mesosutil"
	sched "github.com/mesos/mesos-go/scheduler"
	"log"
	"os"
	"sort"
	"time"
)

/*
Tasks are categorized into small and large tasks based on their watts requirement.
All the small tasks are packed into offers from agents belonging to power class C and power class D, using BinPacking.
All the large tasks are spread among the offers from agents belonging to power class A and power class B, using FirstFit.

This is done to give the large (power-intensive) tasks more room for execution and to reduce the possibility of
starvation of power-intensive tasks.
*/

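// Determine whether the remaining resources in this offer can accommodate the task,
// given the CPU, RAM and watts already committed to tasks packed into the offer.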
func (s *TopHeavy) takeOfferBinPack(offer *mesos.Offer, totalCPU, totalRAM, totalWatts,
	wattsToConsider float64, task def.Task) bool {
	offerCPU, offerRAM, offerWatts := offerUtils.OfferAgg(offer)

	//TODO: Insert watts calculation here instead of taking them as a parameter
	if (!s.wattsAsAResource || (offerWatts >= (totalWatts + wattsToConsider))) &&
		(offerCPU >= (totalCPU + task.CPU)) &&
		(offerRAM >= (totalRAM + task.RAM)) {
		return true
	}
	return false
}

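// Determine whether the offer has enough resources to run a single instance of the task.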
func (s *TopHeavy) takeOfferFirstFit(offer *mesos.Offer, wattsConsideration float64, task def.Task) bool {
	offerCPU, offerRAM, offerWatts := offerUtils.OfferAgg(offer)

	//TODO: Insert watts calculation here instead of taking them as a parameter
	if (!s.wattsAsAResource || (offerWatts >= wattsConsideration)) &&
		(offerCPU >= task.CPU) && (offerRAM >= task.RAM) {
		return true
	}
	return false
}

// TopHeavy implements the Scheduler interface.
type TopHeavy struct {
	base // Type embedded to inherit common functions
	smallTasks, largeTasks []def.Task
}

// NewTopHeavy initializes a new electron scheduler.
func NewTopHeavy(tasks []def.Task, wattsAsAResource bool, schedTracePrefix string, classMapWatts bool) *TopHeavy {
	sort.Sort(def.WattsSorter(tasks))

	logFile, err := os.Create("./" + schedTracePrefix + "_schedTrace.log")
	if err != nil {
		log.Fatal(err)
	}

	// Classification done based on MMPU watts requirements, into 2 clusters.
	classifiedTasks := def.ClassifyTasks(tasks, 2)

	s := &TopHeavy{
		base: base{
			wattsAsAResource: wattsAsAResource,
			classMapWatts:    classMapWatts,
			Shutdown:         make(chan struct{}),
			Done:             make(chan struct{}),
			PCPLog:           make(chan struct{}),
			running:          make(map[string]map[string]bool),
			RecordPCP:        false,
			schedTrace:       log.New(logFile, "", log.LstdFlags),
		},
		// Separating small tasks from large tasks.
		smallTasks: classifiedTasks[0].Tasks,
		largeTasks: classifiedTasks[1].Tasks,
	}
	return s
}

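// Build the TaskInfo for a task instance to be launched via this offer,
// and record it against the agent it will run on.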
func (s *TopHeavy) newTask(offer *mesos.Offer, task def.Task) *mesos.TaskInfo {
	taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances)
	s.tasksCreated++

	if !s.RecordPCP {
		// Turn on logging
		s.RecordPCP = true
		time.Sleep(1 * time.Second) // Make sure we're recording by the time the first task starts
	}

	// If this is our first time running into this Agent
	if _, ok := s.running[offer.GetSlaveId().GoString()]; !ok {
		s.running[offer.GetSlaveId().GoString()] = make(map[string]bool)
	}

	// Add task to list of tasks running on node
	s.running[offer.GetSlaveId().GoString()][taskName] = true

	resources := []*mesos.Resource{
		mesosutil.NewScalarResource("cpus", task.CPU),
		mesosutil.NewScalarResource("mem", task.RAM),
	}

	if s.wattsAsAResource {
		if wattsToConsider, err := def.WattsToConsider(task, s.classMapWatts, offer); err == nil {
			log.Printf("Watts considered for host[%s] and task[%s] = %f", *offer.Hostname, task.Name, wattsToConsider)
			resources = append(resources, mesosutil.NewScalarResource("watts", wattsToConsider))
		} else {
			// Error in determining wattsConsideration
			log.Fatal(err)
		}
	}

	return &mesos.TaskInfo{
		Name: proto.String(taskName),
		TaskId: &mesos.TaskID{
			Value: proto.String("electron-" + taskName),
		},
		SlaveId:   offer.SlaveId,
		Resources: resources,
		Command: &mesos.CommandInfo{
			Value: proto.String(task.CMD),
		},
		Container: &mesos.ContainerInfo{
			Type: mesos.ContainerInfo_DOCKER.Enum(),
			Docker: &mesos.ContainerInfo_DockerInfo{
				Image:   proto.String(task.Image),
				Network: mesos.ContainerInfo_DockerInfo_BRIDGE.Enum(), // Run everything isolated
			},
		},
	}
}

// Shut down scheduler if no more tasks to schedule
func (s *TopHeavy) shutDownIfNecessary() {
	if len(s.smallTasks) <= 0 && len(s.largeTasks) <= 0 {
		log.Println("Done scheduling all tasks")
		close(s.Shutdown)
	}
}

// create TaskInfo and log scheduling trace
func (s *TopHeavy) createTaskInfoAndLogSchedTrace(offer *mesos.Offer, task def.Task) *mesos.TaskInfo {
	log.Println("Co-Located with:")
	coLocated(s.running[offer.GetSlaveId().GoString()])
	taskToSchedule := s.newTask(offer, task)

	fmt.Println("Inst: ", *task.Instances)
	s.schedTrace.Print(offer.GetHostname() + ":" + taskToSchedule.GetTaskId().GetValue())
	*task.Instances--
	return taskToSchedule
}

// Using BinPacking to pack small tasks into these offers.
func (s *TopHeavy) pack(offers []*mesos.Offer, driver sched.SchedulerDriver) {
	for _, offer := range offers {
		select {
		case <-s.Shutdown:
			log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]")
			driver.DeclineOffer(offer.Id, mesosUtils.LongFilter)

			log.Println("Number of tasks still running: ", s.tasksRunning)
			continue
		default:
		}

		tasks := []*mesos.TaskInfo{}
		totalWatts := 0.0
		totalCPU := 0.0
		totalRAM := 0.0
		taken := false
		for i := 0; i < len(s.smallTasks); i++ {
			task := s.smallTasks[i]
			wattsConsideration, err := def.WattsToConsider(task, s.classMapWatts, offer)
			if err != nil {
				// Error in determining wattsConsideration
				log.Fatal(err)
			}

			for *task.Instances > 0 {
				// Does the task fit?
				// Short-circuit evaluation: if wattsAsAResource is disabled, the watts
				// check inside takeOfferBinPack is never evaluated.
				if s.takeOfferBinPack(offer, totalCPU, totalRAM, totalWatts, wattsConsideration, task) {
					taken = true
					totalWatts += wattsConsideration
					totalCPU += task.CPU
					totalRAM += task.RAM
					tasks = append(tasks, s.createTaskInfoAndLogSchedTrace(offer, task))

					if *task.Instances <= 0 {
						// All instances of task have been scheduled, remove it
						s.smallTasks = append(s.smallTasks[:i], s.smallTasks[i+1:]...)
						s.shutDownIfNecessary()
					}
				} else {
					break // Continue on to next task
				}
			}
		}

		if taken {
			log.Printf("Starting on [%s]\n", offer.GetHostname())
			driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, mesosUtils.DefaultFilter)
		} else {
			// If there was no match for the task
			fmt.Println("There are not enough resources to launch a task:")
			cpus, mem, watts := offerUtils.OfferAgg(offer)

			log.Printf("<CPU: %f, RAM: %f, Watts: %f>\n", cpus, mem, watts)
			driver.DeclineOffer(offer.Id, mesosUtils.DefaultFilter)
		}
	}
}

// Using FirstFit to spread large tasks among these offers.
func (s *TopHeavy) spread(offers []*mesos.Offer, driver sched.SchedulerDriver) {
	for _, offer := range offers {
		select {
		case <-s.Shutdown:
			log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]")
			driver.DeclineOffer(offer.Id, mesosUtils.LongFilter)

			log.Println("Number of tasks still running: ", s.tasksRunning)
			continue
		default:
		}

		tasks := []*mesos.TaskInfo{}
		offerTaken := false
		for i := 0; i < len(s.largeTasks); i++ {
			task := s.largeTasks[i]
			wattsConsideration, err := def.WattsToConsider(task, s.classMapWatts, offer)
			if err != nil {
				// Error in determining wattsConsideration
				log.Fatal(err)
			}

			// Decision to take the offer or not
			if s.takeOfferFirstFit(offer, wattsConsideration, task) {
				offerTaken = true
				tasks = append(tasks, s.createTaskInfoAndLogSchedTrace(offer, task))
				log.Printf("Starting %s on [%s]\n", task.Name, offer.GetHostname())
				driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, mesosUtils.DefaultFilter)

				if *task.Instances <= 0 {
					// All instances of task have been scheduled, remove it
					s.largeTasks = append(s.largeTasks[:i], s.largeTasks[i+1:]...)
					s.shutDownIfNecessary()
				}
				break // Offer taken, move on
			}
		}

		if !offerTaken {
			// If there was no match for the task
			fmt.Println("There are not enough resources to launch a task:")
			cpus, mem, watts := offerUtils.OfferAgg(offer)

			log.Printf("<CPU: %f, RAM: %f, Watts: %f>\n", cpus, mem, watts)
			driver.DeclineOffer(offer.Id, mesosUtils.DefaultFilter)
		}
	}
}

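// ResourceOffers categorizes incoming offers by the power class of the offering agent,
// then spreads large tasks over class A/B offers and packs small tasks into class C/D offers.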
func (s *TopHeavy) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	log.Printf("Received %d resource offers", len(offers))

	// We need to separate the offers from ClassA and ClassB agents from the offers
	// from ClassC and ClassD agents.
	// Offers from ClassA and ClassB would execute the large tasks.
	// Offers from ClassC and ClassD would execute the small tasks.
	offersHeavyPowerClasses := []*mesos.Offer{}
	offersLightPowerClasses := []*mesos.Offer{}

	for _, offer := range offers {
		offerUtils.UpdateEnvironment(offer)
		select {
		case <-s.Shutdown:
			log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]")
			driver.DeclineOffer(offer.Id, mesosUtils.LongFilter)

			log.Println("Number of tasks still running: ", s.tasksRunning)
			continue
		default:
		}

		if _, ok := constants.PowerClasses["A"][*offer.Hostname]; ok {
			offersHeavyPowerClasses = append(offersHeavyPowerClasses, offer)
		}
		if _, ok := constants.PowerClasses["B"][*offer.Hostname]; ok {
			offersHeavyPowerClasses = append(offersHeavyPowerClasses, offer)
		}
		if _, ok := constants.PowerClasses["C"][*offer.Hostname]; ok {
			offersLightPowerClasses = append(offersLightPowerClasses, offer)
		}
		if _, ok := constants.PowerClasses["D"][*offer.Hostname]; ok {
			offersLightPowerClasses = append(offersLightPowerClasses, offer)
		}
	}

	log.Println("Spreading Large tasks into ClassAB Offers:")
	for _, o := range offersHeavyPowerClasses {
		log.Println(*o.Hostname)
	}
	log.Println("Packing Small tasks into ClassCD Offers:")
	for _, o := range offersLightPowerClasses {
		log.Println(*o.Hostname)
	}

	// Packing tasks into offersLightPowerClasses
	s.pack(offersLightPowerClasses, driver)
	// Spreading tasks among offersHeavyPowerClasses
	s.spread(offersHeavyPowerClasses, driver)
}

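// StatusUpdate tracks the number of running tasks and, once shutdown has been signalled
// and the last task reaches a terminal state, closes the Done channel.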
func (s *TopHeavy) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.Printf("Received task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value)

	if *status.State == mesos.TaskState_TASK_RUNNING {
		s.tasksRunning++
	} else if IsTerminal(status.State) {
		delete(s.running[status.GetSlaveId().GoString()], *status.TaskId.Value)
		s.tasksRunning--
		if s.tasksRunning == 0 {
			select {
			case <-s.Shutdown:
				close(s.Done)
			default:
			}
		}
	}
	log.Printf("DONE: Task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value)
}