package schedulers

import (
	"bitbucket.org/sunybingcloud/electron/constants"
	"bitbucket.org/sunybingcloud/electron/def"
	"bitbucket.org/sunybingcloud/electron/rapl"
	"bitbucket.org/sunybingcloud/electron/utilities/mesosUtils"
	"bitbucket.org/sunybingcloud/electron/utilities/offerUtils"
	"errors"
	"fmt"
	"github.com/golang/protobuf/proto"
	mesos "github.com/mesos/mesos-go/mesosproto"
	"github.com/mesos/mesos-go/mesosutil"
	sched "github.com/mesos/mesos-go/scheduler"
	"log"
	"math"
	"os"
	"sync"
	"time"
)

/*
BinPackedPistonCapper implements the Scheduler interface.

This extends the BinPacking algorithm to also cap each node at a different value,
corresponding to the load on that node.
*/
type BinPackedPistonCapper struct {
	base        // Type embedded to inherit common functions
	taskMonitor map[string][]def.Task
	totalPower  map[string]float64
	ticker      *time.Ticker
	isCapping   bool
}

// NewBinPackedPistonCapper creates a new electron scheduler.
func NewBinPackedPistonCapper(tasks []def.Task, wattsAsAResource bool, schedTracePrefix string,
	classMapWatts bool) *BinPackedPistonCapper {

	logFile, err := os.Create("./" + schedTracePrefix + "_schedTrace.log")
	if err != nil {
		log.Fatal(err)
	}

	s := &BinPackedPistonCapper{
		base: base{
			tasks:            tasks,
			wattsAsAResource: wattsAsAResource,
			classMapWatts:    classMapWatts,
			Shutdown:         make(chan struct{}),
			Done:             make(chan struct{}),
			PCPLog:           make(chan struct{}),
			running:          make(map[string]map[string]bool),
			RecordPCP:        false,
			schedTrace:       log.New(logFile, "", log.LstdFlags),
		},
		taskMonitor: make(map[string][]def.Task),
		totalPower:  make(map[string]float64),
		ticker:      time.NewTicker(5 * time.Second),
		isCapping:   false,
	}
	return s
}
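
// A minimal wiring sketch (hypothetical variable names; the actual setup lives in electron's
// main package), assuming the mesos-go scheduler driver is used as it is elsewhere in this project:
//
//	s := NewBinPackedPistonCapper(tasks, true, "bpPistonCapper", false)
//	driver, err := sched.NewMesosSchedulerDriver(sched.DriverConfig{
//		Master:    masterAddr,
//		Framework: frameworkInfo,
//		Scheduler: s,
//	})
//	if err == nil {
//		driver.Run()
//	}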

// takeOffer checks whether the task fits within what remains of the offer's resources.
func (s *BinPackedPistonCapper) takeOffer(offer *mesos.Offer, offerWatts float64, offerCPU float64, offerRAM float64,
	totalWatts float64, totalCPU float64, totalRAM float64, task def.Task) bool {

	wattsConsideration, err := def.WattsToConsider(task, s.classMapWatts, offer)
	if err != nil {
		// Error in determining wattsConsideration
		log.Fatal(err)
	}
	if (!s.wattsAsAResource || (offerWatts >= (totalWatts + wattsConsideration))) &&
		(offerCPU >= (totalCPU + task.CPU)) &&
		(offerRAM >= (totalRAM + task.RAM)) {
		return true
	}
	return false
}
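
// For illustration (hypothetical numbers): with wattsAsAResource enabled, an offer of
// <Watts: 200, CPU: 8, RAM: 16384> on which <Watts: 120, CPU: 5, RAM: 8192> has already been
// committed still fits a task asking for <Watts: 60, CPU: 2, RAM: 4096>, since 200 >= 120+60,
// 8 >= 5+2 and 16384 >= 8192+4096; a task asking for 100 W in the same situation would not fit.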

// Mutex to serialize access to the shared cap values and scheduler state.
var bpPistonMutex sync.Mutex

func (s *BinPackedPistonCapper) newTask(offer *mesos.Offer, task def.Task) *mesos.TaskInfo {
	taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances)
	s.tasksCreated++

	if !s.RecordPCP {
		// Turn on logging
		s.RecordPCP = true
		time.Sleep(1 * time.Second) // Make sure we're recording by the time the first task starts
	}

	// If this is our first time running into this Agent
	if _, ok := s.running[offer.GetSlaveId().GoString()]; !ok {
		s.running[offer.GetSlaveId().GoString()] = make(map[string]bool)
	}

	// Setting the task ID to the task. This is done so that we can consider each task to be different,
	// even though they have the same parameters.
	task.SetTaskID(*proto.String("electron-" + taskName))
	// Add task to list of tasks running on node
	s.running[offer.GetSlaveId().GoString()][taskName] = true
	// Adding the task to the taskMonitor
	if len(s.taskMonitor[*offer.Hostname]) == 0 {
		s.taskMonitor[*offer.Hostname] = []def.Task{task}
	} else {
		s.taskMonitor[*offer.Hostname] = append(s.taskMonitor[*offer.Hostname], task)
	}

	resources := []*mesos.Resource{
		mesosutil.NewScalarResource("cpus", task.CPU),
		mesosutil.NewScalarResource("mem", task.RAM),
	}

	if s.wattsAsAResource {
		if wattsToConsider, err := def.WattsToConsider(task, s.classMapWatts, offer); err == nil {
			log.Printf("Watts considered for host[%s] and task[%s] = %f", *offer.Hostname, task.Name, wattsToConsider)
			resources = append(resources, mesosutil.NewScalarResource("watts", wattsToConsider))
		} else {
			// Error in determining wattsConsideration
			log.Fatal(err)
		}
	}

	return &mesos.TaskInfo{
		Name: proto.String(taskName),
		TaskId: &mesos.TaskID{
			Value: proto.String("electron-" + taskName),
		},
		SlaveId:   offer.SlaveId,
		Resources: resources,
		Command: &mesos.CommandInfo{
			Value: proto.String(task.CMD),
		},
		Container: &mesos.ContainerInfo{
			Type: mesos.ContainerInfo_DOCKER.Enum(),
			Docker: &mesos.ContainerInfo_DockerInfo{
				Image:   proto.String(task.Image),
				Network: mesos.ContainerInfo_DockerInfo_BRIDGE.Enum(), // Run everything isolated
			},
		},
	}
}

func (s *BinPackedPistonCapper) Disconnected(sched.SchedulerDriver) {
	// Need to stop the capping process
	s.ticker.Stop()
	bpPistonMutex.Lock()
	s.isCapping = false
	bpPistonMutex.Unlock()
	log.Println("Framework disconnected with master")
}

// Cap value for each host in the cluster, maintained as a percentage of that host's total power.
var bpPistonCapValues = make(map[string]float64)

// Storing the previous cap value for each host so as to not repeatedly cap the nodes to the same value (reduces overhead).
var bpPistonPreviousRoundedCapValues = make(map[string]float64)

// startCapping spawns a go routine that caps each node in the cluster at regular intervals of time.
func (s *BinPackedPistonCapper) startCapping() {
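	// Rounding the computed cap before comparing it to the previous round keeps sub-1%
	// fluctuations from triggering repeated RAPL calls: for example (hypothetical values),
	// cap values of 45.2 and 45.4 both round to 45, so only the first of the two would
	// result in a call to rapl.Cap for that host.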
	go func() {
		for {
			select {
			case <-s.ticker.C:
				// Need to cap each node
				bpPistonMutex.Lock()
				for host, capValue := range bpPistonCapValues {
					roundedCapValue := float64(int(math.Floor(capValue + 0.5)))
					// has the cap value changed
					if prevRoundedCap, ok := bpPistonPreviousRoundedCapValues[host]; ok {
						if prevRoundedCap != roundedCapValue {
							if err := rapl.Cap(host, "rapl", roundedCapValue); err != nil {
								log.Println(err)
							} else {
								log.Printf("Capped [%s] at %d", host, int(roundedCapValue))
							}
							bpPistonPreviousRoundedCapValues[host] = roundedCapValue
						}
					} else {
						if err := rapl.Cap(host, "rapl", roundedCapValue); err != nil {
							log.Println(err)
						} else {
							log.Printf("Capped [%s] at %d", host, int(roundedCapValue))
						}
						bpPistonPreviousRoundedCapValues[host] = roundedCapValue
					}
				}
				bpPistonMutex.Unlock()
			}
		}
	}()
}

// stopCapping stops the capping process.
func (s *BinPackedPistonCapper) stopCapping() {
	if s.isCapping {
		log.Println("Stopping the capping.")
		s.ticker.Stop()
		bpPistonMutex.Lock()
		s.isCapping = false
		bpPistonMutex.Unlock()
	}
}

func (s *BinPackedPistonCapper) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) {
	log.Printf("Received %d resource offers", len(offers))

	// retrieving the total power for each host in the offers
	for _, offer := range offers {
		offerUtils.UpdateEnvironment(offer)
		if _, ok := s.totalPower[*offer.Hostname]; !ok {
			_, _, offerWatts := offerUtils.OfferAgg(offer)
			s.totalPower[*offer.Hostname] = offerWatts
		}
	}

	// Displaying the totalPower
	for host, tpower := range s.totalPower {
		log.Printf("TotalPower[%s] = %f", host, tpower)
	}

	/*
		Piston capping strategy:

		Perform bin-packing of tasks on the nodes in the cluster, making sure that no task is given fewer hard-limit resources than requested.
		For each set of tasks that is scheduled, compute the new cap values for each host in the cluster.
		At regular intervals of time, cap each node in the cluster.
	*/
	for _, offer := range offers {
		select {
		case <-s.Shutdown:
			log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]")
			driver.DeclineOffer(offer.Id, mesosUtils.LongFilter)

			log.Println("Number of tasks still running: ", s.tasksRunning)
			continue
		default:
		}

		fitTasks := []*mesos.TaskInfo{}
		offerCPU, offerRAM, offerWatts := offerUtils.OfferAgg(offer)
		offerTaken := false
		totalWatts := 0.0
		totalCPU := 0.0
		totalRAM := 0.0
		// Store the partialLoad for the host corresponding to this offer.
		// Once we can't fit any more tasks, we update capValue for this host with partialLoad and then launch the fit tasks.
		partialLoad := 0.0
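		// Rough illustration of the arithmetic below (hypothetical numbers): a task whose
		// wattsConsideration is 50 W, scheduled on a host with totalPower = 500 W and
		// constants.Tolerance = 0.9, adds ((50 * 0.9) / 500) * 100 = 9 percentage points to
		// partialLoad; StatusUpdate later subtracts the same amount when that task finishes.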
		for i := 0; i < len(s.tasks); i++ {
			task := s.tasks[i]
			wattsConsideration, err := def.WattsToConsider(task, s.classMapWatts, offer)
			if err != nil {
				// Error in determining wattsConsideration
				log.Fatal(err)
			}

			// Don't take offer if it doesn't match our task's host requirement.
			if offerUtils.HostMismatch(*offer.Hostname, task.Host) {
				continue
			}

			for *task.Instances > 0 {
				// Does the task fit?
				if s.takeOffer(offer, offerWatts, offerCPU, offerRAM,
					totalWatts, totalCPU, totalRAM, task) {

					// Start piston capping if we haven't started yet.
					if !s.isCapping {
						s.isCapping = true
						s.startCapping()
					}

					offerTaken = true
					totalWatts += wattsConsideration
					totalCPU += task.CPU
					totalRAM += task.RAM
					log.Println("Co-Located with: ")
					coLocated(s.running[offer.GetSlaveId().GoString()])
					taskToSchedule := s.newTask(offer, task)
					fitTasks = append(fitTasks, taskToSchedule)

					log.Println("Inst: ", *task.Instances)
					s.schedTrace.Print(offer.GetHostname() + ":" + taskToSchedule.GetTaskId().GetValue())
					*task.Instances--
					// Updating the cap value for offer.Hostname.
					partialLoad += ((wattsConsideration * constants.Tolerance) / s.totalPower[*offer.Hostname]) * 100

					if *task.Instances <= 0 {
						// All instances of the task have been scheduled. Remove it.
						s.tasks = append(s.tasks[:i], s.tasks[i+1:]...)

						if len(s.tasks) <= 0 {
							log.Println("Done scheduling all tasks")
							close(s.Shutdown)
						}
					}
				} else {
					break // Continue on to the next task
				}
			}
		}

		if offerTaken {
			// Updating the cap value for offer.Hostname.
			bpPistonMutex.Lock()
			bpPistonCapValues[*offer.Hostname] += partialLoad
			bpPistonMutex.Unlock()
			log.Printf("Starting on [%s]\n", offer.GetHostname())
			driver.LaunchTasks([]*mesos.OfferID{offer.Id}, fitTasks, mesosUtils.DefaultFilter)
		} else {
			// If there was no match for the task.
			log.Println("There are not enough resources to launch the task: ")
			cpus, mem, watts := offerUtils.OfferAgg(offer)

			log.Printf("<CPU: %f, RAM: %f, Watts: %f>\n", cpus, mem, watts)
			driver.DeclineOffer(offer.Id, mesosUtils.DefaultFilter)
		}
	}
}

// deleteFromTaskMonitor removes the finished task from the taskMonitor.
func (s *BinPackedPistonCapper) deleteFromTaskMonitor(finishedTaskID string) (def.Task, string, error) {
	hostOfFinishedTask := ""
	indexOfFinishedTask := -1
	found := false
	var finishedTask def.Task

	for host, tasks := range s.taskMonitor {
		for i, task := range tasks {
			if task.TaskID == finishedTaskID {
				hostOfFinishedTask = host
				indexOfFinishedTask = i
				found = true
			}
		}
		if found {
			break
		}
	}

	if hostOfFinishedTask != "" && indexOfFinishedTask != -1 {
		finishedTask = s.taskMonitor[hostOfFinishedTask][indexOfFinishedTask]
		log.Printf("Removing task with TaskID [%s] from the list of running tasks\n",
			s.taskMonitor[hostOfFinishedTask][indexOfFinishedTask].TaskID)
		s.taskMonitor[hostOfFinishedTask] = append(s.taskMonitor[hostOfFinishedTask][:indexOfFinishedTask],
			s.taskMonitor[hostOfFinishedTask][indexOfFinishedTask+1:]...)
	} else {
		return finishedTask, hostOfFinishedTask, errors.New("Finished Task not present in TaskMonitor")
	}
	return finishedTask, hostOfFinishedTask, nil
}

func (s *BinPackedPistonCapper) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) {
	log.Printf("Received task status [%s] for task [%s]\n", NameFor(status.State), *status.TaskId.Value)

	if *status.State == mesos.TaskState_TASK_RUNNING {
		bpPistonMutex.Lock()
		s.tasksRunning++
		bpPistonMutex.Unlock()
	} else if IsTerminal(status.State) {
		delete(s.running[status.GetSlaveId().GoString()], *status.TaskId.Value)
		// Deleting the task from the taskMonitor
		finishedTask, hostOfFinishedTask, err := s.deleteFromTaskMonitor(*status.TaskId.Value)
		if err != nil {
			log.Println(err)
		}

		// Need to determine the watts consideration for the finishedTask
		var wattsConsideration float64
		if s.classMapWatts {
			wattsConsideration = finishedTask.ClassToWatts[hostToPowerClass(hostOfFinishedTask)]
		} else {
			wattsConsideration = finishedTask.Watts
		}

		// Need to update the cap value for the host of the finishedTask
		bpPistonMutex.Lock()
		bpPistonCapValues[hostOfFinishedTask] -= ((wattsConsideration * constants.Tolerance) / s.totalPower[hostOfFinishedTask]) * 100

		// Checking to see if the cap value has become 0, in which case we uncap the host.
		if int(math.Floor(bpPistonCapValues[hostOfFinishedTask]+0.5)) == 0 {
			bpPistonCapValues[hostOfFinishedTask] = 100
		}
		s.tasksRunning--
		bpPistonMutex.Unlock()

		if s.tasksRunning == 0 {
			select {
			case <-s.Shutdown:
				s.stopCapping()
				close(s.Done)
			default:
			}
		}
	}
	log.Printf("DONE: Task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value)
}