Resolved merge conflicts with master. Also added a TODO in the README to use Go 1.8 techniques.
Commit 1b15bb14e7
24 changed files with 122 additions and 101 deletions
README.md (51 changes)
@@ -8,9 +8,8 @@ To Do:
 * Add ability to use constraints
 * Running average calculations https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average
 * Make parameters corresponding to each scheduler configurable (possible to have a config template for each scheduler?)
-* TODO : Adding type of scheduler to be used, to be picked from a config file, along with it's configurable parameters.
+* Adding type of scheduler to be used, to be picked from a config file, along with it's configurable parameters.
 * Write test code for each scheduler (This should be after the design change)
-* Some of the constants in constants/constants.go can vary based on the environment.
   Possible to setup the constants at runtime based on the environment?
 * Log fix for declining offer -- different reason when insufficient resources as compared to when there are no
   longer any tasks to schedule.

@@ -19,8 +18,9 @@ To Do:
 * Convert def#WattsToConsider(...) to be a receiver of def.Task and change the name of it to Watts(...).
 * Have a generic sorter for task resources instead of having one for each kind of resource.
 * **Critical** -- Add software requirements to the README.md (Mesos version, RAPL version, PCP version, Go version...)
-* Retrofit to use Go 1.8 sorting techniques. Use def/taskUtils.go for reference.
-* Retrofit TopHeavy and BottomHeavy schedulers to use the clustering utility for tasks.
+* **Critical** -- Retrofit to use Go 1.8 sorting techniques. Use def/taskUtils.go for reference.
+* Handle powerclass not configured on a node condition. As of now, an assumption is made that the powerclass is configured
+  for all the nodes.
 
 **Requires [Performance Co-Pilot](http://pcp.io/) tool pmdumptext to be installed on the
 machine on which electron is launched for logging to work and PCP collector agents installed

@@ -31,9 +31,9 @@ How to run (Use the --help option to get information about other command-line op
 
 `./electron -workload <workload json>`
 
-To run electron with ignoreWatts, run the following command,
+To run electron with Watts as Resource, run the following command,
 
-`./electron -workload <workload json> -ignoreWatts`
+`./electron -workload <workload json> -wattsAsAResource`
 
 
 Workload schema:

@@ -44,30 +44,31 @@ Workload schema:
    "name": "minife",
    "cpu": 3.0,
    "ram": 4096,
-   "watts": 50,
-   "image": "gouravr/minife:v5",
-   "cmd": "cd src && mpirun -np 1 miniFE.x -nx 100 -ny 100 -nz 100",
-   "inst": 9,
-   "class_to_watts" : {
-      "A": 30.2475289996,
-      "B": 35.6491229228,
-      "C": 24.0476734352
-   }
+   "watts": 63.141,
+   "class_to_watts": {
+      "A": 93.062,
+      "B": 65.552,
+      "C": 57.897,
+      "D": 60.729
+   },
+   "image": "rdelvalle/minife:electron1",
+   "cmd": "cd src && mpirun -np 3 miniFE.x -nx 100 -ny 100 -nz 100",
+   "inst": 10
 },
 {
    "name": "dgemm",
    "cpu": 3.0,
-   "ram": 4096,
-   "watts": 50,
-   "image": "gouravr/dgemm:v2",
+   "ram": 32,
+   "watts": 85.903,
+   "class_to_watts": {
+      "A": 114.789,
+      "B": 89.133,
+      "C": 82.672,
+      "D": 81.944
+   },
+   "image": "rdelvalle/dgemm:electron1",
    "cmd": "/./mt-dgemm 1024",
-   "inst": 9,
-   "class_to_watts" : {
-      "A": 35.2475289996,
-      "B": 25.6491229228,
-      "C": 29.0476734352
-   }
+   "inst": 10
 }
]
 ```
 
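For context, the workload JSON above is decoded by def.TasksFromJSON, which appears in the def/task.go hunk below. The following is only a rough, self-contained sketch of what a matching struct and decoder could look like; the field names, JSON tags, and file path are assumptions that mirror the schema above, not the project's actual def.Task definition.

```go
// Sketch only: field names and JSON tags are assumptions that mirror the
// workload schema above, not the actual def.Task definition.
package main

import (
	"encoding/json"
	"fmt"
	"os"
)

type Task struct {
	Name         string             `json:"name"`
	CPU          float64            `json:"cpu"`
	RAM          float64            `json:"ram"`
	Watts        float64            `json:"watts"`
	ClassToWatts map[string]float64 `json:"class_to_watts"`
	Image        string             `json:"image"`
	CMD          string             `json:"cmd"`
	Inst         int                `json:"inst"`
}

func main() {
	f, err := os.Open("workload.json") // placeholder path
	if err != nil {
		panic(err)
	}
	defer f.Close()

	// The workload file is a JSON array of task definitions.
	var tasks []Task
	if err := json.NewDecoder(f).Decode(&tasks); err != nil {
		panic(err)
	}
	fmt.Printf("loaded %d task definitions\n", len(tasks))
}
```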
@@ -7,34 +7,14 @@ TODO: Clean this up and use Mesos Attributes instead.
 */
 package constants
 
-var Hosts = []string{"stratos-001.cs.binghamton.edu", "stratos-002.cs.binghamton.edu",
-    "stratos-003.cs.binghamton.edu", "stratos-004.cs.binghamton.edu",
-    "stratos-005.cs.binghamton.edu", "stratos-006.cs.binghamton.edu",
-    "stratos-007.cs.binghamton.edu", "stratos-008.cs.binghamton.edu"}
+var Hosts = make(map[string]struct{})
 
 /*
 Classification of the nodes in the cluster based on their Thermal Design Power (TDP).
 The power classes are labelled in the decreasing order of the corresponding TDP, with class A nodes
 having the highest TDP and class C nodes having the lowest TDP.
 */
-var PowerClasses = map[string]map[string]bool{
-    "A": map[string]bool{
-        "stratos-005.cs.binghamton.edu": true,
-        "stratos-006.cs.binghamton.edu": true,
-    },
-    "B": map[string]bool{
-        "stratos-007.cs.binghamton.edu": true,
-    },
-    "C": map[string]bool{
-        "stratos-008.cs.binghamton.edu": true,
-    },
-    "D": map[string]bool {
-        "stratos-001.cs.binghamton.edu": true,
-        "stratos-002.cs.binghamton.edu": true,
-        "stratos-003.cs.binghamton.edu": true,
-        "stratos-004.cs.binghamton.edu": true,
-    },
-}
+var PowerClasses = make(map[string]map[string]struct{})
 
 /*
 Margin with respect to the required power for a job.

@@ -47,4 +27,4 @@ var Tolerance = 0.70
 var ConsiderationWindowSize = 20
 
 // Threshold below which a host should be capped
-var CapThreshold = 12.5
+var LowerCapLimit = 12.5
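The commit replaces the hard-coded host list and boolean power-class maps with empty struct{}-valued sets that are filled in at runtime (see offerUtils.UpdateEnvironment further down). A minimal standalone sketch of how such sets are populated and queried; the host and class names here are placeholders, not real cluster nodes.

```go
package main

import "fmt"

// Set-style maps, mirroring the new declarations in the constants package.
var Hosts = make(map[string]struct{})
var PowerClasses = make(map[string]map[string]struct{})

func main() {
	// Register a host under a power class (placeholder names).
	host, class := "some-host.example.edu", "A"
	Hosts[host] = struct{}{}
	if _, ok := PowerClasses[class]; !ok {
		PowerClasses[class] = make(map[string]struct{})
	}
	PowerClasses[class][host] = struct{}{}

	// Membership is a constant-time comma-ok lookup; struct{} values take no space.
	if _, ok := PowerClasses[class][host]; ok {
		fmt.Printf("%s belongs to power class %s\n", host, class)
	}
}
```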
@@ -43,7 +43,7 @@ func TasksFromJSON(uri string) ([]Task, error) {
 func (tsk *Task) UpdateHost(newHost string) bool {
     // Validation
     isCorrectHost := false
-    for _, existingHost := range constants.Hosts {
+    for existingHost, _ := range constants.Hosts {
         if newHost == existingHost {
             isCorrectHost = true
         }
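Since constants.Hosts is now keyed by hostname, the validation loop above could in principle be reduced to a single comma-ok lookup. A sketch of that simplification, not part of this commit:

```go
// Hypothetical variant of the validation above: with constants.Hosts as a set,
// membership is a single lookup instead of a loop over all known hosts.
func isKnownHost(newHost string) bool {
	_, ok := constants.Hosts[newHost]
	return ok
}
```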
@@ -3,6 +3,7 @@ package pcp
 import (
     "bitbucket.org/sunybingcloud/electron/constants"
     "bitbucket.org/sunybingcloud/electron/rapl"
+    "bitbucket.org/sunybingcloud/electron/utilities"
     "bufio"
     "container/ring"
     "log"

@@ -14,7 +15,6 @@ import (
     "strings"
     "syscall"
     "time"
-    "bitbucket.org/sunybingcloud/electron/utilities"
 )
 
 func round(num float64) int {
@@ -170,11 +170,12 @@ func StartPCPLogAndProgressiveExtremaCap(quit chan struct{}, logging *bool, pref
             }
             // If no new victim found, then we need to cap the best victim among the ones that are already capped
             if !newVictimFound {
+                canCapAlreadyCappedVictim := false
                 for i := 0; i < len(alreadyCappedHosts); i++ {
                     // If already capped then the host must be present in orderCappedVictims
                     capValue := orderCappedVictims[alreadyCappedHosts[i]]
                     // If capValue is greater than the threshold then cap, else continue
-                    if capValue > constants.CapThreshold {
+                    if capValue > constants.LowerCapLimit {
                         newCapValue := getNextCapValue(capValue, 2)
                         if err := rapl.Cap(alreadyCappedHosts[i], "rapl", newCapValue); err != nil {
                             log.Printf("Error capping host[%s]", alreadyCappedHosts[i])

@@ -182,7 +183,7 @@ func StartPCPLogAndProgressiveExtremaCap(quit chan struct{}, logging *bool, pref
                             // Successful cap
                             log.Printf("Capped host[%s] at %f", alreadyCappedHosts[i], newCapValue)
                             // Checking whether this victim can be capped further
-                            if newCapValue <= constants.CapThreshold {
+                            if newCapValue <= constants.LowerCapLimit {
                                 // Deleting victim from cappedVictims
                                 delete(cappedVictims, alreadyCappedHosts[i])
                                 // Updating the cap value in orderCappedVictims

@@ -192,6 +193,7 @@ func StartPCPLogAndProgressiveExtremaCap(quit chan struct{}, logging *bool, pref
                                 cappedVictims[alreadyCappedHosts[i]] = newCapValue
                                 orderCappedVictims[alreadyCappedHosts[i]] = newCapValue
                             }
+                            canCapAlreadyCappedVictim = true
                             break // Breaking only on successful cap.
                         }
                     } else {

@@ -200,6 +202,9 @@ func StartPCPLogAndProgressiveExtremaCap(quit chan struct{}, logging *bool, pref
                     // If cannot find any victim, then all nodes have been capped to the maximum and we stop capping at this point.
                     }
                 }
+                if !canCapAlreadyCappedVictim {
+                    log.Println("No Victim left to cap.")
+                }
             }
 
         } else if clusterMean < loThreshold {

@@ -211,7 +216,8 @@ func StartPCPLogAndProgressiveExtremaCap(quit chan struct{}, logging *bool, pref
             orderCappedToSort := utilities.GetPairList(orderCappedVictims)
             sort.Sort(orderCappedToSort) // Sorted hosts in non-decreasing order of capped states
             hostToUncap := orderCappedToSort[0].Key
-            // Uncapping the host
+            // Uncapping the host.
+            // This is a floating point operation and might suffer from precision loss.
             newUncapValue := orderCappedVictims[hostToUncap] * 2.0
             if err := rapl.Cap(hostToUncap, "rapl", newUncapValue); err != nil {
                 log.Printf("Error uncapping host[%s]", hostToUncap)

@@ -231,7 +237,7 @@ func StartPCPLogAndProgressiveExtremaCap(quit chan struct{}, logging *bool, pref
                 delete(orderCappedVictims, hostToUncap)
                 // Removing entry from cappedVictims as this host is no longer capped
                 delete(cappedVictims, hostToUncap)
-            } else if newUncapValue > constants.CapThreshold { // this check is unnecessary and can be converted to 'else'
+            } else if newUncapValue > constants.LowerCapLimit { // this check is unnecessary and can be converted to 'else'
                 // Updating the cap value
                 orderCappedVictims[hostToUncap] = newUncapValue
                 cappedVictims[hostToUncap] = newUncapValue
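The renamed constants.LowerCapLimit acts as a floor for the progressive capping loop above: each trigger tightens an already-capped victim via getNextCapValue(capValue, 2) until the value would cross the limit, and uncapping doubles it back. A small standalone sketch of that progression, assuming getNextCapValue simply halves its argument; the starting value is a placeholder.

```go
package main

import "fmt"

// LowerCapLimit mirrors the renamed constant: below this value a host is not capped further.
const LowerCapLimit = 12.5

// nextCapValue assumes the halving behaviour of getNextCapValue(capValue, 2) in the diff.
func nextCapValue(capValue float64) float64 {
	return capValue / 2.0
}

func main() {
	capValue := 100.0 // placeholder: host starts effectively uncapped

	// Progressive capping: keep halving while the current value is above the lower limit.
	for capValue > LowerCapLimit {
		capValue = nextCapValue(capValue)
		fmt.Printf("capped at %f\n", capValue)
	}

	// Uncapping doubles the value back up; as the diff's new comment notes,
	// repeated floating-point halving and doubling may suffer from precision loss.
	capValue *= 2.0
	fmt.Printf("uncapped to %f\n", capValue)
}
```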
pcp/pcp.go (17 changes)
@@ -37,15 +37,6 @@ func Start(quit chan struct{}, logging *bool, prefix string) {
         // Write to logfile
         logFile.WriteString(scanner.Text() + "\n")
 
-        /*
-            headers := strings.Split(scanner.Text(), ",")
-
-            for _, hostMetric := range headers {
-                split := strings.Split(hostMetric, ":")
-                fmt.Printf("Host %s: Metric: %s\n", split[0], split[1])
-            }
-        */
-
         // Throw away first set of results
         scanner.Scan()
 

@@ -57,15 +48,7 @@ func Start(quit chan struct{}, logging *bool, prefix string) {
                 logFile.WriteString(scanner.Text() + "\n")
             }
 
-            /*
-                fmt.Printf("Second: %d\n", seconds)
-                for i, val := range strings.Split(scanner.Text(), ",") {
-                    fmt.Printf("host metric: %s val: %s\n", headers[i], val)
-                }*/
-
             seconds++
 
-            // fmt.Println("--------------------------------")
-
         }
     }(logging)
 
@@ -110,7 +110,7 @@ func (capper ClusterwideCapper) CleverRecap(totalPower map[string]float64,
     wattsUsages := make(map[string][]float64)
     hostOfFinishedTask := ""
     indexOfFinishedTask := -1
-    for _, host := range constants.Hosts {
+    for host, _ := range constants.Hosts {
         wattsUsages[host] = []float64{0.0}
     }
     for host, tasks := range taskMonitor {
scheduler.go (10 changes)
@@ -43,7 +43,7 @@ func main() {
     }
 
     if *hiThreshold < *loThreshold {
-        fmt.Println("High threshold is of a lower value than low threhold.")
+        fmt.Println("High threshold is of a lower value than low threshold.")
         os.Exit(1)
     }
 

@@ -74,12 +74,12 @@ func main() {
         return
     }
 
-    //go pcp.Start(scheduler.PCPLog, &scheduler.RecordPCP, logPrefix)
+    go pcp.Start(scheduler.PCPLog, &scheduler.RecordPCP, logPrefix)
     //go pcp.StartPCPLogAndExtremaDynamicCap(scheduler.PCPLog, &scheduler.RecordPCP, logPrefix, *hiThreshold, *loThreshold)
-    go pcp.StartPCPLogAndProgressiveExtremaCap(scheduler.PCPLog, &scheduler.RecordPCP, logPrefix, *hiThreshold, *loThreshold)
+    //go pcp.StartPCPLogAndProgressiveExtremaCap(scheduler.PCPLog, &scheduler.RecordPCP, logPrefix, *hiThreshold, *loThreshold)
     time.Sleep(1 * time.Second) // Take a second between starting PCP log and continuing
 
-    // Attempt to handle signint to not leave pmdumptext running
+    // Attempt to handle SIGINT to not leave pmdumptext running
     // Catch interrupt
     go func() {
         c := make(chan os.Signal, 1)

@@ -120,4 +120,4 @@ func main() {
         log.Printf("Framework stopped with status %s and error: %s\n", status.String(), err.Error())
     }
     log.Println("Exiting...")
 }
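In scheduler.go the commit switches which PCP goroutine is active by commenting lines in and out. A hypothetical alternative, not part of this commit, would be to pick the logging or capping mode with a command-line flag. The flag name below is invented; the function calls are taken verbatim from the diff, and the fragment is meant to slot into scheduler.go's main and reuse its existing imports.

```go
// Hypothetical flag-based selection of the PCP mode; "pcpMode" is an invented flag name.
var pcpMode = flag.String("pcpMode", "log", "one of: log, extrema, progressive")

// startPCP launches the chosen PCP goroutine (fragment intended for scheduler.go).
func startPCP(logPrefix string, hiThreshold, loThreshold float64) {
	switch *pcpMode {
	case "extrema":
		go pcp.StartPCPLogAndExtremaDynamicCap(scheduler.PCPLog, &scheduler.RecordPCP, logPrefix, hiThreshold, loThreshold)
	case "progressive":
		go pcp.StartPCPLogAndProgressiveExtremaCap(scheduler.PCPLog, &scheduler.RecordPCP, logPrefix, hiThreshold, loThreshold)
	default:
		go pcp.Start(scheduler.PCPLog, &scheduler.RecordPCP, logPrefix)
	}
}
```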
@@ -127,6 +127,7 @@ func (s *BinPackSortedWattsSortedOffers) ResourceOffers(driver sched.SchedulerDr
     log.Println("Sorted Offers:")
     for i := 0; i < len(offers); i++ {
         offer := offers[i]
+        offerUtils.UpdateEnvironment(offer)
         offerCPU, _, _ := offerUtils.OfferAgg(offer)
         log.Printf("Offer[%s].CPU = %f\n", offer.GetHostname(), offerCPU)
     }
@@ -210,6 +210,7 @@ func (s *BinPackedPistonCapper) ResourceOffers(driver sched.SchedulerDriver, off
 
     // retrieving the total power for each host in the offers
     for _, offer := range offers {
+        offerUtils.UpdateEnvironment(offer)
         if _, ok := s.totalPower[*offer.Hostname]; !ok {
             _, _, offerWatts := offerUtils.OfferAgg(offer)
             s.totalPower[*offer.Hostname] = offerWatts
@@ -120,6 +120,7 @@ func (s *BinPackSortedWatts) ResourceOffers(driver sched.SchedulerDriver, offers
     log.Printf("Received %d resource offers", len(offers))
 
     for _, offer := range offers {
+        offerUtils.UpdateEnvironment(offer)
         select {
         case <-s.Shutdown:
             log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]")
@@ -17,12 +17,12 @@ import (
 )
 
 /*
-Tasks are categorized into small and large tasks based on the watts requirement.
-All the small tasks are packed into offers from agents belonging to power class C and power class D, using BinPacking.
-All the large tasks are spread among the offers from agents belonging to power class A and power class B, using FirstFit.
+Tasks are categorized into small and large tasks based on watts requirements.
+All the large tasks are packed into offers from agents belonging to power classes A and B, using Bin-Packing.
+All the small tasks are spread among offers from agents belonging to power class C and D, using First-Fit.
 
-BinPacking has the most effect when co-scheduling of tasks is increased. Large tasks typically utilize more resources and hence,
+Bin-Packing has the most effect when co-scheduling of tasks is increased. Large tasks typically utilize more resources and hence,
 co-scheduling them has a great impact on the total power utilization.
 */
 
 func (s *BottomHeavy) takeOfferBinPack(offer *mesos.Offer, totalCPU, totalRAM, totalWatts,
@@ -174,7 +174,7 @@ func (s *BottomHeavy) createTaskInfoAndLogSchedTrace(offer *mesos.Offer, task de
     return taskToSchedule
 }
 
-// Using BinPacking to pack small tasks into this offer.
+// Using BinPacking to pack large tasks into the given offers.
 func (s *BottomHeavy) pack(offers []*mesos.Offer, driver sched.SchedulerDriver) {
     for _, offer := range offers {
         select {

@@ -236,7 +236,7 @@ func (s *BottomHeavy) pack(offers []*mesos.Offer, driver sched.SchedulerDriver)
         }
     }
 
-// Using first fit to spread large tasks into these offers.
+// Using First-Fit to spread small tasks among the given offers.
 func (s *BottomHeavy) spread(offers []*mesos.Offer, driver sched.SchedulerDriver) {
     for _, offer := range offers {
         select {
@@ -297,6 +297,7 @@ func (s *BottomHeavy) ResourceOffers(driver sched.SchedulerDriver, offers []*mes
     offersLightPowerClasses := []*mesos.Offer{}
 
     for _, offer := range offers {
+        offerUtils.UpdateEnvironment(offer)
         select {
         case <-s.Shutdown:
             log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]")

@@ -307,13 +308,19 @@ func (s *BottomHeavy) ResourceOffers(driver sched.SchedulerDriver, offers []*mes
         default:
         }
 
-        if constants.PowerClasses["A"][*offer.Hostname] ||
-            constants.PowerClasses["B"][*offer.Hostname] {
+        if _, ok := constants.PowerClasses["A"][*offer.Hostname]; ok {
             offersHeavyPowerClasses = append(offersHeavyPowerClasses, offer)
-        } else if constants.PowerClasses["C"][*offer.Hostname] ||
-            constants.PowerClasses["D"][*offer.Hostname] {
+        }
+        if _, ok := constants.PowerClasses["B"][*offer.Hostname]; ok {
+            offersHeavyPowerClasses = append(offersHeavyPowerClasses, offer)
+        }
+        if _, ok := constants.PowerClasses["C"][*offer.Hostname]; ok {
             offersLightPowerClasses = append(offersLightPowerClasses, offer)
         }
+        if _, ok := constants.PowerClasses["D"][*offer.Hostname]; ok {
+            offersLightPowerClasses = append(offersLightPowerClasses, offer)
+        }
 
     }
 
     log.Println("Packing Large tasks into ClassAB offers:")
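The classification block above repeats the same comma-ok membership test for classes A through D. A standalone sketch of one way to factor those checks, keeping the heavy/light split described in the scheduler's doc comment; the helper, map literal, and hostnames are hypothetical and not part of this commit.

```go
package main

import "fmt"

// powerClasses mirrors constants.PowerClasses: class name -> set of hostnames.
// The entries are placeholders for illustration.
var powerClasses = map[string]map[string]struct{}{
	"A": {"host-a.example": {}},
	"C": {"host-c.example": {}},
}

// classifyOffer is a hypothetical helper: classes A and B count as "heavy",
// C and D as "light", mirroring the checks in BottomHeavy/TopHeavy.ResourceOffers.
func classifyOffer(hostname string) (heavy, light bool) {
	for _, class := range []string{"A", "B"} {
		if _, ok := powerClasses[class][hostname]; ok {
			heavy = true
		}
	}
	for _, class := range []string{"C", "D"} {
		if _, ok := powerClasses[class][hostname]; ok {
			light = true
		}
	}
	return heavy, light
}

func main() {
	fmt.Println(classifyOffer("host-a.example")) // true false
	fmt.Println(classifyOffer("host-c.example")) // false true
}
```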
@@ -164,6 +164,7 @@ func (s *BPSWMaxMinWatts) ResourceOffers(driver sched.SchedulerDriver, offers []
     log.Printf("Received %d resource offers", len(offers))
 
     for _, offer := range offers {
+        offerUtils.UpdateEnvironment(offer)
         select {
         case <-s.Shutdown:
             log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]")
@@ -261,6 +261,7 @@ func (s *BPSWMaxMinPistonCapping) ResourceOffers(driver sched.SchedulerDriver, o
     log.Printf("Received %d resource offers", len(offers))
 
     for _, offer := range offers {
+        offerUtils.UpdateEnvironment(offer)
         select {
         case <-s.Shutdown:
             log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]")
@@ -164,7 +164,7 @@ func (s *BPSWMaxMinProacCC) startCapping() {
             // updating cap value
             bpMaxMinProacCCCapValue = bpMaxMinProacCCNewCapValue
             if bpMaxMinProacCCCapValue > 0.0 {
-                for _, host := range constants.Hosts {
+                for host, _ := range constants.Hosts {
                     // Rounding cap value to nearest int
                     if err := rapl.Cap(host, "rapl", float64(int(math.Floor(bpMaxMinProacCCCapValue+0.5)))); err != nil {
                         log.Println(err)

@@ -190,7 +190,7 @@ func (s *BPSWMaxMinProacCC) startRecapping() {
             bpMaxMinProacCCMutex.Lock()
             // If stopped performing cluster-wide capping, then we need to recap.
             if s.isRecapping && bpMaxMinProacCCRecapValue > 0.0 {
-                for _, host := range constants.Hosts {
+                for host, _ := range constants.Hosts {
                     // Rounding the recap value to the nearest int
                     if err := rapl.Cap(host, "rapl", float64(int(math.Floor(bpMaxMinProacCCRecapValue+0.5)))); err != nil {
                         log.Println(err)

@@ -300,6 +300,7 @@ func (s *BPSWMaxMinProacCC) ResourceOffers(driver sched.SchedulerDriver, offers
 
     // retrieving the available power for all the hosts in the offers.
     for _, offer := range offers {
+        offerUtils.UpdateEnvironment(offer)
         _, _, offerWatts := offerUtils.OfferAgg(offer)
         s.availablePower[*offer.Hostname] = offerWatts
         // setting total power if the first time
@@ -119,6 +119,7 @@ func (s *FirstFit) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.
     log.Printf("Received %d resource offers", len(offers))
 
     for _, offer := range offers {
+        offerUtils.UpdateEnvironment(offer)
         select {
         case <-s.Shutdown:
             log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]")
@@ -164,7 +164,7 @@ func (s *FirstFitProacCC) startCapping() {
         // Need to cap the cluster to the fcfsCurrentCapValue.
         fcfsMutex.Lock()
         if fcfsCurrentCapValue > 0.0 {
-            for _, host := range constants.Hosts {
+            for host, _ := range constants.Hosts {
                 // Rounding curreCapValue to the nearest int.
                 if err := rapl.Cap(host, "rapl", float64(int(math.Floor(fcfsCurrentCapValue+0.5)))); err != nil {
                     log.Println(err)

@@ -188,7 +188,7 @@ func (s *FirstFitProacCC) startRecapping() {
         fcfsMutex.Lock()
         // If stopped performing cluster wide capping then we need to explicitly cap the entire cluster.
         if s.isRecapping && fcfsRecapValue > 0.0 {
-            for _, host := range constants.Hosts {
+            for host, _ := range constants.Hosts {
                 // Rounding curreCapValue to the nearest int.
                 if err := rapl.Cap(host, "rapl", float64(int(math.Floor(fcfsRecapValue+0.5)))); err != nil {
                     log.Println(err)

@@ -233,6 +233,7 @@ func (s *FirstFitProacCC) ResourceOffers(driver sched.SchedulerDriver, offers []
 
     // retrieving the available power for all the hosts in the offers.
     for _, offer := range offers {
+        offerUtils.UpdateEnvironment(offer)
         _, _, offer_watts := offerUtils.OfferAgg(offer)
         s.availablePower[*offer.Hostname] = offer_watts
         // setting total power if the first time.
@@ -126,6 +126,7 @@ func (s *FirstFitSortedOffers) ResourceOffers(driver sched.SchedulerDriver, offe
     log.Println("Sorted Offers:")
     for i := 0; i < len(offers); i++ {
         offer := offers[i]
+        offerUtils.UpdateEnvironment(offer)
         offerCPU, _, _ := offerUtils.OfferAgg(offer)
         log.Printf("Offer[%s].CPU = %f\n", offer.GetHostname(), offerCPU)
     }
@@ -177,7 +177,7 @@ func (s *FirstFitSortedWattsProacCC) startCapping() {
         // Need to cap the cluster to the rankedCurrentCapValue.
         rankedMutex.Lock()
         if rankedCurrentCapValue > 0.0 {
-            for _, host := range constants.Hosts {
+            for host, _ := range constants.Hosts {
                 // Rounding currentCapValue to the nearest int.
                 if err := rapl.Cap(host, "rapl", float64(int(math.Floor(rankedCurrentCapValue+0.5)))); err != nil {
                     log.Println(err)

@@ -201,7 +201,7 @@ func (s *FirstFitSortedWattsProacCC) startRecapping() {
         rankedMutex.Lock()
         // If stopped performing cluster wide capping then we need to explicitly cap the entire cluster.
         if s.isRecapping && rankedRecapValue > 0.0 {
-            for _, host := range constants.Hosts {
+            for host, _ := range constants.Hosts {
                 // Rounding currentCapValue to the nearest int.
                 if err := rapl.Cap(host, "rapl", float64(int(math.Floor(rankedRecapValue+0.5)))); err != nil {
                     log.Println(err)

@@ -246,6 +246,7 @@ func (s *FirstFitSortedWattsProacCC) ResourceOffers(driver sched.SchedulerDriver
 
     // retrieving the available power for all the hosts in the offers.
    for _, offer := range offers {
+        offerUtils.UpdateEnvironment(offer)
         _, _, offer_watts := offerUtils.OfferAgg(offer)
         s.availablePower[*offer.Hostname] = offer_watts
         // setting total power if the first time.
@@ -128,6 +128,7 @@ func (s *FirstFitSortedWattsSortedOffers) ResourceOffers(driver sched.SchedulerD
     log.Println("Sorted Offers:")
     for i := 0; i < len(offers); i++ {
         offer := offers[i]
+        offerUtils.UpdateEnvironment(offer)
         offerCPU, _, _ := offerUtils.OfferAgg(offer)
         log.Printf("Offer[%s].CPU = %f\n", offer.GetHostname(), offerCPU)
     }
@@ -122,6 +122,7 @@ func (s *FirstFitSortedWatts) ResourceOffers(driver sched.SchedulerDriver, offer
     log.Printf("Received %d resource offers", len(offers))
 
     for _, offer := range offers {
+        offerUtils.UpdateEnvironment(offer)
         select {
         case <-s.Shutdown:
             log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]")
@@ -112,6 +112,7 @@ func (s *FirstFitWattsOnly) ResourceOffers(driver sched.SchedulerDriver, offers
     log.Printf("Received %d resource offers", len(offers))
 
     for _, offer := range offers {
+        offerUtils.UpdateEnvironment(offer)
         select {
         case <-s.Shutdown:
             log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]")
@@ -18,7 +18,7 @@ func coLocated(tasks map[string]bool) {
 // Get the powerClass of the given hostname
 func hostToPowerClass(hostName string) string {
     for powerClass, hosts := range constants.PowerClasses {
-        if ok := hosts[hostName]; ok {
+        if _, ok := hosts[hostName]; ok {
             return powerClass
         }
     }
@@ -18,8 +18,8 @@ import (
 
 /*
 Tasks are categorized into small and large tasks based on the watts requirement.
-All the large tasks are packed into offers from agents belonging to power class A and power class B, using BinPacking.
-All the small tasks are spread among the offers from agents belonging to power class C and power class D, using FirstFit.
+All the small tasks are packed into offers from agents belonging to power class C and power class D, using BinPacking.
+All the large tasks are spread among the offers from agents belonging to power class A and power class B, using FirstFit.
 
 This was done to give a little more room for the large tasks (power intensive) for execution and reduce the possibility of
 starvation of power intensive tasks.

@@ -296,6 +296,7 @@ func (s *TopHeavy) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.
     offersLightPowerClasses := []*mesos.Offer{}
 
     for _, offer := range offers {
+        offerUtils.UpdateEnvironment(offer)
         select {
         case <-s.Shutdown:
             log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]")

@@ -306,11 +307,16 @@ func (s *TopHeavy) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.
         default:
         }
 
-        if constants.PowerClasses["A"][*offer.Hostname] ||
-            constants.PowerClasses["B"][*offer.Hostname] {
+        if _, ok := constants.PowerClasses["A"][*offer.Hostname]; ok {
             offersHeavyPowerClasses = append(offersHeavyPowerClasses, offer)
-        } else if constants.PowerClasses["C"][*offer.Hostname] ||
-            constants.PowerClasses["D"][*offer.Hostname] {
+        }
+        if _, ok := constants.PowerClasses["B"][*offer.Hostname]; ok {
+            offersHeavyPowerClasses = append(offersHeavyPowerClasses, offer)
+        }
+        if _, ok := constants.PowerClasses["C"][*offer.Hostname]; ok {
+            offersLightPowerClasses = append(offersLightPowerClasses, offer)
+        }
+        if _, ok := constants.PowerClasses["D"][*offer.Hostname]; ok {
             offersLightPowerClasses = append(offersLightPowerClasses, offer)
         }
     }
 
@@ -1,7 +1,9 @@
 package offerUtils
 
 import (
+    "bitbucket.org/sunybingcloud/electron/constants"
     mesos "github.com/mesos/mesos-go/mesosproto"
+    "log"
     "strings"
 )
 

@@ -60,3 +62,26 @@ func HostMismatch(offerHost string, taskHost string) bool {
     }
     return false
 }
+
+// If the host in the offer is a new host, add the host to the set of Hosts and
+// register the powerclass of this host.
+func UpdateEnvironment(offer *mesos.Offer) {
+    var host = offer.GetHostname()
+    // If this host is not present in the set of hosts.
+    if _, ok := constants.Hosts[host]; !ok {
+        log.Printf("New host detected. Adding host [%s]", host)
+        // Add this host.
+        constants.Hosts[host] = struct{}{}
+        // Get the power class of this host.
+        class := PowerClass(offer)
+        log.Printf("Registering the power class... Host [%s] --> PowerClass [%s]", host, class)
+        // If new power class, register the power class.
+        if _, ok := constants.PowerClasses[class]; !ok {
+            constants.PowerClasses[class] = make(map[string]struct{})
+        }
+        // If the host of this class is not yet present in PowerClasses[class], add it.
+        if _, ok := constants.PowerClasses[class][host]; !ok {
+            constants.PowerClasses[class][host] = struct{}{}
+        }
+    }
+}
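With UpdateEnvironment in place, the host set and the power-class registry are built up lazily as offers arrive, instead of being hard-coded in the constants package. A short illustrative sketch, not part of this commit, showing how the registry could be inspected after some offers have been processed; only the import path and map shapes from the diff are assumed.

```go
package main

import (
	"log"

	"bitbucket.org/sunybingcloud/electron/constants"
)

// dumpRegistry prints the host set and power-class registry that
// offerUtils.UpdateEnvironment fills in as offers arrive (illustrative only).
func dumpRegistry() {
	for host := range constants.Hosts {
		log.Printf("Known host: %s", host)
	}
	for class, hosts := range constants.PowerClasses {
		for host := range hosts {
			log.Printf("PowerClass[%s] --> %s", class, host)
		}
	}
}

func main() {
	dumpRegistry()
}
```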