2017-02-24 20:52:59 -05:00
package def
import (
2018-01-19 17:46:35 -05:00
"github.com/mash/gokmeans"
2017-08-22 13:00:40 -04:00
"log"
2017-08-26 22:33:06 -04:00
"sort"
2018-01-26 17:29:43 -05:00
"errors"
"fmt"
2017-02-24 20:52:59 -05:00
)
2017-09-28 15:36:47 -04:00
// Information about a cluster of tasks.
2017-02-25 15:43:32 -05:00
// TaskCluster describes one cluster of tasks produced by classification.
type TaskCluster struct {
// ClusterIndex is the index of the cluster before ordering; labelAndOrder
// sets it to the key under which the cluster's tasks were grouped.
ClusterIndex int
// Tasks are the tasks that were assigned to this cluster.
Tasks [ ] Task
// SizeScore is the number of other clusters this cluster was ranked
// bigger than in labelAndOrder's pairwise size comparison.
SizeScore int
}
2017-09-28 15:36:47 -04:00
// Classification of Tasks using KMeans clustering using the watts consumption observations.
2017-02-25 15:43:32 -05:00
// TasksToClassify is a slice of Task on which the classification methods
// (taskObservationCalculator and classify) are defined.
type TasksToClassify [ ] Task
2017-08-22 13:00:40 -04:00
// Basic taskObservation calculator. This returns an array consisting of the MMPU requirements of a task.
func ( tc TasksToClassify ) taskObservationCalculator ( task Task ) [ ] float64 {
if task . ClassToWatts != nil {
2017-09-28 15:36:47 -04:00
// Taking the aggregate.
2017-08-22 13:00:40 -04:00
observations := [ ] float64 { }
for _ , watts := range task . ClassToWatts {
observations = append ( observations , watts )
}
return observations
} else if task . Watts != 0.0 {
return [ ] float64 { task . Watts }
} else {
log . Fatal ( "Unable to classify tasks. Missing Watts or ClassToWatts attribute in workload." )
2017-09-28 15:36:47 -04:00
return [ ] float64 { 0.0 } // Won't reach here.
2017-08-22 13:00:40 -04:00
}
}
// ClassifyTasks groups tasks into numberOfClusters clusters using the default
// watts-based observation function (taskObservationCalculator) and returns
// the clusters produced by classify.
func ClassifyTasks(tasks []Task, numberOfClusters int) []TaskCluster {
	toClassify := TasksToClassify(tasks)
	observe := toClassify.taskObservationCalculator
	return toClassify.classify(numberOfClusters, observe)
}
func ( tc TasksToClassify ) classify ( numberOfClusters int , taskObservation func ( task Task ) [ ] float64 ) [ ] TaskCluster {
2017-02-25 15:43:32 -05:00
clusters := make ( map [ int ] [ ] Task )
2017-02-25 19:57:01 -05:00
observations := getObservations ( tc , taskObservation )
2017-09-28 15:36:47 -04:00
// TODO: Make the max number of rounds configurable based on the size of the workload.
2017-04-30 16:48:38 -04:00
// The max number of rounds (currently defaulted to 100) is the number of iterations performed to obtain
2017-09-28 15:36:47 -04:00
// distinct clusters. When the data size becomes very large, we would need more iterations for clustering.
2017-02-25 15:43:32 -05:00
if trained , centroids := gokmeans . Train ( observations , numberOfClusters , 100 ) ; trained {
for i := 0 ; i < len ( observations ) ; i ++ {
observation := observations [ i ]
classIndex := gokmeans . Nearest ( observation , centroids )
if _ , ok := clusters [ classIndex ] ; ok {
clusters [ classIndex ] = append ( clusters [ classIndex ] , tc [ i ] )
} else {
clusters [ classIndex ] = [ ] Task { tc [ i ] }
}
}
}
2017-02-25 19:57:01 -05:00
return labelAndOrder ( clusters , numberOfClusters , taskObservation )
2017-02-25 15:43:32 -05:00
}
2017-09-28 15:36:47 -04:00
// Record observations.
2017-02-25 19:57:01 -05:00
func getObservations ( tasks [ ] Task , taskObservation func ( task Task ) [ ] float64 ) [ ] gokmeans . Node {
2017-02-24 20:52:59 -05:00
observations := [ ] gokmeans . Node { }
for i := 0 ; i < len ( tasks ) ; i ++ {
2017-02-25 19:57:01 -05:00
observations = append ( observations , taskObservation ( tasks [ i ] ) )
2017-02-24 20:52:59 -05:00
}
return observations
}
2017-09-28 15:36:47 -04:00
// Size tasks based on the power consumption.
2017-04-30 16:48:38 -04:00
// TODO: Size the cluster in a better way other than just taking an aggregate of the watts resource requirement.
2017-02-25 19:57:01 -05:00
func clusterSize ( tasks [ ] Task , taskObservation func ( task Task ) [ ] float64 ) float64 {
2017-02-24 20:52:59 -05:00
size := 0.0
for _ , task := range tasks {
2017-02-25 19:57:01 -05:00
for _ , observation := range taskObservation ( task ) {
size += observation
2017-02-24 20:52:59 -05:00
}
}
return size
}
2017-09-28 15:36:47 -04:00
// Order clusters in increasing order of task heaviness.
2017-02-25 19:57:01 -05:00
func labelAndOrder ( clusters map [ int ] [ ] Task , numberOfClusters int , taskObservation func ( task Task ) [ ] float64 ) [ ] TaskCluster {
2017-09-28 15:36:47 -04:00
// Determine the position of the cluster in the ordered list of clusters.
2017-02-25 15:43:32 -05:00
sizedClusters := [ ] TaskCluster { }
2017-02-24 20:52:59 -05:00
2017-09-28 15:36:47 -04:00
// Initializing.
2017-02-24 20:52:59 -05:00
for i := 0 ; i < numberOfClusters ; i ++ {
2017-02-25 15:43:32 -05:00
sizedClusters = append ( sizedClusters , TaskCluster {
ClusterIndex : i ,
Tasks : clusters [ i ] ,
SizeScore : 0 ,
} )
2017-02-24 20:52:59 -05:00
}
2017-02-25 15:43:32 -05:00
for i := 0 ; i < numberOfClusters - 1 ; i ++ {
2017-09-28 15:36:47 -04:00
// Sizing the current cluster.
2017-02-25 19:57:01 -05:00
sizeI := clusterSize ( clusters [ i ] , taskObservation )
2017-02-24 20:52:59 -05:00
2017-09-28 15:36:47 -04:00
// Comparing with the other clusters.
2017-02-25 15:43:32 -05:00
for j := i + 1 ; j < numberOfClusters ; j ++ {
2017-02-25 19:57:01 -05:00
sizeJ := clusterSize ( clusters [ j ] , taskObservation )
2017-02-25 15:43:32 -05:00
if sizeI > sizeJ {
sizedClusters [ i ] . SizeScore ++
2017-02-24 20:52:59 -05:00
} else {
2017-02-25 15:43:32 -05:00
sizedClusters [ j ] . SizeScore ++
2017-02-24 20:52:59 -05:00
}
}
}
2017-09-28 15:36:47 -04:00
// Sorting the clusters based on sizeScore.
2017-02-25 15:43:32 -05:00
sort . SliceStable ( sizedClusters , func ( i , j int ) bool {
return sizedClusters [ i ] . SizeScore <= sizedClusters [ j ] . SizeScore
} )
return sizedClusters
2017-02-24 20:52:59 -05:00
}
2017-08-23 19:35:19 -04:00
// Generic Task Sorter.
// Be able to sort an array of tasks based on any of the tasks' resources.
2017-08-26 15:39:45 -04:00
func SortTasks ( ts [ ] Task , sb sortBy ) {
2017-08-26 22:33:06 -04:00
sort . SliceStable ( ts , func ( i , j int ) bool {
2017-08-26 15:19:30 -04:00
return sb ( & ts [ i ] ) <= sb ( & ts [ j ] )
2017-08-26 15:39:45 -04:00
} )
2017-08-23 19:35:19 -04:00
}
2018-01-26 17:29:43 -05:00
// TaskResources holds the resource requirements recorded for a task. Values
// of this type are stored per taskID in taskResourceRequirement.
type TaskResources struct {
// CPU is copied from Task.CPU.
CPU float64
// Ram is copied from Task.RAM.
Ram float64
// Watts is copied from Task.Watts.
Watts float64
}
// taskResourceRequirement maps a taskID to the resource requirements recorded
// for it. It is recreated by initTaskResourceRequirements and read by
// GetResourceRequirement.
var taskResourceRequirement map [ string ] * TaskResources
// Record resource requirements for all the tasks.
func initTaskResourceRequirements ( tasks [ ] Task ) {
taskResourceRequirement = make ( map [ string ] * TaskResources )
baseTaskID := "electron-"
for _ , task := range tasks {
for i := * task . Instances ; i > 0 ; i -- {
2018-01-29 18:03:46 -05:00
taskID := fmt . Sprintf ( "%s-%d" , baseTaskID + task . Name , i )
2018-01-26 17:29:43 -05:00
taskResourceRequirement [ taskID ] = & TaskResources {
CPU : task . CPU ,
Ram : task . RAM ,
Watts : task . Watts ,
}
}
}
}
// GetResourceRequirement returns a copy of the resource requirements recorded
// for taskID. If no entry exists for taskID, it returns a zero TaskResources
// and an error naming the unknown ID.
func GetResourceRequirement(taskID string) (TaskResources, error) {
	requirement, found := taskResourceRequirement[taskID]
	if !found {
		// Shouldn't be here.
		return TaskResources{}, errors.New("Invalid TaskID: " + taskID)
	}
	return *requirement, nil
}