// Copyright (C) 2018 spdfg
//
// This file is part of Elektron.
//
// Elektron is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Elektron is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Elektron. If not, see <http://www.gnu.org/licenses/>.
//
package def
import (
2018-01-26 17:29:43 -05:00
"errors"
"fmt"
2018-01-29 18:19:44 -05:00
"log"
"sort"
2018-09-30 18:23:38 -07:00
"github.com/mash/gokmeans"
"github.com/montanaflynn/stats"
2017-02-24 20:52:59 -05:00
)
2017-09-28 15:36:47 -04:00
// Information about a cluster of tasks.
2017-02-25 15:43:32 -05:00
type TaskCluster struct {
ClusterIndex int
Tasks [ ] Task
SizeScore int // How many other clusters is this cluster bigger than
}
2017-09-28 15:36:47 -04:00
// Classification of Tasks using KMeans clustering using the watts consumption observations.
2017-02-25 15:43:32 -05:00
type TasksToClassify [ ] Task
2017-08-22 13:00:40 -04:00
// Basic taskObservation calculator. This returns an array consisting of the MMPU requirements of a task.
func ( tc TasksToClassify ) taskObservationCalculator ( task Task ) [ ] float64 {
if task . ClassToWatts != nil {
2017-09-28 15:36:47 -04:00
// Taking the aggregate.
2017-08-22 13:00:40 -04:00
observations := [ ] float64 { }
for _ , watts := range task . ClassToWatts {
observations = append ( observations , watts )
}
return observations
} else if task . Watts != 0.0 {
return [ ] float64 { task . Watts }
} else {
log . Fatal ( "Unable to classify tasks. Missing Watts or ClassToWatts attribute in workload." )
2017-09-28 15:36:47 -04:00
return [ ] float64 { 0.0 } // Won't reach here.
2017-08-22 13:00:40 -04:00
}
}
// ClassifyTasks partitions tasks into numberOfClusters clusters using
// k-means over each task's watts observations.
func ClassifyTasks(tasks []Task, numberOfClusters int) []TaskCluster {
	classifiable := TasksToClassify(tasks)
	return classifiable.classify(numberOfClusters, classifiable.taskObservationCalculator)
}
func ( tc TasksToClassify ) classify ( numberOfClusters int , taskObservation func ( task Task ) [ ] float64 ) [ ] TaskCluster {
2017-02-25 15:43:32 -05:00
clusters := make ( map [ int ] [ ] Task )
2017-02-25 19:57:01 -05:00
observations := getObservations ( tc , taskObservation )
2017-09-28 15:36:47 -04:00
// TODO: Make the max number of rounds configurable based on the size of the workload.
2017-04-30 16:48:38 -04:00
// The max number of rounds (currently defaulted to 100) is the number of iterations performed to obtain
2017-09-28 15:36:47 -04:00
// distinct clusters. When the data size becomes very large, we would need more iterations for clustering.
2017-02-25 15:43:32 -05:00
if trained , centroids := gokmeans . Train ( observations , numberOfClusters , 100 ) ; trained {
for i := 0 ; i < len ( observations ) ; i ++ {
observation := observations [ i ]
classIndex := gokmeans . Nearest ( observation , centroids )
if _ , ok := clusters [ classIndex ] ; ok {
clusters [ classIndex ] = append ( clusters [ classIndex ] , tc [ i ] )
} else {
clusters [ classIndex ] = [ ] Task { tc [ i ] }
}
}
}
2017-02-25 19:57:01 -05:00
return labelAndOrder ( clusters , numberOfClusters , taskObservation )
2017-02-25 15:43:32 -05:00
}
2017-09-28 15:36:47 -04:00
// Record observations.
2017-02-25 19:57:01 -05:00
func getObservations ( tasks [ ] Task , taskObservation func ( task Task ) [ ] float64 ) [ ] gokmeans . Node {
2017-02-24 20:52:59 -05:00
observations := [ ] gokmeans . Node { }
for i := 0 ; i < len ( tasks ) ; i ++ {
2017-02-25 19:57:01 -05:00
observations = append ( observations , taskObservation ( tasks [ i ] ) )
2017-02-24 20:52:59 -05:00
}
return observations
}
2018-04-17 20:09:35 +00:00
// Sizing each task cluster using the average MMMPU requirement of the task in the cluster.
func clusterSizeAvgMMMPU ( tasks [ ] Task , taskObservation func ( task Task ) [ ] float64 ) float64 {
mmmpuValues := [ ] float64 { }
// Total sum of the Median of Median Max Power Usage values for all tasks.
total := 0.0
2017-02-24 20:52:59 -05:00
for _ , task := range tasks {
2018-04-17 20:09:35 +00:00
observations := taskObservation ( task )
if len ( observations ) > 0 {
// taskObservation would give us the mmpu values. We would need to take the median of these
// values to obtain the Median of Median Max Power Usage value.
if medianValue , err := stats . Median ( observations ) ; err == nil {
mmmpuValues = append ( mmmpuValues , medianValue )
total += medianValue
} else {
// skip this value
// there is an error in the task config.
log . Println ( err )
}
} else {
// There is only one observation for the task.
mmmpuValues = append ( mmmpuValues , observations [ 0 ] )
2017-02-24 20:52:59 -05:00
}
}
2018-04-17 20:09:35 +00:00
return total / float64 ( len ( mmmpuValues ) )
2017-02-24 20:52:59 -05:00
}
2017-09-28 15:36:47 -04:00
// Order clusters in increasing order of task heaviness.
2017-02-25 19:57:01 -05:00
func labelAndOrder ( clusters map [ int ] [ ] Task , numberOfClusters int , taskObservation func ( task Task ) [ ] float64 ) [ ] TaskCluster {
2017-09-28 15:36:47 -04:00
// Determine the position of the cluster in the ordered list of clusters.
2017-02-25 15:43:32 -05:00
sizedClusters := [ ] TaskCluster { }
2017-02-24 20:52:59 -05:00
2017-09-28 15:36:47 -04:00
// Initializing.
2017-02-24 20:52:59 -05:00
for i := 0 ; i < numberOfClusters ; i ++ {
2017-02-25 15:43:32 -05:00
sizedClusters = append ( sizedClusters , TaskCluster {
ClusterIndex : i ,
Tasks : clusters [ i ] ,
SizeScore : 0 ,
} )
2017-02-24 20:52:59 -05:00
}
2017-02-25 15:43:32 -05:00
for i := 0 ; i < numberOfClusters - 1 ; i ++ {
2018-04-17 20:09:35 +00:00
// Sizing the current cluster based on average Median of Median Max Power Usage of tasks.
sizeI := clusterSizeAvgMMMPU ( clusters [ i ] , taskObservation )
2017-02-24 20:52:59 -05:00
2017-09-28 15:36:47 -04:00
// Comparing with the other clusters.
2017-02-25 15:43:32 -05:00
for j := i + 1 ; j < numberOfClusters ; j ++ {
2018-04-17 20:09:35 +00:00
sizeJ := clusterSizeAvgMMMPU ( clusters [ j ] , taskObservation )
2017-02-25 15:43:32 -05:00
if sizeI > sizeJ {
sizedClusters [ i ] . SizeScore ++
2017-02-24 20:52:59 -05:00
} else {
2017-02-25 15:43:32 -05:00
sizedClusters [ j ] . SizeScore ++
2017-02-24 20:52:59 -05:00
}
}
}
2017-09-28 15:36:47 -04:00
// Sorting the clusters based on sizeScore.
2017-02-25 15:43:32 -05:00
sort . SliceStable ( sizedClusters , func ( i , j int ) bool {
return sizedClusters [ i ] . SizeScore <= sizedClusters [ j ] . SizeScore
} )
return sizedClusters
2017-02-24 20:52:59 -05:00
}
2017-08-23 19:35:19 -04:00
// Generic Task Sorter.
// Be able to sort an array of tasks based on any of the tasks' resources.
2018-02-16 21:49:12 +00:00
func SortTasks ( ts [ ] Task , sb SortBy ) {
2017-08-26 22:33:06 -04:00
sort . SliceStable ( ts , func ( i , j int ) bool {
2017-08-26 15:19:30 -04:00
return sb ( & ts [ i ] ) <= sb ( & ts [ j ] )
2017-08-26 15:39:45 -04:00
} )
2017-08-23 19:35:19 -04:00
}
// TaskResources holds a task's resource requirements: CPU, RAM, and watts.
type TaskResources struct {
	CPU   float64
	Ram   float64
	Watts float64
}

// taskResourceRequirement maps taskIDs to their resource requirements.
// Populated by initTaskResourceRequirements.
var taskResourceRequirement map[string]*TaskResources
// Record resource requirements for all the tasks.
func initTaskResourceRequirements ( tasks [ ] Task ) {
taskResourceRequirement = make ( map [ string ] * TaskResources )
baseTaskID := "electron-"
for _ , task := range tasks {
for i := * task . Instances ; i > 0 ; i -- {
2018-01-29 18:19:44 -05:00
taskID := fmt . Sprintf ( "%s-%d" , baseTaskID + task . Name , i )
2018-01-26 17:29:43 -05:00
taskResourceRequirement [ taskID ] = & TaskResources {
2018-01-29 18:19:44 -05:00
CPU : task . CPU ,
Ram : task . RAM ,
2018-01-26 17:29:43 -05:00
Watts : task . Watts ,
}
}
}
}
// Retrieve the resource requirement of a task specified by the TaskID
func GetResourceRequirement ( taskID string ) ( TaskResources , error ) {
if tr , ok := taskResourceRequirement [ taskID ] ; ok {
return * tr , nil
} else {
// Shouldn't be here.
return TaskResources { } , errors . New ( "Invalid TaskID: " + taskID )
}
2018-01-29 18:19:44 -05:00
}
2018-04-17 20:09:35 +00:00
// Determine the distribution of light power consuming and heavy power consuming tasks in a given window.
func GetTaskDistributionInWindow ( windowSize int , tasks [ ] Task ) ( float64 , error ) {
getTotalInstances := func ( ts [ ] Task , taskExceedingWindow struct {
taskName string
instsToDiscard int
} ) int {
total := 0
for _ , t := range ts {
if t . Name == taskExceedingWindow . taskName {
total += ( * t . Instances - taskExceedingWindow . instsToDiscard )
continue
}
total += * t . Instances
}
return total
}
getTasksInWindow := func ( ) ( tasksInWindow [ ] Task , taskExceedingWindow struct {
taskName string
instsToDiscard int
} ) {
tasksTraversed := 0
// Name of task, only few instances of which fall within the window.
lastTaskName := ""
for _ , task := range tasks {
tasksInWindow = append ( tasksInWindow , task )
tasksTraversed += * task . Instances
lastTaskName = task . Name
if tasksTraversed >= windowSize {
taskExceedingWindow . taskName = lastTaskName
taskExceedingWindow . instsToDiscard = tasksTraversed - windowSize
break
}
}
return
}
// Retrieving the tasks that are in the window.
tasksInWIndow , taskExceedingWindow := getTasksInWindow ( )
// Classifying the tasks based on Median of Median Max Power Usage values.
taskClusters := ClassifyTasks ( tasksInWIndow , 2 )
// First we'll need to check if the tasks in the window could be classified into 2 clusters.
// If yes, then we proceed with determining the distribution.
// Else, we throw an error stating that the distribution is even as only one cluster could be formed.
if len ( taskClusters [ 1 ] . Tasks ) == 0 {
return - 1.0 , errors . New ( "Only one cluster could be formed." )
}
// The first cluster would corresponding to the light power consuming tasks.
// The second cluster would corresponding to the high power consuming tasks.
lpcTasksTotalInst := getTotalInstances ( taskClusters [ 0 ] . Tasks , taskExceedingWindow )
2018-04-17 23:44:36 +00:00
fmt . Printf ( "lpc:%d\n" , lpcTasksTotalInst )
2018-04-17 20:09:35 +00:00
hpcTasksTotalInst := getTotalInstances ( taskClusters [ 1 ] . Tasks , taskExceedingWindow )
2018-04-17 23:44:36 +00:00
fmt . Printf ( "hpc:%d\n" , hpcTasksTotalInst )
2018-04-17 20:09:35 +00:00
return float64 ( lpcTasksTotalInst ) / float64 ( hpcTasksTotalInst ) , nil
}