Fixed a bug in cleverRecap(...). We now switch from the primitive recap to the clever recap, as the cap determined by the latter is lower when the cluster is relatively idle.
This commit is contained in:
parent 43c173c60b
commit 7bea56206b
1 changed file with 57 additions and 34 deletions
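In effect, the scheduler should now prefer the cap produced by cleverRecap(...) when a task finishes. A minimal, hypothetical caller-side sketch of that switch (the surrounding scheduler code is not part of this diff; totalPower, taskMonitor, finishedTaskId and applyCap are illustrative names only):

	// Illustrative only: prefer the clever recap value, which is never higher than
	// the primitive recap value because cleverRecap returns the lesser of the two
	// when both can be computed.
	if newCap, err := capper.cleverRecap(totalPower, taskMonitor, finishedTaskId); err == nil {
		applyCap(newCap) // applyCap stands in for whatever actually sets the cluster-wide cap.
	} else {
		log.Println("Not recapping the cluster:", err)
	}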
@@ -113,18 +113,11 @@ func (capper clusterwideCapper) get_cap(running_average_to_total_power_percentag
}

/*
Recapping the entire cluster. Also, removing the finished task from the list of running tasks.

A recapping strategy which decides between 2 different recapping schemes:
1. the regular scheme, based on the average power usage across the cluster.
2. a scheme based on the average of the loads on each node in the cluster.
The recap value picked is the lesser of the two.

We would, at this point, have better knowledge about the state of the cluster.

1. Calculate the total allocated watts per node in the cluster.
2. Compute the ratio of the total watts usage per node to the total power for that node.
   This gives us the load on that node.
3. Compute the average load across all the nodes in the cluster.
   This is the cap value.

Note: Although this ensures lower power usage, it might increase makespan if there is a heavy workload on just one node.
TODO: Return a map[string]float64 that contains the recap value per node, so that the right amount of power can be provided to each node.
*/
func (capper clusterwideCapper) cleverRecap(total_power map[string]float64,
	task_monitor map[string][]def.Task, finished_taskId string) (float64, error) {
@@ -132,49 +125,79 @@ func (capper clusterwideCapper) cleverRecap(total_power map[string]float64,
	if total_power == nil || task_monitor == nil {
		return 100.0, errors.New("Invalid argument: total_power, task_monitor")
	}

	// Determine the recap value by calling the regular recap(...).
	toggle := false
	recapValue, err := capper.recap(total_power, task_monitor, finished_taskId)
	if err == nil {
		toggle = true
	}

	// Watts usage on each node in the cluster.
	watts_usages := make(map[string][]float64)
	host_of_finished_task := ""
	index_of_finished_task := -1
	for _, host := range constants.Hosts {
		watts_usages[host] = []float64{0.0}
	}
	for host, tasks := range task_monitor {
		for i, task := range tasks {
			if task.TaskID == finished_taskId {
				host_of_finished_task = host
				index_of_finished_task = i
				// Not considering this task for the computation of total_allocated_power and total_running_tasks.
				continue
			}
			watts_usages[host] = append(watts_usages[host], float64(task.Watts)*constants.Cap_margin)
		}
	}

	// Updating the task monitor. If recap(...) has already deleted the finished task
	// from the task monitor, then this step is skipped.
	if host_of_finished_task != "" && index_of_finished_task != -1 {
		log.Printf("Removing task [%s] from the list of running tasks\n",
			task_monitor[host_of_finished_task][index_of_finished_task].TaskID)
		task_monitor[host_of_finished_task] = append(task_monitor[host_of_finished_task][:index_of_finished_task],
			task_monitor[host_of_finished_task][index_of_finished_task+1:]...)
	}

	// Check whether there are still tasks running on the cluster. If not, return an error.
	clusterIdle := true
	for _, tasks := range task_monitor {
		if len(tasks) > 0 {
			clusterIdle = false
		}
	}

	if !clusterIdle {
		// Load on each node in the cluster.
		loads := []float64{}
		for host, usages := range watts_usages {
			total_usage := 0.0
			for _, usage := range usages {
				total_usage += usage
			}
			loads = append(loads, total_usage/total_power[host])
		}

		// Now compute the average load. This would be the cap value.
		total_load := 0.0
		for _, load := range loads {
			total_load += load
		}
		average_load := (total_load / float64(len(loads))) * 100.0

		// If toggle is true, return the lesser of the two recap values.
		if toggle {
			if average_load <= recapValue {
				return average_load, nil
			} else {
				return recapValue, nil
			}
		} else {
			return average_load, nil
		}
	}
	return 100.0, errors.New("No task running on the cluster.")
}
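To make the load-based cap concrete, here is a small standalone sketch of steps 1-3 from the comment above, using plain maps instead of the scheduler's types; the host names and wattages are made-up figures:

	package main

	import "fmt"

	func main() {
		// Made-up allocated watts (already scaled by the cap margin) per node.
		wattsUsages := map[string][]float64{
			"node1": {40.0, 30.0}, // 70 W allocated
			"node2": {20.0},       // 20 W allocated
		}
		// Made-up total power per node.
		totalPower := map[string]float64{"node1": 100.0, "node2": 100.0}

		// Step 2: load on a node = allocated watts / total power for that node.
		loads := []float64{}
		for host, usages := range wattsUsages {
			totalUsage := 0.0
			for _, u := range usages {
				totalUsage += u
			}
			loads = append(loads, totalUsage/totalPower[host])
		}

		// Step 3: the average load, expressed as a percentage, is the candidate cap.
		totalLoad := 0.0
		for _, l := range loads {
			totalLoad += l
		}
		averageLoad := (totalLoad / float64(len(loads))) * 100.0
		fmt.Printf("candidate cap: %.1f%%\n", averageLoad) // (0.70 + 0.20) / 2 * 100 = 45.0%
	}

cleverRecap(...) would then return the lesser of this value and the one produced by the regular recap(...), which is why the clever cap wins when the cluster is relatively idle.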