Fixed bug in cleverRecap(...). Now we switch from the primitive recap to the clever recap, as the cap determined by the latter would be lower when the cluster is relatively idle.

This commit is contained in:
Pradyumna Kaushik 2016-11-25 16:05:55 -05:00 committed by Renan DelValle
parent 43c173c60b
commit 7bea56206b

View file

@ -113,18 +113,11 @@ func (capper clusterwideCapper) get_cap(running_average_to_total_power_percentag
}
/*
Recapping the entire cluster. Also, removing the finished task from the list of running tasks.
A recapping strategy which decides between 2 different recapping schemes.
1. the regular scheme based on the average power usage across the cluster.
2. A scheme based on the average of the loads on each node in the cluster.
We would, at this point, have a better knowledge about the state of the cluster.
1. Calculate the total allocated watts per node in the cluster.
2. Compute the ratio of the total watts usage per node to the total power for that node.
This would give us the load on that node.
3. Now, compute the average load across all the nodes in the cluster.
This would be the cap value.
Note: Although this would ensure lesser power usage, it might increase makespan if there is a heavy workload on just one node.
TODO: return a map[string]float64 that contains the recap value per node. This way, we can provide the right amount of power per node.
The recap value picked is the lesser of the two.
*/
func (capper clusterwideCapper) cleverRecap(total_power map[string]float64,
task_monitor map[string][]def.Task, finished_taskId string) (float64, error) {
@ -132,49 +125,79 @@ func (capper clusterwideCapper) cleverRecap(total_power map[string]float64,
if total_power == nil || task_monitor == nil {
return 100.0, errors.New("Invalid argument: total_power, task_monitor")
}
// determining the recap value by calling the regular recap(...)
toggle := false
recapValue, err := capper.recap(total_power, task_monitor, finished_taskId)
if err == nil {
toggle = true
}
// watts usage on each node in the cluster.
watts_usages := make(map[string][]float64)
host_of_finished_task := ""
index_of_finished_task := -1
index_of_finished_task := -1
for _, host := range constants.Hosts {
watts_usages[host] = []float64{0.0}
}
for host, tasks := range task_monitor {
for i, task := range tasks {
if task.TaskID == finished_taskId {
host_of_finished_task = host
index_of_finished_task = i
// Not considering this task
continue
}
host_of_finished_task = host
index_of_finished_task = i
// Not considering this task for the computation of total_allocated_power and total_running_tasks
continue
}
watts_usages[host] = append(watts_usages[host], float64(task.Watts) * constants.Cap_margin)
}
}
// Updating task monitor
// Updating task monitor. If recap(...) has deleted the finished task from the taskMonitor,
// then this will be ignored.
if host_of_finished_task != "" && index_of_finished_task != -1 {
log.Printf("Removing task with task [%s] from the list of running tasks\n",
task_monitor[host_of_finished_task][index_of_finished_task].TaskID)
task_monitor[host_of_finished_task][index_of_finished_task].TaskID)
task_monitor[host_of_finished_task] = append(task_monitor[host_of_finished_task][:index_of_finished_task],
task_monitor[host_of_finished_task][index_of_finished_task+1:]...)
task_monitor[host_of_finished_task][index_of_finished_task+1:]...)
}
// load on each node in the cluster.
loads := []float64{}
for host, usages := range watts_usages {
total_usage := 0.0
for _, usage := range usages {
total_usage += usage
// Need to check whether there are still tasks running on the cluster. If not then we return an error.
clusterIdle := true
for _, tasks := range task_monitor {
if len(tasks) > 0 {
clusterIdle = false
}
}
if !clusterIdle {
// load on each node in the cluster.
loads := []float64{0.0}
for host, usages := range watts_usages {
total_usage := 0.0
for _, usage := range usages {
total_usage += usage
}
loads = append(loads, total_usage / total_power[host])
}
// Now need to compute the average load.
total_load := 0.0
for _, load := range loads {
total_load += load
}
average_load := (total_load / float64(len(loads)) * 100.0) // this would be the cap value.
// If toggle is true, then we need to return the least recap value.
if toggle {
if average_load <= recapValue {
return average_load, nil
} else {
return recapValue, nil
}
} else {
return average_load, nil
}
loads = append(loads, total_usage / total_power[host])
}
// Now need to compute the average load.
total_load := 0.0
for _, load := range loads {
total_load += load
}
average_load := total_load / float64(len(loads)) // this would be the cap value.
return average_load, nil
return 100.0, errors.New("No task running on the cluster.")
}
/*