elektron/powerCap/progressiveExtrema.go
Bhargavi Hanumant Alandikar - Elektron Logging library (#16)
Switch to logrus for logging.

Replaced the old logging library with a wrapper around logrus.
Callers now just use the exported Log(...) and Logf(...) from the logging/
package, which wraps a set of loggers that form a chain (following the
Chain of Responsibility pattern). Loggers are configured using a YAML file
that specifies the following:
1. enabled/disabled
2. whether the message should also be logged to the console
3. filename extension
4. minimum log level

Retrofitted the source code to use the updated logging library.
Updated the documentation with the specification of the log config file.

Currently, the log format specified in the config file is not yet honored;
this will be addressed in a future commit.
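
For reference, call sites now look like this (a minimal sketch based on the calls in the file below; logExample is a hypothetical caller, and CONSOLE is one of the logger types dot-imported from logging/types):

package example

import (
	log "github.com/sirupsen/logrus"

	elekLog "github.com/spdfg/elektron/logging"
	. "github.com/spdfg/elektron/logging/types"
)

// logExample is a hypothetical caller shown only to illustrate the wrapper's API.
func logExample(host string, capValue float64) {
	// Plain message, routed through the logger chain to the console logger.
	elekLog.Log(CONSOLE, log.InfoLevel, "PCP logging started")
	// Formatted message, Printf-style.
	elekLog.Logf(CONSOLE, log.InfoLevel, "Capped host[%s] at %f", host, capValue)
}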

// Copyright (C) 2018 spdfg
//
// This file is part of Elektron.
//
// Elektron is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// Elektron is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with Elektron. If not, see <http://www.gnu.org/licenses/>.
//
package powerCap

import (
	"bufio"
	"container/ring"
	"fmt"
	"math"
	"os/exec"
	"sort"
	"strconv"
	"strings"
	"syscall"
	"time"

	log "github.com/sirupsen/logrus"
	"github.com/spdfg/elektron/constants"
	elekLog "github.com/spdfg/elektron/logging"
	. "github.com/spdfg/elektron/logging/types"
	"github.com/spdfg/elektron/pcp"
	"github.com/spdfg/elektron/rapl"
	"github.com/spdfg/elektron/utilities"
)
// round rounds num to the nearest integer, rounding ties away from zero.
func round(num float64) int {
	return int(math.Floor(num + math.Copysign(0.5, num)))
}

// getNextCapValue halves the current cap value and rounds the result to the
// given number of decimal places.
func getNextCapValue(curCapValue float64, precision int) float64 {
	curCapValue /= 2.0
	output := math.Pow(10, float64(precision))
	return float64(round(curCapValue*output)) / output
}
// StartPCPLogAndProgressiveExtremaCap starts PCP logging and runs the
// progressive extrema power-capping strategy: when the cluster's average power
// crosses hiThreshold, the hottest node is capped; when it drops below
// loThreshold, the most heavily capped node is uncapped.
func StartPCPLogAndProgressiveExtremaCap(quit chan struct{}, logging *bool, hiThreshold, loThreshold float64, pcpConfigFile string) {
	var pcpCommand string = "pmdumptext -m -l -f '' -t 1.0 -d , -c " + pcpConfigFile
	cmd := exec.Command("sh", "-c", pcpCommand, pcpConfigFile)
	cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true}

	if hiThreshold < loThreshold {
		elekLog.Log(CONSOLE, log.InfoLevel, "High threshold is lower than low threshold!")
	}

	pipe, err := cmd.StdoutPipe()
	if err != nil {
		log.Fatal(err)
	}
	//cmd.Stdout = stdout

	scanner := bufio.NewScanner(pipe)

	go func(logging *bool, hiThreshold, loThreshold float64) {
		// Get names of the columns.
		scanner.Scan()

		// Write to logfile.
		elekLog.Log(PCP, log.InfoLevel, scanner.Text())

		headers := strings.Split(scanner.Text(), ",")

		powerIndexes := make([]int, 0, 0)
		powerHistories := make(map[string]*ring.Ring)
		indexToHost := make(map[int]string)

		for i, hostMetric := range headers {
			metricSplit := strings.Split(hostMetric, ":")
			if strings.Contains(metricSplit[1], "RAPL_ENERGY_PKG") ||
				strings.Contains(metricSplit[1], "RAPL_ENERGY_DRAM") {
				powerIndexes = append(powerIndexes, i)
				indexToHost[i] = metricSplit[0]

				// Only create one ring per host.
				if _, ok := powerHistories[metricSplit[0]]; !ok {
					// Two PKG and two DRAM readings per node, so 20 samples = 5 seconds of tracking.
					powerHistories[metricSplit[0]] = ring.New(20)
				}
			}
		}

		// Throw away first set of results.
		scanner.Scan()

		// To keep track of the capped states of the capped victims.
		cappedVictims := make(map[string]float64)
		// TODO: Come up with a better name for this.
		orderCapped := make([]string, 0, 8)
		// TODO: Change this to a priority queue ordered by the cap value. This will get rid of the sorting performed in the code.
		// Parallel data structure to orderCapped to keep track of the uncapped states of the uncapped victims.
		orderCappedVictims := make(map[string]float64)
		clusterPowerHist := ring.New(5)
		seconds := 0
		for scanner.Scan() {
			if *logging {
				elekLog.Log(CONSOLE, log.InfoLevel, "Logging PCP...")

				split := strings.Split(scanner.Text(), ",")

				text := scanner.Text()
				elekLog.Log(PCP, log.InfoLevel, text)

				totalPower := 0.0
				for _, powerIndex := range powerIndexes {
					power, _ := strconv.ParseFloat(split[powerIndex], 64)

					host := indexToHost[powerIndex]

					powerHistories[host].Value = power
					powerHistories[host] = powerHistories[host].Next()

					elekLog.WithFields(log.Fields{
						"Host":  indexToHost[powerIndex],
						"Power": fmt.Sprintf("%f", power*pcp.RAPLUnits),
					}).Log(CONSOLE, log.InfoLevel, "")

					totalPower += power
				}

				clusterPower := totalPower * pcp.RAPLUnits

				clusterPowerHist.Value = clusterPower
				clusterPowerHist = clusterPowerHist.Next()

				clusterMean := pcp.AverageClusterPowerHistory(clusterPowerHist)

				elekLog.WithFields(log.Fields{
					"Total power": fmt.Sprintf("%f %d Sec", clusterPower, clusterPowerHist.Len()),
					"Avg":         fmt.Sprintf("%f", clusterMean),
				}).Log(CONSOLE, log.InfoLevel, "")
				if clusterMean >= hiThreshold {
					elekLog.Log(CONSOLE, log.InfoLevel, "Need to cap a node")
					elekLog.Logf(CONSOLE, log.InfoLevel, "Cap values of capped victims %v", cappedVictims)
					elekLog.Logf(CONSOLE, log.InfoLevel, "Cap values of victims to uncap %v", orderCappedVictims)

					// Create statistics for all victims and choose one to cap.
					victims := make([]pcp.Victim, 0, 8)

					// TODO: Just keep track of the largest to reduce from nlogn to n.
					for name, history := range powerHistories {
						histMean := pcp.AverageNodePowerHistory(history)

						// Consider doing mean calculations using go routines if we need to speed up.
						victims = append(victims, pcp.Victim{Watts: histMean, Host: name})
					}
					sort.Sort(pcp.VictimSorter(victims)) // Sort by average wattage.

					// Finding the best victim to cap in a round robin manner.
					newVictimFound := false
					alreadyCappedHosts := []string{} // Host-names of victims that are already capped.
					for i := 0; i < len(victims); i++ {
						// Try to pick a victim that hasn't been capped yet.
						if _, ok := cappedVictims[victims[i].Host]; !ok {
							// If this victim can't be capped further, then we move on to find another victim.
							if _, ok := orderCappedVictims[victims[i].Host]; ok {
								continue
							}
							// Need to cap this victim.
							if err := rapl.Cap(victims[i].Host, "rapl", 50.0); err != nil {
								elekLog.Logf(CONSOLE, log.ErrorLevel, "Error capping host %s", victims[i].Host)
							} else {
								elekLog.Logf(CONSOLE, log.InfoLevel, "Capped host[%s] at %f", victims[i].Host, 50.0)
								// Keeping track of this victim and its cap value.
								cappedVictims[victims[i].Host] = 50.0
								newVictimFound = true
								// This node can be uncapped and hence adding to orderCapped.
								orderCapped = append(orderCapped, victims[i].Host)
								orderCappedVictims[victims[i].Host] = 50.0
								break // Breaking only on successful cap.
							}
						} else {
							alreadyCappedHosts = append(alreadyCappedHosts, victims[i].Host)
						}
					}

					// If no new victim was found, then we need to cap the best victim among the ones that are already capped.
					if !newVictimFound {
						canCapAlreadyCappedVictim := false
						for i := 0; i < len(alreadyCappedHosts); i++ {
							// If already capped, then the host must be present in orderCappedVictims.
							capValue := orderCappedVictims[alreadyCappedHosts[i]]
							// If capValue is greater than the threshold then cap, else continue.
							if capValue > constants.LowerCapLimit {
								newCapValue := getNextCapValue(capValue, 2)
								if err := rapl.Cap(alreadyCappedHosts[i], "rapl", newCapValue); err != nil {
									elekLog.Logf(CONSOLE, log.ErrorLevel, "Error capping host %s", alreadyCappedHosts[i])
								} else {
									// Successful cap.
									elekLog.Logf(CONSOLE, log.InfoLevel, "Capped host[%s] at %f", alreadyCappedHosts[i], newCapValue)
									// Checking whether this victim can be capped further.
									if newCapValue <= constants.LowerCapLimit {
										// Deleting victim from cappedVictims.
										delete(cappedVictims, alreadyCappedHosts[i])
										// Updating the cap value in orderCappedVictims.
										orderCappedVictims[alreadyCappedHosts[i]] = newCapValue
									} else {
										// Updating the cap value.
										cappedVictims[alreadyCappedHosts[i]] = newCapValue
										orderCappedVictims[alreadyCappedHosts[i]] = newCapValue
									}
									canCapAlreadyCappedVictim = true
									break // Breaking only on successful cap.
								}
							} else {
								// Do nothing.
								// Continue to find another victim to cap.
								// If we cannot find any victim, then all nodes have been
								// capped to the maximum and we stop capping at this point.
							}
						}
						if !canCapAlreadyCappedVictim {
							elekLog.Log(CONSOLE, log.InfoLevel, "No victim left to cap")
						}
					}
				} else if clusterMean < loThreshold {
					elekLog.Log(CONSOLE, log.InfoLevel, "Need to uncap a node")
					elekLog.Logf(CONSOLE, log.InfoLevel, "Cap values of capped victims - %v", cappedVictims)
					elekLog.Logf(CONSOLE, log.InfoLevel, "Cap values of victims to uncap - %v", orderCappedVictims)

					if len(orderCapped) > 0 {
						// We pick the host that is capped the most to uncap.
						orderCappedToSort := utilities.GetPairList(orderCappedVictims)
						sort.Sort(orderCappedToSort) // Sorted hosts in non-decreasing order of capped states.
						hostToUncap := orderCappedToSort[0].Key

						// Uncapping the host.
						// This is a floating point operation and might suffer from precision loss.
						newUncapValue := orderCappedVictims[hostToUncap] * 2.0
						if err := rapl.Cap(hostToUncap, "rapl", newUncapValue); err != nil {
							elekLog.Logf(CONSOLE, log.ErrorLevel, "Error uncapping host %s", hostToUncap)
						} else {
							// Successful uncap.
							elekLog.Logf(CONSOLE, log.InfoLevel, "Uncapped host[%s] to %f", hostToUncap, newUncapValue)

							// Can we uncap this host further? If not, then we remove its entry from orderCapped.
							if newUncapValue >= 100.0 { // Can compare using ==.
								// Deleting entry from orderCapped.
								for i, victimHost := range orderCapped {
									if victimHost == hostToUncap {
										orderCapped = append(orderCapped[:i], orderCapped[i+1:]...)
										break // We are done removing host from orderCapped.
									}
								}
								// Removing entry for host from the parallel data structure.
								delete(orderCappedVictims, hostToUncap)
								// Removing entry from cappedVictims as this host is no longer capped.
								delete(cappedVictims, hostToUncap)
							} else if newUncapValue > constants.LowerCapLimit { // This check is unnecessary and can be converted to 'else'.
								// Updating the cap value.
								orderCappedVictims[hostToUncap] = newUncapValue
								cappedVictims[hostToUncap] = newUncapValue
							}
						}
					} else {
						elekLog.Log(CONSOLE, log.InfoLevel, "No host staged for uncapping")
					}
				}
			}
			seconds++
		}
	}(logging, hiThreshold, loThreshold)

	elekLog.Log(CONSOLE, log.InfoLevel, "PCP logging started")

	if err := cmd.Start(); err != nil {
		log.Fatal(err)
	}

	pgid, err := syscall.Getpgid(cmd.Process.Pid)

	select {
	case <-quit:
		elekLog.Log(CONSOLE, log.InfoLevel, "Stopping PCP logging in 5 seconds")
		time.Sleep(5 * time.Second)

		// http://stackoverflow.com/questions/22470193/why-wont-go-kill-a-child-process-correctly
		// Kill process and all children processes.
		syscall.Kill(-pgid, 15)
		return
	}
}