From fb9e88e07d661851acce0784728eabb3bb094e63 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 4 Jan 2020 13:23:10 -0800 Subject: [PATCH] Initial work on rapl-daemon. Initial server set up. API to read max power per zone and API to write new power cap have both been written. --- rapl-daemon/README.md | 13 ++++++ rapl-daemon/main.go | 49 ++++++++++++++++++++++ rapl-daemon/util.go | 88 ++++++++++++++++++++++++++++++++++++++++ rapl-daemon/util_test.go | 14 +++++++ 4 files changed, 164 insertions(+) create mode 100644 rapl-daemon/README.md create mode 100644 rapl-daemon/main.go create mode 100644 rapl-daemon/util.go create mode 100644 rapl-daemon/util_test.go diff --git a/rapl-daemon/README.md b/rapl-daemon/README.md new file mode 100644 index 0000000..08fc9df --- /dev/null +++ b/rapl-daemon/README.md @@ -0,0 +1,13 @@ +# RAPL Daemon + +This runs a server that is capable of changing the percentage at which +a node is being throttled using RAPL. This daemon should be installed +on all worker nodes. 
+ +### Sample payload for testing: +``` + curl --header "Content-Type: application/json" \ + --request POST \ + --data '{"percentage":75}' \ + http://localhost:9090/powercap + ``` diff --git a/rapl-daemon/main.go b/rapl-daemon/main.go new file mode 100644 index 0000000..e174dd8 --- /dev/null +++ b/rapl-daemon/main.go @@ -0,0 +1,49 @@ +package main + +import ( + "encoding/json" + "fmt" + "html" + "log" + "net/http" +) + +const powercapDir = "/sys/class/powercap/" + +// Cap is the JSON payload expected from Elektron to cap a node; Percentage is the cap to apply, as a percentage. +type Cap struct { + Percentage int +} + +func main() { + http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintf(w, "Unsupported endpoint %s", html.EscapeString(r.URL.Path)) + }) + + http.HandleFunc("/powercap", powercapEndpoint) + log.Fatal(http.ListenAndServe(":9090", nil)) +} + +// powercapEndpoint handles the /powercap HTTP API endpoint: it decodes a JSON Cap payload, answers 400 when the payload cannot be parsed or the percentage is outside 0-100, and otherwise applies the cap through capNode (whose errors are also reported as 400). +func powercapEndpoint(w http.ResponseWriter, r *http.Request) { + var payload Cap + decoder := json.NewDecoder(r.Body) + err := decoder.Decode(&payload) + if err != nil { + http.Error(w, "Error parsing payload: "+err.Error(), 400) + return + } + + if payload.Percentage < 0 || payload.Percentage > 100 { + http.Error(w, "Bad payload: percentage must be between 0 and 100", 400) + return + } + + err = capNode(powercapDir, payload.Percentage) + if err != nil { + http.Error(w, err.Error(), 400) + return + } + + fmt.Fprintf(w, "Capped node at %d percent", payload.Percentage) +} diff --git a/rapl-daemon/util.go b/rapl-daemon/util.go new file mode 100644 index 0000000..19e5e44 --- /dev/null +++ b/rapl-daemon/util.go @@ -0,0 +1,88 @@ +package main + +import ( + "fmt" + "io/ioutil" + "math" + "path/filepath" + "strconv" + "strings" +) + +const raplPrefixCPU = "intel-rapl" + +// constraint_0 is usually the longer window while constraint_1 is usually the shorter window. NOTE(review): the constants below are named ShortWindow but point at constraint_0 files - confirm which window is intended. +const maxPowerFileShortWindow = "constraint_0_max_power_uw" +const powerLimitFileShortWindow = "constraint_0_power_limit_uw" + +// capNode 
uses pseudo files made available by the Linux kernel +// in order to cap CPU power; a zone whose max power cannot be read or whose limit cannot be written is printed and skipped, so a nil return does not guarantee that every zone was capped. More information is available at: +// https://www.kernel.org/doc/html/latest/power/powercap/powercap.html +func capNode(base string, percentage int) error { + + if percentage <= 0 || percentage > 100 { + return fmt.Errorf("cap percentage must be between 0 (non-inclusive) and 100 (inclusive): %d", percentage) + } + + files, err := ioutil.ReadDir(base) + if err != nil { + return err + } + + for _, file := range files { + + fields := strings.Split(file.Name(), ":") + + // Fields should be in the form intel-rapl:X where X is the power zone + // We ignore sub-zones which follow the form intel-rapl:X:Y + if len(fields) != 2 { + continue + } + + if fields[0] == raplPrefixCPU { + maxPower, err := maxPower(filepath.Join(base, file.Name(), maxPowerFileShortWindow)) + if err != nil { + fmt.Println("unable to retreive max power for zone ", err) + continue + } + + // We use floats to mitigate the possibility of an integer overflow; the result is rounded up. + powercap := uint(math.Ceil(float64(maxPower) * (float64(percentage) / 100))) + + err = capZone(filepath.Join(base, file.Name(), powerLimitFileShortWindow), powercap) + if err != nil { + fmt.Println("unable to write powercap value: ", err) + continue + } + fmt.Println(file.Name(), ": ", int(maxPower), ", ", int(powercap)) + } + } + + return nil +} + +// maxPower returns, as a uint64, the maximum power a power zone +// can use, parsed from the given file (presumably microwatts, going by the _uw file suffix). 
+func maxPower(zone string) (uint64, error) { + maxPower, err := ioutil.ReadFile(zone) + if err != nil { + return 0, err + } + + maxPoweruW, err := strconv.ParseUint(strings.TrimSpace(string(maxPower)), 10, 64) + if err != nil { + return 0, err + } + + return maxPoweruW, nil + +} + +// capZone caps a power zone by writing value, formatted as a base-10 string, to the zone's power limit file (presumably microwatts, going by the _uw file suffix). The value must be formatted with strconv: string(value) would yield the UTF-8 encoding of a code point, not the digits. +func capZone(zone string, value uint) error { + err := ioutil.WriteFile(zone, []byte(strconv.FormatUint(uint64(value), 10)), 0644) + if err != nil { + return err + } + return nil +} diff --git a/rapl-daemon/util_test.go b/rapl-daemon/util_test.go new file mode 100644 index 0000000..afdc411 --- /dev/null +++ b/rapl-daemon/util_test.go @@ -0,0 +1,14 @@ +package main + +import "testing" + +// TODO(rdelvalle): Add more thorough testing. Generate mock files +// that mimic the powercap subsystem and create tests to operate on it. +func TestCap(t *testing.T) { + + err := capNode("/sys/devices/virtual/powercap/intel-rapl", 95) + + if err != nil { + t.Fail() + } +}