From fb9e88e07d661851acce0784728eabb3bb094e63 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 4 Jan 2020 13:23:10 -0800 Subject: [PATCH 01/18] Initial work on rapl-daemon. Initial server set up. API to read max power per zone and API to write new power cap have both been written. --- rapl-daemon/README.md | 13 ++++++ rapl-daemon/main.go | 49 ++++++++++++++++++++++ rapl-daemon/util.go | 88 ++++++++++++++++++++++++++++++++++++++++ rapl-daemon/util_test.go | 14 +++++++ 4 files changed, 164 insertions(+) create mode 100644 rapl-daemon/README.md create mode 100644 rapl-daemon/main.go create mode 100644 rapl-daemon/util.go create mode 100644 rapl-daemon/util_test.go diff --git a/rapl-daemon/README.md b/rapl-daemon/README.md new file mode 100644 index 0000000..08fc9df --- /dev/null +++ b/rapl-daemon/README.md @@ -0,0 +1,13 @@ +# RAPL Daemon + +This runs a server that is capable changing the percentage at which +a node is being throttled using RAPL. This daemon should be installed +on all worker nodes. 
+ +### Sample payload for testing: +``` + curl --header "Content-Type: application/json" \ + --request POST \ + --data '{"percentage":75}' \ + http://localhost:9090/powercap + ``` diff --git a/rapl-daemon/main.go b/rapl-daemon/main.go new file mode 100644 index 0000000..e174dd8 --- /dev/null +++ b/rapl-daemon/main.go @@ -0,0 +1,49 @@ +package main + +import ( + "encoding/json" + "fmt" + "html" + "log" + "net/http" +) + +const powercapDir = "/sys/class/powercap/" + +// Cap is a payload that is expected from Elektron to cap a node +type Cap struct { + Percentage int +} + +func main() { + http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { + fmt.Fprintf(w, "Unsupported endpoint %s", html.EscapeString(r.URL.Path)) + }) + + http.HandleFunc("/powercap", powercapEndpoint) + log.Fatal(http.ListenAndServe(":9090", nil)) +} + +// Handler powercapping HTTP API endpoint +func powercapEndpoint(w http.ResponseWriter, r *http.Request) { + var payload Cap + decoder := json.NewDecoder(r.Body) + err := decoder.Decode(&payload) + if err != nil { + http.Error(w, "Error parsing payload: "+err.Error(), 400) + return + } + + if payload.Percentage < 0 || payload.Percentage > 100 { + http.Error(w, "Bad payload: percentage must be between 0 and 100", 400) + return + } + + err = capNode(powercapDir, payload.Percentage) + if err != nil { + http.Error(w, err.Error(), 400) + return + } + + fmt.Fprintf(w, "Capped node at %d percent", payload.Percentage) +} diff --git a/rapl-daemon/util.go b/rapl-daemon/util.go new file mode 100644 index 0000000..19e5e44 --- /dev/null +++ b/rapl-daemon/util.go @@ -0,0 +1,88 @@ +package main + +import ( + "fmt" + "io/ioutil" + "math" + "path/filepath" + "strconv" + "strings" +) + +const raplPrefixCPU = "intel-rapl" + +// constraint_0 is usually the longer window while constraint_1 is usually the longer window +const maxPowerFileShortWindow = "constraint_0_max_power_uw" +const powerLimitFileShortWindow = "constraint_0_power_limit_uw" + +// capNode 
uses pseudo files made available by the Linux kernel +// in order to capNode CPU power. More information is available at: +// https://www.kernel.org/doc/html/latest/power/powercap/powercap.html +func capNode(base string, percentage int) error { + + if percentage <= 0 || percentage > 100 { + return fmt.Errorf("cap percentage must be between 0 (non-inclusive) and 100 (inclusive): %d", percentage) + } + + files, err := ioutil.ReadDir(base) + if err != nil { + return err + } + + for _, file := range files { + + fields := strings.Split(file.Name(), ":") + + // Fields should be in the form intel-rapl:X where X is the power zone + // We ignore sub-zones which follow the form intel-rapl:X:Y + if len(fields) != 2 { + continue + } + + if fields[0] == raplPrefixCPU { + maxPower, err := maxPower(filepath.Join(base, file.Name(), maxPowerFileShortWindow)) + if err != nil { + fmt.Println("unable to retreive max power for zone ", err) + continue + } + + // We use floats to mitigate the possibility of an integer overflows. + powercap := uint(math.Ceil(float64(maxPower) * (float64(percentage) / 100))) + + err = capZone(filepath.Join(base, file.Name(), powerLimitFileShortWindow), powercap) + if err != nil { + fmt.Println("unable to write powercap value: ", err) + continue + } + fmt.Println(file.Name(), ": ", int(maxPower), ", ", int(powercap)) + } + } + + return nil +} + +// maxPower returns the value in float of the maximum watts a power zone +// can use. 
+func maxPower(zone string) (uint64, error) { + maxPower, err := ioutil.ReadFile(zone) + if err != nil { + return 0.0, err + } + + maxPoweruW, err := strconv.ParseUint(strings.TrimSpace(string(maxPower)), 10, 64) + if err != nil { + return 0.0, err + } + + return maxPoweruW, nil + +} + +// capZone caps a power zone to a specific amount of watts specified by value +func capZone(zone string, value uint) error { + err := ioutil.WriteFile(zone, []byte(string(value)), 0644) + if err != nil { + return err + } + return nil +} diff --git a/rapl-daemon/util_test.go b/rapl-daemon/util_test.go new file mode 100644 index 0000000..afdc411 --- /dev/null +++ b/rapl-daemon/util_test.go @@ -0,0 +1,14 @@ +package main + +import "testing" + +// TODO(rdelvalle): Add more thourough testing. Generate mock files +// that mimic the powercap subsystem and create test to operate on it. +func TestCap(t *testing.T) { + + err := capNode("/sys/devices/virtual/powercap/intel-rapl", 95) + + if err != nil { + t.Fail() + } +} -- 2.47.2 From 2955c5d27ff4d6f3e42502e198d1c50f021431f1 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 4 Jan 2020 13:24:26 -0800 Subject: [PATCH 02/18] Removing python script since this has been ported to go code now. --- scripts/RAPL_PKG_Throttle.py | 53 ------------------------------------ 1 file changed, 53 deletions(-) delete mode 100644 scripts/RAPL_PKG_Throttle.py diff --git a/scripts/RAPL_PKG_Throttle.py b/scripts/RAPL_PKG_Throttle.py deleted file mode 100644 index 0da5085..0000000 --- a/scripts/RAPL_PKG_Throttle.py +++ /dev/null @@ -1,53 +0,0 @@ -#!/usr/bin/python3 -# Copyright (C) 2018 spdfg -# -# This file is part of Elektron. -# -# Elektron is free software: you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation, either version 3 of the License, or -# (at your option) any later version. 
-# -# Elektron is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with Elektron. If not, see . -# - -import sys -import os -import math - -def main(): - POWER_CAP_DIR = "/sys/class/powercap/" - - if not len(sys.argv) == 2: - print(sys.argv[0] + " ") - exit(-1) - - if not os.path.exists(POWER_CAP_DIR): - print("Powercap framework not installed exist") - exit(-1) - - throttle_percent = float(sys.argv[1])/100.0 - - if throttle_percent > 1.0 or throttle_percent < 0: - print("Percent must be between 0 and 100") - exit(-1) - -# print(throttle_percent) - - - - for directory in os.listdir(POWER_CAP_DIR): - if len(directory.split(':')) == 2: - max_watts = open(POWER_CAP_DIR + directory + '/constraint_0_max_power_uw', 'r') - rapl_cap_watts = open(POWER_CAP_DIR + directory + '/constraint_0_power_limit_uw', 'w') #0=longer window, 1=shorter window - - rapl_cap_watts.write(str(math.ceil(float(max_watts.read())*throttle_percent))) - - -main() -- 2.47.2 From 78f533fe21b2f5d630a48ffe098a6670055fe7d9 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 4 Jan 2020 17:15:16 -0800 Subject: [PATCH 03/18] Adding test for happy path retrieving max power. 
--- go.sum | 1 + rapl-daemon/util_test.go | 32 ++++++++++++++++++++++++++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/go.sum b/go.sum index f2fecd3..411d53c 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,6 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/fatih/color v1.7.0 h1:DkWD4oS2D8LGGgTQ6IvwJJXSL5Vp2ffcQg58nFV38Ys= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= diff --git a/rapl-daemon/util_test.go b/rapl-daemon/util_test.go index afdc411..351677c 100644 --- a/rapl-daemon/util_test.go +++ b/rapl-daemon/util_test.go @@ -1,10 +1,18 @@ package main -import "testing" +import ( + "fmt" + "io/ioutil" + "os" + "strconv" + "testing" + + "github.com/stretchr/testify/assert" +) // TODO(rdelvalle): Add more thourough testing. Generate mock files // that mimic the powercap subsystem and create test to operate on it. 
-func TestCap(t *testing.T) { +func TestCapNode(t *testing.T) { err := capNode("/sys/devices/virtual/powercap/intel-rapl", 95) @@ -12,3 +20,23 @@ func TestCap(t *testing.T) { t.Fail() } } + +func TestMaxPower(t *testing.T) { + const maxWattage uint64 = 1500000 + + tmpfile, err := ioutil.TempFile("", maxPowerFileShortWindow) + assert.NoError(t, err) + + defer os.Remove(tmpfile.Name()) + + fmt.Println(tmpfile.Name()) + _, err = tmpfile.Write([]byte(strconv.FormatUint(maxWattage, 10))) + assert.NoError(t, err) + + maxWatts, err := maxPower(tmpfile.Name()) + assert.NoError(t, err) + assert.Equal(t, maxWattage, maxWatts) + + err = tmpfile.Close() + assert.NoError(t, err) +} -- 2.47.2 From 3d9f92a9c09b5aea7d403db7d5f0ed71f0ed6970 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 4 Jan 2020 17:42:59 -0800 Subject: [PATCH 04/18] Change some data types around to avoid too much conversion. --- rapl-daemon/util.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rapl-daemon/util.go b/rapl-daemon/util.go index 19e5e44..4e432a8 100644 --- a/rapl-daemon/util.go +++ b/rapl-daemon/util.go @@ -47,7 +47,7 @@ func capNode(base string, percentage int) error { } // We use floats to mitigate the possibility of an integer overflows. 
- powercap := uint(math.Ceil(float64(maxPower) * (float64(percentage) / 100))) + powercap := uint64(math.Ceil(float64(maxPower) * (float64(percentage) / 100))) err = capZone(filepath.Join(base, file.Name(), powerLimitFileShortWindow), powercap) if err != nil { @@ -79,8 +79,8 @@ func maxPower(zone string) (uint64, error) { } // capZone caps a power zone to a specific amount of watts specified by value -func capZone(zone string, value uint) error { - err := ioutil.WriteFile(zone, []byte(string(value)), 0644) +func capZone(zone string, value uint64) error { + err := ioutil.WriteFile(zone, []byte(strconv.FormatUint(value, 10)), 0644) if err != nil { return err } -- 2.47.2 From 78d1a1158307dee257fbf1c31553b754385b88a0 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 4 Jan 2020 17:43:18 -0800 Subject: [PATCH 05/18] Add happy path test for cap zone --- rapl-daemon/util_test.go | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/rapl-daemon/util_test.go b/rapl-daemon/util_test.go index 351677c..010493e 100644 --- a/rapl-daemon/util_test.go +++ b/rapl-daemon/util_test.go @@ -1,10 +1,11 @@ package main import ( - "fmt" "io/ioutil" + "math" "os" "strconv" + "strings" "testing" "github.com/stretchr/testify/assert" @@ -29,7 +30,6 @@ func TestMaxPower(t *testing.T) { defer os.Remove(tmpfile.Name()) - fmt.Println(tmpfile.Name()) _, err = tmpfile.Write([]byte(strconv.FormatUint(maxWattage, 10))) assert.NoError(t, err) @@ -40,3 +40,29 @@ func TestMaxPower(t *testing.T) { err = tmpfile.Close() assert.NoError(t, err) } + +func TestCapZone(t *testing.T) { + + const maxPower float64 = 1500000 + const percentage float64 = .50 + + tmpfile, err := ioutil.TempFile("", powerLimitFileShortWindow) + assert.NoError(t, err) + + defer os.Remove(tmpfile.Name()) + + powercap := uint64(math.Ceil(maxPower * percentage)) + + err = capZone(tmpfile.Name(), powercap) + assert.NoError(t, err) + + newCapBytes, err := ioutil.ReadFile(tmpfile.Name()) + 
 assert.NoError(t, err) + + newCap, err := strconv.ParseUint(strings.TrimSpace(string(newCapBytes)), 10, 64) + assert.NoError(t, err) + assert.Equal(t, powercap, newCap) + + err = tmpfile.Close() + assert.NoError(t, err) +} -- 2.47.2 From cf4ffe61c026c3b96c21563bb06e0810efad81c7 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 4 Jan 2020 18:09:03 -0800 Subject: [PATCH 06/18] Removing unnecessary print statement. --- rapl-daemon/util.go | 1 - 1 file changed, 1 deletion(-) diff --git a/rapl-daemon/util.go b/rapl-daemon/util.go index 4e432a8..0a031c0 100644 --- a/rapl-daemon/util.go +++ b/rapl-daemon/util.go @@ -54,7 +54,6 @@ func capNode(base string, percentage int) error { fmt.Println("unable to write powercap value: ", err) continue } - fmt.Println(file.Name(), ": ", int(maxPower), ", ", int(powercap)) } } -- 2.47.2 From b3de44bac6640df6878a989eaac2544d394d65fa Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 4 Jan 2020 18:09:41 -0800 Subject: [PATCH 07/18] Change cap node to use a temporary setup. --- rapl-daemon/util_test.go | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/rapl-daemon/util_test.go b/rapl-daemon/util_test.go index 010493e..61612b2 100644 --- a/rapl-daemon/util_test.go +++ b/rapl-daemon/util_test.go @@ -4,6 +4,7 @@ import ( "io/ioutil" "math" "os" + "path/filepath" "strconv" "strings" "testing" @@ -15,11 +16,20 @@ import ( // that mimic the powercap subsystem and create test to operate on it. 
func TestCapNode(t *testing.T) { - err := capNode("/sys/devices/virtual/powercap/intel-rapl", 95) + RAPLdir, err := ioutil.TempDir("", "intel-rapl") + assert.NoError(t, err) + defer os.RemoveAll(RAPLdir) - if err != nil { - t.Fail() - } + zonePath := filepath.Join(RAPLdir, raplPrefixCPU+":0") + err = os.Mkdir(zonePath, 755) + assert.NoError(t, err) + err = ioutil.WriteFile(filepath.Join(zonePath, maxPowerFileShortWindow), []byte("1500000"), 0444) + assert.NoError(t, err) + err = ioutil.WriteFile(filepath.Join(zonePath, powerLimitFileShortWindow), []byte("1500000"), 0644) + assert.NoError(t, err) + + err = capNode(RAPLdir, 95) + assert.NoError(t, err) } func TestMaxPower(t *testing.T) { @@ -42,7 +52,6 @@ func TestMaxPower(t *testing.T) { } func TestCapZone(t *testing.T) { - const maxPower float64 = 1500000 const percentage float64 = .50 -- 2.47.2 From 2f52fa2eb9c71e4feadcbfd53c94d3d2aa22a951 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 4 Jan 2020 18:11:43 -0800 Subject: [PATCH 08/18] Renaming arguments to be more descriptive. --- rapl-daemon/util.go | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/rapl-daemon/util.go b/rapl-daemon/util.go index 0a031c0..5deee2c 100644 --- a/rapl-daemon/util.go +++ b/rapl-daemon/util.go @@ -62,8 +62,8 @@ func capNode(base string, percentage int) error { // maxPower returns the value in float of the maximum watts a power zone // can use. 
-func maxPower(zone string) (uint64, error) { - maxPower, err := ioutil.ReadFile(zone) +func maxPower(maxFile string) (uint64, error) { + maxPower, err := ioutil.ReadFile(maxFile) if err != nil { return 0.0, err } @@ -74,12 +74,11 @@ func maxPower(zone string) (uint64, error) { } return maxPoweruW, nil - } // capZone caps a power zone to a specific amount of watts specified by value -func capZone(zone string, value uint64) error { - err := ioutil.WriteFile(zone, []byte(strconv.FormatUint(value, 10)), 0644) +func capZone(limitFile string, value uint64) error { + err := ioutil.WriteFile(limitFile, []byte(strconv.FormatUint(value, 10)), 0644) if err != nil { return err } -- 2.47.2 From d4075271cd14cb70c0a8964c92eab9699ef9b4a9 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 4 Jan 2020 18:17:43 -0800 Subject: [PATCH 09/18] Changing todo message. --- rapl-daemon/util_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/rapl-daemon/util_test.go b/rapl-daemon/util_test.go index 61612b2..19fc942 100644 --- a/rapl-daemon/util_test.go +++ b/rapl-daemon/util_test.go @@ -12,8 +12,7 @@ import ( "github.com/stretchr/testify/assert" ) -// TODO(rdelvalle): Add more thourough testing. Generate mock files -// that mimic the powercap subsystem and create test to operate on it. +// TODO(rdelvalle): Create filesystem only once and allow tests to use it func TestCapNode(t *testing.T) { RAPLdir, err := ioutil.TempDir("", "intel-rapl") -- 2.47.2 From 64effeef2c16412d299c15dcd23af8c3b7ec39d0 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 4 Jan 2020 18:40:47 -0800 Subject: [PATCH 10/18] Changing test structure to only set up mock subsystem once and allowing functions to test on it later. 
--- rapl-daemon/util_test.go | 83 +++++++++++++++++++++------------------- 1 file changed, 43 insertions(+), 40 deletions(-) diff --git a/rapl-daemon/util_test.go b/rapl-daemon/util_test.go index 19fc942..e3a601e 100644 --- a/rapl-daemon/util_test.go +++ b/rapl-daemon/util_test.go @@ -2,6 +2,7 @@ package main import ( "io/ioutil" + "log" "math" "os" "path/filepath" @@ -12,65 +13,67 @@ import ( "github.com/stretchr/testify/assert" ) +var raplDir string + +const maxWattage uint64 = 1500000 + +func TestMain(m *testing.M) { + var err error + raplDir, err = ioutil.TempDir("", raplPrefixCPU) + if err != nil { + log.Fatal(err) + } + + defer os.RemoveAll(raplDir) + + // Create temporary directory that mocks powercap subsytem + zonePath := filepath.Join(raplDir, raplPrefixCPU+":0") + err = os.Mkdir(zonePath, 755) + if err != nil { + log.Fatal(err) + } + + initialWatts := strconv.FormatUint(maxWattage, 10) + + err = ioutil.WriteFile(filepath.Join(zonePath, maxPowerFileShortWindow), []byte(initialWatts), 0444) + if err != nil { + log.Fatal(err) + } + + err = ioutil.WriteFile(filepath.Join(zonePath, powerLimitFileShortWindow), []byte(initialWatts), 0644) + if err != nil { + log.Fatal(err) + } + + os.Exit(m.Run()) +} + // TODO(rdelvalle): Create filesystem only once and allow tests to use it func TestCapNode(t *testing.T) { - - RAPLdir, err := ioutil.TempDir("", "intel-rapl") - assert.NoError(t, err) - defer os.RemoveAll(RAPLdir) - - zonePath := filepath.Join(RAPLdir, raplPrefixCPU+":0") - err = os.Mkdir(zonePath, 755) - assert.NoError(t, err) - err = ioutil.WriteFile(filepath.Join(zonePath, maxPowerFileShortWindow), []byte("1500000"), 0444) - assert.NoError(t, err) - err = ioutil.WriteFile(filepath.Join(zonePath, powerLimitFileShortWindow), []byte("1500000"), 0644) - assert.NoError(t, err) - - err = capNode(RAPLdir, 95) + err := capNode(raplDir, 95) assert.NoError(t, err) } func TestMaxPower(t *testing.T) { - const maxWattage uint64 = 1500000 + maxFile := 
filepath.Join(raplDir, raplPrefixCPU+":0", maxPowerFileShortWindow) - tmpfile, err := ioutil.TempFile("", maxPowerFileShortWindow) - assert.NoError(t, err) - - defer os.Remove(tmpfile.Name()) - - _, err = tmpfile.Write([]byte(strconv.FormatUint(maxWattage, 10))) - assert.NoError(t, err) - - maxWatts, err := maxPower(tmpfile.Name()) + maxWatts, err := maxPower(maxFile) assert.NoError(t, err) assert.Equal(t, maxWattage, maxWatts) - - err = tmpfile.Close() - assert.NoError(t, err) } func TestCapZone(t *testing.T) { - const maxPower float64 = 1500000 const percentage float64 = .50 - tmpfile, err := ioutil.TempFile("", powerLimitFileShortWindow) + powercap := uint64(math.Ceil(float64(maxWattage) * percentage)) + limitFile := filepath.Join(raplDir, raplPrefixCPU+":0", powerLimitFileShortWindow) + err := capZone(limitFile, powercap) assert.NoError(t, err) - defer os.Remove(tmpfile.Name()) - - powercap := uint64(math.Ceil(maxPower * percentage)) - - err = capZone(tmpfile.Name(), powercap) - assert.NoError(t, err) - - newCapBytes, err := ioutil.ReadFile(tmpfile.Name()) + newCapBytes, err := ioutil.ReadFile(limitFile) assert.NoError(t, err) newCap, err := strconv.ParseUint(strings.TrimSpace(string(newCapBytes)), 10, 64) assert.NoError(t, err) assert.Equal(t, powercap, newCap) - - err = tmpfile.Close() - assert.NoError(t, err) } -- 2.47.2 From 466e30752627dd1d869892b049afcb54edc6272e Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 4 Jan 2020 18:51:41 -0800 Subject: [PATCH 11/18] Adding some more coverage for unhappy paths and fixing some values to reflect they are no longer floats. 
--- rapl-daemon/util.go | 9 +++++++-- rapl-daemon/util_test.go | 20 ++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/rapl-daemon/util.go b/rapl-daemon/util.go index 5deee2c..8646b20 100644 --- a/rapl-daemon/util.go +++ b/rapl-daemon/util.go @@ -4,6 +4,7 @@ import ( "fmt" "io/ioutil" "math" + "os" "path/filepath" "strconv" "strings" @@ -65,12 +66,12 @@ func capNode(base string, percentage int) error { func maxPower(maxFile string) (uint64, error) { maxPower, err := ioutil.ReadFile(maxFile) if err != nil { - return 0.0, err + return 0, err } maxPoweruW, err := strconv.ParseUint(strings.TrimSpace(string(maxPower)), 10, 64) if err != nil { - return 0.0, err + return 0, err } return maxPoweruW, nil @@ -78,6 +79,10 @@ func maxPower(maxFile string) (uint64, error) { // capZone caps a power zone to a specific amount of watts specified by value func capZone(limitFile string, value uint64) error { + if _, err := os.Stat(limitFile); os.IsNotExist(err) { + return err + } + err := ioutil.WriteFile(limitFile, []byte(strconv.FormatUint(value, 10)), 0644) if err != nil { return err diff --git a/rapl-daemon/util_test.go b/rapl-daemon/util_test.go index e3a601e..ffada72 100644 --- a/rapl-daemon/util_test.go +++ b/rapl-daemon/util_test.go @@ -52,6 +52,16 @@ func TestMain(m *testing.M) { func TestCapNode(t *testing.T) { err := capNode(raplDir, 95) assert.NoError(t, err) + + t.Run("badPercentage", func(t *testing.T) { + err := capNode(raplDir, 1000) + assert.Error(t, err) + }) + + t.Run("zeroPercent", func(t *testing.T) { + err := capNode(raplDir, 0) + assert.Error(t, err) + }) } func TestMaxPower(t *testing.T) { @@ -60,6 +70,11 @@ func TestMaxPower(t *testing.T) { maxWatts, err := maxPower(maxFile) assert.NoError(t, err) assert.Equal(t, maxWattage, maxWatts) + + t.Run("nameDoesNotExist", func(t *testing.T) { + _, err := maxPower("madeupname") + assert.Error(t, err) + }) } func TestCapZone(t *testing.T) { @@ -76,4 +91,9 @@ func TestCapZone(t 
*testing.T) { newCap, err := strconv.ParseUint(strings.TrimSpace(string(newCapBytes)), 10, 64) assert.NoError(t, err) assert.Equal(t, powercap, newCap) + + t.Run("nameDoesNotExist", func(t *testing.T) { + err := capZone("madeupname", powercap) + assert.Error(t, err) + }) } -- 2.47.2 From 6670907ff3ed323906f1844a75792adc2ab8b343 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 4 Jan 2020 18:55:15 -0800 Subject: [PATCH 12/18] Keeping the old script around as it should be removed in a different PR. --- scripts/RAPL_PKG_Throttle.py | 53 ++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 scripts/RAPL_PKG_Throttle.py diff --git a/scripts/RAPL_PKG_Throttle.py b/scripts/RAPL_PKG_Throttle.py new file mode 100644 index 0000000..0da5085 --- /dev/null +++ b/scripts/RAPL_PKG_Throttle.py @@ -0,0 +1,53 @@ +#!/usr/bin/python3 +# Copyright (C) 2018 spdfg +# +# This file is part of Elektron. +# +# Elektron is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Elektron is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Elektron. If not, see . 
+# + +import sys +import os +import math + +def main(): + POWER_CAP_DIR = "/sys/class/powercap/" + + if not len(sys.argv) == 2: + print(sys.argv[0] + " ") + exit(-1) + + if not os.path.exists(POWER_CAP_DIR): + print("Powercap framework not installed exist") + exit(-1) + + throttle_percent = float(sys.argv[1])/100.0 + + if throttle_percent > 1.0 or throttle_percent < 0: + print("Percent must be between 0 and 100") + exit(-1) + +# print(throttle_percent) + + + + for directory in os.listdir(POWER_CAP_DIR): + if len(directory.split(':')) == 2: + max_watts = open(POWER_CAP_DIR + directory + '/constraint_0_max_power_uw', 'r') + rapl_cap_watts = open(POWER_CAP_DIR + directory + '/constraint_0_power_limit_uw', 'w') #0=longer window, 1=shorter window + + rapl_cap_watts.write(str(math.ceil(float(max_watts.read())*throttle_percent))) + + +main() -- 2.47.2 From 32b8fde5755b60e98299235f692b42c6c31b0cc7 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 4 Jan 2020 18:57:44 -0800 Subject: [PATCH 13/18] Delegating percentage check to capNode function. --- rapl-daemon/main.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/rapl-daemon/main.go b/rapl-daemon/main.go index e174dd8..6dfcaf0 100644 --- a/rapl-daemon/main.go +++ b/rapl-daemon/main.go @@ -34,11 +34,6 @@ func powercapEndpoint(w http.ResponseWriter, r *http.Request) { return } - if payload.Percentage < 0 || payload.Percentage > 100 { - http.Error(w, "Bad payload: percentage must be between 0 and 100", 400) - return - } - err = capNode(powercapDir, payload.Percentage) if err != nil { http.Error(w, err.Error(), 400) -- 2.47.2 From d2680df56319c62e8fa3b3b26bec38e587185472 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Fri, 17 Jan 2020 09:22:17 -0800 Subject: [PATCH 14/18] Fixing typo. 
--- rapl-daemon/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rapl-daemon/README.md b/rapl-daemon/README.md index 08fc9df..e557b9c 100644 --- a/rapl-daemon/README.md +++ b/rapl-daemon/README.md @@ -1,7 +1,7 @@ # RAPL Daemon -This runs a server that is capable changing the percentage at which -a node is being throttled using RAPL. This daemon should be installed +This runs a server that is capable of changing the percentage at which +a node is being throttled to using RAPL. This daemon should be installed on all worker nodes. ### Sample payload for testing: -- 2.47.2 From 74545680fcceabc43133044594b2d409575c0746 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Fri, 17 Jan 2020 09:31:38 -0800 Subject: [PATCH 15/18] Fixing typos. --- rapl-daemon/main.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/rapl-daemon/main.go b/rapl-daemon/main.go index 6dfcaf0..f1b2c96 100644 --- a/rapl-daemon/main.go +++ b/rapl-daemon/main.go @@ -10,7 +10,7 @@ import ( const powercapDir = "/sys/class/powercap/" -// Cap is a payload that is expected from Elektron to cap a node +// Cap is a payload that is expected from Elektron to cap a node. type Cap struct { Percentage int } @@ -24,13 +24,13 @@ func main() { log.Fatal(http.ListenAndServe(":9090", nil)) } -// Handler powercapping HTTP API endpoint +// Handler for the powercapping HTTP API endpoint. 
func powercapEndpoint(w http.ResponseWriter, r *http.Request) { var payload Cap decoder := json.NewDecoder(r.Body) err := decoder.Decode(&payload) if err != nil { - http.Error(w, "Error parsing payload: "+err.Error(), 400) + http.Error(w, "error parsing payload: "+err.Error(), 400) return } @@ -40,5 +40,5 @@ func powercapEndpoint(w http.ResponseWriter, r *http.Request) { return } - fmt.Fprintf(w, "Capped node at %d percent", payload.Percentage) + fmt.Fprintf(w, "capped node at %d percent", payload.Percentage) } -- 2.47.2 From 74b488750d265567c5541524392ca4afc7a3b6ad Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Fri, 17 Jan 2020 09:33:43 -0800 Subject: [PATCH 16/18] Changing shortWindow to longWindow as constraint_0 actually points to the long window. --- rapl-daemon/util.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/rapl-daemon/util.go b/rapl-daemon/util.go index 8646b20..4207e8f 100644 --- a/rapl-daemon/util.go +++ b/rapl-daemon/util.go @@ -12,9 +12,9 @@ import ( const raplPrefixCPU = "intel-rapl" -// constraint_0 is usually the longer window while constraint_1 is usually the longer window -const maxPowerFileShortWindow = "constraint_0_max_power_uw" -const powerLimitFileShortWindow = "constraint_0_power_limit_uw" +// constraint_0 is usually the longer window while constraint_1 is usually the shorter window +const maxPowerFileLongWindow = "constraint_0_max_power_uw" +const powerLimitFileLongWindow = "constraint_0_power_limit_uw" // capNode uses pseudo files made available by the Linux kernel // in order to capNode CPU power. 
More information is available at: @@ -22,7 +22,7 @@ const powerLimitFileShortWindow = "constraint_0_power_limit_uw" func capNode(base string, percentage int) error { if percentage <= 0 || percentage > 100 { - return fmt.Errorf("cap percentage must be between 0 (non-inclusive) and 100 (inclusive): %d", percentage) + return fmt.Errorf("cap percentage must be between (0, 100]: %d", percentage) } files, err := ioutil.ReadDir(base) @@ -41,7 +41,7 @@ func capNode(base string, percentage int) error { } if fields[0] == raplPrefixCPU { - maxPower, err := maxPower(filepath.Join(base, file.Name(), maxPowerFileShortWindow)) + maxPower, err := maxPower(filepath.Join(base, file.Name(), maxPowerFileLongWindow)) if err != nil { fmt.Println("unable to retreive max power for zone ", err) continue @@ -50,7 +50,7 @@ func capNode(base string, percentage int) error { // We use floats to mitigate the possibility of an integer overflows. powercap := uint64(math.Ceil(float64(maxPower) * (float64(percentage) / 100))) - err = capZone(filepath.Join(base, file.Name(), powerLimitFileShortWindow), powercap) + err = capZone(filepath.Join(base, file.Name(), powerLimitFileLongWindow), powercap) if err != nil { fmt.Println("unable to write powercap value: ", err) continue -- 2.47.2 From bcd25c805a5dcb2f6c1c7967e908c06737c66058 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Fri, 17 Jan 2020 09:43:34 -0800 Subject: [PATCH 17/18] Renaming variable in test. --- rapl-daemon/util.go | 3 +-- rapl-daemon/util_test.go | 17 ++++++++--------- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/rapl-daemon/util.go b/rapl-daemon/util.go index 4207e8f..1d86483 100644 --- a/rapl-daemon/util.go +++ b/rapl-daemon/util.go @@ -61,8 +61,7 @@ func capNode(base string, percentage int) error { return nil } -// maxPower returns the value in float of the maximum watts a power zone -// can use. +// maxPower returns the value in float of the maximum watts a power zone can use. 
func maxPower(maxFile string) (uint64, error) { maxPower, err := ioutil.ReadFile(maxFile) if err != nil { diff --git a/rapl-daemon/util_test.go b/rapl-daemon/util_test.go index ffada72..5e26faa 100644 --- a/rapl-daemon/util_test.go +++ b/rapl-daemon/util_test.go @@ -35,12 +35,12 @@ func TestMain(m *testing.M) { initialWatts := strconv.FormatUint(maxWattage, 10) - err = ioutil.WriteFile(filepath.Join(zonePath, maxPowerFileShortWindow), []byte(initialWatts), 0444) + err = ioutil.WriteFile(filepath.Join(zonePath, maxPowerFileLongWindow), []byte(initialWatts), 0444) if err != nil { log.Fatal(err) } - err = ioutil.WriteFile(filepath.Join(zonePath, powerLimitFileShortWindow), []byte(initialWatts), 0644) + err = ioutil.WriteFile(filepath.Join(zonePath, powerLimitFileLongWindow), []byte(initialWatts), 0644) if err != nil { log.Fatal(err) } @@ -48,30 +48,29 @@ func TestMain(m *testing.M) { os.Exit(m.Run()) } -// TODO(rdelvalle): Create filesystem only once and allow tests to use it func TestCapNode(t *testing.T) { err := capNode(raplDir, 95) assert.NoError(t, err) - t.Run("badPercentage", func(t *testing.T) { + t.Run("bad-percentage", func(t *testing.T) { err := capNode(raplDir, 1000) assert.Error(t, err) }) - t.Run("zeroPercent", func(t *testing.T) { + t.Run("zero-percent", func(t *testing.T) { err := capNode(raplDir, 0) assert.Error(t, err) }) } func TestMaxPower(t *testing.T) { - maxFile := filepath.Join(raplDir, raplPrefixCPU+":0", maxPowerFileShortWindow) + maxFile := filepath.Join(raplDir, raplPrefixCPU+":0", maxPowerFileLongWindow) maxWatts, err := maxPower(maxFile) assert.NoError(t, err) assert.Equal(t, maxWattage, maxWatts) - t.Run("nameDoesNotExist", func(t *testing.T) { + t.Run("name-does-not-exist", func(t *testing.T) { _, err := maxPower("madeupname") assert.Error(t, err) }) @@ -81,7 +80,7 @@ func TestCapZone(t *testing.T) { const percentage float64 = .50 powercap := uint64(math.Ceil(float64(maxWattage) * percentage)) - limitFile := filepath.Join(raplDir, 
raplPrefixCPU+":0", powerLimitFileShortWindow) + limitFile := filepath.Join(raplDir, raplPrefixCPU+":0", powerLimitFileLongWindow) err := capZone(limitFile, powercap) assert.NoError(t, err) @@ -92,7 +91,7 @@ func TestCapZone(t *testing.T) { assert.NoError(t, err) assert.Equal(t, powercap, newCap) - t.Run("nameDoesNotExist", func(t *testing.T) { + t.Run("name-does-not-exist", func(t *testing.T) { err := capZone("madeupname", powercap) assert.Error(t, err) }) -- 2.47.2 From 5e4ba5a933d02c25b436d49cd6c91d49991350f4 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 18 Jan 2020 20:11:44 -0800 Subject: [PATCH 18/18] capping function now returns which zones were successfully capped and which zones could not be capped. This information is now returned to the caller of the HTTP API. --- rapl-daemon/README.md | 26 ++++++++++++++++++++++++++ rapl-daemon/main.go | 26 +++++++++++++++++++++----- rapl-daemon/util.go | 36 ++++++++++++++++++++++++++++-------- rapl-daemon/util_test.go | 19 +++++++++++-------- 4 files changed, 86 insertions(+), 21 deletions(-) diff --git a/rapl-daemon/README.md b/rapl-daemon/README.md index e557b9c..415a31c 100644 --- a/rapl-daemon/README.md +++ b/rapl-daemon/README.md @@ -11,3 +11,29 @@ on all worker nodes. --data '{"percentage":75}' \ http://localhost:9090/powercap ``` + +### Payload + +```json +{ + "percentage":75 +} +``` + +### Response + +The daemon will respond with a json payload containing zones that were +successfully capped as well as the zones that were not capped. + +```json +{ + "cappedZones": null, + "failedZones": [ + "intel-rapl:0", + "intel-rapl:1" + ], + "error": "some zones were not able to be powercapped" +} +``` + +Field error will not exist if failed zones is empty. 
\ No newline at end of file diff --git a/rapl-daemon/main.go b/rapl-daemon/main.go index f1b2c96..e88d3eb 100644 --- a/rapl-daemon/main.go +++ b/rapl-daemon/main.go @@ -15,6 +15,13 @@ type Cap struct { Percentage int } +// CapResponse is the payload sent with information about the capping call +type CapResponse struct { + CappedZones []string `json:"cappedZones"` + FailedZones []string `json:"failedZones"` + Error *string `json:"error"` +} + func main() { http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { fmt.Fprintf(w, "Unsupported endpoint %s", html.EscapeString(r.URL.Path)) @@ -26,19 +33,28 @@ func main() { // Handler for the powercapping HTTP API endpoint. func powercapEndpoint(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/json") + var payload Cap + var response CapResponse + decoder := json.NewDecoder(r.Body) err := decoder.Decode(&payload) if err != nil { - http.Error(w, "error parsing payload: "+err.Error(), 400) + errorMsg := "error parsing payload: " + err.Error() + response.Error = &errorMsg + json.NewEncoder(w).Encode(response) return } - err = capNode(powercapDir, payload.Percentage) + cappedZones, failedZones, err := capNode(powercapDir, payload.Percentage) if err != nil { - http.Error(w, err.Error(), 400) - return + errorMsg := err.Error() + response.Error = &errorMsg } - fmt.Fprintf(w, "capped node at %d percent", payload.Percentage) + response.CappedZones = cappedZones + response.FailedZones = failedZones + + json.NewEncoder(w).Encode(response) } diff --git a/rapl-daemon/util.go b/rapl-daemon/util.go index 1d86483..daf8dff 100644 --- a/rapl-daemon/util.go +++ b/rapl-daemon/util.go @@ -19,19 +19,19 @@ const powerLimitFileLongWindow = "constraint_0_power_limit_uw" // capNode uses pseudo files made available by the Linux kernel // in order to capNode CPU power. 
More information is available at: // https://www.kernel.org/doc/html/latest/power/powercap/powercap.html -func capNode(base string, percentage int) error { +func capNode(base string, percentage int) ([]string, []string, error) { if percentage <= 0 || percentage > 100 { - return fmt.Errorf("cap percentage must be between (0, 100]: %d", percentage) + return nil, nil, fmt.Errorf("cap percentage must be between 0 (non-inclusive) and 100 (inclusive): %d", percentage) } files, err := ioutil.ReadDir(base) if err != nil { - return err + return nil, nil, err } + var capped, failed []string for _, file := range files { - fields := strings.Split(file.Name(), ":") // Fields should be in the form intel-rapl:X where X is the power zone @@ -43,22 +43,28 @@ func capNode(base string, percentage int) error { if fields[0] == raplPrefixCPU { maxPower, err := maxPower(filepath.Join(base, file.Name(), maxPowerFileLongWindow)) if err != nil { + failed = append(failed, file.Name()) fmt.Println("unable to retreive max power for zone ", err) continue } - // We use floats to mitigate the possibility of an integer overflows. + // We use floats to mitigate the possibility of an integer overflow. powercap := uint64(math.Ceil(float64(maxPower) * (float64(percentage) / 100))) - err = capZone(filepath.Join(base, file.Name(), powerLimitFileLongWindow), powercap) - if err != nil { + if err := capZone(filepath.Join(base, file.Name(), powerLimitFileLongWindow), powercap); err != nil { + failed = append(failed, file.Name()) fmt.Println("unable to write powercap value: ", err) continue } + capped = append(capped, file.Name()) } } - return nil + if len(failed) > 0 { + return capped, failed, fmt.Errorf("some zones were not able to be powercapped") + } + + return capped, nil, nil } // maxPower returns the value in float of the maximum watts a power zone can use. 
@@ -88,3 +94,17 @@ func capZone(limitFile string, value uint64) error { } return nil } + +func currentCap(limit string) (uint64, error) { + powercap, err := ioutil.ReadFile(limit) + if err != nil { + return 0, err + } + + powercapuW, err := strconv.ParseUint(strings.TrimSpace(string(powercap)), 10, 64) + if err != nil { + return 0, err + } + + return powercapuW, nil +} diff --git a/rapl-daemon/util_test.go b/rapl-daemon/util_test.go index 5e26faa..58065bd 100644 --- a/rapl-daemon/util_test.go +++ b/rapl-daemon/util_test.go @@ -7,7 +7,6 @@ import ( "os" "path/filepath" "strconv" - "strings" "testing" "github.com/stretchr/testify/assert" @@ -48,18 +47,25 @@ func TestMain(m *testing.M) { os.Exit(m.Run()) } +// TODO(rdelvalle): Add tests where capping fails func TestCapNode(t *testing.T) { - err := capNode(raplDir, 95) + capped, failed, err := capNode(raplDir, 95) assert.NoError(t, err) + assert.Len(t, capped, 1) + assert.Nil(t, failed) t.Run("bad-percentage", func(t *testing.T) { - err := capNode(raplDir, 1000) + capped, failed, err := capNode(raplDir, 1000) assert.Error(t, err) + assert.Nil(t, capped) + assert.Nil(t, failed) }) t.Run("zero-percent", func(t *testing.T) { - err := capNode(raplDir, 0) + capped, failed, err := capNode(raplDir, 0) assert.Error(t, err) + assert.Nil(t, capped) + assert.Nil(t, failed) }) } @@ -84,10 +90,7 @@ func TestCapZone(t *testing.T) { err := capZone(limitFile, powercap) assert.NoError(t, err) - newCapBytes, err := ioutil.ReadFile(limitFile) - assert.NoError(t, err) - - newCap, err := strconv.ParseUint(strings.TrimSpace(string(newCapBytes)), 10, 64) + newCap, err := currentCap(limitFile) assert.NoError(t, err) assert.Equal(t, powercap, newCap) -- 2.47.2