Initial work on rapl-daemon. Initial server set up. API to read max power per zone and API to write new power cap have both been written.
This commit is contained in:
parent
3543960689
commit
fb9e88e07d
4 changed files with 164 additions and 0 deletions
13
rapl-daemon/README.md
Normal file
13
rapl-daemon/README.md
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
# RAPL Daemon
|
||||||
|
|
||||||
|
This runs a server that is capable of changing the percentage at which
|
||||||
|
a node is being throttled using RAPL. This daemon should be installed
|
||||||
|
on all worker nodes.
|
||||||
|
|
||||||
|
### Sample payload for testing:
|
||||||
|
```
|
||||||
|
curl --header "Content-Type: application/json" \
|
||||||
|
--request POST \
|
||||||
|
--data '{"percentage":75}' \
|
||||||
|
http://localhost:9090/powercap
|
||||||
|
```
|
49
rapl-daemon/main.go
Normal file
49
rapl-daemon/main.go
Normal file
|
@ -0,0 +1,49 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/json"
|
||||||
|
"fmt"
|
||||||
|
"html"
|
||||||
|
"log"
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
const powercapDir = "/sys/class/powercap/"
|
||||||
|
|
||||||
|
// Cap is the JSON payload that Elektron sends to request a power cap on a
// node, for example {"percentage": 75}.
type Cap struct {
	// Percentage is the requested cap, expressed as a percentage of each
	// power zone's maximum power. The explicit tag pins the wire name so it
	// does not silently depend on the Go field name.
	Percentage int `json:"percentage"`
}
|
||||||
|
|
||||||
|
func main() {
|
||||||
|
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
fmt.Fprintf(w, "Unsupported endpoint %s", html.EscapeString(r.URL.Path))
|
||||||
|
})
|
||||||
|
|
||||||
|
http.HandleFunc("/powercap", powercapEndpoint)
|
||||||
|
log.Fatal(http.ListenAndServe(":9090", nil))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handler powercapping HTTP API endpoint
|
||||||
|
func powercapEndpoint(w http.ResponseWriter, r *http.Request) {
|
||||||
|
var payload Cap
|
||||||
|
decoder := json.NewDecoder(r.Body)
|
||||||
|
err := decoder.Decode(&payload)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, "Error parsing payload: "+err.Error(), 400)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if payload.Percentage < 0 || payload.Percentage > 100 {
|
||||||
|
http.Error(w, "Bad payload: percentage must be between 0 and 100", 400)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
err = capNode(powercapDir, payload.Percentage)
|
||||||
|
if err != nil {
|
||||||
|
http.Error(w, err.Error(), 400)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Fprintf(w, "Capped node at %d percent", payload.Percentage)
|
||||||
|
}
|
88
rapl-daemon/util.go
Normal file
88
rapl-daemon/util.go
Normal file
|
@ -0,0 +1,88 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"io/ioutil"
|
||||||
|
"math"
|
||||||
|
"path/filepath"
|
||||||
|
"strconv"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
const raplPrefixCPU = "intel-rapl"
|
||||||
|
|
||||||
|
// constraint_0 is usually the longer window while constraint_1 is usually the shorter window
|
||||||
|
const maxPowerFileShortWindow = "constraint_0_max_power_uw"
|
||||||
|
const powerLimitFileShortWindow = "constraint_0_power_limit_uw"
|
||||||
|
|
||||||
|
// capNode uses pseudo files made available by the Linux kernel
|
||||||
|
// in order to capNode CPU power. More information is available at:
|
||||||
|
// https://www.kernel.org/doc/html/latest/power/powercap/powercap.html
|
||||||
|
func capNode(base string, percentage int) error {
|
||||||
|
|
||||||
|
if percentage <= 0 || percentage > 100 {
|
||||||
|
return fmt.Errorf("cap percentage must be between 0 (non-inclusive) and 100 (inclusive): %d", percentage)
|
||||||
|
}
|
||||||
|
|
||||||
|
files, err := ioutil.ReadDir(base)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, file := range files {
|
||||||
|
|
||||||
|
fields := strings.Split(file.Name(), ":")
|
||||||
|
|
||||||
|
// Fields should be in the form intel-rapl:X where X is the power zone
|
||||||
|
// We ignore sub-zones which follow the form intel-rapl:X:Y
|
||||||
|
if len(fields) != 2 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
if fields[0] == raplPrefixCPU {
|
||||||
|
maxPower, err := maxPower(filepath.Join(base, file.Name(), maxPowerFileShortWindow))
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("unable to retreive max power for zone ", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// We use floats to mitigate the possibility of an integer overflows.
|
||||||
|
powercap := uint(math.Ceil(float64(maxPower) * (float64(percentage) / 100)))
|
||||||
|
|
||||||
|
err = capZone(filepath.Join(base, file.Name(), powerLimitFileShortWindow), powercap)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("unable to write powercap value: ", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
fmt.Println(file.Name(), ": ", int(maxPower), ", ", int(powercap))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// maxPower reads the pseudo file at the path zone and returns its contents
// parsed as an unsigned base-10 integer. Surrounding whitespace, such as a
// trailing newline, is ignored. A zero value and an error are returned if the
// file cannot be read or does not hold a valid unsigned 64-bit integer.
//
// NOTE(review): the callers in this file pass a *_max_power_uw file, so the
// result is presumably in microwatts rather than watts — confirm.
func maxPower(zone string) (uint64, error) {
	raw, readErr := ioutil.ReadFile(zone)
	if readErr != nil {
		return 0, readErr
	}

	text := strings.TrimSpace(string(raw))

	// ParseUint returns a non-zero value on range errors, so the error is
	// checked explicitly in order to always hand back 0 on failure.
	value, parseErr := strconv.ParseUint(text, 10, 64)
	if parseErr != nil {
		return 0, parseErr
	}

	return value, nil
}
|
||||||
|
|
||||||
|
// capZone caps a power zone by writing value, formatted as a base-10
// integer, to the power limit pseudo file at the path zone. It returns any
// error reported while writing the file.
//
// NOTE(review): the caller in this file passes a *_power_limit_uw file, so
// value is presumably in microwatts rather than watts — confirm.
func capZone(zone string, value uint) error {
	// A plain string(value) conversion would treat value as a Unicode code
	// point and write its UTF-8 encoding; the file needs decimal digits.
	limit := strconv.FormatUint(uint64(value), 10)

	return ioutil.WriteFile(zone, []byte(limit), 0644)
}
|
14
rapl-daemon/util_test.go
Normal file
14
rapl-daemon/util_test.go
Normal file
|
@ -0,0 +1,14 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import "testing"
|
||||||
|
|
||||||
|
// TODO(rdelvalle): Add more thourough testing. Generate mock files
|
||||||
|
// that mimic the powercap subsystem and create test to operate on it.
|
||||||
|
func TestCap(t *testing.T) {
|
||||||
|
|
||||||
|
err := capNode("/sys/devices/virtual/powercap/intel-rapl", 95)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
t.Fail()
|
||||||
|
}
|
||||||
|
}
|
Reference in a new issue