From ffa3f47dc66b5f4ee80d38a436b1365a7f5c06c3 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Thu, 22 Dec 2016 22:58:54 -0500 Subject: [PATCH 001/102] From 2e4e52df79f859b0f8ed7aba4a6314f786383a62 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Mon, 5 Sep 2016 01:20:33 -0400 Subject: [PATCH 002/102] First commit --- README.md | 1 + 1 file changed, 1 insertion(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 0000000..5405928 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +Inital repository for electron framework From 120a13432c02d2feee8333bbf096ac1a40d36b12 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Thu, 8 Sep 2016 02:06:24 -0400 Subject: [PATCH 003/102] Initial transformation from RENDLER to electron --- scheduler.go | 277 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 277 insertions(+) create mode 100644 scheduler.go diff --git a/scheduler.go b/scheduler.go new file mode 100644 index 0000000..73ae041 --- /dev/null +++ b/scheduler.go @@ -0,0 +1,277 @@ +package main + +import ( + "flag" + "fmt" + "github.com/golang/protobuf/proto" + mesos "github.com/mesos/mesos-go/mesosproto" + "github.com/mesos/mesos-go/mesosutil" + sched "github.com/mesos/mesos-go/scheduler" + "log" + "os" + "os/signal" + "path/filepath" + "time" + "bitbucket.org/bingcloud/electron/states" +) + +const ( + taskCPUs = 0.1 + taskMem = 32.0 + shutdownTimeout = time.Duration(30) * time.Second +) + +const ( + dockerCommand = "echo Hello_World!" +) + +var ( + defaultFilter = &mesos.Filters{RefuseSeconds: proto.Float64(1)} +) + +// maxTasksForOffer computes how many tasks can be launched using a given offer +func maxTasksForOffer(offer *mesos.Offer) int { + count := 0 + + var cpus, mem, watts float64 + + for _, resource := range offer.Resources { + switch resource.GetName() { + case "cpus": + cpus += *resource.GetScalar().Value + case "mem": + mem += *resource.GetScalar().Value + case "watts": + watts += *resource.GetScalar().Value + fmt.Println("Got watts!: ", *resource.GetScalar().Value) + } + } + + for cpus >= taskCPUs && mem >= taskMem { + count++ + cpus -= taskCPUs + mem -= taskMem + } + + return count +} + +// rendlerScheduler implements the Scheduler interface and stores +// the state needed for Rendler to function. +type electronScheduler struct { + tasksCreated int + tasksRunning int + + dockerExecutor *mesos.ExecutorInfo + renderExecutor *mesos.ExecutorInfo + + // This channel is closed when the program receives an interrupt, + // signalling that the program should shut down. 
+ shutdown chan struct{} + // This channel is closed after shutdown is closed, and only when all + // outstanding tasks have been cleaned up + done chan struct{} +} + +// New electron scheduler +func newElectronScheduler() *electronScheduler { + rendlerArtifacts := executorURIs() + + s := &electronScheduler{ + + dockerExecutor: &mesos.ExecutorInfo{ + ExecutorId: &mesos.ExecutorID{Value: proto.String("crawl-executor")}, + Command: &mesos.CommandInfo{ + Value: proto.String(dockerCommand), + Uris: rendlerArtifacts, + }, + Name: proto.String("Crawler"), + }, + + shutdown: make(chan struct{}), + done: make(chan struct{}), + } + return s +} + +func (s *electronScheduler) newTaskPrototype(offer *mesos.Offer) *mesos.TaskInfo { + taskID := s.tasksCreated + s.tasksCreated++ + return &mesos.TaskInfo{ + TaskId: &mesos.TaskID{ + Value: proto.String(fmt.Sprintf("RENDLER-%d", taskID)), + }, + SlaveId: offer.SlaveId, + Resources: []*mesos.Resource{ + mesosutil.NewScalarResource("cpus", taskCPUs), + mesosutil.NewScalarResource("mem", taskMem), + }, + } +} + +func (s *electronScheduler) newCrawlTask(url string, offer *mesos.Offer) *mesos.TaskInfo { + task := s.newTaskPrototype(offer) + task.Name = proto.String("Electron_" + *task.TaskId.Value) + task.Executor = s.dockerExecutor + task.Data = []byte(url) + return task +} + +func (s *electronScheduler) Registered( + _ sched.SchedulerDriver, + frameworkID *mesos.FrameworkID, + masterInfo *mesos.MasterInfo) { + log.Printf("Framework %s registered with master %s", frameworkID, masterInfo) +} + +func (s *electronScheduler) Reregistered(_ sched.SchedulerDriver, masterInfo *mesos.MasterInfo) { + log.Printf("Framework re-registered with master %s", masterInfo) +} + +func (s *electronScheduler) Disconnected(sched.SchedulerDriver) { + log.Println("Framework disconnected with master") +} + +func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { + log.Printf("Received %d resource offers", len(offers)) + for _, offer := range offers { + select { + case <-s.shutdown: + log.Println("Shutting down: declining offer on [", offer.Hostname, "]") + driver.DeclineOffer(offer.Id, defaultFilter) + if s.tasksRunning == 0 { + close(s.done) + } + continue + default: + } + + tasks := []*mesos.TaskInfo{} + tasksToLaunch := maxTasksForOffer(offer) + for tasksToLaunch > 0 { + fmt.Println("There is enough resources to launch a task!") + } + + if len(tasks) == 0 { + driver.DeclineOffer(offer.Id, defaultFilter) + } else { + driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, defaultFilter) + } + } +} + +func (s *electronScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) { + log.Printf("Received task status [%s] for task [%s]", states.NameFor(status.State), *status.TaskId.Value) + + if *status.State == mesos.TaskState_TASK_RUNNING { + s.tasksRunning++ + } else if states.IsTerminal(status.State) { + s.tasksRunning-- + if s.tasksRunning == 0 { + select { + case <-s.shutdown: + close(s.done) + default: + } + } + } +} + +func (s *electronScheduler) FrameworkMessage( + driver sched.SchedulerDriver, + executorID *mesos.ExecutorID, + slaveID *mesos.SlaveID, + message string) { + + log.Println("Getting a framework message: ", message) + switch *executorID.Value { + case *s.dockerExecutor.ExecutorId.Value: + log.Print("Received framework message ", message) + + default: + log.Printf("Received a framework message from some unknown source: %s", *executorID.Value) + } +} + +func (s *electronScheduler) OfferRescinded(_ 
sched.SchedulerDriver, offerID *mesos.OfferID) { + log.Printf("Offer %s rescinded", offerID) +} +func (s *electronScheduler) SlaveLost(_ sched.SchedulerDriver, slaveID *mesos.SlaveID) { + log.Printf("Slave %s lost", slaveID) +} +func (s *electronScheduler) ExecutorLost(_ sched.SchedulerDriver, executorID *mesos.ExecutorID, slaveID *mesos.SlaveID, status int) { + log.Printf("Executor %s on slave %s was lost", executorID, slaveID) +} + +func (s *electronScheduler) Error(_ sched.SchedulerDriver, err string) { + log.Printf("Receiving an error: %s", err) +} + +func executorURIs() []*mesos.CommandInfo_URI { + basePath, err := filepath.Abs(filepath.Dir(os.Args[0]) + "/../..") + if err != nil { + log.Fatal("Failed to find the path to RENDLER") + } + baseURI := fmt.Sprintf("%s/", basePath) + + pathToURI := func(path string, extract bool) *mesos.CommandInfo_URI { + return &mesos.CommandInfo_URI{ + Value: &path, + Extract: &extract, + } + } + + return []*mesos.CommandInfo_URI{ + pathToURI(baseURI+"render.js", false), + pathToURI(baseURI+"python/crawl_executor.py", false), + pathToURI(baseURI+"python/render_executor.py", false), + pathToURI(baseURI+"python/results.py", false), + pathToURI(baseURI+"python/task_state.py", false), + } +} + +func main() { + master := flag.String("master", "127.0.1.1:5050", "Location of leading Mesos master") + + flag.Parse() + + scheduler := newElectronScheduler() + driver, err := sched.NewMesosSchedulerDriver(sched.DriverConfig{ + Master: *master, + Framework: &mesos.FrameworkInfo{ + Name: proto.String("RENDLER"), + User: proto.String(""), + }, + Scheduler: scheduler, + }) + if err != nil { + log.Printf("Unable to create scheduler driver: %s", err) + return + } + + // Catch interrupt + go func() { + c := make(chan os.Signal, 1) + signal.Notify(c, os.Interrupt, os.Kill) + s := <-c + if s != os.Interrupt { + return + } + + log.Println("Electron is shutting down") + close(scheduler.shutdown) + + select { + case <-scheduler.done: + case <-time.After(shutdownTimeout): + } + + // Done shutting down + driver.Stop(false) + }() + + if status, err := driver.Run(); err != nil { + log.Printf("Framework stopped with status %s and error: %s\n", status.String(), err.Error()) + } + log.Println("Exiting...") +} From 62961d27d3d5f1cac8daf3772ad2e9824e54de42 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 10 Sep 2016 18:40:56 -0400 Subject: [PATCH 004/102] Adding helper functions from RENDLER --- .idea/libraries/GOPATH__electron_.xml | 14 +++++++++ scheduler.go | 5 ++-- states.go | 41 +++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 3 deletions(-) create mode 100644 .idea/libraries/GOPATH__electron_.xml create mode 100644 states.go diff --git a/.idea/libraries/GOPATH__electron_.xml b/.idea/libraries/GOPATH__electron_.xml new file mode 100644 index 0000000..e8a1f05 --- /dev/null +++ b/.idea/libraries/GOPATH__electron_.xml @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/scheduler.go b/scheduler.go index 73ae041..2ce4531 100644 --- a/scheduler.go +++ b/scheduler.go @@ -12,7 +12,6 @@ import ( "os/signal" "path/filepath" "time" - "bitbucket.org/bingcloud/electron/states" ) const ( @@ -161,11 +160,11 @@ func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers } func (s *electronScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) { - log.Printf("Received task status [%s] for task [%s]", states.NameFor(status.State), *status.TaskId.Value) + log.Printf("Received task status [%s] for task 
[%s]", NameFor(status.State), *status.TaskId.Value) if *status.State == mesos.TaskState_TASK_RUNNING { s.tasksRunning++ - } else if states.IsTerminal(status.State) { + } else if IsTerminal(status.State) { s.tasksRunning-- if s.tasksRunning == 0 { select { diff --git a/states.go b/states.go new file mode 100644 index 0000000..3a7a41b --- /dev/null +++ b/states.go @@ -0,0 +1,41 @@ +package main + +import ( +mesos "github.com/mesos/mesos-go/mesosproto" +) + +// NameFor returns the string name for a TaskState. +func NameFor(state *mesos.TaskState) string { + switch *state { + case mesos.TaskState_TASK_STAGING: + return "TASK_STAGING" + case mesos.TaskState_TASK_STARTING: + return "TASK_STARTING" + case mesos.TaskState_TASK_RUNNING: + return "TASK_RUNNING" + case mesos.TaskState_TASK_FINISHED: + return "TASK_FINISHED" // TERMINAL + case mesos.TaskState_TASK_FAILED: + return "TASK_FAILED" // TERMINAL + case mesos.TaskState_TASK_KILLED: + return "TASK_KILLED" // TERMINAL + case mesos.TaskState_TASK_LOST: + return "TASK_LOST" // TERMINAL + default: + return "UNKNOWN" + } +} + +// IsTerminal determines if a TaskState is a terminal state, i.e. if it singals +// that the task has stopped running. +func IsTerminal(state *mesos.TaskState) bool { + switch *state { + case mesos.TaskState_TASK_FINISHED, + mesos.TaskState_TASK_FAILED, + mesos.TaskState_TASK_KILLED, + mesos.TaskState_TASK_LOST: + return true + default: + return false + } +} \ No newline at end of file From 0e7832e55a718624900e949b437a0dced92f53da Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 10 Sep 2016 18:41:53 -0400 Subject: [PATCH 005/102] Removing uncessary xml file --- .idea/libraries/GOPATH__electron_.xml | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 .idea/libraries/GOPATH__electron_.xml diff --git a/.idea/libraries/GOPATH__electron_.xml b/.idea/libraries/GOPATH__electron_.xml deleted file mode 100644 index e8a1f05..0000000 --- a/.idea/libraries/GOPATH__electron_.xml +++ /dev/null @@ -1,14 +0,0 @@ - - - - - - - - - - - - - - \ No newline at end of file From ddf1d86da1236e1f3a9ae6e1f9cfc7c3e43c6f41 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Thu, 15 Sep 2016 15:53:56 -0400 Subject: [PATCH 006/102] Framework is now able to sucessfully launch a docker task --- scheduler.go | 138 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 83 insertions(+), 55 deletions(-) diff --git a/scheduler.go b/scheduler.go index 2ce4531..bef2610 100644 --- a/scheduler.go +++ b/scheduler.go @@ -10,13 +10,10 @@ import ( "log" "os" "os/signal" - "path/filepath" "time" ) const ( - taskCPUs = 0.1 - taskMem = 32.0 shutdownTimeout = time.Duration(30) * time.Second ) @@ -28,9 +25,51 @@ var ( defaultFilter = &mesos.Filters{RefuseSeconds: proto.Float64(1)} ) -// maxTasksForOffer computes how many tasks can be launched using a given offer -func maxTasksForOffer(offer *mesos.Offer) int { - count := 0 +type Task struct { + cpu float64 + mem float64 + watts float64 + image string +} + +// NameFor returns the string name for a TaskState. 
+func NameFor(state *mesos.TaskState) string { + switch *state { + case mesos.TaskState_TASK_STAGING: + return "TASK_STAGING" + case mesos.TaskState_TASK_STARTING: + return "TASK_STARTING" + case mesos.TaskState_TASK_RUNNING: + return "TASK_RUNNING" + case mesos.TaskState_TASK_FINISHED: + return "TASK_FINISHED" // TERMINAL + case mesos.TaskState_TASK_FAILED: + return "TASK_FAILED" // TERMINAL + case mesos.TaskState_TASK_KILLED: + return "TASK_KILLED" // TERMINAL + case mesos.TaskState_TASK_LOST: + return "TASK_LOST" // TERMINAL + default: + return "UNKNOWN" + } +} + +// IsTerminal determines if a TaskState is a terminal state, i.e. if it singals +// that the task has stopped running. +func IsTerminal(state *mesos.TaskState) bool { + switch *state { + case mesos.TaskState_TASK_FINISHED, + mesos.TaskState_TASK_FAILED, + mesos.TaskState_TASK_KILLED, + mesos.TaskState_TASK_LOST: + return true + default: + return false + } +} + +// Decides if to take an offer or not +func offerDecision(offer *mesos.Offer) bool { var cpus, mem, watts float64 @@ -46,13 +85,18 @@ func maxTasksForOffer(offer *mesos.Offer) int { } } - for cpus >= taskCPUs && mem >= taskMem { - count++ - cpus -= taskCPUs - mem -= taskMem + var taskCPUs, taskMem, taskWatts float64 + + // Insert calculation here + taskWatts = 50 + taskMem = 4096 + taskCPUs = 3.0 + + if cpus >= taskCPUs && mem >= taskMem && watts >= taskWatts { + return true } - return count + return false } // rendlerScheduler implements the Scheduler interface and stores @@ -60,9 +104,9 @@ func maxTasksForOffer(offer *mesos.Offer) int { type electronScheduler struct { tasksCreated int tasksRunning int + taskQueue []Task //FIFO dockerExecutor *mesos.ExecutorInfo - renderExecutor *mesos.ExecutorInfo // This channel is closed when the program receives an interrupt, // signalling that the program should shut down. 
@@ -74,17 +118,15 @@ type electronScheduler struct { // New electron scheduler func newElectronScheduler() *electronScheduler { - rendlerArtifacts := executorURIs() s := &electronScheduler{ dockerExecutor: &mesos.ExecutorInfo{ - ExecutorId: &mesos.ExecutorID{Value: proto.String("crawl-executor")}, + ExecutorId: &mesos.ExecutorID{Value: proto.String("docker-runner")}, Command: &mesos.CommandInfo{ Value: proto.String(dockerCommand), - Uris: rendlerArtifacts, }, - Name: proto.String("Crawler"), + Name: proto.String("Runner"), }, shutdown: make(chan struct{}), @@ -93,26 +135,35 @@ func newElectronScheduler() *electronScheduler { return s } -func (s *electronScheduler) newTaskPrototype(offer *mesos.Offer) *mesos.TaskInfo { +func (s *electronScheduler) newTask(offer *mesos.Offer, taskCPUs, taskMem, taskWatts float64) *mesos.TaskInfo { taskID := s.tasksCreated s.tasksCreated++ return &mesos.TaskInfo{ TaskId: &mesos.TaskID{ - Value: proto.String(fmt.Sprintf("RENDLER-%d", taskID)), + Value: proto.String(fmt.Sprintf("Electron-%d", taskID)), }, SlaveId: offer.SlaveId, Resources: []*mesos.Resource{ mesosutil.NewScalarResource("cpus", taskCPUs), mesosutil.NewScalarResource("mem", taskMem), + mesosutil.NewScalarResource("watts", taskWatts), + }, + Container: &mesos.ContainerInfo{ + Type: mesos.ContainerInfo_DOCKER.Enum(), + Docker: &mesos.ContainerInfo_DockerInfo{ + Image: proto.String("gouravr/minife:v5"), + }, + }, } } -func (s *electronScheduler) newCrawlTask(url string, offer *mesos.Offer) *mesos.TaskInfo { - task := s.newTaskPrototype(offer) +func (s *electronScheduler) newDockerTask(offer *mesos.Offer, taskCPUs, taskMem, taskWatts float64) *mesos.TaskInfo { + task := s.newTask(offer, taskCPUs, taskMem, taskWatts) task.Name = proto.String("Electron_" + *task.TaskId.Value) - task.Executor = s.dockerExecutor - task.Data = []byte(url) + task.Command = &mesos.CommandInfo{ + Value: proto.String("cd src && mpirun -np 1 miniFE.x -nx 100 -ny 100 -nz 100"), + } return task } @@ -146,15 +197,15 @@ func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers } tasks := []*mesos.TaskInfo{} - tasksToLaunch := maxTasksForOffer(offer) - for tasksToLaunch > 0 { - fmt.Println("There is enough resources to launch a task!") - } - if len(tasks) == 0 { - driver.DeclineOffer(offer.Id, defaultFilter) - } else { + if offerDecision(offer) { + tasks = append(tasks, s.newDockerTask(offer, 3.0, 4096, 50)) driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, defaultFilter) + time.Sleep(15 * time.Minute) + } else { + fmt.Println("There is enough resources to launch a task!") + driver.DeclineOffer(offer.Id, defaultFilter) + time.Sleep(15 * time.Minute) } } } @@ -206,39 +257,15 @@ func (s *electronScheduler) Error(_ sched.SchedulerDriver, err string) { log.Printf("Receiving an error: %s", err) } -func executorURIs() []*mesos.CommandInfo_URI { - basePath, err := filepath.Abs(filepath.Dir(os.Args[0]) + "/../..") - if err != nil { - log.Fatal("Failed to find the path to RENDLER") - } - baseURI := fmt.Sprintf("%s/", basePath) - - pathToURI := func(path string, extract bool) *mesos.CommandInfo_URI { - return &mesos.CommandInfo_URI{ - Value: &path, - Extract: &extract, - } - } - - return []*mesos.CommandInfo_URI{ - pathToURI(baseURI+"render.js", false), - pathToURI(baseURI+"python/crawl_executor.py", false), - pathToURI(baseURI+"python/render_executor.py", false), - pathToURI(baseURI+"python/results.py", false), - pathToURI(baseURI+"python/task_state.py", false), - } -} - func main() { - master := 
flag.String("master", "127.0.1.1:5050", "Location of leading Mesos master") - + master := flag.String("master", "xavier:5050", "Location of leading Mesos master") flag.Parse() scheduler := newElectronScheduler() driver, err := sched.NewMesosSchedulerDriver(sched.DriverConfig{ Master: *master, Framework: &mesos.FrameworkInfo{ - Name: proto.String("RENDLER"), + Name: proto.String("Electron"), User: proto.String(""), }, Scheduler: scheduler, @@ -269,6 +296,7 @@ func main() { driver.Stop(false) }() + log.Printf("Starting...") if status, err := driver.Run(); err != nil { log.Printf("Framework stopped with status %s and error: %s\n", status.String(), err.Error()) } From 9af4e60189c56cf39fe58cde75da9c9285e55a03 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Thu, 15 Sep 2016 16:13:31 -0400 Subject: [PATCH 007/102] Moving states into scheduler --- states.go | 41 ----------------------------------------- 1 file changed, 41 deletions(-) delete mode 100644 states.go diff --git a/states.go b/states.go deleted file mode 100644 index 3a7a41b..0000000 --- a/states.go +++ /dev/null @@ -1,41 +0,0 @@ -package main - -import ( -mesos "github.com/mesos/mesos-go/mesosproto" -) - -// NameFor returns the string name for a TaskState. -func NameFor(state *mesos.TaskState) string { - switch *state { - case mesos.TaskState_TASK_STAGING: - return "TASK_STAGING" - case mesos.TaskState_TASK_STARTING: - return "TASK_STARTING" - case mesos.TaskState_TASK_RUNNING: - return "TASK_RUNNING" - case mesos.TaskState_TASK_FINISHED: - return "TASK_FINISHED" // TERMINAL - case mesos.TaskState_TASK_FAILED: - return "TASK_FAILED" // TERMINAL - case mesos.TaskState_TASK_KILLED: - return "TASK_KILLED" // TERMINAL - case mesos.TaskState_TASK_LOST: - return "TASK_LOST" // TERMINAL - default: - return "UNKNOWN" - } -} - -// IsTerminal determines if a TaskState is a terminal state, i.e. if it singals -// that the task has stopped running. -func IsTerminal(state *mesos.TaskState) bool { - switch *state { - case mesos.TaskState_TASK_FINISHED, - mesos.TaskState_TASK_FAILED, - mesos.TaskState_TASK_KILLED, - mesos.TaskState_TASK_LOST: - return true - default: - return false - } -} \ No newline at end of file From 66fcf8fd8e7964406004543bb1c498e180e3999f Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Thu, 15 Sep 2016 16:31:23 -0400 Subject: [PATCH 008/102] Removed sleeps that were causing framework to hang. 
--- scheduler.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/scheduler.go b/scheduler.go index bef2610..5dae4c7 100644 --- a/scheduler.go +++ b/scheduler.go @@ -201,11 +201,9 @@ func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers if offerDecision(offer) { tasks = append(tasks, s.newDockerTask(offer, 3.0, 4096, 50)) driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, defaultFilter) - time.Sleep(15 * time.Minute) } else { fmt.Println("There is enough resources to launch a task!") driver.DeclineOffer(offer.Id, defaultFilter) - time.Sleep(15 * time.Minute) } } } From 2db3ecbf0b3694e08ca0ce8c36d19bdbb8df8fff Mon Sep 17 00:00:00 2001 From: Gourav Date: Fri, 16 Sep 2016 20:03:36 +0000 Subject: [PATCH 009/102] initial pcp logging script --- pcp.go | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 pcp.go diff --git a/pcp.go b/pcp.go new file mode 100644 index 0000000..95e907e --- /dev/null +++ b/pcp.go @@ -0,0 +1,25 @@ +package main + +import ( + "fmt" + "log" + "os" + "os/exec" +) + +func main() { + cmd := exec.Command("sh", "-c", "pmdumptext -m -l -o -d , -c config") + stdout, err := os.Create("./output.txt") + cmd.Stdout = stdout + fmt.Println("PCP started: ", stdout) + + if err != nil { + log.Fatal(err) + } + if err := cmd.Start(); err != nil { + log.Fatal(err) + } + if err := cmd.Wait(); err != nil { + log.Fatal(err) + } +} \ No newline at end of file From 9097564b0b54979c1009244d15b4444668a6b047 Mon Sep 17 00:00:00 2001 From: Gourav Date: Fri, 16 Sep 2016 20:04:26 +0000 Subject: [PATCH 010/102] basic configuration for pcp --- config | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 config diff --git a/config b/config new file mode 100644 index 0000000..c0fb735 --- /dev/null +++ b/config @@ -0,0 +1,72 @@ +stratos-001:kernel.all.load[5] +stratos-002:kernel.all.load[5] +stratos-003:kernel.all.load[5] +stratos-004:kernel.all.load[5] +stratos-005:kernel.all.load[5] +stratos-006:kernel.all.load[5] +stratos-007:kernel.all.load[5] +stratos-008:kernel.all.load[5] + +stratos-001:kernel.all.cpu.user +stratos-002:kernel.all.cpu.user +stratos-003:kernel.all.cpu.user +stratos-004:kernel.all.cpu.user +stratos-005:kernel.all.cpu.user +stratos-006:kernel.all.cpu.user +stratos-007:kernel.all.cpu.user +stratos-008:kernel.all.cpu.user + +stratos-001:kernel.all.cpu.sys +stratos-002:kernel.all.cpu.sys +stratos-003:kernel.all.cpu.sys +stratos-004:kernel.all.cpu.sys +stratos-005:kernel.all.cpu.sys +stratos-006:kernel.all.cpu.sys +stratos-007:kernel.all.cpu.sys +stratos-008:kernel.all.cpu.sys + +stratos-001:kernel.all.cpu.idle +stratos-002:kernel.all.cpu.idle +stratos-003:kernel.all.cpu.idle +stratos-004:kernel.all.cpu.idle +stratos-005:kernel.all.cpu.idle +stratos-006:kernel.all.cpu.idle +stratos-007:kernel.all.cpu.idle +stratos-008:kernel.all.cpu.idle + +stratos-001:mem.util.free +stratos-002:mem.util.free +stratos-003:mem.util.free +stratos-004:mem.util.free +stratos-005:mem.util.free +stratos-006:mem.util.free +stratos-007:mem.util.free +stratos-008:mem.util.free + +stratos-001:mem.util.used +stratos-002:mem.util.used +stratos-003:mem.util.used +stratos-004:mem.util.used +stratos-005:mem.util.used +stratos-006:mem.util.used +stratos-007:mem.util.used +stratos-008:mem.util.used + +#RAPL CPU PKG +stratos-001:perfevent.hwcounters.rapl__RAPL_ENERGY_PKG.value +stratos-002:perfevent.hwcounters.rapl__RAPL_ENERGY_PKG.value +stratos-003:perfevent.hwcounters.rapl__RAPL_ENERGY_PKG.value 
+stratos-004:perfevent.hwcounters.rapl__RAPL_ENERGY_PKG.value +stratos-005:perfevent.hwcounters.rapl__RAPL_ENERGY_PKG.value +stratos-006:perfevent.hwcounters.rapl__RAPL_ENERGY_PKG.value +stratos-007:perfevent.hwcounters.rapl__RAPL_ENERGY_PKG.value +stratos-008:perfevent.hwcounters.rapl__RAPL_ENERGY_PKG.value +#RAPL DRAM +stratos-001:perfevent.hwcounters.rapl__RAPL_ENERGY_DRAM.value +stratos-002:perfevent.hwcounters.rapl__RAPL_ENERGY_DRAM.value +stratos-003:perfevent.hwcounters.rapl__RAPL_ENERGY_DRAM.value +stratos-004:perfevent.hwcounters.rapl__RAPL_ENERGY_DRAM.value +stratos-005:perfevent.hwcounters.rapl__RAPL_ENERGY_DRAM.value +stratos-006:perfevent.hwcounters.rapl__RAPL_ENERGY_DRAM.value +stratos-007:perfevent.hwcounters.rapl__RAPL_ENERGY_DRAM.value +stratos-008:perfevent.hwcounters.rapl__RAPL_ENERGY_DRAM.value From 150b1fa35069f365f35ed583873c4656344b4d7f Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Fri, 16 Sep 2016 16:14:41 -0400 Subject: [PATCH 011/102] Adding todo to README --- README.md | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5405928..b222a7f 100644 --- a/README.md +++ b/README.md @@ -1 +1,10 @@ -Inital repository for electron framework +Electron: Power as a first class citizen + + +To Do: +* Define schema for what workload would look like +* Add queue for jobs to be executed +* Create metrics for each task launched [Time to schedule, run time, power used] +* Have calibration phase? + + From 54b69f35e312dd4aa20f3c9cbd83cc1310f494e9 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Fri, 16 Sep 2016 16:16:33 -0400 Subject: [PATCH 012/102] Adding formatting to README --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index b222a7f..2ef954d 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ -Electron: Power as a first class citizen - +# Electron: Power as a first class citizen +====================================== To Do: -* Define schema for what workload would look like -* Add queue for jobs to be executed -* Create metrics for each task launched [Time to schedule, run time, power used] -* Have calibration phase? + * Define schema for what workload would look like + * Add queue for jobs to be executed + * Create metrics for each task launched [Time to schedule, run time, power used] + * Have calibration phase? 
From 08de958da27b15d99591c7fcfde21dc2a8d8b817 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Fri, 16 Sep 2016 20:17:08 +0000 Subject: [PATCH 013/102] README.md edited online with Bitbucket --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 2ef954d..a4fb9c4 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ ====================================== To Do: + * Define schema for what workload would look like * Add queue for jobs to be executed * Create metrics for each task launched [Time to schedule, run time, power used] From 34a0b0aef95f069ad265a11defda611a6510bd86 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Fri, 16 Sep 2016 20:17:46 +0000 Subject: [PATCH 014/102] README.md edited online with Bitbucket --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a4fb9c4..9ab9e97 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Electron: Power as a first class citizen +Electron: Power as a first class citizen ====================================== To Do: From 6b76e45d6d20a52bb13bcc0126a603f94ba8f59a Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Fri, 16 Sep 2016 16:18:59 -0400 Subject: [PATCH 015/102] Changing tagline --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9ab9e97..4d5c250 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -Electron: Power as a first class citizen +Electron: A power budget manager ====================================== To Do: From 8034222d1e60277bd4e72fd70278146900294a82 Mon Sep 17 00:00:00 2001 From: Gourav Date: Fri, 16 Sep 2016 21:37:23 +0000 Subject: [PATCH 016/102] making filenames more dynamic --- pcp.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pcp.go b/pcp.go index 95e907e..2f008c6 100644 --- a/pcp.go +++ b/pcp.go @@ -5,13 +5,15 @@ import ( "log" "os" "os/exec" + "time" ) func main() { cmd := exec.Command("sh", "-c", "pmdumptext -m -l -o -d , -c config") - stdout, err := os.Create("./output.txt") + time := time.Now().Format("200601021504") + stdout, err := os.Create("./"+time+".txt") cmd.Stdout = stdout - fmt.Println("PCP started: ", stdout) + fmt.Println("PCP started: ") if err != nil { log.Fatal(err) @@ -22,4 +24,4 @@ func main() { if err := cmd.Wait(); err != nil { log.Fatal(err) } -} \ No newline at end of file +} From bc5e959adefdbc76c06dbb84871f251adfde5d20 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Fri, 16 Sep 2016 19:06:53 -0400 Subject: [PATCH 017/102] Scheduler now has a first fit algorithm based on memory, cpu, and watts. Watts need to be set by the user but should be calculated through heuristics. Framework now also works by having a set of tasks and launching them. --- README.md | 3 +- pcp.go | 2 +- scheduler.go | 135 ++++++++++++++++++++---------------------------- states.go | 41 +++++++++++++++ task.go | 33 ++++++++++++ workload_1.json | 11 ++++ 6 files changed, 142 insertions(+), 83 deletions(-) create mode 100644 states.go create mode 100644 task.go create mode 100644 workload_1.json diff --git a/README.md b/README.md index 4d5c250..801a7d4 100644 --- a/README.md +++ b/README.md @@ -7,5 +7,4 @@ To Do: * Add queue for jobs to be executed * Create metrics for each task launched [Time to schedule, run time, power used] * Have calibration phase? 
- - + * Add ability to use constraints diff --git a/pcp.go b/pcp.go index 95e907e..872a38a 100644 --- a/pcp.go +++ b/pcp.go @@ -7,7 +7,7 @@ import ( "os/exec" ) -func main() { +func PCP() { cmd := exec.Command("sh", "-c", "pmdumptext -m -l -o -d , -c config") stdout, err := os.Create("./output.txt") cmd.Stdout = stdout diff --git a/scheduler.go b/scheduler.go index 5dae4c7..3dbddb7 100644 --- a/scheduler.go +++ b/scheduler.go @@ -17,59 +17,12 @@ const ( shutdownTimeout = time.Duration(30) * time.Second ) -const ( - dockerCommand = "echo Hello_World!" -) - var ( defaultFilter = &mesos.Filters{RefuseSeconds: proto.Float64(1)} ) -type Task struct { - cpu float64 - mem float64 - watts float64 - image string -} - -// NameFor returns the string name for a TaskState. -func NameFor(state *mesos.TaskState) string { - switch *state { - case mesos.TaskState_TASK_STAGING: - return "TASK_STAGING" - case mesos.TaskState_TASK_STARTING: - return "TASK_STARTING" - case mesos.TaskState_TASK_RUNNING: - return "TASK_RUNNING" - case mesos.TaskState_TASK_FINISHED: - return "TASK_FINISHED" // TERMINAL - case mesos.TaskState_TASK_FAILED: - return "TASK_FAILED" // TERMINAL - case mesos.TaskState_TASK_KILLED: - return "TASK_KILLED" // TERMINAL - case mesos.TaskState_TASK_LOST: - return "TASK_LOST" // TERMINAL - default: - return "UNKNOWN" - } -} - -// IsTerminal determines if a TaskState is a terminal state, i.e. if it singals -// that the task has stopped running. -func IsTerminal(state *mesos.TaskState) bool { - switch *state { - case mesos.TaskState_TASK_FINISHED, - mesos.TaskState_TASK_FAILED, - mesos.TaskState_TASK_KILLED, - mesos.TaskState_TASK_LOST: - return true - default: - return false - } -} - // Decides if to take an offer or not -func offerDecision(offer *mesos.Offer) bool { +func offerDecision(offer *mesos.Offer, task Task) bool { var cpus, mem, watts float64 @@ -81,18 +34,14 @@ func offerDecision(offer *mesos.Offer) bool { mem += *resource.GetScalar().Value case "watts": watts += *resource.GetScalar().Value - fmt.Println("Got watts!: ", *resource.GetScalar().Value) } } - var taskCPUs, taskMem, taskWatts float64 - // Insert calculation here - taskWatts = 50 - taskMem = 4096 - taskCPUs = 3.0 + //TODO: Insert watts calculation here instead of taking them as a parameter - if cpus >= taskCPUs && mem >= taskMem && watts >= taskWatts { + + if cpus >= task.CPU && mem >= task.RAM && watts >= task.Watts { return true } @@ -104,7 +53,7 @@ func offerDecision(offer *mesos.Offer) bool { type electronScheduler struct { tasksCreated int tasksRunning int - taskQueue []Task //FIFO + tasks []Task dockerExecutor *mesos.ExecutorInfo @@ -117,56 +66,48 @@ type electronScheduler struct { } // New electron scheduler -func newElectronScheduler() *electronScheduler { +func newElectronScheduler(tasks []Task) *electronScheduler { s := &electronScheduler{ dockerExecutor: &mesos.ExecutorInfo{ ExecutorId: &mesos.ExecutorID{Value: proto.String("docker-runner")}, - Command: &mesos.CommandInfo{ - Value: proto.String(dockerCommand), - }, Name: proto.String("Runner"), }, - + tasks: tasks, shutdown: make(chan struct{}), done: make(chan struct{}), } return s } -func (s *electronScheduler) newTask(offer *mesos.Offer, taskCPUs, taskMem, taskWatts float64) *mesos.TaskInfo { +func (s *electronScheduler) newTask(offer *mesos.Offer, task Task) *mesos.TaskInfo { taskID := s.tasksCreated s.tasksCreated++ return &mesos.TaskInfo{ + Name: proto.String("Electron_" + fmt.Sprintf("Electron-%d", taskID)), TaskId: &mesos.TaskID{ Value: 
proto.String(fmt.Sprintf("Electron-%d", taskID)), }, SlaveId: offer.SlaveId, Resources: []*mesos.Resource{ - mesosutil.NewScalarResource("cpus", taskCPUs), - mesosutil.NewScalarResource("mem", taskMem), - mesosutil.NewScalarResource("watts", taskWatts), + mesosutil.NewScalarResource("cpus", task.CPU), + mesosutil.NewScalarResource("mem", task.RAM), + mesosutil.NewScalarResource("watts", task.Watts), + }, + Command: &mesos.CommandInfo{ + Value: proto.String(task.CMD), }, Container: &mesos.ContainerInfo{ Type: mesos.ContainerInfo_DOCKER.Enum(), Docker: &mesos.ContainerInfo_DockerInfo{ - Image: proto.String("gouravr/minife:v5"), + Image: proto.String(task.Image), }, }, } } -func (s *electronScheduler) newDockerTask(offer *mesos.Offer, taskCPUs, taskMem, taskWatts float64) *mesos.TaskInfo { - task := s.newTask(offer, taskCPUs, taskMem, taskWatts) - task.Name = proto.String("Electron_" + *task.TaskId.Value) - task.Command = &mesos.CommandInfo{ - Value: proto.String("cd src && mpirun -np 1 miniFE.x -nx 100 -ny 100 -nz 100"), - } - return task -} - func (s *electronScheduler) Registered( _ sched.SchedulerDriver, frameworkID *mesos.FrameworkID, @@ -196,15 +137,36 @@ func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers default: } + if(len(s.tasks) <= 0) { + log.Println("Done with scheduling all tasks...") + os.Exit(0) + } + tasks := []*mesos.TaskInfo{} - if offerDecision(offer) { - tasks = append(tasks, s.newDockerTask(offer, 3.0, 4096, 50)) - driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, defaultFilter) - } else { + // First fit strategy + + taken := false + for i, task := range s.tasks { + // Decision to take the offer or not + if offerDecision(offer, task) { + tasks = append(tasks, s.newTask(offer, task)) + driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, defaultFilter) + + // Delete scheduled task + s.tasks[i] = s.tasks[len(s.tasks)-1] + s.tasks = s.tasks[:len(s.tasks)-1] + taken = true + + } + } + + // If there was no match for the task + if !taken { fmt.Println("There is enough resources to launch a task!") driver.DeclineOffer(offer.Id, defaultFilter) } + } } @@ -257,9 +219,22 @@ func (s *electronScheduler) Error(_ sched.SchedulerDriver, err string) { func main() { master := flag.String("master", "xavier:5050", "Location of leading Mesos master") + tasksFile := flag.String("tasks", "", "JSON file containing task definitions") flag.Parse() - scheduler := newElectronScheduler() + + if *tasksFile == "" { + fmt.Println("No file containing tasks specifiction provided.") + os.Exit(1) + } + + tasks, err := TasksFromJSON(*tasksFile) + if(err != nil || len(tasks) == 0) { + fmt.Println("Invalid tasks specification file provided") + os.Exit(1) + } + + scheduler := newElectronScheduler(tasks) driver, err := sched.NewMesosSchedulerDriver(sched.DriverConfig{ Master: *master, Framework: &mesos.FrameworkInfo{ diff --git a/states.go b/states.go new file mode 100644 index 0000000..d3b8afa --- /dev/null +++ b/states.go @@ -0,0 +1,41 @@ +package main + +import ( + mesos "github.com/mesos/mesos-go/mesosproto" +) + +// NameFor returns the string name for a TaskState. 
+func NameFor(state *mesos.TaskState) string { + switch *state { + case mesos.TaskState_TASK_STAGING: + return "TASK_STAGING" + case mesos.TaskState_TASK_STARTING: + return "TASK_STARTING" + case mesos.TaskState_TASK_RUNNING: + return "TASK_RUNNING" + case mesos.TaskState_TASK_FINISHED: + return "TASK_FINISHED" // TERMINAL + case mesos.TaskState_TASK_FAILED: + return "TASK_FAILED" // TERMINAL + case mesos.TaskState_TASK_KILLED: + return "TASK_KILLED" // TERMINAL + case mesos.TaskState_TASK_LOST: + return "TASK_LOST" // TERMINAL + default: + return "UNKNOWN" + } +} + +// IsTerminal determines if a TaskState is a terminal state, i.e. if it singals +// that the task has stopped running. +func IsTerminal(state *mesos.TaskState) bool { + switch *state { + case mesos.TaskState_TASK_FINISHED, + mesos.TaskState_TASK_FAILED, + mesos.TaskState_TASK_KILLED, + mesos.TaskState_TASK_LOST: + return true + default: + return false + } +} diff --git a/task.go b/task.go new file mode 100644 index 0000000..467379c --- /dev/null +++ b/task.go @@ -0,0 +1,33 @@ +package main + +import ( + "encoding/json" + "os" + "github.com/pkg/errors" +) + +type Task struct { + CPU float64 `json: "cpu"` + RAM float64 `json: "ram"` + Watts float64 `json: "watts"` + Image string `json: "image"` + CMD string `json: "cmd"` + Instances int `default 1, json: "inst"` +} + +func TasksFromJSON(uri string) ([]Task, error) { + + var tasks []Task + + file, err := os.Open(uri) + if err != nil { + return nil, errors.Wrap(err, "Error opening file") + } + + err = json.NewDecoder(file).Decode(&tasks) + if err != nil { + return nil, errors.Wrap(err, "Error unmarshalling") + } + + return tasks, nil +} \ No newline at end of file diff --git a/workload_1.json b/workload_1.json new file mode 100644 index 0000000..db50981 --- /dev/null +++ b/workload_1.json @@ -0,0 +1,11 @@ +[ + { + "cpu": 3.0, + "ram": 4096, + "watts": 50, + "image": "gouravr/minife:v5", + "cmd": "cd src && mpirun -np 1 miniFE.x -nx 100 -ny 100 -nz 100", + "inst": 1 + } + +] From 6e2a627038004dbb3e4d108477513919f72d2c2e Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 17 Sep 2016 18:55:35 -0400 Subject: [PATCH 018/102] Electron now launches a series of benchmarks and then shuts down when everything has been sucessfully scheduled --- scheduler.go | 44 +++++++++++++++++++++++++------------------- task.go | 12 ++++++------ workload_1.json | 2 +- 3 files changed, 32 insertions(+), 26 deletions(-) diff --git a/scheduler.go b/scheduler.go index 3dbddb7..36a9e55 100644 --- a/scheduler.go +++ b/scheduler.go @@ -9,7 +9,6 @@ import ( sched "github.com/mesos/mesos-go/scheduler" "log" "os" - "os/signal" "time" ) @@ -125,6 +124,7 @@ func (s *electronScheduler) Disconnected(sched.SchedulerDriver) { func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { log.Printf("Received %d resource offers", len(offers)) + for _, offer := range offers { select { case <-s.shutdown: @@ -137,10 +137,6 @@ func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers default: } - if(len(s.tasks) <= 0) { - log.Println("Done with scheduling all tasks...") - os.Exit(0) - } tasks := []*mesos.TaskInfo{} @@ -153,17 +149,22 @@ func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers tasks = append(tasks, s.newTask(offer, task)) driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, defaultFilter) - // Delete scheduled task - s.tasks[i] = s.tasks[len(s.tasks)-1] - s.tasks = s.tasks[:len(s.tasks)-1] - taken = true + fmt.Println("Inst: 
", *task.Instances) + *task.Instances-- + + if *task.Instances <= 0 { + // All instances of task have been scheduled + s.tasks[i] = s.tasks[len(s.tasks)-1] + s.tasks = s.tasks[:len(s.tasks)-1] + taken = true + } } } // If there was no match for the task if !taken { - fmt.Println("There is enough resources to launch a task!") + fmt.Println("There is not enough resources to launch a task!") driver.DeclineOffer(offer.Id, defaultFilter) } @@ -234,6 +235,11 @@ func main() { os.Exit(1) } + log.Println("Scheduling the following tasks:") + for _, task := range tasks { + fmt.Println(task) + } + scheduler := newElectronScheduler(tasks) driver, err := sched.NewMesosSchedulerDriver(sched.DriverConfig{ Master: *master, @@ -250,23 +256,23 @@ func main() { // Catch interrupt go func() { - c := make(chan os.Signal, 1) - signal.Notify(c, os.Interrupt, os.Kill) - s := <-c - if s != os.Interrupt { - return - } - log.Println("Electron is shutting down") - close(scheduler.shutdown) + for { + if (len(scheduler.tasks) <= 0) { + log.Println("Done with all tasks, shutting down") + close(scheduler.shutdown) + break + } + } select { case <-scheduler.done: - case <-time.After(shutdownTimeout): +// case <-time.After(shutdownTimeout): } // Done shutting down driver.Stop(false) + }() log.Printf("Starting...") diff --git a/task.go b/task.go index 467379c..2014932 100644 --- a/task.go +++ b/task.go @@ -7,12 +7,12 @@ import ( ) type Task struct { - CPU float64 `json: "cpu"` - RAM float64 `json: "ram"` - Watts float64 `json: "watts"` - Image string `json: "image"` - CMD string `json: "cmd"` - Instances int `default 1, json: "inst"` + CPU float64 `json:"cpu"` + RAM float64 `json:"ram"` + Watts float64 `json:"watts"` + Image string `json:"image"` + CMD string `json:"cmd"` + Instances *int `json:"inst"` } func TasksFromJSON(uri string) ([]Task, error) { diff --git a/workload_1.json b/workload_1.json index db50981..e4bb9ed 100644 --- a/workload_1.json +++ b/workload_1.json @@ -5,7 +5,7 @@ "watts": 50, "image": "gouravr/minife:v5", "cmd": "cd src && mpirun -np 1 miniFE.x -nx 100 -ny 100 -nz 100", - "inst": 1 + "inst": 10 } ] From 512e0e7b5caf4d06fda44cd55b4803b50267030c Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Mon, 19 Sep 2016 17:51:32 -0400 Subject: [PATCH 019/102] Bug fix for running multiple different benchmarks. Loop did not exit upon being able to take an offer. 
--- scheduler.go | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/scheduler.go b/scheduler.go index 36a9e55..81640fe 100644 --- a/scheduler.go +++ b/scheduler.go @@ -128,8 +128,10 @@ func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers for _, offer := range offers { select { case <-s.shutdown: - log.Println("Shutting down: declining offer on [", offer.Hostname, "]") + log.Println("Shutting down: declining offer on [", offer.GetHostname(), "]") driver.DeclineOffer(offer.Id, defaultFilter) + + log.Println("Number of tasks running: ", s.tasksRunning) if s.tasksRunning == 0 { close(s.done) } @@ -149,16 +151,21 @@ func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers tasks = append(tasks, s.newTask(offer, task)) driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, defaultFilter) + taken = true + fmt.Println("Inst: ", *task.Instances) *task.Instances-- if *task.Instances <= 0 { + fmt.Println("Tasks left: ", len(s.tasks)-1) + fmt.Println("Position: ", i) // All instances of task have been scheduled s.tasks[i] = s.tasks[len(s.tasks)-1] s.tasks = s.tasks[:len(s.tasks)-1] - taken = true } + break + } } From 992455048a69cf0eb41867e27d52c414f162fcf0 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Mon, 19 Sep 2016 20:25:10 -0400 Subject: [PATCH 020/102] Fixed deadlock issue while running a large number of benchmarks. Changed names of tasks to indicate what they are running. Added name to task schema to append it to the name more easily. --- scheduler.go | 53 ++++++++++++++++++++++++++++++---------------------- task.go | 1 + 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/scheduler.go b/scheduler.go index 81640fe..8f5feb3 100644 --- a/scheduler.go +++ b/scheduler.go @@ -18,11 +18,10 @@ const ( var ( defaultFilter = &mesos.Filters{RefuseSeconds: proto.Float64(1)} + longFilter = &mesos.Filters{RefuseSeconds: proto.Float64(1000)} ) -// Decides if to take an offer or not -func offerDecision(offer *mesos.Offer, task Task) bool { - +func OfferAgg(offer *mesos.Offer) (float64, float64, float64) { var cpus, mem, watts float64 for _, resource := range offer.Resources { @@ -36,10 +35,16 @@ func offerDecision(offer *mesos.Offer, task Task) bool { } } + return cpus, mem, watts +} + +// Decides if to take an offer or not +func TakeOffer(offer *mesos.Offer, task Task) bool { + + cpus, mem, watts := OfferAgg(offer) //TODO: Insert watts calculation here instead of taking them as a parameter - if cpus >= task.CPU && mem >= task.RAM && watts >= task.Watts { return true } @@ -84,7 +89,7 @@ func (s *electronScheduler) newTask(offer *mesos.Offer, task Task) *mesos.TaskIn taskID := s.tasksCreated s.tasksCreated++ return &mesos.TaskInfo{ - Name: proto.String("Electron_" + fmt.Sprintf("Electron-%d", taskID)), + Name: proto.String(fmt.Sprintf("Electron-%s-%d", task.Name, *task.Instances)), TaskId: &mesos.TaskID{ Value: proto.String(fmt.Sprintf("Electron-%d", taskID)), }, @@ -129,12 +134,9 @@ func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers select { case <-s.shutdown: log.Println("Shutting down: declining offer on [", offer.GetHostname(), "]") - driver.DeclineOffer(offer.Id, defaultFilter) + driver.DeclineOffer(offer.Id, longFilter) - log.Println("Number of tasks running: ", s.tasksRunning) - if s.tasksRunning == 0 { - close(s.done) - } + log.Println("Number of tasks still running: ", s.tasksRunning) continue default: } @@ -147,8 +149,10 @@ func (s *electronScheduler) ResourceOffers(driver 
sched.SchedulerDriver, offers taken := false for i, task := range s.tasks { // Decision to take the offer or not - if offerDecision(offer, task) { + if TakeOffer(offer, task) { tasks = append(tasks, s.newTask(offer, task)) + + log.Printf("Starting %s on [%s]\n", task.Name, offer.GetHostname()) driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, defaultFilter) taken = true @@ -157,21 +161,28 @@ func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers *task.Instances-- if *task.Instances <= 0 { - fmt.Println("Tasks left: ", len(s.tasks)-1) - fmt.Println("Position: ", i) - // All instances of task have been scheduled + // All instances of task have been scheduled, remove it s.tasks[i] = s.tasks[len(s.tasks)-1] s.tasks = s.tasks[:len(s.tasks)-1] + + + if(len(s.tasks) <= 0) { + log.Println("Done scheduling all tasks") + close(s.shutdown) + } } - break + break // Offer taken, move on } } // If there was no match for the task if !taken { - fmt.Println("There is not enough resources to launch a task!") + fmt.Println("There is not enough resources to launch a task:") + cpus, mem, watts := OfferAgg(offer) + + log.Printf("\n", cpus, mem, watts) driver.DeclineOffer(offer.Id, defaultFilter) } @@ -193,6 +204,7 @@ func (s *electronScheduler) StatusUpdate(driver sched.SchedulerDriver, status *m } } } + log.Printf("DONE: Task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) } func (s *electronScheduler) FrameworkMessage( @@ -264,12 +276,9 @@ func main() { // Catch interrupt go func() { - for { - if (len(scheduler.tasks) <= 0) { - log.Println("Done with all tasks, shutting down") - close(scheduler.shutdown) - break - } + select { + case <-scheduler.shutdown: + // case <-time.After(shutdownTimeout): } select { diff --git a/task.go b/task.go index 2014932..9d4bd01 100644 --- a/task.go +++ b/task.go @@ -7,6 +7,7 @@ import ( ) type Task struct { + Name string `json:"name"` CPU float64 `json:"cpu"` RAM float64 `json:"ram"` Watts float64 `json:"watts"` From 58c6bfe7a8d7d5f63a2f3f65aa62f907ad5b8d5b Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Thu, 22 Sep 2016 18:34:05 -0400 Subject: [PATCH 021/102] PCP code is now able to deal with receiving information asynchronously from pmdumptext --- metrics.go | 1 + pcp.go | 27 -------------------------- pcp/pcp.go | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 57 insertions(+), 27 deletions(-) create mode 100644 metrics.go delete mode 100644 pcp.go create mode 100644 pcp/pcp.go diff --git a/metrics.go b/metrics.go new file mode 100644 index 0000000..6b8a3c4 --- /dev/null +++ b/metrics.go @@ -0,0 +1 @@ +package electron diff --git a/pcp.go b/pcp.go deleted file mode 100644 index a45ba32..0000000 --- a/pcp.go +++ /dev/null @@ -1,27 +0,0 @@ -package main - -import ( - "fmt" - "log" - "os" - "os/exec" - "time" -) - -func PCP() { - cmd := exec.Command("sh", "-c", "pmdumptext -m -l -o -d , -c config") - time := time.Now().Format("200601021504") - stdout, err := os.Create("./"+time+".txt") - cmd.Stdout = stdout - fmt.Println("PCP started: ") - - if err != nil { - log.Fatal(err) - } - if err := cmd.Start(); err != nil { - log.Fatal(err) - } - if err := cmd.Wait(); err != nil { - log.Fatal(err) - } -} diff --git a/pcp/pcp.go b/pcp/pcp.go new file mode 100644 index 0000000..3f9c592 --- /dev/null +++ b/pcp/pcp.go @@ -0,0 +1,56 @@ +package main + +import ( + "fmt" + "log" + "os/exec" + "bufio" + "strings" +) + +func main() { + const pcpCommand string = "pmdumptext -m -l -f '' -t 1.0 -d , -c config" // 
We always want the most granular + cmd := exec.Command("sh", "-c", pcpCommand) +// time := time.Now().Format("200601021504") + +// stdout, err := os.Create("./"+time+".txt") + pipe, err := cmd.StdoutPipe() + + //cmd.Stdout = stdout + + scanner := bufio.NewScanner(pipe) + + go func() { + // Get names of the columns + scanner.Scan() + + headers := strings.Split(scanner.Text(), ",") + + for _, hostMetric := range headers { + split := strings.Split(hostMetric, ":") + fmt.Printf("Host %s: Metric: %s\n", split[0], split[1]) + } + + // Throw away first set of results + scanner.Scan() + + + seconds := 0 + for scanner.Scan() { + fmt.Println("Second ", seconds , " val: ", strings.Split(scanner.Text(), ",")) + seconds++ + } + }() + + fmt.Println("PCP started: ") + + if err != nil { + log.Fatal(err) + } + if err := cmd.Start(); err != nil { + log.Fatal(err) + } + if err := cmd.Wait(); err != nil { + log.Fatal(err) + } +} From 4e2a1d99fd6ffaad7f772e5eac78d5c08e89b4d5 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Thu, 22 Sep 2016 19:54:06 -0400 Subject: [PATCH 022/102] Tied metric columns to values, next step will be determining how to store them and retrieve them --- pcp/pcp.go | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/pcp/pcp.go b/pcp/pcp.go index 3f9c592..4388ebe 100644 --- a/pcp/pcp.go +++ b/pcp/pcp.go @@ -1,19 +1,19 @@ package main import ( + "bufio" "fmt" "log" "os/exec" - "bufio" "strings" ) func main() { const pcpCommand string = "pmdumptext -m -l -f '' -t 1.0 -d , -c config" // We always want the most granular cmd := exec.Command("sh", "-c", pcpCommand) -// time := time.Now().Format("200601021504") + // time := time.Now().Format("200601021504") -// stdout, err := os.Create("./"+time+".txt") + // stdout, err := os.Create("./"+time+".txt") pipe, err := cmd.StdoutPipe() //cmd.Stdout = stdout @@ -34,15 +34,20 @@ func main() { // Throw away first set of results scanner.Scan() - seconds := 0 for scanner.Scan() { - fmt.Println("Second ", seconds , " val: ", strings.Split(scanner.Text(), ",")) + fmt.Printf("Second: %d\n", seconds) + for i, val := range strings.Split(scanner.Text(), ",") { + fmt.Printf("host metric: %s val: %s\n", headers[i], val) + } + seconds++ + + fmt.Println("--------------------------------") } }() - fmt.Println("PCP started: ") + fmt.Println("PCP started: ") if err != nil { log.Fatal(err) From 3801fbd7d6ab2d6f18fe35e8247af0ccbef9f76a Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Thu, 22 Sep 2016 20:20:22 -0400 Subject: [PATCH 023/102] Detection of co-scheduled benchmarks is complete --- scheduler.go | 46 ++++++++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 16 deletions(-) diff --git a/scheduler.go b/scheduler.go index 8f5feb3..4c0c14c 100644 --- a/scheduler.go +++ b/scheduler.go @@ -21,6 +21,15 @@ var ( longFilter = &mesos.Filters{RefuseSeconds: proto.Float64(1000)} ) +func CoLocated(tasks map[string]bool) { + + for task := range tasks { + log.Println(task) + } + + fmt.Println("---------------------") +} + func OfferAgg(offer *mesos.Offer) (float64, float64, float64) { var cpus, mem, watts float64 @@ -58,8 +67,9 @@ type electronScheduler struct { tasksCreated int tasksRunning int tasks []Task + metrics map[string]Metric + running map[string]map[string]bool - dockerExecutor *mesos.ExecutorInfo // This channel is closed when the program receives an interrupt, // signalling that the program should shut down. 
@@ -73,25 +83,30 @@ type electronScheduler struct { func newElectronScheduler(tasks []Task) *electronScheduler { s := &electronScheduler{ - - dockerExecutor: &mesos.ExecutorInfo{ - ExecutorId: &mesos.ExecutorID{Value: proto.String("docker-runner")}, - Name: proto.String("Runner"), - }, tasks: tasks, shutdown: make(chan struct{}), done: make(chan struct{}), + running: make(map[string]map[string]bool), } return s } func (s *electronScheduler) newTask(offer *mesos.Offer, task Task) *mesos.TaskInfo { - taskID := s.tasksCreated + taskID := fmt.Sprintf("Electron-%s-%d", task.Name, *task.Instances) s.tasksCreated++ + + // If this is our first time running into this Agent + if _, ok := s.running[offer.GetSlaveId().GoString()]; !ok { + s.running[offer.GetSlaveId().GoString()] = make(map[string]bool) + } + + // Add task to list of tasks running on node + s.running[offer.GetSlaveId().GoString()][taskID] = true + return &mesos.TaskInfo{ - Name: proto.String(fmt.Sprintf("Electron-%s-%d", task.Name, *task.Instances)), + Name: proto.String(taskID), TaskId: &mesos.TaskID{ - Value: proto.String(fmt.Sprintf("Electron-%d", taskID)), + Value: proto.String(taskID), }, SlaveId: offer.SlaveId, Resources: []*mesos.Resource{ @@ -150,6 +165,10 @@ func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers for i, task := range s.tasks { // Decision to take the offer or not if TakeOffer(offer, task) { + + log.Println("Co-Located with: ") + CoLocated(s.running[offer.GetSlaveId().GoString()]) + tasks = append(tasks, s.newTask(offer, task)) log.Printf("Starting %s on [%s]\n", task.Name, offer.GetHostname()) @@ -195,6 +214,7 @@ func (s *electronScheduler) StatusUpdate(driver sched.SchedulerDriver, status *m if *status.State == mesos.TaskState_TASK_RUNNING { s.tasksRunning++ } else if IsTerminal(status.State) { + delete(s.running[status.GetSlaveId().GoString()],*status.TaskId.Value) s.tasksRunning-- if s.tasksRunning == 0 { select { @@ -214,13 +234,7 @@ func (s *electronScheduler) FrameworkMessage( message string) { log.Println("Getting a framework message: ", message) - switch *executorID.Value { - case *s.dockerExecutor.ExecutorId.Value: - log.Print("Received framework message ", message) - - default: - log.Printf("Received a framework message from some unknown source: %s", *executorID.Value) - } + log.Printf("Received a framework message from some unknown source: %s", *executorID.Value) } func (s *electronScheduler) OfferRescinded(_ sched.SchedulerDriver, offerID *mesos.OfferID) { From cd430eede00c0e7d71bcf673ee7c17de8c62bec2 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Mon, 26 Sep 2016 19:14:51 -0400 Subject: [PATCH 024/102] PCP is now controlled by main scheduler. It will start recording upon the acceptance of the very first offer. pcp is now it's own package. README has been updated with instructions on how to create workloads and running instructions. --- README.md | 41 ++++++++++++++++++++++++++++++--- pcp/pcp.go | 64 ++++++++++++++++++++++++++++++++++++++-------------- scheduler.go | 37 +++++++++++++++++++++++++----- 3 files changed, 116 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index 801a7d4..b0154c7 100644 --- a/README.md +++ b/README.md @@ -3,8 +3,43 @@ Electron: A power budget manager To Do: - * Define schema for what workload would look like - * Add queue for jobs to be executed * Create metrics for each task launched [Time to schedule, run time, power used] * Have calibration phase? 
- * Add ability to use constraints + * Add ability to use constraints + * Running average calculations https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average + + + +** Requires Performance-Copilot tool pmdumptext ** + + + +How to run: + +`./electron -workload ` + + +Workload schema: + +``` +[ + { + "name": "minife", + "cpu": 3.0, + "ram": 4096, + "watts": 50, + "image": "gouravr/minife:v5", + "cmd": "cd src && mpirun -np 1 miniFE.x -nx 100 -ny 100 -nz 100", + "inst": 9 + }, + { + "name": "dgemm", + "cpu": 3.0, + "ram": 4096, + "watts": 50, + "image": "gouravr/dgemm:v2", + "cmd": "/./mt-dgemm 1024", + "inst": 9 + } +] +``` \ No newline at end of file diff --git a/pcp/pcp.go b/pcp/pcp.go index 4388ebe..b9743ab 100644 --- a/pcp/pcp.go +++ b/pcp/pcp.go @@ -1,61 +1,91 @@ -package main +package pcp import ( "bufio" - "fmt" "log" "os/exec" - "strings" + "time" + "os" ) -func main() { - const pcpCommand string = "pmdumptext -m -l -f '' -t 1.0 -d , -c config" // We always want the most granular +func Start(quit chan struct{}, logging *bool) { + const pcpCommand string = "pmdumptext -m -l -f '' -t 1.0 -d , -c config" cmd := exec.Command("sh", "-c", pcpCommand) - // time := time.Now().Format("200601021504") + startTime := time.Now().Format("20060102150405") + + + logFile, err := os.Create("./"+startTime+".pcplog") + if err != nil { + log.Fatal(err) + } + + defer logFile.Close() - // stdout, err := os.Create("./"+time+".txt") pipe, err := cmd.StdoutPipe() - + if err != nil { + log.Fatal(err) + } //cmd.Stdout = stdout scanner := bufio.NewScanner(pipe) - go func() { + go func(logging *bool) { // Get names of the columns scanner.Scan() + // Write to logfile + logFile.WriteString(scanner.Text() + "\n") + + /* headers := strings.Split(scanner.Text(), ",") for _, hostMetric := range headers { split := strings.Split(hostMetric, ":") fmt.Printf("Host %s: Metric: %s\n", split[0], split[1]) } + */ // Throw away first set of results scanner.Scan() seconds := 0 for scanner.Scan() { + + + if(*logging) { + log.Println("Logging PCP...") + logFile.WriteString(scanner.Text() + "\n") + } + + /* fmt.Printf("Second: %d\n", seconds) for i, val := range strings.Split(scanner.Text(), ",") { fmt.Printf("host metric: %s val: %s\n", headers[i], val) - } + }*/ seconds++ - fmt.Println("--------------------------------") + // fmt.Println("--------------------------------") } - }() + }(logging) + + log.Println("PCP logging started") - fmt.Println("PCP started: ") - if err != nil { - log.Fatal(err) - } if err := cmd.Start(); err != nil { log.Fatal(err) } + + select{ + case <- quit: + log.Println("Stopping PCP logging in 5 seconds") + time.Sleep(5 * time.Second) + cmd.Process.Kill() + return + } + + /* if err := cmd.Wait(); err != nil { log.Fatal(err) - } + }*/ } diff --git a/scheduler.go b/scheduler.go index 4c0c14c..5b0e080 100644 --- a/scheduler.go +++ b/scheduler.go @@ -10,6 +10,7 @@ import ( "log" "os" "time" + "bitbucket.org/bingcloud/electron/pcp" ) const ( @@ -66,17 +67,26 @@ func TakeOffer(offer *mesos.Offer, task Task) bool { type electronScheduler struct { tasksCreated int tasksRunning int - tasks []Task - metrics map[string]Metric - running map[string]map[string]bool + tasks []Task + metrics map[string]Metric + running map[string]map[string]bool + + + // First set of PCP values are garbage values, signal to logger to start recording after + // we actually schedule a task + recordPCP bool // This channel is closed when the program receives an interrupt, // signalling that the program should shut down. 
- shutdown chan struct{} + shutdown chan struct{} // This channel is closed after shutdown is closed, and only when all // outstanding tasks have been cleaned up - done chan struct{} + done chan struct{} + + + // Controls when to shutdown pcp logging + pcpLog chan struct{} } // New electron scheduler @@ -86,7 +96,9 @@ func newElectronScheduler(tasks []Task) *electronScheduler { tasks: tasks, shutdown: make(chan struct{}), done: make(chan struct{}), + pcpLog: make(chan struct{}), running: make(map[string]map[string]bool), + recordPCP: false, } return s } @@ -95,6 +107,12 @@ func (s *electronScheduler) newTask(offer *mesos.Offer, task Task) *mesos.TaskIn taskID := fmt.Sprintf("Electron-%s-%d", task.Name, *task.Instances) s.tasksCreated++ + if !s.recordPCP { + // Turn on logging + s.recordPCP = true + time.Sleep(1 * time.Second) // Make sure we're recording by the time the first task starts + } + // If this is our first time running into this Agent if _, ok := s.running[offer.GetSlaveId().GoString()]; !ok { s.running[offer.GetSlaveId().GoString()] = make(map[string]bool) @@ -253,7 +271,7 @@ func (s *electronScheduler) Error(_ sched.SchedulerDriver, err string) { func main() { master := flag.String("master", "xavier:5050", "Location of leading Mesos master") - tasksFile := flag.String("tasks", "", "JSON file containing task definitions") + tasksFile := flag.String("workload", "", "JSON file containing task definitions") flag.Parse() @@ -287,16 +305,23 @@ func main() { return } + go pcp.Start(scheduler.pcpLog, &scheduler.recordPCP) + time.Sleep(1 * time.Second) + // Catch interrupt go func() { + // Signals we have scheduled every task we have select { case <-scheduler.shutdown: // case <-time.After(shutdownTimeout): } + // Signals all tasks have finished select { case <-scheduler.done: + close(scheduler.pcpLog) + time.Sleep(5 * time.Second) //Wait for PCP to log a few more seconds // case <-time.After(shutdownTimeout): } From 0503e2ab5693d022b56f3dd3d115cf8d72f6cfb2 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Mon, 26 Sep 2016 19:29:30 -0400 Subject: [PATCH 025/102] Added option to run without taking watts --- scheduler.go | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scheduler.go b/scheduler.go index 5b0e080..9735291 100644 --- a/scheduler.go +++ b/scheduler.go @@ -20,6 +20,7 @@ const ( var ( defaultFilter = &mesos.Filters{RefuseSeconds: proto.Float64(1)} longFilter = &mesos.Filters{RefuseSeconds: proto.Float64(1000)} + IGNORE_WATTS = false ) func CoLocated(tasks map[string]bool) { @@ -53,6 +54,10 @@ func TakeOffer(offer *mesos.Offer, task Task) bool { cpus, mem, watts := OfferAgg(offer) + if(IGNORE_WATTS) { + task.Watts = 0.0 // Don't take any watts in the offer + } + //TODO: Insert watts calculation here instead of taking them as a parameter if cpus >= task.CPU && mem >= task.RAM && watts >= task.Watts { @@ -272,8 +277,10 @@ func (s *electronScheduler) Error(_ sched.SchedulerDriver, err string) { func main() { master := flag.String("master", "xavier:5050", "Location of leading Mesos master") tasksFile := flag.String("workload", "", "JSON file containing task definitions") + ignoreWatts := flag.Bool("ignoreWatts", false, "Don't use watts from offers") flag.Parse() + IGNORE_WATTS = *ignoreWatts if *tasksFile == "" { fmt.Println("No file containing tasks specifiction provided.") From 3b3e64ec475f72b6f193a05e087a7173b4628e61 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Mon, 26 Sep 2016 19:31:16 -0400 Subject: [PATCH 026/102] Adding documentation for ignoreWatts --- 
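The `-ignoreWatts` run flag documented in this patch makes the scheduler skip the watts dimension when deciding whether an offer fits a task (the preceding commit zeroes the task's watts requirement when the flag is set). A minimal, self-contained sketch of that gating, using aggregated offer values and a hypothetical `takeOffer` helper rather than the committed code:

```
package main

import "fmt"

// Task mirrors the fields used by the workload schema in the README.
type Task struct {
	Name  string
	CPU   float64
	RAM   float64
	Watts float64
}

// takeOffer checks an offer's aggregated cpus/mem/watts against a task's
// requirements; the watts requirement is only enforced when ignoreWatts is off.
func takeOffer(cpus, mem, watts float64, task Task, ignoreWatts bool) bool {
	if cpus < task.CPU || mem < task.RAM {
		return false
	}
	if !ignoreWatts && watts < task.Watts {
		return false
	}
	return true
}

func main() {
	task := Task{Name: "minife", CPU: 3.0, RAM: 4096, Watts: 50}
	// Offer advertising 4 CPUs, 8 GB of memory and no watts resource:
	fmt.Println(takeOffer(4.0, 8192, 0, task, true))  // true: watts ignored
	fmt.Println(takeOffer(4.0, 8192, 0, task, false)) // false: 50 watts required
}
```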
README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b0154c7..f08dc71 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ To Do: How to run: -`./electron -workload ` +`./electron -workload -ignoreWatts ` Workload schema: From 7666167d878665e4f38ec79f391eeab80271a021 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Mon, 26 Sep 2016 19:32:25 -0400 Subject: [PATCH 027/102] More documentation --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index f08dc71..f7e8a33 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,8 @@ To Do: -** Requires Performance-Copilot tool pmdumptext ** +**Requires Performance-Copilot tool pmdumptext to be installed on the +machine on which electron is launched for logging to work** From c4ea0321c1e26a771f45a5022e0c4e327d89cec4 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Tue, 27 Sep 2016 18:12:50 -0400 Subject: [PATCH 028/102] Added ability to tie benchmark to single node. Uses offer hostname for this, maybe a better soluition would be to look at offer attributes. Added shorthand for workloads flag -w --- scheduler.go | 23 +++++++++++++++++++---- task.go | 1 + 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/scheduler.go b/scheduler.go index 9735291..6cfdbe5 100644 --- a/scheduler.go +++ b/scheduler.go @@ -11,6 +11,7 @@ import ( "os" "time" "bitbucket.org/bingcloud/electron/pcp" + "strings" ) const ( @@ -179,13 +180,21 @@ func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers default: } - tasks := []*mesos.TaskInfo{} // First fit strategy taken := false for i, task := range s.tasks { + + // Check host if it exists + if task.Host != "" { + // Don't take offer if it doesn't match our task's host requirement + if !strings.HasPrefix(*offer.Hostname, task.Host) { + continue + } + } + // Decision to take the offer or not if TakeOffer(offer, task) { @@ -274,10 +283,16 @@ func (s *electronScheduler) Error(_ sched.SchedulerDriver, err string) { log.Printf("Receiving an error: %s", err) } +var master = flag.String("master", "xavier:5050", "Location of leading Mesos master") +var tasksFile = flag.String("workload", "", "JSON file containing task definitions") +var ignoreWatts = flag.Bool("ignoreWatts", false, "Don't use watts from offers") + +// Short hand args +func init(){ + flag.StringVar(tasksFile, "w", "", "JSON file containing task definitions") +} + func main() { - master := flag.String("master", "xavier:5050", "Location of leading Mesos master") - tasksFile := flag.String("workload", "", "JSON file containing task definitions") - ignoreWatts := flag.Bool("ignoreWatts", false, "Don't use watts from offers") flag.Parse() IGNORE_WATTS = *ignoreWatts diff --git a/task.go b/task.go index 9d4bd01..cbc0e5a 100644 --- a/task.go +++ b/task.go @@ -14,6 +14,7 @@ type Task struct { Image string `json:"image"` CMD string `json:"cmd"` Instances *int `json:"inst"` + Host string `json:"host"` } func TasksFromJSON(uri string) ([]Task, error) { From 23d7014cfee32051b189f75143ea7cbe2fc5e0dc Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Tue, 27 Sep 2016 19:15:32 -0400 Subject: [PATCH 029/102] Adding shorthands --- scheduler.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scheduler.go b/scheduler.go index 6cfdbe5..6552ad3 100644 --- a/scheduler.go +++ b/scheduler.go @@ -285,11 +285,13 @@ func (s *electronScheduler) Error(_ sched.SchedulerDriver, err string) { var master = flag.String("master", 
"xavier:5050", "Location of leading Mesos master") var tasksFile = flag.String("workload", "", "JSON file containing task definitions") -var ignoreWatts = flag.Bool("ignoreWatts", false, "Don't use watts from offers") +var ignoreWatts = flag.Bool("ignoreWatts", false, "Ignore watts in offers") // Short hand args func init(){ - flag.StringVar(tasksFile, "w", "", "JSON file containing task definitions") + flag.StringVar(master, "m", "xavier:5050", "Location of leading Mesos master (shorthand)") + flag.StringVar(tasksFile, "w", "", "JSON file containing task definitions (shorthand)") + flag.BoolVar(ignoreWatts, "i", false, "Ignore watts in offers (shorthand)") } func main() { From 52d012a7ee07d061705ca3f1099d041969f381a1 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Tue, 27 Sep 2016 20:22:07 -0400 Subject: [PATCH 030/102] Added the ability to prefix a pcplog --- scheduler.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/scheduler.go b/scheduler.go index 6552ad3..cb5ce2e 100644 --- a/scheduler.go +++ b/scheduler.go @@ -286,12 +286,14 @@ func (s *electronScheduler) Error(_ sched.SchedulerDriver, err string) { var master = flag.String("master", "xavier:5050", "Location of leading Mesos master") var tasksFile = flag.String("workload", "", "JSON file containing task definitions") var ignoreWatts = flag.Bool("ignoreWatts", false, "Ignore watts in offers") +var pcplogPrefix = flag.String("logPrefix", "", "Prefix for pcplog") // Short hand args func init(){ flag.StringVar(master, "m", "xavier:5050", "Location of leading Mesos master (shorthand)") flag.StringVar(tasksFile, "w", "", "JSON file containing task definitions (shorthand)") flag.BoolVar(ignoreWatts, "i", false, "Ignore watts in offers (shorthand)") + flag.StringVar(pcplogPrefix, "p", "", "Prefix for pcplog") } func main() { @@ -329,7 +331,7 @@ func main() { return } - go pcp.Start(scheduler.pcpLog, &scheduler.recordPCP) + go pcp.Start(scheduler.pcpLog, &scheduler.recordPCP, *pcplogPrefix) time.Sleep(1 * time.Second) // Catch interrupt From 5dd64f1e16d321828f69e9b6d909e1d6730aa060 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Fri, 7 Oct 2016 19:29:36 -0400 Subject: [PATCH 031/102] Added Error state. Fixed bug with tasks returning error because 0.0 watts was requested as a resource. Changed name to be more readable by eliminating electron. PCP logs can now have a prefix. 
--- pcp/pcp.go | 4 ++-- scheduler.go | 36 +++++++++++++++++------------------- states.go | 8 ++++++-- 3 files changed, 25 insertions(+), 23 deletions(-) diff --git a/pcp/pcp.go b/pcp/pcp.go index b9743ab..0c8d0e7 100644 --- a/pcp/pcp.go +++ b/pcp/pcp.go @@ -8,13 +8,13 @@ import ( "os" ) -func Start(quit chan struct{}, logging *bool) { +func Start(quit chan struct{}, logging *bool, prefix string) { const pcpCommand string = "pmdumptext -m -l -f '' -t 1.0 -d , -c config" cmd := exec.Command("sh", "-c", pcpCommand) startTime := time.Now().Format("20060102150405") - logFile, err := os.Create("./"+startTime+".pcplog") + logFile, err := os.Create("./"+prefix+startTime+".pcplog") if err != nil { log.Fatal(err) } diff --git a/scheduler.go b/scheduler.go index cb5ce2e..742cc4a 100644 --- a/scheduler.go +++ b/scheduler.go @@ -55,10 +55,6 @@ func TakeOffer(offer *mesos.Offer, task Task) bool { cpus, mem, watts := OfferAgg(offer) - if(IGNORE_WATTS) { - task.Watts = 0.0 // Don't take any watts in the offer - } - //TODO: Insert watts calculation here instead of taking them as a parameter if cpus >= task.CPU && mem >= task.RAM && watts >= task.Watts { @@ -68,8 +64,7 @@ func TakeOffer(offer *mesos.Offer, task Task) bool { return false } -// rendlerScheduler implements the Scheduler interface and stores -// the state needed for Rendler to function. +// electronScheduler implements the Scheduler interface type electronScheduler struct { tasksCreated int tasksRunning int @@ -77,12 +72,10 @@ type electronScheduler struct { metrics map[string]Metric running map[string]map[string]bool - - // First set of PCP values are garbage values, signal to logger to start recording after - // we actually schedule a task + // First set of PCP values are garbage values, signal to logger to start recording when we're + // about to schedule a new task recordPCP bool - // This channel is closed when the program receives an interrupt, // signalling that the program should shut down. 
shutdown chan struct{} @@ -110,7 +103,7 @@ func newElectronScheduler(tasks []Task) *electronScheduler { } func (s *electronScheduler) newTask(offer *mesos.Offer, task Task) *mesos.TaskInfo { - taskID := fmt.Sprintf("Electron-%s-%d", task.Name, *task.Instances) + taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances) s.tasksCreated++ if !s.recordPCP { @@ -125,19 +118,24 @@ func (s *electronScheduler) newTask(offer *mesos.Offer, task Task) *mesos.TaskIn } // Add task to list of tasks running on node - s.running[offer.GetSlaveId().GoString()][taskID] = true + s.running[offer.GetSlaveId().GoString()][taskName] = true + + resources := []*mesos.Resource{ + mesosutil.NewScalarResource("cpus", task.CPU), + mesosutil.NewScalarResource("mem", task.RAM), + } + + if(!IGNORE_WATTS) { + resources = append(resources, mesosutil.NewScalarResource("watts", task.Watts)) + } return &mesos.TaskInfo{ - Name: proto.String(taskID), + Name: proto.String(taskName), TaskId: &mesos.TaskID{ - Value: proto.String(taskID), + Value: proto.String("electron-" + taskName), }, SlaveId: offer.SlaveId, - Resources: []*mesos.Resource{ - mesosutil.NewScalarResource("cpus", task.CPU), - mesosutil.NewScalarResource("mem", task.RAM), - mesosutil.NewScalarResource("watts", task.Watts), - }, + Resources: resources, Command: &mesos.CommandInfo{ Value: proto.String(task.CMD), }, diff --git a/states.go b/states.go index d3b8afa..69a227b 100644 --- a/states.go +++ b/states.go @@ -2,6 +2,7 @@ package main import ( mesos "github.com/mesos/mesos-go/mesosproto" + "fmt" ) // NameFor returns the string name for a TaskState. @@ -21,8 +22,10 @@ func NameFor(state *mesos.TaskState) string { return "TASK_KILLED" // TERMINAL case mesos.TaskState_TASK_LOST: return "TASK_LOST" // TERMINAL + case mesos.TaskState_TASK_ERROR: + return "TASK_ERROR" default: - return "UNKNOWN" + return fmt.Sprintf("UNKNOWN: %d", *state) } } @@ -33,7 +36,8 @@ func IsTerminal(state *mesos.TaskState) bool { case mesos.TaskState_TASK_FINISHED, mesos.TaskState_TASK_FAILED, mesos.TaskState_TASK_KILLED, - mesos.TaskState_TASK_LOST: + mesos.TaskState_TASK_LOST, + mesos.TaskState_TASK_ERROR: return true default: return false From 8f9b29cbcb62f73e561e33888c3ac901d0922a6b Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Fri, 7 Oct 2016 20:47:59 -0400 Subject: [PATCH 032/102] SIGINT is now captured. Linux/MacOS shutdown for pcpdumptext child processes added to clean up after we finish running. Made all tasks run on bridged network mode so that benchmarks that heavily utilize network are protected from eachother. 
I.e: Tradebeans, tradesoap --- pcp/pcp.go | 14 ++++++++------ scheduler.go | 23 ++++++++++++++++++----- 2 files changed, 26 insertions(+), 11 deletions(-) diff --git a/pcp/pcp.go b/pcp/pcp.go index 0c8d0e7..4329477 100644 --- a/pcp/pcp.go +++ b/pcp/pcp.go @@ -6,11 +6,13 @@ import ( "os/exec" "time" "os" + "syscall" ) func Start(quit chan struct{}, logging *bool, prefix string) { const pcpCommand string = "pmdumptext -m -l -f '' -t 1.0 -d , -c config" cmd := exec.Command("sh", "-c", pcpCommand) + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} startTime := time.Now().Format("20060102150405") @@ -76,16 +78,16 @@ func Start(quit chan struct{}, logging *bool, prefix string) { log.Fatal(err) } + pgid, err := syscall.Getpgid(cmd.Process.Pid) + select{ case <- quit: log.Println("Stopping PCP logging in 5 seconds") time.Sleep(5 * time.Second) - cmd.Process.Kill() + + // http://stackoverflow.com/questions/22470193/why-wont-go-kill-a-child-process-correctly + // kill process and all children processes + syscall.Kill(-pgid, 15) return } - - /* - if err := cmd.Wait(); err != nil { - log.Fatal(err) - }*/ } diff --git a/scheduler.go b/scheduler.go index 742cc4a..c3a92eb 100644 --- a/scheduler.go +++ b/scheduler.go @@ -12,6 +12,7 @@ import ( "time" "bitbucket.org/bingcloud/electron/pcp" "strings" + "os/signal" ) const ( @@ -143,6 +144,7 @@ func (s *electronScheduler) newTask(offer *mesos.Offer, task Task) *mesos.TaskIn Type: mesos.ContainerInfo_DOCKER.Enum(), Docker: &mesos.ContainerInfo_DockerInfo{ Image: proto.String(task.Image), + Network: mesos.ContainerInfo_DockerInfo_BRIDGE.Enum(), // Run everything isolated }, }, @@ -170,7 +172,7 @@ func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers for _, offer := range offers { select { case <-s.shutdown: - log.Println("Shutting down: declining offer on [", offer.GetHostname(), "]") + log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]") driver.DeclineOffer(offer.Id, longFilter) log.Println("Number of tasks still running: ", s.tasksRunning) @@ -214,15 +216,12 @@ func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers s.tasks[i] = s.tasks[len(s.tasks)-1] s.tasks = s.tasks[:len(s.tasks)-1] - if(len(s.tasks) <= 0) { log.Println("Done scheduling all tasks") close(s.shutdown) } } - break // Offer taken, move on - } } @@ -332,7 +331,21 @@ func main() { go pcp.Start(scheduler.pcpLog, &scheduler.recordPCP, *pcplogPrefix) time.Sleep(1 * time.Second) + // Attempt to handle signint to not leave pmdumptext running // Catch interrupt + go func() { + c := make(chan os.Signal, 1) + signal.Notify(c, os.Interrupt, os.Kill) + s := <-c + if s != os.Interrupt { + close(scheduler.pcpLog) + return + } + + log.Printf("Received SIGINT...stopping") + close(scheduler.done) + }() + go func() { // Signals we have scheduled every task we have @@ -341,7 +354,7 @@ func main() { // case <-time.After(shutdownTimeout): } - // Signals all tasks have finished + // All tasks have finished select { case <-scheduler.done: close(scheduler.pcpLog) From 5e9db05d4c62a9229169896c4d20bbd93222976c Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Fri, 7 Oct 2016 21:04:58 -0400 Subject: [PATCH 033/102] Temporary blank struct for metrics --- metrics.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/metrics.go b/metrics.go index 6b8a3c4..d044424 100644 --- a/metrics.go +++ b/metrics.go @@ -1 +1,5 @@ -package electron +package main + +type Metric struct{ + +} From 
407c350d3c41b291d44c30e921d8eeb4964cff96 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Fri, 7 Oct 2016 21:05:57 -0400 Subject: [PATCH 034/102] Adding some fields --- metrics.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/metrics.go b/metrics.go index d044424..2041007 100644 --- a/metrics.go +++ b/metrics.go @@ -1,5 +1,8 @@ package main type Metric struct{ - + Name string `json:"name"` + CPU float64 `json:"cpu"` + RAM float64 `json:"ram"` + Watts float64 `json:"watts"` } From c2e2b7e55420dfb322ce0049410ab050d5270d21 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Thu, 13 Oct 2016 17:15:09 -0400 Subject: [PATCH 035/102] Moved schedulers from the main programs to schedulers package. Can now choose different scheduelrs to use. Work on code sharing between schedulers remains to be done. --- def/metric.go | 8 + task.go => def/task.go | 22 +-- metrics.go | 8 - pcp/pcp.go | 33 ++-- scheduler.go | 301 ++---------------------------- schedulers/binpackwatts.go | 241 ++++++++++++++++++++++++ schedulers/firstfit.go | 242 ++++++++++++++++++++++++ schedulers/helpers.go | 39 ++++ states.go => schedulers/states.go | 4 +- 9 files changed, 575 insertions(+), 323 deletions(-) create mode 100644 def/metric.go rename task.go => def/task.go (58%) delete mode 100644 metrics.go create mode 100644 schedulers/binpackwatts.go create mode 100644 schedulers/firstfit.go create mode 100644 schedulers/helpers.go rename states.go => schedulers/states.go (98%) diff --git a/def/metric.go b/def/metric.go new file mode 100644 index 0000000..6278a31 --- /dev/null +++ b/def/metric.go @@ -0,0 +1,8 @@ +package def + +type Metric struct { + Name string `json:"name"` + CPU float64 `json:"cpu"` + RAM float64 `json:"ram"` + Watts float64 `json:"watts"` +} diff --git a/task.go b/def/task.go similarity index 58% rename from task.go rename to def/task.go index cbc0e5a..94629d1 100644 --- a/task.go +++ b/def/task.go @@ -1,20 +1,20 @@ -package main +package def import ( "encoding/json" - "os" "github.com/pkg/errors" + "os" ) type Task struct { - Name string `json:"name"` - CPU float64 `json:"cpu"` - RAM float64 `json:"ram"` - Watts float64 `json:"watts"` - Image string `json:"image"` - CMD string `json:"cmd"` - Instances *int `json:"inst"` - Host string `json:"host"` + Name string `json:"name"` + CPU float64 `json:"cpu"` + RAM float64 `json:"ram"` + Watts float64 `json:"watts"` + Image string `json:"image"` + CMD string `json:"cmd"` + Instances *int `json:"inst"` + Host string `json:"host"` } func TasksFromJSON(uri string) ([]Task, error) { @@ -32,4 +32,4 @@ func TasksFromJSON(uri string) ([]Task, error) { } return tasks, nil -} \ No newline at end of file +} diff --git a/metrics.go b/metrics.go deleted file mode 100644 index 2041007..0000000 --- a/metrics.go +++ /dev/null @@ -1,8 +0,0 @@ -package main - -type Metric struct{ - Name string `json:"name"` - CPU float64 `json:"cpu"` - RAM float64 `json:"ram"` - Watts float64 `json:"watts"` -} diff --git a/pcp/pcp.go b/pcp/pcp.go index 4329477..8a1d46d 100644 --- a/pcp/pcp.go +++ b/pcp/pcp.go @@ -3,10 +3,10 @@ package pcp import ( "bufio" "log" - "os/exec" - "time" "os" + "os/exec" "syscall" + "time" ) func Start(quit chan struct{}, logging *bool, prefix string) { @@ -15,8 +15,7 @@ func Start(quit chan struct{}, logging *bool, prefix string) { cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} startTime := time.Now().Format("20060102150405") - - logFile, err := os.Create("./"+prefix+startTime+".pcplog") + logFile, err := os.Create("./" + prefix + startTime 
+ ".pcplog") if err != nil { log.Fatal(err) } @@ -39,12 +38,12 @@ func Start(quit chan struct{}, logging *bool, prefix string) { logFile.WriteString(scanner.Text() + "\n") /* - headers := strings.Split(scanner.Text(), ",") + headers := strings.Split(scanner.Text(), ",") - for _, hostMetric := range headers { - split := strings.Split(hostMetric, ":") - fmt.Printf("Host %s: Metric: %s\n", split[0], split[1]) - } + for _, hostMetric := range headers { + split := strings.Split(hostMetric, ":") + fmt.Printf("Host %s: Metric: %s\n", split[0], split[1]) + } */ // Throw away first set of results @@ -53,17 +52,16 @@ func Start(quit chan struct{}, logging *bool, prefix string) { seconds := 0 for scanner.Scan() { - - if(*logging) { + if *logging { log.Println("Logging PCP...") logFile.WriteString(scanner.Text() + "\n") } /* - fmt.Printf("Second: %d\n", seconds) - for i, val := range strings.Split(scanner.Text(), ",") { - fmt.Printf("host metric: %s val: %s\n", headers[i], val) - }*/ + fmt.Printf("Second: %d\n", seconds) + for i, val := range strings.Split(scanner.Text(), ",") { + fmt.Printf("host metric: %s val: %s\n", headers[i], val) + }*/ seconds++ @@ -73,15 +71,14 @@ func Start(quit chan struct{}, logging *bool, prefix string) { log.Println("PCP logging started") - if err := cmd.Start(); err != nil { log.Fatal(err) } pgid, err := syscall.Getpgid(cmd.Process.Pid) - select{ - case <- quit: + select { + case <-quit: log.Println("Stopping PCP logging in 5 seconds") time.Sleep(5 * time.Second) diff --git a/scheduler.go b/scheduler.go index c3a92eb..6b11506 100644 --- a/scheduler.go +++ b/scheduler.go @@ -1,310 +1,43 @@ package main import ( + "bitbucket.org/bingcloud/electron/def" + "bitbucket.org/bingcloud/electron/pcp" + "bitbucket.org/bingcloud/electron/schedulers" "flag" "fmt" "github.com/golang/protobuf/proto" mesos "github.com/mesos/mesos-go/mesosproto" - "github.com/mesos/mesos-go/mesosutil" sched "github.com/mesos/mesos-go/scheduler" "log" "os" - "time" - "bitbucket.org/bingcloud/electron/pcp" - "strings" "os/signal" + "time" ) -const ( - shutdownTimeout = time.Duration(30) * time.Second -) - -var ( - defaultFilter = &mesos.Filters{RefuseSeconds: proto.Float64(1)} - longFilter = &mesos.Filters{RefuseSeconds: proto.Float64(1000)} - IGNORE_WATTS = false -) - -func CoLocated(tasks map[string]bool) { - - for task := range tasks { - log.Println(task) - } - - fmt.Println("---------------------") -} - -func OfferAgg(offer *mesos.Offer) (float64, float64, float64) { - var cpus, mem, watts float64 - - for _, resource := range offer.Resources { - switch resource.GetName() { - case "cpus": - cpus += *resource.GetScalar().Value - case "mem": - mem += *resource.GetScalar().Value - case "watts": - watts += *resource.GetScalar().Value - } - } - - return cpus, mem, watts -} - -// Decides if to take an offer or not -func TakeOffer(offer *mesos.Offer, task Task) bool { - - cpus, mem, watts := OfferAgg(offer) - - //TODO: Insert watts calculation here instead of taking them as a parameter - - if cpus >= task.CPU && mem >= task.RAM && watts >= task.Watts { - return true - } - - return false -} - -// electronScheduler implements the Scheduler interface -type electronScheduler struct { - tasksCreated int - tasksRunning int - tasks []Task - metrics map[string]Metric - running map[string]map[string]bool - - // First set of PCP values are garbage values, signal to logger to start recording when we're - // about to schedule a new task - recordPCP bool - - // This channel is closed when the program receives an interrupt, 
- // signalling that the program should shut down. - shutdown chan struct{} - // This channel is closed after shutdown is closed, and only when all - // outstanding tasks have been cleaned up - done chan struct{} - - - // Controls when to shutdown pcp logging - pcpLog chan struct{} -} - -// New electron scheduler -func newElectronScheduler(tasks []Task) *electronScheduler { - - s := &electronScheduler{ - tasks: tasks, - shutdown: make(chan struct{}), - done: make(chan struct{}), - pcpLog: make(chan struct{}), - running: make(map[string]map[string]bool), - recordPCP: false, - } - return s -} - -func (s *electronScheduler) newTask(offer *mesos.Offer, task Task) *mesos.TaskInfo { - taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances) - s.tasksCreated++ - - if !s.recordPCP { - // Turn on logging - s.recordPCP = true - time.Sleep(1 * time.Second) // Make sure we're recording by the time the first task starts - } - - // If this is our first time running into this Agent - if _, ok := s.running[offer.GetSlaveId().GoString()]; !ok { - s.running[offer.GetSlaveId().GoString()] = make(map[string]bool) - } - - // Add task to list of tasks running on node - s.running[offer.GetSlaveId().GoString()][taskName] = true - - resources := []*mesos.Resource{ - mesosutil.NewScalarResource("cpus", task.CPU), - mesosutil.NewScalarResource("mem", task.RAM), - } - - if(!IGNORE_WATTS) { - resources = append(resources, mesosutil.NewScalarResource("watts", task.Watts)) - } - - return &mesos.TaskInfo{ - Name: proto.String(taskName), - TaskId: &mesos.TaskID{ - Value: proto.String("electron-" + taskName), - }, - SlaveId: offer.SlaveId, - Resources: resources, - Command: &mesos.CommandInfo{ - Value: proto.String(task.CMD), - }, - Container: &mesos.ContainerInfo{ - Type: mesos.ContainerInfo_DOCKER.Enum(), - Docker: &mesos.ContainerInfo_DockerInfo{ - Image: proto.String(task.Image), - Network: mesos.ContainerInfo_DockerInfo_BRIDGE.Enum(), // Run everything isolated - }, - - }, - } -} - -func (s *electronScheduler) Registered( - _ sched.SchedulerDriver, - frameworkID *mesos.FrameworkID, - masterInfo *mesos.MasterInfo) { - log.Printf("Framework %s registered with master %s", frameworkID, masterInfo) -} - -func (s *electronScheduler) Reregistered(_ sched.SchedulerDriver, masterInfo *mesos.MasterInfo) { - log.Printf("Framework re-registered with master %s", masterInfo) -} - -func (s *electronScheduler) Disconnected(sched.SchedulerDriver) { - log.Println("Framework disconnected with master") -} - -func (s *electronScheduler) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { - log.Printf("Received %d resource offers", len(offers)) - - for _, offer := range offers { - select { - case <-s.shutdown: - log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]") - driver.DeclineOffer(offer.Id, longFilter) - - log.Println("Number of tasks still running: ", s.tasksRunning) - continue - default: - } - - tasks := []*mesos.TaskInfo{} - - // First fit strategy - - taken := false - for i, task := range s.tasks { - - // Check host if it exists - if task.Host != "" { - // Don't take offer if it doesn't match our task's host requirement - if !strings.HasPrefix(*offer.Hostname, task.Host) { - continue - } - } - - // Decision to take the offer or not - if TakeOffer(offer, task) { - - log.Println("Co-Located with: ") - CoLocated(s.running[offer.GetSlaveId().GoString()]) - - tasks = append(tasks, s.newTask(offer, task)) - - log.Printf("Starting %s on [%s]\n", task.Name, offer.GetHostname()) - 
driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, defaultFilter) - - taken = true - - fmt.Println("Inst: ", *task.Instances) - *task.Instances-- - - if *task.Instances <= 0 { - // All instances of task have been scheduled, remove it - s.tasks[i] = s.tasks[len(s.tasks)-1] - s.tasks = s.tasks[:len(s.tasks)-1] - - if(len(s.tasks) <= 0) { - log.Println("Done scheduling all tasks") - close(s.shutdown) - } - } - break // Offer taken, move on - } - } - - // If there was no match for the task - if !taken { - fmt.Println("There is not enough resources to launch a task:") - cpus, mem, watts := OfferAgg(offer) - - log.Printf("\n", cpus, mem, watts) - driver.DeclineOffer(offer.Id, defaultFilter) - } - - } -} - -func (s *electronScheduler) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) { - log.Printf("Received task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) - - if *status.State == mesos.TaskState_TASK_RUNNING { - s.tasksRunning++ - } else if IsTerminal(status.State) { - delete(s.running[status.GetSlaveId().GoString()],*status.TaskId.Value) - s.tasksRunning-- - if s.tasksRunning == 0 { - select { - case <-s.shutdown: - close(s.done) - default: - } - } - } - log.Printf("DONE: Task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) -} - -func (s *electronScheduler) FrameworkMessage( - driver sched.SchedulerDriver, - executorID *mesos.ExecutorID, - slaveID *mesos.SlaveID, - message string) { - - log.Println("Getting a framework message: ", message) - log.Printf("Received a framework message from some unknown source: %s", *executorID.Value) -} - -func (s *electronScheduler) OfferRescinded(_ sched.SchedulerDriver, offerID *mesos.OfferID) { - log.Printf("Offer %s rescinded", offerID) -} -func (s *electronScheduler) SlaveLost(_ sched.SchedulerDriver, slaveID *mesos.SlaveID) { - log.Printf("Slave %s lost", slaveID) -} -func (s *electronScheduler) ExecutorLost(_ sched.SchedulerDriver, executorID *mesos.ExecutorID, slaveID *mesos.SlaveID, status int) { - log.Printf("Executor %s on slave %s was lost", executorID, slaveID) -} - -func (s *electronScheduler) Error(_ sched.SchedulerDriver, err string) { - log.Printf("Receiving an error: %s", err) -} - var master = flag.String("master", "xavier:5050", "Location of leading Mesos master") var tasksFile = flag.String("workload", "", "JSON file containing task definitions") var ignoreWatts = flag.Bool("ignoreWatts", false, "Ignore watts in offers") var pcplogPrefix = flag.String("logPrefix", "", "Prefix for pcplog") // Short hand args -func init(){ +func init() { flag.StringVar(master, "m", "xavier:5050", "Location of leading Mesos master (shorthand)") flag.StringVar(tasksFile, "w", "", "JSON file containing task definitions (shorthand)") flag.BoolVar(ignoreWatts, "i", false, "Ignore watts in offers (shorthand)") - flag.StringVar(pcplogPrefix, "p", "", "Prefix for pcplog") + flag.StringVar(pcplogPrefix, "p", "", "Prefix for pcplog (shorthand)") } func main() { flag.Parse() - IGNORE_WATTS = *ignoreWatts - if *tasksFile == "" { fmt.Println("No file containing tasks specifiction provided.") os.Exit(1) } - tasks, err := TasksFromJSON(*tasksFile) - if(err != nil || len(tasks) == 0) { + tasks, err := def.TasksFromJSON(*tasksFile) + if err != nil || len(tasks) == 0 { fmt.Println("Invalid tasks specification file provided") os.Exit(1) } @@ -314,7 +47,7 @@ func main() { fmt.Println(task) } - scheduler := newElectronScheduler(tasks) + scheduler := schedulers.NewFirstFit(tasks, *ignoreWatts) driver, err := 
sched.NewMesosSchedulerDriver(sched.DriverConfig{ Master: *master, Framework: &mesos.FrameworkInfo{ @@ -328,7 +61,7 @@ func main() { return } - go pcp.Start(scheduler.pcpLog, &scheduler.recordPCP, *pcplogPrefix) + go pcp.Start(scheduler.PCPLog, &scheduler.RecordPCP, *pcplogPrefix) time.Sleep(1 * time.Second) // Attempt to handle signint to not leave pmdumptext running @@ -338,28 +71,28 @@ func main() { signal.Notify(c, os.Interrupt, os.Kill) s := <-c if s != os.Interrupt { - close(scheduler.pcpLog) + close(scheduler.PCPLog) return } log.Printf("Received SIGINT...stopping") - close(scheduler.done) + close(scheduler.Done) }() go func() { // Signals we have scheduled every task we have select { - case <-scheduler.shutdown: - // case <-time.After(shutdownTimeout): + case <-scheduler.Shutdown: + // case <-time.After(shutdownTimeout): } // All tasks have finished select { - case <-scheduler.done: - close(scheduler.pcpLog) + case <-scheduler.Done: + close(scheduler.PCPLog) time.Sleep(5 * time.Second) //Wait for PCP to log a few more seconds -// case <-time.After(shutdownTimeout): + // case <-time.After(shutdownTimeout): } // Done shutting down diff --git a/schedulers/binpackwatts.go b/schedulers/binpackwatts.go new file mode 100644 index 0000000..690e793 --- /dev/null +++ b/schedulers/binpackwatts.go @@ -0,0 +1,241 @@ +package schedulers + +import ( + "bitbucket.org/bingcloud/electron/def" + "fmt" + "github.com/golang/protobuf/proto" + mesos "github.com/mesos/mesos-go/mesosproto" + "github.com/mesos/mesos-go/mesosutil" + sched "github.com/mesos/mesos-go/scheduler" + "log" + "strings" + "time" +) + +// Decides if to take an offer or not +func (*BinPackWatts) takeOffer(offer *mesos.Offer, task def.Task) bool { + + cpus, mem, watts := OfferAgg(offer) + + //TODO: Insert watts calculation here instead of taking them as a parameter + + if cpus >= task.CPU && mem >= task.RAM && watts >= task.Watts { + return true + } + + return false +} + +type BinPackWatts struct { + tasksCreated int + tasksRunning int + tasks []def.Task + metrics map[string]def.Metric + running map[string]map[string]bool + ignoreWatts bool + + // First set of PCP values are garbage values, signal to logger to start recording when we're + // about to schedule a new task + RecordPCP bool + + // This channel is closed when the program receives an interrupt, + // signalling that the program should shut down. 
+ Shutdown chan struct{} + // This channel is closed after shutdown is closed, and only when all + // outstanding tasks have been cleaned up + Done chan struct{} + + // Controls when to shutdown pcp logging + PCPLog chan struct{} +} + +// New electron scheduler +func NewBinPackWatts(tasks []def.Task, ignoreWatts bool) *BinPackWatts { + + s := &BinPackWatts{ + tasks: tasks, + ignoreWatts: ignoreWatts, + Shutdown: make(chan struct{}), + Done: make(chan struct{}), + PCPLog: make(chan struct{}), + running: make(map[string]map[string]bool), + RecordPCP: false, + } + return s +} + +func (s *BinPackWatts) newTask(offer *mesos.Offer, task def.Task) *mesos.TaskInfo { + taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances) + s.tasksCreated++ + + if !s.RecordPCP { + // Turn on logging + s.RecordPCP = true + time.Sleep(1 * time.Second) // Make sure we're recording by the time the first task starts + } + + // If this is our first time running into this Agent + if _, ok := s.running[offer.GetSlaveId().GoString()]; !ok { + s.running[offer.GetSlaveId().GoString()] = make(map[string]bool) + } + + // Add task to list of tasks running on node + s.running[offer.GetSlaveId().GoString()][taskName] = true + + resources := []*mesos.Resource{ + mesosutil.NewScalarResource("cpus", task.CPU), + mesosutil.NewScalarResource("mem", task.RAM), + } + + if !s.ignoreWatts { + resources = append(resources, mesosutil.NewScalarResource("watts", task.Watts)) + } + + return &mesos.TaskInfo{ + Name: proto.String(taskName), + TaskId: &mesos.TaskID{ + Value: proto.String("electron-" + taskName), + }, + SlaveId: offer.SlaveId, + Resources: resources, + Command: &mesos.CommandInfo{ + Value: proto.String(task.CMD), + }, + Container: &mesos.ContainerInfo{ + Type: mesos.ContainerInfo_DOCKER.Enum(), + Docker: &mesos.ContainerInfo_DockerInfo{ + Image: proto.String(task.Image), + Network: mesos.ContainerInfo_DockerInfo_BRIDGE.Enum(), // Run everything isolated + }, + }, + } +} + +func (s *BinPackWatts) Registered( + _ sched.SchedulerDriver, + frameworkID *mesos.FrameworkID, + masterInfo *mesos.MasterInfo) { + log.Printf("Framework %s registered with master %s", frameworkID, masterInfo) +} + +func (s *BinPackWatts) Reregistered(_ sched.SchedulerDriver, masterInfo *mesos.MasterInfo) { + log.Printf("Framework re-registered with master %s", masterInfo) +} + +func (s *BinPackWatts) Disconnected(sched.SchedulerDriver) { + log.Println("Framework disconnected with master") +} + +func (s *BinPackWatts) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { + log.Printf("Received %d resource offers", len(offers)) + + for _, offer := range offers { + select { + case <-s.Shutdown: + log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]") + driver.DeclineOffer(offer.Id, longFilter) + + log.Println("Number of tasks still running: ", s.tasksRunning) + continue + default: + } + + tasks := []*mesos.TaskInfo{} + + // First fit strategy + + taken := false + for i, task := range s.tasks { + + // Check host if it exists + if task.Host != "" { + // Don't take offer if it doesn't match our task's host requirement + if !strings.HasPrefix(*offer.Hostname, task.Host) { + continue + } + } + + // Decision to take the offer or not + if s.takeOffer(offer, task) { + + log.Println("Co-Located with: ") + coLocated(s.running[offer.GetSlaveId().GoString()]) + + tasks = append(tasks, s.newTask(offer, task)) + + log.Printf("Starting %s on [%s]\n", task.Name, offer.GetHostname()) + 
driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, defaultFilter) + + taken = true + + fmt.Println("Inst: ", *task.Instances) + *task.Instances-- + + if *task.Instances <= 0 { + // All instances of task have been scheduled, remove it + s.tasks[i] = s.tasks[len(s.tasks)-1] + s.tasks = s.tasks[:len(s.tasks)-1] + + if len(s.tasks) <= 0 { + log.Println("Done scheduling all tasks") + close(s.Shutdown) + } + } + break // Offer taken, move on + } + } + + // If there was no match for the task + if !taken { + fmt.Println("There is not enough resources to launch a task:") + cpus, mem, watts := OfferAgg(offer) + + log.Printf("\n", cpus, mem, watts) + driver.DeclineOffer(offer.Id, defaultFilter) + } + + } +} + +func (s *BinPackWatts) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) { + log.Printf("Received task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) + + if *status.State == mesos.TaskState_TASK_RUNNING { + s.tasksRunning++ + } else if IsTerminal(status.State) { + delete(s.running[status.GetSlaveId().GoString()], *status.TaskId.Value) + s.tasksRunning-- + if s.tasksRunning == 0 { + select { + case <-s.Shutdown: + close(s.Done) + default: + } + } + } + log.Printf("DONE: Task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) +} + +func (s *BinPackWatts) FrameworkMessage( + driver sched.SchedulerDriver, + executorID *mesos.ExecutorID, + slaveID *mesos.SlaveID, + message string) { + + log.Println("Getting a framework message: ", message) + log.Printf("Received a framework message from some unknown source: %s", *executorID.Value) +} + +func (s *BinPackWatts) OfferRescinded(_ sched.SchedulerDriver, offerID *mesos.OfferID) { + log.Printf("Offer %s rescinded", offerID) +} +func (s *BinPackWatts) SlaveLost(_ sched.SchedulerDriver, slaveID *mesos.SlaveID) { + log.Printf("Slave %s lost", slaveID) +} +func (s *BinPackWatts) ExecutorLost(_ sched.SchedulerDriver, executorID *mesos.ExecutorID, slaveID *mesos.SlaveID, status int) { + log.Printf("Executor %s on slave %s was lost", executorID, slaveID) +} + +func (s *BinPackWatts) Error(_ sched.SchedulerDriver, err string) { + log.Printf("Receiving an error: %s", err) +} diff --git a/schedulers/firstfit.go b/schedulers/firstfit.go new file mode 100644 index 0000000..91a68f1 --- /dev/null +++ b/schedulers/firstfit.go @@ -0,0 +1,242 @@ +package schedulers + +import ( + "bitbucket.org/bingcloud/electron/def" + "fmt" + "github.com/golang/protobuf/proto" + mesos "github.com/mesos/mesos-go/mesosproto" + "github.com/mesos/mesos-go/mesosutil" + sched "github.com/mesos/mesos-go/scheduler" + "log" + "strings" + "time" +) + +// Decides if to take an offer or not +func (*FirstFit) takeOffer(offer *mesos.Offer, task def.Task) bool { + + cpus, mem, watts := OfferAgg(offer) + + //TODO: Insert watts calculation here instead of taking them as a parameter + + if cpus >= task.CPU && mem >= task.RAM && watts >= task.Watts { + return true + } + + return false +} + +// electronScheduler implements the Scheduler interface +type FirstFit struct { + tasksCreated int + tasksRunning int + tasks []def.Task + metrics map[string]def.Metric + running map[string]map[string]bool + ignoreWatts bool + + // First set of PCP values are garbage values, signal to logger to start recording when we're + // about to schedule a new task + RecordPCP bool + + // This channel is closed when the program receives an interrupt, + // signalling that the program should shut down. 
+ Shutdown chan struct{} + // This channel is closed after shutdown is closed, and only when all + // outstanding tasks have been cleaned up + Done chan struct{} + + // Controls when to shutdown pcp logging + PCPLog chan struct{} +} + +// New electron scheduler +func NewFirstFit(tasks []def.Task, ignoreWatts bool) *FirstFit { + + s := &FirstFit{ + tasks: tasks, + ignoreWatts: ignoreWatts, + Shutdown: make(chan struct{}), + Done: make(chan struct{}), + PCPLog: make(chan struct{}), + running: make(map[string]map[string]bool), + RecordPCP: false, + } + return s +} + +func (s *FirstFit) newTask(offer *mesos.Offer, task def.Task) *mesos.TaskInfo { + taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances) + s.tasksCreated++ + + if !s.RecordPCP { + // Turn on logging + s.RecordPCP = true + time.Sleep(1 * time.Second) // Make sure we're recording by the time the first task starts + } + + // If this is our first time running into this Agent + if _, ok := s.running[offer.GetSlaveId().GoString()]; !ok { + s.running[offer.GetSlaveId().GoString()] = make(map[string]bool) + } + + // Add task to list of tasks running on node + s.running[offer.GetSlaveId().GoString()][taskName] = true + + resources := []*mesos.Resource{ + mesosutil.NewScalarResource("cpus", task.CPU), + mesosutil.NewScalarResource("mem", task.RAM), + } + + if !s.ignoreWatts { + resources = append(resources, mesosutil.NewScalarResource("watts", task.Watts)) + } + + return &mesos.TaskInfo{ + Name: proto.String(taskName), + TaskId: &mesos.TaskID{ + Value: proto.String("electron-" + taskName), + }, + SlaveId: offer.SlaveId, + Resources: resources, + Command: &mesos.CommandInfo{ + Value: proto.String(task.CMD), + }, + Container: &mesos.ContainerInfo{ + Type: mesos.ContainerInfo_DOCKER.Enum(), + Docker: &mesos.ContainerInfo_DockerInfo{ + Image: proto.String(task.Image), + Network: mesos.ContainerInfo_DockerInfo_BRIDGE.Enum(), // Run everything isolated + }, + }, + } +} + +func (s *FirstFit) Registered( + _ sched.SchedulerDriver, + frameworkID *mesos.FrameworkID, + masterInfo *mesos.MasterInfo) { + log.Printf("Framework %s registered with master %s", frameworkID, masterInfo) +} + +func (s *FirstFit) Reregistered(_ sched.SchedulerDriver, masterInfo *mesos.MasterInfo) { + log.Printf("Framework re-registered with master %s", masterInfo) +} + +func (s *FirstFit) Disconnected(sched.SchedulerDriver) { + log.Println("Framework disconnected with master") +} + +func (s *FirstFit) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { + log.Printf("Received %d resource offers", len(offers)) + + for _, offer := range offers { + select { + case <-s.Shutdown: + log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]") + driver.DeclineOffer(offer.Id, longFilter) + + log.Println("Number of tasks still running: ", s.tasksRunning) + continue + default: + } + + tasks := []*mesos.TaskInfo{} + + // First fit strategy + + taken := false + for i, task := range s.tasks { + + // Check host if it exists + if task.Host != "" { + // Don't take offer if it doesn't match our task's host requirement + if !strings.HasPrefix(*offer.Hostname, task.Host) { + continue + } + } + + // Decision to take the offer or not + if s.takeOffer(offer, task) { + + log.Println("Co-Located with: ") + coLocated(s.running[offer.GetSlaveId().GoString()]) + + tasks = append(tasks, s.newTask(offer, task)) + + log.Printf("Starting %s on [%s]\n", task.Name, offer.GetHostname()) + driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, defaultFilter) + + 
taken = true + + fmt.Println("Inst: ", *task.Instances) + *task.Instances-- + + if *task.Instances <= 0 { + // All instances of task have been scheduled, remove it + s.tasks[i] = s.tasks[len(s.tasks)-1] + s.tasks = s.tasks[:len(s.tasks)-1] + + if len(s.tasks) <= 0 { + log.Println("Done scheduling all tasks") + close(s.Shutdown) + } + } + break // Offer taken, move on + } + } + + // If there was no match for the task + if !taken { + fmt.Println("There is not enough resources to launch a task:") + cpus, mem, watts := OfferAgg(offer) + + log.Printf("\n", cpus, mem, watts) + driver.DeclineOffer(offer.Id, defaultFilter) + } + + } +} + +func (s *FirstFit) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) { + log.Printf("Received task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) + + if *status.State == mesos.TaskState_TASK_RUNNING { + s.tasksRunning++ + } else if IsTerminal(status.State) { + delete(s.running[status.GetSlaveId().GoString()], *status.TaskId.Value) + s.tasksRunning-- + if s.tasksRunning == 0 { + select { + case <-s.Shutdown: + close(s.Done) + default: + } + } + } + log.Printf("DONE: Task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) +} + +func (s *FirstFit) FrameworkMessage( + driver sched.SchedulerDriver, + executorID *mesos.ExecutorID, + slaveID *mesos.SlaveID, + message string) { + + log.Println("Getting a framework message: ", message) + log.Printf("Received a framework message from some unknown source: %s", *executorID.Value) +} + +func (s *FirstFit) OfferRescinded(_ sched.SchedulerDriver, offerID *mesos.OfferID) { + log.Printf("Offer %s rescinded", offerID) +} +func (s *FirstFit) SlaveLost(_ sched.SchedulerDriver, slaveID *mesos.SlaveID) { + log.Printf("Slave %s lost", slaveID) +} +func (s *FirstFit) ExecutorLost(_ sched.SchedulerDriver, executorID *mesos.ExecutorID, slaveID *mesos.SlaveID, status int) { + log.Printf("Executor %s on slave %s was lost", executorID, slaveID) +} + +func (s *FirstFit) Error(_ sched.SchedulerDriver, err string) { + log.Printf("Receiving an error: %s", err) +} diff --git a/schedulers/helpers.go b/schedulers/helpers.go new file mode 100644 index 0000000..2c6ffd2 --- /dev/null +++ b/schedulers/helpers.go @@ -0,0 +1,39 @@ +package schedulers + +import ( + "fmt" + "github.com/golang/protobuf/proto" + mesos "github.com/mesos/mesos-go/mesosproto" + "log" +) + +var ( + defaultFilter = &mesos.Filters{RefuseSeconds: proto.Float64(1)} + longFilter = &mesos.Filters{RefuseSeconds: proto.Float64(1000)} +) + +func OfferAgg(offer *mesos.Offer) (float64, float64, float64) { + var cpus, mem, watts float64 + + for _, resource := range offer.Resources { + switch resource.GetName() { + case "cpus": + cpus += *resource.GetScalar().Value + case "mem": + mem += *resource.GetScalar().Value + case "watts": + watts += *resource.GetScalar().Value + } + } + + return cpus, mem, watts +} + +func coLocated(tasks map[string]bool) { + + for task := range tasks { + log.Println(task) + } + + fmt.Println("---------------------") +} diff --git a/states.go b/schedulers/states.go similarity index 98% rename from states.go rename to schedulers/states.go index 69a227b..8aa775e 100644 --- a/states.go +++ b/schedulers/states.go @@ -1,8 +1,8 @@ -package main +package schedulers import ( - mesos "github.com/mesos/mesos-go/mesosproto" "fmt" + mesos "github.com/mesos/mesos-go/mesosproto" ) // NameFor returns the string name for a TaskState. 
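The refactor above leaves `main()` hard-wired to `schedulers.NewFirstFit`, so choosing a different scheduler still means editing that line. Below is one possible (not committed) way to select an implementation by name. It uses only the constructors that exist at this point in the series and assumes both types satisfy the mesos-go `sched.Scheduler` interface; a real version would also need the concrete type's `PCPLog`, `RecordPCP`, `Shutdown`, and `Done` fields, so treat it as a sketch:

```
package main

import (
	"fmt"

	"bitbucket.org/bingcloud/electron/def"
	"bitbucket.org/bingcloud/electron/schedulers"
	sched "github.com/mesos/mesos-go/scheduler"
)

// schedulerFor maps a hypothetical -schedulerName flag value onto one of the
// implementations in the schedulers package.
func schedulerFor(name string, tasks []def.Task, ignoreWatts bool) (sched.Scheduler, error) {
	switch name {
	case "first-fit":
		return schedulers.NewFirstFit(tasks, ignoreWatts), nil
	case "binpack-watts":
		return schedulers.NewBinPackWatts(tasks, ignoreWatts), nil
	default:
		return nil, fmt.Errorf("unknown scheduler %q", name)
	}
}

func main() {
	tasks, err := def.TasksFromJSON("workload.json") // hypothetical workload file
	if err != nil {
		fmt.Println(err)
		return
	}
	if _, err := schedulerFor("binpack-watts", tasks, false); err != nil {
		fmt.Println(err)
	}
}
```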
From 97002ea3535090f041e32e0b0847c912915b5323 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Thu, 13 Oct 2016 17:50:54 -0400 Subject: [PATCH 036/102] Adding First Fit Watts Only --- schedulers/firstfitwattsonly.go | 236 ++++++++++++++++++++++++++++++++ 1 file changed, 236 insertions(+) create mode 100644 schedulers/firstfitwattsonly.go diff --git a/schedulers/firstfitwattsonly.go b/schedulers/firstfitwattsonly.go new file mode 100644 index 0000000..b19ce3b --- /dev/null +++ b/schedulers/firstfitwattsonly.go @@ -0,0 +1,236 @@ +package schedulers + +import ( + "bitbucket.org/bingcloud/electron/def" + "fmt" + "github.com/golang/protobuf/proto" + mesos "github.com/mesos/mesos-go/mesosproto" + "github.com/mesos/mesos-go/mesosutil" + sched "github.com/mesos/mesos-go/scheduler" + "log" + "strings" + "time" +) + +// Decides if to take an offer or not +func (*FirstFitWattsOnly) takeOffer(offer *mesos.Offer, task def.Task) bool { + + _, _, watts := OfferAgg(offer) + + //TODO: Insert watts calculation here instead of taking them as a parameter + + if watts >= task.Watts { + return true + } + + return false +} + +type FirstFitWattsOnly struct { + tasksCreated int + tasksRunning int + tasks []def.Task + metrics map[string]def.Metric + running map[string]map[string]bool + ignoreWatts bool + + // First set of PCP values are garbage values, signal to logger to start recording when we're + // about to schedule a new task + RecordPCP bool + + // This channel is closed when the program receives an interrupt, + // signalling that the program should shut down. + Shutdown chan struct{} + // This channel is closed after shutdown is closed, and only when all + // outstanding tasks have been cleaned up + Done chan struct{} + + // Controls when to shutdown pcp logging + PCPLog chan struct{} +} + +// New electron scheduler +func NewFirstFitWattsOnly(tasks []def.Task, ignoreWatts bool) *FirstFitWattsOnly { + + s := &FirstFitWattsOnly{ + tasks: tasks, + ignoreWatts: ignoreWatts, + Shutdown: make(chan struct{}), + Done: make(chan struct{}), + PCPLog: make(chan struct{}), + running: make(map[string]map[string]bool), + RecordPCP: false, + } + return s +} + +func (s *FirstFitWattsOnly) newTask(offer *mesos.Offer, task def.Task) *mesos.TaskInfo { + taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances) + s.tasksCreated++ + + if !s.RecordPCP { + // Turn on logging + s.RecordPCP = true + time.Sleep(1 * time.Second) // Make sure we're recording by the time the first task starts + } + + // If this is our first time running into this Agent + if _, ok := s.running[offer.GetSlaveId().GoString()]; !ok { + s.running[offer.GetSlaveId().GoString()] = make(map[string]bool) + } + + // Add task to list of tasks running on node + s.running[offer.GetSlaveId().GoString()][taskName] = true + + resources := []*mesos.Resource{ + mesosutil.NewScalarResource("watts", task.Watts), + } + + return &mesos.TaskInfo{ + Name: proto.String(taskName), + TaskId: &mesos.TaskID{ + Value: proto.String("electron-" + taskName), + }, + SlaveId: offer.SlaveId, + Resources: resources, + Command: &mesos.CommandInfo{ + Value: proto.String(task.CMD), + }, + Container: &mesos.ContainerInfo{ + Type: mesos.ContainerInfo_DOCKER.Enum(), + Docker: &mesos.ContainerInfo_DockerInfo{ + Image: proto.String(task.Image), + Network: mesos.ContainerInfo_DockerInfo_BRIDGE.Enum(), // Run everything isolated + }, + }, + } +} + +func (s *FirstFitWattsOnly) Registered( + _ sched.SchedulerDriver, + frameworkID *mesos.FrameworkID, + masterInfo *mesos.MasterInfo) { + 
log.Printf("Framework %s registered with master %s", frameworkID, masterInfo) +} + +func (s *FirstFitWattsOnly) Reregistered(_ sched.SchedulerDriver, masterInfo *mesos.MasterInfo) { + log.Printf("Framework re-registered with master %s", masterInfo) +} + +func (s *FirstFitWattsOnly) Disconnected(sched.SchedulerDriver) { + log.Println("Framework disconnected with master") +} + +func (s *FirstFitWattsOnly) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { + log.Printf("Received %d resource offers", len(offers)) + + for _, offer := range offers { + select { + case <-s.Shutdown: + log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]") + driver.DeclineOffer(offer.Id, longFilter) + + log.Println("Number of tasks still running: ", s.tasksRunning) + continue + default: + } + + tasks := []*mesos.TaskInfo{} + + // First fit strategy + + taken := false + for i, task := range s.tasks { + + // Check host if it exists + if task.Host != "" { + // Don't take offer if it doesn't match our task's host requirement + if !strings.HasPrefix(*offer.Hostname, task.Host) { + continue + } + } + + // Decision to take the offer or not + if s.takeOffer(offer, task) { + + log.Println("Co-Located with: ") + coLocated(s.running[offer.GetSlaveId().GoString()]) + + tasks = append(tasks, s.newTask(offer, task)) + + log.Printf("Starting %s on [%s]\n", task.Name, offer.GetHostname()) + driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, defaultFilter) + + taken = true + + fmt.Println("Inst: ", *task.Instances) + *task.Instances-- + + if *task.Instances <= 0 { + // All instances of task have been scheduled, remove it + s.tasks[i] = s.tasks[len(s.tasks)-1] + s.tasks = s.tasks[:len(s.tasks)-1] + + if len(s.tasks) <= 0 { + log.Println("Done scheduling all tasks") + close(s.Shutdown) + } + } + break // Offer taken, move on + } + } + + // If there was no match for the task + if !taken { + fmt.Println("There is not enough resources to launch a task:") + cpus, mem, watts := OfferAgg(offer) + + log.Printf("\n", cpus, mem, watts) + driver.DeclineOffer(offer.Id, defaultFilter) + } + + } +} + +func (s *FirstFitWattsOnly) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) { + log.Printf("Received task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) + + if *status.State == mesos.TaskState_TASK_RUNNING { + s.tasksRunning++ + } else if IsTerminal(status.State) { + delete(s.running[status.GetSlaveId().GoString()], *status.TaskId.Value) + s.tasksRunning-- + if s.tasksRunning == 0 { + select { + case <-s.Shutdown: + close(s.Done) + default: + } + } + } + log.Printf("DONE: Task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) +} + +func (s *FirstFitWattsOnly) FrameworkMessage( + driver sched.SchedulerDriver, + executorID *mesos.ExecutorID, + slaveID *mesos.SlaveID, + message string) { + + log.Println("Getting a framework message: ", message) + log.Printf("Received a framework message from some unknown source: %s", *executorID.Value) +} + +func (s *FirstFitWattsOnly) OfferRescinded(_ sched.SchedulerDriver, offerID *mesos.OfferID) { + log.Printf("Offer %s rescinded", offerID) +} +func (s *FirstFitWattsOnly) SlaveLost(_ sched.SchedulerDriver, slaveID *mesos.SlaveID) { + log.Printf("Slave %s lost", slaveID) +} +func (s *FirstFitWattsOnly) ExecutorLost(_ sched.SchedulerDriver, executorID *mesos.ExecutorID, slaveID *mesos.SlaveID, status int) { + log.Printf("Executor %s on slave %s was lost", executorID, slaveID) +} + +func (s 
*FirstFitWattsOnly) Error(_ sched.SchedulerDriver, err string) { + log.Printf("Receiving an error: %s", err) +} From 617fb8ac0fe7d9ab1261985b36102cd8b35547b1 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sat, 15 Oct 2016 21:24:14 -0400 Subject: [PATCH 037/102] Sorting based on watts value added. bin packing based on watts is almost complete. --- def/task.go | 14 ++ schedulers/binpackwatts.go | 54 ++++--- schedulers/firstfitsortedwatts.go | 244 ++++++++++++++++++++++++++++++ 3 files changed, 287 insertions(+), 25 deletions(-) create mode 100644 schedulers/firstfitsortedwatts.go diff --git a/def/task.go b/def/task.go index 94629d1..5e0f8de 100644 --- a/def/task.go +++ b/def/task.go @@ -33,3 +33,17 @@ func TasksFromJSON(uri string) ([]Task, error) { return tasks, nil } + +type WattsSorter []Task + +func (slice WattsSorter) Len() int { + return len(slice) +} + +func (slice WattsSorter) Less(i, j int) bool { + return slice[i].Watts < slice[j].Watts +} + +func (slice WattsSorter) Swap(i, j int) { + slice[i], slice[j] = slice[j], slice[i] +} diff --git a/schedulers/binpackwatts.go b/schedulers/binpackwatts.go index 690e793..9656ddc 100644 --- a/schedulers/binpackwatts.go +++ b/schedulers/binpackwatts.go @@ -8,6 +8,7 @@ import ( "github.com/mesos/mesos-go/mesosutil" sched "github.com/mesos/mesos-go/scheduler" "log" + "sort" "strings" "time" ) @@ -51,6 +52,7 @@ type BinPackWatts struct { // New electron scheduler func NewBinPackWatts(tasks []def.Task, ignoreWatts bool) *BinPackWatts { + sort.Sort(def.WattsSorter(tasks)) s := &BinPackWatts{ tasks: tasks, @@ -142,9 +144,10 @@ func (s *BinPackWatts) ResourceOffers(driver sched.SchedulerDriver, offers []*me tasks := []*mesos.TaskInfo{} - // First fit strategy + _, _, offer_watts := OfferAgg(offer) taken := false + totalWatts := 0.0 for i, task := range s.tasks { // Check host if it exists @@ -155,45 +158,46 @@ func (s *BinPackWatts) ResourceOffers(driver sched.SchedulerDriver, offers []*me } } - // Decision to take the offer or not - if s.takeOffer(offer, task) { + for *task.Instances > 0 { + // Does the task fit + if offer_watts >= (totalWatts + task.Watts) { - log.Println("Co-Located with: ") - coLocated(s.running[offer.GetSlaveId().GoString()]) + taken = true + totalWatts += task.Watts + log.Println("Co-Located with: ") + coLocated(s.running[offer.GetSlaveId().GoString()]) + tasks = append(tasks, s.newTask(offer, task)) - tasks = append(tasks, s.newTask(offer, task)) + fmt.Println("Inst: ", *task.Instances) + *task.Instances-- - log.Printf("Starting %s on [%s]\n", task.Name, offer.GetHostname()) - driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, defaultFilter) + if *task.Instances <= 0 { + // All instances of task have been scheduled, remove it + s.tasks = append(s.tasks[:i], s.tasks[i+1:]...) 
- taken = true - - fmt.Println("Inst: ", *task.Instances) - *task.Instances-- - - if *task.Instances <= 0 { - // All instances of task have been scheduled, remove it - s.tasks[i] = s.tasks[len(s.tasks)-1] - s.tasks = s.tasks[:len(s.tasks)-1] - - if len(s.tasks) <= 0 { - log.Println("Done scheduling all tasks") - close(s.Shutdown) + if len(s.tasks) <= 0 { + log.Println("Done scheduling all tasks") + close(s.Shutdown) + } } + } else { + break // Continue on to next offer } - break // Offer taken, move on } } - // If there was no match for the task - if !taken { + if taken { + log.Printf("Starting on [%s]\n", offer.GetHostname()) + driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, defaultFilter) + } else { + + // If there was no match for the task fmt.Println("There is not enough resources to launch a task:") cpus, mem, watts := OfferAgg(offer) log.Printf("\n", cpus, mem, watts) driver.DeclineOffer(offer.Id, defaultFilter) } - } } diff --git a/schedulers/firstfitsortedwatts.go b/schedulers/firstfitsortedwatts.go new file mode 100644 index 0000000..521f773 --- /dev/null +++ b/schedulers/firstfitsortedwatts.go @@ -0,0 +1,244 @@ +package schedulers + +import ( + "bitbucket.org/bingcloud/electron/def" + "fmt" + "github.com/golang/protobuf/proto" + mesos "github.com/mesos/mesos-go/mesosproto" + "github.com/mesos/mesos-go/mesosutil" + sched "github.com/mesos/mesos-go/scheduler" + "log" + "strings" + "time" + "sort" +) + +// Decides if to take an offer or not +func (*FirstFitSortedWatts) takeOffer(offer *mesos.Offer, task def.Task) bool { + + cpus, mem, watts := OfferAgg(offer) + + //TODO: Insert watts calculation here instead of taking them as a parameter + + if cpus >= task.CPU && mem >= task.RAM && watts >= task.Watts { + return true + } + + return false +} + +// electronScheduler implements the Scheduler interface +type FirstFitSortedWatts struct { + tasksCreated int + tasksRunning int + tasks []def.Task + metrics map[string]def.Metric + running map[string]map[string]bool + ignoreWatts bool + + // First set of PCP values are garbage values, signal to logger to start recording when we're + // about to schedule a new task + RecordPCP bool + + // This channel is closed when the program receives an interrupt, + // signalling that the program should shut down. 
+ Shutdown chan struct{} + // This channel is closed after shutdown is closed, and only when all + // outstanding tasks have been cleaned up + Done chan struct{} + + // Controls when to shutdown pcp logging + PCPLog chan struct{} +} + +// New electron scheduler +func NewFirstFitSortedWatts(tasks []def.Task, ignoreWatts bool) *FirstFitSortedWatts { + + sort.Sort(def.WattsSorter(tasks)) + + s := &FirstFitSortedWatts{ + tasks: tasks, + ignoreWatts: ignoreWatts, + Shutdown: make(chan struct{}), + Done: make(chan struct{}), + PCPLog: make(chan struct{}), + running: make(map[string]map[string]bool), + RecordPCP: false, + } + return s +} + +func (s *FirstFitSortedWatts) newTask(offer *mesos.Offer, task def.Task) *mesos.TaskInfo { + taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances) + s.tasksCreated++ + + if !s.RecordPCP { + // Turn on logging + s.RecordPCP = true + time.Sleep(1 * time.Second) // Make sure we're recording by the time the first task starts + } + + // If this is our first time running into this Agent + if _, ok := s.running[offer.GetSlaveId().GoString()]; !ok { + s.running[offer.GetSlaveId().GoString()] = make(map[string]bool) + } + + // Add task to list of tasks running on node + s.running[offer.GetSlaveId().GoString()][taskName] = true + + resources := []*mesos.Resource{ + mesosutil.NewScalarResource("cpus", task.CPU), + mesosutil.NewScalarResource("mem", task.RAM), + } + + if !s.ignoreWatts { + resources = append(resources, mesosutil.NewScalarResource("watts", task.Watts)) + } + + return &mesos.TaskInfo{ + Name: proto.String(taskName), + TaskId: &mesos.TaskID{ + Value: proto.String("electron-" + taskName), + }, + SlaveId: offer.SlaveId, + Resources: resources, + Command: &mesos.CommandInfo{ + Value: proto.String(task.CMD), + }, + Container: &mesos.ContainerInfo{ + Type: mesos.ContainerInfo_DOCKER.Enum(), + Docker: &mesos.ContainerInfo_DockerInfo{ + Image: proto.String(task.Image), + Network: mesos.ContainerInfo_DockerInfo_BRIDGE.Enum(), // Run everything isolated + }, + }, + } +} + +func (s *FirstFitSortedWatts) Registered( + _ sched.SchedulerDriver, + frameworkID *mesos.FrameworkID, + masterInfo *mesos.MasterInfo) { + log.Printf("Framework %s registered with master %s", frameworkID, masterInfo) +} + +func (s *FirstFitSortedWatts) Reregistered(_ sched.SchedulerDriver, masterInfo *mesos.MasterInfo) { + log.Printf("Framework re-registered with master %s", masterInfo) +} + +func (s *FirstFitSortedWatts) Disconnected(sched.SchedulerDriver) { + log.Println("Framework disconnected with master") +} + +func (s *FirstFitSortedWatts) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { + log.Printf("Received %d resource offers", len(offers)) + + for _, offer := range offers { + select { + case <-s.Shutdown: + log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]") + driver.DeclineOffer(offer.Id, longFilter) + + log.Println("Number of tasks still running: ", s.tasksRunning) + continue + default: + } + + tasks := []*mesos.TaskInfo{} + + // First fit strategy + + taken := false + for i, task := range s.tasks { + + // Check host if it exists + if task.Host != "" { + // Don't take offer if it doesn't match our task's host requirement + if !strings.HasPrefix(*offer.Hostname, task.Host) { + continue + } + } + + // Decision to take the offer or not + if s.takeOffer(offer, task) { + + log.Println("Co-Located with: ") + coLocated(s.running[offer.GetSlaveId().GoString()]) + + tasks = append(tasks, s.newTask(offer, task)) + + log.Printf("Starting 
%s on [%s]\n", task.Name, offer.GetHostname()) + driver.LaunchTasks([]*mesos.OfferID{offer.Id}, tasks, defaultFilter) + + taken = true + + fmt.Println("Inst: ", *task.Instances) + *task.Instances-- + + if *task.Instances <= 0 { + // All instances of task have been scheduled, remove it + s.tasks= append(s.tasks[:i], s.tasks[i+1:]...) + + if len(s.tasks) <= 0 { + log.Println("Done scheduling all tasks") + close(s.Shutdown) + } + } + break // Offer taken, move on + } + } + + // If there was no match for the task + if !taken { + fmt.Println("There is not enough resources to launch a task:") + cpus, mem, watts := OfferAgg(offer) + + log.Printf("\n", cpus, mem, watts) + driver.DeclineOffer(offer.Id, defaultFilter) + } + + } +} + +func (s *FirstFitSortedWatts) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) { + log.Printf("Received task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) + + if *status.State == mesos.TaskState_TASK_RUNNING { + s.tasksRunning++ + } else if IsTerminal(status.State) { + delete(s.running[status.GetSlaveId().GoString()], *status.TaskId.Value) + s.tasksRunning-- + if s.tasksRunning == 0 { + select { + case <-s.Shutdown: + close(s.Done) + default: + } + } + } + log.Printf("DONE: Task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) +} + +func (s *FirstFitSortedWatts) FrameworkMessage( + driver sched.SchedulerDriver, + executorID *mesos.ExecutorID, + slaveID *mesos.SlaveID, + message string) { + + log.Println("Getting a framework message: ", message) + log.Printf("Received a framework message from some unknown source: %s", *executorID.Value) +} + +func (s *FirstFitSortedWatts) OfferRescinded(_ sched.SchedulerDriver, offerID *mesos.OfferID) { + log.Printf("Offer %s rescinded", offerID) +} +func (s *FirstFitSortedWatts) SlaveLost(_ sched.SchedulerDriver, slaveID *mesos.SlaveID) { + log.Printf("Slave %s lost", slaveID) +} +func (s *FirstFitSortedWatts) ExecutorLost(_ sched.SchedulerDriver, executorID *mesos.ExecutorID, slaveID *mesos.SlaveID, status int) { + log.Printf("Executor %s on slave %s was lost", executorID, slaveID) +} + +func (s *FirstFitSortedWatts) Error(_ sched.SchedulerDriver, err string) { + log.Printf("Receiving an error: %s", err) +} From 39aa3cacc4a385b7a14035eae1ba7b74952bafe1 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Sun, 16 Oct 2016 14:48:31 -0400 Subject: [PATCH 038/102] Binpacking algorithm based on 3 dimensions using a list of tasks sorted by watts --- ...{binpackwatts.go => binpacksortedwatts.go} | 40 +++++++++++-------- 1 file changed, 23 insertions(+), 17 deletions(-) rename schedulers/{binpackwatts.go => binpacksortedwatts.go} (79%) diff --git a/schedulers/binpackwatts.go b/schedulers/binpacksortedwatts.go similarity index 79% rename from schedulers/binpackwatts.go rename to schedulers/binpacksortedwatts.go index 9656ddc..d73b64c 100644 --- a/schedulers/binpackwatts.go +++ b/schedulers/binpacksortedwatts.go @@ -14,7 +14,7 @@ import ( ) // Decides if to take an offer or not -func (*BinPackWatts) takeOffer(offer *mesos.Offer, task def.Task) bool { +func (*BinPackSortedWatts) takeOffer(offer *mesos.Offer, task def.Task) bool { cpus, mem, watts := OfferAgg(offer) @@ -27,7 +27,7 @@ func (*BinPackWatts) takeOffer(offer *mesos.Offer, task def.Task) bool { return false } -type BinPackWatts struct { +type BinPackSortedWatts struct { tasksCreated int tasksRunning int tasks []def.Task @@ -51,10 +51,10 @@ type BinPackWatts struct { } // New electron scheduler -func 
NewBinPackWatts(tasks []def.Task, ignoreWatts bool) *BinPackWatts { +func NewBinPackSortedWatts(tasks []def.Task, ignoreWatts bool) *BinPackSortedWatts { sort.Sort(def.WattsSorter(tasks)) - s := &BinPackWatts{ + s := &BinPackSortedWatts{ tasks: tasks, ignoreWatts: ignoreWatts, Shutdown: make(chan struct{}), @@ -66,7 +66,7 @@ func NewBinPackWatts(tasks []def.Task, ignoreWatts bool) *BinPackWatts { return s } -func (s *BinPackWatts) newTask(offer *mesos.Offer, task def.Task) *mesos.TaskInfo { +func (s *BinPackSortedWatts) newTask(offer *mesos.Offer, task def.Task) *mesos.TaskInfo { taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances) s.tasksCreated++ @@ -113,22 +113,22 @@ func (s *BinPackWatts) newTask(offer *mesos.Offer, task def.Task) *mesos.TaskInf } } -func (s *BinPackWatts) Registered( +func (s *BinPackSortedWatts) Registered( _ sched.SchedulerDriver, frameworkID *mesos.FrameworkID, masterInfo *mesos.MasterInfo) { log.Printf("Framework %s registered with master %s", frameworkID, masterInfo) } -func (s *BinPackWatts) Reregistered(_ sched.SchedulerDriver, masterInfo *mesos.MasterInfo) { +func (s *BinPackSortedWatts) Reregistered(_ sched.SchedulerDriver, masterInfo *mesos.MasterInfo) { log.Printf("Framework re-registered with master %s", masterInfo) } -func (s *BinPackWatts) Disconnected(sched.SchedulerDriver) { +func (s *BinPackSortedWatts) Disconnected(sched.SchedulerDriver) { log.Println("Framework disconnected with master") } -func (s *BinPackWatts) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { +func (s *BinPackSortedWatts) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { log.Printf("Received %d resource offers", len(offers)) for _, offer := range offers { @@ -144,10 +144,12 @@ func (s *BinPackWatts) ResourceOffers(driver sched.SchedulerDriver, offers []*me tasks := []*mesos.TaskInfo{} - _, _, offer_watts := OfferAgg(offer) + offer_cpu, offer_ram, offer_watts := OfferAgg(offer) taken := false totalWatts := 0.0 + totalCPU := 0.0 + totalRAM := 0.0 for i, task := range s.tasks { // Check host if it exists @@ -160,10 +162,14 @@ func (s *BinPackWatts) ResourceOffers(driver sched.SchedulerDriver, offers []*me for *task.Instances > 0 { // Does the task fit - if offer_watts >= (totalWatts + task.Watts) { + if (s.ignoreWatts || offer_watts >= (totalWatts+task.Watts)) && + (offer_cpu >= (totalCPU + task.CPU)) && + (offer_ram >= (totalRAM + task.RAM)) { taken = true totalWatts += task.Watts + totalCPU += task.CPU + totalRAM += task.RAM log.Println("Co-Located with: ") coLocated(s.running[offer.GetSlaveId().GoString()]) tasks = append(tasks, s.newTask(offer, task)) @@ -201,7 +207,7 @@ func (s *BinPackWatts) ResourceOffers(driver sched.SchedulerDriver, offers []*me } } -func (s *BinPackWatts) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) { +func (s *BinPackSortedWatts) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) { log.Printf("Received task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) if *status.State == mesos.TaskState_TASK_RUNNING { @@ -220,7 +226,7 @@ func (s *BinPackWatts) StatusUpdate(driver sched.SchedulerDriver, status *mesos. 
log.Printf("DONE: Task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) } -func (s *BinPackWatts) FrameworkMessage( +func (s *BinPackSortedWatts) FrameworkMessage( driver sched.SchedulerDriver, executorID *mesos.ExecutorID, slaveID *mesos.SlaveID, @@ -230,16 +236,16 @@ func (s *BinPackWatts) FrameworkMessage( log.Printf("Received a framework message from some unknown source: %s", *executorID.Value) } -func (s *BinPackWatts) OfferRescinded(_ sched.SchedulerDriver, offerID *mesos.OfferID) { +func (s *BinPackSortedWatts) OfferRescinded(_ sched.SchedulerDriver, offerID *mesos.OfferID) { log.Printf("Offer %s rescinded", offerID) } -func (s *BinPackWatts) SlaveLost(_ sched.SchedulerDriver, slaveID *mesos.SlaveID) { +func (s *BinPackSortedWatts) SlaveLost(_ sched.SchedulerDriver, slaveID *mesos.SlaveID) { log.Printf("Slave %s lost", slaveID) } -func (s *BinPackWatts) ExecutorLost(_ sched.SchedulerDriver, executorID *mesos.ExecutorID, slaveID *mesos.SlaveID, status int) { +func (s *BinPackSortedWatts) ExecutorLost(_ sched.SchedulerDriver, executorID *mesos.ExecutorID, slaveID *mesos.SlaveID, status int) { log.Printf("Executor %s on slave %s was lost", executorID, slaveID) } -func (s *BinPackWatts) Error(_ sched.SchedulerDriver, err string) { +func (s *BinPackSortedWatts) Error(_ sched.SchedulerDriver, err string) { log.Printf("Receiving an error: %s", err) } From e568dcc508a25b8d71ecdf6d46f12fa5b9900f29 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Mon, 17 Oct 2016 21:31:10 -0400 Subject: [PATCH 039/102] Averages are calculated correctly, PCP is able to determine a victim to be capped --- pcp/test/power.go | 190 +++++++++++++++++++++++++++++++++++++++++++++ pcp/test/victim.go | 3 + 2 files changed, 193 insertions(+) create mode 100644 pcp/test/power.go create mode 100644 pcp/test/victim.go diff --git a/pcp/test/power.go b/pcp/test/power.go new file mode 100644 index 0000000..6b7160a --- /dev/null +++ b/pcp/test/power.go @@ -0,0 +1,190 @@ +package main + +import ( + "bufio" + "fmt" + "log" + "os" + "os/exec" + "strings" + "syscall" + "time" + "strconv" + "math" + "container/ring" + "sort" +) + +type Victim struct { + Watts float64 + Host string +} + +type VictimSorter []Victim + +func (slice VictimSorter) Len() int { + return len(slice) +} + +func (slice VictimSorter) Less(i, j int) bool { + return slice[i].Watts >= slice[j].Watts +} + +func (slice VictimSorter) Swap(i, j int) { + slice[i], slice[j] = slice[j], slice[i] +} + +var RAPLUnits = math.Pow(2, -32) + +func mean(values *ring.Ring) float64 { + + total := 0.0 + count := 0.0 + + values.Do(func(x interface{}){ + if val, ok := x.(float64); ok { //Add it if we can get a float + total += val + count++ + } + }) + + if count == 0.0 { + return 0.0 + } + + + count /= 2 + + return (total/count) +} + +//func median(values *ring.Ring) { + +//} + + +func main() { + + prefix := "test" + logging := new(bool) + *logging = true + const pcpCommand string = "pmdumptext -m -l -f '' -t 1.0 -d , -c config" + cmd := exec.Command("sh", "-c", pcpCommand) + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + startTime := time.Now().Format("20060102150405") + + logFile, err := os.Create("./" + prefix + startTime + ".pcplog") + if err != nil { + log.Fatal(err) + } + + defer logFile.Close() + + pipe, err := cmd.StdoutPipe() + if err != nil { + log.Fatal(err) + } + //cmd.Stdout = stdout + + scanner := bufio.NewScanner(pipe) + + go func(logging *bool) { + // Get names of the columns + scanner.Scan() + + // Write to logfile + 
logFile.WriteString(scanner.Text() + "\n") + + headers := strings.Split(scanner.Text(), ",") + + powerIndexes := make([]int, 0, 0) + powerAverage := make(map[string]*ring.Ring) + indexToHost := make(map[int]string) + + for i, hostMetric := range headers { + split := strings.Split(hostMetric, ":") + fmt.Printf("%d Host %s: Metric: %s\n", i, split[0], split[1]) + + if strings.Contains(split[1], "RAPL_ENERGY_PKG") { + fmt.Println("Index: ", i) + powerIndexes = append(powerIndexes, i) + indexToHost[i] = split[0] + powerAverage[split[0]] = ring.New(10) // Two PKS per node, 10 = 5 seconds tracking + } + } + + // Throw away first set of results + scanner.Scan() + + seconds := 0 + for scanner.Scan() { + + if *logging { + log.Println("Logging PCP...") + split := strings.Split(scanner.Text(), ",") + logFile.WriteString(scanner.Text() + "\n") + + + totalPower := 0.0 + for _,powerIndex := range powerIndexes { + power, _ := strconv.ParseFloat(split[powerIndex], 64) + + host := indexToHost[powerIndex] + + powerAverage[host].Value = power + powerAverage[host] = powerAverage[host].Next() + + log.Printf("Host: %s, Index: %d, Power: %f", indexToHost[powerIndex], powerIndex, (power * RAPLUnits)) + + totalPower += power + } + + log.Println("Total power: ", totalPower * RAPLUnits) + + victims := make([]Victim, 8, 8) + + // TODO: Just keep track of the largest to reduce fron nlogn to n + for name,ring := range powerAverage { + victims = append(victims, Victim{mean(ring), name}) + //log.Printf("host: %s, Avg: %f", name, mean(ring) * RAPLUnits) + } + sort.Sort(VictimSorter(victims)) + log.Printf("Current Victim %s Avg. Wattage: %f", victims[0].Host, victims[0].Watts * RAPLUnits) + } + + /* + fmt.Printf("Second: %d\n", seconds) + for i, val := range strings.Split(scanner.Text(), ",") { + fmt.Printf("host metric: %s val: %s\n", headers[i], val) + }*/ + + seconds++ + + // fmt.Println("--------------------------------") + } + }(logging) + + log.Println("PCP logging started") + + if err := cmd.Start(); err != nil { + log.Fatal(err) + } + + if err := cmd.Wait(); err != nil { + log.Fatal(err) + } + + /* + pgid, err := syscall.Getpgid(cmd.Process.Pid) + + select { + case <-quit: + log.Println("Stopping PCP logging in 5 seconds") + time.Sleep(5 * time.Second) + + // http://stackoverflow.com/questions/22470193/why-wont-go-kill-a-child-process-correctly + // kill process and all children processes + syscall.Kill(-pgid, 15) + return + }*/ +} diff --git a/pcp/test/victim.go b/pcp/test/victim.go new file mode 100644 index 0000000..1110061 --- /dev/null +++ b/pcp/test/victim.go @@ -0,0 +1,3 @@ +package main + + From 254528f4f1e5656c3cc4f0586de397bd3bde0176 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Mon, 17 Oct 2016 22:12:59 -0400 Subject: [PATCH 040/102] Initial version of command which will cap system --- pcp/ssh/raplcap.go | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 pcp/ssh/raplcap.go diff --git a/pcp/ssh/raplcap.go b/pcp/ssh/raplcap.go new file mode 100644 index 0000000..f5c6ceb --- /dev/null +++ b/pcp/ssh/raplcap.go @@ -0,0 +1,27 @@ +package ssh + +import ( + "golang.org/x/crypto/ssh" + "fmt" +) + +func main() { + sshConfig := &ssh.ClientConfig{ + User: "rapl", + Auth: []ssh.AuthMethod{ + ssh.Password("pankajlikesdanceswithwolves#!@#"), + }, + } + + connection, err := ssh.Dial("tcp", "host:port", sshConfig) + if err != nil { + return nil, fmt.Errorf("Failed to dial: %s", err) + } + + session, err := connection.NewSession() + if err != nil { + return nil, fmt.Errorf("Failed 
to create session: %s", err) + } + + err = session.Run("sudo /misc/shared_data/rdelval1/RAPL_PKG_Throttle.py 100") +} \ No newline at end of file From 1386049d3044b843db05dd3cce073383be211ce9 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Tue, 18 Oct 2016 14:56:07 -0400 Subject: [PATCH 041/102] rapl capping via ssh command in rapl package --- pcp/ssh/raplcap.go | 27 --------------------------- pcp/test/power.go | 10 +--------- rapl/cap.go | 39 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 36 deletions(-) delete mode 100644 pcp/ssh/raplcap.go create mode 100644 rapl/cap.go diff --git a/pcp/ssh/raplcap.go b/pcp/ssh/raplcap.go deleted file mode 100644 index f5c6ceb..0000000 --- a/pcp/ssh/raplcap.go +++ /dev/null @@ -1,27 +0,0 @@ -package ssh - -import ( - "golang.org/x/crypto/ssh" - "fmt" -) - -func main() { - sshConfig := &ssh.ClientConfig{ - User: "rapl", - Auth: []ssh.AuthMethod{ - ssh.Password("pankajlikesdanceswithwolves#!@#"), - }, - } - - connection, err := ssh.Dial("tcp", "host:port", sshConfig) - if err != nil { - return nil, fmt.Errorf("Failed to dial: %s", err) - } - - session, err := connection.NewSession() - if err != nil { - return nil, fmt.Errorf("Failed to create session: %s", err) - } - - err = session.Run("sudo /misc/shared_data/rdelval1/RAPL_PKG_Throttle.py 100") -} \ No newline at end of file diff --git a/pcp/test/power.go b/pcp/test/power.go index 6b7160a..b061e07 100644 --- a/pcp/test/power.go +++ b/pcp/test/power.go @@ -109,7 +109,7 @@ func main() { fmt.Println("Index: ", i) powerIndexes = append(powerIndexes, i) indexToHost[i] = split[0] - powerAverage[split[0]] = ring.New(10) // Two PKS per node, 10 = 5 seconds tracking + powerAverage[split[0]] = ring.New(10) // Two PKGS per node, 10 = 5 seconds tracking } } @@ -152,15 +152,7 @@ func main() { log.Printf("Current Victim %s Avg. Wattage: %f", victims[0].Host, victims[0].Watts * RAPLUnits) } - /* - fmt.Printf("Second: %d\n", seconds) - for i, val := range strings.Split(scanner.Text(), ",") { - fmt.Printf("host metric: %s val: %s\n", headers[i], val) - }*/ - seconds++ - - // fmt.Println("--------------------------------") } }(logging) diff --git a/rapl/cap.go b/rapl/cap.go new file mode 100644 index 0000000..c25eb66 --- /dev/null +++ b/rapl/cap.go @@ -0,0 +1,39 @@ +package rapl + +import ( + "golang.org/x/crypto/ssh" + "github.com/pkg/errors" + "strconv" +) + +func Cap(host, username string, percentage int) (error) { + + if percentage > 100 || percentage < 0 { + return errors.New("Percentage is out of range") + } + + sshConfig := &ssh.ClientConfig{ + User: username, + Auth: []ssh.AuthMethod{ + // TODO: CHANGE and MAKE THIS USE SSH KEY BEFORE MAKING PUBLIC!!!! + ssh.Password("pankajlikesdanceswithwolves#!@#"), + }, + } + + connection, err := ssh.Dial("tcp", host+":22", sshConfig) + if err != nil { + return errors.Wrap(err, "Failed to dial") + } + + session, err := connection.NewSession() + if err != nil { + return errors.Wrap(err, "Failed to create session") + } + + err = session.Run("sudo /misc/shared_data/rdelval1/RAPL_PKG_Throttle.py " + strconv.Itoa(percentage)) + if err != nil { + return errors.Wrap(err, "Failed to run RAPL script") + } + + return nil +} From cf3d83e71242e52efc8101263211df877e8944ce Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Tue, 18 Oct 2016 17:38:49 -0400 Subject: [PATCH 042/102] Finished integrating dynamic capping policy that uses a high and a low threshold to start capping and uncapping node via an ssh commands that triggers a python script. 
Commit has sensitive data, scrub before releasing to public. --- pcp/loganddynamiccap.go | 214 ++++++++++++++++++++++++++++++++++++++++ pcp/test/power.go | 182 ---------------------------------- pcp/test/victim.go | 3 - pcp/victim.go | 20 ++++ scheduler.go | 12 ++- 5 files changed, 245 insertions(+), 186 deletions(-) create mode 100644 pcp/loganddynamiccap.go delete mode 100644 pcp/test/power.go delete mode 100644 pcp/test/victim.go create mode 100644 pcp/victim.go diff --git a/pcp/loganddynamiccap.go b/pcp/loganddynamiccap.go new file mode 100644 index 0000000..577a395 --- /dev/null +++ b/pcp/loganddynamiccap.go @@ -0,0 +1,214 @@ +package pcp + +import ( + "bufio" + "container/ring" + "log" + "math" + "os" + "os/exec" + "sort" + "strconv" + "strings" + "syscall" + "time" + "bitbucket.org/bingcloud/electron/rapl" +) + +var RAPLUnits = math.Pow(2, -32) + +func meanPKG(history *ring.Ring) float64 { + + total := 0.0 + count := 0.0 + + history.Do(func(x interface{}) { + if val, ok := x.(float64); ok { //Add it if we can get a float + total += val + count++ + } + }) + + if count == 0.0 { + return 0.0 + } + + count /= 2 + + return (total / count) +} + +func meanCluster(history *ring.Ring) float64 { + + total := 0.0 + count := 0.0 + + history.Do(func(x interface{}) { + if val, ok := x.(float64); ok { //Add it if we can get a float + total += val + count++ + } + }) + + if count == 0.0 { + return 0.0 + } + + return (total / count) +} + +func StartLogAndDynamicCap(quit chan struct{}, logging *bool, prefix string, hiThreshold, loThreshold float64) { + const pcpCommand string = "pmdumptext -m -l -f '' -t 1.0 -d , -c config" + cmd := exec.Command("sh", "-c", pcpCommand) + cmd.SysProcAttr = &syscall.SysProcAttr{Setpgid: true} + startTime := time.Now().Format("20060102150405") + + if hiThreshold < loThreshold { + log.Println("High threshold is lower than low threshold!") + } + + logFile, err := os.Create("./" + prefix + startTime + ".pcplog") + if err != nil { + log.Fatal(err) + } + + defer logFile.Close() + + pipe, err := cmd.StdoutPipe() + if err != nil { + log.Fatal(err) + } + //cmd.Stdout = stdout + + scanner := bufio.NewScanner(pipe) + + go func(logging *bool, hiThreshold, loThreshold float64) { + // Get names of the columns + scanner.Scan() + + // Write to logfile + logFile.WriteString(scanner.Text() + "\n") + + headers := strings.Split(scanner.Text(), ",") + + powerIndexes := make([]int, 0, 0) + powerHistories := make(map[string]*ring.Ring) + indexToHost := make(map[int]string) + + for i, hostMetric := range headers { + split := strings.Split(hostMetric, ":") + //log.Printf("%d Host %s: Metric: %s\n", i, split[0], split[1]) + + if strings.Contains(split[1], "RAPL_ENERGY_PKG") { + //fmt.Println("Index: ", i) + powerIndexes = append(powerIndexes, i) + indexToHost[i] = split[0] + powerHistories[split[0]] = ring.New(10) // Two PKGS per node, 10 = 5 seconds tracking + } + } + + // Throw away first set of results + scanner.Scan() + + cappedHosts := make(map[string]bool) + orderCapped := make([]string, 0, 8) + clusterPowerHist := ring.New(5) + seconds := 0 + + for scanner.Scan() { + + if *logging { + log.Println("Logging PCP...") + split := strings.Split(scanner.Text(), ",") + logFile.WriteString(scanner.Text() + "\n") + + totalPower := 0.0 + for _, powerIndex := range powerIndexes { + power, _ := strconv.ParseFloat(split[powerIndex], 64) + + host := indexToHost[powerIndex] + + powerHistories[host].Value = power + powerHistories[host] = powerHistories[host].Next() + + log.Printf("Host: %s, Power: %f", 
indexToHost[powerIndex], (power * RAPLUnits)) + + totalPower += power + } + clusterPower := totalPower * RAPLUnits + + clusterPowerHist.Value = clusterPower + clusterPowerHist = clusterPowerHist.Next() + + clusterMean := meanCluster(clusterPowerHist) + + log.Printf("Total power: %f, %d Sec Avg: %f", clusterPower, clusterPowerHist.Len(), clusterMean) + + if clusterMean > hiThreshold { + log.Printf("Need to cap a node") + // Create statics for all victims and choose one to cap + victims := make([]Victim, 0, 8) + + // TODO: Just keep track of the largest to reduce fron nlogn to n + for name, history := range powerHistories { + + histMean := meanPKG(history) + // Consider doing mean calculations using go routines if we need to speed up + victims = append(victims, Victim{Watts: histMean, Host: name}) + //log.Printf("host: %s, Avg: %f", name, histMean * RAPLUnits) + } + + sort.Sort(VictimSorter(victims)) // Sort by average wattage + + // From best victim to worst, if everyone is already capped NOOP + for _, victim := range victims { + // Only cap if host hasn't been capped yet + if !cappedHosts[victim.Host] { + cappedHosts[victim.Host] = true + orderCapped = append(orderCapped, victim.Host) + log.Printf("Capping Victim %s Avg. Wattage: %f", victim.Host, victim.Watts*RAPLUnits) + if err := rapl.Cap(victim.Host, "rapl", 50); err != nil { + log.Print("Error capping host") + } + break // Only cap one machine at at time + } + } + + } else if clusterMean < loThreshold { + + if len(orderCapped) > 0 { + host := orderCapped[len(orderCapped)-1] + orderCapped = orderCapped[:len(orderCapped)-1] + cappedHosts[host] = false + // User RAPL package to send uncap + log.Printf("Uncapping host %s", host) + if err := rapl.Cap(host, "rapl", 100); err != nil { + log.Print("Error uncapping host") + } + } + } + } + + seconds++ + } + }(logging, hiThreshold, loThreshold) + + log.Println("PCP logging started") + + if err := cmd.Start(); err != nil { + log.Fatal(err) + } + + pgid, err := syscall.Getpgid(cmd.Process.Pid) + + select { + case <-quit: + log.Println("Stopping PCP logging in 5 seconds") + time.Sleep(5 * time.Second) + + // http://stackoverflow.com/questions/22470193/why-wont-go-kill-a-child-process-correctly + // kill process and all children processes + syscall.Kill(-pgid, 15) + return + } +} diff --git a/pcp/test/power.go b/pcp/test/power.go deleted file mode 100644 index b061e07..0000000 --- a/pcp/test/power.go +++ /dev/null @@ -1,182 +0,0 @@ -package main - -import ( - "bufio" - "fmt" - "log" - "os" - "os/exec" - "strings" - "syscall" - "time" - "strconv" - "math" - "container/ring" - "sort" -) - -type Victim struct { - Watts float64 - Host string -} - -type VictimSorter []Victim - -func (slice VictimSorter) Len() int { - return len(slice) -} - -func (slice VictimSorter) Less(i, j int) bool { - return slice[i].Watts >= slice[j].Watts -} - -func (slice VictimSorter) Swap(i, j int) { - slice[i], slice[j] = slice[j], slice[i] -} - -var RAPLUnits = math.Pow(2, -32) - -func mean(values *ring.Ring) float64 { - - total := 0.0 - count := 0.0 - - values.Do(func(x interface{}){ - if val, ok := x.(float64); ok { //Add it if we can get a float - total += val - count++ - } - }) - - if count == 0.0 { - return 0.0 - } - - - count /= 2 - - return (total/count) -} - -//func median(values *ring.Ring) { - -//} - - -func main() { - - prefix := "test" - logging := new(bool) - *logging = true - const pcpCommand string = "pmdumptext -m -l -f '' -t 1.0 -d , -c config" - cmd := exec.Command("sh", "-c", pcpCommand) - cmd.SysProcAttr 
= &syscall.SysProcAttr{Setpgid: true} - startTime := time.Now().Format("20060102150405") - - logFile, err := os.Create("./" + prefix + startTime + ".pcplog") - if err != nil { - log.Fatal(err) - } - - defer logFile.Close() - - pipe, err := cmd.StdoutPipe() - if err != nil { - log.Fatal(err) - } - //cmd.Stdout = stdout - - scanner := bufio.NewScanner(pipe) - - go func(logging *bool) { - // Get names of the columns - scanner.Scan() - - // Write to logfile - logFile.WriteString(scanner.Text() + "\n") - - headers := strings.Split(scanner.Text(), ",") - - powerIndexes := make([]int, 0, 0) - powerAverage := make(map[string]*ring.Ring) - indexToHost := make(map[int]string) - - for i, hostMetric := range headers { - split := strings.Split(hostMetric, ":") - fmt.Printf("%d Host %s: Metric: %s\n", i, split[0], split[1]) - - if strings.Contains(split[1], "RAPL_ENERGY_PKG") { - fmt.Println("Index: ", i) - powerIndexes = append(powerIndexes, i) - indexToHost[i] = split[0] - powerAverage[split[0]] = ring.New(10) // Two PKGS per node, 10 = 5 seconds tracking - } - } - - // Throw away first set of results - scanner.Scan() - - seconds := 0 - for scanner.Scan() { - - if *logging { - log.Println("Logging PCP...") - split := strings.Split(scanner.Text(), ",") - logFile.WriteString(scanner.Text() + "\n") - - - totalPower := 0.0 - for _,powerIndex := range powerIndexes { - power, _ := strconv.ParseFloat(split[powerIndex], 64) - - host := indexToHost[powerIndex] - - powerAverage[host].Value = power - powerAverage[host] = powerAverage[host].Next() - - log.Printf("Host: %s, Index: %d, Power: %f", indexToHost[powerIndex], powerIndex, (power * RAPLUnits)) - - totalPower += power - } - - log.Println("Total power: ", totalPower * RAPLUnits) - - victims := make([]Victim, 8, 8) - - // TODO: Just keep track of the largest to reduce fron nlogn to n - for name,ring := range powerAverage { - victims = append(victims, Victim{mean(ring), name}) - //log.Printf("host: %s, Avg: %f", name, mean(ring) * RAPLUnits) - } - sort.Sort(VictimSorter(victims)) - log.Printf("Current Victim %s Avg. 
Wattage: %f", victims[0].Host, victims[0].Watts * RAPLUnits) - } - - seconds++ - } - }(logging) - - log.Println("PCP logging started") - - if err := cmd.Start(); err != nil { - log.Fatal(err) - } - - if err := cmd.Wait(); err != nil { - log.Fatal(err) - } - - /* - pgid, err := syscall.Getpgid(cmd.Process.Pid) - - select { - case <-quit: - log.Println("Stopping PCP logging in 5 seconds") - time.Sleep(5 * time.Second) - - // http://stackoverflow.com/questions/22470193/why-wont-go-kill-a-child-process-correctly - // kill process and all children processes - syscall.Kill(-pgid, 15) - return - }*/ -} diff --git a/pcp/test/victim.go b/pcp/test/victim.go deleted file mode 100644 index 1110061..0000000 --- a/pcp/test/victim.go +++ /dev/null @@ -1,3 +0,0 @@ -package main - - diff --git a/pcp/victim.go b/pcp/victim.go new file mode 100644 index 0000000..47c0ebb --- /dev/null +++ b/pcp/victim.go @@ -0,0 +1,20 @@ +package pcp + +type Victim struct { + Watts float64 + Host string +} + +type VictimSorter []Victim + +func (slice VictimSorter) Len() int { + return len(slice) +} + +func (slice VictimSorter) Less(i, j int) bool { + return slice[i].Watts >= slice[j].Watts +} + +func (slice VictimSorter) Swap(i, j int) { + slice[i], slice[j] = slice[j], slice[i] +} diff --git a/scheduler.go b/scheduler.go index 6b11506..93bbdf4 100644 --- a/scheduler.go +++ b/scheduler.go @@ -19,6 +19,8 @@ var master = flag.String("master", "xavier:5050", "Location of leading Mesos mas var tasksFile = flag.String("workload", "", "JSON file containing task definitions") var ignoreWatts = flag.Bool("ignoreWatts", false, "Ignore watts in offers") var pcplogPrefix = flag.String("logPrefix", "", "Prefix for pcplog") +var hiThreshold = flag.Float64("hiThreshold", 0.0, "Upperbound for when we should start capping") +var loThreshold = flag.Float64("loThreshold", 0.0, "Lowerbound for when we should start uncapping") // Short hand args func init() { @@ -26,6 +28,8 @@ func init() { flag.StringVar(tasksFile, "w", "", "JSON file containing task definitions (shorthand)") flag.BoolVar(ignoreWatts, "i", false, "Ignore watts in offers (shorthand)") flag.StringVar(pcplogPrefix, "p", "", "Prefix for pcplog (shorthand)") + flag.Float64Var(hiThreshold, "ht", 700.0, "Upperbound for when we should start capping (shorthand)") + flag.Float64Var(loThreshold, "lt", 400.0, "Lowerbound for when we should start uncapping (shorthand)") } func main() { @@ -36,6 +40,11 @@ func main() { os.Exit(1) } + if *hiThreshold < *loThreshold { + fmt.Println("High threshold is of a lower value than low threhold.") + os.Exit(1) + } + tasks, err := def.TasksFromJSON(*tasksFile) if err != nil || len(tasks) == 0 { fmt.Println("Invalid tasks specification file provided") @@ -61,7 +70,8 @@ func main() { return } - go pcp.Start(scheduler.PCPLog, &scheduler.RecordPCP, *pcplogPrefix) + //go pcp.Start(scheduler.PCPLog, &scheduler.RecordPCP, *pcplogPrefix) + go pcp.StartLogAndDynamicCap(scheduler.PCPLog, &scheduler.RecordPCP, *pcplogPrefix, *hiThreshold, *loThreshold) time.Sleep(1 * time.Second) // Attempt to handle signint to not leave pmdumptext running From 7c7d80b5579a9f130656a469a009465607f465a9 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Tue, 18 Oct 2016 17:39:44 -0400 Subject: [PATCH 043/102] Minor renaming of receivers in some schedulers --- schedulers/firstfit.go | 4 ++-- schedulers/firstfitsortedwatts.go | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/schedulers/firstfit.go b/schedulers/firstfit.go index 91a68f1..bdfad7e 100644 --- 
a/schedulers/firstfit.go +++ b/schedulers/firstfit.go @@ -13,13 +13,13 @@ import ( ) // Decides if to take an offer or not -func (*FirstFit) takeOffer(offer *mesos.Offer, task def.Task) bool { +func (s *FirstFit) takeOffer(offer *mesos.Offer, task def.Task) bool { cpus, mem, watts := OfferAgg(offer) //TODO: Insert watts calculation here instead of taking them as a parameter - if cpus >= task.CPU && mem >= task.RAM && watts >= task.Watts { + if cpus >= task.CPU && mem >= task.RAM && (s.ignoreWatts || watts >= task.Watts) { return true } diff --git a/schedulers/firstfitsortedwatts.go b/schedulers/firstfitsortedwatts.go index 521f773..7214822 100644 --- a/schedulers/firstfitsortedwatts.go +++ b/schedulers/firstfitsortedwatts.go @@ -14,13 +14,13 @@ import ( ) // Decides if to take an offer or not -func (*FirstFitSortedWatts) takeOffer(offer *mesos.Offer, task def.Task) bool { +func (s *FirstFitSortedWatts) takeOffer(offer *mesos.Offer, task def.Task) bool { cpus, mem, watts := OfferAgg(offer) //TODO: Insert watts calculation here instead of taking them as a parameter - if cpus >= task.CPU && mem >= task.RAM && watts >= task.Watts { + if cpus >= task.CPU && mem >= task.RAM && (s.ignoreWatts || watts >= task.Watts) { return true } From 86790133e1d5da88a230970c6870d1dcb9b0e6a6 Mon Sep 17 00:00:00 2001 From: Renan DelValle Date: Tue, 18 Oct 2016 17:42:09 -0400 Subject: [PATCH 044/102] Go FMT run on entire project --- pcp/loganddynamiccap.go | 2 +- pcp/victim.go | 2 +- rapl/cap.go | 4 ++-- schedulers/firstfitsortedwatts.go | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/pcp/loganddynamiccap.go b/pcp/loganddynamiccap.go index 577a395..cc2776f 100644 --- a/pcp/loganddynamiccap.go +++ b/pcp/loganddynamiccap.go @@ -1,6 +1,7 @@ package pcp import ( + "bitbucket.org/bingcloud/electron/rapl" "bufio" "container/ring" "log" @@ -12,7 +13,6 @@ import ( "strings" "syscall" "time" - "bitbucket.org/bingcloud/electron/rapl" ) var RAPLUnits = math.Pow(2, -32) diff --git a/pcp/victim.go b/pcp/victim.go index 47c0ebb..801cc48 100644 --- a/pcp/victim.go +++ b/pcp/victim.go @@ -2,7 +2,7 @@ package pcp type Victim struct { Watts float64 - Host string + Host string } type VictimSorter []Victim diff --git a/rapl/cap.go b/rapl/cap.go index c25eb66..20cd945 100644 --- a/rapl/cap.go +++ b/rapl/cap.go @@ -1,12 +1,12 @@ package rapl import ( - "golang.org/x/crypto/ssh" "github.com/pkg/errors" + "golang.org/x/crypto/ssh" "strconv" ) -func Cap(host, username string, percentage int) (error) { +func Cap(host, username string, percentage int) error { if percentage > 100 || percentage < 0 { return errors.New("Percentage is out of range") diff --git a/schedulers/firstfitsortedwatts.go b/schedulers/firstfitsortedwatts.go index 7214822..faab082 100644 --- a/schedulers/firstfitsortedwatts.go +++ b/schedulers/firstfitsortedwatts.go @@ -8,9 +8,9 @@ import ( "github.com/mesos/mesos-go/mesosutil" sched "github.com/mesos/mesos-go/scheduler" "log" + "sort" "strings" "time" - "sort" ) // Decides if to take an offer or not @@ -177,7 +177,7 @@ func (s *FirstFitSortedWatts) ResourceOffers(driver sched.SchedulerDriver, offer if *task.Instances <= 0 { // All instances of task have been scheduled, remove it - s.tasks= append(s.tasks[:i], s.tasks[i+1:]...) + s.tasks = append(s.tasks[:i], s.tasks[i+1:]...) 
if len(s.tasks) <= 0 { log.Println("Done scheduling all tasks") From 353439464e93a9331cab8874e5827b997a27da34 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 31 Oct 2016 21:38:40 -0400 Subject: [PATCH 045/102] Added proactive dynamic capping (FCFS and Rank based). Also added a primitive readme file. --- .../proactive_dynamic_capping/README.md | 10 + schedulers/proactive_dynamic_capping/main.go | 99 ++++++++ .../src/constants/constants.go | 39 +++ .../src/github.com/montanaflynn/stats | 1 + .../src/proactive_dynamic_capping/capper.go | 235 ++++++++++++++++++ .../src/task/task.go | 73 ++++++ .../src/utilities/utils.go | 9 + 7 files changed, 466 insertions(+) create mode 100644 schedulers/proactive_dynamic_capping/README.md create mode 100644 schedulers/proactive_dynamic_capping/main.go create mode 100644 schedulers/proactive_dynamic_capping/src/constants/constants.go create mode 160000 schedulers/proactive_dynamic_capping/src/github.com/montanaflynn/stats create mode 100644 schedulers/proactive_dynamic_capping/src/proactive_dynamic_capping/capper.go create mode 100644 schedulers/proactive_dynamic_capping/src/task/task.go create mode 100644 schedulers/proactive_dynamic_capping/src/utilities/utils.go diff --git a/schedulers/proactive_dynamic_capping/README.md b/schedulers/proactive_dynamic_capping/README.md new file mode 100644 index 0000000..60f4431 --- /dev/null +++ b/schedulers/proactive_dynamic_capping/README.md @@ -0,0 +1,10 @@ +##Proactive Dynamic Capping + +Perform Cluster wide dynamic capping. + +Offer 2 methods: + 1. First Come First Serve -- For each task that needs to be scheduled, in the order in which it arrives, compute the cluster wide cap. + 2. Rank based cluster wide capping -- Sort a given set of tasks to be scheduled, in ascending order of requested watts, and then compute the cluster wide cap for each of the tasks in the ordered set. + +#Note + The github.com folder contains a library that is required to compute the median of a given set of values. 
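For reference, below is a minimal standalone sketch of the median-based cluster-wide cap computation that the README above describes, using the same github.com/montanaflynn/stats library the patch vendors. The node names and watt values are made-up placeholders, not taken from the repository; the real logic lives in capper.go later in this patch.

```go
package main

import (
	"fmt"
	"sort"

	"github.com/montanaflynn/stats"
)

// capFor computes what percentage of each node's available power the running
// average of requested watts represents, then returns the median of those
// percentages as the cluster-wide cap (steps 2 and 3 described in the README).
func capFor(runningAvg float64, availablePower map[string]float64) float64 {
	percentages := make([]float64, 0, len(availablePower))
	for _, power := range availablePower {
		if power >= runningAvg {
			percentages = append(percentages, (runningAvg/power)*100)
		}
	}
	if len(percentages) == 0 {
		return 100.0 // nothing to base a cap on; leave the cluster uncapped
	}
	sort.Float64s(percentages)
	median, err := stats.Median(percentages)
	if err != nil {
		return 100.0
	}
	return median
}

func main() {
	// Hypothetical available power per node, in watts.
	available := map[string]float64{"node-a": 100.0, "node-b": 80.0}
	fmt.Printf("cluster-wide cap: %.2f%%\n", capFor(40.0, available))
}
```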
diff --git a/schedulers/proactive_dynamic_capping/main.go b/schedulers/proactive_dynamic_capping/main.go new file mode 100644 index 0000000..d705017 --- /dev/null +++ b/schedulers/proactive_dynamic_capping/main.go @@ -0,0 +1,99 @@ +package main + +import ( + "constants" + "fmt" + "math/rand" + "task" + "proactive_dynamic_capping" + ) + +func sample_available_power() map[string]float64{ + return map[string]float64{ + "stratos-001":100.0, + "stratos-002":150.0, + "stratos-003":80.0, + "stratos-004":90.0, + } +} + +func get_random_power(min, max int) int { + return rand.Intn(max - min) + min +} + +func cap_value_one_task_fcfs(capper *proactive_dynamic_capping.Capper) { + fmt.Println("==== FCFS, Number of tasks: 1 ====") + available_power := sample_available_power() + tsk := task.NewTask("gouravr/minife:v5", "minife:v5", "stratos-001", + "minife_command", 4.0, 10, 50, 1) + if cap_value, err := capper.Fcfs_determine_cap(available_power, tsk); err == nil { + fmt.Println("task = " + tsk.String()) + fmt.Printf("cap value = %f\n", cap_value) + } +} + +func cap_value_window_size_tasks_fcfs(capper *proactive_dynamic_capping.Capper) { + fmt.Println() + fmt.Println("==== FCFS, Number of tasks: 3 (window size) ====") + available_power := sample_available_power() + for i := 0; i < constants.Window_size; i++ { + tsk := task.NewTask("gouravr/minife:v5", "minife:v5", "stratos-001", + "minife_command", 4.0, 10, get_random_power(30, 150), 1) + fmt.Printf("task%d = %s\n", i, tsk.String()) + if cap_value, err := capper.Fcfs_determine_cap(available_power, tsk); err == nil { + fmt.Printf("CAP: %f\n", cap_value) + } + } +} + +func cap_value_more_than_window_size_tasks_fcfs(capper *proactive_dynamic_capping.Capper) { + fmt.Println() + fmt.Println("==== FCFS, Number of tasks: >3 (> window_size) ====") + available_power := sample_available_power() + for i := 0; i < constants.Window_size + 2; i++ { + tsk := task.NewTask("gouravr/minife:v5", "minife:v5", "stratos-001", + "minife_command", 4.0, 10, get_random_power(30, 150), 1) + fmt.Printf("task%d = %s\n", i, tsk.String()) + if cap_value, err := capper.Fcfs_determine_cap(available_power, tsk); err == nil { + fmt.Printf("CAP: %f\n", cap_value) + } + } +} + +func cap_values_for_ranked_tasks(capper *proactive_dynamic_capping.Capper) { + fmt.Println() + fmt.Println("==== Ranked, Number of tasks: 5 (window size + 2) ====") + available_power := sample_available_power() + var tasks_to_schedule []*task.Task + for i := 0; i < constants.Window_size + 2; i++ { + tasks_to_schedule = append(tasks_to_schedule, + task.NewTask("gouravr/minife:v5", "minife:v5", "stratos-001", + "minife_command", 4.0, 10, get_random_power(30, 150), 1)) + } + // Printing the tasks that need to be scheduled. 
+ index := 0 + for _, tsk := range tasks_to_schedule { + fmt.Printf("task%d = %s\n", index, tsk.String()) + index++ + } + if sorted_tasks_to_be_scheduled, cwcv, err := capper.Ranked_determine_cap(available_power, tasks_to_schedule); err == nil { + fmt.Printf("The cap values are: ") + fmt.Println(cwcv) + fmt.Println("The order of tasks to be scheduled :-") + for _, tsk := range sorted_tasks_to_be_scheduled { + fmt.Println(tsk.String()) + } + } +} + +func main() { + capper := proactive_dynamic_capping.GetInstance() + cap_value_one_task_fcfs(capper) + capper.Clear() + cap_value_window_size_tasks_fcfs(capper) + capper.Clear() + cap_value_more_than_window_size_tasks_fcfs(capper) + capper.Clear() + cap_values_for_ranked_tasks(capper) + capper.Clear() +} diff --git a/schedulers/proactive_dynamic_capping/src/constants/constants.go b/schedulers/proactive_dynamic_capping/src/constants/constants.go new file mode 100644 index 0000000..0b1a0cc --- /dev/null +++ b/schedulers/proactive_dynamic_capping/src/constants/constants.go @@ -0,0 +1,39 @@ +/* +Constants that are used across scripts +1. The available hosts = stratos-00x (x varies from 1 to 8) +2. cap_margin = percentage of the requested power to allocate +3. power_threshold = overloading factor +4. total_power = total power per node +5. window_size = number of tasks to consider for computation of the dynamic cap. +*/ +package constants + +var Hosts = []string{"stratos-001", "stratos-002", + "stratos-003", "stratos-004", + "stratos-005", "stratos-006", + "stratos-007", "stratos-008"} + +/* + Margin with respect to the required power for a job. + So, if power required = 10W, the node would be capped to 75%*10W. + This value can be changed upon convenience. +*/ +var Cap_margin = 0.75 + +// Lower bound of the power threshold for a tasks +var Power_threshold = 0.6 + +// Total power per node +var Total_power = map[string]float64 { + "stratos-001": 100.0, + "stratos-002": 150.0, + "stratos-003": 80.0, + "stratos-004": 90.0, + "stratos-005": 200.0, + "stratos-006": 100.0, + "stratos-007": 175.0, + "stratos-008": 175.0, +} + +// Window size for running average +var Window_size = 3 diff --git a/schedulers/proactive_dynamic_capping/src/github.com/montanaflynn/stats b/schedulers/proactive_dynamic_capping/src/github.com/montanaflynn/stats new file mode 160000 index 0000000..60dcacf --- /dev/null +++ b/schedulers/proactive_dynamic_capping/src/github.com/montanaflynn/stats @@ -0,0 +1 @@ +Subproject commit 60dcacf48f43d6dd654d0ed94120ff5806c5ca5c diff --git a/schedulers/proactive_dynamic_capping/src/proactive_dynamic_capping/capper.go b/schedulers/proactive_dynamic_capping/src/proactive_dynamic_capping/capper.go new file mode 100644 index 0000000..4e183f3 --- /dev/null +++ b/schedulers/proactive_dynamic_capping/src/proactive_dynamic_capping/capper.go @@ -0,0 +1,235 @@ +/* +Cluster wide dynamic capping +Step1. Compute running average of tasks in window. +Step2. Compute what percentage of available power of each node, is the running average. +Step3. Compute the median of the percentages and this is the percentage that the cluster needs to be cpaped at. + +1. First Fit Scheduling -- Perform the above steps for each task that needs to be scheduled. +2. Rank based Scheduling -- Sort a set of tasks to be scheduled, in ascending order of power, and then perform the above steps for each of them in the sorted order. 
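+
+A hypothetical worked example of the three steps above: with a window holding
+tasks that request 30W, 40W and 50W, the running average is 40W. If the
+available power is 100W on one node and 80W on another, that average is 40% and
+50% of the respective available power, and the median of those percentages,
+45%, becomes the cluster wide cap.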
+*/ + +package proactive_dynamic_capping + +import ( + "constants" + "container/list" + "errors" + "github.com/montanaflynn/stats" + "task" + "sort" + "sync" +) + +// Structure containing utility data structures used to compute cluster wide dyanmic cap. +type Capper struct { + // window of tasks. + window_of_tasks list.List + // The current sum of requested powers of the tasks in the window. + current_sum float64 + // The current number of tasks in the window. + number_of_tasks_in_window int +} + +// Defining constructor for Capper. +func NewCapper() *Capper { + return &Capper{current_sum: 0.0, number_of_tasks_in_window: 0} +} + +// For locking on operations that may result in race conditions. +var mutex sync.Mutex + +// Singleton instance of Capper +var singleton_capper *Capper +// Retrieve the singleton instance of Capper. +func GetInstance() *Capper { + if singleton_capper == nil { + mutex.Lock() + singleton_capper = NewCapper() + mutex.Unlock() + } else { + // Do nothing + } + return singleton_capper +} + +// Clear and initialize all the members of Capper. +func (capper Capper) Clear() { + capper.window_of_tasks.Init() + capper.current_sum = 0 + capper.number_of_tasks_in_window = 0 +} + +// Compute the average of watts of all the tasks in the window. +func (capper Capper) average() float64 { + return capper.current_sum / float64(capper.window_of_tasks.Len()) +} + +/* + Compute the running average + + Using Capper#window_of_tasks to store the tasks in the window. Task at position 0 (oldest task) removed when window is full and new task arrives. +*/ +func (capper Capper) running_average_of_watts(tsk *task.Task) float64 { + var average float64 + if capper.number_of_tasks_in_window < constants.Window_size { + capper.window_of_tasks.PushBack(tsk) + capper.number_of_tasks_in_window++ + capper.current_sum += float64(tsk.Watts) + } else { + task_to_remove_element := capper.window_of_tasks.Front() + if task_to_remove, ok := task_to_remove_element.Value.(*task.Task); ok { + capper.current_sum -= float64(task_to_remove.Watts) + capper.window_of_tasks.Remove(task_to_remove_element) + } + capper.window_of_tasks.PushBack(tsk) + capper.current_sum += float64(tsk.Watts) + } + average = capper.average() + return average +} + +/* + Calculating cap value + + 1. Sorting the values of running_average_available_power_percentage in ascending order. + 2. Computing the median of the above sorted values. + 3. The median is now the cap value. +*/ +func (capper Capper) get_cap(running_average_available_power_percentage map[string]float64) float64 { + var values []float64 + // Validation + if running_average_available_power_percentage == nil { + return 100.0 + } + for _, apower := range running_average_available_power_percentage { + values = append(values, apower) + } + // sorting the values in ascending order + sort.Float64s(values) + // Calculating the median + if median, err := stats.Median(values); err == nil { + return median + } + // should never reach here. If here, then just setting the cap value to be 100 + return 100.0 +} + +// In place sorting of tasks to be scheduled based on the requested watts. 
+func qsort_tasks(low int, high int, tasks_to_sort []*task.Task) { + i := low + j := high + // calculating the pivot + pivot_index := low + (high - low)/2 + pivot := tasks_to_sort[pivot_index] + for i <= j { + for tasks_to_sort[i].Watts < pivot.Watts { + i++ + } + for tasks_to_sort[j].Watts > pivot.Watts { + j-- + } + if i <= j { + temp := tasks_to_sort[i] + tasks_to_sort[i] = tasks_to_sort[j] + tasks_to_sort[j] = temp + i++ + j-- + } + } + if low < j { + qsort_tasks(low, j, tasks_to_sort) + } + if i < high { + qsort_tasks(i, high, tasks_to_sort) + } +} + +// Sorting tasks in ascending order of requested watts. +func (capper Capper) sort_tasks(tasks_to_sort []*task.Task) { + qsort_tasks(0, len(tasks_to_sort)-1, tasks_to_sort) +} + +/* +Remove entry for finished task. +Electron needs to call this whenever a task completes so that the finished task no longer contributes to the computation of the cluster wide cap. +*/ +func (capper Capper) Task_finished(finished_task *task.Task) { + // If the window is empty then just return. Should not be entering this condition as it would mean that there is a bug. + if capper.window_of_tasks.Len() == 0 { + return + } + + // Checking whether the finished task is currently present in the window of tasks. + var task_element_to_remove *list.Element + for task_element := capper.window_of_tasks.Front(); task_element != nil; task_element = task_element.Next() { + if tsk, ok := task_element.Value.(*task.Task); ok { + if task.Compare(tsk, finished_task) { + task_element_to_remove = task_element + } + } + } + + // If finished task is there in the window of tasks, then we need to remove the task from the same and modify the members of Capper accordingly. + if task_to_remove, ok := task_element_to_remove.Value.(*task.Task); ok { + capper.window_of_tasks.Remove(task_element_to_remove) + capper.number_of_tasks_in_window -= 1 + capper.current_sum -= float64(task_to_remove.Watts) + } +} + +// Ranked based scheduling +func (capper Capper) Ranked_determine_cap(available_power map[string]float64, tasks_to_schedule []*task.Task) ([]*task.Task, map[int]float64, error) { + // Validation + if available_power == nil || len(tasks_to_schedule) == 0 { + return nil, nil, errors.New("No available power and no tasks to schedule.") + } else { + // Need to sort the tasks in ascending order of requested power + capper.sort_tasks(tasks_to_schedule) + + // Now, for each task in the sorted set of tasks, we need to use the Fcfs_determine_cap logic. + cluster_wide_cap_values := make(map[int]float64) + index := 0 + for _, tsk := range tasks_to_schedule { + /* + Note that even though Fcfs_determine_cap is called, we have sorted the tasks aprior and thus, the tasks are scheduled in the sorted fashion. + Calling Fcfs_determine_cap(...) just to avoid redundant code. + */ + if cap, err := capper.Fcfs_determine_cap(available_power, tsk); err == nil { + cluster_wide_cap_values[index] = cap + } else { + return nil, nil, err + } + index++ + } + // Now returning the sorted set of tasks and the cluster wide cap values for each task that is launched. + return tasks_to_schedule, cluster_wide_cap_values, nil + } +} + +// First come first serve scheduling. +func (capper Capper) Fcfs_determine_cap(available_power map[string]float64, new_task *task.Task) (float64, error) { + // Validation + if available_power == nil { + // If no power available power, then capping the cluster at 100%. Electron might choose to queue the task. 
+ return 100.0, errors.New("No available power.") + } else { + mutex.Lock() + // Need to calcualte the running average + running_average := capper.running_average_of_watts(new_task) + // What percent of available power for each node is the running average + running_average_available_power_percentage := make(map[string]float64) + for node, apower := range available_power { + if apower >= running_average { + running_average_available_power_percentage[node] = (running_average/apower) * 100 + } else { + // We don't consider this node in the offers + } + } + + // Determine the cluster wide cap value. + cap_value := capper.get_cap(running_average_available_power_percentage) + // Electron has to now cap the cluster to this value before launching the next task. + mutex.Unlock() + return cap_value, nil + } +} diff --git a/schedulers/proactive_dynamic_capping/src/task/task.go b/schedulers/proactive_dynamic_capping/src/task/task.go new file mode 100644 index 0000000..47d8aa5 --- /dev/null +++ b/schedulers/proactive_dynamic_capping/src/task/task.go @@ -0,0 +1,73 @@ +package task + +import ( + "constants" + "encoding/json" + "reflect" + "strconv" + "utilities" +) + +/* + Blueprint for the task. + Members: + image: + name: + host: + cmd: + cpu: + ram: + watts: + inst: +*/ +type Task struct { + Image string + Name string + Host string + CMD string + CPU float64 + RAM int + Watts int + Inst int +} + +// Defining a constructor for Task +func NewTask(image string, name string, host string, + cmd string, cpu float64, ram int, watts int, inst int) *Task { + return &Task{Image: image, Name: name, Host: host, CPU: cpu, + RAM: ram, Watts: watts, Inst: inst} +} + +// Update the host on which the task needs to be scheduled. +func (task Task) Update_host(new_host string) { + // Validation + if _, ok := constants.Total_power[new_host]; ok { + task.Host = new_host + } +} + +// Stringify task instance +func (task Task) String() string { + task_map := make(map[string]string) + task_map["image"] = task.Image + task_map["name"] = task.Name + task_map["host"] = task.Host + task_map["cmd"] = task.CMD + task_map["cpu"] = utils.FloatToString(task.CPU) + task_map["ram"] = strconv.Itoa(task.RAM) + task_map["watts"] = strconv.Itoa(task.Watts) + task_map["inst"] = strconv.Itoa(task.Inst) + + json_string, _ := json.Marshal(task_map) + return string(json_string) +} + +// Compare one task to another. 2 tasks are the same if all the corresponding members are the same. +func Compare(task *Task, other_task *Task) bool { + // If comparing the same pointers (checking the addresses). 
+ if task == other_task { + return true + } + // Checking member equality + return reflect.DeepEqual(*task, *other_task) +} diff --git a/schedulers/proactive_dynamic_capping/src/utilities/utils.go b/schedulers/proactive_dynamic_capping/src/utilities/utils.go new file mode 100644 index 0000000..5f2e341 --- /dev/null +++ b/schedulers/proactive_dynamic_capping/src/utilities/utils.go @@ -0,0 +1,9 @@ +package utils + +import "strconv" + +// Convert float64 to string +func FloatToString(input float64) string { + // Precision is 2, Base is 64 + return strconv.FormatFloat(input, 'f', 2, 64) +} From fb2580bcad19989c441d311da544eec9ad67038f Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 31 Oct 2016 21:41:59 -0400 Subject: [PATCH 046/102] Modified readme to include commands to build and run the program and also added documentation to mention what main.go contains --- schedulers/proactive_dynamic_capping/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/schedulers/proactive_dynamic_capping/README.md b/schedulers/proactive_dynamic_capping/README.md index 60f4431..4066b73 100644 --- a/schedulers/proactive_dynamic_capping/README.md +++ b/schedulers/proactive_dynamic_capping/README.md @@ -6,5 +6,14 @@ Offer 2 methods: 1. First Come First Serve -- For each task that needs to be scheduled, in the order in which it arrives, compute the cluster wide cap. 2. Rank based cluster wide capping -- Sort a given set of tasks to be scheduled, in ascending order of requested watts, and then compute the cluster wide cap for each of the tasks in the ordered set. + +main.go contains a set of test functions for the above algorithm. + +#Please run the following commands to install dependencies and run the test code. +''' + go build + go run main.go +''' + #Note The github.com folder contains a library that is required to compute the median of a given set of values. From e27912f99ed42568c242cc4925e51d97aac5bdc7 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 31 Oct 2016 21:43:18 -0400 Subject: [PATCH 047/102] fixed an error in the readme file --- schedulers/proactive_dynamic_capping/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/schedulers/proactive_dynamic_capping/README.md b/schedulers/proactive_dynamic_capping/README.md index 4066b73..e9f7332 100644 --- a/schedulers/proactive_dynamic_capping/README.md +++ b/schedulers/proactive_dynamic_capping/README.md @@ -10,10 +10,10 @@ Offer 2 methods: main.go contains a set of test functions for the above algorithm. #Please run the following commands to install dependencies and run the test code. -''' +``` go build go run main.go -''' +``` #Note The github.com folder contains a library that is required to compute the median of a given set of values. From 0c53bb386fd43f926cea1f9d9d5f96a6907f8742 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 19:47:03 -0500 Subject: [PATCH 048/102] Added TaskID as a field. Added a function UpdateHost() to update the host on which the task runs. Added a setter for TaskID. 
Added a comparator called Compare() that compares to instances of Task --- def/task.go | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/def/task.go b/def/task.go index 5e0f8de..19ed98c 100644 --- a/def/task.go +++ b/def/task.go @@ -1,9 +1,11 @@ package def import ( + "bitbucket.org/sunybingcloud/electron/constants" "encoding/json" "github.com/pkg/errors" "os" + "reflect" ) type Task struct { @@ -15,6 +17,7 @@ type Task struct { CMD string `json:"cmd"` Instances *int `json:"inst"` Host string `json:"host"` + TaskID string `json:"taskID"` } func TasksFromJSON(uri string) ([]Task, error) { @@ -34,6 +37,34 @@ func TasksFromJSON(uri string) ([]Task, error) { return tasks, nil } +// Update the host on which the task needs to be scheduled. +func (tsk *Task) UpdateHost(new_host string) bool { + // Validation + is_correct_host := false + for _, existing_host := range constants.Hosts { + if host == existing_host { + is_correct_host = true + } + } + if !is_correct_host { + return false + } else { + tsk.Host = new_host + return true + } +} + +// Set the taskID of the task. +func (tsk *Task) SetTaskID(taskID string) bool { + // Validation + if taskID == "" { + return false + } else { + tsk.TaskID = taskID + return true + } +} + type WattsSorter []Task func (slice WattsSorter) Len() int { @@ -47,3 +78,22 @@ func (slice WattsSorter) Less(i, j int) bool { func (slice WattsSorter) Swap(i, j int) { slice[i], slice[j] = slice[j], slice[i] } + +// Compare two tasks. +func Compare(task1 *Task, task2 *Task) bool { + // If comparing the same pointers (checking the addresses). + if task1 == task2 { + return true + } + // Checking member equality + if reflect.DeepEqual(*task1, *task2) { + // Need to check for the task ID + if task1.TaskID == task2.TaskID { + return true + } else { + return false + } + } else { + return false + } +} From 522b606165dd9eea984e7a03352763045785f726 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 19:48:41 -0500 Subject: [PATCH 049/102] changed bingcloud to sunybingcloud in the import statements. 
--- pcp/loganddynamiccap.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pcp/loganddynamiccap.go b/pcp/loganddynamiccap.go index cc2776f..4b17fb5 100644 --- a/pcp/loganddynamiccap.go +++ b/pcp/loganddynamiccap.go @@ -1,7 +1,7 @@ package pcp import ( - "bitbucket.org/bingcloud/electron/rapl" + "bitbucket.org/sunybingcloud/electron/rapl" "bufio" "container/ring" "log" From 6d0a3f8a902610b38d50279011f1f08507278c03 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 19:49:39 -0500 Subject: [PATCH 050/102] scheduler is now an instance of ProactiveClusterwideCapFCFS --- scheduler.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scheduler.go b/scheduler.go index 93bbdf4..168b72e 100644 --- a/scheduler.go +++ b/scheduler.go @@ -1,9 +1,9 @@ package main import ( - "bitbucket.org/bingcloud/electron/def" - "bitbucket.org/bingcloud/electron/pcp" - "bitbucket.org/bingcloud/electron/schedulers" + "bitbucket.org/sunybingcloud/electron/def" + "bitbucket.org/sunybingcloud/electron/pcp" + "bitbucket.org/sunybingcloud/electron/schedulers" "flag" "fmt" "github.com/golang/protobuf/proto" @@ -56,7 +56,7 @@ func main() { fmt.Println(task) } - scheduler := schedulers.NewFirstFit(tasks, *ignoreWatts) + scheduler := schedulers.NewProactiveClusterwideCapFCFS(tasks, *ignoreWatts) driver, err := sched.NewMesosSchedulerDriver(sched.DriverConfig{ Master: *master, Framework: &mesos.FrameworkInfo{ From bfbca705c8e681498b6d62cf1f2591c23043fd3a Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 19:50:00 -0500 Subject: [PATCH 051/102] changed bingcloud to sunybingcloud in the import statements. --- schedulers/binpacksortedwatts.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schedulers/binpacksortedwatts.go b/schedulers/binpacksortedwatts.go index d73b64c..b05a3e3 100644 --- a/schedulers/binpacksortedwatts.go +++ b/schedulers/binpacksortedwatts.go @@ -1,7 +1,7 @@ package schedulers import ( - "bitbucket.org/bingcloud/electron/def" + "bitbucket.org/sunybingcloud/electron/def" "fmt" "github.com/golang/protobuf/proto" mesos "github.com/mesos/mesos-go/mesosproto" From d0e37811d69d1802a8b926bbf5be21055e1aeaef Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 19:50:21 -0500 Subject: [PATCH 052/102] changed bingcloud to sunybingcloud in the import statements. --- schedulers/firstfit.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schedulers/firstfit.go b/schedulers/firstfit.go index bdfad7e..e426ab1 100644 --- a/schedulers/firstfit.go +++ b/schedulers/firstfit.go @@ -1,7 +1,7 @@ package schedulers import ( - "bitbucket.org/bingcloud/electron/def" + "bitbucket.org/sunybingcloud/electron/def" "fmt" "github.com/golang/protobuf/proto" mesos "github.com/mesos/mesos-go/mesosproto" From c1fa0b1a6ea4fde16e0c87d686b655068b33c29c Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 19:50:40 -0500 Subject: [PATCH 053/102] changed bingcloud to sunybingcloud in the import statements. 
--- schedulers/firstfitsortedwatts.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schedulers/firstfitsortedwatts.go b/schedulers/firstfitsortedwatts.go index faab082..9067e1c 100644 --- a/schedulers/firstfitsortedwatts.go +++ b/schedulers/firstfitsortedwatts.go @@ -1,7 +1,7 @@ package schedulers import ( - "bitbucket.org/bingcloud/electron/def" + "bitbucket.org/sunybingcloud/electron/def" "fmt" "github.com/golang/protobuf/proto" mesos "github.com/mesos/mesos-go/mesosproto" From 31bbacf149be872fef00b9d29292dfe23aef70ca Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 19:50:53 -0500 Subject: [PATCH 054/102] changed bingcloud to sunybingcloud in the import statements. --- schedulers/firstfitwattsonly.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schedulers/firstfitwattsonly.go b/schedulers/firstfitwattsonly.go index b19ce3b..e5962a7 100644 --- a/schedulers/firstfitwattsonly.go +++ b/schedulers/firstfitwattsonly.go @@ -1,7 +1,7 @@ package schedulers import ( - "bitbucket.org/bingcloud/electron/def" + "bitbucket.org/sunybingcloud/electron/def" "fmt" "github.com/golang/protobuf/proto" mesos "github.com/mesos/mesos-go/mesosproto" From d3e4914f5813cb9deae5d2024b4f4233fddcdefd Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 19:55:28 -0500 Subject: [PATCH 055/102] Defined constants that help in scheduling of tasks. --- constants/constants.go | 103 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 constants/constants.go diff --git a/constants/constants.go b/constants/constants.go new file mode 100644 index 0000000..50ca468 --- /dev/null +++ b/constants/constants.go @@ -0,0 +1,103 @@ +/* +Constants that are used across scripts +1. The available hosts = stratos-00x (x varies from 1 to 8) +2. cap_margin = percentage of the requested power to allocate +3. power_threshold = overloading factor +4. total_power = total power per node +5. window_size = number of tasks to consider for computation of the dynamic cap. + +Also, exposing functions to update or initialize some of the constants. +*/ +package constants + +var Hosts = []string{"stratos-001", "stratos-002", + "stratos-003", "stratos-004", + "stratos-005", "stratos-006", + "stratos-007", "stratos-008"} + +// Add a new host to the slice of hosts. +func AddNewHost(new_host string) bool { + // Validation + if new_host == "" { + return false + } else { + Hosts = append(Hosts, new_host) + return true + } +} + +// Lower bound of the percentage of requested power, that can be allocated to a task. +var Power_threshold = 0.6 // Right now saying that a task will never be given lesser than 60% of the power it requested. + +/* + Margin with respect to the required power for a job. + So, if power required = 10W, the node would be capped to 75%*10W. + This value can be changed upon convenience. +*/ +var Cap_margin = 0.75 + +// Modify the cap margin. +func UpdateCapMargin(new_cap_margin float64) bool { + // Checking if the new_cap_margin is less than the power threshold. + if new_cap_margin < Starvation_factor { + return false + } else { + Cap_margin = new_cap_margin + return true + } +} + + +// Threshold factor that would make (Cap_margin * task.Watts) equal to (60/100 * task.Watts). +var Starvation_factor = 0.8 + +// Total power per node. +var Total_power map[string]float64 + +// Initialize the total power per node. This should be done before accepting any set of tasks for scheduling. 
+func AddTotalPowerForHost(host string, total_power float64) bool { + // Validation + is_correct_host := false + for _, existing_host := range Hosts { + if host == existing_host { + is_correct_host = true + } + } + + if !is_correct_host { + return false + } else { + Total_power[host] = total_power + return true + } +} + +// Window size for running average +var Window_size = 10 + +// Update the window size. +func UpdateWindowSize(new_window_size int) bool { + // Validation + if new_window_size == 0 { + return false + } else{ + Window_size = new_window_size + return true + } +} + +// Time duration between successive cluster wide capping. +var Clusterwide_cap_interval = 10.0 // Right now capping the cluster at 10 second intervals. + +// Modify the cluster wide capping interval. We can update the interval depending on the workload. +// TODO: If the workload is heavy then we can set a longer interval, while on the other hand, +// if the workload is light then a smaller interval is sufficient. +func UpdateClusterwideCapInterval(new_interval float64) bool { + // Validation + if new_interval == 0.0 { + return false + } else { + Clusterwide_cap_interval = new_interval + return true + } +} From c728b1f733cb2a2d679c3f004833133e3e2906b6 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 19:57:36 -0500 Subject: [PATCH 056/102] Proactive cluster wide capper that defines two types of schedulers. First fit scheduling, that uses running average of task.Watts to calculate the cluster wide cap, and Ranked based cluster wide capper that ranks the tasks (sorts) based on the watts required and then performs fcfs in the sorted order. --- schedulers/proactiveclusterwidecappers.go | 244 ++++++++++++++++++++++ 1 file changed, 244 insertions(+) create mode 100644 schedulers/proactiveclusterwidecappers.go diff --git a/schedulers/proactiveclusterwidecappers.go b/schedulers/proactiveclusterwidecappers.go new file mode 100644 index 0000000..7b94982 --- /dev/null +++ b/schedulers/proactiveclusterwidecappers.go @@ -0,0 +1,244 @@ +/* +Cluster wide dynamic capping +Step1. Compute running average of tasks in window. +Step2. Compute what percentage of available power of each node, is the running average. +Step3. Compute the median of the percentages and this is the percentage that the cluster needs to be cpaped at. + +1. First Fit Scheduling -- Perform the above steps for each task that needs to be scheduled. +2. Rank based Scheduling -- Sort a set of tasks to be scheduled, in ascending order of power, and then perform the above steps for each of them in the sorted order. + +This is not a scheduler but a scheduling scheme that schedulers can use. +*/ +package schedulers + +import ( + "bitbucket.org/sunybingcloud/electron/constants" + "bitbucket.org/sunybingcloud/electron/def" + "container/list" + "errors" + "github.com/montanaflynn/stats" + "sort" + "sync" +) + +// Structure containing utility data structures used to compute cluster-wide dynamic cap. +type clusterwideCapper struct { + // window of tasks. + window_of_tasks list.list + // The current sum of requested powers of the tasks in the window. + current_sum float64 + // The current number of tasks in the window. + number_of_tasks_in_window int +} + +// Defining constructor for clusterwideCapper. Please don't call this directly and instead use getClusterwideCapperInstance(). 
+func newClusterwideCapper() *clusterwideCapper { + return &clusterwideCapper{current_sum: 0.0, number_of_tasks_in_window: 0} +} + +// For locking on operations that may result in race conditions. +var clusterwide_capper_mutex sync.Mutex + +// Singleton instance of clusterwideCapper +var singleton_capper *clusterwideCapper +// Retrieve the singleton instance of clusterwideCapper. +func getClusterwideCapperInstance() *clusterwideCapper { + if singleton_capper == nil { + clusterwide_capper_mutex.Lock() + singleton_capper = newClusterwideCapper() + clusterwide_capper_mutex.Unlock() + } else { + // Do nothing + } + return singleton_capper +} + +// Clear and initialize all the members of clusterwideCapper. +func (capper clusterwideCapper) clear() { + capper.window_of_tasks.Init() + capper.current_sum = 0 + capper.number_of_tasks_in_window = 0 +} + +// Compute the average of watts of all the tasks in the window. +func (capper clusterwideCapper) average() float64 { + return capper.current_sum / float64(capper.window_of_tasks.Len()) +} + +/* +Compute the running average. + +Using clusterwideCapper#window_of_tasks to store the tasks. +Task at position 0 (oldest task) is removed when the window is full and new task arrives. +*/ +func (capper clusterwideCapper) running_average_of_watts(tsk *def.Task) float64 { + var average float64 + if capper.number_of_tasks_in_window < constants.Window_size { + capper.window_of_tasks.PushBack(tsk) + capper.number_of_tasks_in_window++ + capper.current_sum += float64(tsk.Watts) + } else { + task_to_remove_element := capper.window_of_tasks.Front() + if task_to_remove, ok := task_to_remove_element.Value.(*def.Task); ok { + capper.current_sum -= float64(task_to_remove.Watts) + capper.window_of_tasks.Remove(task_to_remove_element) + } + capper.window_of_tasks.PushBack(tsk) + capper.current_sum += float64(tsk.Watts) + } + average = capper.average() + return average +} + +/* +Calculating cap value. + +1. Sorting the values of running_average_available_power_percentage in ascending order. +2. Computing the median of the above sorted values. +3. The median is now the cap value. +*/ +func (capper clusterwideCapper) get_cap(running_average_available_power_percentage map[string]float64) float64 { + var values []float64 + // Validation + if running_average_available_power_percentage == nil { + return 100.0 + } + for _, apower := range running_average_available_power_percentage { + values = append(values, apower) + } + // sorting the values in ascending order + sort.Float64s(values) + // Calculating the median + if median, err := stats.Median(values); err == nil { + return median + } + // should never reach here. If here, then just setting the cap value to be 100 + return 100.0 +} + +/* Quick sort algorithm to sort tasks, in place, +in ascending order of power.*/ +func (capper clusterwideCapper) quick_sort(low int, high int, tasks_to_sort []*def.Task) { + i := low + j := high + // calculating the pivot + pivot_index := low + (high - low)/2 + pivot := tasks_to_sort[pivot_index] + for i <= j { + for tasks_to_sort[i].Watts < pivot.Watts { + i++ + } + for tasks_to_sort[j].Watts > pivot.Watts { + j-- + } + if i <= j { + temp := tasks_to_sort[i] + tasks_to_sort[i] = tasks_to_sort[j] + tasks_to_sort[j] = temp + i++ + j-- + } + } + if low < j { + capper.quick_sort(low, j, tasks_to_sort) + } + if i < high { + capper.quick_sort(i, high, tasks_to_sort) + } +} + +// Sorting tasks in ascending order of requested watts. 
+func (capper clusterwideCapper) sort_tasks(tasks_to_sort []*def.Task) { + capper.quick_sort(0, len(tasks_to_sort)-1, tasks_to_sort) +} + +/* +Remove entry for finished task. +This function is called when a task completes. This completed task needs to be removed from the window of tasks (if it is still present) + so that it doesn't contribute to the computation of the cap value. +*/ +func (capper clusterwideCapper) taskFinished(taskID string) { + // If the window is empty the just return. This condition should technically return false. + if capper.window_of_tasks.Len() == 0 { + return + } + + // Checking whether the task with the given taskID is currently present in the window of tasks. + var task_element_to_remove *list.Element + for task_element := capper.window_of_tasks.Front(); task_element != nil; task_element = task_element.Next() { + if tsk, ok := task_element.Value.(*def.Task); ok { + if task.TaskID == taskID { + task_element_to_remove = task_element + } + } + } + + // If finished task is there in the window of tasks, then we need to remove the task from the same and modify the members of clusterwideCapper accordingly. + if task_to_remove, ok := task_element_to_remove.Value.(*def.Task); ok { + capper.window_of_tasks.Remove(task_element_to_remove) + capper.number_of_tasks_in_window -= 1 + capper.current_sum -= float64(task_to_remove.Watts) + } +} + +// Ranked based scheduling. +func (capper clusterwideCapper) rankedDetermineCap(available_power map[string]float64, + tasks_to_schedule []*def.Task) ([]*def.Task, map[string]float64, error) { + // Validation + if available_power == nil || len(tasks_to_schedule) == 0 { + return nil, nil, errors.New("Invalid argument: available_power, tasks_to_schedule") + } else { + // Need to sort the tasks in ascending order of requested power. + capper.sort_tasks(tasks_to_schedule) + + // Now, for each task in the sorted set of tasks, we need to use the Fcfs_determine_cap logic. + cluster_wide_cap_values := make(map[int]float64) + index := 0 + for _, tsk := range tasks_to_schedule { + /* + Note that even though Fcfs_determine_cap is called, we have sorted the tasks aprior and thus, the tasks are scheduled in the sorted fashion. + Calling Fcfs_determine_cap(...) just to avoid redundant code. + */ + if cap, err := capper.fcfsDetermineCap(available_power, tsk); err == nil { + cluster_wide_cap_values[index] = cap + } else { + return nil, nil, err + } + index++ + } + // Now returning the sorted set of tasks and the cluster wide cap values for each task that is launched. + return tasks_to_schedule, cluster_wide_cap_values, nil + } +} + +// First come first serve shceduling. +func (capper clusterwideCapper) fcfsDetermineCap(available_power map[string]float64, new_task *def.Task) (float64, error) { + // Validation + if available_power == nil { + return 100, errors.New("Invalid argument: available_power") + } else { + clusterwide_capper_mutex.Lock() + // Need to calculate the running average + running_average := capper.running_average_of_watts(new_task) + // What percent of available_power for each node is the running average. + running_average_available_power_percentage := make(map[string]float64) + for host, apower := range available_power { + if apower >= running_average { + running_average_available_power_percentage[host] = (running_average/apower) * 100 + } else { + // We don't consider this host in the offers. + } + } + + // Determine the cluster wide cap value. 
+ cap_value := capper.get_cap(running_average_available_power_percentage) + // Need to cap the cluster to this value before launching the next task. + clusterwide_capper_mutex.Unlock() + return cap_value, nil + } +} + +// Stringer for an instance of clusterwideCapper +func (capper clusterwideCapper) string() string { + return "Clusterwide Capper -- Proactively cap the entire cluster." +} From 8e0ed6a0140be1d6bf83cbae743072ec54c9762c Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 19:59:20 -0500 Subject: [PATCH 057/102] Proactive cluster wide first come first server scheduler. This schedules tasks based on the fcfs cluster wide capping policy defined in proactiveclusterwidecappers.go --- schedulers/proactiveclusterwidecappingfcfs.go | 269 ++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 schedulers/proactiveclusterwidecappingfcfs.go diff --git a/schedulers/proactiveclusterwidecappingfcfs.go b/schedulers/proactiveclusterwidecappingfcfs.go new file mode 100644 index 0000000..ac77dee --- /dev/null +++ b/schedulers/proactiveclusterwidecappingfcfs.go @@ -0,0 +1,269 @@ +package schedulers + +import ( + "bitbucket.org/sunybingcloud/electron/def" + "bitbucket.org/sunybingcloud/electron/constants" + "bitbucket.org/sunybingcloud/electron/rapl" + "errors" + "fmt" + "github.com/golang/protobuf/proto" + mesos "github.com/mesos/mesos-go/mesosproto" + "github.com/mesos/mesos-go/mesosutil" + sched "github.com/mesos/mesos-go/scheduler" + "log" + "sort" + "strings" + "sync" + "time" +) + +// electronScheduler implements the Scheduler interface. +type ProactiveClusterwideCapFCFS struct { + tasksCreated int + tasksRunning int + tasks []def.Task + metrics map[string]def.Metric + running map[string]map[string]bool + ignoreWatts bool + capper *clusterwideCapper + ticker *time.Ticker + isCapping bool + + // First set of PCP values are garbage values, signal to logger to start recording when we're + // about to schedule the new task. + RecordPCP bool + + // This channel is closed when the program receives an interrupt, + // signalling that the program should shut down. + Shutdown chan struct{} + + // This channel is closed after shutdown is closed, and only when all + // outstanding tasks have been cleaned up. + Done chan struct{} + + // Controls when to shutdown pcp logging. + PCPLog chan struct{} +} + +// New electron scheduler. +func NewProactiveClusterwideCapFCFS(tasks []def.Task, ignoreWatts bool) *ProactiveClusterwideCapFCFS { + s := &ProactiveClusterwideCapFCFS { + tasks: tasks, + ignoreWatts: ignoreWatts, + Shutdown: make(chan struct{}), + Done: make(chan struct{}), + PCPLog: make(chan struct{}), + running: make(mapp[string]map[string]bool), + RecordPCP: false, + capper: getClusterwideCapperInstance(), + ticker: time.NewTicker(constants.Clusterwide_cap_interval * time.Second), + isCapping: false + } + return s +} + +func (s *ProactiveClusterwideCapFCFS) newTask(offer *mesos.Offer, task def.Task) *mesos.TaskInfo { + taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances) + s.tasksCreated++ + + if !s.RecordPCP { + // Turn on logging. + s.RecordPCP = true + time.Sleep(1 * time.Second) // Make sure we're recording by the time the first task starts + } + + // If this is our first time running into this Agent + if _, ok := s.running[offer.GetSlaveId().GoString()]; !ok { + s.running[offer.GetSlaveId().GoString()] = make(map[string]bool) + } + + // Setting the task ID to the task. 
This is done so that we can consider each task to be different, + // even though they have the same parameters. + task.SetTaskID(proto.String(taskName)) + // Add task to the list of tasks running on the node. + s.running[offer.GetSlaveId().GoString()][taskName] = true + + resources := []*mesos.Resource{ + mesosutil.NewScalarResource("cpus", task.CPU), + mesosutil.NewScalarResource("mem", task.RAM), + } + + if !s.ignoreWatts { + resources = append(resources, mesosutil.NewScalarResource("watts", task.Watts)) + } + + return &mesos.TaskInfo{ + Name: proto.String(taskName), + TaskId: &mesos.TaskID{ + Value: proto.String("electron-" + taskName), + }, + SlaveId: offer.SlaveId, + Resources: resources, + Command: &mesos.CommandInfo{ + Value: proto.String(task.CMD), + }, + Container: &mesos.ContainerInfo{ + Type: mesos.ContainerInfo_DOCKER.Enum(), + Docker: &mesos.ContainerInfo_DockerInfo{ + Image: proto.String(task.Image), + Network: mesos.ContainerInfo_DockerInfo_BRIDGE.Enum(), // Run everything isolated + }, + }, + } +} + +func (s *ProactiveClusterwideCapFCFS) Registered( + _ sched.SchedulerDriver, + framewordID *mesos.FrameworkID, + masterInfo *mesos.MasterInfo) { + log.Printf("Framework %s registered with master %s", frameworkID, masterInfo) +} + +func (s *ProactiveClusterwideCapFCFS) Reregistered(_ sched.SchedulerDriver, masterInfo *mesos.MasterInfo) { + log.Printf("Framework re-registered with master %s", masterInfo) +} + +func (s *ProactiveClusterwideCapFCFS) Disconnected(sched.SchedulerDriver) { + log.Println("Framework disconnected with master") +} + +// go routine to cap the entire cluster in regular intervals of time. +func (s *ProactiveClusterwideCapFCFS) startCapping(currentCapValue float64, mutex sync.Mutex) { + go func() { + for tick := range s.ticker.C { + // Need to cap the cluster to the currentCapValue. + if currentCapValue > 0.0 { + mutex.Lock() + for _, host := range constants.Hosts { + if err := rapl.Cap(host, int(math.Floor(currentCapValue + 0.5))); err != nil { + fmt.Println(err) + } else { + fmt.Println("Successfully capped %s to %d\\%", host, currentCapValue) + } + } + mutex.Unlock() + } + } + } +} + +// TODO: Need to reduce the time complexity: looping over offers twice (Possible to do it just once?). +func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { + log.Printf("Received %d resource offers", len(offers)) + + // retrieving the available power for all the hosts in the offers. + available_power := make(map[string]float64) + for _, offer := range offers { + _, _, offer_watts := OfferAgg(offer) + available_power[offer.Hostname] = offer_watts + } + + for _, offer := range offers { + select { + case <-s.Shutdown; + log.Println("Done scheduling tasks: declining offerf on [", offer.GetHostname(), "]") + driver.DeclineOffer(offer.Id, longFilter) + + log.Println("Number og tasks still running: ", s.tasksRunning) + continue + default: + } + + /* + Clusterwide Capping strategy + + For each task in s.tasks, + 1. I need to check whether the mesos offer can be taken or not (based on CPU and RAM). + 2. If the tasks fits the offer then I need to detemrine the cluster wide cap. + 3. First need to cap the cluster to the determine cap value and then launch the task on the host corresponding to the offer. + + Capping the cluster for every task would create a lot of overhead. Hence, clusterwide capping is performed at regular intervals. 
+ TODO: We can choose to cap the cluster only if the clusterwide cap varies more than the current clusterwide cap. + Although this sounds like a better approach, it only works when the resource requirements of neighbouring tasks are similar. + */ + offer_cpu, offer_ram, _ := OfferAgg(offer) + + taken := false + currentCapValue := 0.0 // initial value to indicate that we haven't capped the cluster yet. + var mutex sync.Mutex + + for _, task := range s.tasks { + // Don't take offer if it doesn't match our task's host requirement. + if !strings.HasPrefix(*offer.Hostname, task.Host) { + continue + } + + // Does the task fit. + if (s.ignoreWatts || offer_cpu >= task.CPU ||| offer_ram >= task.RAM) { + taken = true + mutex.Lock() + tempCap, err = s.capper.fcfsDetermineCap(available_power, task) + if err == nil { + currentCapValue = tempCap + } else { + fmt.Println("Failed to determine cluster wide cap: " + err.String()) + } + mutex.Unlock() + fmt.Printf("Starting on [%s]\n", offer.GetHostname()) + driver.LaunchTasks([]*mesos.OfferID{offer.Id}, [s.newTask(offer, task)], defaultFilter) + } else { + // Task doesn't fit the offer. Move onto the next offer. + } + } + + // If no task fit the offer, then declining the offer. + if !taken { + fmt.Println("There is not enough resources to launch a task:") + cpus, mem, watts := OfferAgg(offer) + + log.Printf("\n", cpus, mem, watts) + driver.DeclineOffer(offer.Id, defaultFilter) + } + } +} + +func (s *ProactiveClusterwideCapFCFS) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) { + log.Printf("Received task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) + + if *status.State == mesos.TaskState_TASK_RUNNING { + s.tasksRunning++ + } else if IsTerminal(status.State) { + delete(s.running[status.GetSlaveId().GoString()], *status.TaskId.Value) + // Need to remove the task from the window of tasks. + s.capper.taskFinished(status.TaskId.Value) + s.tasksRunning-- + if s.tasksRunning == 0 { + select { + case <-s.Shutdown: + close(s.Done) + default: + } + } + } + log.Printf("DONE: Task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) +} + +func (s *ProactiveClusterwideCapFCFS) FrameworkMessage(driver sched.SchedulerDriver, + executorID *mesos.ExecutorID, + slaveID *mesos.SlaveID, + message string) { + + log.Println("Getting a framework message: ", message) + log.Printf("Received a framework message from some unknown source: %s", *executorID.Value) +} + +func (s *ProactiveClusterwideCapFCFS) OfferRescinded(_ sched.SchedulerDriver, offerID *mesos.OfferID) { + log.Printf("Offer %s rescinded", offerID) +} + +func (s *ProactiveClusterwideCapFCFS) SlaveLost(_ sched.SchedulerDriver, slaveID *mesos.SlaveID) { + log.Printf("Slave %s lost", slaveID) +} + +func (s *ProactiveClusterwideCapFCFS) ExecutorLost(_ sched.SchedulerDriver, executorID *mesos.ExecutorID, slaveID *mesos.SlaveID, status int) { + log.Printf("Executor %s on slave %s was lost", executorID, slaveID) +} + +func (s *ProactiveClusterwideCapFCFS) Error(_ sched.SchedulerDriver, err string) { + log.Printf("Receiving an error: %s", err) +} From 04d6d4b63159aed89c16dad159721dd0e2f8383c Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 20:01:22 -0500 Subject: [PATCH 058/102] Utility data structures and functions. 
--- utilities/utils.go | 54 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 utilities/utils.go diff --git a/utilities/utils.go b/utilities/utils.go new file mode 100644 index 0000000..d6406d6 --- /dev/null +++ b/utilities/utils.go @@ -0,0 +1,54 @@ +package utilities + +import "errors" + +/* +The Pair and PairList have been taken from google groups forum, +https://groups.google.com/forum/#!topic/golang-nuts/FT7cjmcL7gw +*/ + +// Utility struct that helps in sorting the available power by value. +type Pair struct { + Key string + Value float64 +} + +// A slice of pairs that implements the sort.Interface to sort by value. +type PairList []Pair + +// Swap pairs in the PairList +func (plist PairList) Swap(i, j int) { + plist[i], plist[j] = plist[j], plist[i] +} + +// function to return the length of the pairlist. +func (plist PairList) Len() int { + return len(plist) +} + +// function to compare two elements in pairlist. +func (plist PairList) Less(i, j int) bool { + return plist[i].Value < plist[j].Value +} + +// convert a PairList to a map[string]float64 +func OrderedKeys(plist PairList) ([]string, error) { + // Validation + if plist == nil { + return nil, errors.New("Invalid argument: plist") + } + ordered_keys := make([]string, len(plist)) + for _, pair := range plist { + ordered_keys = append(ordered_keys, pair.Key) + } + return ordered_keys, nil +} + +// determine the max value +func Max(a, b float64) float64 { + if a > b { + return a + } else { + return b + } +} From 37b918d4942aaf8f479d11473a6e0bd623b7be8e Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 20:05:48 -0500 Subject: [PATCH 059/102] Removed these files and integrated them with electron --- .../proactive_dynamic_capping/README.md | 19 -- schedulers/proactive_dynamic_capping/main.go | 99 -------- .../src/constants/constants.go | 39 --- .../src/github.com/montanaflynn/stats | 1 - .../src/proactive_dynamic_capping/capper.go | 235 ------------------ .../src/task/task.go | 73 ------ .../src/utilities/utils.go | 9 - 7 files changed, 475 deletions(-) delete mode 100644 schedulers/proactive_dynamic_capping/README.md delete mode 100644 schedulers/proactive_dynamic_capping/main.go delete mode 100644 schedulers/proactive_dynamic_capping/src/constants/constants.go delete mode 160000 schedulers/proactive_dynamic_capping/src/github.com/montanaflynn/stats delete mode 100644 schedulers/proactive_dynamic_capping/src/proactive_dynamic_capping/capper.go delete mode 100644 schedulers/proactive_dynamic_capping/src/task/task.go delete mode 100644 schedulers/proactive_dynamic_capping/src/utilities/utils.go diff --git a/schedulers/proactive_dynamic_capping/README.md b/schedulers/proactive_dynamic_capping/README.md deleted file mode 100644 index e9f7332..0000000 --- a/schedulers/proactive_dynamic_capping/README.md +++ /dev/null @@ -1,19 +0,0 @@ -##Proactive Dynamic Capping - -Perform Cluster wide dynamic capping. - -Offer 2 methods: - 1. First Come First Serve -- For each task that needs to be scheduled, in the order in which it arrives, compute the cluster wide cap. - 2. Rank based cluster wide capping -- Sort a given set of tasks to be scheduled, in ascending order of requested watts, and then compute the cluster wide cap for each of the tasks in the ordered set. - - -main.go contains a set of test functions for the above algorithm. - -#Please run the following commands to install dependencies and run the test code. 
-``` - go build - go run main.go -``` - -#Note - The github.com folder contains a library that is required to compute the median of a given set of values. diff --git a/schedulers/proactive_dynamic_capping/main.go b/schedulers/proactive_dynamic_capping/main.go deleted file mode 100644 index d705017..0000000 --- a/schedulers/proactive_dynamic_capping/main.go +++ /dev/null @@ -1,99 +0,0 @@ -package main - -import ( - "constants" - "fmt" - "math/rand" - "task" - "proactive_dynamic_capping" - ) - -func sample_available_power() map[string]float64{ - return map[string]float64{ - "stratos-001":100.0, - "stratos-002":150.0, - "stratos-003":80.0, - "stratos-004":90.0, - } -} - -func get_random_power(min, max int) int { - return rand.Intn(max - min) + min -} - -func cap_value_one_task_fcfs(capper *proactive_dynamic_capping.Capper) { - fmt.Println("==== FCFS, Number of tasks: 1 ====") - available_power := sample_available_power() - tsk := task.NewTask("gouravr/minife:v5", "minife:v5", "stratos-001", - "minife_command", 4.0, 10, 50, 1) - if cap_value, err := capper.Fcfs_determine_cap(available_power, tsk); err == nil { - fmt.Println("task = " + tsk.String()) - fmt.Printf("cap value = %f\n", cap_value) - } -} - -func cap_value_window_size_tasks_fcfs(capper *proactive_dynamic_capping.Capper) { - fmt.Println() - fmt.Println("==== FCFS, Number of tasks: 3 (window size) ====") - available_power := sample_available_power() - for i := 0; i < constants.Window_size; i++ { - tsk := task.NewTask("gouravr/minife:v5", "minife:v5", "stratos-001", - "minife_command", 4.0, 10, get_random_power(30, 150), 1) - fmt.Printf("task%d = %s\n", i, tsk.String()) - if cap_value, err := capper.Fcfs_determine_cap(available_power, tsk); err == nil { - fmt.Printf("CAP: %f\n", cap_value) - } - } -} - -func cap_value_more_than_window_size_tasks_fcfs(capper *proactive_dynamic_capping.Capper) { - fmt.Println() - fmt.Println("==== FCFS, Number of tasks: >3 (> window_size) ====") - available_power := sample_available_power() - for i := 0; i < constants.Window_size + 2; i++ { - tsk := task.NewTask("gouravr/minife:v5", "minife:v5", "stratos-001", - "minife_command", 4.0, 10, get_random_power(30, 150), 1) - fmt.Printf("task%d = %s\n", i, tsk.String()) - if cap_value, err := capper.Fcfs_determine_cap(available_power, tsk); err == nil { - fmt.Printf("CAP: %f\n", cap_value) - } - } -} - -func cap_values_for_ranked_tasks(capper *proactive_dynamic_capping.Capper) { - fmt.Println() - fmt.Println("==== Ranked, Number of tasks: 5 (window size + 2) ====") - available_power := sample_available_power() - var tasks_to_schedule []*task.Task - for i := 0; i < constants.Window_size + 2; i++ { - tasks_to_schedule = append(tasks_to_schedule, - task.NewTask("gouravr/minife:v5", "minife:v5", "stratos-001", - "minife_command", 4.0, 10, get_random_power(30, 150), 1)) - } - // Printing the tasks that need to be scheduled. 
- index := 0 - for _, tsk := range tasks_to_schedule { - fmt.Printf("task%d = %s\n", index, tsk.String()) - index++ - } - if sorted_tasks_to_be_scheduled, cwcv, err := capper.Ranked_determine_cap(available_power, tasks_to_schedule); err == nil { - fmt.Printf("The cap values are: ") - fmt.Println(cwcv) - fmt.Println("The order of tasks to be scheduled :-") - for _, tsk := range sorted_tasks_to_be_scheduled { - fmt.Println(tsk.String()) - } - } -} - -func main() { - capper := proactive_dynamic_capping.GetInstance() - cap_value_one_task_fcfs(capper) - capper.Clear() - cap_value_window_size_tasks_fcfs(capper) - capper.Clear() - cap_value_more_than_window_size_tasks_fcfs(capper) - capper.Clear() - cap_values_for_ranked_tasks(capper) - capper.Clear() -} diff --git a/schedulers/proactive_dynamic_capping/src/constants/constants.go b/schedulers/proactive_dynamic_capping/src/constants/constants.go deleted file mode 100644 index 0b1a0cc..0000000 --- a/schedulers/proactive_dynamic_capping/src/constants/constants.go +++ /dev/null @@ -1,39 +0,0 @@ -/* -Constants that are used across scripts -1. The available hosts = stratos-00x (x varies from 1 to 8) -2. cap_margin = percentage of the requested power to allocate -3. power_threshold = overloading factor -4. total_power = total power per node -5. window_size = number of tasks to consider for computation of the dynamic cap. -*/ -package constants - -var Hosts = []string{"stratos-001", "stratos-002", - "stratos-003", "stratos-004", - "stratos-005", "stratos-006", - "stratos-007", "stratos-008"} - -/* - Margin with respect to the required power for a job. - So, if power required = 10W, the node would be capped to 75%*10W. - This value can be changed upon convenience. -*/ -var Cap_margin = 0.75 - -// Lower bound of the power threshold for a tasks -var Power_threshold = 0.6 - -// Total power per node -var Total_power = map[string]float64 { - "stratos-001": 100.0, - "stratos-002": 150.0, - "stratos-003": 80.0, - "stratos-004": 90.0, - "stratos-005": 200.0, - "stratos-006": 100.0, - "stratos-007": 175.0, - "stratos-008": 175.0, -} - -// Window size for running average -var Window_size = 3 diff --git a/schedulers/proactive_dynamic_capping/src/github.com/montanaflynn/stats b/schedulers/proactive_dynamic_capping/src/github.com/montanaflynn/stats deleted file mode 160000 index 60dcacf..0000000 --- a/schedulers/proactive_dynamic_capping/src/github.com/montanaflynn/stats +++ /dev/null @@ -1 +0,0 @@ -Subproject commit 60dcacf48f43d6dd654d0ed94120ff5806c5ca5c diff --git a/schedulers/proactive_dynamic_capping/src/proactive_dynamic_capping/capper.go b/schedulers/proactive_dynamic_capping/src/proactive_dynamic_capping/capper.go deleted file mode 100644 index 4e183f3..0000000 --- a/schedulers/proactive_dynamic_capping/src/proactive_dynamic_capping/capper.go +++ /dev/null @@ -1,235 +0,0 @@ -/* -Cluster wide dynamic capping -Step1. Compute running average of tasks in window. -Step2. Compute what percentage of available power of each node, is the running average. -Step3. Compute the median of the percentages and this is the percentage that the cluster needs to be cpaped at. - -1. First Fit Scheduling -- Perform the above steps for each task that needs to be scheduled. -2. Rank based Scheduling -- Sort a set of tasks to be scheduled, in ascending order of power, and then perform the above steps for each of them in the sorted order. 
-*/ - -package proactive_dynamic_capping - -import ( - "constants" - "container/list" - "errors" - "github.com/montanaflynn/stats" - "task" - "sort" - "sync" -) - -// Structure containing utility data structures used to compute cluster wide dyanmic cap. -type Capper struct { - // window of tasks. - window_of_tasks list.List - // The current sum of requested powers of the tasks in the window. - current_sum float64 - // The current number of tasks in the window. - number_of_tasks_in_window int -} - -// Defining constructor for Capper. -func NewCapper() *Capper { - return &Capper{current_sum: 0.0, number_of_tasks_in_window: 0} -} - -// For locking on operations that may result in race conditions. -var mutex sync.Mutex - -// Singleton instance of Capper -var singleton_capper *Capper -// Retrieve the singleton instance of Capper. -func GetInstance() *Capper { - if singleton_capper == nil { - mutex.Lock() - singleton_capper = NewCapper() - mutex.Unlock() - } else { - // Do nothing - } - return singleton_capper -} - -// Clear and initialize all the members of Capper. -func (capper Capper) Clear() { - capper.window_of_tasks.Init() - capper.current_sum = 0 - capper.number_of_tasks_in_window = 0 -} - -// Compute the average of watts of all the tasks in the window. -func (capper Capper) average() float64 { - return capper.current_sum / float64(capper.window_of_tasks.Len()) -} - -/* - Compute the running average - - Using Capper#window_of_tasks to store the tasks in the window. Task at position 0 (oldest task) removed when window is full and new task arrives. -*/ -func (capper Capper) running_average_of_watts(tsk *task.Task) float64 { - var average float64 - if capper.number_of_tasks_in_window < constants.Window_size { - capper.window_of_tasks.PushBack(tsk) - capper.number_of_tasks_in_window++ - capper.current_sum += float64(tsk.Watts) - } else { - task_to_remove_element := capper.window_of_tasks.Front() - if task_to_remove, ok := task_to_remove_element.Value.(*task.Task); ok { - capper.current_sum -= float64(task_to_remove.Watts) - capper.window_of_tasks.Remove(task_to_remove_element) - } - capper.window_of_tasks.PushBack(tsk) - capper.current_sum += float64(tsk.Watts) - } - average = capper.average() - return average -} - -/* - Calculating cap value - - 1. Sorting the values of running_average_available_power_percentage in ascending order. - 2. Computing the median of the above sorted values. - 3. The median is now the cap value. -*/ -func (capper Capper) get_cap(running_average_available_power_percentage map[string]float64) float64 { - var values []float64 - // Validation - if running_average_available_power_percentage == nil { - return 100.0 - } - for _, apower := range running_average_available_power_percentage { - values = append(values, apower) - } - // sorting the values in ascending order - sort.Float64s(values) - // Calculating the median - if median, err := stats.Median(values); err == nil { - return median - } - // should never reach here. If here, then just setting the cap value to be 100 - return 100.0 -} - -// In place sorting of tasks to be scheduled based on the requested watts. 
-func qsort_tasks(low int, high int, tasks_to_sort []*task.Task) { - i := low - j := high - // calculating the pivot - pivot_index := low + (high - low)/2 - pivot := tasks_to_sort[pivot_index] - for i <= j { - for tasks_to_sort[i].Watts < pivot.Watts { - i++ - } - for tasks_to_sort[j].Watts > pivot.Watts { - j-- - } - if i <= j { - temp := tasks_to_sort[i] - tasks_to_sort[i] = tasks_to_sort[j] - tasks_to_sort[j] = temp - i++ - j-- - } - } - if low < j { - qsort_tasks(low, j, tasks_to_sort) - } - if i < high { - qsort_tasks(i, high, tasks_to_sort) - } -} - -// Sorting tasks in ascending order of requested watts. -func (capper Capper) sort_tasks(tasks_to_sort []*task.Task) { - qsort_tasks(0, len(tasks_to_sort)-1, tasks_to_sort) -} - -/* -Remove entry for finished task. -Electron needs to call this whenever a task completes so that the finished task no longer contributes to the computation of the cluster wide cap. -*/ -func (capper Capper) Task_finished(finished_task *task.Task) { - // If the window is empty then just return. Should not be entering this condition as it would mean that there is a bug. - if capper.window_of_tasks.Len() == 0 { - return - } - - // Checking whether the finished task is currently present in the window of tasks. - var task_element_to_remove *list.Element - for task_element := capper.window_of_tasks.Front(); task_element != nil; task_element = task_element.Next() { - if tsk, ok := task_element.Value.(*task.Task); ok { - if task.Compare(tsk, finished_task) { - task_element_to_remove = task_element - } - } - } - - // If finished task is there in the window of tasks, then we need to remove the task from the same and modify the members of Capper accordingly. - if task_to_remove, ok := task_element_to_remove.Value.(*task.Task); ok { - capper.window_of_tasks.Remove(task_element_to_remove) - capper.number_of_tasks_in_window -= 1 - capper.current_sum -= float64(task_to_remove.Watts) - } -} - -// Ranked based scheduling -func (capper Capper) Ranked_determine_cap(available_power map[string]float64, tasks_to_schedule []*task.Task) ([]*task.Task, map[int]float64, error) { - // Validation - if available_power == nil || len(tasks_to_schedule) == 0 { - return nil, nil, errors.New("No available power and no tasks to schedule.") - } else { - // Need to sort the tasks in ascending order of requested power - capper.sort_tasks(tasks_to_schedule) - - // Now, for each task in the sorted set of tasks, we need to use the Fcfs_determine_cap logic. - cluster_wide_cap_values := make(map[int]float64) - index := 0 - for _, tsk := range tasks_to_schedule { - /* - Note that even though Fcfs_determine_cap is called, we have sorted the tasks aprior and thus, the tasks are scheduled in the sorted fashion. - Calling Fcfs_determine_cap(...) just to avoid redundant code. - */ - if cap, err := capper.Fcfs_determine_cap(available_power, tsk); err == nil { - cluster_wide_cap_values[index] = cap - } else { - return nil, nil, err - } - index++ - } - // Now returning the sorted set of tasks and the cluster wide cap values for each task that is launched. - return tasks_to_schedule, cluster_wide_cap_values, nil - } -} - -// First come first serve scheduling. -func (capper Capper) Fcfs_determine_cap(available_power map[string]float64, new_task *task.Task) (float64, error) { - // Validation - if available_power == nil { - // If no power available power, then capping the cluster at 100%. Electron might choose to queue the task. 
- return 100.0, errors.New("No available power.") - } else { - mutex.Lock() - // Need to calcualte the running average - running_average := capper.running_average_of_watts(new_task) - // What percent of available power for each node is the running average - running_average_available_power_percentage := make(map[string]float64) - for node, apower := range available_power { - if apower >= running_average { - running_average_available_power_percentage[node] = (running_average/apower) * 100 - } else { - // We don't consider this node in the offers - } - } - - // Determine the cluster wide cap value. - cap_value := capper.get_cap(running_average_available_power_percentage) - // Electron has to now cap the cluster to this value before launching the next task. - mutex.Unlock() - return cap_value, nil - } -} diff --git a/schedulers/proactive_dynamic_capping/src/task/task.go b/schedulers/proactive_dynamic_capping/src/task/task.go deleted file mode 100644 index 47d8aa5..0000000 --- a/schedulers/proactive_dynamic_capping/src/task/task.go +++ /dev/null @@ -1,73 +0,0 @@ -package task - -import ( - "constants" - "encoding/json" - "reflect" - "strconv" - "utilities" -) - -/* - Blueprint for the task. - Members: - image: - name: - host: - cmd: - cpu: - ram: - watts: - inst: -*/ -type Task struct { - Image string - Name string - Host string - CMD string - CPU float64 - RAM int - Watts int - Inst int -} - -// Defining a constructor for Task -func NewTask(image string, name string, host string, - cmd string, cpu float64, ram int, watts int, inst int) *Task { - return &Task{Image: image, Name: name, Host: host, CPU: cpu, - RAM: ram, Watts: watts, Inst: inst} -} - -// Update the host on which the task needs to be scheduled. -func (task Task) Update_host(new_host string) { - // Validation - if _, ok := constants.Total_power[new_host]; ok { - task.Host = new_host - } -} - -// Stringify task instance -func (task Task) String() string { - task_map := make(map[string]string) - task_map["image"] = task.Image - task_map["name"] = task.Name - task_map["host"] = task.Host - task_map["cmd"] = task.CMD - task_map["cpu"] = utils.FloatToString(task.CPU) - task_map["ram"] = strconv.Itoa(task.RAM) - task_map["watts"] = strconv.Itoa(task.Watts) - task_map["inst"] = strconv.Itoa(task.Inst) - - json_string, _ := json.Marshal(task_map) - return string(json_string) -} - -// Compare one task to another. 2 tasks are the same if all the corresponding members are the same. -func Compare(task *Task, other_task *Task) bool { - // If comparing the same pointers (checking the addresses). - if task == other_task { - return true - } - // Checking member equality - return reflect.DeepEqual(*task, *other_task) -} diff --git a/schedulers/proactive_dynamic_capping/src/utilities/utils.go b/schedulers/proactive_dynamic_capping/src/utilities/utils.go deleted file mode 100644 index 5f2e341..0000000 --- a/schedulers/proactive_dynamic_capping/src/utilities/utils.go +++ /dev/null @@ -1,9 +0,0 @@ -package utils - -import "strconv" - -// Convert float64 to string -func FloatToString(input float64) string { - // Precision is 2, Base is 64 - return strconv.FormatFloat(input, 'f', 2, 64) -} From c16e33d99d5b5f86c8528011088787ed67c5edbb Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 20:07:04 -0500 Subject: [PATCH 060/102] No change made. 
--- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f7e8a33..ae50170 100644 --- a/README.md +++ b/README.md @@ -5,10 +5,10 @@ To Do: * Create metrics for each task launched [Time to schedule, run time, power used] * Have calibration phase? - * Add ability to use constraints + * Add ability to use constraints * Running average calculations https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average - + **Requires Performance-Copilot tool pmdumptext to be installed on the machine on which electron is launched for logging to work** @@ -43,4 +43,4 @@ Workload schema: "inst": 9 } ] -``` \ No newline at end of file +``` From 58289ed90f33bdba76aa4f9dfedabe8e97f4536b Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 21:16:06 -0500 Subject: [PATCH 061/102] commented out the constant Clusterwide_cap_interval and its setter function. Instead hardcoding this in proactiveclusterwidecappingfcfs.go --- constants/constants.go | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/constants/constants.go b/constants/constants.go index 50ca468..133d61f 100644 --- a/constants/constants.go +++ b/constants/constants.go @@ -86,18 +86,18 @@ func UpdateWindowSize(new_window_size int) bool { } } -// Time duration between successive cluster wide capping. -var Clusterwide_cap_interval = 10.0 // Right now capping the cluster at 10 second intervals. - -// Modify the cluster wide capping interval. We can update the interval depending on the workload. -// TODO: If the workload is heavy then we can set a longer interval, while on the other hand, -// if the workload is light then a smaller interval is sufficient. -func UpdateClusterwideCapInterval(new_interval float64) bool { - // Validation - if new_interval == 0.0 { - return false - } else { - Clusterwide_cap_interval = new_interval - return true - } -} +// // Time duration between successive cluster wide capping. +// var Clusterwide_cap_interval = 10 // Right now capping the cluster at 10 second intervals. +// +// // Modify the cluster wide capping interval. We can update the interval depending on the workload. +// // TODO: If the workload is heavy then we can set a longer interval, while on the other hand, +// // if the workload is light then a smaller interval is sufficient. +// func UpdateClusterwideCapInterval(new_interval int) bool { +// // Validation +// if new_interval == 0.0 { +// return false +// } else { +// Clusterwide_cap_interval = new_interval +// return true +// } +// } From 0f8a2d3f0c851d0547c3ed2265ef4c0ffbeda32c Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 21:16:39 -0500 Subject: [PATCH 062/102] fixed a an error. 
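
The validation loop in Task.UpdateHost() compared the undeclared identifier `host` against the existing hosts instead of the parameter `new_host`; the comparison now uses `new_host`.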
--- def/task.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/def/task.go b/def/task.go index 19ed98c..c7326c6 100644 --- a/def/task.go +++ b/def/task.go @@ -42,7 +42,7 @@ func (tsk *Task) UpdateHost(new_host string) bool { // Validation is_correct_host := false for _, existing_host := range constants.Hosts { - if host == existing_host { + if new_host == existing_host { is_correct_host = true } } From 7b19cfb4fc9ad62e3f55f595023de71f425e2bc3 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 21:16:57 -0500 Subject: [PATCH 063/102] fixed an error --- schedulers/proactiveclusterwidecappers.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/schedulers/proactiveclusterwidecappers.go b/schedulers/proactiveclusterwidecappers.go index 7b94982..65bdf2e 100644 --- a/schedulers/proactiveclusterwidecappers.go +++ b/schedulers/proactiveclusterwidecappers.go @@ -24,7 +24,7 @@ import ( // Structure containing utility data structures used to compute cluster-wide dynamic cap. type clusterwideCapper struct { // window of tasks. - window_of_tasks list.list + window_of_tasks list.List // The current sum of requested powers of the tasks in the window. current_sum float64 // The current number of tasks in the window. @@ -167,7 +167,7 @@ func (capper clusterwideCapper) taskFinished(taskID string) { var task_element_to_remove *list.Element for task_element := capper.window_of_tasks.Front(); task_element != nil; task_element = task_element.Next() { if tsk, ok := task_element.Value.(*def.Task); ok { - if task.TaskID == taskID { + if tsk.TaskID == taskID { task_element_to_remove = task_element } } @@ -183,7 +183,7 @@ func (capper clusterwideCapper) taskFinished(taskID string) { // Ranked based scheduling. func (capper clusterwideCapper) rankedDetermineCap(available_power map[string]float64, - tasks_to_schedule []*def.Task) ([]*def.Task, map[string]float64, error) { + tasks_to_schedule []*def.Task) ([]*def.Task, map[int]float64, error) { // Validation if available_power == nil || len(tasks_to_schedule) == 0 { return nil, nil, errors.New("Invalid argument: available_power, tasks_to_schedule") From ed25adcc6c07cf518c32f226589bf6df63205b1b Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 10 Nov 2016 21:18:05 -0500 Subject: [PATCH 064/102] Made a check to see whether cluster wide capping has started and if not then starting the go routine that performs the cluster wide capping at regular intervals. --- schedulers/proactiveclusterwidecappingfcfs.go | 66 +++++++++++-------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/schedulers/proactiveclusterwidecappingfcfs.go b/schedulers/proactiveclusterwidecappingfcfs.go index ac77dee..ae116f8 100644 --- a/schedulers/proactiveclusterwidecappingfcfs.go +++ b/schedulers/proactiveclusterwidecappingfcfs.go @@ -4,17 +4,16 @@ import ( "bitbucket.org/sunybingcloud/electron/def" "bitbucket.org/sunybingcloud/electron/constants" "bitbucket.org/sunybingcloud/electron/rapl" - "errors" "fmt" "github.com/golang/protobuf/proto" mesos "github.com/mesos/mesos-go/mesosproto" "github.com/mesos/mesos-go/mesosutil" sched "github.com/mesos/mesos-go/scheduler" "log" - "sort" "strings" "sync" "time" + "math" ) // electronScheduler implements the Scheduler interface. 
@@ -53,11 +52,11 @@ func NewProactiveClusterwideCapFCFS(tasks []def.Task, ignoreWatts bool) *Proacti Shutdown: make(chan struct{}), Done: make(chan struct{}), PCPLog: make(chan struct{}), - running: make(mapp[string]map[string]bool), + running: make(map[string]map[string]bool), RecordPCP: false, capper: getClusterwideCapperInstance(), - ticker: time.NewTicker(constants.Clusterwide_cap_interval * time.Second), - isCapping: false + ticker: time.NewTicker(10 * time.Second), + isCapping: false, } return s } @@ -79,7 +78,7 @@ func (s *ProactiveClusterwideCapFCFS) newTask(offer *mesos.Offer, task def.Task) // Setting the task ID to the task. This is done so that we can consider each task to be different, // even though they have the same parameters. - task.SetTaskID(proto.String(taskName)) + task.SetTaskID(*proto.String(taskName)) // Add task to the list of tasks running on the node. s.running[offer.GetSlaveId().GoString()][taskName] = true @@ -114,7 +113,7 @@ func (s *ProactiveClusterwideCapFCFS) newTask(offer *mesos.Offer, task def.Task) func (s *ProactiveClusterwideCapFCFS) Registered( _ sched.SchedulerDriver, - framewordID *mesos.FrameworkID, + frameworkID *mesos.FrameworkID, masterInfo *mesos.MasterInfo) { log.Printf("Framework %s registered with master %s", frameworkID, masterInfo) } @@ -128,23 +127,27 @@ func (s *ProactiveClusterwideCapFCFS) Disconnected(sched.SchedulerDriver) { } // go routine to cap the entire cluster in regular intervals of time. -func (s *ProactiveClusterwideCapFCFS) startCapping(currentCapValue float64, mutex sync.Mutex) { +var currentCapValue = 0.0 // initial value to indicate that we haven't capped the cluster yet. +func (s *ProactiveClusterwideCapFCFS) startCapping(mutex sync.Mutex) { go func() { - for tick := range s.ticker.C { - // Need to cap the cluster to the currentCapValue. - if currentCapValue > 0.0 { - mutex.Lock() - for _, host := range constants.Hosts { - if err := rapl.Cap(host, int(math.Floor(currentCapValue + 0.5))); err != nil { - fmt.Println(err) - } else { - fmt.Println("Successfully capped %s to %d\\%", host, currentCapValue) + for { + select { + case <- s.ticker.C: + // Need to cap the cluster to the currentCapValue. + if currentCapValue > 0.0 { + mutex.Lock() + for _, host := range constants.Hosts { + if err := rapl.Cap(host, "rapl", int(math.Floor(currentCapValue + 0.5))); err != nil { + fmt.Println(err) + } else { + fmt.Printf("Successfully capped %s to %d\\% at %\n", host, currentCapValue) + } + } + mutex.Unlock() } - } - mutex.Unlock() } } - } + }() } // TODO: Need to reduce the time complexity: looping over offers twice (Possible to do it just once?). @@ -155,12 +158,12 @@ func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDrive available_power := make(map[string]float64) for _, offer := range offers { _, _, offer_watts := OfferAgg(offer) - available_power[offer.Hostname] = offer_watts + available_power[*offer.Hostname] = offer_watts } for _, offer := range offers { select { - case <-s.Shutdown; + case <-s.Shutdown: log.Println("Done scheduling tasks: declining offerf on [", offer.GetHostname(), "]") driver.DeclineOffer(offer.Id, longFilter) @@ -184,9 +187,14 @@ func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDrive offer_cpu, offer_ram, _ := OfferAgg(offer) taken := false - currentCapValue := 0.0 // initial value to indicate that we haven't capped the cluster yet. 
var mutex sync.Mutex + // If haven't started cluster wide capping then doing so, + if !s.isCapping { + s.startCapping(mutex) + s.isCapping = true + } + for _, task := range s.tasks { // Don't take offer if it doesn't match our task's host requirement. if !strings.HasPrefix(*offer.Hostname, task.Host) { @@ -194,18 +202,20 @@ func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDrive } // Does the task fit. - if (s.ignoreWatts || offer_cpu >= task.CPU ||| offer_ram >= task.RAM) { + if (s.ignoreWatts || offer_cpu >= task.CPU || offer_ram >= task.RAM) { taken = true mutex.Lock() - tempCap, err = s.capper.fcfsDetermineCap(available_power, task) + tempCap, err := s.capper.fcfsDetermineCap(available_power, &task) if err == nil { currentCapValue = tempCap } else { - fmt.Println("Failed to determine cluster wide cap: " + err.String()) + fmt.Printf("Failed to determine cluster wide cap: ") + fmt.Println(err) } mutex.Unlock() fmt.Printf("Starting on [%s]\n", offer.GetHostname()) - driver.LaunchTasks([]*mesos.OfferID{offer.Id}, [s.newTask(offer, task)], defaultFilter) + to_schedule := []*mesos.TaskInfo{s.newTask(offer, task)} + driver.LaunchTasks([]*mesos.OfferID{offer.Id}, to_schedule, defaultFilter) } else { // Task doesn't fit the offer. Move onto the next offer. } @@ -230,7 +240,7 @@ func (s *ProactiveClusterwideCapFCFS) StatusUpdate(driver sched.SchedulerDriver, } else if IsTerminal(status.State) { delete(s.running[status.GetSlaveId().GoString()], *status.TaskId.Value) // Need to remove the task from the window of tasks. - s.capper.taskFinished(status.TaskId.Value) + s.capper.taskFinished(*status.TaskId.Value) s.tasksRunning-- if s.tasksRunning == 0 { select { From bbb83f419702c76861ce09e77285a82e8df65de2 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 14 Nov 2016 22:40:31 -0500 Subject: [PATCH 065/102] Added another log message to log the name of the file to which the pcplogs are getting written to. --- pcp/pcp.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pcp/pcp.go b/pcp/pcp.go index 8a1d46d..3e65a70 100644 --- a/pcp/pcp.go +++ b/pcp/pcp.go @@ -19,6 +19,7 @@ func Start(quit chan struct{}, logging *bool, prefix string) { if err != nil { log.Fatal(err) } + log.Println("Writing pcp logs to file: " + logFile.Name()) defer logFile.Close() From 5dc3baab553461847fef7de5d693c9600925cf49 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 14 Nov 2016 22:42:22 -0500 Subject: [PATCH 066/102] changed extrema to non-extrema. This was done so that proactive cluster wide capping scheme doesn't conflict with the extrema capping scheme. 
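The guard added above ("If haven't started cluster wide capping then doing so") launches the capping goroutine at most once per scheduler run. A reduced sketch of that start-once check; the mutex around the flag is an extra assumption for safety, the patch itself only tests a plain bool:

package main

import (
	"fmt"
	"sync"
)

type scheduler struct {
	mu        sync.Mutex
	isCapping bool
}

// ensureCapping starts the capping goroutine the first time it is called
// and is a no-op afterwards.
func (s *scheduler) ensureCapping(start func()) {
	s.mu.Lock()
	defer s.mu.Unlock()
	if !s.isCapping {
		start()
		s.isCapping = true
	}
}

func main() {
	s := &scheduler{}
	for i := 0; i < 3; i++ {
		s.ensureCapping(func() { fmt.Println("capping goroutine started") })
	}
}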
--- scheduler.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scheduler.go b/scheduler.go index 168b72e..356587f 100644 --- a/scheduler.go +++ b/scheduler.go @@ -2,8 +2,8 @@ package main import ( "bitbucket.org/sunybingcloud/electron/def" - "bitbucket.org/sunybingcloud/electron/pcp" "bitbucket.org/sunybingcloud/electron/schedulers" + "bitbucket.org/sunybingcloud/electron/pcp" "flag" "fmt" "github.com/golang/protobuf/proto" @@ -70,8 +70,8 @@ func main() { return } - //go pcp.Start(scheduler.PCPLog, &scheduler.RecordPCP, *pcplogPrefix) - go pcp.StartLogAndDynamicCap(scheduler.PCPLog, &scheduler.RecordPCP, *pcplogPrefix, *hiThreshold, *loThreshold) + go pcp.Start(scheduler.PCPLog, &scheduler.RecordPCP, *pcplogPrefix) + //go pcp.StartLogAndDynamicCap(scheduler.PCPLog, &scheduler.RecordPCP, *pcplogPrefix, *hiThreshold, *loThreshold) time.Sleep(1 * time.Second) // Attempt to handle signint to not leave pmdumptext running From 3551de20da2080f2b065d283a9183b0c7f542612 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 14 Nov 2016 22:43:05 -0500 Subject: [PATCH 067/102] The computation of the cluster wide cap now considers total power per node rather than the available power per node. Also, added function recap(...) that is called to compute the cluster wide cap once a task completes. This value is used to change the cluster wide cap once a task completes." --- schedulers/proactiveclusterwidecappers.go | 130 ++++++++++++++-------- 1 file changed, 83 insertions(+), 47 deletions(-) diff --git a/schedulers/proactiveclusterwidecappers.go b/schedulers/proactiveclusterwidecappers.go index 65bdf2e..8d7a55e 100644 --- a/schedulers/proactiveclusterwidecappers.go +++ b/schedulers/proactiveclusterwidecappers.go @@ -1,24 +1,22 @@ /* Cluster wide dynamic capping -Step1. Compute running average of tasks in window. -Step2. Compute what percentage of available power of each node, is the running average. -Step3. Compute the median of the percentages and this is the percentage that the cluster needs to be cpaped at. +Step1. Compute the running average of watts of tasks in window. +Step2. Compute what percentage of total power of each node, is the running average. +Step3. Compute the median of the percetages and this is the percentage that the cluster needs to be capped at. -1. First Fit Scheduling -- Perform the above steps for each task that needs to be scheduled. -2. Rank based Scheduling -- Sort a set of tasks to be scheduled, in ascending order of power, and then perform the above steps for each of them in the sorted order. +1. First fit scheduling -- Perform the above steps for each task that needs to be scheduled. This is not a scheduler but a scheduling scheme that schedulers can use. */ package schedulers import ( - "bitbucket.org/sunybingcloud/electron/constants" - "bitbucket.org/sunybingcloud/electron/def" - "container/list" - "errors" - "github.com/montanaflynn/stats" - "sort" - "sync" + "bitbucket.org/sunybingcloud/electron/constants" + "bitbucket.org/sunybingcloud/electron/def" + "container/list" + "errors" + "github.com/montanaflynn/stats" + "sort" ) // Structure containing utility data structures used to compute cluster-wide dynamic cap. @@ -36,17 +34,12 @@ func newClusterwideCapper() *clusterwideCapper { return &clusterwideCapper{current_sum: 0.0, number_of_tasks_in_window: 0} } -// For locking on operations that may result in race conditions. 
-var clusterwide_capper_mutex sync.Mutex - // Singleton instance of clusterwideCapper var singleton_capper *clusterwideCapper // Retrieve the singleton instance of clusterwideCapper. func getClusterwideCapperInstance() *clusterwideCapper { if singleton_capper == nil { - clusterwide_capper_mutex.Lock() singleton_capper = newClusterwideCapper() - clusterwide_capper_mutex.Unlock() } else { // Do nothing } @@ -76,15 +69,15 @@ func (capper clusterwideCapper) running_average_of_watts(tsk *def.Task) float64 if capper.number_of_tasks_in_window < constants.Window_size { capper.window_of_tasks.PushBack(tsk) capper.number_of_tasks_in_window++ - capper.current_sum += float64(tsk.Watts) + capper.current_sum += float64(tsk.Watts) * constants.Cap_margin } else { task_to_remove_element := capper.window_of_tasks.Front() if task_to_remove, ok := task_to_remove_element.Value.(*def.Task); ok { - capper.current_sum -= float64(task_to_remove.Watts) + capper.current_sum -= float64(task_to_remove.Watts) * constants.Cap_margin capper.window_of_tasks.Remove(task_to_remove_element) } capper.window_of_tasks.PushBack(tsk) - capper.current_sum += float64(tsk.Watts) + capper.current_sum += float64(tsk.Watts) * constants.Cap_margin } average = capper.average() return average @@ -93,20 +86,20 @@ func (capper clusterwideCapper) running_average_of_watts(tsk *def.Task) float64 /* Calculating cap value. -1. Sorting the values of running_average_available_power_percentage in ascending order. -2. Computing the median of the above sorted values. -3. The median is now the cap value. +1. Sorting the values of running_average_to_total_power_percentage in ascending order. +2. Computing the median of above sorted values. +3. The median is now the cap. */ -func (capper clusterwideCapper) get_cap(running_average_available_power_percentage map[string]float64) float64 { +func (capper clusterwideCapper) get_cap(running_average_to_total_power_percentage map[string]float64) float64 { var values []float64 // Validation - if running_average_available_power_percentage == nil { + if running_average_to_total_power_percentage == nil { return 100.0 } - for _, apower := range running_average_available_power_percentage { + for _, apower := range running_average_to_total_power_percentage { values = append(values, apower) } - // sorting the values in ascending order + // sorting the values in ascending order. sort.Float64s(values) // Calculating the median if median, err := stats.Median(values); err == nil { @@ -116,8 +109,51 @@ func (capper clusterwideCapper) get_cap(running_average_available_power_percenta return 100.0 } -/* Quick sort algorithm to sort tasks, in place, -in ascending order of power.*/ +/* +Recapping the entire cluster. + +1. Remove the task that finished from the list of running tasks. +2. Compute the average allocated power of each of the tasks that are currently running. +3. For each host, determine the ratio of the average to the total power. +4. Determine the median of the ratios and this would be the new cluster wide cap. + +This needs to be called whenever a task finishes execution. 
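The running average used by the capper comes from a fixed-size window of recent tasks, with the sum maintained incrementally as tasks enter and leave (the scheduler additionally scales each value by constants.Cap_margin, which is omitted here). A compact, self-contained sketch of that window; window size and watts values are made up:

package main

import (
	"container/list"
	"fmt"
)

type window struct {
	tasks list.List
	sum   float64
	size  int
}

// add pushes a new watts value, evicting the oldest one when the window is full,
// and returns the current running average.
func (w *window) add(watts float64) float64 {
	if w.tasks.Len() >= w.size {
		oldest := w.tasks.Front()
		w.sum -= oldest.Value.(float64)
		w.tasks.Remove(oldest)
	}
	w.tasks.PushBack(watts)
	w.sum += watts
	return w.sum / float64(w.tasks.Len())
}

func main() {
	w := &window{size: 3}
	for _, watts := range []float64{60, 80, 100, 120} {
		fmt.Printf("running average: %.2f\n", w.add(watts))
	}
}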
+*/ +func (capper clusterwideCapper) recap(total_power map[string]float64, + task_monitor map[string][]def.Task, finished_taskId string) (float64, error) { + // Validation + if total_power == nil || task_monitor == nil { + return 100.0, errors.New("Invalid argument: total_power, task_monitor") + } + total_allocated_power := 0.0 + total_running_tasks := 0 + for _, tasks := range task_monitor { + index := 0 + for i, task := range tasks { + if task.TaskID == finished_taskId { + index = i + continue + } + total_allocated_power += float64(task.Watts) * constants.Cap_margin + total_running_tasks++ + } + tasks = append(tasks[:index], tasks[index+1:]...) + } + average := total_allocated_power / float64(total_running_tasks) + ratios := []float64{} + for _, tpower := range total_power { + ratios = append(ratios, (average/tpower) * 100) + } + sort.Float64s(ratios) + median, err := stats.Median(ratios) + if err == nil { + return median, nil + } else { + return 100, err + } +} + +/* Quick sort algorithm to sort tasks, in place, in ascending order of power.*/ func (capper clusterwideCapper) quick_sort(low int, high int, tasks_to_sort []*def.Task) { i := low j := high @@ -154,7 +190,8 @@ func (capper clusterwideCapper) sort_tasks(tasks_to_sort []*def.Task) { /* Remove entry for finished task. -This function is called when a task completes. This completed task needs to be removed from the window of tasks (if it is still present) +This function is called when a task completes. +This completed task needs to be removed from the window of tasks (if it is still present) so that it doesn't contribute to the computation of the cap value. */ func (capper clusterwideCapper) taskFinished(taskID string) { @@ -173,11 +210,11 @@ func (capper clusterwideCapper) taskFinished(taskID string) { } } - // If finished task is there in the window of tasks, then we need to remove the task from the same and modify the members of clusterwideCapper accordingly. + // Ee need to remove the task from the window. if task_to_remove, ok := task_element_to_remove.Value.(*def.Task); ok { capper.window_of_tasks.Remove(task_element_to_remove) capper.number_of_tasks_in_window -= 1 - capper.current_sum -= float64(task_to_remove.Watts) + capper.current_sum -= float64(task_to_remove.Watts) * constants.Cap_margin } } @@ -211,34 +248,33 @@ func (capper clusterwideCapper) rankedDetermineCap(available_power map[string]fl } } -// First come first serve shceduling. -func (capper clusterwideCapper) fcfsDetermineCap(available_power map[string]float64, new_task *def.Task) (float64, error) { +// First come first serve scheduling. +func (capper clusterwideCapper) fcfsDetermineCap(total_power map[string]float64, + new_task *def.Task) (float64, error) { // Validation - if available_power == nil { - return 100, errors.New("Invalid argument: available_power") + if total_power == nil { + return 100, errors.New("Invalid argument: total_power") } else { - clusterwide_capper_mutex.Lock() // Need to calculate the running average running_average := capper.running_average_of_watts(new_task) - // What percent of available_power for each node is the running average. - running_average_available_power_percentage := make(map[string]float64) - for host, apower := range available_power { - if apower >= running_average { - running_average_available_power_percentage[host] = (running_average/apower) * 100 + // For each node, calculate the percentage of the running average to the total power. 
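The recap computation above boils down to: average the margin-adjusted watts of the still-running tasks, express that average as a percentage of each node's total power, and take the median of those percentages. A self-contained numeric sketch of that arithmetic in pure Go, without the montanaflynn/stats dependency; the power figures are invented:

package main

import (
	"fmt"
	"sort"
)

// median assumes a non-empty slice.
func median(values []float64) float64 {
	sort.Float64s(values)
	n := len(values)
	if n%2 == 1 {
		return values[n/2]
	}
	return (values[n/2-1] + values[n/2]) / 2
}

func main() {
	capMargin := 0.75                                    // illustrative margin
	runningWatts := []float64{50, 75, 120}               // watts of tasks still running
	totalPower := map[string]float64{"a": 200, "b": 250} // total power per node

	allocated := 0.0
	for _, w := range runningWatts {
		allocated += w * capMargin
	}
	average := allocated / float64(len(runningWatts))

	ratios := []float64{}
	for _, tp := range totalPower {
		ratios = append(ratios, average/tp*100)
	}
	fmt.Printf("new cluster wide cap: %.2f%%\n", median(ratios))
}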
+ running_average_to_total_power_percentage := make(map[string]float64) + for host, tpower := range total_power { + if tpower >= running_average { + running_average_to_total_power_percentage[host] = (running_average/tpower) * 100 } else { - // We don't consider this host in the offers. + // We don't consider this host for the computation of the cluster wide cap. } } // Determine the cluster wide cap value. - cap_value := capper.get_cap(running_average_available_power_percentage) - // Need to cap the cluster to this value before launching the next task. - clusterwide_capper_mutex.Unlock() + cap_value := capper.get_cap(running_average_to_total_power_percentage) + // Need to cap the cluster to this value. return cap_value, nil } } // Stringer for an instance of clusterwideCapper func (capper clusterwideCapper) string() string { - return "Clusterwide Capper -- Proactively cap the entire cluster." + return "Cluster Capper -- Proactively cap the entire cluster." } From 4d13c432c4fcf6dd10043ce539aa0901203ae300 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 14 Nov 2016 22:46:38 -0500 Subject: [PATCH 068/102] Kept track of totalPower per node. The watts resource for the first offer corresponds to the total power per node. Removed tasks, that had all their instances scheduled, from the list of tasks to schedule. Also, calling recap(...) every time a task completes to determine the new cluster wide cap." --- schedulers/proactiveclusterwidecappingfcfs.go | 132 +++++++++++++----- 1 file changed, 100 insertions(+), 32 deletions(-) diff --git a/schedulers/proactiveclusterwidecappingfcfs.go b/schedulers/proactiveclusterwidecappingfcfs.go index ae116f8..679686a 100644 --- a/schedulers/proactiveclusterwidecappingfcfs.go +++ b/schedulers/proactiveclusterwidecappingfcfs.go @@ -10,12 +10,21 @@ import ( "github.com/mesos/mesos-go/mesosutil" sched "github.com/mesos/mesos-go/scheduler" "log" - "strings" - "sync" - "time" "math" + "strings" + "time" ) +// Decides if to take an offer or not +func (_ *ProactiveClusterwideCapFCFS) takeOffer(offer *mesos.Offer, task def.Task) bool { + offer_cpu, offer_mem, _ := OfferAgg(offer) + + if offer_cpu >= task.CPU && offer_mem >= task.RAM { + return true + } + return false +} + // electronScheduler implements the Scheduler interface. type ProactiveClusterwideCapFCFS struct { tasksCreated int @@ -23,10 +32,14 @@ type ProactiveClusterwideCapFCFS struct { tasks []def.Task metrics map[string]def.Metric running map[string]map[string]bool + taskMonitor map[string][]def.Task // store tasks that are currently running. + availablePower map[string]float64 // available power for each node in the cluster. + totalPower map[string]float64 // total power for each node in the cluster. ignoreWatts bool capper *clusterwideCapper ticker *time.Ticker - isCapping bool + isCapping bool // indicate whether we are currently performing cluster wide capping. + //lock *sync.Mutex // First set of PCP values are garbage values, signal to logger to start recording when we're // about to schedule the new task. 
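The takeOffer predicate introduced in this patch reduces to comparing the offer's aggregated resources against the task's requirements. A minimal sketch with plain structs standing in for mesos.Offer and def.Task; the ignoreWatts branch is an added variant in the spirit of the rest of the scheduler, not a copy of this exact revision:

package main

import "fmt"

type offer struct{ cpu, mem, watts float64 }
type task struct{ cpu, ram, watts float64 }

// takeOffer accepts an offer only if every requested resource fits.
func takeOffer(o offer, t task, ignoreWatts bool) bool {
	if o.cpu < t.cpu || o.mem < t.ram {
		return false
	}
	if !ignoreWatts && o.watts < t.watts {
		return false
	}
	return true
}

func main() {
	o := offer{cpu: 4, mem: 8192, watts: 150}
	fmt.Println(takeOffer(o, task{cpu: 1, ram: 1024, watts: 75}, false))  // true
	fmt.Println(takeOffer(o, task{cpu: 1, ram: 1024, watts: 200}, false)) // false
}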
@@ -53,10 +66,14 @@ func NewProactiveClusterwideCapFCFS(tasks []def.Task, ignoreWatts bool) *Proacti Done: make(chan struct{}), PCPLog: make(chan struct{}), running: make(map[string]map[string]bool), + taskMonitor: make(map[string][]def.Task), + availablePower: make(map[string]float64), + totalPower: make(map[string]float64), RecordPCP: false, capper: getClusterwideCapperInstance(), - ticker: time.NewTicker(10 * time.Second), + ticker: time.NewTicker(5 * time.Second), isCapping: false, + //lock: new(sync.Mutex), } return s } @@ -81,6 +98,7 @@ func (s *ProactiveClusterwideCapFCFS) newTask(offer *mesos.Offer, task def.Task) task.SetTaskID(*proto.String(taskName)) // Add task to the list of tasks running on the node. s.running[offer.GetSlaveId().GoString()][taskName] = true + s.taskMonitor[offer.GetSlaveId().GoString()] = []def.Task{task} resources := []*mesos.Resource{ mesosutil.NewScalarResource("cpus", task.CPU), @@ -123,51 +141,76 @@ func (s *ProactiveClusterwideCapFCFS) Reregistered(_ sched.SchedulerDriver, mast } func (s *ProactiveClusterwideCapFCFS) Disconnected(sched.SchedulerDriver) { + // Need to stop the capping process. + s.ticker.Stop() + s.isCapping = false log.Println("Framework disconnected with master") } // go routine to cap the entire cluster in regular intervals of time. var currentCapValue = 0.0 // initial value to indicate that we haven't capped the cluster yet. -func (s *ProactiveClusterwideCapFCFS) startCapping(mutex sync.Mutex) { +func (s *ProactiveClusterwideCapFCFS) startCapping() { go func() { for { select { case <- s.ticker.C: // Need to cap the cluster to the currentCapValue. if currentCapValue > 0.0 { - mutex.Lock() + //mutex.Lock() + //s.lock.Lock() for _, host := range constants.Hosts { + // Rounding curreCapValue to the nearest int. if err := rapl.Cap(host, "rapl", int(math.Floor(currentCapValue + 0.5))); err != nil { fmt.Println(err) } else { - fmt.Printf("Successfully capped %s to %d\\% at %\n", host, currentCapValue) + fmt.Printf("Successfully capped %s to %f%\n", host, currentCapValue) } } - mutex.Unlock() + //mutex.Unlock() + //s.lock.Unlock() } } } }() } +// Stop cluster wide capping +func (s *ProactiveClusterwideCapFCFS) stopCapping() { + if s.isCapping { + log.Println("Stopping the cluster wide capping.") + s.ticker.Stop() + s.isCapping = false + } +} + // TODO: Need to reduce the time complexity: looping over offers twice (Possible to do it just once?). func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { log.Printf("Received %d resource offers", len(offers)) // retrieving the available power for all the hosts in the offers. - available_power := make(map[string]float64) for _, offer := range offers { _, _, offer_watts := OfferAgg(offer) - available_power[*offer.Hostname] = offer_watts + s.availablePower[*offer.Hostname] = offer_watts + // setting total power if the first time. 
+ if _, ok := s.totalPower[*offer.Hostname]; !ok { + s.totalPower[*offer.Hostname] = offer_watts + } + } + + for host, tpower := range s.totalPower { + fmt.Printf("TotalPower[%s] = %f\n", host, tpower) + } + for host, apower := range s.availablePower { + fmt.Printf("AvailablePower[%s] = %f\n", host, apower) } for _, offer := range offers { select { case <-s.Shutdown: - log.Println("Done scheduling tasks: declining offerf on [", offer.GetHostname(), "]") + log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]") driver.DeclineOffer(offer.Id, longFilter) - log.Println("Number og tasks still running: ", s.tasksRunning) + log.Println("Number of tasks still running: ", s.tasksRunning) continue default: } @@ -176,46 +219,64 @@ func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDrive Clusterwide Capping strategy For each task in s.tasks, - 1. I need to check whether the mesos offer can be taken or not (based on CPU and RAM). - 2. If the tasks fits the offer then I need to detemrine the cluster wide cap. - 3. First need to cap the cluster to the determine cap value and then launch the task on the host corresponding to the offer. + 1. Need to check whether the offer can be taken or not (based on CPU and RAM requirements). + 2. If the tasks fits the offer, then I need to detemrine the cluster wide cap. + 3. currentCapValue is updated with the determined cluster wide cap. - Capping the cluster for every task would create a lot of overhead. Hence, clusterwide capping is performed at regular intervals. + Cluster wide capping is currently performed at regular intervals of time. TODO: We can choose to cap the cluster only if the clusterwide cap varies more than the current clusterwide cap. Although this sounds like a better approach, it only works when the resource requirements of neighbouring tasks are similar. */ - offer_cpu, offer_ram, _ := OfferAgg(offer) + //offer_cpu, offer_ram, _ := OfferAgg(offer) taken := false - var mutex sync.Mutex + //var mutex sync.Mutex - // If haven't started cluster wide capping then doing so, - if !s.isCapping { - s.startCapping(mutex) - s.isCapping = true - } - - for _, task := range s.tasks { + for i, task := range s.tasks { // Don't take offer if it doesn't match our task's host requirement. if !strings.HasPrefix(*offer.Hostname, task.Host) { continue } // Does the task fit. - if (s.ignoreWatts || offer_cpu >= task.CPU || offer_ram >= task.RAM) { + if s.takeOffer(offer, task) { + // Capping the cluster if haven't yet started, + if !s.isCapping { + s.startCapping() + s.isCapping = true + } taken = true - mutex.Lock() - tempCap, err := s.capper.fcfsDetermineCap(available_power, &task) + //mutex.Lock() + //s.lock.Lock() + //tempCap, err := s.capper.fcfsDetermineCap(s.availablePower, &task) + tempCap, err := s.capper.fcfsDetermineCap(s.totalPower, &task) + if err == nil { currentCapValue = tempCap } else { - fmt.Printf("Failed to determine cluster wide cap: ") + fmt.Printf("Failed to determine new cluster wide cap: ") fmt.Println(err) } - mutex.Unlock() + //mutex.Unlock() + //s.lock.Unlock() fmt.Printf("Starting on [%s]\n", offer.GetHostname()) to_schedule := []*mesos.TaskInfo{s.newTask(offer, task)} driver.LaunchTasks([]*mesos.OfferID{offer.Id}, to_schedule, defaultFilter) + fmt.Printf("Inst: %d", *task.Instances) + *task.Instances-- + if *task.Instances <= 0 { + // All instances of the task have been scheduled. Need to remove it from the list of tasks to schedule. 
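The removal shown in the lines that follow drops a fully scheduled task from s.tasks by overwriting its slot with the last element and truncating the slice, an O(1) delete that does not preserve order. A standalone sketch of that idiom, using example task names:

package main

import "fmt"

// removeAt deletes the element at index i without preserving order.
func removeAt(tasks []string, i int) []string {
	tasks[i] = tasks[len(tasks)-1]
	return tasks[:len(tasks)-1]
}

func main() {
	tasks := []string{"minife", "dgemm", "stream"} // example task names
	tasks = removeAt(tasks, 0)
	fmt.Println(tasks) // [stream dgemm]
}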
+ s.tasks[i] = s.tasks[len(s.tasks)-1] + s.tasks = s.tasks[:len(s.tasks)-1] + + if len(s.tasks) <= 0 { + log.Println("Done scheduling all tasks") + // Need to stop the cluster wide capping as there aren't any more tasks to schedule. + s.stopCapping() + close(s.Shutdown) + } + } + break // Offer taken, move on. } else { // Task doesn't fit the offer. Move onto the next offer. } @@ -223,7 +284,7 @@ func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDrive // If no task fit the offer, then declining the offer. if !taken { - fmt.Println("There is not enough resources to launch a task:") + fmt.Printf("There is not enough resources to launch a task on Host: %s\n", offer.GetHostname()) cpus, mem, watts := OfferAgg(offer) log.Printf("\n", cpus, mem, watts) @@ -241,10 +302,17 @@ func (s *ProactiveClusterwideCapFCFS) StatusUpdate(driver sched.SchedulerDriver, delete(s.running[status.GetSlaveId().GoString()], *status.TaskId.Value) // Need to remove the task from the window of tasks. s.capper.taskFinished(*status.TaskId.Value) + //currentCapValue, _ = s.capper.recap(s.availablePower, s.taskMonitor, *status.TaskId.Value) + // Determining the new cluster wide cap. + currentCapValue, _ = s.capper.recap(s.totalPower, s.taskMonitor, *status.TaskId.Value) + log.Printf("Recapping the cluster to %f\n", currentCapValue) + s.tasksRunning-- if s.tasksRunning == 0 { select { case <-s.Shutdown: + // Need to stop the capping process. + s.stopCapping() close(s.Done) default: } From b7394b876283ac46bd67966f2eb8dcdf80f302ee Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 14 Nov 2016 22:53:06 -0500 Subject: [PATCH 069/102] formatted the code --- schedulers/proactiveclusterwidecappers.go | 359 +++++++-------- schedulers/proactiveclusterwidecappingfcfs.go | 434 +++++++++--------- 2 files changed, 397 insertions(+), 396 deletions(-) diff --git a/schedulers/proactiveclusterwidecappers.go b/schedulers/proactiveclusterwidecappers.go index 8d7a55e..aa3eafa 100644 --- a/schedulers/proactiveclusterwidecappers.go +++ b/schedulers/proactiveclusterwidecappers.go @@ -11,51 +11,52 @@ This is not a scheduler but a scheduling scheme that schedulers can use. package schedulers import ( - "bitbucket.org/sunybingcloud/electron/constants" - "bitbucket.org/sunybingcloud/electron/def" - "container/list" - "errors" - "github.com/montanaflynn/stats" - "sort" + "bitbucket.org/sunybingcloud/electron/constants" + "bitbucket.org/sunybingcloud/electron/def" + "container/list" + "errors" + "github.com/montanaflynn/stats" + "sort" ) // Structure containing utility data structures used to compute cluster-wide dynamic cap. type clusterwideCapper struct { - // window of tasks. - window_of_tasks list.List - // The current sum of requested powers of the tasks in the window. - current_sum float64 - // The current number of tasks in the window. - number_of_tasks_in_window int + // window of tasks. + window_of_tasks list.List + // The current sum of requested powers of the tasks in the window. + current_sum float64 + // The current number of tasks in the window. + number_of_tasks_in_window int } // Defining constructor for clusterwideCapper. Please don't call this directly and instead use getClusterwideCapperInstance(). 
func newClusterwideCapper() *clusterwideCapper { - return &clusterwideCapper{current_sum: 0.0, number_of_tasks_in_window: 0} + return &clusterwideCapper{current_sum: 0.0, number_of_tasks_in_window: 0} } // Singleton instance of clusterwideCapper var singleton_capper *clusterwideCapper + // Retrieve the singleton instance of clusterwideCapper. func getClusterwideCapperInstance() *clusterwideCapper { - if singleton_capper == nil { - singleton_capper = newClusterwideCapper() - } else { - // Do nothing - } - return singleton_capper + if singleton_capper == nil { + singleton_capper = newClusterwideCapper() + } else { + // Do nothing + } + return singleton_capper } // Clear and initialize all the members of clusterwideCapper. func (capper clusterwideCapper) clear() { - capper.window_of_tasks.Init() - capper.current_sum = 0 - capper.number_of_tasks_in_window = 0 + capper.window_of_tasks.Init() + capper.current_sum = 0 + capper.number_of_tasks_in_window = 0 } // Compute the average of watts of all the tasks in the window. func (capper clusterwideCapper) average() float64 { - return capper.current_sum / float64(capper.window_of_tasks.Len()) + return capper.current_sum / float64(capper.window_of_tasks.Len()) } /* @@ -65,22 +66,22 @@ Using clusterwideCapper#window_of_tasks to store the tasks. Task at position 0 (oldest task) is removed when the window is full and new task arrives. */ func (capper clusterwideCapper) running_average_of_watts(tsk *def.Task) float64 { - var average float64 - if capper.number_of_tasks_in_window < constants.Window_size { - capper.window_of_tasks.PushBack(tsk) - capper.number_of_tasks_in_window++ - capper.current_sum += float64(tsk.Watts) * constants.Cap_margin - } else { - task_to_remove_element := capper.window_of_tasks.Front() - if task_to_remove, ok := task_to_remove_element.Value.(*def.Task); ok { - capper.current_sum -= float64(task_to_remove.Watts) * constants.Cap_margin - capper.window_of_tasks.Remove(task_to_remove_element) - } - capper.window_of_tasks.PushBack(tsk) - capper.current_sum += float64(tsk.Watts) * constants.Cap_margin - } - average = capper.average() - return average + var average float64 + if capper.number_of_tasks_in_window < constants.Window_size { + capper.window_of_tasks.PushBack(tsk) + capper.number_of_tasks_in_window++ + capper.current_sum += float64(tsk.Watts) * constants.Cap_margin + } else { + task_to_remove_element := capper.window_of_tasks.Front() + if task_to_remove, ok := task_to_remove_element.Value.(*def.Task); ok { + capper.current_sum -= float64(task_to_remove.Watts) * constants.Cap_margin + capper.window_of_tasks.Remove(task_to_remove_element) + } + capper.window_of_tasks.PushBack(tsk) + capper.current_sum += float64(tsk.Watts) * constants.Cap_margin + } + average = capper.average() + return average } /* @@ -91,22 +92,22 @@ Calculating cap value. 3. The median is now the cap. */ func (capper clusterwideCapper) get_cap(running_average_to_total_power_percentage map[string]float64) float64 { - var values []float64 - // Validation - if running_average_to_total_power_percentage == nil { - return 100.0 - } - for _, apower := range running_average_to_total_power_percentage { - values = append(values, apower) - } - // sorting the values in ascending order. - sort.Float64s(values) - // Calculating the median - if median, err := stats.Median(values); err == nil { - return median - } - // should never reach here. 
If here, then just setting the cap value to be 100 - return 100.0 + var values []float64 + // Validation + if running_average_to_total_power_percentage == nil { + return 100.0 + } + for _, apower := range running_average_to_total_power_percentage { + values = append(values, apower) + } + // sorting the values in ascending order. + sort.Float64s(values) + // Calculating the median + if median, err := stats.Median(values); err == nil { + return median + } + // should never reach here. If here, then just setting the cap value to be 100 + return 100.0 } /* @@ -120,72 +121,72 @@ Recapping the entire cluster. This needs to be called whenever a task finishes execution. */ func (capper clusterwideCapper) recap(total_power map[string]float64, - task_monitor map[string][]def.Task, finished_taskId string) (float64, error) { - // Validation - if total_power == nil || task_monitor == nil { - return 100.0, errors.New("Invalid argument: total_power, task_monitor") - } - total_allocated_power := 0.0 - total_running_tasks := 0 - for _, tasks := range task_monitor { - index := 0 - for i, task := range tasks { - if task.TaskID == finished_taskId { - index = i - continue - } - total_allocated_power += float64(task.Watts) * constants.Cap_margin - total_running_tasks++ - } - tasks = append(tasks[:index], tasks[index+1:]...) - } - average := total_allocated_power / float64(total_running_tasks) - ratios := []float64{} - for _, tpower := range total_power { - ratios = append(ratios, (average/tpower) * 100) - } - sort.Float64s(ratios) - median, err := stats.Median(ratios) - if err == nil { - return median, nil - } else { - return 100, err - } + task_monitor map[string][]def.Task, finished_taskId string) (float64, error) { + // Validation + if total_power == nil || task_monitor == nil { + return 100.0, errors.New("Invalid argument: total_power, task_monitor") + } + total_allocated_power := 0.0 + total_running_tasks := 0 + for _, tasks := range task_monitor { + index := 0 + for i, task := range tasks { + if task.TaskID == finished_taskId { + index = i + continue + } + total_allocated_power += float64(task.Watts) * constants.Cap_margin + total_running_tasks++ + } + tasks = append(tasks[:index], tasks[index+1:]...) 
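One subtlety in the removal just above: appending into the range variable alone does not shorten the slice header stored in the task-monitor map, so the shortened slice has to be written back under its host key, which is what the later revision of recap in this series does. A small sketch of that write-back, assuming tasks are identified by a TaskID string:

package main

import "fmt"

type task struct{ TaskID string }

// removeFinished deletes the task with the given ID from the per-host monitor.
func removeFinished(monitor map[string][]task, finishedID string) {
	for host, tasks := range monitor {
		for i, t := range tasks {
			if t.TaskID == finishedID {
				monitor[host] = append(tasks[:i], tasks[i+1:]...) // write back to the map
				return
			}
		}
	}
}

func main() {
	monitor := map[string][]task{"node1": {{"a-1"}, {"a-2"}}}
	removeFinished(monitor, "a-1")
	fmt.Println(monitor) // map[node1:[{a-2}]]
}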
+ } + average := total_allocated_power / float64(total_running_tasks) + ratios := []float64{} + for _, tpower := range total_power { + ratios = append(ratios, (average/tpower)*100) + } + sort.Float64s(ratios) + median, err := stats.Median(ratios) + if err == nil { + return median, nil + } else { + return 100, err + } } /* Quick sort algorithm to sort tasks, in place, in ascending order of power.*/ func (capper clusterwideCapper) quick_sort(low int, high int, tasks_to_sort []*def.Task) { - i := low - j := high - // calculating the pivot - pivot_index := low + (high - low)/2 - pivot := tasks_to_sort[pivot_index] - for i <= j { - for tasks_to_sort[i].Watts < pivot.Watts { - i++ - } - for tasks_to_sort[j].Watts > pivot.Watts { - j-- - } - if i <= j { - temp := tasks_to_sort[i] - tasks_to_sort[i] = tasks_to_sort[j] - tasks_to_sort[j] = temp - i++ - j-- - } - } - if low < j { - capper.quick_sort(low, j, tasks_to_sort) - } - if i < high { - capper.quick_sort(i, high, tasks_to_sort) - } + i := low + j := high + // calculating the pivot + pivot_index := low + (high-low)/2 + pivot := tasks_to_sort[pivot_index] + for i <= j { + for tasks_to_sort[i].Watts < pivot.Watts { + i++ + } + for tasks_to_sort[j].Watts > pivot.Watts { + j-- + } + if i <= j { + temp := tasks_to_sort[i] + tasks_to_sort[i] = tasks_to_sort[j] + tasks_to_sort[j] = temp + i++ + j-- + } + } + if low < j { + capper.quick_sort(low, j, tasks_to_sort) + } + if i < high { + capper.quick_sort(i, high, tasks_to_sort) + } } // Sorting tasks in ascending order of requested watts. func (capper clusterwideCapper) sort_tasks(tasks_to_sort []*def.Task) { - capper.quick_sort(0, len(tasks_to_sort)-1, tasks_to_sort) + capper.quick_sort(0, len(tasks_to_sort)-1, tasks_to_sort) } /* @@ -195,86 +196,86 @@ This completed task needs to be removed from the window of tasks (if it is still so that it doesn't contribute to the computation of the cap value. */ func (capper clusterwideCapper) taskFinished(taskID string) { - // If the window is empty the just return. This condition should technically return false. - if capper.window_of_tasks.Len() == 0 { - return - } + // If the window is empty the just return. This condition should technically return false. + if capper.window_of_tasks.Len() == 0 { + return + } - // Checking whether the task with the given taskID is currently present in the window of tasks. - var task_element_to_remove *list.Element - for task_element := capper.window_of_tasks.Front(); task_element != nil; task_element = task_element.Next() { - if tsk, ok := task_element.Value.(*def.Task); ok { - if tsk.TaskID == taskID { - task_element_to_remove = task_element - } - } - } + // Checking whether the task with the given taskID is currently present in the window of tasks. + var task_element_to_remove *list.Element + for task_element := capper.window_of_tasks.Front(); task_element != nil; task_element = task_element.Next() { + if tsk, ok := task_element.Value.(*def.Task); ok { + if tsk.TaskID == taskID { + task_element_to_remove = task_element + } + } + } - // Ee need to remove the task from the window. - if task_to_remove, ok := task_element_to_remove.Value.(*def.Task); ok { - capper.window_of_tasks.Remove(task_element_to_remove) - capper.number_of_tasks_in_window -= 1 - capper.current_sum -= float64(task_to_remove.Watts) * constants.Cap_margin - } + // Ee need to remove the task from the window. 
+ if task_to_remove, ok := task_element_to_remove.Value.(*def.Task); ok { + capper.window_of_tasks.Remove(task_element_to_remove) + capper.number_of_tasks_in_window -= 1 + capper.current_sum -= float64(task_to_remove.Watts) * constants.Cap_margin + } } // Ranked based scheduling. func (capper clusterwideCapper) rankedDetermineCap(available_power map[string]float64, - tasks_to_schedule []*def.Task) ([]*def.Task, map[int]float64, error) { - // Validation - if available_power == nil || len(tasks_to_schedule) == 0 { - return nil, nil, errors.New("Invalid argument: available_power, tasks_to_schedule") - } else { - // Need to sort the tasks in ascending order of requested power. - capper.sort_tasks(tasks_to_schedule) + tasks_to_schedule []*def.Task) ([]*def.Task, map[int]float64, error) { + // Validation + if available_power == nil || len(tasks_to_schedule) == 0 { + return nil, nil, errors.New("Invalid argument: available_power, tasks_to_schedule") + } else { + // Need to sort the tasks in ascending order of requested power. + capper.sort_tasks(tasks_to_schedule) - // Now, for each task in the sorted set of tasks, we need to use the Fcfs_determine_cap logic. - cluster_wide_cap_values := make(map[int]float64) - index := 0 - for _, tsk := range tasks_to_schedule { - /* - Note that even though Fcfs_determine_cap is called, we have sorted the tasks aprior and thus, the tasks are scheduled in the sorted fashion. - Calling Fcfs_determine_cap(...) just to avoid redundant code. - */ - if cap, err := capper.fcfsDetermineCap(available_power, tsk); err == nil { - cluster_wide_cap_values[index] = cap - } else { - return nil, nil, err - } - index++ - } - // Now returning the sorted set of tasks and the cluster wide cap values for each task that is launched. - return tasks_to_schedule, cluster_wide_cap_values, nil - } + // Now, for each task in the sorted set of tasks, we need to use the Fcfs_determine_cap logic. + cluster_wide_cap_values := make(map[int]float64) + index := 0 + for _, tsk := range tasks_to_schedule { + /* + Note that even though Fcfs_determine_cap is called, we have sorted the tasks aprior and thus, the tasks are scheduled in the sorted fashion. + Calling Fcfs_determine_cap(...) just to avoid redundant code. + */ + if cap, err := capper.fcfsDetermineCap(available_power, tsk); err == nil { + cluster_wide_cap_values[index] = cap + } else { + return nil, nil, err + } + index++ + } + // Now returning the sorted set of tasks and the cluster wide cap values for each task that is launched. + return tasks_to_schedule, cluster_wide_cap_values, nil + } } // First come first serve scheduling. func (capper clusterwideCapper) fcfsDetermineCap(total_power map[string]float64, - new_task *def.Task) (float64, error) { - // Validation - if total_power == nil { - return 100, errors.New("Invalid argument: total_power") - } else { - // Need to calculate the running average - running_average := capper.running_average_of_watts(new_task) - // For each node, calculate the percentage of the running average to the total power. - running_average_to_total_power_percentage := make(map[string]float64) - for host, tpower := range total_power { - if tpower >= running_average { - running_average_to_total_power_percentage[host] = (running_average/tpower) * 100 - } else { - // We don't consider this host for the computation of the cluster wide cap. 
- } - } + new_task *def.Task) (float64, error) { + // Validation + if total_power == nil { + return 100, errors.New("Invalid argument: total_power") + } else { + // Need to calculate the running average + running_average := capper.running_average_of_watts(new_task) + // For each node, calculate the percentage of the running average to the total power. + running_average_to_total_power_percentage := make(map[string]float64) + for host, tpower := range total_power { + if tpower >= running_average { + running_average_to_total_power_percentage[host] = (running_average / tpower) * 100 + } else { + // We don't consider this host for the computation of the cluster wide cap. + } + } - // Determine the cluster wide cap value. - cap_value := capper.get_cap(running_average_to_total_power_percentage) - // Need to cap the cluster to this value. - return cap_value, nil - } + // Determine the cluster wide cap value. + cap_value := capper.get_cap(running_average_to_total_power_percentage) + // Need to cap the cluster to this value. + return cap_value, nil + } } // Stringer for an instance of clusterwideCapper func (capper clusterwideCapper) string() string { - return "Cluster Capper -- Proactively cap the entire cluster." + return "Cluster Capper -- Proactively cap the entire cluster." } diff --git a/schedulers/proactiveclusterwidecappingfcfs.go b/schedulers/proactiveclusterwidecappingfcfs.go index 679686a..b12cb7c 100644 --- a/schedulers/proactiveclusterwidecappingfcfs.go +++ b/schedulers/proactiveclusterwidecappingfcfs.go @@ -1,111 +1,111 @@ package schedulers import ( - "bitbucket.org/sunybingcloud/electron/def" - "bitbucket.org/sunybingcloud/electron/constants" - "bitbucket.org/sunybingcloud/electron/rapl" - "fmt" - "github.com/golang/protobuf/proto" - mesos "github.com/mesos/mesos-go/mesosproto" - "github.com/mesos/mesos-go/mesosutil" - sched "github.com/mesos/mesos-go/scheduler" - "log" - "math" - "strings" - "time" + "bitbucket.org/sunybingcloud/electron/constants" + "bitbucket.org/sunybingcloud/electron/def" + "bitbucket.org/sunybingcloud/electron/rapl" + "fmt" + "github.com/golang/protobuf/proto" + mesos "github.com/mesos/mesos-go/mesosproto" + "github.com/mesos/mesos-go/mesosutil" + sched "github.com/mesos/mesos-go/scheduler" + "log" + "math" + "strings" + "time" ) // Decides if to take an offer or not func (_ *ProactiveClusterwideCapFCFS) takeOffer(offer *mesos.Offer, task def.Task) bool { - offer_cpu, offer_mem, _ := OfferAgg(offer) + offer_cpu, offer_mem, _ := OfferAgg(offer) - if offer_cpu >= task.CPU && offer_mem >= task.RAM { - return true - } - return false + if offer_cpu >= task.CPU && offer_mem >= task.RAM { + return true + } + return false } // electronScheduler implements the Scheduler interface. type ProactiveClusterwideCapFCFS struct { - tasksCreated int - tasksRunning int - tasks []def.Task - metrics map[string]def.Metric - running map[string]map[string]bool - taskMonitor map[string][]def.Task // store tasks that are currently running. - availablePower map[string]float64 // available power for each node in the cluster. - totalPower map[string]float64 // total power for each node in the cluster. - ignoreWatts bool - capper *clusterwideCapper - ticker *time.Ticker - isCapping bool // indicate whether we are currently performing cluster wide capping. - //lock *sync.Mutex + tasksCreated int + tasksRunning int + tasks []def.Task + metrics map[string]def.Metric + running map[string]map[string]bool + taskMonitor map[string][]def.Task // store tasks that are currently running. 
+ availablePower map[string]float64 // available power for each node in the cluster. + totalPower map[string]float64 // total power for each node in the cluster. + ignoreWatts bool + capper *clusterwideCapper + ticker *time.Ticker + isCapping bool // indicate whether we are currently performing cluster wide capping. + //lock *sync.Mutex - // First set of PCP values are garbage values, signal to logger to start recording when we're - // about to schedule the new task. - RecordPCP bool + // First set of PCP values are garbage values, signal to logger to start recording when we're + // about to schedule the new task. + RecordPCP bool - // This channel is closed when the program receives an interrupt, - // signalling that the program should shut down. - Shutdown chan struct{} + // This channel is closed when the program receives an interrupt, + // signalling that the program should shut down. + Shutdown chan struct{} - // This channel is closed after shutdown is closed, and only when all - // outstanding tasks have been cleaned up. - Done chan struct{} + // This channel is closed after shutdown is closed, and only when all + // outstanding tasks have been cleaned up. + Done chan struct{} - // Controls when to shutdown pcp logging. - PCPLog chan struct{} + // Controls when to shutdown pcp logging. + PCPLog chan struct{} } // New electron scheduler. func NewProactiveClusterwideCapFCFS(tasks []def.Task, ignoreWatts bool) *ProactiveClusterwideCapFCFS { - s := &ProactiveClusterwideCapFCFS { - tasks: tasks, - ignoreWatts: ignoreWatts, - Shutdown: make(chan struct{}), - Done: make(chan struct{}), - PCPLog: make(chan struct{}), - running: make(map[string]map[string]bool), - taskMonitor: make(map[string][]def.Task), - availablePower: make(map[string]float64), - totalPower: make(map[string]float64), - RecordPCP: false, - capper: getClusterwideCapperInstance(), - ticker: time.NewTicker(5 * time.Second), - isCapping: false, - //lock: new(sync.Mutex), - } - return s + s := &ProactiveClusterwideCapFCFS{ + tasks: tasks, + ignoreWatts: ignoreWatts, + Shutdown: make(chan struct{}), + Done: make(chan struct{}), + PCPLog: make(chan struct{}), + running: make(map[string]map[string]bool), + taskMonitor: make(map[string][]def.Task), + availablePower: make(map[string]float64), + totalPower: make(map[string]float64), + RecordPCP: false, + capper: getClusterwideCapperInstance(), + ticker: time.NewTicker(5 * time.Second), + isCapping: false, + //lock: new(sync.Mutex), + } + return s } func (s *ProactiveClusterwideCapFCFS) newTask(offer *mesos.Offer, task def.Task) *mesos.TaskInfo { - taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances) - s.tasksCreated++ + taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances) + s.tasksCreated++ - if !s.RecordPCP { - // Turn on logging. - s.RecordPCP = true - time.Sleep(1 * time.Second) // Make sure we're recording by the time the first task starts - } + if !s.RecordPCP { + // Turn on logging. + s.RecordPCP = true + time.Sleep(1 * time.Second) // Make sure we're recording by the time the first task starts + } - // If this is our first time running into this Agent - if _, ok := s.running[offer.GetSlaveId().GoString()]; !ok { - s.running[offer.GetSlaveId().GoString()] = make(map[string]bool) - } + // If this is our first time running into this Agent + if _, ok := s.running[offer.GetSlaveId().GoString()]; !ok { + s.running[offer.GetSlaveId().GoString()] = make(map[string]bool) + } - // Setting the task ID to the task. 
This is done so that we can consider each task to be different, - // even though they have the same parameters. - task.SetTaskID(*proto.String(taskName)) - // Add task to the list of tasks running on the node. - s.running[offer.GetSlaveId().GoString()][taskName] = true - s.taskMonitor[offer.GetSlaveId().GoString()] = []def.Task{task} + // Setting the task ID to the task. This is done so that we can consider each task to be different, + // even though they have the same parameters. + task.SetTaskID(*proto.String(taskName)) + // Add task to the list of tasks running on the node. + s.running[offer.GetSlaveId().GoString()][taskName] = true + s.taskMonitor[offer.GetSlaveId().GoString()] = []def.Task{task} - resources := []*mesos.Resource{ - mesosutil.NewScalarResource("cpus", task.CPU), - mesosutil.NewScalarResource("mem", task.RAM), - } + resources := []*mesos.Resource{ + mesosutil.NewScalarResource("cpus", task.CPU), + mesosutil.NewScalarResource("mem", task.RAM), + } - if !s.ignoreWatts { + if !s.ignoreWatts { resources = append(resources, mesosutil.NewScalarResource("watts", task.Watts)) } @@ -130,189 +130,189 @@ func (s *ProactiveClusterwideCapFCFS) newTask(offer *mesos.Offer, task def.Task) } func (s *ProactiveClusterwideCapFCFS) Registered( - _ sched.SchedulerDriver, - frameworkID *mesos.FrameworkID, - masterInfo *mesos.MasterInfo) { - log.Printf("Framework %s registered with master %s", frameworkID, masterInfo) + _ sched.SchedulerDriver, + frameworkID *mesos.FrameworkID, + masterInfo *mesos.MasterInfo) { + log.Printf("Framework %s registered with master %s", frameworkID, masterInfo) } func (s *ProactiveClusterwideCapFCFS) Reregistered(_ sched.SchedulerDriver, masterInfo *mesos.MasterInfo) { - log.Printf("Framework re-registered with master %s", masterInfo) + log.Printf("Framework re-registered with master %s", masterInfo) } func (s *ProactiveClusterwideCapFCFS) Disconnected(sched.SchedulerDriver) { - // Need to stop the capping process. - s.ticker.Stop() - s.isCapping = false - log.Println("Framework disconnected with master") + // Need to stop the capping process. + s.ticker.Stop() + s.isCapping = false + log.Println("Framework disconnected with master") } // go routine to cap the entire cluster in regular intervals of time. var currentCapValue = 0.0 // initial value to indicate that we haven't capped the cluster yet. func (s *ProactiveClusterwideCapFCFS) startCapping() { - go func() { - for { - select { - case <- s.ticker.C: - // Need to cap the cluster to the currentCapValue. - if currentCapValue > 0.0 { - //mutex.Lock() - //s.lock.Lock() - for _, host := range constants.Hosts { - // Rounding curreCapValue to the nearest int. - if err := rapl.Cap(host, "rapl", int(math.Floor(currentCapValue + 0.5))); err != nil { - fmt.Println(err) - } else { - fmt.Printf("Successfully capped %s to %f%\n", host, currentCapValue) - } - } - //mutex.Unlock() - //s.lock.Unlock() - } - } - } - }() + go func() { + for { + select { + case <-s.ticker.C: + // Need to cap the cluster to the currentCapValue. + if currentCapValue > 0.0 { + //mutex.Lock() + //s.lock.Lock() + for _, host := range constants.Hosts { + // Rounding curreCapValue to the nearest int. 
+ if err := rapl.Cap(host, "rapl", int(math.Floor(currentCapValue+0.5))); err != nil { + fmt.Println(err) + } else { + fmt.Printf("Successfully capped %s to %f%\n", host, currentCapValue) + } + } + //mutex.Unlock() + //s.lock.Unlock() + } + } + } + }() } // Stop cluster wide capping func (s *ProactiveClusterwideCapFCFS) stopCapping() { - if s.isCapping { - log.Println("Stopping the cluster wide capping.") - s.ticker.Stop() - s.isCapping = false - } + if s.isCapping { + log.Println("Stopping the cluster wide capping.") + s.ticker.Stop() + s.isCapping = false + } } // TODO: Need to reduce the time complexity: looping over offers twice (Possible to do it just once?). func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { - log.Printf("Received %d resource offers", len(offers)) + log.Printf("Received %d resource offers", len(offers)) - // retrieving the available power for all the hosts in the offers. - for _, offer := range offers { - _, _, offer_watts := OfferAgg(offer) - s.availablePower[*offer.Hostname] = offer_watts - // setting total power if the first time. - if _, ok := s.totalPower[*offer.Hostname]; !ok { - s.totalPower[*offer.Hostname] = offer_watts - } - } + // retrieving the available power for all the hosts in the offers. + for _, offer := range offers { + _, _, offer_watts := OfferAgg(offer) + s.availablePower[*offer.Hostname] = offer_watts + // setting total power if the first time. + if _, ok := s.totalPower[*offer.Hostname]; !ok { + s.totalPower[*offer.Hostname] = offer_watts + } + } - for host, tpower := range s.totalPower { - fmt.Printf("TotalPower[%s] = %f\n", host, tpower) - } - for host, apower := range s.availablePower { - fmt.Printf("AvailablePower[%s] = %f\n", host, apower) - } + for host, tpower := range s.totalPower { + fmt.Printf("TotalPower[%s] = %f\n", host, tpower) + } + for host, apower := range s.availablePower { + fmt.Printf("AvailablePower[%s] = %f\n", host, apower) + } - for _, offer := range offers { - select { - case <-s.Shutdown: - log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]") - driver.DeclineOffer(offer.Id, longFilter) + for _, offer := range offers { + select { + case <-s.Shutdown: + log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]") + driver.DeclineOffer(offer.Id, longFilter) - log.Println("Number of tasks still running: ", s.tasksRunning) - continue - default: - } + log.Println("Number of tasks still running: ", s.tasksRunning) + continue + default: + } - /* - Clusterwide Capping strategy + /* + Clusterwide Capping strategy - For each task in s.tasks, - 1. Need to check whether the offer can be taken or not (based on CPU and RAM requirements). - 2. If the tasks fits the offer, then I need to detemrine the cluster wide cap. - 3. currentCapValue is updated with the determined cluster wide cap. + For each task in s.tasks, + 1. Need to check whether the offer can be taken or not (based on CPU and RAM requirements). + 2. If the tasks fits the offer, then I need to detemrine the cluster wide cap. + 3. currentCapValue is updated with the determined cluster wide cap. - Cluster wide capping is currently performed at regular intervals of time. - TODO: We can choose to cap the cluster only if the clusterwide cap varies more than the current clusterwide cap. - Although this sounds like a better approach, it only works when the resource requirements of neighbouring tasks are similar. 
- */ - //offer_cpu, offer_ram, _ := OfferAgg(offer) + Cluster wide capping is currently performed at regular intervals of time. + TODO: We can choose to cap the cluster only if the clusterwide cap varies more than the current clusterwide cap. + Although this sounds like a better approach, it only works when the resource requirements of neighbouring tasks are similar. + */ + //offer_cpu, offer_ram, _ := OfferAgg(offer) - taken := false - //var mutex sync.Mutex + taken := false + //var mutex sync.Mutex - for i, task := range s.tasks { - // Don't take offer if it doesn't match our task's host requirement. - if !strings.HasPrefix(*offer.Hostname, task.Host) { - continue - } + for i, task := range s.tasks { + // Don't take offer if it doesn't match our task's host requirement. + if !strings.HasPrefix(*offer.Hostname, task.Host) { + continue + } - // Does the task fit. - if s.takeOffer(offer, task) { - // Capping the cluster if haven't yet started, - if !s.isCapping { - s.startCapping() - s.isCapping = true - } - taken = true - //mutex.Lock() - //s.lock.Lock() - //tempCap, err := s.capper.fcfsDetermineCap(s.availablePower, &task) - tempCap, err := s.capper.fcfsDetermineCap(s.totalPower, &task) + // Does the task fit. + if s.takeOffer(offer, task) { + // Capping the cluster if haven't yet started, + if !s.isCapping { + s.startCapping() + s.isCapping = true + } + taken = true + //mutex.Lock() + //s.lock.Lock() + //tempCap, err := s.capper.fcfsDetermineCap(s.availablePower, &task) + tempCap, err := s.capper.fcfsDetermineCap(s.totalPower, &task) - if err == nil { - currentCapValue = tempCap - } else { - fmt.Printf("Failed to determine new cluster wide cap: ") - fmt.Println(err) - } - //mutex.Unlock() - //s.lock.Unlock() - fmt.Printf("Starting on [%s]\n", offer.GetHostname()) - to_schedule := []*mesos.TaskInfo{s.newTask(offer, task)} - driver.LaunchTasks([]*mesos.OfferID{offer.Id}, to_schedule, defaultFilter) - fmt.Printf("Inst: %d", *task.Instances) - *task.Instances-- - if *task.Instances <= 0 { - // All instances of the task have been scheduled. Need to remove it from the list of tasks to schedule. - s.tasks[i] = s.tasks[len(s.tasks)-1] + if err == nil { + currentCapValue = tempCap + } else { + fmt.Printf("Failed to determine new cluster wide cap: ") + fmt.Println(err) + } + //mutex.Unlock() + //s.lock.Unlock() + fmt.Printf("Starting on [%s]\n", offer.GetHostname()) + to_schedule := []*mesos.TaskInfo{s.newTask(offer, task)} + driver.LaunchTasks([]*mesos.OfferID{offer.Id}, to_schedule, defaultFilter) + fmt.Printf("Inst: %d", *task.Instances) + *task.Instances-- + if *task.Instances <= 0 { + // All instances of the task have been scheduled. Need to remove it from the list of tasks to schedule. + s.tasks[i] = s.tasks[len(s.tasks)-1] s.tasks = s.tasks[:len(s.tasks)-1] if len(s.tasks) <= 0 { log.Println("Done scheduling all tasks") - // Need to stop the cluster wide capping as there aren't any more tasks to schedule. - s.stopCapping() + // Need to stop the cluster wide capping as there aren't any more tasks to schedule. + s.stopCapping() close(s.Shutdown) } - } - break // Offer taken, move on. - } else { - // Task doesn't fit the offer. Move onto the next offer. - } - } + } + break // Offer taken, move on. + } else { + // Task doesn't fit the offer. Move onto the next offer. + } + } - // If no task fit the offer, then declining the offer. 
- if !taken { - fmt.Printf("There is not enough resources to launch a task on Host: %s\n", offer.GetHostname()) - cpus, mem, watts := OfferAgg(offer) + // If no task fit the offer, then declining the offer. + if !taken { + fmt.Printf("There is not enough resources to launch a task on Host: %s\n", offer.GetHostname()) + cpus, mem, watts := OfferAgg(offer) - log.Printf("\n", cpus, mem, watts) - driver.DeclineOffer(offer.Id, defaultFilter) - } - } + log.Printf("\n", cpus, mem, watts) + driver.DeclineOffer(offer.Id, defaultFilter) + } + } } func (s *ProactiveClusterwideCapFCFS) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) { - log.Printf("Received task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) + log.Printf("Received task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) if *status.State == mesos.TaskState_TASK_RUNNING { s.tasksRunning++ } else if IsTerminal(status.State) { delete(s.running[status.GetSlaveId().GoString()], *status.TaskId.Value) - // Need to remove the task from the window of tasks. - s.capper.taskFinished(*status.TaskId.Value) - //currentCapValue, _ = s.capper.recap(s.availablePower, s.taskMonitor, *status.TaskId.Value) - // Determining the new cluster wide cap. - currentCapValue, _ = s.capper.recap(s.totalPower, s.taskMonitor, *status.TaskId.Value) - log.Printf("Recapping the cluster to %f\n", currentCapValue) + // Need to remove the task from the window of tasks. + s.capper.taskFinished(*status.TaskId.Value) + //currentCapValue, _ = s.capper.recap(s.availablePower, s.taskMonitor, *status.TaskId.Value) + // Determining the new cluster wide cap. + currentCapValue, _ = s.capper.recap(s.totalPower, s.taskMonitor, *status.TaskId.Value) + log.Printf("Recapping the cluster to %f\n", currentCapValue) s.tasksRunning-- if s.tasksRunning == 0 { select { case <-s.Shutdown: - // Need to stop the capping process. - s.stopCapping() + // Need to stop the capping process. 
+ s.stopCapping() close(s.Done) default: } @@ -322,20 +322,20 @@ func (s *ProactiveClusterwideCapFCFS) StatusUpdate(driver sched.SchedulerDriver, } func (s *ProactiveClusterwideCapFCFS) FrameworkMessage(driver sched.SchedulerDriver, - executorID *mesos.ExecutorID, - slaveID *mesos.SlaveID, - message string) { + executorID *mesos.ExecutorID, + slaveID *mesos.SlaveID, + message string) { log.Println("Getting a framework message: ", message) log.Printf("Received a framework message from some unknown source: %s", *executorID.Value) } func (s *ProactiveClusterwideCapFCFS) OfferRescinded(_ sched.SchedulerDriver, offerID *mesos.OfferID) { - log.Printf("Offer %s rescinded", offerID) + log.Printf("Offer %s rescinded", offerID) } func (s *ProactiveClusterwideCapFCFS) SlaveLost(_ sched.SchedulerDriver, slaveID *mesos.SlaveID) { - log.Printf("Slave %s lost", slaveID) + log.Printf("Slave %s lost", slaveID) } func (s *ProactiveClusterwideCapFCFS) ExecutorLost(_ sched.SchedulerDriver, executorID *mesos.ExecutorID, slaveID *mesos.SlaveID, status int) { From cd644bbf691a226172a7106aec05d009f7e03d4e Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Tue, 15 Nov 2016 15:11:00 -0500 Subject: [PATCH 070/102] Formatted the code --- def/task.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/def/task.go b/def/task.go index c7326c6..63668ad 100644 --- a/def/task.go +++ b/def/task.go @@ -17,7 +17,7 @@ type Task struct { CMD string `json:"cmd"` Instances *int `json:"inst"` Host string `json:"host"` - TaskID string `json:"taskID"` + TaskID string `json:"taskID"` } func TasksFromJSON(uri string) ([]Task, error) { @@ -42,10 +42,10 @@ func (tsk *Task) UpdateHost(new_host string) bool { // Validation is_correct_host := false for _, existing_host := range constants.Hosts { - if new_host == existing_host { - is_correct_host = true - } - } + if new_host == existing_host { + is_correct_host = true + } + } if !is_correct_host { return false } else { From c1eaa453a2a334410bd60b6681fd243f288e5f54 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 17 Nov 2016 21:51:02 -0500 Subject: [PATCH 071/102] Sycnrhonized operations that change the value of the cluster wide cap. Added cleverRecap(...) that determines the recap value of the cluster at a much finer level, taking into account the average load on each node in the cluster. Bug fix in cap.go -- closed the session once capping had been done. This prevented from running out of file descriptors. --- constants/constants.go | 20 +-- rapl/cap.go | 1 + schedulers/proactiveclusterwidecappers.go | 106 ++++++++++++-- schedulers/proactiveclusterwidecappingfcfs.go | 135 +++++++++++++----- 4 files changed, 194 insertions(+), 68 deletions(-) diff --git a/constants/constants.go b/constants/constants.go index 133d61f..cc6d705 100644 --- a/constants/constants.go +++ b/constants/constants.go @@ -34,7 +34,7 @@ var Power_threshold = 0.6 // Right now saying that a task will never be given le So, if power required = 10W, the node would be capped to 75%*10W. This value can be changed upon convenience. */ -var Cap_margin = 0.75 +var Cap_margin = 0.70 // Modify the cap margin. func UpdateCapMargin(new_cap_margin float64) bool { @@ -84,20 +84,4 @@ func UpdateWindowSize(new_window_size int) bool { Window_size = new_window_size return true } -} - -// // Time duration between successive cluster wide capping. -// var Clusterwide_cap_interval = 10 // Right now capping the cluster at 10 second intervals. -// -// // Modify the cluster wide capping interval. 
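The cap.go bug fix this commit message describes (the diff follows below) is about releasing the SSH session after each capping call so the scheduler stops leaking file descriptors. A hedged sketch of that pattern with golang.org/x/crypto/ssh; the host, user, credential and capping command are placeholders, and the sketch checks the error before deferring Close:

package main

import (
	"fmt"
	"log"

	"golang.org/x/crypto/ssh"
)

// capHost runs a hypothetical power-capping command on a remote host and
// always releases the SSH session before returning.
func capHost(client *ssh.Client, percentage int) error {
	session, err := client.NewSession()
	if err != nil {
		return fmt.Errorf("failed to create session: %v", err)
	}
	defer session.Close() // a missing Close is what exhausts file descriptors

	return session.Run(fmt.Sprintf("sudo ./rapl_cap.sh %d", percentage)) // placeholder command
}

func main() {
	config := &ssh.ClientConfig{
		User:            "rapl",                                   // placeholder user
		Auth:            []ssh.AuthMethod{ssh.Password("secret")}, // placeholder credential
		HostKeyCallback: ssh.InsecureIgnoreHostKey(),
	}
	client, err := ssh.Dial("tcp", "node1:22", config) // placeholder host
	if err != nil {
		log.Fatal(err)
	}
	defer client.Close()

	if err := capHost(client, 70); err != nil {
		log.Println(err)
	}
}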
We can update the interval depending on the workload. -// // TODO: If the workload is heavy then we can set a longer interval, while on the other hand, -// // if the workload is light then a smaller interval is sufficient. -// func UpdateClusterwideCapInterval(new_interval int) bool { -// // Validation -// if new_interval == 0.0 { -// return false -// } else { -// Clusterwide_cap_interval = new_interval -// return true -// } -// } +} \ No newline at end of file diff --git a/rapl/cap.go b/rapl/cap.go index 20cd945..b15d352 100644 --- a/rapl/cap.go +++ b/rapl/cap.go @@ -26,6 +26,7 @@ func Cap(host, username string, percentage int) error { } session, err := connection.NewSession() + defer session.Close() if err != nil { return errors.Wrap(err, "Failed to create session") } diff --git a/schedulers/proactiveclusterwidecappers.go b/schedulers/proactiveclusterwidecappers.go index aa3eafa..38aaca0 100644 --- a/schedulers/proactiveclusterwidecappers.go +++ b/schedulers/proactiveclusterwidecappers.go @@ -16,6 +16,7 @@ import ( "container/list" "errors" "github.com/montanaflynn/stats" + "log" "sort" ) @@ -110,6 +111,68 @@ func (capper clusterwideCapper) get_cap(running_average_to_total_power_percentag return 100.0 } +/* +Recapping the entire cluster. Also, removing the finished task from the list of running tasks. + +We would, at this point, have a better knowledge about the state of the cluster. + +1. Calculate the total allocated watts per node in the cluster. +2. Compute the ratio of the total watts usage per node to the total power for that node. + This would give us the load on that node. +3. Now, compute the average load across all the nodes in the cluster. + This would be the cap value. +*/ +func (capper clusterwideCapper) cleverRecap(total_power map[string]float64, + task_monitor map[string][]def.Task, finished_taskId string) (float64, error) { + // Validation + if total_power == nil || task_monitor == nil { + return 100.0, errors.New("Invalid argument: total_power, task_monitor") + } + // watts usage on each node in the cluster. + watts_usages := make(map[string][]float64) + host_of_finished_task := "" + index_of_finished_task := -1 + for _, host := range constants.Hosts { + watts_usages[host] = []float64{0.0} + } + for host, tasks := range task_monitor { + for i, task := range tasks { + if task.TaskID == finished_taskId { + host_of_finished_task = host + index_of_finished_task = i + // Not considering this task + continue + } + watts_usages[host] = append(watts_usages[host], float64(task.Watts) * constants.Cap_margin) + } + } + + // Updating task monitor + if host_of_finished_task != "" && index_of_finished_task != -1 { + log.Printf("Removing task with task [%s] from the list of running tasks\n", + task_monitor[host_of_finished_task][index_of_finished_task].TaskID) + task_monitor[host_of_finished_task] = append(task_monitor[host_of_finished_task][:index_of_finished_task], + task_monitor[host_of_finished_task][index_of_finished_task+1:]...) + } + + // load on each node in the cluster. + loads := []float64{} + for host, usages := range watts_usages { + total_usage := 0.0 + for _, usage := range usages { + total_usage += usage + } + loads = append(loads, total_usage / total_power[host]) + } + // Now need to compute the average load. + total_load := 0.0 + for _, load := range loads { + total_load += load + } + average_load := total_load / float64(len(loads)) // this would be the cap value. + return average_load, nil +} + /* Recapping the entire cluster. 
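The cleverRecap(...) added above derives the new cluster-wide cap from per-node load: allocated watts per host (requested watts scaled by the cap margin), divided by that host's total power, averaged across hosts. Below is a minimal, self-contained sketch of that step. The task type, host names and numbers are illustrative stand-ins for def.Task and the constants package, not Electron code, and the result is expressed as a percentage, matching the scaling fix applied later in this series (patch 081).

package main

import "fmt"

// illustrative stand-in for def.Task; only the fields needed here are kept.
type task struct {
    TaskID string
    Watts  float64
}

const capMargin = 0.70 // stand-in for constants.Cap_margin at this point in the series

// averageLoad sums the allocated watts per host, divides by that host's total
// power to get its load, and returns the mean load as a percentage.
func averageLoad(totalPower map[string]float64, running map[string][]task) float64 {
    if len(running) == 0 {
        return 100.0 // nothing running; leave the cluster uncapped
    }
    totalLoad := 0.0
    for host, tasks := range running {
        allocated := 0.0
        for _, t := range tasks {
            allocated += t.Watts * capMargin
        }
        totalLoad += allocated / totalPower[host]
    }
    return totalLoad / float64(len(running)) * 100.0
}

func main() {
    totalPower := map[string]float64{"host-a": 100.0, "host-b": 120.0}
    running := map[string][]task{
        "host-a": {{TaskID: "t1", Watts: 50}},
        "host-b": {{TaskID: "t2", Watts: 30}},
    }
    fmt.Printf("recap value: %.2f%%\n", averageLoad(totalPower, running))
    // Output: recap value: 26.25%
}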
@@ -128,18 +191,35 @@ func (capper clusterwideCapper) recap(total_power map[string]float64, } total_allocated_power := 0.0 total_running_tasks := 0 - for _, tasks := range task_monitor { - index := 0 - for i, task := range tasks { - if task.TaskID == finished_taskId { - index = i - continue - } - total_allocated_power += float64(task.Watts) * constants.Cap_margin - total_running_tasks++ - } - tasks = append(tasks[:index], tasks[index+1:]...) - } + + host_of_finished_task := "" + index_of_finished_task := -1 + for host, tasks := range task_monitor { + for i, task := range tasks { + if task.TaskID == finished_taskId { + host_of_finished_task = host + index_of_finished_task = i + // Not considering this task for the computation of total_allocated_power and total_running_tasks + continue + } + total_allocated_power += (float64(task.Watts) * constants.Cap_margin) + total_running_tasks++ + } + } + + // Updating task monitor + if host_of_finished_task != "" && index_of_finished_task != -1 { + log.Printf("Removing task with task [%s] from the list of running tasks\n", + task_monitor[host_of_finished_task][index_of_finished_task].TaskID) + task_monitor[host_of_finished_task] = append(task_monitor[host_of_finished_task][:index_of_finished_task], + task_monitor[host_of_finished_task][index_of_finished_task+1:]...) + } + + // For the last task, total_allocated_power and total_running_tasks would be 0 + if total_allocated_power == 0 && total_running_tasks == 0 { + return 100, errors.New("No task running on the cluster.") + } + average := total_allocated_power / float64(total_running_tasks) ratios := []float64{} for _, tpower := range total_power { @@ -211,7 +291,7 @@ func (capper clusterwideCapper) taskFinished(taskID string) { } } - // Ee need to remove the task from the window. + // we need to remove the task from the window. if task_to_remove, ok := task_element_to_remove.Value.(*def.Task); ok { capper.window_of_tasks.Remove(task_element_to_remove) capper.number_of_tasks_in_window -= 1 diff --git a/schedulers/proactiveclusterwidecappingfcfs.go b/schedulers/proactiveclusterwidecappingfcfs.go index b12cb7c..59c3ac5 100644 --- a/schedulers/proactiveclusterwidecappingfcfs.go +++ b/schedulers/proactiveclusterwidecappingfcfs.go @@ -12,14 +12,15 @@ import ( "log" "math" "strings" + "sync" "time" ) // Decides if to take an offer or not func (_ *ProactiveClusterwideCapFCFS) takeOffer(offer *mesos.Offer, task def.Task) bool { - offer_cpu, offer_mem, _ := OfferAgg(offer) + offer_cpu, offer_mem, offer_watts := OfferAgg(offer) - if offer_cpu >= task.CPU && offer_mem >= task.RAM { + if offer_cpu >= task.CPU && offer_mem >= task.RAM && offer_watts >= task.Watts { return true } return false @@ -38,8 +39,9 @@ type ProactiveClusterwideCapFCFS struct { ignoreWatts bool capper *clusterwideCapper ticker *time.Ticker + recapTicker *time.Ticker isCapping bool // indicate whether we are currently performing cluster wide capping. - //lock *sync.Mutex + isRecapping bool // indicate whether we are currently performing cluster wide re-capping. // First set of PCP values are garbage values, signal to logger to start recording when we're // about to schedule the new task. 
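The rewritten recap(...) above averages the allocated power over the tasks still running and, per the file's header comment (Step 2 and Step 3), turns that average into a percentage of each node's total power and caps the cluster at the median of those percentages. The tail of the function is not shown in this hunk, so the median step below is an assumption taken from that header comment; everything here is a sketch with placeholder inputs (the finished task is assumed to be already excluded from taskWatts), not the scheduler's own code.

package main

import (
    "fmt"
    "sort"
)

const capMargin = 0.70 // stand-in for constants.Cap_margin

// recapSketch: average the allocated watts of the running tasks, express that
// average as a percentage of each node's total power, and return the median
// of those percentages as the cluster-wide cap.
func recapSketch(totalPower map[string]float64, taskWatts []float64) (float64, error) {
    if len(taskWatts) == 0 {
        return 100.0, fmt.Errorf("no task running on the cluster")
    }
    allocated := 0.0
    for _, w := range taskWatts {
        allocated += w * capMargin
    }
    average := allocated / float64(len(taskWatts))

    ratios := make([]float64, 0, len(totalPower))
    for _, tpower := range totalPower {
        ratios = append(ratios, average/tpower*100.0)
    }
    sort.Float64s(ratios)
    n := len(ratios)
    if n%2 == 1 {
        return ratios[n/2], nil
    }
    return (ratios[n/2-1] + ratios[n/2]) / 2.0, nil
}

func main() {
    value, _ := recapSketch(map[string]float64{"host-a": 100, "host-b": 150}, []float64{50, 30})
    fmt.Printf("cap: %.2f%%\n", value) // cap: 23.33%
}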
@@ -71,13 +73,17 @@ func NewProactiveClusterwideCapFCFS(tasks []def.Task, ignoreWatts bool) *Proacti totalPower: make(map[string]float64), RecordPCP: false, capper: getClusterwideCapperInstance(), - ticker: time.NewTicker(5 * time.Second), + ticker: time.NewTicker(10 * time.Second), + recapTicker: time.NewTicker(20 * time.Second), isCapping: false, - //lock: new(sync.Mutex), + isRecapping: false, } return s } +// mutex +var mutex sync.Mutex + func (s *ProactiveClusterwideCapFCFS) newTask(offer *mesos.Offer, task def.Task) *mesos.TaskInfo { taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances) s.tasksCreated++ @@ -95,10 +101,14 @@ func (s *ProactiveClusterwideCapFCFS) newTask(offer *mesos.Offer, task def.Task) // Setting the task ID to the task. This is done so that we can consider each task to be different, // even though they have the same parameters. - task.SetTaskID(*proto.String(taskName)) + task.SetTaskID(*proto.String("electron-" + taskName)) // Add task to the list of tasks running on the node. s.running[offer.GetSlaveId().GoString()][taskName] = true - s.taskMonitor[offer.GetSlaveId().GoString()] = []def.Task{task} + if len(s.taskMonitor[offer.GetSlaveId().GoString()]) == 0 { + s.taskMonitor[offer.GetSlaveId().GoString()] = []def.Task{task} + } else { + s.taskMonitor[offer.GetSlaveId().GoString()] = append(s.taskMonitor[offer.GetSlaveId().GoString()], task) + } resources := []*mesos.Resource{ mesosutil.NewScalarResource("cpus", task.CPU), @@ -143,7 +153,10 @@ func (s *ProactiveClusterwideCapFCFS) Reregistered(_ sched.SchedulerDriver, mast func (s *ProactiveClusterwideCapFCFS) Disconnected(sched.SchedulerDriver) { // Need to stop the capping process. s.ticker.Stop() + s.recapTicker.Stop() + mutex.Lock() s.isCapping = false + mutex.Unlock() log.Println("Framework disconnected with master") } @@ -155,20 +168,44 @@ func (s *ProactiveClusterwideCapFCFS) startCapping() { select { case <-s.ticker.C: // Need to cap the cluster to the currentCapValue. + mutex.Lock() if currentCapValue > 0.0 { - //mutex.Lock() - //s.lock.Lock() for _, host := range constants.Hosts { // Rounding curreCapValue to the nearest int. if err := rapl.Cap(host, "rapl", int(math.Floor(currentCapValue+0.5))); err != nil { - fmt.Println(err) - } else { - fmt.Printf("Successfully capped %s to %f%\n", host, currentCapValue) + log.Println(err) } } - //mutex.Unlock() - //s.lock.Unlock() + log.Printf("Capped the cluster to %d", int(math.Floor(currentCapValue+0.5))) } + mutex.Unlock() + } + } + }() +} + +// go routine to cap the entire cluster in regular intervals of time. +var recapValue = 0.0 // The cluster wide cap value when recapping. +func (s *ProactiveClusterwideCapFCFS) startRecapping() { + go func() { + for { + select { + case <-s.recapTicker.C: + mutex.Lock() + // If stopped performing cluster wide capping then we need to explicitly cap the entire cluster. + //if !s.isCapping && s.isRecapping && recapValue > 0.0 { + if s.isRecapping && recapValue > 0.0 { + for _, host := range constants.Hosts { + // Rounding curreCapValue to the nearest int. 
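// Aside on the rounding idiom used throughout these schedulers: for a
// non-negative x, int(math.Floor(x+0.5)) rounds x to the nearest integer with
// halves rounding up; for the cap values handled here, int(math.Round(x))
// (available since Go 1.10) produces the same result.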
+ if err := rapl.Cap(host, "rapl", int(math.Floor(recapValue+0.5))); err != nil { + log.Println(err) + } + } + log.Printf("Recapped the cluster to %d", int(math.Floor(recapValue+0.5))) + } + // setting recapping to false + s.isRecapping = false + mutex.Unlock() } } }() @@ -179,7 +216,22 @@ func (s *ProactiveClusterwideCapFCFS) stopCapping() { if s.isCapping { log.Println("Stopping the cluster wide capping.") s.ticker.Stop() + mutex.Lock() s.isCapping = false + s.isRecapping = true + mutex.Unlock() + } +} + +// Stop cluster wide Recapping +func (s *ProactiveClusterwideCapFCFS) stopRecapping() { + // If not capping, then definitely recapping. + if !s.isCapping && s.isRecapping { + log.Println("Stopping the cluster wide re-capping.") + s.recapTicker.Stop() + mutex.Lock() + s.isRecapping = false + mutex.Unlock() } } @@ -198,10 +250,7 @@ func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDrive } for host, tpower := range s.totalPower { - fmt.Printf("TotalPower[%s] = %f\n", host, tpower) - } - for host, apower := range s.availablePower { - fmt.Printf("AvailablePower[%s] = %f\n", host, apower) + log.Printf("TotalPower[%s] = %f", host, tpower) } for _, offer := range offers { @@ -227,10 +276,7 @@ func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDrive TODO: We can choose to cap the cluster only if the clusterwide cap varies more than the current clusterwide cap. Although this sounds like a better approach, it only works when the resource requirements of neighbouring tasks are similar. */ - //offer_cpu, offer_ram, _ := OfferAgg(offer) - taken := false - //var mutex sync.Mutex for i, task := range s.tasks { // Don't take offer if it doesn't match our task's host requirement. @@ -242,27 +288,26 @@ func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDrive if s.takeOffer(offer, task) { // Capping the cluster if haven't yet started, if !s.isCapping { - s.startCapping() + mutex.Lock() s.isCapping = true + mutex.Unlock() + s.startCapping() } taken = true - //mutex.Lock() - //s.lock.Lock() - //tempCap, err := s.capper.fcfsDetermineCap(s.availablePower, &task) tempCap, err := s.capper.fcfsDetermineCap(s.totalPower, &task) if err == nil { + mutex.Lock() currentCapValue = tempCap + mutex.Unlock() } else { - fmt.Printf("Failed to determine new cluster wide cap: ") - fmt.Println(err) + log.Printf("Failed to determine new cluster wide cap: ") + log.Println(err) } - //mutex.Unlock() - //s.lock.Unlock() - fmt.Printf("Starting on [%s]\n", offer.GetHostname()) + log.Printf("Starting on [%s]\n", offer.GetHostname()) to_schedule := []*mesos.TaskInfo{s.newTask(offer, task)} driver.LaunchTasks([]*mesos.OfferID{offer.Id}, to_schedule, defaultFilter) - fmt.Printf("Inst: %d", *task.Instances) + log.Printf("Inst: %d", *task.Instances) *task.Instances-- if *task.Instances <= 0 { // All instances of the task have been scheduled. Need to remove it from the list of tasks to schedule. @@ -273,6 +318,7 @@ func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDrive log.Println("Done scheduling all tasks") // Need to stop the cluster wide capping as there aren't any more tasks to schedule. s.stopCapping() + s.startRecapping() // Load changes after every task finishes and hence we need to change the capping of the cluster. close(s.Shutdown) } } @@ -284,7 +330,7 @@ func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDrive // If no task fit the offer, then declining the offer. 
if !taken { - fmt.Printf("There is not enough resources to launch a task on Host: %s\n", offer.GetHostname()) + log.Printf("There is not enough resources to launch a task on Host: %s\n", offer.GetHostname()) cpus, mem, watts := OfferAgg(offer) log.Printf("\n", cpus, mem, watts) @@ -294,7 +340,7 @@ func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDrive } func (s *ProactiveClusterwideCapFCFS) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) { - log.Printf("Received task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) + log.Printf("Received task status [%s] for task [%s]\n", NameFor(status.State), *status.TaskId.Value) if *status.State == mesos.TaskState_TASK_RUNNING { s.tasksRunning++ @@ -302,17 +348,32 @@ func (s *ProactiveClusterwideCapFCFS) StatusUpdate(driver sched.SchedulerDriver, delete(s.running[status.GetSlaveId().GoString()], *status.TaskId.Value) // Need to remove the task from the window of tasks. s.capper.taskFinished(*status.TaskId.Value) - //currentCapValue, _ = s.capper.recap(s.availablePower, s.taskMonitor, *status.TaskId.Value) // Determining the new cluster wide cap. - currentCapValue, _ = s.capper.recap(s.totalPower, s.taskMonitor, *status.TaskId.Value) - log.Printf("Recapping the cluster to %f\n", currentCapValue) + tempCap, err := s.capper.recap(s.totalPower, s.taskMonitor, *status.TaskId.Value) + if err == nil { + // if new determined cap value is different from the current recap value then we need to recap. + if int(math.Floor(tempCap+0.5)) != int(math.Floor(recapValue+0.5)) { + recapValue = tempCap + mutex.Lock() + s.isRecapping = true + mutex.Unlock() + log.Printf("Determined re-cap value: %f\n", recapValue) + } else { + mutex.Lock() + s.isRecapping = false + mutex.Unlock() + } + } else { + // Not updating currentCapValue + log.Println(err) + } s.tasksRunning-- if s.tasksRunning == 0 { select { case <-s.Shutdown: // Need to stop the capping process. - s.stopCapping() + s.stopRecapping() close(s.Done) default: } From ec4f4e0f0374e8c19f2fc41d9d6b3a1fab875174 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Tue, 22 Nov 2016 17:02:58 -0500 Subject: [PATCH 072/102] removed rankedDetermineCap(...) as it was not needed. This algorithm has been integrated into proactiveclusterwidecappingranked.go --- schedulers/proactiveclusterwidecappers.go | 52 ++++++----------------- 1 file changed, 13 insertions(+), 39 deletions(-) diff --git a/schedulers/proactiveclusterwidecappers.go b/schedulers/proactiveclusterwidecappers.go index 38aaca0..6da6873 100644 --- a/schedulers/proactiveclusterwidecappers.go +++ b/schedulers/proactiveclusterwidecappers.go @@ -5,6 +5,7 @@ Step2. Compute what percentage of total power of each node, is the running avera Step3. Compute the median of the percetages and this is the percentage that the cluster needs to be capped at. 1. First fit scheduling -- Perform the above steps for each task that needs to be scheduled. +2. Ranked based scheduling -- Sort the tasks to be scheduled, in ascending order, and then determine the cluster wide cap. This is not a scheduler but a scheduling scheme that schedulers can use. */ @@ -121,6 +122,9 @@ We would, at this point, have a better knowledge about the state of the cluster. This would give us the load on that node. 3. Now, compute the average load across all the nodes in the cluster. This would be the cap value. + +Note: Although this would ensure lesser power usage, it might increase makespan if there is a heavy workload on just one node. 
+TODO: return a map[string]float64 that contains the recap value per node. This way, we can provide the right amount of power per node. */ func (capper clusterwideCapper) cleverRecap(total_power map[string]float64, task_monitor map[string][]def.Task, finished_taskId string) (float64, error) { @@ -235,23 +239,23 @@ func (capper clusterwideCapper) recap(total_power map[string]float64, } /* Quick sort algorithm to sort tasks, in place, in ascending order of power.*/ -func (capper clusterwideCapper) quick_sort(low int, high int, tasks_to_sort []*def.Task) { +func (capper clusterwideCapper) quick_sort(low int, high int, tasks_to_sort *[]def.Task) { i := low j := high // calculating the pivot pivot_index := low + (high-low)/2 - pivot := tasks_to_sort[pivot_index] + pivot := (*tasks_to_sort)[pivot_index] for i <= j { - for tasks_to_sort[i].Watts < pivot.Watts { + for (*tasks_to_sort)[i].Watts < pivot.Watts { i++ } - for tasks_to_sort[j].Watts > pivot.Watts { + for (*tasks_to_sort)[j].Watts > pivot.Watts { j-- } if i <= j { - temp := tasks_to_sort[i] - tasks_to_sort[i] = tasks_to_sort[j] - tasks_to_sort[j] = temp + temp := (*tasks_to_sort)[i] + (*tasks_to_sort)[i] = (*tasks_to_sort)[j] + (*tasks_to_sort)[j] = temp i++ j-- } @@ -265,8 +269,8 @@ func (capper clusterwideCapper) quick_sort(low int, high int, tasks_to_sort []*d } // Sorting tasks in ascending order of requested watts. -func (capper clusterwideCapper) sort_tasks(tasks_to_sort []*def.Task) { - capper.quick_sort(0, len(tasks_to_sort)-1, tasks_to_sort) +func (capper clusterwideCapper) sort_tasks(tasks_to_sort *[]def.Task) { + capper.quick_sort(0, len(*tasks_to_sort)-1, tasks_to_sort) } /* @@ -299,36 +303,6 @@ func (capper clusterwideCapper) taskFinished(taskID string) { } } -// Ranked based scheduling. -func (capper clusterwideCapper) rankedDetermineCap(available_power map[string]float64, - tasks_to_schedule []*def.Task) ([]*def.Task, map[int]float64, error) { - // Validation - if available_power == nil || len(tasks_to_schedule) == 0 { - return nil, nil, errors.New("Invalid argument: available_power, tasks_to_schedule") - } else { - // Need to sort the tasks in ascending order of requested power. - capper.sort_tasks(tasks_to_schedule) - - // Now, for each task in the sorted set of tasks, we need to use the Fcfs_determine_cap logic. - cluster_wide_cap_values := make(map[int]float64) - index := 0 - for _, tsk := range tasks_to_schedule { - /* - Note that even though Fcfs_determine_cap is called, we have sorted the tasks aprior and thus, the tasks are scheduled in the sorted fashion. - Calling Fcfs_determine_cap(...) just to avoid redundant code. - */ - if cap, err := capper.fcfsDetermineCap(available_power, tsk); err == nil { - cluster_wide_cap_values[index] = cap - } else { - return nil, nil, err - } - index++ - } - // Now returning the sorted set of tasks and the cluster wide cap values for each task that is launched. - return tasks_to_schedule, cluster_wide_cap_values, nil - } -} - // First come first serve scheduling. func (capper clusterwideCapper) fcfsDetermineCap(total_power map[string]float64, new_task *def.Task) (float64, error) { From 4bc81707e03d9537cdb4166189c51baf98efc5fa Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Tue, 22 Nov 2016 17:04:30 -0500 Subject: [PATCH 073/102] Added another line that needs to be uncommented to choose cleverRecap. 
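The hunk above changes quick_sort and sort_tasks to take a *[]def.Task so the caller's slice is reordered in place. For comparison, the same ascending-by-watts ordering can be written with the standard library; the sketch below uses a placeholder task type and is not a drop-in replacement for the scheduler code. A plain []task already shares its backing array with the caller, so an in-place reorder is visible without the pointer; the pointer additionally lets the callee change the slice's length, which sorting does not need.

package main

import (
    "fmt"
    "sort"
)

type task struct {
    TaskID string
    Watts  float64
}

// sortByWatts orders tasks in ascending order of requested watts, in place.
func sortByWatts(tasks []task) {
    sort.Slice(tasks, func(i, j int) bool { return tasks[i].Watts < tasks[j].Watts })
}

func main() {
    tasks := []task{{"t1", 90}, {"t2", 20}, {"t3", 55}}
    sortByWatts(tasks)
    fmt.Println(tasks) // [{t2 20} {t3 55} {t1 90}]
}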
--- schedulers/proactiveclusterwidecappingfcfs.go | 73 ++++++++++--------- 1 file changed, 37 insertions(+), 36 deletions(-) diff --git a/schedulers/proactiveclusterwidecappingfcfs.go b/schedulers/proactiveclusterwidecappingfcfs.go index 59c3ac5..c352cb1 100644 --- a/schedulers/proactiveclusterwidecappingfcfs.go +++ b/schedulers/proactiveclusterwidecappingfcfs.go @@ -82,7 +82,7 @@ func NewProactiveClusterwideCapFCFS(tasks []def.Task, ignoreWatts bool) *Proacti } // mutex -var mutex sync.Mutex +var fcfsMutex sync.Mutex func (s *ProactiveClusterwideCapFCFS) newTask(offer *mesos.Offer, task def.Task) *mesos.TaskInfo { taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances) @@ -154,58 +154,58 @@ func (s *ProactiveClusterwideCapFCFS) Disconnected(sched.SchedulerDriver) { // Need to stop the capping process. s.ticker.Stop() s.recapTicker.Stop() - mutex.Lock() + fcfsMutex.Lock() s.isCapping = false - mutex.Unlock() + fcfsMutex.Unlock() log.Println("Framework disconnected with master") } // go routine to cap the entire cluster in regular intervals of time. -var currentCapValue = 0.0 // initial value to indicate that we haven't capped the cluster yet. +var fcfsCurrentCapValue = 0.0 // initial value to indicate that we haven't capped the cluster yet. func (s *ProactiveClusterwideCapFCFS) startCapping() { go func() { for { select { case <-s.ticker.C: - // Need to cap the cluster to the currentCapValue. - mutex.Lock() - if currentCapValue > 0.0 { + // Need to cap the cluster to the fcfsCurrentCapValue. + fcfsMutex.Lock() + if fcfsCurrentCapValue > 0.0 { for _, host := range constants.Hosts { // Rounding curreCapValue to the nearest int. - if err := rapl.Cap(host, "rapl", int(math.Floor(currentCapValue+0.5))); err != nil { + if err := rapl.Cap(host, "rapl", int(math.Floor(fcfsCurrentCapValue+0.5))); err != nil { log.Println(err) } } - log.Printf("Capped the cluster to %d", int(math.Floor(currentCapValue+0.5))) + log.Printf("Capped the cluster to %d", int(math.Floor(fcfsCurrentCapValue+0.5))) } - mutex.Unlock() + fcfsMutex.Unlock() } } }() } // go routine to cap the entire cluster in regular intervals of time. -var recapValue = 0.0 // The cluster wide cap value when recapping. +var fcfsRecapValue = 0.0 // The cluster wide cap value when recapping. func (s *ProactiveClusterwideCapFCFS) startRecapping() { go func() { for { select { case <-s.recapTicker.C: - mutex.Lock() + fcfsMutex.Lock() // If stopped performing cluster wide capping then we need to explicitly cap the entire cluster. - //if !s.isCapping && s.isRecapping && recapValue > 0.0 { - if s.isRecapping && recapValue > 0.0 { + //if !s.isCapping && s.isRecapping && fcfsRecapValue > 0.0 { + if s.isRecapping && fcfsRecapValue > 0.0 { for _, host := range constants.Hosts { // Rounding curreCapValue to the nearest int. 
- if err := rapl.Cap(host, "rapl", int(math.Floor(recapValue+0.5))); err != nil { + if err := rapl.Cap(host, "rapl", int(math.Floor(fcfsRecapValue+0.5))); err != nil { log.Println(err) } } - log.Printf("Recapped the cluster to %d", int(math.Floor(recapValue+0.5))) + log.Printf("Recapped the cluster to %d", int(math.Floor(fcfsRecapValue+0.5))) } // setting recapping to false s.isRecapping = false - mutex.Unlock() + fcfsMutex.Unlock() } } }() @@ -216,10 +216,10 @@ func (s *ProactiveClusterwideCapFCFS) stopCapping() { if s.isCapping { log.Println("Stopping the cluster wide capping.") s.ticker.Stop() - mutex.Lock() + fcfsMutex.Lock() s.isCapping = false s.isRecapping = true - mutex.Unlock() + fcfsMutex.Unlock() } } @@ -229,9 +229,9 @@ func (s *ProactiveClusterwideCapFCFS) stopRecapping() { if !s.isCapping && s.isRecapping { log.Println("Stopping the cluster wide re-capping.") s.recapTicker.Stop() - mutex.Lock() + fcfsMutex.Lock() s.isRecapping = false - mutex.Unlock() + fcfsMutex.Unlock() } } @@ -270,7 +270,7 @@ func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDrive For each task in s.tasks, 1. Need to check whether the offer can be taken or not (based on CPU and RAM requirements). 2. If the tasks fits the offer, then I need to detemrine the cluster wide cap. - 3. currentCapValue is updated with the determined cluster wide cap. + 3. fcfsCurrentCapValue is updated with the determined cluster wide cap. Cluster wide capping is currently performed at regular intervals of time. TODO: We can choose to cap the cluster only if the clusterwide cap varies more than the current clusterwide cap. @@ -288,18 +288,18 @@ func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDrive if s.takeOffer(offer, task) { // Capping the cluster if haven't yet started, if !s.isCapping { - mutex.Lock() + fcfsMutex.Lock() s.isCapping = true - mutex.Unlock() + fcfsMutex.Unlock() s.startCapping() } taken = true tempCap, err := s.capper.fcfsDetermineCap(s.totalPower, &task) if err == nil { - mutex.Lock() - currentCapValue = tempCap - mutex.Unlock() + fcfsMutex.Lock() + fcfsCurrentCapValue = tempCap + fcfsMutex.Unlock() } else { log.Printf("Failed to determine new cluster wide cap: ") log.Println(err) @@ -350,21 +350,22 @@ func (s *ProactiveClusterwideCapFCFS) StatusUpdate(driver sched.SchedulerDriver, s.capper.taskFinished(*status.TaskId.Value) // Determining the new cluster wide cap. tempCap, err := s.capper.recap(s.totalPower, s.taskMonitor, *status.TaskId.Value) + //tempCap, err := s.capper.cleverRecap(s.totalPower, s.taskMonitor, *status.TaskId.Value) if err == nil { // if new determined cap value is different from the current recap value then we need to recap. - if int(math.Floor(tempCap+0.5)) != int(math.Floor(recapValue+0.5)) { - recapValue = tempCap - mutex.Lock() + if int(math.Floor(tempCap+0.5)) != int(math.Floor(fcfsRecapValue+0.5)) { + fcfsRecapValue = tempCap + fcfsMutex.Lock() s.isRecapping = true - mutex.Unlock() - log.Printf("Determined re-cap value: %f\n", recapValue) + fcfsMutex.Unlock() + log.Printf("Determined re-cap value: %f\n", fcfsRecapValue) } else { - mutex.Lock() + fcfsMutex.Lock() s.isRecapping = false - mutex.Unlock() + fcfsMutex.Unlock() } } else { - // Not updating currentCapValue + // Not updating fcfsCurrentCapValue log.Println(err) } @@ -372,7 +373,7 @@ func (s *ProactiveClusterwideCapFCFS) StatusUpdate(driver sched.SchedulerDriver, if s.tasksRunning == 0 { select { case <-s.Shutdown: - // Need to stop the capping process. 
+ // Need to stop the recapping process. s.stopRecapping() close(s.Done) default: From 87e2c802dd6a60d7acf711e00281daaa51a09680 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Tue, 22 Nov 2016 17:07:08 -0500 Subject: [PATCH 074/102] Praoctive cluster wide capping after ranking the tasks based on the requested watts --- .../proactiveclusterwidecappingranked.go | 423 ++++++++++++++++++ 1 file changed, 423 insertions(+) create mode 100644 schedulers/proactiveclusterwidecappingranked.go diff --git a/schedulers/proactiveclusterwidecappingranked.go b/schedulers/proactiveclusterwidecappingranked.go new file mode 100644 index 0000000..0ef4d77 --- /dev/null +++ b/schedulers/proactiveclusterwidecappingranked.go @@ -0,0 +1,423 @@ +/* +Ranked based cluster wide capping. + +Note: Sorting the tasks right in the beginning, in ascending order of watts. + You are hence certain that the tasks that didn't fit are the ones that require more resources, + and hence, you can find a way to address that issue. + On the other hand, if you use first fit to fit the tasks and then sort them to determine the cap, + you are never certain as which tasks are the ones that don't fit and hence, it becomes much harder + to address this issue. +*/ +package schedulers + +import ( + "bitbucket.org/sunybingcloud/electron/constants" + "bitbucket.org/sunybingcloud/electron/def" + "bitbucket.org/sunybingcloud/electron/rapl" + "fmt" + "github.com/golang/protobuf/proto" + mesos "github.com/mesos/mesos-go/mesosproto" + "github.com/mesos/mesos-go/mesosutil" + sched "github.com/mesos/mesos-go/scheduler" + "log" + "math" + "strings" + "sync" + "time" +) + +// Decides if to taken an offer or not +func (_ *ProactiveClusterwideCapRanked) takeOffer(offer *mesos.Offer, task def.Task) bool { + offer_cpu, offer_mem, offer_watts := OfferAgg(offer) + + if offer_cpu >= task.CPU && offer_mem >= task.RAM && offer_watts >= task.Watts { + return true + } + return false +} + +// electronScheduler implements the Scheduler interface +type ProactiveClusterwideCapRanked struct { + tasksCreated int + tasksRunning int + tasks []def.Task + metrics map[string]def.Metric + running map[string]map[string]bool + taskMonitor map[string][]def.Task // store tasks that are currently running. + availablePower map[string]float64 // available power for each node in the cluster. + totalPower map[string]float64 // total power for each node in the cluster. + ignoreWatts bool + capper *clusterwideCapper + ticker *time.Ticker + recapTicker *time.Ticker + isCapping bool // indicate whether we are currently performing cluster wide capping. + isRecapping bool // indicate whether we are currently performing cluster wide re-capping. + + // First set of PCP values are garbage values, signal to logger to start recording when we're + // about to schedule the new task. + RecordPCP bool + + // This channel is closed when the program receives an interrupt, + // signalling that the program should shut down. + Shutdown chan struct{} + + // This channel is closed after shutdown is closed, and only when all + // outstanding tasks have been cleaned up. + Done chan struct{} + + // Controls when to shutdown pcp logging. + PCPLog chan struct{} +} + +// New electron scheduler. 
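The Shutdown and Done channels declared in the struct above drive teardown the same way as in the other schedulers: Shutdown is closed to broadcast that no more offers should be accepted, and Done is closed once the last running task has been cleaned up. A small self-contained sketch of that pattern follows; the coordinator type and handleOffer are illustrative stand-ins, not Electron code.

package main

import "fmt"

type coordinator struct {
    Shutdown chan struct{} // closed to signal shutdown
    Done     chan struct{} // closed when the last task has been cleaned up
}

// handleOffer mirrors the select-with-default check the schedulers perform on
// every offer: once Shutdown is closed, offers are declined, and Done is
// closed as soon as nothing is left running.
func (c *coordinator) handleOffer(id int, tasksRunning int) {
    select {
    case <-c.Shutdown:
        fmt.Printf("declining offer %d: shutting down\n", id)
        if tasksRunning == 0 {
            close(c.Done)
        }
    default:
        fmt.Printf("considering offer %d\n", id)
    }
}

func main() {
    c := &coordinator{Shutdown: make(chan struct{}), Done: make(chan struct{})}
    c.handleOffer(1, 0) // handled normally
    close(c.Shutdown)   // signal shutdown
    c.handleOffer(2, 0) // declined; nothing running, so Done is closed
    <-c.Done
    fmt.Println("all cleaned up")
}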
+func NewProactiveClusterwideCapRanked(tasks []def.Task, ignoreWatts bool) *ProactiveClusterwideCapRanked { + s := &ProactiveClusterwideCapRanked{ + tasks: tasks, + ignoreWatts: ignoreWatts, + Shutdown: make(chan struct{}), + Done: make(chan struct{}), + PCPLog: make(chan struct{}), + running: make(map[string]map[string]bool), + taskMonitor: make(map[string][]def.Task), + availablePower: make(map[string]float64), + totalPower: make(map[string]float64), + RecordPCP: false, + capper: getClusterwideCapperInstance(), + ticker: time.NewTicker(10 * time.Second), + recapTicker: time.NewTicker(20 * time.Second), + isCapping: false, + isRecapping: false, + } + return s +} + +// mutex +var rankedMutex sync.Mutex + +func (s *ProactiveClusterwideCapRanked) newTask(offer *mesos.Offer, task def.Task) *mesos.TaskInfo { + taskName := fmt.Sprintf("%s-%d", task.Name, *task.Instances) + s.tasksCreated++ + + if !s.RecordPCP { + // Turn on logging. + s.RecordPCP = true + time.Sleep(1 * time.Second) // Make sure we're recording by the time the first task starts + } + + // If this is our first time running into this Agent + if _, ok := s.running[offer.GetSlaveId().GoString()]; !ok { + s.running[offer.GetSlaveId().GoString()] = make(map[string]bool) + } + + // Setting the task ID to the task. This is done so that we can consider each task to be different, + // even though they have the same parameters. + task.SetTaskID(*proto.String("electron-" + taskName)) + // Add task to the list of tasks running on the node. + s.running[offer.GetSlaveId().GoString()][taskName] = true + if len(s.taskMonitor[offer.GetSlaveId().GoString()]) == 0 { + s.taskMonitor[offer.GetSlaveId().GoString()] = []def.Task{task} + } else { + s.taskMonitor[offer.GetSlaveId().GoString()] = append(s.taskMonitor[offer.GetSlaveId().GoString()], task) + } + + resources := []*mesos.Resource{ + mesosutil.NewScalarResource("cpus", task.CPU), + mesosutil.NewScalarResource("mem", task.RAM), + } + + if !s.ignoreWatts { + resources = append(resources, mesosutil.NewScalarResource("watts", task.Watts)) + } + + return &mesos.TaskInfo{ + Name: proto.String(taskName), + TaskId: &mesos.TaskID{ + Value: proto.String("electron-" + taskName), + }, + SlaveId: offer.SlaveId, + Resources: resources, + Command: &mesos.CommandInfo{ + Value: proto.String(task.CMD), + }, + Container: &mesos.ContainerInfo{ + Type: mesos.ContainerInfo_DOCKER.Enum(), + Docker: &mesos.ContainerInfo_DockerInfo{ + Image: proto.String(task.Image), + Network: mesos.ContainerInfo_DockerInfo_BRIDGE.Enum(), // Run everything isolated + }, + }, + } +} + +func (s *ProactiveClusterwideCapRanked) Registered( + _ sched.SchedulerDriver, + frameworkID *mesos.FrameworkID, + masterInfo *mesos.MasterInfo) { + log.Printf("Framework %s registered with master %s", frameworkID, masterInfo) +} + +func (s *ProactiveClusterwideCapRanked) Reregistered(_ sched.SchedulerDriver, masterInfo *mesos.MasterInfo) { + log.Printf("Framework re-registered with master %s", masterInfo) +} + +func (s *ProactiveClusterwideCapRanked) Disconnected(sched.SchedulerDriver) { + // Need to stop the capping process. + s.ticker.Stop() + s.recapTicker.Stop() + rankedMutex.Lock() + s.isCapping = false + rankedMutex.Unlock() + log.Println("Framework disconnected with master") +} + +// go routine to cap the entire cluster in regular intervals of time. +var rankedCurrentCapValue = 0.0 // initial value to indicate that we haven't capped the cluster yet. 
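startCapping, defined next, repeats the ticker-plus-mutex pattern already used by the FCFS scheduler: a goroutine wakes on a time.Ticker, reads the mutex-protected cap value, and applies it to every host. A stripped-down, self-contained sketch of that loop is below; applyCap is a hypothetical stand-in for rapl.Cap, and the stop channel exists only so the example terminates (the schedulers instead call ticker.Stop(), which silences the ticks without returning from the goroutine, since Stop does not close the ticker's channel).

package main

import (
    "fmt"
    "math"
    "sync"
    "time"
)

var (
    mu       sync.Mutex
    capValue float64 // cluster-wide cap, updated by the scheduler
)

// applyCap is a hypothetical stand-in for rapl.Cap(host, "rapl", percentage).
func applyCap(host string, percentage int) error {
    fmt.Printf("capping %s to %d%%\n", host, percentage)
    return nil
}

// startCapping periodically applies the current cap to every host.
func startCapping(hosts []string, ticker *time.Ticker, stop <-chan struct{}) {
    go func() {
        for {
            select {
            case <-ticker.C:
                mu.Lock()
                if capValue > 0.0 {
                    rounded := int(math.Floor(capValue + 0.5)) // round to nearest int
                    for _, host := range hosts {
                        if err := applyCap(host, rounded); err != nil {
                            fmt.Println(err)
                        }
                    }
                }
                mu.Unlock()
            case <-stop:
                return
            }
        }
    }()
}

func main() {
    hosts := []string{"host-a", "host-b"}
    ticker := time.NewTicker(100 * time.Millisecond)
    defer ticker.Stop()
    stop := make(chan struct{})
    startCapping(hosts, ticker, stop)

    mu.Lock()
    capValue = 42.6
    mu.Unlock()

    time.Sleep(250 * time.Millisecond) // let a couple of ticks fire
    close(stop)
}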
+func (s *ProactiveClusterwideCapRanked) startCapping() { + go func() { + for { + select { + case <-s.ticker.C: + // Need to cap the cluster to the rankedCurrentCapValue. + rankedMutex.Lock() + if rankedCurrentCapValue > 0.0 { + for _, host := range constants.Hosts { + // Rounding curreCapValue to the nearest int. + if err := rapl.Cap(host, "rapl", int(math.Floor(rankedCurrentCapValue+0.5))); err != nil { + log.Println(err) + } + } + log.Printf("Capped the cluster to %d", int(math.Floor(rankedCurrentCapValue+0.5))) + } + rankedMutex.Unlock() + } + } + }() +} + +// go routine to cap the entire cluster in regular intervals of time. +var rankedRecapValue = 0.0 // The cluster wide cap value when recapping. +func (s *ProactiveClusterwideCapRanked) startRecapping() { + go func() { + for { + select { + case <-s.recapTicker.C: + rankedMutex.Lock() + // If stopped performing cluster wide capping then we need to explicitly cap the entire cluster. + //if !s.isCapping && s.isRecapping && rankedRecapValue > 0.0 { + if s.isRecapping && rankedRecapValue > 0.0 { + for _, host := range constants.Hosts { + // Rounding curreCapValue to the nearest int. + if err := rapl.Cap(host, "rapl", int(math.Floor(rankedRecapValue+0.5))); err != nil { + log.Println(err) + } + } + log.Printf("Recapped the cluster to %d", int(math.Floor(rankedRecapValue+0.5))) + } + // setting recapping to false + s.isRecapping = false + rankedMutex.Unlock() + } + } + }() +} + +// Stop cluster wide capping +func (s *ProactiveClusterwideCapRanked) stopCapping() { + log.Println("Stopping the cluster wide capping.") + s.ticker.Stop() + rankedMutex.Lock() + s.isCapping = false + s.isRecapping = true + rankedMutex.Unlock() +} + +// Stop cluster wide Recapping +func (s *ProactiveClusterwideCapRanked) stopRecapping() { + log.Println("Stopping the cluster wide re-capping.") + s.recapTicker.Stop() + rankedMutex.Lock() + s.isRecapping = false + rankedMutex.Unlock() +} + +func (s *ProactiveClusterwideCapRanked) ResouceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { + log.Printf("Received %d resource offers", len(offers)) + + // retrieving the available power for all the hosts in the offers. + for _, offer := range offers { + _, _, offer_watts := OfferAgg(offer) + s.availablePower[*offer.Hostname] = offer_watts + // setting total power if the first time. + if _, ok := s.totalPower[*offer.Hostname]; !ok { + s.totalPower[*offer.Hostname] = offer_watts + } + } + + for host, tpower := range s.totalPower { + log.Printf("TotalPower[%s] = %f", host, tpower) + } + + // sorting the tasks in ascending order of watts. + s.capper.sort_tasks(&s.tasks) + // displaying the ranked tasks. + log.Println("The ranked tasks are:\n---------------------\n\t[") + for rank, task := range s.tasks { + log.Printf("\t\t%d: %s\n", rank+1, task.TaskID) + } + log.Println("\t]") + + for _, offer := range offers { + select { + case <-s.Shutdown: + log.Println("Done scheduling tasks: declining offer on [", offer.GetHostname(), "]") + driver.DeclineOffer(offer.Id, longFilter) + + log.Println("Number of tasks still running: ", s.tasksRunning) + continue + default: + } + + /* + Ranked cluster wide capping strategy + + For each task in the sorted tasks, + 1. Need to check whether the offer can be taken or not (based on CPU, RAM and WATTS requirements). + 2. If the task fits the offer, then need to determine the cluster wide cap.' + 3. rankedCurrentCapValue is updated with the determined cluster wide cap. 
+ + Once we are done scheduling all the tasks, + we start recalculating the cluster wide cap each time a task finishes. + + Cluster wide capping is currently performed at regular intervals of time. + */ + taken := false + + for i, task := range s.tasks { + // Don't take offer if it doesn't match our task's host requirement. + if !strings.HasPrefix(*offer.Hostname, task.Host) { + continue + } + + // Does the task fit. + if s.takeOffer(offer, task) { + // Capping the cluster if haven't yet started + if !s.isCapping { + rankedMutex.Lock() + s.isCapping = true + rankedMutex.Unlock() + s.startCapping() + } + taken = true + tempCap, err := s.capper.fcfsDetermineCap(s.totalPower, &task) + + if err == nil { + rankedMutex.Lock() + rankedCurrentCapValue = tempCap + rankedMutex.Unlock() + } else { + log.Println("Failed to determine the new cluster wide cap: ", err) + } + log.Printf("Starting on [%s]\n", offer.GetHostname()) + to_schedule := []*mesos.TaskInfo{s.newTask(offer, task)} + driver.LaunchTasks([]*mesos.OfferID{offer.Id}, to_schedule, defaultFilter) + log.Printf("Inst: %d", *task.Instances) + *task.Instances-- + if *task.Instances <= 0 { + // All instances of the task have been scheduled. Need to remove it from the list of tasks to schedule. + s.tasks[i] = s.tasks[len(s.tasks)-1] + s.tasks = s.tasks[:len(s.tasks)-1] + + if len(s.tasks) <= 0 { + log.Println("Done scheduling all tasks") + // Need to stop the cluster wide capping as there aren't any more tasks to schedule. + s.stopCapping() + s.startRecapping() + close(s.Shutdown) + } + } + break // Offer taken, move on. + } else { + // Task doesn't fit the offer. Move onto the next offer. + } + } + + // If no tasks fit the offer, then declining the offer. + if !taken { + log.Printf("There is not enough resources to launch a task on Host: %s\n", offer.GetHostname()) + cpus, mem, watts := OfferAgg(offer) + + log.Printf("\n", cpus, mem, watts) + driver.DeclineOffer(offer.Id, defaultFilter) + } + } +} + +func (s *ProactiveClusterwideCapRanked) StatusUpdate(driver sched.SchedulerDriver, status *mesos.TaskStatus) { + log.Printf("Received task status [%s] for task [%s]\n", NameFor(status.State), *status.TaskId.Value) + + if *status.State == mesos.TaskState_TASK_RUNNING { + s.tasksRunning++ + } else if IsTerminal(status.State) { + delete(s.running[status.GetSlaveId().GoString()], *status.TaskId.Value) + s.tasksRunning-- + if s.tasksRunning == 0 { + select { + case <-s.Shutdown: + // Need to stop the recapping process. + s.stopRecapping() + close(s.Done) + default: + } + } else { + // Need to remove the task from the window + s.capper.taskFinished(*status.TaskId.Value) + // Determining the new cluster wide cap. + tempCap, err := s.capper.recap(s.totalPower, s.taskMonitor, *status.TaskId.Value) + // tempCap, err := s.capper.cleverRecap(s.totalPower, s.taskMonitor, *status.TaskId.Value) + + if err == nil { + // If new determined cap value is different from the current recap value then we need to recap. 
+ if int(math.Floor(tempCap+0.5)) != int(math.Floor(rankedRecapValue+0.5)) { + rankedRecapValue = tempCap + rankedMutex.Lock() + s.isRecapping = true + rankedMutex.Unlock() + log.Printf("Determined re-cap value: %f\n", rankedRecapValue) + } else { + rankedMutex.Lock() + s.isRecapping = false + rankedMutex.Unlock() + } + } else { + // Not updating rankedCurrentCapValue + log.Println(err) + } + } + } + log.Printf("DONE: Task status [%s] for task [%s]", NameFor(status.State), *status.TaskId.Value) +} + +func (s *ProactiveClusterwideCapRanked) FrameworkMessage(driver sched.SchedulerDriver, + executorID *mesos.ExecutorID, + slaveID *mesos.SlaveID, + message string) { + + log.Println("Getting a framework message: ", message) + log.Printf("Received a framework message from some unknown source: %s", *executorID.Value) +} + +func (s *ProactiveClusterwideCapRanked) OfferRescinded(_ sched.SchedulerDriver, offerID *mesos.OfferID) { + log.Printf("Offer %s rescinded", offerID) +} + +func (s *ProactiveClusterwideCapRanked) SlaveLost(_ sched.SchedulerDriver, slaveID *mesos.SlaveID) { + log.Printf("Slave %s lost", slaveID) +} + +func (s *ProactiveClusterwideCapRanked) ExecutorLost(_ sched.SchedulerDriver, executorID *mesos.ExecutorID, slaveID *mesos.SlaveID, status int) { + log.Printf("Executor %s on slave %s was lost", executorID, slaveID) +} + +func (s *ProactiveClusterwideCapRanked) Error(_ sched.SchedulerDriver, err string) { + log.Printf("Receiving an error: %s", err) +} From 1184016f8c74c35016896fe4e337a273038df07c Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Tue, 22 Nov 2016 17:08:27 -0500 Subject: [PATCH 075/102] changed the window size and capmargin to create differernt configurations. No chnage made to the code. --- constants/constants.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/constants/constants.go b/constants/constants.go index cc6d705..6563715 100644 --- a/constants/constants.go +++ b/constants/constants.go @@ -34,7 +34,7 @@ var Power_threshold = 0.6 // Right now saying that a task will never be given le So, if power required = 10W, the node would be capped to 75%*10W. This value can be changed upon convenience. */ -var Cap_margin = 0.70 +var Cap_margin = 0.7 // Modify the cap margin. 
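// A concrete reading of the margin (illustrative aside, not part of the
// patch): a task requesting 10 W contributes Cap_margin * 10 W to the
// allocated power that the cappers sum when deriving the cluster-wide cap,
// i.e. 7 W at the 0.7 margin set here and 5 W at the 0.50 margin adopted
// later in this series.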
func UpdateCapMargin(new_cap_margin float64) bool { From d555c379243836a7dd34a4319fa974baa5f045c8 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Wed, 23 Nov 2016 19:18:19 -0500 Subject: [PATCH 076/102] Made a mention to use --help option to get more information about the other command line options --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ae50170..aa625e4 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ machine on which electron is launched for logging to work** -How to run: +How to run (Use the --help option to get information about other command-line options): `./electron -workload -ignoreWatts ` From e5aaf2dbe9c67b2eba7a28f471f83a0188f1986b Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Wed, 23 Nov 2016 19:18:56 -0500 Subject: [PATCH 077/102] README to list the different scheduling algorithms defined --- schedulers/README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 schedulers/README.md diff --git a/schedulers/README.md b/schedulers/README.md new file mode 100644 index 0000000..7d725ae --- /dev/null +++ b/schedulers/README.md @@ -0,0 +1,11 @@ +Electron: Scheduling Algorithms +================================ + +To Do: + * Design changes -- Possible to have one scheduler with different scheduling schemes? + +Scheduling Algorithms: + * Bin-packing with sorted watts + * FCFS Proactive Cluster-wide Capping + * First Fit + * First Fit with sorted watts From 5329fce402656285f087eeeb15ad92496b55d8c9 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Wed, 23 Nov 2016 19:20:22 -0500 Subject: [PATCH 078/102] Fixed an error in markdown --- schedulers/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/schedulers/README.md b/schedulers/README.md index 7d725ae..58b9f04 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -5,6 +5,7 @@ To Do: * Design changes -- Possible to have one scheduler with different scheduling schemes? Scheduling Algorithms: + * Bin-packing with sorted watts * FCFS Proactive Cluster-wide Capping * First Fit From 8bbecf9d5f6bc9721bd9e0d9f231091160e3be67 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Wed, 23 Nov 2016 19:24:27 -0500 Subject: [PATCH 079/102] Markdown fix in README --- schedulers/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/schedulers/README.md b/schedulers/README.md index 58b9f04..15c0d74 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -2,6 +2,7 @@ Electron: Scheduling Algorithms ================================ To Do: + * Design changes -- Possible to have one scheduler with different scheduling schemes? Scheduling Algorithms: From 42e28d69e1463061e358042074f977bc0aa1be52 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Fri, 25 Nov 2016 16:04:11 -0500 Subject: [PATCH 080/102] changed the hosts from stratos-00x to stratos-00x.cs.binghamton.edu --- constants/constants.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/constants/constants.go b/constants/constants.go index 6563715..5dc98ea 100644 --- a/constants/constants.go +++ b/constants/constants.go @@ -10,10 +10,10 @@ Also, exposing functions to update or initialize some of the constants. 
*/ package constants -var Hosts = []string{"stratos-001", "stratos-002", - "stratos-003", "stratos-004", - "stratos-005", "stratos-006", - "stratos-007", "stratos-008"} +var Hosts = []string{"stratos-001.cs.binghamton.edu", "stratos-002.cs.binghamton.edu", + "stratos-003.cs.binghamton.edu", "stratos-004.cs.binghamton.edu", + "stratos-005.cs.binghamton.edu", "stratos-006.cs.binghamton.edu", + "stratos-007.cs.binghamton.edu", "stratos-008.cs.binghamton.edu"} // Add a new host to the slice of hosts. func AddNewHost(new_host string) bool { @@ -34,7 +34,7 @@ var Power_threshold = 0.6 // Right now saying that a task will never be given le So, if power required = 10W, the node would be capped to 75%*10W. This value can be changed upon convenience. */ -var Cap_margin = 0.7 +var Cap_margin = 0.50 // Modify the cap margin. func UpdateCapMargin(new_cap_margin float64) bool { @@ -73,7 +73,7 @@ func AddTotalPowerForHost(host string, total_power float64) bool { } // Window size for running average -var Window_size = 10 +var Window_size = 160 // Update the window size. func UpdateWindowSize(new_window_size int) bool { From a255351952b5d74257065f9281091984433ad07d Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Fri, 25 Nov 2016 16:05:55 -0500 Subject: [PATCH 081/102] fixed bug in cleverRecap(...). Now we switch from the primitive recap to the clever recap as the cap determined by the later would be lesser when the cluster is relatively idle. --- schedulers/proactiveclusterwidecappers.go | 91 ++++++++++++++--------- 1 file changed, 57 insertions(+), 34 deletions(-) diff --git a/schedulers/proactiveclusterwidecappers.go b/schedulers/proactiveclusterwidecappers.go index 6da6873..505ac76 100644 --- a/schedulers/proactiveclusterwidecappers.go +++ b/schedulers/proactiveclusterwidecappers.go @@ -113,18 +113,11 @@ func (capper clusterwideCapper) get_cap(running_average_to_total_power_percentag } /* -Recapping the entire cluster. Also, removing the finished task from the list of running tasks. +A recapping strategy which decides between 2 different recapping schemes. +1. the regular scheme based on the average power usage across the cluster. +2. A scheme based on the average of the loads on each node in the cluster. -We would, at this point, have a better knowledge about the state of the cluster. - -1. Calculate the total allocated watts per node in the cluster. -2. Compute the ratio of the total watts usage per node to the total power for that node. - This would give us the load on that node. -3. Now, compute the average load across all the nodes in the cluster. - This would be the cap value. - -Note: Although this would ensure lesser power usage, it might increase makespan if there is a heavy workload on just one node. -TODO: return a map[string]float64 that contains the recap value per node. This way, we can provide the right amount of power per node. +The recap value picked the least among the two. */ func (capper clusterwideCapper) cleverRecap(total_power map[string]float64, task_monitor map[string][]def.Task, finished_taskId string) (float64, error) { @@ -132,49 +125,79 @@ func (capper clusterwideCapper) cleverRecap(total_power map[string]float64, if total_power == nil || task_monitor == nil { return 100.0, errors.New("Invalid argument: total_power, task_monitor") } + + // determining the recap value by calling the regular recap(...) 
+ toggle := false + recapValue, err := capper.recap(total_power, task_monitor, finished_taskId) + if err == nil { + toggle = true + } + // watts usage on each node in the cluster. watts_usages := make(map[string][]float64) host_of_finished_task := "" - index_of_finished_task := -1 + index_of_finished_task := -1 for _, host := range constants.Hosts { watts_usages[host] = []float64{0.0} } for host, tasks := range task_monitor { for i, task := range tasks { if task.TaskID == finished_taskId { - host_of_finished_task = host - index_of_finished_task = i - // Not considering this task - continue - } + host_of_finished_task = host + index_of_finished_task = i + // Not considering this task for the computation of total_allocated_power and total_running_tasks + continue + } watts_usages[host] = append(watts_usages[host], float64(task.Watts) * constants.Cap_margin) } } - // Updating task monitor + // Updating task monitor. If recap(...) has deleted the finished task from the taskMonitor, + // then this will be ignored. if host_of_finished_task != "" && index_of_finished_task != -1 { log.Printf("Removing task with task [%s] from the list of running tasks\n", - task_monitor[host_of_finished_task][index_of_finished_task].TaskID) + task_monitor[host_of_finished_task][index_of_finished_task].TaskID) task_monitor[host_of_finished_task] = append(task_monitor[host_of_finished_task][:index_of_finished_task], - task_monitor[host_of_finished_task][index_of_finished_task+1:]...) + task_monitor[host_of_finished_task][index_of_finished_task+1:]...) } - // load on each node in the cluster. - loads := []float64{} - for host, usages := range watts_usages { - total_usage := 0.0 - for _, usage := range usages { - total_usage += usage + // Need to check whether there are still tasks running on the cluster. If not then we return an error. + clusterIdle := true + for _, tasks := range task_monitor { + if len(tasks) > 0 { + clusterIdle = false + } + } + + if !clusterIdle { + // load on each node in the cluster. + loads := []float64{0.0} + for host, usages := range watts_usages { + total_usage := 0.0 + for _, usage := range usages { + total_usage += usage + } + loads = append(loads, total_usage / total_power[host]) + } + + // Now need to compute the average load. + total_load := 0.0 + for _, load := range loads { + total_load += load + } + average_load := (total_load / float64(len(loads)) * 100.0) // this would be the cap value. + // If toggle is true, then we need to return the least recap value. + if toggle { + if average_load <= recapValue { + return average_load, nil + } else { + return recapValue, nil + } + } else { + return average_load, nil } - loads = append(loads, total_usage / total_power[host]) } - // Now need to compute the average load. - total_load := 0.0 - for _, load := range loads { - total_load += load - } - average_load := total_load / float64(len(loads)) // this would be the cap value. - return average_load, nil + return 100.0, errors.New("No task running on the cluster.") } /* From f6ec974182bd12b407fc21a3df4c177b46351cdb Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Fri, 25 Nov 2016 16:19:45 -0500 Subject: [PATCH 082/102] changed the keys in taskMonitor from offer.SlaveId() to offer.Hostname. 
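Patch 081 above leaves cleverRecap computing two estimates, the task-average recap and the load-based average, and keeping the smaller of the two (falling back to the load-based value when recap returns an error). A compact sketch of that selection, with the two estimators reduced to plain inputs; the function name is illustrative. The next patch re-keys taskMonitor by offer hostname, presumably so its keys line up with totalPower and constants.Hosts, both of which the recapping code indexes by hostname.

package main

import "fmt"

// chooseRecap returns the lesser of the regular recap value and the
// load-based average when both are available, and falls back to the
// load-based average when the regular recap failed.
func chooseRecap(regularRecap float64, regularOK bool, loadAverage float64) float64 {
    if regularOK && regularRecap < loadAverage {
        return regularRecap
    }
    return loadAverage
}

func main() {
    fmt.Println(chooseRecap(35.0, true, 26.0)) // 26: the load-based estimate is lower
    fmt.Println(chooseRecap(20.0, true, 26.0)) // 20: the regular recap is lower
    fmt.Println(chooseRecap(0, false, 26.0))   // 26: regular recap unavailable
}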
--- schedulers/proactiveclusterwidecappingfcfs.go | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/schedulers/proactiveclusterwidecappingfcfs.go b/schedulers/proactiveclusterwidecappingfcfs.go index c352cb1..c749071 100644 --- a/schedulers/proactiveclusterwidecappingfcfs.go +++ b/schedulers/proactiveclusterwidecappingfcfs.go @@ -104,10 +104,10 @@ func (s *ProactiveClusterwideCapFCFS) newTask(offer *mesos.Offer, task def.Task) task.SetTaskID(*proto.String("electron-" + taskName)) // Add task to the list of tasks running on the node. s.running[offer.GetSlaveId().GoString()][taskName] = true - if len(s.taskMonitor[offer.GetSlaveId().GoString()]) == 0 { - s.taskMonitor[offer.GetSlaveId().GoString()] = []def.Task{task} + if len(s.taskMonitor[*offer.Hostname]) == 0 { + s.taskMonitor[*offer.Hostname] = []def.Task{task} } else { - s.taskMonitor[offer.GetSlaveId().GoString()] = append(s.taskMonitor[offer.GetSlaveId().GoString()], task) + s.taskMonitor[*offer.Hostname] = append(s.taskMonitor[*offer.Hostname], task) } resources := []*mesos.Resource{ @@ -349,8 +349,8 @@ func (s *ProactiveClusterwideCapFCFS) StatusUpdate(driver sched.SchedulerDriver, // Need to remove the task from the window of tasks. s.capper.taskFinished(*status.TaskId.Value) // Determining the new cluster wide cap. - tempCap, err := s.capper.recap(s.totalPower, s.taskMonitor, *status.TaskId.Value) - //tempCap, err := s.capper.cleverRecap(s.totalPower, s.taskMonitor, *status.TaskId.Value) + //tempCap, err := s.capper.recap(s.totalPower, s.taskMonitor, *status.TaskId.Value) + tempCap, err := s.capper.cleverRecap(s.totalPower, s.taskMonitor, *status.TaskId.Value) if err == nil { // if new determined cap value is different from the current recap value then we need to recap. if int(math.Floor(tempCap+0.5)) != int(math.Floor(fcfsRecapValue+0.5)) { From 81d795f197905a3008724a281d1e0bac36978641 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Fri, 25 Nov 2016 17:42:08 -0500 Subject: [PATCH 083/102] formatted the code --- constants/constants.go | 79 +++++++------ schedulers/proactiveclusterwidecappers.go | 105 +++++++++--------- schedulers/proactiveclusterwidecappingfcfs.go | 2 +- utilities/utils.go | 38 +++---- 4 files changed, 113 insertions(+), 111 deletions(-) diff --git a/constants/constants.go b/constants/constants.go index 5dc98ea..2d9b516 100644 --- a/constants/constants.go +++ b/constants/constants.go @@ -11,19 +11,19 @@ Also, exposing functions to update or initialize some of the constants. package constants var Hosts = []string{"stratos-001.cs.binghamton.edu", "stratos-002.cs.binghamton.edu", - "stratos-003.cs.binghamton.edu", "stratos-004.cs.binghamton.edu", - "stratos-005.cs.binghamton.edu", "stratos-006.cs.binghamton.edu", - "stratos-007.cs.binghamton.edu", "stratos-008.cs.binghamton.edu"} + "stratos-003.cs.binghamton.edu", "stratos-004.cs.binghamton.edu", + "stratos-005.cs.binghamton.edu", "stratos-006.cs.binghamton.edu", + "stratos-007.cs.binghamton.edu", "stratos-008.cs.binghamton.edu"} // Add a new host to the slice of hosts. func AddNewHost(new_host string) bool { - // Validation - if new_host == "" { - return false - } else { - Hosts = append(Hosts, new_host) - return true - } + // Validation + if new_host == "" { + return false + } else { + Hosts = append(Hosts, new_host) + return true + } } // Lower bound of the percentage of requested power, that can be allocated to a task. @@ -38,16 +38,15 @@ var Cap_margin = 0.50 // Modify the cap margin. 
func UpdateCapMargin(new_cap_margin float64) bool { - // Checking if the new_cap_margin is less than the power threshold. - if new_cap_margin < Starvation_factor { - return false - } else { - Cap_margin = new_cap_margin - return true - } + // Checking if the new_cap_margin is less than the power threshold. + if new_cap_margin < Starvation_factor { + return false + } else { + Cap_margin = new_cap_margin + return true + } } - // Threshold factor that would make (Cap_margin * task.Watts) equal to (60/100 * task.Watts). var Starvation_factor = 0.8 @@ -56,32 +55,32 @@ var Total_power map[string]float64 // Initialize the total power per node. This should be done before accepting any set of tasks for scheduling. func AddTotalPowerForHost(host string, total_power float64) bool { - // Validation - is_correct_host := false - for _, existing_host := range Hosts { - if host == existing_host { - is_correct_host = true - } - } + // Validation + is_correct_host := false + for _, existing_host := range Hosts { + if host == existing_host { + is_correct_host = true + } + } - if !is_correct_host { - return false - } else { - Total_power[host] = total_power - return true - } + if !is_correct_host { + return false + } else { + Total_power[host] = total_power + return true + } } // Window size for running average -var Window_size = 160 +var Window_size = 10 // Update the window size. func UpdateWindowSize(new_window_size int) bool { - // Validation - if new_window_size == 0 { - return false - } else{ - Window_size = new_window_size - return true - } -} \ No newline at end of file + // Validation + if new_window_size == 0 { + return false + } else { + Window_size = new_window_size + return true + } +} diff --git a/schedulers/proactiveclusterwidecappers.go b/schedulers/proactiveclusterwidecappers.go index 505ac76..5de8a47 100644 --- a/schedulers/proactiveclusterwidecappers.go +++ b/schedulers/proactiveclusterwidecappers.go @@ -17,7 +17,7 @@ import ( "container/list" "errors" "github.com/montanaflynn/stats" - "log" + "log" "sort" ) @@ -118,8 +118,11 @@ A recapping strategy which decides between 2 different recapping schemes. 2. A scheme based on the average of the loads on each node in the cluster. The recap value picked the least among the two. + +The cleverRecap scheme works well when the cluster is relatively idle and until then, + the primitive recapping scheme works better. */ -func (capper clusterwideCapper) cleverRecap(total_power map[string]float64, +func (capper clusterwideCapper) cleverRecap(total_power map[string]float64, task_monitor map[string][]def.Task, finished_taskId string) (float64, error) { // Validation if total_power == nil || task_monitor == nil { @@ -136,48 +139,48 @@ func (capper clusterwideCapper) cleverRecap(total_power map[string]float64, // watts usage on each node in the cluster. 
watts_usages := make(map[string][]float64) host_of_finished_task := "" - index_of_finished_task := -1 + index_of_finished_task := -1 for _, host := range constants.Hosts { watts_usages[host] = []float64{0.0} } for host, tasks := range task_monitor { for i, task := range tasks { if task.TaskID == finished_taskId { - host_of_finished_task = host - index_of_finished_task = i - // Not considering this task for the computation of total_allocated_power and total_running_tasks - continue - } - watts_usages[host] = append(watts_usages[host], float64(task.Watts) * constants.Cap_margin) + host_of_finished_task = host + index_of_finished_task = i + // Not considering this task for the computation of total_allocated_power and total_running_tasks + continue + } + watts_usages[host] = append(watts_usages[host], float64(task.Watts)*constants.Cap_margin) } } // Updating task monitor. If recap(...) has deleted the finished task from the taskMonitor, - // then this will be ignored. - if host_of_finished_task != "" && index_of_finished_task != -1 { - log.Printf("Removing task with task [%s] from the list of running tasks\n", - task_monitor[host_of_finished_task][index_of_finished_task].TaskID) - task_monitor[host_of_finished_task] = append(task_monitor[host_of_finished_task][:index_of_finished_task], - task_monitor[host_of_finished_task][index_of_finished_task+1:]...) - } + // then this will be ignored. Else (this is only when an error occured with recap(...)), we remove it here. + if host_of_finished_task != "" && index_of_finished_task != -1 { + log.Printf("Removing task with task [%s] from the list of running tasks\n", + task_monitor[host_of_finished_task][index_of_finished_task].TaskID) + task_monitor[host_of_finished_task] = append(task_monitor[host_of_finished_task][:index_of_finished_task], + task_monitor[host_of_finished_task][index_of_finished_task+1:]...) + } - // Need to check whether there are still tasks running on the cluster. If not then we return an error. - clusterIdle := true - for _, tasks := range task_monitor { - if len(tasks) > 0 { - clusterIdle = false - } - } + // Need to check whether there are still tasks running on the cluster. If not then we return an error. + clusterIdle := true + for _, tasks := range task_monitor { + if len(tasks) > 0 { + clusterIdle = false + } + } - if !clusterIdle { - // load on each node in the cluster. + if !clusterIdle { + // load on each node in the cluster. loads := []float64{0.0} for host, usages := range watts_usages { total_usage := 0.0 for _, usage := range usages { total_usage += usage } - loads = append(loads, total_usage / total_power[host]) + loads = append(loads, total_usage/total_power[host]) } // Now need to compute the average load. 
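The hunk above derives a recap value from cluster load: per-host allocated watts divided by that host's total power, averaged across hosts and scaled to a percentage. A minimal standalone sketch of just that calculation, with illustrative names and data that are not taken from these patches:

package main

import "fmt"

// averageClusterLoad mirrors the load-based recap idea sketched above:
// per-host load is the allocated watts on that host divided by the host's
// total power, and the recap value is the mean load across hosts, in percent.
func averageClusterLoad(wattsUsages map[string][]float64, totalPower map[string]float64) float64 {
	loads := []float64{}
	for host, usages := range wattsUsages {
		totalUsage := 0.0
		for _, usage := range usages {
			totalUsage += usage
		}
		loads = append(loads, totalUsage/totalPower[host])
	}
	totalLoad := 0.0
	for _, load := range loads {
		totalLoad += load
	}
	return (totalLoad / float64(len(loads))) * 100.0
}

func main() {
	// Hypothetical usage and capacity figures, for illustration only.
	usages := map[string][]float64{"host-a": {30.0, 45.0}, "host-b": {20.0}}
	power := map[string]float64{"host-a": 150.0, "host-b": 100.0}
	fmt.Printf("recap value: %.2f%%\n", averageClusterLoad(usages, power))
}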
@@ -219,33 +222,33 @@ func (capper clusterwideCapper) recap(total_power map[string]float64, total_allocated_power := 0.0 total_running_tasks := 0 - host_of_finished_task := "" - index_of_finished_task := -1 - for host, tasks := range task_monitor { - for i, task := range tasks { - if task.TaskID == finished_taskId { - host_of_finished_task = host - index_of_finished_task = i - // Not considering this task for the computation of total_allocated_power and total_running_tasks - continue - } - total_allocated_power += (float64(task.Watts) * constants.Cap_margin) - total_running_tasks++ - } - } + host_of_finished_task := "" + index_of_finished_task := -1 + for host, tasks := range task_monitor { + for i, task := range tasks { + if task.TaskID == finished_taskId { + host_of_finished_task = host + index_of_finished_task = i + // Not considering this task for the computation of total_allocated_power and total_running_tasks + continue + } + total_allocated_power += (float64(task.Watts) * constants.Cap_margin) + total_running_tasks++ + } + } - // Updating task monitor - if host_of_finished_task != "" && index_of_finished_task != -1 { - log.Printf("Removing task with task [%s] from the list of running tasks\n", - task_monitor[host_of_finished_task][index_of_finished_task].TaskID) - task_monitor[host_of_finished_task] = append(task_monitor[host_of_finished_task][:index_of_finished_task], - task_monitor[host_of_finished_task][index_of_finished_task+1:]...) - } + // Updating task monitor + if host_of_finished_task != "" && index_of_finished_task != -1 { + log.Printf("Removing task with task [%s] from the list of running tasks\n", + task_monitor[host_of_finished_task][index_of_finished_task].TaskID) + task_monitor[host_of_finished_task] = append(task_monitor[host_of_finished_task][:index_of_finished_task], + task_monitor[host_of_finished_task][index_of_finished_task+1:]...) + } - // For the last task, total_allocated_power and total_running_tasks would be 0 - if total_allocated_power == 0 && total_running_tasks == 0 { - return 100, errors.New("No task running on the cluster.") - } + // For the last task, total_allocated_power and total_running_tasks would be 0 + if total_allocated_power == 0 && total_running_tasks == 0 { + return 100, errors.New("No task running on the cluster.") + } average := total_allocated_power / float64(total_running_tasks) ratios := []float64{} diff --git a/schedulers/proactiveclusterwidecappingfcfs.go b/schedulers/proactiveclusterwidecappingfcfs.go index c749071..4a13574 100644 --- a/schedulers/proactiveclusterwidecappingfcfs.go +++ b/schedulers/proactiveclusterwidecappingfcfs.go @@ -76,7 +76,7 @@ func NewProactiveClusterwideCapFCFS(tasks []def.Task, ignoreWatts bool) *Proacti ticker: time.NewTicker(10 * time.Second), recapTicker: time.NewTicker(20 * time.Second), isCapping: false, - isRecapping: false, + isRecapping: false, } return s } diff --git a/utilities/utils.go b/utilities/utils.go index d6406d6..ede4f64 100644 --- a/utilities/utils.go +++ b/utilities/utils.go @@ -9,8 +9,8 @@ https://groups.google.com/forum/#!topic/golang-nuts/FT7cjmcL7gw // Utility struct that helps in sorting the available power by value. type Pair struct { - Key string - Value float64 + Key string + Value float64 } // A slice of pairs that implements the sort.Interface to sort by value. 
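The Pair and PairList helpers being reformatted in utilities/utils.go implement sort.Interface so that per-host available power can be ordered by value. A small, self-contained usage sketch; the type is copied locally for illustration and the hostnames and wattages are just examples:

package main

import (
	"fmt"
	"sort"
)

// Pair and PairList are copied here for illustration; the real definitions
// live in the utilities package being reformatted in this patch.
type Pair struct {
	Key   string
	Value float64
}

type PairList []Pair

func (plist PairList) Swap(i, j int)      { plist[i], plist[j] = plist[j], plist[i] }
func (plist PairList) Len() int           { return len(plist) }
func (plist PairList) Less(i, j int) bool { return plist[i].Value < plist[j].Value }

func main() {
	// Available power per host, ordered from least to most by sort.Sort.
	available := PairList{
		{Key: "stratos-002.cs.binghamton.edu", Value: 95.5},
		{Key: "stratos-001.cs.binghamton.edu", Value: 120.0},
	}
	sort.Sort(available)
	for _, pair := range available {
		fmt.Println(pair.Key, pair.Value)
	}
}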
@@ -18,37 +18,37 @@ type PairList []Pair // Swap pairs in the PairList func (plist PairList) Swap(i, j int) { - plist[i], plist[j] = plist[j], plist[i] + plist[i], plist[j] = plist[j], plist[i] } // function to return the length of the pairlist. func (plist PairList) Len() int { - return len(plist) + return len(plist) } // function to compare two elements in pairlist. func (plist PairList) Less(i, j int) bool { - return plist[i].Value < plist[j].Value + return plist[i].Value < plist[j].Value } // convert a PairList to a map[string]float64 func OrderedKeys(plist PairList) ([]string, error) { - // Validation - if plist == nil { - return nil, errors.New("Invalid argument: plist") - } - ordered_keys := make([]string, len(plist)) - for _, pair := range plist { - ordered_keys = append(ordered_keys, pair.Key) - } - return ordered_keys, nil + // Validation + if plist == nil { + return nil, errors.New("Invalid argument: plist") + } + ordered_keys := make([]string, len(plist)) + for _, pair := range plist { + ordered_keys = append(ordered_keys, pair.Key) + } + return ordered_keys, nil } // determine the max value func Max(a, b float64) float64 { - if a > b { - return a - } else { - return b - } + if a > b { + return a + } else { + return b + } } From 72bd54374c5025244404af5a0f125621dae08b3a Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Fri, 25 Nov 2016 18:00:55 -0500 Subject: [PATCH 084/102] Added proactive cluster wide capping with ranked tasks as another scheduler. --- .../proactiveclusterwidecappingranked.go | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/schedulers/proactiveclusterwidecappingranked.go b/schedulers/proactiveclusterwidecappingranked.go index 0ef4d77..a6f8aa8 100644 --- a/schedulers/proactiveclusterwidecappingranked.go +++ b/schedulers/proactiveclusterwidecappingranked.go @@ -38,11 +38,11 @@ func (_ *ProactiveClusterwideCapRanked) takeOffer(offer *mesos.Offer, task def.T // electronScheduler implements the Scheduler interface type ProactiveClusterwideCapRanked struct { - tasksCreated int - tasksRunning int - tasks []def.Task - metrics map[string]def.Metric - running map[string]map[string]bool + tasksCreated int + tasksRunning int + tasks []def.Task + metrics map[string]def.Metric + running map[string]map[string]bool taskMonitor map[string][]def.Task // store tasks that are currently running. availablePower map[string]float64 // available power for each node in the cluster. totalPower map[string]float64 // total power for each node in the cluster. @@ -86,7 +86,7 @@ func NewProactiveClusterwideCapRanked(tasks []def.Task, ignoreWatts bool) *Proac ticker: time.NewTicker(10 * time.Second), recapTicker: time.NewTicker(20 * time.Second), isCapping: false, - isRecapping: false, + isRecapping: false, } return s } @@ -114,10 +114,10 @@ func (s *ProactiveClusterwideCapRanked) newTask(offer *mesos.Offer, task def.Tas task.SetTaskID(*proto.String("electron-" + taskName)) // Add task to the list of tasks running on the node. 
s.running[offer.GetSlaveId().GoString()][taskName] = true - if len(s.taskMonitor[offer.GetSlaveId().GoString()]) == 0 { - s.taskMonitor[offer.GetSlaveId().GoString()] = []def.Task{task} + if len(s.taskMonitor[*offer.Hostname]) == 0 { + s.taskMonitor[*offer.Hostname] = []def.Task{task} } else { - s.taskMonitor[offer.GetSlaveId().GoString()] = append(s.taskMonitor[offer.GetSlaveId().GoString()], task) + s.taskMonitor[*offer.Hostname] = append(s.taskMonitor[*offer.Hostname], task) } resources := []*mesos.Resource{ From 6b32c9eedae9346a5818b7f9100cd3c9ffe7dfe8 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Fri, 25 Nov 2016 18:02:21 -0500 Subject: [PATCH 085/102] Added the proactive cluster wide capping with ranked tasks scheduler to the list of schedulers. --- schedulers/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/schedulers/README.md b/schedulers/README.md index 15c0d74..a61559b 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -9,5 +9,6 @@ Scheduling Algorithms: * Bin-packing with sorted watts * FCFS Proactive Cluster-wide Capping + * Ranked Proactive Cluster-wide Capping * First Fit * First Fit with sorted watts From 2a9c0eb81a1f7799524d0f2166cd662e46e732e4 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Fri, 25 Nov 2016 19:32:50 -0500 Subject: [PATCH 086/102] Removed a comment --- utilities/utils.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/utilities/utils.go b/utilities/utils.go index ede4f64..b953446 100644 --- a/utilities/utils.go +++ b/utilities/utils.go @@ -2,11 +2,6 @@ package utilities import "errors" -/* -The Pair and PairList have been taken from google groups forum, -https://groups.google.com/forum/#!topic/golang-nuts/FT7cjmcL7gw -*/ - // Utility struct that helps in sorting the available power by value. type Pair struct { Key string From 6ae59d4be54dc6195c6130f5eb06c61b7a16a0e7 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Fri, 25 Nov 2016 20:21:01 -0500 Subject: [PATCH 087/102] Revert "Removed a comment" This reverts commit fcdffb5c1006d7938d8be4aacad7ec5a7b78f20e. --- utilities/utils.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/utilities/utils.go b/utilities/utils.go index b953446..ede4f64 100644 --- a/utilities/utils.go +++ b/utilities/utils.go @@ -2,6 +2,11 @@ package utilities import "errors" +/* +The Pair and PairList have been taken from google groups forum, +https://groups.google.com/forum/#!topic/golang-nuts/FT7cjmcL7gw +*/ + // Utility struct that helps in sorting the available power by value. type Pair struct { Key string From 55ea017a9a5d828a028337c3f6a8009090cea8bb Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 28 Nov 2016 16:29:12 -0500 Subject: [PATCH 088/102] Removed a commented line. --- schedulers/proactiveclusterwidecappingfcfs.go | 1 - schedulers/proactiveclusterwidecappingranked.go | 1 - 2 files changed, 2 deletions(-) diff --git a/schedulers/proactiveclusterwidecappingfcfs.go b/schedulers/proactiveclusterwidecappingfcfs.go index 4a13574..5ff439f 100644 --- a/schedulers/proactiveclusterwidecappingfcfs.go +++ b/schedulers/proactiveclusterwidecappingfcfs.go @@ -193,7 +193,6 @@ func (s *ProactiveClusterwideCapFCFS) startRecapping() { case <-s.recapTicker.C: fcfsMutex.Lock() // If stopped performing cluster wide capping then we need to explicitly cap the entire cluster. - //if !s.isCapping && s.isRecapping && fcfsRecapValue > 0.0 { if s.isRecapping && fcfsRecapValue > 0.0 { for _, host := range constants.Hosts { // Rounding curreCapValue to the nearest int. 
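The recapping path shown above runs off a ticker and takes a mutex before any cap is applied, rounding the cap value to the nearest integer. A rough standalone sketch of that pattern, with hypothetical names and without any Mesos or power-capping calls:

package main

import (
	"log"
	"math"
	"sync"
	"time"
)

func main() {
	var mu sync.Mutex
	recapValue := 0.0
	isRecapping := true
	ticker := time.NewTicker(2 * time.Second) // the schedulers use a 20s recap ticker; shortened here
	defer ticker.Stop()

	// Periodically apply the current cluster-wide recap value under the mutex,
	// rounding it to the nearest integer first.
	go func() {
		for range ticker.C {
			mu.Lock()
			if isRecapping && recapValue > 0.0 {
				rounded := int(math.Floor(recapValue + 0.5))
				log.Printf("capping the cluster to %d%%", rounded)
				// A real scheduler would invoke its per-host power-capping helper here.
			}
			mu.Unlock()
		}
	}()

	// A status update elsewhere would adjust recapValue under the same mutex.
	mu.Lock()
	recapValue = 72.4
	mu.Unlock()

	time.Sleep(5 * time.Second)
}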
diff --git a/schedulers/proactiveclusterwidecappingranked.go b/schedulers/proactiveclusterwidecappingranked.go index a6f8aa8..3c9ef81 100644 --- a/schedulers/proactiveclusterwidecappingranked.go +++ b/schedulers/proactiveclusterwidecappingranked.go @@ -203,7 +203,6 @@ func (s *ProactiveClusterwideCapRanked) startRecapping() { case <-s.recapTicker.C: rankedMutex.Lock() // If stopped performing cluster wide capping then we need to explicitly cap the entire cluster. - //if !s.isCapping && s.isRecapping && rankedRecapValue > 0.0 { if s.isRecapping && rankedRecapValue > 0.0 { for _, host := range constants.Hosts { // Rounding curreCapValue to the nearest int. From 50d1d790518878a07266ba438069d79c1dd6f0e9 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 28 Nov 2016 17:18:33 -0500 Subject: [PATCH 089/102] fixed naming convensions to be camel cased. Reformatted the code. --- constants/constants.go | 42 ++-- def/task.go | 14 +- schedulers/proactiveclusterwidecappers.go | 226 +++++++++--------- schedulers/proactiveclusterwidecappingfcfs.go | 4 +- .../proactiveclusterwidecappingranked.go | 2 +- utilities/utils.go | 6 +- 6 files changed, 147 insertions(+), 147 deletions(-) diff --git a/constants/constants.go b/constants/constants.go index 2d9b516..fb06a9d 100644 --- a/constants/constants.go +++ b/constants/constants.go @@ -16,71 +16,71 @@ var Hosts = []string{"stratos-001.cs.binghamton.edu", "stratos-002.cs.binghamton "stratos-007.cs.binghamton.edu", "stratos-008.cs.binghamton.edu"} // Add a new host to the slice of hosts. -func AddNewHost(new_host string) bool { +func AddNewHost(newHost string) bool { // Validation - if new_host == "" { + if newHost == "" { return false } else { - Hosts = append(Hosts, new_host) + Hosts = append(Hosts, newHost) return true } } // Lower bound of the percentage of requested power, that can be allocated to a task. -var Power_threshold = 0.6 // Right now saying that a task will never be given lesser than 60% of the power it requested. +var PowerThreshold = 0.6 // Right now saying that a task will never be given lesser than 60% of the power it requested. /* Margin with respect to the required power for a job. So, if power required = 10W, the node would be capped to 75%*10W. This value can be changed upon convenience. */ -var Cap_margin = 0.50 +var CapMargin = 0.70 // Modify the cap margin. -func UpdateCapMargin(new_cap_margin float64) bool { +func UpdateCapMargin(newCapMargin float64) bool { // Checking if the new_cap_margin is less than the power threshold. - if new_cap_margin < Starvation_factor { + if newCapMargin < StarvationFactor { return false } else { - Cap_margin = new_cap_margin + CapMargin = newCapMargin return true } } // Threshold factor that would make (Cap_margin * task.Watts) equal to (60/100 * task.Watts). -var Starvation_factor = 0.8 +var StarvationFactor = 0.8 // Total power per node. -var Total_power map[string]float64 +var TotalPower map[string]float64 // Initialize the total power per node. This should be done before accepting any set of tasks for scheduling. 
-func AddTotalPowerForHost(host string, total_power float64) bool { +func AddTotalPowerForHost(host string, totalPower float64) bool { // Validation - is_correct_host := false - for _, existing_host := range Hosts { - if host == existing_host { - is_correct_host = true + isCorrectHost := false + for _, existingHost := range Hosts { + if host == existingHost { + isCorrectHost = true } } - if !is_correct_host { + if !isCorrectHost { return false } else { - Total_power[host] = total_power + TotalPower[host] = totalPower return true } } // Window size for running average -var Window_size = 10 +var WindowSize = 160 // Update the window size. -func UpdateWindowSize(new_window_size int) bool { +func UpdateWindowSize(newWindowSize int) bool { // Validation - if new_window_size == 0 { + if newWindowSize == 0 { return false } else { - Window_size = new_window_size + WindowSize = newWindowSize return true } } diff --git a/def/task.go b/def/task.go index 63668ad..9699812 100644 --- a/def/task.go +++ b/def/task.go @@ -38,18 +38,18 @@ func TasksFromJSON(uri string) ([]Task, error) { } // Update the host on which the task needs to be scheduled. -func (tsk *Task) UpdateHost(new_host string) bool { +func (tsk *Task) UpdateHost(newHost string) bool { // Validation - is_correct_host := false - for _, existing_host := range constants.Hosts { - if new_host == existing_host { - is_correct_host = true + isCorrectHost := false + for _, existingHost := range constants.Hosts { + if newHost == existingHost { + isCorrectHost = true } } - if !is_correct_host { + if !isCorrectHost { return false } else { - tsk.Host = new_host + tsk.Host = newHost return true } } diff --git a/schedulers/proactiveclusterwidecappers.go b/schedulers/proactiveclusterwidecappers.go index 5de8a47..e943d37 100644 --- a/schedulers/proactiveclusterwidecappers.go +++ b/schedulers/proactiveclusterwidecappers.go @@ -24,63 +24,63 @@ import ( // Structure containing utility data structures used to compute cluster-wide dynamic cap. type clusterwideCapper struct { // window of tasks. - window_of_tasks list.List + windowOfTasks list.List // The current sum of requested powers of the tasks in the window. - current_sum float64 + currentSum float64 // The current number of tasks in the window. - number_of_tasks_in_window int + numberOfTasksInWindow int } // Defining constructor for clusterwideCapper. Please don't call this directly and instead use getClusterwideCapperInstance(). func newClusterwideCapper() *clusterwideCapper { - return &clusterwideCapper{current_sum: 0.0, number_of_tasks_in_window: 0} + return &clusterwideCapper{currentSum: 0.0, numberOfTasksInWindow: 0} } // Singleton instance of clusterwideCapper -var singleton_capper *clusterwideCapper +var singletonCapper *clusterwideCapper // Retrieve the singleton instance of clusterwideCapper. func getClusterwideCapperInstance() *clusterwideCapper { - if singleton_capper == nil { - singleton_capper = newClusterwideCapper() + if singletonCapper == nil { + singletonCapper = newClusterwideCapper() } else { // Do nothing } - return singleton_capper + return singletonCapper } // Clear and initialize all the members of clusterwideCapper. func (capper clusterwideCapper) clear() { - capper.window_of_tasks.Init() - capper.current_sum = 0 - capper.number_of_tasks_in_window = 0 + capper.windowOfTasks.Init() + capper.currentSum = 0 + capper.numberOfTasksInWindow = 0 } // Compute the average of watts of all the tasks in the window. 
func (capper clusterwideCapper) average() float64 { - return capper.current_sum / float64(capper.window_of_tasks.Len()) + return capper.currentSum / float64(capper.windowOfTasks.Len()) } /* Compute the running average. -Using clusterwideCapper#window_of_tasks to store the tasks. +Using clusterwideCapper#windowOfTasks to store the tasks. Task at position 0 (oldest task) is removed when the window is full and new task arrives. */ -func (capper clusterwideCapper) running_average_of_watts(tsk *def.Task) float64 { +func (capper clusterwideCapper) runningAverageOfWatts(tsk *def.Task) float64 { var average float64 - if capper.number_of_tasks_in_window < constants.Window_size { - capper.window_of_tasks.PushBack(tsk) - capper.number_of_tasks_in_window++ - capper.current_sum += float64(tsk.Watts) * constants.Cap_margin + if capper.numberOfTasksInWindow < constants.WindowSize { + capper.windowOfTasks.PushBack(tsk) + capper.numberOfTasksInWindow++ + capper.currentSum += float64(tsk.Watts) * constants.CapMargin } else { - task_to_remove_element := capper.window_of_tasks.Front() - if task_to_remove, ok := task_to_remove_element.Value.(*def.Task); ok { - capper.current_sum -= float64(task_to_remove.Watts) * constants.Cap_margin - capper.window_of_tasks.Remove(task_to_remove_element) + taskToRemoveElement := capper.windowOfTasks.Front() + if taskToRemove, ok := taskToRemoveElement.Value.(*def.Task); ok { + capper.currentSum -= float64(taskToRemove.Watts) * constants.CapMargin + capper.windowOfTasks.Remove(taskToRemoveElement) } - capper.window_of_tasks.PushBack(tsk) - capper.current_sum += float64(tsk.Watts) * constants.Cap_margin + capper.windowOfTasks.PushBack(tsk) + capper.currentSum += float64(tsk.Watts) * constants.CapMargin } average = capper.average() return average @@ -89,17 +89,17 @@ func (capper clusterwideCapper) running_average_of_watts(tsk *def.Task) float64 /* Calculating cap value. -1. Sorting the values of running_average_to_total_power_percentage in ascending order. +1. Sorting the values of runningAverageToTotalPowerPercentage in ascending order. 2. Computing the median of above sorted values. 3. The median is now the cap. */ -func (capper clusterwideCapper) get_cap(running_average_to_total_power_percentage map[string]float64) float64 { +func (capper clusterwideCapper) getCap(runningAverageToTotalPowerPercentage map[string]float64) float64 { var values []float64 // Validation - if running_average_to_total_power_percentage == nil { + if runningAverageToTotalPowerPercentage == nil { return 100.0 } - for _, apower := range running_average_to_total_power_percentage { + for _, apower := range runningAverageToTotalPowerPercentage { values = append(values, apower) } // sorting the values in ascending order. @@ -122,51 +122,51 @@ The recap value picked the least among the two. The cleverRecap scheme works well when the cluster is relatively idle and until then, the primitive recapping scheme works better. 
*/ -func (capper clusterwideCapper) cleverRecap(total_power map[string]float64, - task_monitor map[string][]def.Task, finished_taskId string) (float64, error) { +func (capper clusterwideCapper) cleverRecap(totalPower map[string]float64, + taskMonitor map[string][]def.Task, finishedTaskId string) (float64, error) { // Validation - if total_power == nil || task_monitor == nil { - return 100.0, errors.New("Invalid argument: total_power, task_monitor") + if totalPower == nil || taskMonitor == nil { + return 100.0, errors.New("Invalid argument: totalPower, taskMonitor") } // determining the recap value by calling the regular recap(...) toggle := false - recapValue, err := capper.recap(total_power, task_monitor, finished_taskId) + recapValue, err := capper.recap(totalPower, taskMonitor, finishedTaskId) if err == nil { toggle = true } // watts usage on each node in the cluster. - watts_usages := make(map[string][]float64) - host_of_finished_task := "" - index_of_finished_task := -1 + wattsUsages := make(map[string][]float64) + hostOfFinishedTask := "" + indexOfFinishedTask := -1 for _, host := range constants.Hosts { - watts_usages[host] = []float64{0.0} + wattsUsages[host] = []float64{0.0} } - for host, tasks := range task_monitor { + for host, tasks := range taskMonitor { for i, task := range tasks { - if task.TaskID == finished_taskId { - host_of_finished_task = host - index_of_finished_task = i - // Not considering this task for the computation of total_allocated_power and total_running_tasks + if task.TaskID == finishedTaskId { + hostOfFinishedTask = host + indexOfFinishedTask = i + // Not considering this task for the computation of totalAllocatedPower and totalRunningTasks continue } - watts_usages[host] = append(watts_usages[host], float64(task.Watts)*constants.Cap_margin) + wattsUsages[host] = append(wattsUsages[host], float64(task.Watts)*constants.CapMargin) } } // Updating task monitor. If recap(...) has deleted the finished task from the taskMonitor, // then this will be ignored. Else (this is only when an error occured with recap(...)), we remove it here. - if host_of_finished_task != "" && index_of_finished_task != -1 { + if hostOfFinishedTask != "" && indexOfFinishedTask != -1 { log.Printf("Removing task with task [%s] from the list of running tasks\n", - task_monitor[host_of_finished_task][index_of_finished_task].TaskID) - task_monitor[host_of_finished_task] = append(task_monitor[host_of_finished_task][:index_of_finished_task], - task_monitor[host_of_finished_task][index_of_finished_task+1:]...) + taskMonitor[hostOfFinishedTask][indexOfFinishedTask].TaskID) + taskMonitor[hostOfFinishedTask] = append(taskMonitor[hostOfFinishedTask][:indexOfFinishedTask], + taskMonitor[hostOfFinishedTask][indexOfFinishedTask+1:]...) } // Need to check whether there are still tasks running on the cluster. If not then we return an error. clusterIdle := true - for _, tasks := range task_monitor { + for _, tasks := range taskMonitor { if len(tasks) > 0 { clusterIdle = false } @@ -175,29 +175,29 @@ func (capper clusterwideCapper) cleverRecap(total_power map[string]float64, if !clusterIdle { // load on each node in the cluster. loads := []float64{0.0} - for host, usages := range watts_usages { - total_usage := 0.0 + for host, usages := range wattsUsages { + totalUsage := 0.0 for _, usage := range usages { - total_usage += usage + totalUsage += usage } - loads = append(loads, total_usage/total_power[host]) + loads = append(loads, totalUsage/totalPower[host]) } // Now need to compute the average load. 
- total_load := 0.0 + totalLoad := 0.0 for _, load := range loads { - total_load += load + totalLoad += load } - average_load := (total_load / float64(len(loads)) * 100.0) // this would be the cap value. + averageLoad := (totalLoad / float64(len(loads)) * 100.0) // this would be the cap value. // If toggle is true, then we need to return the least recap value. if toggle { - if average_load <= recapValue { - return average_load, nil + if averageLoad <= recapValue { + return averageLoad, nil } else { return recapValue, nil } } else { - return average_load, nil + return averageLoad, nil } } return 100.0, errors.New("No task running on the cluster.") @@ -213,46 +213,46 @@ Recapping the entire cluster. This needs to be called whenever a task finishes execution. */ -func (capper clusterwideCapper) recap(total_power map[string]float64, - task_monitor map[string][]def.Task, finished_taskId string) (float64, error) { +func (capper clusterwideCapper) recap(totalPower map[string]float64, + taskMonitor map[string][]def.Task, finishedTaskId string) (float64, error) { // Validation - if total_power == nil || task_monitor == nil { - return 100.0, errors.New("Invalid argument: total_power, task_monitor") + if totalPower == nil || taskMonitor == nil { + return 100.0, errors.New("Invalid argument: totalPower, taskMonitor") } - total_allocated_power := 0.0 - total_running_tasks := 0 + totalAllocatedPower := 0.0 + totalRunningTasks := 0 - host_of_finished_task := "" - index_of_finished_task := -1 - for host, tasks := range task_monitor { + hostOfFinishedTask := "" + indexOfFinishedTask := -1 + for host, tasks := range taskMonitor { for i, task := range tasks { - if task.TaskID == finished_taskId { - host_of_finished_task = host - index_of_finished_task = i - // Not considering this task for the computation of total_allocated_power and total_running_tasks + if task.TaskID == finishedTaskId { + hostOfFinishedTask = host + indexOfFinishedTask = i + // Not considering this task for the computation of totalAllocatedPower and totalRunningTasks continue } - total_allocated_power += (float64(task.Watts) * constants.Cap_margin) - total_running_tasks++ + totalAllocatedPower += (float64(task.Watts) * constants.CapMargin) + totalRunningTasks++ } } // Updating task monitor - if host_of_finished_task != "" && index_of_finished_task != -1 { + if hostOfFinishedTask != "" && indexOfFinishedTask != -1 { log.Printf("Removing task with task [%s] from the list of running tasks\n", - task_monitor[host_of_finished_task][index_of_finished_task].TaskID) - task_monitor[host_of_finished_task] = append(task_monitor[host_of_finished_task][:index_of_finished_task], - task_monitor[host_of_finished_task][index_of_finished_task+1:]...) + taskMonitor[hostOfFinishedTask][indexOfFinishedTask].TaskID) + taskMonitor[hostOfFinishedTask] = append(taskMonitor[hostOfFinishedTask][:indexOfFinishedTask], + taskMonitor[hostOfFinishedTask][indexOfFinishedTask+1:]...) 
} - // For the last task, total_allocated_power and total_running_tasks would be 0 - if total_allocated_power == 0 && total_running_tasks == 0 { + // For the last task, totalAllocatedPower and totalRunningTasks would be 0 + if totalAllocatedPower == 0 && totalRunningTasks == 0 { return 100, errors.New("No task running on the cluster.") } - average := total_allocated_power / float64(total_running_tasks) + average := totalAllocatedPower / float64(totalRunningTasks) ratios := []float64{} - for _, tpower := range total_power { + for _, tpower := range totalPower { ratios = append(ratios, (average/tpower)*100) } sort.Float64s(ratios) @@ -265,38 +265,38 @@ func (capper clusterwideCapper) recap(total_power map[string]float64, } /* Quick sort algorithm to sort tasks, in place, in ascending order of power.*/ -func (capper clusterwideCapper) quick_sort(low int, high int, tasks_to_sort *[]def.Task) { +func (capper clusterwideCapper) quickSort(low int, high int, tasksToSort *[]def.Task) { i := low j := high // calculating the pivot - pivot_index := low + (high-low)/2 - pivot := (*tasks_to_sort)[pivot_index] + pivotIndex := low + (high-low)/2 + pivot := (*tasksToSort)[pivotIndex] for i <= j { - for (*tasks_to_sort)[i].Watts < pivot.Watts { + for (*tasksToSort)[i].Watts < pivot.Watts { i++ } - for (*tasks_to_sort)[j].Watts > pivot.Watts { + for (*tasksToSort)[j].Watts > pivot.Watts { j-- } if i <= j { - temp := (*tasks_to_sort)[i] - (*tasks_to_sort)[i] = (*tasks_to_sort)[j] - (*tasks_to_sort)[j] = temp + temp := (*tasksToSort)[i] + (*tasksToSort)[i] = (*tasksToSort)[j] + (*tasksToSort)[j] = temp i++ j-- } } if low < j { - capper.quick_sort(low, j, tasks_to_sort) + capper.quickSort(low, j, tasksToSort) } if i < high { - capper.quick_sort(i, high, tasks_to_sort) + capper.quickSort(i, high, tasksToSort) } } // Sorting tasks in ascending order of requested watts. -func (capper clusterwideCapper) sort_tasks(tasks_to_sort *[]def.Task) { - capper.quick_sort(0, len(*tasks_to_sort)-1, tasks_to_sort) +func (capper clusterwideCapper) sortTasks(tasksToSort *[]def.Task) { + capper.quickSort(0, len(*tasksToSort)-1, tasksToSort) } /* @@ -307,51 +307,51 @@ This completed task needs to be removed from the window of tasks (if it is still */ func (capper clusterwideCapper) taskFinished(taskID string) { // If the window is empty the just return. This condition should technically return false. - if capper.window_of_tasks.Len() == 0 { + if capper.windowOfTasks.Len() == 0 { return } // Checking whether the task with the given taskID is currently present in the window of tasks. - var task_element_to_remove *list.Element - for task_element := capper.window_of_tasks.Front(); task_element != nil; task_element = task_element.Next() { - if tsk, ok := task_element.Value.(*def.Task); ok { + var taskElementToRemove *list.Element + for taskElement := capper.windowOfTasks.Front(); taskElement != nil; taskElement = taskElement.Next() { + if tsk, ok := taskElement.Value.(*def.Task); ok { if tsk.TaskID == taskID { - task_element_to_remove = task_element + taskElementToRemove = taskElement } } } // we need to remove the task from the window. 
- if task_to_remove, ok := task_element_to_remove.Value.(*def.Task); ok { - capper.window_of_tasks.Remove(task_element_to_remove) - capper.number_of_tasks_in_window -= 1 - capper.current_sum -= float64(task_to_remove.Watts) * constants.Cap_margin + if taskToRemove, ok := taskElementToRemove.Value.(*def.Task); ok { + capper.windowOfTasks.Remove(taskElementToRemove) + capper.numberOfTasksInWindow -= 1 + capper.currentSum -= float64(taskToRemove.Watts) * constants.CapMargin } } // First come first serve scheduling. -func (capper clusterwideCapper) fcfsDetermineCap(total_power map[string]float64, - new_task *def.Task) (float64, error) { +func (capper clusterwideCapper) fcfsDetermineCap(totalPower map[string]float64, + newTask *def.Task) (float64, error) { // Validation - if total_power == nil { - return 100, errors.New("Invalid argument: total_power") + if totalPower == nil { + return 100, errors.New("Invalid argument: totalPower") } else { // Need to calculate the running average - running_average := capper.running_average_of_watts(new_task) + runningAverage := capper.runningAverageOfWatts(newTask) // For each node, calculate the percentage of the running average to the total power. - running_average_to_total_power_percentage := make(map[string]float64) - for host, tpower := range total_power { - if tpower >= running_average { - running_average_to_total_power_percentage[host] = (running_average / tpower) * 100 + runningAverageToTotalPowerPercentage := make(map[string]float64) + for host, tpower := range totalPower { + if tpower >= runningAverage { + runningAverageToTotalPowerPercentage[host] = (runningAverage / tpower) * 100 } else { // We don't consider this host for the computation of the cluster wide cap. } } // Determine the cluster wide cap value. - cap_value := capper.get_cap(running_average_to_total_power_percentage) + capValue := capper.getCap(runningAverageToTotalPowerPercentage) // Need to cap the cluster to this value. - return cap_value, nil + return capValue, nil } } diff --git a/schedulers/proactiveclusterwidecappingfcfs.go b/schedulers/proactiveclusterwidecappingfcfs.go index 5ff439f..49094cd 100644 --- a/schedulers/proactiveclusterwidecappingfcfs.go +++ b/schedulers/proactiveclusterwidecappingfcfs.go @@ -304,8 +304,8 @@ func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDrive log.Println(err) } log.Printf("Starting on [%s]\n", offer.GetHostname()) - to_schedule := []*mesos.TaskInfo{s.newTask(offer, task)} - driver.LaunchTasks([]*mesos.OfferID{offer.Id}, to_schedule, defaultFilter) + toSchedule := []*mesos.TaskInfo{s.newTask(offer, task)} + driver.LaunchTasks([]*mesos.OfferID{offer.Id}, toSchedule, defaultFilter) log.Printf("Inst: %d", *task.Instances) *task.Instances-- if *task.Instances <= 0 { diff --git a/schedulers/proactiveclusterwidecappingranked.go b/schedulers/proactiveclusterwidecappingranked.go index 3c9ef81..f6ea425 100644 --- a/schedulers/proactiveclusterwidecappingranked.go +++ b/schedulers/proactiveclusterwidecappingranked.go @@ -257,7 +257,7 @@ func (s *ProactiveClusterwideCapRanked) ResouceOffers(driver sched.SchedulerDriv } // sorting the tasks in ascending order of watts. - s.capper.sort_tasks(&s.tasks) + s.capper.sortTasks(&s.tasks) // displaying the ranked tasks. 
log.Println("The ranked tasks are:\n---------------------\n\t[") for rank, task := range s.tasks { diff --git a/utilities/utils.go b/utilities/utils.go index ede4f64..c53df74 100644 --- a/utilities/utils.go +++ b/utilities/utils.go @@ -37,11 +37,11 @@ func OrderedKeys(plist PairList) ([]string, error) { if plist == nil { return nil, errors.New("Invalid argument: plist") } - ordered_keys := make([]string, len(plist)) + orderedKeys := make([]string, len(plist)) for _, pair := range plist { - ordered_keys = append(ordered_keys, pair.Key) + orderedKeys = append(orderedKeys, pair.Key) } - return ordered_keys, nil + return orderedKeys, nil } // determine the max value From 512918e1e85d58d268e1c6f7e21a58baead5eceb Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 28 Nov 2016 20:00:12 -0500 Subject: [PATCH 090/102] Added explanation for StarvationThreshold. Removed TotalPower as it was embedded inside the schedulers. --- constants/constants.go | 43 ++++++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/constants/constants.go b/constants/constants.go index fb06a9d..c9d7b36 100644 --- a/constants/constants.go +++ b/constants/constants.go @@ -26,7 +26,11 @@ func AddNewHost(newHost string) bool { } } -// Lower bound of the percentage of requested power, that can be allocated to a task. +/* + Lower bound of the percentage of requested power, that can be allocated to a task. + + Note: This constant is not used for the proactive cluster wide capping schemes. +*/ var PowerThreshold = 0.6 // Right now saying that a task will never be given lesser than 60% of the power it requested. /* @@ -47,29 +51,22 @@ func UpdateCapMargin(newCapMargin float64) bool { } } -// Threshold factor that would make (Cap_margin * task.Watts) equal to (60/100 * task.Watts). -var StarvationFactor = 0.8 +/* + The factor, that when multiplied with (task.Watts * CapMargin) results in (task.Watts * PowerThreshold). + This is used to check whether available power, for a host in an offer, is not less than (PowerThreshold * task.Watts), + which is assumed to result in starvation of the task. + Here is an example, + Suppose a task requires 100W of power. Assuming CapMargin = 0.75 and PowerThreshold = 0.6. + So, the assumed allocated watts is 75W. + Now, when we get an offer, we need to check whether the available power, for the host in that offer, is + not less than 60% (the PowerTreshold) of the requested power (100W). + To put it in other words, + availablePower >= 100W * 0.75 * X + where X is the StarvationFactor (80% in this case) -// Total power per node. -var TotalPower map[string]float64 - -// Initialize the total power per node. This should be done before accepting any set of tasks for scheduling. -func AddTotalPowerForHost(host string, totalPower float64) bool { - // Validation - isCorrectHost := false - for _, existingHost := range Hosts { - if host == existingHost { - isCorrectHost = true - } - } - - if !isCorrectHost { - return false - } else { - TotalPower[host] = totalPower - return true - } -} + Note: This constant is not used for the proactive cluster wide capping schemes. 
+*/ +var StarvationFactor = PowerThreshold / CapMargin // Window size for running average var WindowSize = 160 From 1c8265b87ba5d9a54774dc37012cdf74a7114faf Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 28 Nov 2016 20:01:25 -0500 Subject: [PATCH 091/102] Updated comments --- constants/constants.go | 1 - 1 file changed, 1 deletion(-) diff --git a/constants/constants.go b/constants/constants.go index c9d7b36..08ad28a 100644 --- a/constants/constants.go +++ b/constants/constants.go @@ -3,7 +3,6 @@ Constants that are used across scripts 1. The available hosts = stratos-00x (x varies from 1 to 8) 2. cap_margin = percentage of the requested power to allocate 3. power_threshold = overloading factor -4. total_power = total power per node 5. window_size = number of tasks to consider for computation of the dynamic cap. Also, exposing functions to update or initialize some of the constants. From 6289b8954d5ac80b517e345e9f706721cffe9209 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 28 Nov 2016 20:02:03 -0500 Subject: [PATCH 092/102] Prevented the use of reflect and instead compared the TaskIDs of the tasks. --- def/task.go | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/def/task.go b/def/task.go index 9699812..e52acb3 100644 --- a/def/task.go +++ b/def/task.go @@ -5,7 +5,6 @@ import ( "encoding/json" "github.com/pkg/errors" "os" - "reflect" ) type Task struct { @@ -85,15 +84,9 @@ func Compare(task1 *Task, task2 *Task) bool { if task1 == task2 { return true } - // Checking member equality - if reflect.DeepEqual(*task1, *task2) { - // Need to check for the task ID - if task1.TaskID == task2.TaskID { - return true - } else { - return false - } - } else { + if task1.TaskID != task2.TaskID { return false + } else { + return true } } From 795daf68a4bcb89ca2daa755d0aa0b2ec092019c Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 28 Nov 2016 22:28:44 -0500 Subject: [PATCH 093/102] Added to TODO (config template per scheduler). --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index aa625e4..636c6d2 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ To Do: * Have calibration phase? * Add ability to use constraints * Running average calculations https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average + * Make parameters corresponding to each scheduler configurable (possible to have a config template for each scheduler?) From fae3a35403de13e02e1c3a180ffced81193611dd Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 28 Nov 2016 22:29:13 -0500 Subject: [PATCH 094/102] Added to TODO (generic running average computation). --- schedulers/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/schedulers/README.md b/schedulers/README.md index a61559b..11f25a0 100644 --- a/schedulers/README.md +++ b/schedulers/README.md @@ -4,6 +4,7 @@ Electron: Scheduling Algorithms To Do: * Design changes -- Possible to have one scheduler with different scheduling schemes? + * Make the running average calculation generic, so that schedulers in the future can use it and not implement their own. Scheduling Algorithms: From 89a294b36515c9d838f8137d0869031917114b67 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Mon, 28 Nov 2016 22:38:02 -0500 Subject: [PATCH 095/102] Removed TODO comment in ResourceOffers(...), that wasn't necessary anymore. 
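The TODO added two patches above asks for a generic running-average helper that future schedulers could share instead of reimplementing their own window. One possible shape, sketched with illustrative names that are not code from this repository:

package main

import (
	"container/list"
	"fmt"
)

// runningAvg keeps a bounded window of float64 samples and their sum.
type runningAvg struct {
	window  *list.List
	sum     float64
	maxSize int
}

func newRunningAvg(maxSize int) *runningAvg {
	return &runningAvg{window: list.New(), maxSize: maxSize}
}

// add pushes a new sample, evicting the oldest one when the window is full,
// and returns the current average over the window.
func (r *runningAvg) add(sample float64) float64 {
	if r.window.Len() >= r.maxSize {
		oldest := r.window.Front()
		r.sum -= oldest.Value.(float64)
		r.window.Remove(oldest)
	}
	r.window.PushBack(sample)
	r.sum += sample
	return r.sum / float64(r.window.Len())
}

func main() {
	avg := newRunningAvg(3)
	for _, watts := range []float64{100, 50, 75, 120} {
		fmt.Printf("average after %.0fW: %.2f\n", watts, avg.add(watts))
	}
}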
--- schedulers/proactiveclusterwidecappingfcfs.go | 3 --- 1 file changed, 3 deletions(-) diff --git a/schedulers/proactiveclusterwidecappingfcfs.go b/schedulers/proactiveclusterwidecappingfcfs.go index 49094cd..37fa19a 100644 --- a/schedulers/proactiveclusterwidecappingfcfs.go +++ b/schedulers/proactiveclusterwidecappingfcfs.go @@ -234,7 +234,6 @@ func (s *ProactiveClusterwideCapFCFS) stopRecapping() { } } -// TODO: Need to reduce the time complexity: looping over offers twice (Possible to do it just once?). func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { log.Printf("Received %d resource offers", len(offers)) @@ -272,8 +271,6 @@ func (s *ProactiveClusterwideCapFCFS) ResourceOffers(driver sched.SchedulerDrive 3. fcfsCurrentCapValue is updated with the determined cluster wide cap. Cluster wide capping is currently performed at regular intervals of time. - TODO: We can choose to cap the cluster only if the clusterwide cap varies more than the current clusterwide cap. - Although this sounds like a better approach, it only works when the resource requirements of neighbouring tasks are similar. */ taken := false From 8767cc61da4846e3f382dd4bf14e8860c70a8a2c Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Tue, 29 Nov 2016 15:22:01 -0500 Subject: [PATCH 096/102] Added TODO to fix the -p option. --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 636c6d2..a46ec9f 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ To Do: * Add ability to use constraints * Running average calculations https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average * Make parameters corresponding to each scheduler configurable (possible to have a config template for each scheduler?) + * Fix the -p option. From 24d2b89aa049dd960ce25856d487cef3668b348b Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Tue, 29 Nov 2016 22:26:55 -0500 Subject: [PATCH 097/102] synchronized operations on tasksRunning and hence prevented the previously occuring race condition. --- schedulers/proactiveclusterwidecappingfcfs.go | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/schedulers/proactiveclusterwidecappingfcfs.go b/schedulers/proactiveclusterwidecappingfcfs.go index 37fa19a..a96d496 100644 --- a/schedulers/proactiveclusterwidecappingfcfs.go +++ b/schedulers/proactiveclusterwidecappingfcfs.go @@ -339,7 +339,9 @@ func (s *ProactiveClusterwideCapFCFS) StatusUpdate(driver sched.SchedulerDriver, log.Printf("Received task status [%s] for task [%s]\n", NameFor(status.State), *status.TaskId.Value) if *status.State == mesos.TaskState_TASK_RUNNING { + fcfsMutex.Lock() s.tasksRunning++ + fcfsMutex.Unlock() } else if IsTerminal(status.State) { delete(s.running[status.GetSlaveId().GoString()], *status.TaskId.Value) // Need to remove the task from the window of tasks. @@ -365,7 +367,9 @@ func (s *ProactiveClusterwideCapFCFS) StatusUpdate(driver sched.SchedulerDriver, log.Println(err) } + fcfsMutex.Lock() s.tasksRunning-- + fcfsMutex.Unlock() if s.tasksRunning == 0 { select { case <-s.Shutdown: From 7bd562cacb88f19f0151d50ee04f19c7a9393f44 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Tue, 29 Nov 2016 23:00:03 -0500 Subject: [PATCH 098/102] synchronized operations on tasksRunning and hence, prevented previously occuring race condition. 
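The race addressed in these two patches comes from concurrent StatusUpdate callbacks mutating the shared tasksRunning counter. Reduced to a standalone sketch with hypothetical names, the pattern is: every read-modify-write of the counter happens under one mutex.

package main

import (
	"fmt"
	"sync"
)

type counter struct {
	mu           sync.Mutex
	tasksRunning int
}

// taskStarted and taskFinished mirror the TASK_RUNNING and terminal-state
// branches of a StatusUpdate handler: the counter is only touched under the mutex.
func (c *counter) taskStarted() {
	c.mu.Lock()
	c.tasksRunning++
	c.mu.Unlock()
}

func (c *counter) taskFinished() (allDone bool) {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.tasksRunning--
	return c.tasksRunning == 0
}

func main() {
	var c counter
	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			c.taskStarted()
			c.taskFinished()
		}()
	}
	wg.Wait()
	fmt.Println("tasks still running:", c.tasksRunning)
}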
--- .../proactiveclusterwidecappingranked.go | 31 ++++++++++++------- 1 file changed, 20 insertions(+), 11 deletions(-) diff --git a/schedulers/proactiveclusterwidecappingranked.go b/schedulers/proactiveclusterwidecappingranked.go index f6ea425..d2565f3 100644 --- a/schedulers/proactiveclusterwidecappingranked.go +++ b/schedulers/proactiveclusterwidecappingranked.go @@ -222,21 +222,26 @@ func (s *ProactiveClusterwideCapRanked) startRecapping() { // Stop cluster wide capping func (s *ProactiveClusterwideCapRanked) stopCapping() { - log.Println("Stopping the cluster wide capping.") - s.ticker.Stop() - rankedMutex.Lock() - s.isCapping = false - s.isRecapping = true - rankedMutex.Unlock() + if s.isCapping { + log.Println("Stopping the cluster wide capping.") + s.ticker.Stop() + fcfsMutex.Lock() + s.isCapping = false + s.isRecapping = true + fcfsMutex.Unlock() + } } // Stop cluster wide Recapping func (s *ProactiveClusterwideCapRanked) stopRecapping() { - log.Println("Stopping the cluster wide re-capping.") - s.recapTicker.Stop() - rankedMutex.Lock() - s.isRecapping = false - rankedMutex.Unlock() + // If not capping, then definitely recapping. + if !s.isCapping && s.isRecapping { + log.Println("Stopping the cluster wide re-capping.") + s.recapTicker.Stop() + fcfsMutex.Lock() + s.isRecapping = false + fcfsMutex.Unlock() + } } func (s *ProactiveClusterwideCapRanked) ResouceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { @@ -355,10 +360,14 @@ func (s *ProactiveClusterwideCapRanked) StatusUpdate(driver sched.SchedulerDrive log.Printf("Received task status [%s] for task [%s]\n", NameFor(status.State), *status.TaskId.Value) if *status.State == mesos.TaskState_TASK_RUNNING { + rankedMutex.Lock() s.tasksRunning++ + rankedMutex.Unlock() } else if IsTerminal(status.State) { delete(s.running[status.GetSlaveId().GoString()], *status.TaskId.Value) + rankedMutex.Lock() s.tasksRunning-- + rankedMutex.Unlock() if s.tasksRunning == 0 { select { case <-s.Shutdown: From f548f06f2dd44eeb9fc0a74e1dd90facf30b1cc5 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Wed, 30 Nov 2016 21:51:55 -0500 Subject: [PATCH 099/102] Fixed the -p option. Changed the README for the same. --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index a46ec9f..23a0801 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,6 @@ To Do: * Add ability to use constraints * Running average calculations https://en.wikipedia.org/wiki/Moving_average#Exponential_moving_average * Make parameters corresponding to each scheduler configurable (possible to have a config template for each scheduler?) - * Fix the -p option. 
@@ -19,7 +18,7 @@ machine on which electron is launched for logging to work** How to run (Use the --help option to get information about other command-line options): -`./electron -workload -ignoreWatts ` +`./electron -workload ` Workload schema: From 57b2756fc6b0b97ea42acde5de91b1e095c52a66 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Wed, 30 Nov 2016 22:05:42 -0500 Subject: [PATCH 100/102] Updated readme to include run command with and without ignoreWatts --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index 23a0801..0443c66 100644 --- a/README.md +++ b/README.md @@ -20,6 +20,10 @@ How to run (Use the --help option to get information about other command-line op `./electron -workload ` +To run electron with ignoreWatts, run the following command, + +`./electron -workload -ignoreWatts` + Workload schema: From f55169c91c2d87079f1c49bef5c0880dc2b6b809 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Thu, 1 Dec 2016 18:31:49 -0500 Subject: [PATCH 101/102] Fixed corner case bug with sorting of tasks. Not sorting if there are no more tasks to schedule. --- schedulers/proactiveclusterwidecappingranked.go | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/schedulers/proactiveclusterwidecappingranked.go b/schedulers/proactiveclusterwidecappingranked.go index d2565f3..963de40 100644 --- a/schedulers/proactiveclusterwidecappingranked.go +++ b/schedulers/proactiveclusterwidecappingranked.go @@ -244,7 +244,7 @@ func (s *ProactiveClusterwideCapRanked) stopRecapping() { } } -func (s *ProactiveClusterwideCapRanked) ResouceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { +func (s *ProactiveClusterwideCapRanked) ResourceOffers(driver sched.SchedulerDriver, offers []*mesos.Offer) { log.Printf("Received %d resource offers", len(offers)) // retrieving the available power for all the hosts in the offers. @@ -262,14 +262,15 @@ func (s *ProactiveClusterwideCapRanked) ResouceOffers(driver sched.SchedulerDriv } // sorting the tasks in ascending order of watts. - s.capper.sortTasks(&s.tasks) - // displaying the ranked tasks. - log.Println("The ranked tasks are:\n---------------------\n\t[") - for rank, task := range s.tasks { - log.Printf("\t\t%d: %s\n", rank+1, task.TaskID) + if (len(s.tasks) > 0) { + s.capper.sortTasks(&s.tasks) + // calculating the total number of tasks ranked. 
+ numberOfRankedTasks := 0 + for _, task := range s.tasks { + numberOfRankedTasks += *task.Instances + } + log.Printf("Ranked %d tasks in ascending order of tasks.", numberOfRankedTasks) } - log.Println("\t]") - for _, offer := range offers { select { case <-s.Shutdown: From 2d3b4841517799f7fddb1dae8505ca69fcfbc222 Mon Sep 17 00:00:00 2001 From: Pradyumna Kaushik Date: Wed, 7 Dec 2016 01:11:30 -0500 Subject: [PATCH 102/102] Using ranked proactive cluster wide capper with clever recapping --- scheduler.go | 2 +- schedulers/proactiveclusterwidecappingranked.go | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/scheduler.go b/scheduler.go index 356587f..280d788 100644 --- a/scheduler.go +++ b/scheduler.go @@ -56,7 +56,7 @@ func main() { fmt.Println(task) } - scheduler := schedulers.NewProactiveClusterwideCapFCFS(tasks, *ignoreWatts) + scheduler := schedulers.NewProactiveClusterwideCapRanked(tasks, *ignoreWatts) driver, err := sched.NewMesosSchedulerDriver(sched.DriverConfig{ Master: *master, Framework: &mesos.FrameworkInfo{ diff --git a/schedulers/proactiveclusterwidecappingranked.go b/schedulers/proactiveclusterwidecappingranked.go index 963de40..69ae26f 100644 --- a/schedulers/proactiveclusterwidecappingranked.go +++ b/schedulers/proactiveclusterwidecappingranked.go @@ -381,8 +381,8 @@ func (s *ProactiveClusterwideCapRanked) StatusUpdate(driver sched.SchedulerDrive // Need to remove the task from the window s.capper.taskFinished(*status.TaskId.Value) // Determining the new cluster wide cap. - tempCap, err := s.capper.recap(s.totalPower, s.taskMonitor, *status.TaskId.Value) - // tempCap, err := s.capper.cleverRecap(s.totalPower, s.taskMonitor, *status.TaskId.Value) + //tempCap, err := s.capper.recap(s.totalPower, s.taskMonitor, *status.TaskId.Value) + tempCap, err := s.capper.cleverRecap(s.totalPower, s.taskMonitor, *status.TaskId.Value) if err == nil { // If new determined cap value is different from the current recap value then we need to recap.