Unit testing for def/ module.

Added unit tests to test code in def/ module.
This commit is contained in:
Pradyumna Kaushik 2019-10-12 06:48:45 +00:00
parent e24b8a08c9
commit bac60e872a
396 changed files with 83991 additions and 13209 deletions

View file

@ -1,3 +1,41 @@
/*
Package gokmeans is a simple k-means clusterer that determines centroids with the Train function,
and then classifies additional observations with the Nearest function.
package main
import (
"fmt"
"github.com/mdesenfants/gokmeans"
)
var observations []gokmeans.Node = []gokmeans.Node {
gokmeans.Node{20.0, 20.0, 20.0, 20.0},
gokmeans.Node{21.0, 21.0, 21.0, 21.0},
gokmeans.Node{100.5, 100.5, 100.5, 100.5},
gokmeans.Node{50.1, 50.1, 50.1, 50.1},
gokmeans.Node{64.2, 64.2, 64.2, 64.2},
}
func main() {
// Get a list of centroids and output the values
if success, centroids := gokmeans.Train(observations, 2, 50); success {
// Show the centroids
fmt.Println("The centroids are")
for _, centroid := range centroids {
fmt.Println(centroid)
}
// Output the clusters
fmt.Println("...")
for _, observation := range observations {
index := gokmeans.Nearest(observation, centroids)
fmt.Println(observation, "belongs in cluster", index+1, ".")
}
}
}
*/
package gokmeans
import (
@ -5,8 +43,12 @@ import (
"time"
)
// Node represents a single observation as a slice of floating point values,
// one value per dimension.
type Node []float64
// Train takes a slice of Nodes (observations) and produces as many centroids as specified by
// clusterCount. It stops adjusting centroids once they no longer move or after maxRounds rounds,
// whichever comes first. If there are fewer observations than the number of centroids requested,
// then Train will return (false, nil).
func Train(Nodes []Node, clusterCount int, maxRounds int) (bool, []Node) {
if int(len(Nodes)) < clusterCount {
return false, nil
@ -36,6 +78,11 @@ func Train(Nodes []Node, clusterCount int, maxRounds int) (bool, []Node) {
copy(centroids[i], Nodes[r.Intn(len(Nodes))])
}
return Train2(Nodes, clusterCount, maxRounds, centroids)
}
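// Train2 works like Train, but starts from the caller-provided initial centroids
// instead of picking them at random from the observations.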
func Train2(Nodes []Node, clusterCount int, maxRounds int, centroids []Node) (bool, []Node) {
// Train centroids
movement := true
for i := 0; i < maxRounds && movement; i++ {
@ -61,6 +108,7 @@ func Train(Nodes []Node, clusterCount int, maxRounds int) (bool, []Node) {
return true, centroids
}
// equal determines if two nodes have the same values.
func equal(node1, node2 Node) bool {
if len(node1) != len(node2) {
return false
@ -75,6 +123,7 @@ func equal(node1, node2 Node) bool {
return true
}
// Nearest returns the index of the centroid in nodes that is closest to in.
func Nearest(in Node, nodes []Node) int {
count := len(nodes)
@ -102,6 +151,7 @@ func Nearest(in Node, nodes []Node) int {
return mindex
}
// distance determines the squared Euclidean distance between two nodes
func distance(node1 Node, node2 Node) float64 {
length := len(node1)
squares := make(Node, length, length)
@ -126,6 +176,8 @@ func distance(node1 Node, node2 Node) float64 {
return sum
}
// meanNode takes an array of Nodes and returns a node which represents the average
// value for the provided nodes. This is used to center the centroids within their cluster.
func meanNode(values []Node) Node {
newNode := make(Node, len(values[0]))
@ -142,6 +194,8 @@ func meanNode(values []Node) Node {
return newNode
}
// wait stops a function from continuing until the provided channel has processed as
// many items as there are dimensions in the provided Node.
func wait(c chan int, values Node) {
count := len(values)