Goal: To implement clustering. Consider the following tasks.
Solution
Solution: See the code below:
Note: The solution is written in Java (KmeansDemo.java).
-----------------------------------------
package kmeansdemo;
/**
 * Demonstrates K-means clustering on integer points with three components
 * (red, green, blue).
 *
 * <p>Each pass assigns every data point to its nearest centroid (Euclidean
 * distance, ties going to the lowest cluster index) and then moves every
 * centroid to the integer mean of the points assigned to it. Passes repeat
 * until no centroid moves.
 *
 * <p>The centroid array handed to the constructor is stored by reference and
 * is updated in place by {@link #kmeans()}.
 */
public class KmeansDemo {
    /** Number of components of each point that the algorithm reads. */
    private static final int CHANNELS = 3;

    /**
     * Upper bound on the number of passes. Centroids are truncated to
     * integers, so this guards against a run that never settles.
     */
    private static final int MAX_ITERATIONS = 1000;

    private int dataset[][]; // dataset, one row per point
    private int K; // no. of clusters
    private int centriods[][]; // set of centroids, one row per cluster
    private int clusters[][][]; // [cluster][point index][component]

    /**
     * Creates an unconfigured instance. No dataset or centroids are set, so
     * {@link #kmeans()} cannot be run on an object built this way.
     */
    public KmeansDemo() {
    }

    /**
     * Creates a clustering job.
     *
     * @param dataset   points to cluster; {@link #kmeans()} reads the first
     *                  three components of each row
     * @param k         number of clusters, at least 1
     * @param centriods initial centroids, at least {@code k} rows; this array
     *                  is kept by reference and overwritten by {@link #kmeans()}
     * @throws NullPointerException     if {@code dataset} or {@code centriods} is null
     * @throws IllegalArgumentException if {@code k < 1} or fewer than {@code k}
     *                                  centroids are supplied
     */
    public KmeansDemo(int[][] dataset, int k, int[][] centriods) {
        if (dataset == null || centriods == null) {
            throw new NullPointerException("dataset and centriods must not be null");
        }
        if (k < 1) {
            throw new IllegalArgumentException("k must be at least 1 but was " + k);
        }
        if (centriods.length < k) {
            throw new IllegalArgumentException(
                    "expected at least " + k + " centroids but got " + centriods.length);
        }
        this.dataset = dataset;
        K = k;
        this.centriods = centriods;
        // An empty dataset has no first row to take the width from.
        int width = dataset.length == 0 ? CHANNELS : dataset[0].length;
        this.clusters = new int[K][dataset.length][width];
    }

    /**
     * Runs K-means until the centroids stop moving (or the iteration cap is
     * reached). Afterwards {@link #getCentriods()} holds the final centroids
     * and {@link #getClusters()} the final membership.
     *
     * @throws IllegalStateException if the instance was created with the
     *                               no-argument constructor
     */
    public void kmeans() {
        if (dataset == null || centriods == null || clusters == null) {
            throw new IllegalStateException("dataset and centroids have not been set");
        }
        // Membership is tracked by index so that a point whose value is
        // (0, 0, 0) is still counted as a member of its cluster.
        int[] assignment = new int[dataset.length];
        for (int iteration = 0; iteration < MAX_ITERATIONS; iteration++) {
            assignPoints(assignment);
            if (!updateCentroids(assignment)) {
                break; // no centroid moved: converged
            }
        }
    }

    /**
     * Assigns every point to its nearest centroid and rewrites the cluster
     * table so that it reflects only the current assignment.
     */
    private void assignPoints(int[] assignment) {
        for (int point = 0; point < dataset.length; point++) {
            int nearest = nearestCentroid(dataset[point]);
            assignment[point] = nearest;
            for (int cluster = 0; cluster < K; cluster++) {
                for (int channel = 0; channel < CHANNELS; channel++) {
                    // Zero the slot in every other cluster so a point that
                    // moved does not linger in its previous cluster.
                    clusters[cluster][point][channel] =
                            cluster == nearest ? dataset[point][channel] : 0;
                }
            }
        }
    }

    /**
     * Returns the index of the centroid closest to {@code point}. Squared
     * distances are compared, which picks the same centroid as comparing the
     * distances themselves; ties go to the lowest index.
     */
    private int nearestCentroid(int[] point) {
        int nearest = 0;
        long best = Long.MAX_VALUE;
        for (int cluster = 0; cluster < K; cluster++) {
            long squared = 0;
            for (int channel = 0; channel < CHANNELS; channel++) {
                long diff = (long) point[channel] - centriods[cluster][channel];
                squared += diff * diff;
            }
            if (squared < best) {
                best = squared;
                nearest = cluster;
            }
        }
        return nearest;
    }

    /**
     * Moves each centroid to the integer mean of its assigned points. A
     * cluster with no points keeps its previous centroid.
     *
     * @return {@code true} if at least one centroid component changed
     */
    private boolean updateCentroids(int[] assignment) {
        long[][] sums = new long[K][CHANNELS];
        int[] counts = new int[K];
        for (int point = 0; point < dataset.length; point++) {
            int cluster = assignment[point];
            counts[cluster]++;
            for (int channel = 0; channel < CHANNELS; channel++) {
                sums[cluster][channel] += dataset[point][channel];
            }
        }
        boolean moved = false;
        for (int cluster = 0; cluster < K; cluster++) {
            if (counts[cluster] == 0) {
                continue; // nothing to average; avoids division by zero
            }
            for (int channel = 0; channel < CHANNELS; channel++) {
                int mean = (int) (sums[cluster][channel] / counts[cluster]);
                if (mean != centriods[cluster][channel]) {
                    centriods[cluster][channel] = mean;
                    moved = true;
                }
            }
        }
        return moved;
    }

    /**
     * @return the centroids, one row per cluster (the same array that was
     *         passed to the constructor)
     */
    int[][] getCentriods() {
        return centriods;
    }

    /**
     * @return the clusters, indexed as {@code [cluster][point index][component]};
     *         the row for a point holds that point's values in the cluster it
     *         belongs to and zeros in every other cluster
     */
    int[][][] getClusters() {
        return clusters;
    }

    /**
     * Entry point placeholder; see the steps listed in the body.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // See the comments below
        // do process your file here and populate data
        // initialize K=8
        // initialize initial centroids in two dimensional array of size K X 3
        // create an object of KmeansDemo class
        // calls kmeans() function for doing clustering
        // access clusters if required.
    }
}
-----------------------------------------
Note: Because no actual data was provided, it is not possible to say how many clusters there actually are.


