Goal To implement clustering On the following tasks consider

Goal: To implement clustering. For the following tasks, consider the data in the file "Project4.xlsx". The file contains a large number (210,012) of length-3 vectors, one per line. Each vector is actually the red, green, and blue intensity values of one of the pixels in an image. For your information, the image has 516 rows and 407 columns. The pixels in the file are listed row by row from top to bottom, and within each row from left to right. For example, the first pixel in the file is the uppermost left pixel in the image. The second line of the file contains the pixel to the right of that one, and so on. In this assignment, we will explore clustering methods, applying them in particular to the problem of dividing the pixels of the image into a small number of similar clusters. Consider the K-means clustering algorithm, as described in class. In particular, consider a version in which the inputs to the algorithm are: 1. The set of data to be clustered (the vectors x1, x2, x3, ...). 2. The desired number of clusters, K. 3. Initial centroids for the K clusters. Then the algorithm proceeds by alternating: (1) assigning each instance to the class with the nearest centroid, and (2) recomputing the centroids of each class, until the assignments and centroids stop changing. Use your implementation to cluster the data in the file mentioned above, using K = 8, and the initial centroids: 255 255 255 1255 0 0 0 128 [the remainder of the centroid list is missing from the source]

Solution

Solution: See the code below:

Note: The solution is written in Java (file KmeansDemo.java).

-----------------------------------------

package kmeansdemo;

/**
 * Demonstrates K-means clustering of RGB pixel vectors.
 *
 * <p>Each data point is an {@code int[]} whose first three components are read
 * as the red, green and blue intensities of a pixel. The algorithm alternates
 * between assigning every point to its nearest centroid and recomputing each
 * centroid as the mean of its members, until no point changes cluster.
 *
 * <p>This class is not thread-safe.
 */
public class KmeansDemo {

    /** Number of colour components (red, green, blue) read from every vector. */
    private static final int CHANNELS = 3;

    private int[][] dataset; // points to cluster, one vector per row
    private int K; // no. of clusters
    private int[][] centriods; // current centroids, one per cluster
    private int[][][] clusters; // [cluster][data point][component] membership view

    /**
     * Creates an unconfigured instance. It holds no data, so calling
     * {@link #kmeans()} on it throws {@link IllegalStateException}; use
     * {@link #KmeansDemo(int[][], int, int[][])} to cluster real data.
     */
    public KmeansDemo() {
    }

    /**
     * Creates a clusterer for the given data.
     *
     * @param dataset   the vectors to cluster; every row must hold at least
     *                  three components (red, green, blue)
     * @param k         the desired number of clusters, at least 1
     * @param centriods the initial centroids, at least {@code k} rows of at
     *                  least three components each; this array is NOT copied
     *                  and is overwritten in place by {@link #kmeans()}
     * @throws IllegalArgumentException if an array is {@code null}, {@code k}
     *                                  is less than 1, or fewer than {@code k}
     *                                  centroids are supplied
     */
    public KmeansDemo(int[][] dataset, int k, int[][] centriods) {
        if (dataset == null || centriods == null) {
            throw new IllegalArgumentException("dataset and centriods must not be null");
        }
        if (k < 1) {
            throw new IllegalArgumentException("k must be at least 1 but was " + k);
        }
        if (centriods.length < k) {
            throw new IllegalArgumentException(
                    "expected at least " + k + " initial centroids but got " + centriods.length);
        }
        this.dataset = dataset;
        this.K = k;
        this.centriods = centriods;
        // Same shape as before (K x N x vector width); an empty dataset no
        // longer fails on dataset[0].
        int width = dataset.length == 0 ? CHANNELS : dataset[0].length;
        this.clusters = new int[k][dataset.length][width];
    }

    /**
     * Runs K-means until the cluster assignments stop changing.
     *
     * <p>Membership is tracked with an explicit assignment index per point, so
     * black pixels (0,0,0) are counted like any other colour and a point that
     * moves to another cluster leaves nothing behind in its old one. The
     * iteration uses exact (double) means; the centroids exposed through
     * {@link #getCentriods()} are those means truncated by integer division.
     * A cluster that receives no points keeps its previous centroid instead of
     * causing a division by zero. Ties between equally near centroids go to the
     * lowest cluster index.
     *
     * @throws IllegalStateException if the instance was created with the
     *                               no-argument constructor and has no data
     */
    public void kmeans() {
        if (dataset == null || centriods == null) {
            throw new IllegalStateException(
                    "no data to cluster: construct KmeansDemo with a dataset and centroids");
        }
        final int numDataPoints = dataset.length;

        // Exact means drive the iteration; truncating them every pass could
        // make the centroids oscillate and never settle.
        double[][] means = new double[K][CHANNELS];
        for (int cluster = 0; cluster < K; cluster++) {
            for (int c = 0; c < CHANNELS; c++) {
                means[cluster][c] = centriods[cluster][c];
            }
        }

        // -1 marks "not assigned yet" so the first pass always counts as a change.
        int[] assignment = new int[numDataPoints];
        for (int point = 0; point < numDataPoints; point++) {
            assignment[point] = -1;
        }

        boolean assignmentsChanged;
        do {
            assignmentsChanged = false;
            long[][] sums = new long[K][CHANNELS];
            int[] counts = new int[K];

            // Step 1: assign every point to the nearest centroid.
            for (int point = 0; point < numDataPoints; point++) {
                int nearest = nearestCentroid(dataset[point], means);
                if (nearest != assignment[point]) {
                    assignment[point] = nearest;
                    assignmentsChanged = true;
                }
                for (int c = 0; c < CHANNELS; c++) {
                    sums[nearest][c] += dataset[point][c];
                }
                counts[nearest]++;
            }

            // Step 2: recompute the centroid of every non-empty cluster.
            for (int cluster = 0; cluster < K; cluster++) {
                if (counts[cluster] == 0) {
                    continue; // empty cluster: keep its previous centroid
                }
                for (int c = 0; c < CHANNELS; c++) {
                    means[cluster][c] = (double) sums[cluster][c] / counts[cluster];
                    centriods[cluster][c] = (int) (sums[cluster][c] / counts[cluster]);
                }
            }
        } while (assignmentsChanged);

        // Publish the final membership in the K x N x width layout: a point's
        // row is filled in for its own cluster and zeroed in every other one.
        for (int cluster = 0; cluster < K; cluster++) {
            for (int point = 0; point < numDataPoints; point++) {
                boolean member = assignment[point] == cluster;
                for (int c = 0; c < CHANNELS; c++) {
                    clusters[cluster][point][c] = member ? dataset[point][c] : 0;
                }
            }
        }
    }

    /** Returns the index of the centroid closest to {@code point}; ties go to the lowest index. */
    private static int nearestCentroid(int[] point, double[][] means) {
        int nearest = 0;
        double best = squaredDistance(point, means[0]);
        for (int cluster = 1; cluster < means.length; cluster++) {
            double candidate = squaredDistance(point, means[cluster]);
            if (candidate < best) {
                best = candidate;
                nearest = cluster;
            }
        }
        return nearest;
    }

    /** Squared Euclidean distance over the colour channels; the square root is not needed to compare. */
    private static double squaredDistance(int[] point, double[] mean) {
        double total = 0.0;
        for (int c = 0; c < CHANNELS; c++) {
            double diff = point[c] - mean[c];
            total += diff * diff;
        }
        return total;
    }

    /**
     * @return the current centroids (the same array that was passed to the
     *         constructor, updated in place by {@link #kmeans()})
     */
    int[][] getCentriods() {
        return centriods;
    }

    /**
     * @return the clusters as a K x N x width array: {@code [k][j]} holds data
     *         point {@code j} if it belongs to cluster {@code k} and zeros
     *         otherwise. A black pixel therefore looks the same as a
     *         non-member in this view.
     */
    int[][][] getClusters() {
        return clusters;
    }

    /**
     * Entry-point stub; the data loading is left to the user.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // See the comments below
        // do process your file here and populate data
        // initialize K=8
        // initialize initial centroids in two dimensional array of size K X 3
        // create an object of KmeansDemo class
        // calls kmeans() function for doing clustering
        // access clusters if required.
    }
}

-----------------------------------------

Note: As the actual data was not provided, it is not possible to tell how many clusters there actually are.

 Goal To implement clustering On the following tasks consider the data in the file \
 Goal To implement clustering On the following tasks consider the data in the file \
 Goal To implement clustering On the following tasks consider the data in the file \

Get Help Now

Submit a Take Down Notice

Tutor
Tutor: Dr Jack
Most rated tutor on our site