Produce a new version of kmeans called MRKmeansMultirun Kmea
Solution
import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.List;
 import java.util.concurrent.Callable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import smile.math.Math;
 import smile.util.MulticoreExecutor;
 public class KMeans extends PartitionClustering<double[]> {
 private static final Logger logger = LoggerFactory.getLogger(KMeans.class);
/**
 * The total distortion.
 */
 double distortion;
 /**
 * The centroids of each cluster.
 */
 double[][] centroids;
/**
 * Constructor.
 */
 KMeans() {
 }
/**
 * Returns the distortion.
 */
 public double distortion() {
 return distortion;
 }
/**
 * Returns the centroids.
 */
 public double[][] centroids() {
 return centroids;
 }
/**
 * Cluster a new instance.
 * @param x a new instance.
 * @return the cluster label, which is the index of nearest centroid.
 */
 @Override
 public int predict(double[] x) {
 double minDist = Double.MAX_VALUE;
 int bestCluster = 0;
for (int i = 0; i < k; i++) {
 double dist = Math.squaredDistance(x, centroids[i]);
 if (dist < minDist) {
 minDist = dist;
 bestCluster = i;
 }
 }
return bestCluster;
 }
 public KMeans(double[][] data, int k) {
 this(data, k, 100);
 }
public KMeans(double[][] data, int k, int maxIter) {
 this(new BBDTree(data), data, k, maxIter);
 }
/**
 * Constructor. Clustering data into k clusters.
 * @param bbd the BBD-tree of data for fast clustering.
 * @param data the input data of which each row is a sample.
 * @param k the number of clusters.
 * @param maxIter the maximum number of iterations for each running.
 */
 KMeans(BBDTree bbd, double[][] data, int k, int maxIter) {
 if (k < 2) {
 throw new IllegalArgumentException(\"Invalid number of clusters: \" + k);
 }
if (maxIter <= 0) {
 throw new IllegalArgumentException(\"Invalid maximum number of iterations: \" + maxIter);
 }
int n = data.length;
 int d = data[0].length;
this.k = k;
 distortion = Double.MAX_VALUE;
 y = seed(data, k, ClusteringDistance.EUCLIDEAN);
 size = new int[k];
 centroids = new double[k][d];
for (int i = 0; i < n; i++) {
 size[y[i]]++;
 }
for (int i = 0; i < n; i++) {
 for (int j = 0; j < d; j++) {
 centroids[y[i]][j] += data[i][j];
 }
 }
for (int i = 0; i < k; i++) {
 for (int j = 0; j < d; j++) {
 centroids[i][j] /= size[i];
 }
 }
double[][] sums = new double[k][d];
 for (int iter = 1; iter <= maxIter; iter++) {
 double dist = bbd.clustering(centroids, sums, size, y);
 for (int i = 0; i < k; i++) {
 if (size[i] > 0) {
 for (int j = 0; j < d; j++) {
 centroids[i][j] = sums[i][j] / size[i];
 }
 }
 }
if (distortion <= dist) {
 break;
 } else {
 distortion = dist;
 }
 }
 }
   
   
 public KMeans(double[][] data, int k, int maxIter, int runs) {
 if (k < 2) {
 throw new IllegalArgumentException(\"Invalid number of clusters: \" + k);
 }
if (maxIter <= 0) {
 throw new IllegalArgumentException(\"Invalid maximum number of iterations: \" + maxIter);
 }
if (runs <= 0) {
 throw new IllegalArgumentException(\"Invalid number of runs: \" + runs);
 }
BBDTree bbd = new BBDTree(data);
List<KMeansThread> tasks = new ArrayList<>();
 for (int i = 0; i < runs; i++) {
 tasks.add(new KMeansThread(bbd, data, k, maxIter));
 }
KMeans best = new KMeans();
 best.distortion = Double.MAX_VALUE;
try {
 List<KMeans> clusters = MulticoreExecutor.run(tasks);
 for (KMeans kmeans : clusters) {
 if (kmeans.distortion < best.distortion) {
 best = kmeans;
 }
 }
 } catch (Exception ex) {
 logger.error(\"Failed to run K-Means on multi-core\", ex);
for (int i = 0; i < runs; i++) {
 KMeans kmeans = lloyd(data, k, maxIter);
 if (kmeans.distortion < best.distortion) {
 best = kmeans;
 }
 }
 }
this.k = best.k;
 this.distortion = best.distortion;
 this.centroids = best.centroids;
 this.y = best.y;
 this.size = best.size;
 }
/**
 * Adapter for running BBD-Tree based K-Means algorithm in thread pool.
 */
 static class KMeansThread implements Callable<KMeans> {
final BBDTree bbd;
 final double[][] data;
 final int k;
 final int maxIter;
KMeansThread(BBDTree bbd, double[][] data, int k, int maxIter) {
 this.bbd = bbd;
 this.data = data;
 this.k = k;
 this.maxIter = maxIter;
 }
@Override
 public KMeans call() {
 return new KMeans(bbd, data, k, maxIter);
 }
 }
public static KMeans lloyd(double[][] data, int k) {
 return lloyd(data, k, 100);
 }
  
 public static KMeans lloyd(double[][] data, int k, int maxIter) {
 if (k < 2) {
 throw new IllegalArgumentException(\"Invalid number of clusters: \" + k);
 }
if (maxIter <= 0) {
 throw new IllegalArgumentException(\"Invalid maximum number of iterations: \" + maxIter);
 }
int n = data.length;
 int d = data[0].length;
 int[][] nd = new int[k][d]; // The number of non-missing values per cluster per variable.
double distortion = Double.MAX_VALUE;
 int[] size = new int[k];
 double[][] centroids = new double[k][d];
 int[] y = seed(data, k, ClusteringDistance.EUCLIDEAN_MISSING_VALUES);
int np = MulticoreExecutor.getThreadPoolSize();
 List<LloydThread> tasks = null;
 if (n >= 1000 && np >= 2) {
 tasks = new ArrayList<>(np + 1);
 int step = n / np;
 if (step < 100) {
 step = 100;
 }
int start = 0;
 int end = step;
 for (int i = 0; i < np-1; i++) {
 tasks.add(new LloydThread(data, centroids, y, start, end));
 start += step;
 end += step;
 }
 tasks.add(new LloydThread(data, centroids, y, start, n));
 }
for (int iter = 0; iter < maxIter; iter++) {
 Arrays.fill(size, 0);
 for (int i = 0; i < k; i++) {
 Arrays.fill(centroids[i], 0);
 Arrays.fill(nd[i], 0);
 }
for (int i = 0; i < n; i++) {
 int m = y[i];
 size[m]++;
 for (int j = 0; j < d; j++) {
 if (!Double.isNaN(data[i][j])) {
 centroids[m][j] += data[i][j];
 nd[m][j]++;
 }
 }
 }
for (int i = 0; i < k; i++) {
 for (int j = 0; j < d; j++) {
 centroids[i][j] /= nd[i][j];
 }
 }
double wcss = Double.NaN;
 if (tasks != null) {
 try {
 wcss = 0.0;
 for (double ss : MulticoreExecutor.run(tasks)) {
 wcss += ss;
 }
 } catch (Exception ex) {
 logger.error(\"Failed to run K-Means on multi-core\", ex);
wcss = Double.NaN;
 }
 }
if (Double.isNaN(wcss)) {
 wcss = 0.0;
 for (int i = 0; i < n; i++) {
 double nearest = Double.MAX_VALUE;
 for (int j = 0; j < k; j++) {
 double dist = squaredDistance(data[i], centroids[j]);
 if (nearest > dist) {
 y[i] = j;
 nearest = dist;
 }
 }
 wcss += nearest;
 }
 }
   
 if (distortion <= wcss) {
 break;
 } else {
 distortion = wcss;
 }
 }
// In case of early stop, we should recalculate centroids and clusterSize.
 Arrays.fill(size, 0);
 for (int i = 0; i < k; i++) {
 Arrays.fill(centroids[i], 0);
 Arrays.fill(nd[i], 0);
 }
for (int i = 0; i < n; i++) {
 int m = y[i];
 size[m]++;
 for (int j = 0; j < d; j++) {
 if (!Double.isNaN(data[i][j])) {
 centroids[m][j] += data[i][j];
 nd[m][j]++;
 }
 }
 }
for (int i = 0; i < k; i++) {
 for (int j = 0; j < d; j++) {
 centroids[i][j] /= nd[i][j];
 }
 }
KMeans kmeans = new KMeans();
 kmeans.k = k;
 kmeans.distortion = distortion;
 kmeans.size = size;
 kmeans.centroids = centroids;
 kmeans.y = y;
return kmeans;
 }
  
 public static KMeans lloyd(double[][] data, int k, int maxIter, int runs) {
 if (k < 2) {
 throw new IllegalArgumentException(\"Invalid number of clusters: \" + k);
 }
if (maxIter <= 0) {
 throw new IllegalArgumentException(\"Invalid maximum number of iterations: \" + maxIter);
 }
if (runs <= 0) {
 throw new IllegalArgumentException(\"Invalid number of runs: \" + runs);
 }
KMeans best = lloyd(data, k, maxIter);
for (int i = 1; i < runs; i++) {
 KMeans kmeans = lloyd(data, k, maxIter);
 if (kmeans.distortion < best.distortion) {
 best = kmeans;
 }
 }
return best;
 }
/**
 * Adapter for running Lloyd algorithm in thread pool.
 */
 static class LloydThread implements Callable<Double> {
/**
 * The start index of data portion for this task.
 */
 final int start;
 /**
 * The end index of data portion for this task.
 */
 final int end;
 final double[][] data;
 final int k;
 final double[][] centroids;
 int[] y;
LloydThread(double[][] data, double[][] centroids, int[] y, int start, int end) {
 this.data = data;
 this.k = centroids.length;
 this.y = y;
 this.centroids = centroids;
 this.start = start;
 this.end = end;
 }
@Override
 public Double call() {
 double wcss = 0.0;
 for (int i = start; i < end; i++) {
 double nearest = Double.MAX_VALUE;
 for (int j = 0; j < k; j++) {
 double dist = squaredDistance(data[i], centroids[j]);
 if (nearest > dist) {
 y[i] = j;
 nearest = dist;
 }
 }
 wcss += nearest;
 }
   
 return wcss;
 }
 }
@Override
 public String toString() {
 StringBuilder sb = new StringBuilder();
   
 sb.append(String.format(\"K-Means distortion: %.5f%n\", distortion));
 sb.append(String.format(\"Clusters of %d data points of dimension %d:%n\", y.length, centroids[0].length));
 for (int i = 0; i < k; i++) {
 int r = (int) Math.round(1000.0 * size[i] / y.length);
 sb.append(String.format(\"%3d\\t%5d (%2d.%1d%%)%n\", i, size[i], r / 10, r % 10));
 }
   
 return sb.toString();
 }
 }







