I have this code, written with help from experts, and I have reworked it to fit my needs. However, I'm getting a ZeroDivisionError ("float division by zero") that the traceback reports on lines 47 and 61 of my script:
line 61: new_centroids=update_location(data_points, clusters, centroids)
line 47: new_centroids={k:sum(v)/float(len(v)) for k,v in clusters.items()}
Code
import os.path
import sys
# Configuration: the file names and the cluster count are hard-coded; the
# interactive prompts they replaced are kept as trailing comments.
input_name=\'input2.txt\' #raw_input(\"Enter the name of the input file:\")
output_name=\'output.txt\'#raw_input(\"Enter the name of the output file:\")
num_clusters=5 #input(\"Enter the number of clusters:\");
# NOTE(review): the paste lost its indentation, so it is not visible how much
# of what follows sits inside this `if` / `with` -- confirm against the
# original file.
if os.path.exists(input_name):
with open(input_name) as in_f:
# readline() returns only the first line of the file; points stored on later
# lines are never loaded.
content = in_f.readline()
# split(" ") splits on single spaces only, so consecutive spaces yield empty
# strings that float() rejects.
data_points = [float(i) for i in content.strip().split(\" \")]
k=num_clusters
# The first k points seed the centroids.  zip() stops at the shorter input, so
# if fewer than k points were read, centroids gets fewer than k entries while
# clusters below still gets k.
centroids = dict(zip(range(k),data_points[0:k]))
# One empty list per cluster id 0..k-1.
clusters = dict(zip(range(k),[[] for i in range(k)]))
# Iterating the clusters dict yields its keys, so this pairs cluster ids with
# cluster ids; it only serves as an initial value that differs from the empty
# old_point_assignments.
point_assignments= dict(zip(range(k), clusters))
old_point_assignments=dict()
def assign_to_clusters(data_points, clusters, centroids,point_assignments):
# Append each point to the list in `clusters` that belongs to its nearest
# centroid (distance is the absolute difference), record a distance in
# `point_assignments`, and return `point_assignments`.  Both dicts are
# modified in place.
for key,point in enumerate(data_points):
# NOTE(review): this is the string 'inf', not float('inf').  CPython 2 orders
# numbers before strings, so the first `distance < closest_index` test is
# still True there; Python 3 raises TypeError for that comparison.
closest_index = (\'inf\')
index = 0
for i in range(len(centroids)):
distance = abs(point-centroids[i])
if distance < closest_index:
# Despite its name, closest_index holds the smallest distance seen so far;
# `index` holds the id of the centroid at that distance.
closest_index = distance
index = i
# NOTE(review): presumably these two statements run once per point, after the
# inner loop -- the lost indentation makes that impossible to confirm here.
clusters[index].append(point)
# Keyed by cluster id, not by point: each cluster keeps only the distance of
# the last point assigned to it.
point_assignments[index]=closest_index
return point_assignments
def update_location(data_points, clusters, centroids):
# Replace every centroid with the mean of the points in its cluster, then
# return the (mutated) `centroids` dict.  `data_points` is not used.
# len(v) is 0 for a cluster that received no points; that division is what
# raises the ZeroDivisionError ("float division by zero").
new_centroids={k:sum(v)/float(len(v)) for k,v in clusters.items()}
centroids.update(new_centroids)
return centroids
#Algorithm
# Alternate between assigning points and moving centroids until an assignment
# pass returns the same mapping as the snapshot taken before it.
iteration=0
point_assignments = assign_to_clusters(data_points, clusters, centroids,point_assignments)
# np starts as a copy of the empty old_point_assignments, so the loop body runs
# at least once whenever the first pass recorded anything.
np=dict(old_point_assignments)
while point_assignments != np:
iteration += 1
# NOTE(review): the "\ " literal here and in the f.write call further down is
# presumably a newline escape damaged when the code was pasted -- confirm.
print \"\ \",\"Iteration\", iteration
for t, o in clusters.items():
print t, \'\', o
# update_location returns the dict it was given, so new_centroids and centroids
# are the same object.
new_centroids=update_location(data_points, clusters, centroids)
# assign_to_clusters mutates point_assignments in place; np is the copy that
# keeps the previous state available for the loop test.
old_point_assignments = point_assignments
np=dict(old_point_assignments)
# Start the next pass with empty lists so points do not accumulate across
# iterations.
clusters = dict(zip(range(k),[[] for i in range(k)]))
point_assignments = assign_to_clusters(data_points, clusters, new_centroids,point_assignments)
print \"\"
####do output
# Write one "Point <value> in <cluster id>" record per clustered point.
with open(output_name, \'w\') as f:
for c,p in clusters.items():
for points in p:
f.write(\"Point \" + str(points) + \" in \" + str(c) + \"\ \")
# The with-statement closes f when its block ends, so this explicit close is
# not needed.
f.close()
It starts to print the first iteration but does not complete. Any help would be great.
Iteration 1
 0 [1.8]
 1 []
 2 []
 3 []
 4 []
Thanks
Solution
Please find the working code for the problem below. The previous code had many bugs, so I have changed it a lot.
Please note that you should keep the input.txt file in the same folder as the script.
import os.path
import sys

# One-dimensional k-means clustering of the numbers stored in a text file.
#
# The file names and the cluster count are fixed below.  To ask the user
# instead, read them with raw_input() (input() on Python 3) and convert the
# cluster count with int().
#
# Every print call takes a single pre-formatted string so that the script
# behaves the same on Python 2 and Python 3.
in_file = "input.txt"
out_file = "output.txt"
k = 4


def read_points(path):
    """Return every whitespace-separated number in the file at *path*.

    The whole file is read, so the points may be spread over several lines
    and separated by any mix of spaces, tabs and newlines.

    Raises:
        ValueError: if a token cannot be parsed as a float.
    """
    with open(path) as in_f:
        return [float(token) for token in in_f.read().split()]


def assign_points(points, centroids):
    """Group *points* by their nearest centroid.

    Returns a list with one sub-list per centroid; sub-list ``i`` holds, in
    input order, the points whose closest centroid is ``centroids[i]``.  A
    point that is equally close to several centroids goes to the
    lowest-numbered one.
    """
    clusters = [[] for _ in centroids]
    for point in points:
        distances = [abs(centroid - point) for centroid in centroids]
        # The position of the smallest distance IS the cluster number.
        # Subtracting one from it would file each point under the
        # neighbouring cluster and decouple clusters[i] from centroids[i].
        clusters[distances.index(min(distances))].append(point)
    return clusters


def kmeans_1d(points, k, tolerance=0.01, max_iterations=1000):
    """Cluster the numbers in *points* into *k* groups with k-means.

    The first *k* points are the initial centroids, so cluster ``i`` in the
    result is the one that grew out of ``points[i]``.  Each iteration prints
    its number and the current clusters, then moves every centroid to the
    mean of its cluster.  Iteration stops once the centroids moved by at most
    *tolerance* in total, or after *max_iterations* passes.

    Returns:
        A list of *k* lists of points (some may be empty).

    Raises:
        ValueError: if *k* is smaller than 1 or larger than the number of
            points, because there would be no point to seed a centroid from.
    """
    if k < 1 or k > len(points):
        raise ValueError(
            "k must be between 1 and the number of points (%d), got %d"
            % (len(points), k)
        )

    centroids = list(points[:k])
    clusters = []
    for iteration in range(1, max_iterations + 1):
        # Two spaces on purpose: this is what the Python 2 statement
        # `print "Iteration ", it` produced.
        print("Iteration  %d" % iteration)
        clusters = assign_points(points, centroids)
        for index, members in enumerate(clusters):
            print("%d %s" % (index, members))

        # Move the centroids and add up how far they travelled.
        change = 0.0
        for index, members in enumerate(clusters):
            if not members:
                # An empty cluster keeps its previous centroid; resetting it
                # to 0.0 would drag it to an arbitrary position.
                continue
            mean = sum(members) / float(len(members))
            change += abs(mean - centroids[index])
            centroids[index] = mean

        if change <= tolerance:
            break
    return clusters


def write_clusters(path, clusters):
    """Write one ``Point <value> in clusters <index>`` line per point."""
    with open(path, "w") as out_f:
        for index, members in enumerate(clusters):
            for point in members:
                out_f.write(
                    "Point " + str(point) + " in clusters " + str(index) + "\n"
                )


if os.path.exists(in_file):
    clusters = kmeans_1d(read_points(in_file), k)
    write_clusters(out_file, clusters)
else:
    # Without the input there is nothing to cluster; say so instead of
    # failing later on an undefined variable.
    sys.stderr.write("Input file not found: " + in_file + "\n")
Sample input.txt
1 800 400 2 3 4 5 401 402 403 404 801 802 803 805
Sample output.txt
Point 3.0 in clusters 0
 Point 4.0 in clusters 0
 Point 5.0 in clusters 0
 Point 400.0 in clusters 1
 Point 401.0 in clusters 1
 Point 402.0 in clusters 1
 Point 403.0 in clusters 1
 Point 404.0 in clusters 1
 Point 800.0 in clusters 2
 Point 801.0 in clusters 2
 Point 802.0 in clusters 2
 Point 803.0 in clusters 2
 Point 805.0 in clusters 2
 Point 1.0 in clusters 3
 Point 2.0 in clusters 3




