I have this code thanks to experts on year and i have recrea

I have this code thanks to experts on year and i have recreated it to fit my needs. However, I\'m getting two an error on lines 47 and 61 of a ZeroDivisionError.

line 61 new_centroids=update_location(data_points, clusters, centroids)

a Float division by zero.

line 47 new_centroids={k:sum(v)/float(len(v)) for k,v in clusters.items()}

Code

import os.path

import sys

input_name=\'input2.txt\' #raw_input(\"Enter the name of the input file:\")

output_name=\'output.txt\'#raw_input(\"Enter the name of the output file:\")

num_clusters=5 #input(\"Enter the number of clusters:\");

if os.path.exists(input_name):

with open(input_name) as in_f:

content = in_f.readline()

data_points = [float(i) for i in content.strip().split(\" \")]

k=num_clusters

centroids = dict(zip(range(k),data_points[0:k]))

clusters = dict(zip(range(k),[[] for i in range(k)]))

point_assignments= dict(zip(range(k), clusters))

old_point_assignments=dict()

def assign_to_clusters(data_points, clusters, centroids,point_assignments):

for key,point in enumerate(data_points):

closest_index = (\'inf\')

index = 0

for i in range(len(centroids)):

distance = abs(point-centroids[i])

if distance < closest_index:

closest_index = distance

index = i

clusters[index].append(point)

point_assignments[index]=closest_index

return point_assignments

def update_location(data_points, clusters, centroids):

new_centroids={k:sum(v)/float(len(v)) for k,v in clusters.items()}

centroids.update(new_centroids)

return centroids

#Algorithm

iteration=0

point_assignments = assign_to_clusters(data_points, clusters, centroids,point_assignments)

np=dict(old_point_assignments)

while point_assignments != np:

iteration += 1

print \"\ \",\"Iteration\", iteration

for t, o in clusters.items():

print t, \'\', o

new_centroids=update_location(data_points, clusters, centroids)

old_point_assignments = point_assignments

np=dict(old_point_assignments)

clusters = dict(zip(range(k),[[] for i in range(k)]))

point_assignments = assign_to_clusters(data_points, clusters, new_centroids,point_assignments)

print \"\"

####do output

with open(output_name, \'w\') as f:

for c,p in clusters.items():

for points in p:

f.write(\"Point \" + str(points) + \" in \" + str(c) + \"\ \")

f.close()

It prints start to print the first interation but does not complete. Any help would be great.

Iteration 1
0 [1.8]
1 []
2 []
3 []
4 []

Thanks

Solution

Please find the working code for the problem, previous code had many bugs, so I have changed the code a lot.

Please note that you should keep the input.txt file in same folder.

import os.path
import sys

# print \"Please enter the input_data filename\"
#in_file = raw_input().strip()
# print \"Please enter the output filename\"
#out_file = raw_input().strip()
# print \"Please enter the number of clusters\"
#k = int(raw_input().strip())

in_file = \"input.txt\"
out_file = \"output.txt\"
k = 4

if os.path.exists(in_file):
   with open(in_file) as in_f:
   content = in_f.readline()

l = [float(i) for i in content.strip().split(\" \")]


centroids = l[0:k]
centroids_prev = l[0:k]

it = 0
change = 1000
while((change > 0.01)and (it < 1000)):
   it = it + 1
   print \"Iteration \", it
   clusters = []
   for i in range(0,k):
       clusters.append([])
   for e in l:
       distances = []
       for c in centroids:
           distances.append(abs(c - e))
       clusters[distances.index(min(distances)) - 1].append(e)
   for ww in range(0,k):
       print ww, clusters[ww]
   #update centroids
   for x in range(0,len(clusters)):
       summ = 0.0
       for y in range(0,len(clusters[x])):
           summ = summ + clusters[x][y]
       if(len(clusters[x]) > 0):
           tmp = float(summ)/float(len(clusters[x]))
           centroids[x] = tmp
       else:
           tmp = 0.0
           centroids[x] = tmp
   centroids_prev.sort()
   centroids.sort()
   change = 0
   for s in range(0,k):
       change = change + abs(centroids[s] - centroids_prev[s])
       centroids_prev[s] = centroids[s]  
f= open(out_file,\"w+\")
for i in range(0,len(clusters)):
   for j in range(0,len(clusters[i])):
       f.write(\"Point \")
       f.write(str(clusters[i][j]))
       f.write(\" in clusters \")
       f.write(str(i))
       f.write(\"\ \")
f.close()

Sample input.txt

1 800 400 2 3 4 5 401 402 403 404 801 802 803 805

Sample output.txt

Point 3.0 in clusters 0
Point 4.0 in clusters 0
Point 5.0 in clusters 0
Point 400.0 in clusters 1
Point 401.0 in clusters 1
Point 402.0 in clusters 1
Point 403.0 in clusters 1
Point 404.0 in clusters 1
Point 800.0 in clusters 2
Point 801.0 in clusters 2
Point 802.0 in clusters 2
Point 803.0 in clusters 2
Point 805.0 in clusters 2
Point 1.0 in clusters 3
Point 2.0 in clusters 3

I have this code thanks to experts on year and i have recreated it to fit my needs. However, I\'m getting two an error on lines 47 and 61 of a ZeroDivisionError
I have this code thanks to experts on year and i have recreated it to fit my needs. However, I\'m getting two an error on lines 47 and 61 of a ZeroDivisionError
I have this code thanks to experts on year and i have recreated it to fit my needs. However, I\'m getting two an error on lines 47 and 61 of a ZeroDivisionError
I have this code thanks to experts on year and i have recreated it to fit my needs. However, I\'m getting two an error on lines 47 and 61 of a ZeroDivisionError

Get Help Now

Submit a Take Down Notice

Tutor
Tutor: Dr Jack
Most rated tutor on our site