K-MEANS: E AND M STEPS (plus image segmentation)
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
# Generate a toy dataset: 500 two-dimensional points drawn around 5 blob centres.
X, y = make_blobs(n_samples=500, n_features=2, centers=5, random_state=13)
print(X.shape, y.shape)

# Colour each point by the blob it was generated from. Per-point values go in
# `c`; `cmap` only accepts a colormap, not an array of labels.
plt.figure(0)
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.grid(True)
plt.show()
#k-means is an unsupervised learning algorithm, so we give it only X and never the labels y
#y is kept aside and can be used later to check accuracy
# One display colour per cluster; this list needs at least k entries.
color = ['red', 'green', 'yellow', 'orange', 'blue']
k = 5

# clusters maps a cluster index to that cluster's state: its current centre,
# its plot colour, and the list of points currently assigned to it.
clusters = {}
for kx in range(k):
    # One random starting centre per cluster, with every coordinate drawn
    # uniformly from [-10, 10).
    center = 10 * (2 * np.random.random((X.shape[1],)) - 1)
    clusters[kx] = {
        'center': center,
        'color': color[kx],
        'points': [],
    }
def distance(v1, v2):
    """Return the Euclidean distance between two points.

    v1 and v2 may be NumPy arrays or plain sequences of numbers, as long as
    they have the same shape.
    """
    diff = np.asarray(v1) - np.asarray(v2)
    return np.sqrt(np.sum(diff ** 2))
"""
K-means is a special case of the expectation-maximisation (EM) algorithm; assigning every point to its nearest centre is the E step
"""
def assign_point_to_clusters(X, clusters):
    """Assign every row of X to the cluster with the nearest centre (E step).

    Parameters:
        X: 2-D array with one point per row.
        clusters: dict mapping a cluster key to a dict that has at least a
            'center' (1-D array) and a 'points' list. Each point is appended
            to the 'points' list of its nearest cluster; ties go to the
            cluster that comes first in the dict.

    Returns None. The size of every cluster is printed after assignment.

    Raises:
        ValueError: if `clusters` is empty.
    """
    keys = list(clusters)
    if not keys:
        raise ValueError("clusters must contain at least one cluster")

    X = np.asarray(X)
    centers = np.array([clusters[key]['center'] for key in keys])

    # Pairwise Euclidean distances, shape (n_points, n_clusters), computed in
    # one broadcasted expression instead of a Python loop per point.
    diffs = X[:, np.newaxis, :] - centers[np.newaxis, :, :]
    dists = np.sqrt(np.sum(diffs ** 2, axis=2))
    nearest = np.argmin(dists, axis=1)

    for point, idx in zip(X, nearest):
        clusters[keys[idx]]['points'].append(point)

    for key in keys:
        print(len(clusters[key]['points']))
def updateClusters(X, clusters):
    """Move every cluster centre to the mean of its points (M step), then reassign.

    Parameters:
        X: 2-D array with one point per row.
        clusters: dict of cluster dicts with 'center' and 'points' entries.

    A cluster that has points gets the per-coordinate mean of those points as
    its new centre and its 'points' list is reset. A cluster with no points
    gets a fresh random centre with coordinates in [-10, 10). Finally all
    points in X are assigned again against the updated centres.
    """
    n_features = X.shape[1]
    for cluster in clusters.values():
        points = cluster['points']
        if points:
            # axis=0 averages each coordinate separately, for any dimension.
            cluster['center'] = np.mean(np.array(points), axis=0)
            cluster['points'] = []
        else:
            # Re-seed the empty cluster. This must be an assignment: a
            # comparison here would leave the stale centre in place.
            cluster['center'] = 10 * (2 * np.random.random((n_features,)) - 1)
    assign_point_to_clusters(X, clusters)
def plotClusters(clusters):
    """Scatter-plot every cluster's points in its colour and mark each centre.

    Parameters:
        clusters: dict of cluster dicts with 'points', 'color' and 'center'
            entries. Only the first two coordinates are plotted.

    Centres are drawn in black with the "1" marker. A single figure holding
    all clusters is shown at the end.
    """
    for cluster in clusters.values():
        points = np.array(cluster['points'])
        # A cluster that attracted no points has nothing to scatter; only its
        # centre is drawn. Checking explicitly keeps real plotting errors visible.
        if points.size:
            plt.scatter(points[:, 0], points[:, 1], color=cluster['color'])
        center = cluster['center']
        plt.scatter(center[0], center[1], marker="1", color='black')
    plt.show()
# Run one pass: assign the points, update the centres, then plot the clusters.
for step in (assign_point_to_clusters, updateClusters):
    step(X, clusters)
plotClusters(clusters)
--------------------------------------------------------
OUTPUT -->
*******************DBSCAN*******************
--> IT DOES NOT TAKE THE REQUIRED NUMBER OF CLUSTERS AS INPUT
--> USED WHEN THE DATA IS NOT CONVEX AND THE CLUSTERS CAN'T BE SEPARATED USING STRAIGHT LINES
--> HAS A DIFFERENT APPROACH THAN KMEANS: IT IS DENSITY-BASED
--> eps is a parameter it accepts: the maximum distance between two points for them to be considered neighbours (and so candidates for the same cluster)
******************CODE***********************
from sklearn.cluster import DBSCAN
from sklearn.datasets import make_circles,make_moons
# Two interleaving half-moons: a non-convex dataset.
X, y = make_moons(n_samples=200, noise=0.1)

dbs = DBSCAN(eps=0.2, min_samples=2)
# fit_predict both fits the model and returns the cluster label of every
# sample, so a separate fit() call beforehand would only repeat the work.
pred = dbs.fit_predict(X)
print(pred)

# Colour each point by the cluster DBSCAN put it in.
plt.scatter(X[:, 0], X[:, 1], c=pred)
plt.show()
********************OUTPUT*********************
image segmentation using kmeans
# Extract the colours out of the image and show one swatch per colour.
# NOTE(review): `centers` is defined outside this snippet; presumably it holds
# the k-means cluster centres, one colour triple per row. Confirm.
plt.figure(0, figsize=(15, 2))
colors = []
# Keep the 15-slot row, but widen it if there are more colours than slots so
# plt.subplot does not reject the index.
n_cols = max(15, len(centers))
for i, each_col in enumerate(centers, start=1):
    plt.subplot(1, n_cols, i)
    plt.axis("off")
    colors.append(each_col)
    # Solid 500x500 swatch filled with this colour.
    a = np.zeros((500, 500, 3), dtype='uint8')
    a[:, :, :] = each_col
    plt.imshow(a)
plt.show()
segmenting our original image
# Recolour every pixel with the colour of the cluster it was assigned to.
# NOTE(review): `kms`, `new_img`, `img` and `cv2` are defined outside this
# snippet. This assumes new_img has one row per pixel and that kms.labels_
# has one label per row of new_img. Confirm.
labels = kms.labels_
# Index the colour table with the whole label array at once instead of
# looping over pixels; writing through [:] keeps new_img's dtype.
new_img[:] = np.asarray(colors)[labels]
new_img = new_img.reshape((img.shape))
plt.imshow(new_img)
plt.show()

# NOTE(review): cv2.imshow interprets channels as BGR; if img is in RGB order
# the colours in this window will look swapped. Confirm.
cv2.imshow('img_new', new_img)
cv2.waitKey(0)
cv2.destroyAllWindows()
Exercise: perform DBSCAN on ordinary blob-shaped data generated with make_blobs
ReplyDelete