K MEANS E AND M STEP (image segmentation also)

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs


# Build a toy dataset: 500 two-dimensional points drawn around 5 blob centres.
# random_state is fixed so the same points are generated on every run.
X, y = make_blobs(n_samples=500, n_features=2, centers=5, random_state=13)
print(X.shape, y.shape)

plt.figure(0)
# Colour each point by its blob label. Per-point values go in `c`;
# `cmap` expects a colormap, not the label array.
plt.scatter(X[:, 0], X[:, 1], c=y)
plt.grid(True)
plt.show()


#K-means is an unsupervised learning algorithm, so we pass it only X and never the y labels.

#The y labels are kept only so that we can check accuracy later.

# One plotting colour per cluster; k is the number of clusters to fit.
color = ['red', 'green', 'yellow', 'orange', 'blue']
k = 5

# Every cluster starts with no assigned points and a random centre that has
# one coordinate per feature of X, each drawn uniformly from [-10, 10).
n_dims = X.shape[1]
clusters = {}
for idx in range(k):
    clusters[idx] = {
        'center': 10 * (2 * np.random.random((n_dims,)) - 1),
        'color': color[idx],
        'points': [],
    }

    

    

def distance(v1, v2):
    """Return the Euclidean distance between the vectors *v1* and *v2*."""
    delta = v1 - v2
    squared_length = np.square(delta).sum()
    return np.sqrt(squared_length)


"""

K-means is a special case of the expectation-maximisation algorithm; assigning each point to its nearest centre is called the E step.

"""


def assign_point_to_clusters(X, clusters):
    """Assign every row of X to the cluster with the nearest centre (E step).

    Args:
        X: 2-D array with one sample per row.
        clusters: dict mapping a cluster key to a dict that has a 'center'
            vector and a 'points' list. Each row of X is appended to the
            'points' list of its nearest cluster, so the dict is mutated
            in place.

    Prints the number of points held by each cluster after assignment.
    """
    if not clusters:
        return

    # Work with however many clusters were passed in, not a fixed count.
    keys = list(clusters)
    centers = np.array([clusters[key]['center'] for key in keys])

    # Euclidean distance from every sample to every centre in one broadcast:
    # the result has shape (n_samples, n_clusters).
    diffs = X[:, np.newaxis, :] - centers[np.newaxis, :, :]
    dists = np.sqrt((diffs ** 2).sum(axis=2))

    # argmin picks the first centre on ties, like a per-point argmin would.
    nearest = dists.argmin(axis=1)
    for row, cluster_idx in zip(X, nearest):
        clusters[keys[cluster_idx]]['points'].append(row)

    for key in keys:
        print(len(clusters[key]['points']))

           

            

def updateClusters(X, clusters):
    """Move each centre to the mean of its points, then reassign (M step).

    Args:
        X: 2-D array with one sample per row.
        clusters: dict of cluster dicts with 'center' and 'points' entries;
            mutated in place.

    A cluster that currently holds points gets the per-feature mean of those
    points as its new centre and has its point list cleared. A cluster with
    no points is re-seeded with a fresh random centre in [-10, 10) per
    feature. Finally every sample is reassigned to the updated centres.
    """
    for cluster in clusters.values():
        points = cluster['points']
        if len(points) > 0:
            # axis=0 averages each feature column, whatever the dimensionality.
            cluster['center'] = np.mean(np.array(points), axis=0)
            cluster['points'] = []
        else:
            # Assignment, not comparison: an empty cluster must actually
            # receive a new centre or it can never attract points.
            cluster['center'] = 10 * (2 * np.random.random((X.shape[1],)) - 1)

    assign_point_to_clusters(X, clusters)

        


def plotClusters(clusters):
    """Scatter-plot every cluster's points in its colour and mark its centre.

    Args:
        clusters: dict of cluster dicts with 'points', 'color' and 'center'
            entries. Only the first two coordinates of each point and centre
            are drawn.
    """
    for cluster in clusters.values():
        points = np.array(cluster['points'])
        # An empty cluster has nothing to index; skip its points explicitly
        # instead of hiding every possible error behind a bare except.
        if points.size:
            plt.scatter(points[:, 0], points[:, 1], color=cluster['color'])

        center = cluster['center']
        plt.scatter(center[0], center[1], marker="1", color='black')

    plt.show()

    


        

#call the functions 

# One round of K-means: assign the points to the initial centres, then update
# the centres (which also reassigns the points), and finally draw the result.
for step in (assign_point_to_clusters, updateClusters):
    step(X, clusters)
plotClusters(clusters)


--------------------------------------------------------

OUTPUT -->

101
99
300
0
0


*******************DBSCAN*******************

--> IT DOESN'T TAKE THE REQUIRED NUMBER OF CLUSTERS AS AN INPUT

--> USED WHEN THE CLUSTERS ARE NOT CONVEX AND CAN'T BE SEPARATED USING STRAIGHT LINES

--> TAKES A DIFFERENT APPROACH FROM KMEANS: IT GROUPS POINTS BY DENSITY

--> eps is a parameter it accepts: the maximum distance between two points for them to be considered neighbours (and so candidates for the same cluster)

******************CODE***********************

from sklearn.cluster import DBSCAN

from sklearn.datasets import make_circles,make_moons

# Two interleaving half-moons with a little noise: a non-convex dataset.
X, y = make_moons(n_samples=200, noise=0.1)

dbs = DBSCAN(eps=0.2, min_samples=2)

# fit_predict clusters the data and returns the labels in one call, so a
# separate fit() beforehand would only run the clustering twice.
pred = dbs.fit_predict(X)
print(pred)

# Colour each point by the cluster label DBSCAN gave it.
plt.scatter(X[:, 0], X[:, 1], c=pred)

********************OUTPUT*********************

[ 0  0  0  0  0  0  0  1  0  0  1  0  1  0  1  1  1  0  1  0  0  1  1  1
  1  0  0  0  0  1  0  1  1  1  1  0  1  0  1  1  0  1  1  0  1  1  0  1
  0  0  0  0  0  1  1  0  1  0  0  1  1  0  1  1  1  1  0  0  0  1  1  1
  1  1  1  1  0  1  1  0  0  0  1  1 -1  0  1  1  0  0  0  0  0  1  0  0
  1  0  1  0  1  0  1  1  0  1  0  0  1  0  1  0  0  1  1  1  1  0  1  0
  1  0  1  0  1  1  1  1  1  1  0  0  1  1  0  0  0  1  1  0  0  0  0  0
  1  0  1  0  0  1  0  1  1  1  0  0  0  0  0  0  0  1  0  1  0  1  1  1
  1  1  1  1  1  1  1  1  0  0  0  1  1  0  1  0  0  1  0  0  0  1  0  1
  0  0  0  1  0  1  1  0]
Out[53]:
<matplotlib.collections.PathCollection at 0x1cf6cc88>




image segmentation using kmeans

#READING THE IMAGE

import cv2
img = cv2.imread('messi.jpg')
# cv2.imread returns None instead of raising when the file cannot be read;
# fail here with a clear message rather than on the attribute access below.
if img is None:
    raise FileNotFoundError("could not read image 'messi.jpg'")
print(img.shape)
#cv2.imshow('messi',img)
#cv2.waitKey(0)
#cv2.destroyAllWindows()
# OpenCV stores channels as BGR; convert to RGB before showing with matplotlib.
im2 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
plt.imshow(im2)

#RESHAPING IMAGE IN A 2D ARRAY

# Flatten the image to one row per pixel with 3 channel values each.
# -1 lets numpy work out the pixel count, so any image size works.
all_pixels = img.reshape(-1, 3)
from sklearn.cluster import KMeans
# Number of colour clusters to extract from the image.
dominant_colors = 15
kms = KMeans(n_clusters = dominant_colors)
kms.fit(all_pixels)


#EXTRACTING COLORS OUT OF THE IMAGE

# The fitted cluster centres are the dominant colours. Cast them to uint8 so
# they can be used directly as pixel values.
centers = np.array(kms.cluster_centers_, dtype='uint8')

plt.figure(0, figsize=(15, 2))

colors = []

# One subplot per dominant colour, however many clusters were fitted.
for i, each_col in enumerate(centers, start=1):
    plt.subplot(1, len(centers), i)
    plt.axis("off")

    # Keep the centre in the image's own channel order for later reuse.
    colors.append(each_col)

    # Solid swatch of this colour. The pixels came from cv2.imread (BGR), so
    # the channels are reversed to RGB for matplotlib.
    a = np.zeros((500, 500, 3), dtype='uint8')
    a[:, :, :] = each_col[::-1]
    plt.imshow(a)

plt.show()

segmenting our original image


# Replace every pixel with the colour of the cluster it was assigned to.
# Indexing the palette with the label array does this for all pixels at once.
palette = np.asarray(colors, dtype='uint8')
new_img = palette[kms.labels_]
print(new_img.shape)

# Restore the flat (n_pixels, 3) array to the original image shape.
new_img = new_img.reshape(img.shape)

# The pixels are still in OpenCV's BGR order: convert for matplotlib...
plt.imshow(cv2.cvtColor(new_img, cv2.COLOR_BGR2RGB))
plt.show()

# ...but pass them unchanged to cv2.imshow, which expects BGR.
cv2.imshow('img_new', new_img)
cv2.waitKey(0)
cv2.destroyAllWindows()


       




Comments

Post a Comment

Popular posts from this blog

starting cv2 and face recognition.