NAIVE BAYES CLASSIFIER AND LINEAR DISCRIMINANT ANALYSIS COMPARISON

#IMPORT LIBRARIES FIRST

import pandas as pd

from sklearn.preprocessing import LabelEncoder

from sklearn.model_selection import train_test_split

import numpy as np

from sklearn.naive_bayes import GaussianNB

from sklearn.metrics import confusion_matrix

from sklearn.metrics import roc_auc_score

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

# Read the mushroom dataset into a pandas DataFrame.
df = pd.read_csv('mushrooms.csv')

# Replace the categorical values of every column with integer codes.
# DataFrame.apply hands each column to the given function in turn, so the
# one encoder below is refit per column; afterwards it only remembers the
# classes of the last column it processed.
le = LabelEncoder()
ds = df.apply(lambda column: le.fit_transform(column))

# Target: the 'class' column. Features: every column from 'cap-shape'
# through 'habitat' (label-based slices include both end points).
y_data = ds.loc[:, 'class']
x_data = ds.loc[:, 'cap-shape':'habitat']

# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split reproducible from run to run.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.2,
    random_state=0,
)

# Show the train/test shapes of the features, then of the target.
print(*(part.shape for part in (x_train, x_test)))
print(*(part.shape for part in (y_train, y_test)))

# Gaussian naive Bayes classifier.
gnb = GaussianNB()

# Train on the training split, then label the held-out test rows.
gnb.fit(x_train, y_train)
y_pred = gnb.predict(x_test)

# Confusion matrix of true vs. predicted labels on the test split.
print(confusion_matrix(y_true=y_test, y_pred=y_pred))

# ROC AUC of the predictions (about 0.916 in the recorded run).
# NOTE(review): the score is computed from hard class labels, so it is the
# mean of sensitivity and specificity rather than plain accuracy; passing
# gnb.predict_proba(x_test)[:, 1] would give a threshold-independent AUC.
print(roc_auc_score(y_true=y_test, y_score=y_pred))

# Repeat the same experiment with linear discriminant analysis (LDA):
# fit on the training split and label the held-out test rows.
clf_lda = LinearDiscriminantAnalysis()
y_pred2 = clf_lda.fit(x_train, y_train).predict(x_test)

# Banner that sets the LDA results apart from the preceding output.
print('*******************LDA*****************************')

# Same two metrics as for naive Bayes, computed on the LDA predictions.
print(confusion_matrix(y_true=y_test, y_pred=y_pred2))
print(roc_auc_score(y_true=y_test, y_score=y_pred2))


----------------------------------------------OUTPUT----------------------------------------

(6499, 22) (1625, 22)
(6499,) (1625,)
[[773  79]
 [ 58 715]]
0.9161223268893219
***********************LDA***********************************
[[823  29]
 [ 51 722]]
0.9499928636068242






Comments

Popular posts from this blog

starting cv2 and face recognition.