# Naive Bayes classifier and Linear Discriminant Analysis comparison
#IMPORT LIBRARIES FIRST
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import confusion_matrix
from sklearn.metrics import roc_auc_score
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
# Read the mushroom dataset from the working directory into a DataFrame.
df = pd.read_csv('mushrooms.csv')

# Integer-encode every column so the estimators receive numeric input.
# NOTE: the single encoder is refit on each column in turn, so after this
# call `le` only holds the mapping of the last column it processed.
le = LabelEncoder()
ds = df.apply(lambda column: le.fit_transform(column))

# Features: every column from 'cap-shape' through 'habitat' (label-based
# slices include both endpoints). Target: the 'class' column.
x_data = ds.loc[:, 'cap-shape':'habitat']
y_data = ds['class']

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
x_train, x_test, y_train, y_test = train_test_split(
    x_data,
    y_data,
    test_size=0.2,
    random_state=0,
)

# Sanity check: print the (train, test) shapes for features, then targets.
for train_part, test_part in ((x_train, x_test), (y_train, y_test)):
    print(train_part.shape, test_part.shape)
# Gaussian Naive Bayes: fit on the training split, then predict the test split.
gnb = GaussianNB()
gnb.fit(x_train, y_train)
y_pred = gnb.predict(x_test)

# Confusion matrix: rows are the true classes, columns the predicted classes.
print(confusion_matrix(y_test, y_pred))

# ROC AUC is a ranking metric, not accuracy. It must be computed from
# continuous scores: feeding it hard 0/1 predictions collapses the ROC curve
# to a single point, which yields balanced accuracy instead of the real AUC.
# Column 1 of predict_proba is the probability of gnb.classes_[1], the label
# roc_auc_score treats as the positive class in the binary case.
y_score = gnb.predict_proba(x_test)[:, 1]
print(roc_auc_score(y_test, y_score))
# Linear Discriminant Analysis: same train / predict / evaluate procedure as
# used for the Naive Bayes model, so the two classifiers can be compared.
clf_lda = LinearDiscriminantAnalysis()
clf_lda.fit(x_train, y_train)
y_pred2 = clf_lda.predict(x_test)

print('*******************LDA*****************************')

# Confusion matrix: rows are the true classes, columns the predicted classes.
print(confusion_matrix(y_test, y_pred2))

# ROC AUC needs continuous scores; hard 0/1 predictions would reduce it to
# balanced accuracy. Column 1 of predict_proba is the probability of
# clf_lda.classes_[1], which roc_auc_score treats as the positive class.
y_score2 = clf_lda.predict_proba(x_test)[:, 1]
print(roc_auc_score(y_test, y_score2))
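# ----------------------------------------------OUTPUT----------------------------------------
# Recorded output from the original run. The two ROC AUC figures below were
# computed from hard class predictions, i.e. they equal balanced accuracy.
# (6499, 22) (1625, 22)
# (6499,) (1625,)
# [[773 79]
# [ 58 715]]
# 0.9161223268893219
# ***********************LDA***********************************
# [[823 29]
# [ 51 722]]
# 0.9499928636068242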
Comments
Post a Comment