Logistic regression after PCA

 #from sklearn.datasets import fetch_mldata

from sklearn.decomposition import PCA

from sklearn import metrics

import pandas as pd

from sklearn.linear_model import LogisticRegression

from sklearn.preprocessing import StandardScaler

import numpy as np

# Read the CSV into a frame, then split it column-wise: the first column
# becomes the target (df_y) and every remaining column a feature (df_x).
df = pd.read_csv('mnist_test.csv')
df_y, df_x = df.iloc[:, 0], df.iloc[:, 1:]

# Report the dimensions of the feature matrix and of the target.
print(df_x.shape, df_y.shape)

from sklearn.model_selection import train_test_split


# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    df_x,
    df_y,
    test_size=0.2,
    random_state=0,
)

print(x_train.shape, x_test.shape)
print(y_train.shape, y_test.shape)

# Fractions of variance that PCA is asked to retain, one run per entry.
benchmark = [.95, .90, .85, 0.80, 0.75, 0.70, 0.60]

# Standardise the data: the scaler is fitted on the training split only and
# then applied to both splits.
scl = StandardScaler().fit(x_train)
x_train, x_test = scl.transform(x_train), scl.transform(x_test)

# Empty results table with one column each for the retained variance, the
# number of components, the elapsed time and the accuracy.
cols = ['variance', 'n_comp', 'time', 'accuracy']
df_result = pd.DataFrame(columns=cols)


import time


def pca_applier(variance,x_train,x_test,y_train,y_test):
    """Benchmark logistic regression on PCA-reduced data for one variance level.

    Fits PCA on ``x_train`` using ``variance`` as its ``n_components``
    argument, projects both splits, trains a default ``LogisticRegression``
    on the projected training data and scores it on the projected test data.
    One row (``variance``, ``n_comp``, ``time``, ``accuracy``) is added to the
    module-level ``df_result`` table.

    Args:
        variance: Value passed to ``PCA``; the script passes fractions of
            variance to retain (e.g. ``0.95``).
        x_train: Training features.
        x_test: Test features.
        y_train: Training targets.
        y_test: Test targets.

    Returns:
        None. The result is recorded by rebinding the global ``df_result``.
    """
    global df_result

    pca = PCA(variance)
    pca.fit(x_train)
    n_components = pca.n_components_

    z_pca = pca.transform(x_train)
    zpca_test = pca.transform(x_test)

    lgr = LogisticRegression()

    # The timed section covers classifier fitting, prediction and scoring;
    # the PCA fit/transform above is deliberately outside it, as before.
    # perf_counter() is monotonic and high-resolution, so it is the right
    # clock for measuring an interval.
    start = time.perf_counter()
    lgr.fit(z_pca, y_train)
    pred = lgr.predict(zpca_test)
    accuracy = metrics.accuracy_score(y_test, pred)
    timing = time.perf_counter() - start

    row = pd.DataFrame(
        [[variance, n_components, timing, accuracy]],
        columns=cols,
    )

    # DataFrame.append was removed in pandas 2.0, so build the new table with
    # pd.concat. Concatenating onto an empty frame is itself deprecated, so
    # the first row simply becomes the table.
    if df_result.empty:
        df_result = row
    else:
        df_result = pd.concat([df_result, row], ignore_index=True)

    

    

# Run the benchmark once per variance level and echo the results table after
# every run so progress is visible.
for v in benchmark:
    pca_applier(
        variance=v,
        x_train=x_train,
        x_test=x_test,
        y_train=y_train,
        y_test=y_test,
    )
    print('i ran')
    print(df_result)


import matplotlib.pyplot as plt


# Plot accuracy against elapsed time, one point per benchmark run, on the
# current axes.
axes = plt.gca()
axes.plot(df_result.loc[:, 'time'], df_result.loc[:, 'accuracy'])
axes.set_xlabel('time')
axes.set_ylabel('accuracy')

plt.show()

"""

import numpy as np

import matplotlib.pyplot as plt

cum_var_explained = np.cumsum(pca2.explained_variance_ratio_)

plt.figure(figsize=(8,6))

plt.plot(cum_var_explained)

plt.show()

# Shows how the cumulative explained variance grows with the number of components.

"""

****************************output**********************

ref:-http://localhost:8888/tree

   variance  n_comp      time  accuracy
0      0.95   276.0  1.469610    0.8945
1      0.90   190.0  1.050148    0.9030
2      0.85   144.0  0.909179    0.9060
3      0.80   114.0  0.842857    0.9120
4      0.75    91.0  0.783063    0.9160
5      0.70    74.0  0.748055    0.9125
6      0.60    49.0  0.678146    0.9040

Comments

Popular posts from this blog

starting cv2 and face recognition.