Artificial neural networks
## Defining the architecture
def softmax(a):
    """Return the row-wise softmax of a 2-D array of scores.

    Args:
        a: Array-like with one row of raw scores (logits) per example.

    Returns:
        Array of the same shape as ``a`` in which every row is
        non-negative and sums to 1.
    """
    # Subtracting each row's maximum leaves the result mathematically
    # unchanged but keeps np.exp from overflowing to inf (and the
    # division from producing nan) when the scores are large.
    shifted = a - np.max(a, axis=1, keepdims=True)
    ea = np.exp(shifted)
    return ea / np.sum(ea, axis=1, keepdims=True)
class NeuralNetwork:
    """Fully connected classifier: two tanh hidden layers and a softmax output.

    The parameters are stored in ``self.model`` under the keys
    ``W1``/``b1``, ``W2``/``b2`` and ``W3``/``b3``. ``forward`` caches the
    layer activations in ``self.activation_outputs`` so that ``backward``
    and ``summary`` can reuse them.
    """

    def __init__(self, input_size, layers, output_size):
        """Create randomly initialised weights and zero biases.

        Args:
            input_size: Number of input features.
            layers: Sequence whose first two entries are the widths of
                hidden layer 1 and hidden layer 2.
            output_size: Number of output classes.
        """
        # Fixed seed so the initial weights are reproducible on every run.
        # NOTE: this reseeds NumPy's global random state.
        np.random.seed(0)

        model = {}
        model['W1'] = np.random.randn(input_size, layers[0])
        model['b1'] = np.zeros((1, layers[0]))
        model['W2'] = np.random.randn(layers[0], layers[1])
        model['b2'] = np.zeros((1, layers[1]))
        model['W3'] = np.random.randn(layers[1], output_size)
        model['b3'] = np.zeros((1, output_size))
        self.model = model

        # Filled in by forward(); stays None until the first forward pass.
        self.activation_outputs = None

    def forward(self, x):
        """Run a forward pass and cache the activations.

        Intermediate values:
            z1: output from hidden layer 1
            a1: activation of output from hidden layer 1
            z2: output from hidden layer 2
            a2: activation of output from hidden layer 2
            z3: output from output layer
            y_: activation of output from output layer

        Args:
            x: Input matrix with one example per row.

        Returns:
            ``y_``, an (m, c) matrix where m is the number of examples and
            c the number of classes, e.g. ``[[0.6, 0.1, 0.3], ...]``. Each
            entry of a row is the probability of that example belonging
            to the corresponding class.
        """
        W1, W2, W3 = self.model['W1'], self.model['W2'], self.model['W3']
        b1, b2, b3 = self.model['b1'], self.model['b2'], self.model['b3']

        z1 = np.dot(x, W1) + b1
        a1 = np.tanh(z1)

        z2 = np.dot(a1, W2) + b2
        a2 = np.tanh(z2)

        z3 = np.dot(a2, W3) + b3
        y_ = softmax(z3)

        # Cached for backward() and summary().
        self.activation_outputs = (a1, a2, y_)
        return y_

    def backward(self, x, y, n=0.001):
        """Backpropagate the error and take one gradient-descent step.

        Uses the activations cached by the most recent ``forward`` call, so
        ``forward(x)`` must have been run on the same ``x`` beforehand.
        Gradients are summed (not averaged) over the examples.

        Args:
            x: Input matrix that was passed to ``forward``.
            y: Targets with the same shape as the network output;
                presumably one-hot rows, so that ``y_ - y`` is the
                softmax cross-entropy gradient.
            n: Learning rate.

        Raises:
            RuntimeError: If ``forward`` has not been called yet.
        """
        cached = getattr(self, "activation_outputs", None)
        if cached is None:
            raise RuntimeError("forward() must be called before backward()")
        a1, a2, y_ = cached

        # W1 is not needed for the gradients, only W2 and W3 are.
        W2, W3 = self.model['W2'], self.model['W3']

        # Output layer error.
        delta3 = y_ - y
        dw3 = np.dot(a2.T, delta3)  # (h2, C) = (h2, m) x (m, C)
        db3 = np.sum(delta3, axis=0)

        # tanh'(z) = 1 - tanh(z)^2 = 1 - a^2
        delta2 = (1 - np.square(a2)) * np.dot(delta3, W3.T)  # (m, h2)
        dw2 = np.dot(a1.T, delta2)
        db2 = np.sum(delta2, axis=0)

        delta1 = (1 - np.square(a1)) * np.dot(delta2, W2.T)  # (m, h1)
        # Use the `x` argument here; the previous code referenced an
        # undefined/global `X` by mistake.
        dw1 = np.dot(x.T, delta1)
        db1 = np.sum(delta1, axis=0)

        # All gradients are computed before any parameter is modified.
        self.model['W1'] -= n * dw1
        self.model['W2'] -= n * dw2
        self.model['W3'] -= n * dw3

        self.model['b1'] -= n * db1
        self.model['b2'] -= n * db2
        self.model['b3'] -= n * db3

    def predict(self, x):
        """Return the index of the most probable class for each row of ``x``."""
        y_out = self.forward(x)
        return np.argmax(y_out, axis=1)

    def summary(self):
        """Print the weight shapes and, once available, the activation shapes.

        Activation shapes are only printed after ``forward`` has been
        called at least once; before that only the weights are listed.
        """
        W1, W2, W3 = self.model['W1'], self.model['W2'], self.model['W3']
        cached = getattr(self, "activation_outputs", None)
        if cached is None:
            a1 = a2 = y_ = None
        else:
            a1, a2, y_ = cached

        print("W1 ", W1.shape)
        if a1 is not None:
            print("A1 ", a1.shape)

        print("W2 ", W2.shape)
        if a2 is not None:
            print("A2 ", a2.shape)

        print("W3 ", W3.shape)
        if y_ is not None:
            print("Y_ ", y_.shape)
Comments
Post a Comment