$$ S_k = f(S_{k-1} \cdot W_{rec} + X_k \cdot W_x) $$
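Because the update implemented below is linear (the code applies no activation, i.e. $f$ is the identity), unrolling the recurrence from the initial state $S_0 = 0$ expresses the final state as a weighted sum of the inputs; with both weights equal to 1 it is exactly the sum of the sequence, which is the target the toy data below is built from:

$$ S_n = \sum_{k=1}^{n} W_x \, W_{rec}^{\,n-k} \, X_k, \qquad S_n = \sum_{k=1}^{n} X_k \quad \text{when } W_x = W_{rec} = 1 $$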

import numpy as np

n_samples = 50
len_sequence = 10
X = np.zeros((n_samples, len_sequence))  # X.shape = (50, 10)
for row_idx in range(n_samples):  # fill X with random binary values (0s and 1s)
    X[row_idx, :] = np.around(np.random.rand(len_sequence)).astype(int)
target = np.sum(X, axis=1)  # target.shape = (50,): the number of ones in each sequence
def update_state(xk, sk, wx, wRec):
    # Compute state S_k from input x_k and the previous state S_{k-1}
    return xk * wx + sk * wRec

def forward_states(X, wx, wRec):
    # Unroll the recurrence over the whole sequence; S[:, 0] is the initial state of zeros
    S = np.zeros((X.shape[0], X.shape[1] + 1))
    for k in range(X.shape[1]):
        S[:, k+1] = update_state(X[:, k], S[:, k], wx, wRec)
    return S
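As an optional sanity check (not part of the gradient test below, and using only the names defined above), setting both weights to 1 should make the final column of forward_states reproduce the target exactly:

# Illustrative sanity check: with wx = wRec = 1 the final state is the plain sum of the inputs
S_check = forward_states(X, 1.0, 1.0)
assert np.allclose(S_check[:, -1], target)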
def loss(y, t):  # MSE as the loss function
    return np.mean((t - y)**2)

def output_gradient(y, t):  # gradient of the loss wrt the prediction y
    # (the 1/n_samples factor of the mean is applied later inside backward_gradient)
    return -2 * (t - y)

def backward_gradient(X, S, grad_out, wRec):
    # Propagate the gradient backwards through time and accumulate the
    # gradients of the loss wrt wx and wRec (averaged over the samples)
    grad_over_time = np.zeros((X.shape[0], X.shape[1] + 1))
    grad_over_time[:, -1] = grad_out
    wx_grad = 0
    wRec_grad = 0
    for k in range(X.shape[1], 0, -1):  # walk over the time steps, last to first
        wx_grad += np.mean(grad_over_time[:, k] * X[:, k-1])
        wRec_grad += np.mean(grad_over_time[:, k] * S[:, k-1])
        grad_over_time[:, k-1] = grad_over_time[:, k] * wRec
    return (wx_grad, wRec_grad), grad_over_time
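The loop above is the chain rule for this linear recurrence: the gradient flowing into state $S_k$ is passed back to $S_{k-1}$ by a multiplication with $W_{rec}$, while the weight gradients accumulate a contribution at every time step (the np.mean averages over the 50 samples, supplying the $1/n$ factor of the MSE):

$$ \frac{\partial \mathcal{L}}{\partial S_{k-1}} = \frac{\partial \mathcal{L}}{\partial S_k} \, W_{rec}, \qquad \frac{\partial \mathcal{L}}{\partial W_x} = \sum_{k=1}^{n} \frac{\partial \mathcal{L}}{\partial S_k} \, X_k, \qquad \frac{\partial \mathcal{L}}{\partial W_{rec}} = \sum_{k=1}^{n} \frac{\partial \mathcal{L}}{\partial S_k} \, S_{k-1} $$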
params = [1.2, 1.2]  # [wx, wRec]
eps = 1e-7
S = forward_states(X, params[0], params[1])
grad_out = output_gradient(S[:, -1], target)  # gradient of the loss wrt the final state S_n
backprop_grads, grad_over_time = backward_gradient(X, S, grad_out, params[1])
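Each backpropagation gradient is then compared against a numerical estimate obtained by central differences, nudging one parameter at a time by $\pm \epsilon$ and re-running the forward pass:

$$ \frac{\partial \mathcal{L}}{\partial \theta} \approx \frac{\mathcal{L}(\theta + \epsilon) - \mathcal{L}(\theta - \epsilon)}{2 \epsilon} $$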
for p_idx, _ in enumerate(params):
    grad_backprop = backprop_grads[p_idx]
    # numerical gradient via central differences: perturb the parameter by +/- eps
    params[p_idx] += eps
    plus_loss = loss(forward_states(X, params[0], params[1])[:, -1], target)
    params[p_idx] -= 2 * eps
    min_loss = loss(forward_states(X, params[0], params[1])[:, -1], target)
    params[p_idx] += eps  # restore the original parameter value
    grad_num = (plus_loss - min_loss) / (2 * eps)
    if not np.isclose(grad_num, grad_backprop):
        raise ValueError((
            f'Numerical gradient of {grad_num:.6f} is not close to '
            f'the backpropagation gradient of {grad_backprop:.6f}!'))
print('No gradient errors found')