Stanford CS 224N (2017): Assignment 1, Part 1



By Piyush Gandhi

The Stanford CS 224N course, Natural Language Processing with Deep Learning, is known to be one of the best courses around for the subject, as is evident from the title.

I recently started the course, since I have wanted to do NLP for a long time now, and considering that the 2019 edition is going to be publicly available (http://web.stanford.edu/class/cs224n/), I thought it might be a good idea to write a blog as I go through the assignments.

In a series of stories, I’ll be releasing my solutions as I work through the course. Feel free to comment if you find a mistake or a better solution than mine.

Question 1

(Image: Assignment 1, Question 1)

b)
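The implementation below subtracts the row-wise maximum before exponentiating. This is the standard numerical-stability trick, and it is valid because softmax is invariant to adding the same constant to every component (a well-known identity, stated here for reference rather than taken from the post):

\[
\mathrm{softmax}(x + c)_i = \frac{e^{x_i + c}}{\sum_j e^{x_j + c}} = \frac{e^{c}\, e^{x_i}}{e^{c} \sum_j e^{x_j}} = \mathrm{softmax}(x)_i
\]

The code uses \(c = -\max_j x_j\), so the largest exponent is always zero.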

import numpy as np


def softmax(x):
    """
    Arguments:
    x -- A N dimensional vector or M x N dimensional numpy matrix.

    Return:
    x -- You are allowed to modify x in-place
    """
    orig_shape = x.shape

    if len(x.shape) > 1:
        # Matrix
        ### YOUR CODE HERE
        axis = 1
        x = x.astype(float)
        x = x - np.expand_dims(np.max(x, axis=axis), axis)
        x = np.exp(x)
        axis_sum = np.expand_dims(np.sum(x, axis=axis), axis)
        x = x / axis_sum
        ### END YOUR CODE
    else:
        # Vector
        ### YOUR CODE HERE
        max_val = np.max(x)
        x = np.exp(x - max_val)
        exp_sum = np.sum(x)
        x = x / exp_sum
        ### END YOUR CODE

    assert x.shape == orig_shape
    return x


def test_softmax_basic():
    """
    Some simple tests to get you started.
    Warning: these are not exhaustive.
    """
    print "Running basic tests..."
    test1 = softmax(np.array([1, 2]))
    print test1
    ans1 = np.array([0.26894142, 0.73105858])
    assert np.allclose(test1, ans1, rtol=1e-05, atol=1e-06)

    test2 = softmax(np.array([[1001, 1002], [3, 4]]))
    print test2
    ans2 = np.array([
        [0.26894142, 0.73105858],
        [0.26894142, 0.73105858]])
    assert np.allclose(test2, ans2, rtol=1e-05, atol=1e-06)

    test3 = softmax(np.array([[-1001, -1002]]))
    print test3
    ans3 = np.array([0.73105858, 0.26894142])
    assert np.allclose(test3, ans3, rtol=1e-05, atol=1e-06)

    print "You should be able to verify these results by hand!\n"


if __name__ == "__main__":
    test_softmax_basic()
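A quick illustration of why the max subtraction matters (my own sketch, not part of the assignment's starter code): exponentiating large inputs directly overflows float64, while the shifted version gives the same probabilities.

import numpy as np

x = np.array([1001.0, 1002.0])

# Naive softmax: np.exp overflows float64 for arguments above roughly 709,
# so this evaluates to inf / inf = nan (with RuntimeWarnings).
naive = np.exp(x) / np.sum(np.exp(x))        # -> [nan, nan]

# Stable softmax: shift by the max first; the probabilities are unchanged
# thanks to the invariance property above.
shifted = x - np.max(x)                      # -> [-1., 0.]
stable = np.exp(shifted) / np.sum(np.exp(shifted))
# -> [0.26894142, 0.73105858], the same as softmax([1, 2])

This is exactly what the second test case (inputs of 1001 and 1002) exercises.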

Question 2

a)

b)

c)

d)

e)

import numpy as np


def sigmoid(x):
    """
    Arguments:
    x -- A scalar or numpy array.

    Return:
    s -- sigmoid(x)
    """
    # YOUR CODE HERE
    s = 1 / (1 + np.exp(-x))
    # END YOUR CODE
    return s


def sigmoid_grad(s):
    """
    Arguments:
    s -- A scalar or numpy array.

    Return:
    ds -- Your computed gradient.
    """
    # YOUR CODE HERE
    ds = s * (1 - s)
    # END YOUR CODE
    return ds


def test_sigmoid_basic():
    """
    Some simple tests to get you started.
    Warning: these are not exhaustive.
    """
    print "Running basic tests..."
    x = np.array([[1, 2], [-1, -2]])
    f = sigmoid(x)
    g = sigmoid_grad(f)

    print f
    f_ans = np.array([
        [0.73105858, 0.88079708],
        [0.26894142, 0.11920292]])
    assert np.allclose(f, f_ans, rtol=1e-05, atol=1e-06)

    print g
    g_ans = np.array([
        [0.19661193, 0.10499359],
        [0.19661193, 0.10499359]])
    assert np.allclose(g, g_ans, rtol=1e-05, atol=1e-06)

    print "You should verify these results by hand!\n"


if __name__ == "__main__":
    test_sigmoid_basic()
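Note that sigmoid_grad takes the already-computed function value s = sigmoid(x) rather than x itself; ds = s * (1 - s) then works because of the standard identity (stated here for reference):

\[
\sigma(x) = \frac{1}{1 + e^{-x}}, \qquad
\sigma'(x) = \frac{e^{-x}}{(1 + e^{-x})^{2}} = \sigma(x)\bigl(1 - \sigma(x)\bigr) = s(1 - s)
\]

Reusing s avoids recomputing the exponential during backpropagation, which is why the test calls sigmoid_grad(f) on the output of sigmoid.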

f)

import numpy as np
import random


# First implement a gradient checker by filling in the following functions
def gradcheck_naive(f, x):
    """ Gradient check for a function f.

    Arguments:
    f -- a function that takes a single argument and outputs the
         cost and its gradients
    x -- the point (numpy array) to check the gradient at
    """
    rndstate = random.getstate()
    random.setstate(rndstate)
    fx, grad = f(x)  # Evaluate function value at original point
    h = 1e-4         # Do not change this!

    # Iterate over all indexes in x
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    print fx, grad
    while not it.finished:
        ix = it.multi_index

        # Try modifying x[ix] with h defined above to compute
        # numerical gradients. Make sure you call
        # random.setstate(rndstate)
        # before calling f(x) each time. This will make it possible
        # to test cost functions with built in randomness later.
        # YOUR CODE HERE:
        old_x = x[ix]
        x[ix] = old_x + h
        random.setstate(rndstate)
        fxph = f(x)[0]
        x[ix] = old_x - h
        random.setstate(rndstate)
        fxmh = f(x)[0]
        numgrad = (fxph - fxmh) / (2 * h)
        x[ix] = old_x
        # END YOUR CODE

        # Compare gradients
        reldiff = abs(numgrad - grad[ix]) / max(1, abs(numgrad), abs(grad[ix]))
        if reldiff > 1e-5:
            print "Gradient check failed."
            print "First gradient error found at index %s" % str(ix)
            print "Your gradient: %f \t Numerical gradient: %f" % (
                grad[ix], numgrad)
            return

        it.iternext()  # Step to next dimension

    print "Gradient check passed!"


def sanity_check():
    """
    Some basic sanity checks.
    """
    def quad(x): return (np.sum(x ** 2), x * 2)

    print "Running sanity checks..."
    gradcheck_naive(quad, np.array(123.456))      # scalar test
    gradcheck_naive(quad, np.random.randn(3,))    # 1-D test
    gradcheck_naive(quad, np.random.randn(4, 5))  # 2-D test
    print ""


if __name__ == "__main__":
    sanity_check()
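The numerical gradient inside the loop is the standard central-difference approximation: each coordinate is perturbed by h in both directions while everything else (including the random state) is held fixed,

\[
\frac{\partial f}{\partial x_i} \approx \frac{f(x + h\, e_i) - f(x - h\, e_i)}{2h}, \qquad h = 10^{-4},
\]

where \(e_i\) is the i-th standard basis vector. The relative difference \(|\,\text{numgrad} - \text{grad}[ix]\,| / \max(1, |\text{numgrad}|, |\text{grad}[ix]|)\) is then required to stay below \(10^{-5}\).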

g)

import numpy as np
import random

from q1_softmax import softmax
from q2_sigmoid import sigmoid, sigmoid_grad
from q2_gradcheck import gradcheck_naive


def forward_backward_prop(data, labels, params, dimensions):
    """
    Forward and backward propagation for a two-layer sigmoidal network

    Compute the forward propagation and for the cross entropy cost,
    and backward propagation for the gradients for all parameters.

    Arguments:
    data -- M x Dx matrix, where each row is a training example.
    labels -- M x Dy matrix, where each row is a one-hot vector.
    params -- Model parameters, these are unpacked for you.
    dimensions -- A tuple of input dimension, number of hidden units
                  and output dimension
    """
    # Unpack network parameters (do not modify)
    ofs = 0
    Dx, H, Dy = (dimensions[0], dimensions[1], dimensions[2])

    W1 = np.reshape(params[ofs:ofs + Dx * H], (Dx, H))
    ofs += Dx * H
    b1 = np.reshape(params[ofs:ofs + H], (1, H))
    ofs += H
    W2 = np.reshape(params[ofs:ofs + H * Dy], (H, Dy))
    ofs += H * Dy
    b2 = np.reshape(params[ofs:ofs + Dy], (1, Dy))

    # YOUR CODE HERE
    # forward propagation
    z1 = np.dot(data, W1) + b1
    h = sigmoid(z1)
    z2 = np.dot(h, W2) + b2
    logits = softmax(z2)
    cost = -np.sum(labels * np.log(logits))

    # backward propagation
    grad_logits = logits - labels  # (M, Dy)
    gradW2 = np.dot(h.T, grad_logits)
    gradb2 = np.sum(grad_logits, axis=0)
    gradh = np.dot(grad_logits, W2.T)
    gradZ1 = sigmoid_grad(h) * gradh
    gradW1 = np.dot(data.T, gradZ1)
    gradb1 = np.sum(gradZ1, axis=0)
    # END YOUR CODE

    # Stack gradients (do not modify)
    grad = np.concatenate((gradW1.flatten(), gradb1.flatten(),
                           gradW2.flatten(), gradb2.flatten()))

    return cost, grad


def sanity_check():
    """
    Set up fake data and parameters for the neural network, and test
    using gradcheck.
    """
    print "Running sanity check..."

    N = 20
    dimensions = [10, 5, 10]
    data = np.random.randn(N, dimensions[0])
    labels = np.zeros((N, dimensions[2]))
    for i in xrange(N):
        labels[i, random.randint(0, dimensions[2] - 1)] = 1

    params = np.random.randn((dimensions[0] + 1) * dimensions[1] +
                             (dimensions[1] + 1) * dimensions[2], )

    gradcheck_naive(lambda params: forward_backward_prop(data, labels,
                                                         params, dimensions),
                    params)


if __name__ == "__main__":
    sanity_check()
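For reference, here is the computation the code above implements, written out as equations (this is my summary of the code, not text from the handout). With a batch of inputs X, one-hot labels Y, and parameters W1, b1, W2, b2:

\[
h = \sigma(X W_1 + b_1), \qquad
\hat{Y} = \mathrm{softmax}(h W_2 + b_2), \qquad
\mathrm{CE}(Y, \hat{Y}) = -\sum_{i,j} Y_{ij} \log \hat{Y}_{ij}
\]

The backward pass starts from \(\partial \mathrm{CE} / \partial z_2 = \hat{Y} - Y\) (the grad_logits term) and propagates it through W2, the sigmoid (via sigmoid_grad), and W1. The sanity check allocates \((D_x + 1)H + (H + 1)D_y\) random parameters, which is exactly the number of entries in W1, b1, W2, and b2 combined.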

I’ll be publishing the next two questions of Assignment 1 later, so do look out for those as well.

Do follow me for updates. Leave a clap if you liked it!