# coding: utf-8 import time import numpy as np import h5py import matplotlib.pyplot as plt import scipy import random from PIL import Image from scipy import ndimage def load_data(train_set_percentage): """ Arguments: train_set_percentage -- python float indicating the percentage of the data to be used as training set Returns: train_set_x -- numpy array which represent the training set (digit images) train_set_y -- numpy array which represent the label of the training set test_set_x -- numpy array which represent the testing set (digit images) test_set_y -- numpy array which represent the label of the testing set """ dataset = h5py.File('dataset.h5', "r") total_num = np.shape(np.array(dataset["digit_images"][:]))[1] train_num = int(total_num * train_set_percentage) test_num = total_num - train_num digits = dataset["digit_images"][:] labels = dataset["digit_labels"][:] train_set_x = np.zeros((np.shape(digits)[0], train_num)) train_set_y = np.zeros((1, train_num)) test_set_x = np.zeros((np.shape(digits)[0], test_num)) test_set_y = np.zeros((1, test_num)) training_samples = [] random.seed(0) for i in range(train_num): found = False while not found: sample_index = random.randint(0, total_num - 1) if sample_index not in training_samples: training_samples.append(sample_index) found = True train_set_x[:, i] = digits[:, sample_index] train_set_y[:, i] = labels[:, sample_index] test_index = 0 for i in range(total_num): if i not in training_samples: test_set_x[:, test_index] = digits[:, i] test_set_y[:, test_index] = labels[:, i] test_index += 1 dataset.close() return train_set_x, train_set_y, test_set_x, test_set_y def reshape_Y(Y): """ Arguments: Y -- numpy array which represents the labels, example, [0, 1, 2, 3, ...] Returns: Y_output -- numpy array which represent the labels in a different way (corresponding to the output layer) example corresponding to above: [[1, 0, 0, 0, ...], [0, 1, 0, 0, ...], [0, 0, 1, 0, ...], [0, 0, 0, 0, ...], [0, 0, 0, 1, ...], [0, 0, 0, 0, ...], [0, 0, 0, 0, ...], [0, 0, 0, 0, ...], [0, 0, 0, 0, ...], [0, 0, 0, 0, ...], """ num_samples = int(np.shape(Y)[1]) Y_output = np.zeros((10, num_samples)) for i in range(np.shape(Y)[1]): Y_output[int(np.squeeze(Y[:, i])), i] = 1 return Y_output def display_digit_image(image_set, label_set, index, figure_index): """ Arguments: image_set -- numpy array which represents the image label_set -- numpy array which represents the label index -- the index of the image to be shown figure_index -- the index of the figure to show upon (every time when calling plt.figure function, a new figure_index needs to be assigned to get a new figure rather than draw on the original figure) """ plt.figure(figure_index) image = image_set[:, index].reshape(20, 20).T plt.imshow(image) plt.title("digit is " + str(int(label_set[0, index]))) plt.xlim(0, 19) plt.ylim(19, 0) plt.xticks(np.arange(0, 20, 2)) plt.yticks(np.arange(0, 20, 2)) def initialize_parameters(layer_dims): """ Arguments: layer_dims -- python array (list) containing the dimensions of each layer in our network Returns: parameters -- python dictionary containing your parameters "W1", "b1", ..., "WL", "bL": Wl -- weight matrix of shape (layer_dims[l], layer_dims[l-1]) bl -- bias vector of shape (layer_dims[l], 1) """ np.random.seed(3) parameters = {} L = len(layer_dims) # number of layers in the network for l in range(1, L): parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1]) * 0.01 parameters['b' + str(l)] = np.zeros((layer_dims[l], 1)) assert(parameters['W' + str(l)].shape == (layer_dims[l], layer_dims[l-1])) assert(parameters['b' + str(l)].shape == (layer_dims[l], 1)) return parameters def sigmoid(Z): """ Implements the sigmoid activation in numpy Arguments: Z -- numpy array of any shape Returns: A -- output of sigmoid(z), same shape as Z """ return 1 / (1 + np.exp(-Z)) def sigmoid_gradient(A): """ Implements the inverse of the sigmoid Arguments: A -- numpy array Returns: Z -- output of inverse of sigmoid(A), same shape as Z """ return sigmoid(A) * (1 - sigmoid(A)) def linear_activation_forward(A_prev, W, b): """ Implement the forward propagation for one layer Arguments: A_prev -- activations from previous layer (or input data): (size of previous layer, number of examples) W -- weights matrix: numpy array of shape (size of current layer, size of previous layer) b -- bias vector, numpy array of shape (size of the current layer, 1) Returns: Z -- the result of linear calculation A -- the output of the activation function, also called the post-activation value """ # Inputs: "A_prev, W, b". Outputs: "A, activation_cache". Z = np.matmul(W, A_prev) + b A = sigmoid(Z) assert(Z.shape == (W.shape[0], A.shape[1])) assert (A.shape == (W.shape[0], A_prev.shape[1])) return Z, A def feedforward(X, parameters): """ Implement forward propagation. The activation functions are all sigmoid functions. Arguments: X -- data, numpy array of shape (input size, number of examples) parameters -- output of initialize_parameters(layer_dims) Returns: AL -- last post-activation value caches -- a python dictionary containing "a1", "z2", "a2", "z3", ...; stored for computing the backward pass efficiently """ caches = {} caches["a1"] = X A = X L = len(parameters) // 2 # number of layers in the neural network for l in range(1, L + 1): A_prev = A Z, A = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)]) caches["z" + str(l + 1)] = Z caches["a" + str(l + 1)] = A assert(A.shape == (10, X.shape[1])) return A, caches def compute_cost(AL, Y): """ Implement the cost function. Arguments: AL -- probability vector corresponding to your label predictions, shape (10, number of examples) Y -- true "label" vector after reshape by reshape_Y function, shape (10, number of examples) Returns: cost -- cross-entropy cost """ m = Y.shape[1] # Compute loss from aL and y. cost = -np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)) / m cost = np.squeeze(cost) # To make sure your cost's shape is what we expect (e.g. this turns [[17]] into 17). assert(cost.shape == ()) return cost def display_cost(costs, learning_rate, figure_index): """ display the cost in a figure Arguments: costs -- a list of floats with the costs of each iteration learning_rate -- learning rate to be displayed on the figure figure_index -- figure_index -- the index of the figure to show upon (every time when calling plt.figure function, a new figure_index needs to be assigned to get a new figure rather than draw on the original figure) """ plt.figure(figure_index) plt.plot(np.squeeze(costs)) plt.ylabel('cost') plt.xlabel('iterations (per tens)') plt.title("Learning rate = " + str(learning_rate)) plt.show() def predict(parameters, X): """ Using the learned parameters, predicts a class for each example in X Arguments: parameters -- python dictionary containing your parameters X -- input data of size (n_x, m) Returns predictions -- vector of predictions of our model (0, 1, 2, 3 ...) """ A2, cache = feedforward(X, parameters) predictions = np.argmax(A2, axis = 0) np.reshape(predictions, (1, np.shape(predictions)[0])) return predictions def compute_accuracy(predictions, results): """ Arguments: predictions -- output of predict results -- true labels Returns accuracy -- how accurate this model is to predict the digits """ comparison = predictions == results return np.sum(comparison) / np.shape(results)[1] def backpropogation(AL, Y, parameters, caches, layers_dims): """ Implement the backward propagation Arguments: AL -- probability vector, output of the forward propagation Y -- true "label" vector after reshaping by reshape_Y caches -- output of feedforward function Returns: grads -- A dictionary with the gradients grads["dA" + str(l)] = ... grads["dW" + str(l)] = ... grads["db" + str(l)] = ... """ grads = {} L = len(layers_dims) m = AL.shape[1] # Initializing the backpropagation, by calculating dAL and save it to grads with the key "dA*" where * is the index of the layer # pleaes do not hard code to 3 here, as we will later play with neural networks with other configurations (e.g., more layers) ### START CODE HERE ### grads["dA" + str(L)] = AL - Y ### END CODE HERE ### for l in reversed(range(1, L)): # Inputs: "grads["dA" + str(l + 2)], caches". Outputs: "grads["dA" + str(l + 1)] , grads["dW" + str(l + 1)] , grads["db" + str(l + 1)] ### START CODE HERE ### grads["dW" + str(l)] = np.matmul(grads["dA" + str(l + 1)], caches["a" + str(l)].T) / m grads["db" + str(l)] = np.matmul(grads["dA" + str(l + 1)], np.ones((1, caches["a" + str(l)].shape[1])).T) / m if l != 1: grads["dA" + str(l)] = np.matmul(parameters["W" + str(l)].T, grads["dA" + str(l + 1)]) * sigmoid_gradient(caches["z" + str(l)]) ### END CODE HERE ### return grads def update_parameters(parameters, grads, learning_rate): """ Update parameters using gradient descent Arguments: parameters -- python dictionary containing your parameters grads -- python dictionary containing your gradients, output of backpropagation Returns: parameters -- python dictionary containing your updated parameters parameters["W" + str(l)] = ... parameters["b" + str(l)] = ... """ L = len(parameters) // 2 # number of layers in the neural network # Update rule for each parameter. Use a for loop. ### START CODE HERE ### for l in range(1, L + 1): parameters['W' + str(l)] = parameters['W' + str(l)] - learning_rate * grads["dW" + str(l)] parameters['b' + str(l)] = parameters['b' + str(l)] - learning_rate * grads["db" + str(l)] ### END CODE HERE ### return parameters def deep_NN(X, Y, layers_dims, learning_rate, num_iterations, print_cost=False): """ Implements a L-layer neural network: [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID. Arguments: X -- data, numpy array of shape (number of examples, num_px * num_px * 3) Y -- true "label" vector (containing 0 if cat, 1 if non-cat), of shape (1, number of examples) layers_dims -- list containing the input size and each layer size, of length (number of layers + 1). learning_rate -- learning rate of the gradient descent update rule num_iterations -- number of iterations of the optimization loop print_cost -- if True, it prints all the cost Returns: parameters -- parameters learnt by the model. They can then be used to predict. """ np.random.seed(1) costs = [] # keep track of cost # Parameters initialization. parameters = initialize_parameters(layers_dims) # Loop (gradient descent) for i in range(0, num_iterations): # Forward propagation: ### START CODE HERE ### AL, caches = feedforward(X, parameters) ### END CODE HERE ### # Compute cost. ### START CODE HERE ### cost = compute_cost(AL, Y) ### END CODE HERE ### # Backward propagation. ### START CODE HERE ### grads = backpropogation(AL, Y, parameters, caches, layers_dims) ### END CODE HERE ### # Update parameters. ### START CODE HERE ### parameters = update_parameters(parameters, grads, learning_rate) ### END CODE HERE ### if print_cost: print ("Cost after iteration %i: %f" %(i, cost)) costs.append(cost) return parameters, costs if __name__ == "__main__": plt.rcParams['figure.figsize'] = (5.0, 4.0) # set default size of plots plt.rcParams['image.interpolation'] = 'nearest' plt.rcParams['image.cmap'] = 'gray' np.random.seed(1) # the following are parameters to play with training_percentage = 1 digit_sample_index = 0 learning_rate = 0.5 layers_dims = [400, 25, 10] num_iterations = 2000 # section 1: one example of how to run the model with the training data train_x, train_y, test_x, test_y = load_data(training_percentage) display_digit_image(train_x, train_y, digit_sample_index, figure_index=1) # you can change the digit_sample_index to display different images parameters, costs = deep_NN(train_x, reshape_Y(train_y), layers_dims, learning_rate, num_iterations, print_cost=True) display_cost(costs, learning_rate, figure_index=2) train_set_predictions = predict(parameters, train_x) train_set_accuracy = compute_accuracy(train_set_predictions, train_y) print("the accuracy on the training set is : ") print(train_set_accuracy) # section 2: separate dataset into training set and test set #training_percentage = 0.8 #train_x, train_y, test_x, test_y = load_data(training_percentage) #display_digit_image(train_x, train_y, digit_sample_index, figure_index=1) # you can change the digit_sample_index to display different images #parameters, costs = deep_NN(train_x, reshape_Y(train_y), layers_dims, learning_rate, num_iterations, print_cost=True) #display_cost(costs, learning_rate, figure_index=2) #train_set_predictions = predict(parameters, train_x) #train_set_accuracy = compute_accuracy(train_set_predictions, train_y) #print("the accuracy on the training set is : ") #print(train_set_accuracy) #test_set_predictions = predict(parameters, test_x) #test_set_accuracy = compute_accuracy(test_set_predictions, test_y) #print("the accuracy on the testing set is : ") #print(test_set_accuracy) # section 3: modifying the learning_rate #training_percentage = 1 #learning_rate = 10 #train_x, train_y, test_x, test_y = load_data(training_percentage) #display_digit_image(train_x, train_y, digit_sample_index, figure_index=1) # you can change the digit_sample_index to display different images #parameters, costs = deep_NN(train_x, reshape_Y(train_y), layers_dims, learning_rate, num_iterations, print_cost=True) #display_cost(costs, learning_rate, figure_index=2) #train_set_predictions = predict(parameters, train_x) #train_set_accuracy = compute_accuracy(train_set_predictions, train_y) #print("the accuracy on the training set is : ") #print(train_set_accuracy) # section 4: overfitting #training_percentage = 0.8 #learning_rate = 2 #layers_dims = [400, 50, 25, 10] #num_iterations = 30000 #train_x, train_y, test_x, test_y = load_data(training_percentage) #display_digit_image(train_x, train_y, digit_sample_index, figure_index=1) # you can change the digit_sample_index to display different images #parameters, costs = deep_NN(train_x, reshape_Y(train_y), layers_dims, learning_rate, num_iterations, print_cost=True) #display_cost(costs, learning_rate, figure_index=2) #train_set_predictions = predict(parameters, train_x) #train_set_accuracy = compute_accuracy(train_set_predictions, train_y) #print("the accuracy on the training set is : ") #print(train_set_accuracy) #test_set_predictions = predict(parameters, test_x) #test_set_accuracy = compute_accuracy(test_set_predictions, test_y) #print("the accuracy on the testing set is : ") #print(test_set_accuracy) # section 5: your experiment #training_percentage = 1 #learning_rate = 0.5 #layers_dims = [400, 25, 10] #num_iterations = 2000 #train_x, train_y, test_x, test_y = load_data(training_percentage) #display_digit_image(train_x, train_y, digit_sample_index, figure_index=1) # you can change the digit_sample_index to display different images #parameters, costs = deep_NN(train_x, reshape_Y(train_y), layers_dims, learning_rate, num_iterations, print_cost=True) #display_cost(costs, learning_rate, figure_index=2) #train_set_predictions = predict(parameters, train_x) #train_set_accuracy = compute_accuracy(train_set_predictions, train_y) #print("the accuracy on the training set is : ") #print(train_set_accuracy) #test_set_predictions = predict(parameters, test_x) #test_set_accuracy = compute_accuracy(test_set_predictions, test_y) #print("the accuracy on the testing set is : ") #print(test_set_accuracy)