"""
common_knowledge
~~~~~~~~~~~~~~~~

Try to determine whether or not it's possible to relate the
descriptions given by two different autoencoders.

Outline of the experiment:

1. Train two 784-HIDDEN-784 autoencoders on separate slices of the
   training data.
2. Encode a third slice with both autoencoders and train a single-layer
   HIDDEN -> HIDDEN network to map the first encoding onto the second.
3. On a fourth (test) slice, compare the l1 error of that mapping with a
   baseline that decodes with the first autoencoder and re-encodes with
   the second.
4. Display the normalized first-layer weight rows of both autoencoders
   as 28x28 images.
"""

#### Libraries
# My libraries
from backprop2 import Network, sigmoid_vec
import mnist_loader

# Third-party libraries
import matplotlib
import matplotlib.pyplot as plt
import numpy as np

#### Parameters
# Size of the training sets.  May range from 1000 to 12,500.  Lower
# will be faster, higher will give more accuracy.
SIZE = 5000
# Number of hidden units in the autoencoder
HIDDEN = 30


def _encode(autoencoder, inputs):
    """Return the hidden-layer encoding of each element of ``inputs``.

    Each input is multiplied by the network's first weight matrix,
    offset by its first bias vector, and passed through ``sigmoid_vec``.
    """
    weights, biases = autoencoder.weights[0], autoencoder.biases[0]
    return [sigmoid_vec(np.dot(weights, x) + biases) for x in inputs]


def _fiducial_images(autoencoder):
    """Return one 28x28 image per hidden unit of ``autoencoder``.

    Image ``j`` is row ``j`` of the first weight matrix, scaled by that
    same row's norm so all images are on a comparable scale.
    """
    rows = autoencoder.weights[0]
    return [rows[j, :].reshape(28, 28) / np.linalg.norm(rows[j, :])
            for j in range(HIDDEN)]


print("\nGenerating training data")
training_data, _, _ = mnist_loader.load_data_nn()
# Four disjoint slices, spaced 12,500 apart: one per autoencoder, one
# for learning the mapping, and one held out for evaluation.
td_1 = [(x, x) for x, _ in training_data[0:SIZE]]
td_2 = [(x, x) for x, _ in training_data[12500:12500+SIZE]]
td_3 = [x for x, _ in training_data[25000:25000+SIZE]]
test = [x for x, _ in training_data[37500:37500+SIZE]]

print("\nFinding first autoencoder")
ae_1 = Network([784, HIDDEN, 784])
# NOTE(review): the positional SGD arguments are presumably (epochs,
# mini-batch size, learning rate, regularization) -- confirm in backprop2.
ae_1.SGD(td_1, 4, 10, 0.01, 0.05)

print("\nFinding second autoencoder")
ae_2 = Network([784, HIDDEN, 784])
# The second autoencoder gets its own slice.  Previously it was trained
# on td_1 as well, leaving td_2 built but unused.
# NOTE(review): confirm that training on separate data is the intent.
ae_2.SGD(td_2, 4, 10, 0.01, 0.05)

print("\nGenerating encoded training data")
encoded_td_1 = _encode(ae_1, td_3)
encoded_td_2 = _encode(ae_2, td_3)
# Materialize the pairs as a list so they can be traversed repeatedly.
encoded_training_data = list(zip(encoded_td_1, encoded_td_2))

print("\nFinding mapping between theories")
net = Network([HIDDEN, HIDDEN])
net.SGD(encoded_training_data, 6, 10, 0.01, 0.05)

print("""\nBaseline for comparison: decompress with the first autoencoder""")
print("""and compress with the second autoencoder""")
encoded_test_1 = _encode(ae_1, test)
encoded_test_2 = _encode(ae_2, test)
# A list, because the pairs are iterated three times below.
test_data = list(zip(encoded_test_1, encoded_test_2))
# Average over the pairs actually evaluated; this equals SIZE whenever
# the test slice is fully populated.
n_test = len(test_data)

# Baseline network: second layer of ae_1 followed by first layer of ae_2.
net_baseline = Network([HIDDEN, 784, HIDDEN])
net_baseline.biases[0] = ae_1.biases[1]
net_baseline.weights[0] = ae_1.weights[1]
net_baseline.biases[1] = ae_2.biases[0]
net_baseline.weights[1] = ae_2.weights[0]
error_baseline = sum(np.linalg.norm(net_baseline.feedforward(x)-y, 1)
                     for (x, y) in test_data)
print("Baseline average l1 error per training image: %s" % (
    error_baseline / n_test,))

print("\nComparing theories with a simple interconversion")
print("Mean desired output activation: %s" % (
    sum(y.mean() for _, y in test_data) / n_test,))
error = sum(np.linalg.norm(net.feedforward(x)-y, 1) for (x, y) in test_data)
print("Average l1 error per training image: %s" % (error / n_test,))

print("\nComputing fiducial image inputs")
# Each row is normalized by its own norm.  The original divided by the
# norm of a row of the unrelated HIDDEN x HIDDEN mapping network `net`.
fiducial_images_1 = _fiducial_images(ae_1)
fiducial_images_2 = _fiducial_images(ae_2)
# Top strip: ae_1's hidden units side by side; bottom strip: ae_2's.
image = np.concatenate([np.concatenate(fiducial_images_1, axis=1),
                        np.concatenate(fiducial_images_2, axis=1)])
fig = plt.figure()
ax = fig.add_subplot(111)
ax.matshow(image, cmap=matplotlib.cm.binary)
# Hide the axis ticks; pixel coordinates carry no meaning here.
plt.xticks(np.array([]))
plt.yticks(np.array([]))
plt.show()