LeNet 是最经典的卷积神经网络模型,不过现在看起来也有一点古老了。
LeNet
我们可以用LeNet 来进行手写 MNIST 数字识别,
from tensorflow.examples.tutorials.mnist import input_data mnist = input_data.read_data_sets("MNIST_data/", reshape=False) X_train, y_train = mnist.train.images, mnist.train.labels X_validation, y_validation = mnist.validation.images, mnist.validation.labels X_test, y_test = mnist.test.images, mnist.test.labels assert(len(X_train) == len(y_train)) assert(len(X_validation) == len(y_validation)) assert(len(X_test) == len(y_test)) print() print("Image Shape: {}".format(X_train[0].shape)) print() print("Training Set: {} samples".format(len(X_train))) print("Validation Set: {} samples".format(len(X_validation))) print("Test Set: {} samples".format(len(X_test)))可以看到,MNIST 的数据集是 28 * 28 的灰度图
import numpy as np # Pad images with 0s X_train = np.pad(X_train, ((0,0),(2,2),(2,2),(0,0)), 'constant') X_validation = np.pad(X_validation, ((0,0),(2,2),(2,2),(0,0)), 'constant') X_test = np.pad(X_test, ((0,0),(2,2),(2,2),(0,0)), 'constant') print("Updated Image Shape: {}".format(X_train[0].shape))np.pad 的语法是这样的,目前我们 X_train 的 shape 是 4 个维度的,
from tensorflow.contrib.layers import flatten def LeNet(x): mu = 0 sigma = 0.1 # Layer 1: Convolutional. Input = 32x32x1. Output = 28x28x6. conv1_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 1, 6), mean = mu, stddev = sigma)) conv1_b = tf.Variable(tf.zeros(6)) conv1 = tf.nn.conv2d(x, conv1_W, strides=[1, 1, 1, 1], padding='VALID') + conv1_b # Activation. conv1 = tf.nn.relu(conv1) # Pooling. Input = 28x28x6. Output = 14x14x6. conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID') # Layer 2: Convolutional. Output = 10x10x16. conv2_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 6, 16), mean = mu, stddev = sigma)) conv2_b = tf.Variable(tf.zeros(16)) conv2 = tf.nn.conv2d(conv1, conv2_W, strides=[1, 1, 1, 1], padding='VALID') + conv2_b # Activation. conv2 = tf.nn.relu(conv2) # Pooling. Input = 10x10x16. Output = 5x5x16. conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID') # Flatten. Input = 5x5x16. Output = 400. fc0 = flatten(conv2) # Layer 3: Fully Connected. Input = 400. Output = 120. fc1_W = tf.Variable(tf.truncated_normal(shape=(400, 120), mean = mu, stddev = sigma)) fc1_b = tf.Variable(tf.zeros(120)) fc1 = tf.matmul(fc0, fc1_W) + fc1_b # Activation. fc1 = tf.nn.relu(fc1) # Layer 4: Fully Connected. Input = 120. Output = 84. fc2_W = tf.Variable(tf.truncated_normal(shape=(120, 84), mean = mu, stddev = sigma)) fc2_b = tf.Variable(tf.zeros(84)) fc2 = tf.matmul(fc1, fc2_W) + fc2_b # Activation. fc2 = tf.nn.relu(fc2) # Layer 5: Fully Connected. Input = 84. Output = 10. fc3_W = tf.Variable(tf.truncated_normal(shape=(84, 10), mean = mu, stddev = sigma)) fc3_b = tf.Variable(tf.zeros(10)) logits = tf.matmul(fc2, fc3_W) + fc3_b return logits x = tf.placeholder(tf.float32, (None, 32, 32, 1)) y = tf.placeholder(tf.int32, (None)) one_hot_y = tf.one_hot(y, 10) rate = 0.001 logits = LeNet(x) cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_y, logits=logits) loss_operation = tf.reduce_mean(cross_entropy) optimizer = tf.train.AdamOptimizer(learning_rate = rate) training_operation = optimizer.minimize(loss_operation) correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1)) accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) saver = tf.train.Saver() def evaluate(X_data, y_data): num_examples = len(X_data) total_accuracy = 0 sess = tf.get_default_session() for offset in range(0, num_examples, BATCH_SIZE): batch_x, batch_y = X_data[offset:offset+BATCH_SIZE], y_data[offset:offset+BATCH_SIZE] accuracy = sess.run(accuracy_operation, feed_dict={x: batch_x, y: batch_y}) total_accuracy += (accuracy * len(batch_x)) return total_accuracy / num_examples with tf.Session() as sess: sess.run(tf.global_variables_initializer()) num_examples = len(X_train) print("Training...") print() for i in range(EPOCHS): X_train, y_train = shuffle(X_train, y_train) for offset in range(0, num_examples, BATCH_SIZE): end = offset + BATCH_SIZE batch_x, batch_y = X_train[offset:end], y_train[offset:end] sess.run(training_operation, feed_dict={x: batch_x, y: batch_y}) validation_accuracy = evaluate(X_validation, y_validation) print("EPOCH {} ...".format(i+1)) print("Validation Accuracy = {:.3f}".format(validation_accuracy)) print() saver.save(sess, './lenet') print("Model saved") with tf.Session() as sess: saver.restore(sess, tf.train.latest_checkpoint('.')) test_accuracy = evaluate(X_test, y_test) print("Test Accuracy = {:.3f}".format(test_accuracy))以上就是我们如何用 LeNet 来对手写数字进行识别的例子。