{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "9dd15852",
   "metadata": {
    "id": "9dd15852"
   },
   "source": [
    "# **GoogleNet**\n",
    "此份程式碼會介紹如何使用 tf.keras 的方式建構 GoogleNet 的模型架構,以及訓練的方式。"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f6fb147a",
   "metadata": {
    "id": "f6fb147a"
   },
   "source": [
    "\n",
    "- [source paper](https://arxiv.org/abs/1409.4842)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fb094860",
   "metadata": {
    "id": "fb094860"
   },
   "source": [
    "## 匯入套件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "49929c0a",
   "metadata": {
    "id": "49929c0a"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# TensorFlow-related packages\n",
    "import tensorflow as tf\n",
    "from tensorflow.keras import datasets, layers, Model, Sequential, losses"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "29c5c36f",
   "metadata": {
    "id": "29c5c36f"
   },
   "source": [
    "## 載入資料集"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3271a031",
   "metadata": {
    "id": "3271a031"
   },
   "outputs": [],
   "source": [
    "(x_train, y_train), (x_test, y_test) = datasets.mnist.load_data()\n",
    "\n",
    "# Append a trailing channel axis to every image stack.\n",
    "x_train = tf.expand_dims(x_train, axis=3)\n",
    "x_test = tf.expand_dims(x_test, axis=3)\n",
    "print(f'x_train shape: {x_train.shape}')\n",
    "print(f'x_test shape: {x_test.shape}')\n",
    "print('----------')\n",
    "\n",
    "# Copy the single channel three times so the images have an RGB layout.\n",
    "x_train = tf.repeat(x_train, repeats=3, axis=3)\n",
    "x_test = tf.repeat(x_test, repeats=3, axis=3)\n",
    "print(f'x_train shape: {x_train.shape}')\n",
    "print(f'x_test shape: {x_test.shape}')\n",
    "print('----------')\n",
    "\n",
    "# Keep the first 80% of the samples for training and hold out the\n",
    "# remaining 20% for validation (images and labels share one cut point).\n",
    "split_at = int(x_train.shape[0] * 0.8)\n",
    "x_train, x_val = x_train[:split_at], x_train[split_at:]\n",
    "y_train, y_val = y_train[:split_at], y_train[split_at:]\n",
    "print(f'x_train shape: {x_train.shape}, x_val shape: {x_val.shape}')\n",
    "print(f'y_train shape: {y_train.shape}, y_val shape: {y_val.shape}')"
   ]
  },
  {
"cell_type": "markdown",
   "id": "d8affc19",
   "metadata": {
    "id": "d8affc19"
   },
   "source": [
    "## GoogleNet Architecture"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e282ed54",
   "metadata": {
    "id": "e282ed54"
   },
   "source": [
    "\n",
    "\n",
    "- [source paper](https://arxiv.org/abs/1409.4842)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2b088320",
   "metadata": {
    "id": "2b088320"
   },
   "outputs": [],
   "source": [
    "def inception_block(x, filters_1x1,\n",
    "                    filters_3x3_reduce, filters_3x3,\n",
    "                    filters_5x5_reduce, filters_5x5,\n",
    "                    filters_pooling):\n",
    "    \"\"\"Build one Inception module on top of `x` and return the merged output.\n",
    "\n",
    "    Four parallel branches read the same input and are concatenated along\n",
    "    the last (channel) axis:\n",
    "\n",
    "    1. 1x1 convolution\n",
    "    2. 1x1 reduction followed by a 3x3 convolution\n",
    "    3. 1x1 reduction followed by a 5x5 convolution\n",
    "    4. 3x3 max pooling with stride 1 followed by a 1x1 convolution\n",
    "\n",
    "    Every convolution uses 'same' padding and ReLU, and the pooling uses\n",
    "    'same' padding with stride 1, so each branch keeps the spatial size of\n",
    "    `x` and only the channel count differs.\n",
    "\n",
    "    Args:\n",
    "        x: Input feature map; expected to be a 4-D, channels-last tensor.\n",
    "        filters_1x1: Filters of the 1x1 branch.\n",
    "        filters_3x3_reduce: Filters of the 1x1 reduction before the 3x3 conv.\n",
    "        filters_3x3: Filters of the 3x3 convolution.\n",
    "        filters_5x5_reduce: Filters of the 1x1 reduction before the 5x5 conv.\n",
    "        filters_5x5: Filters of the 5x5 convolution.\n",
    "        filters_pooling: Filters of the 1x1 convolution after max pooling.\n",
    "\n",
    "    Returns:\n",
    "        Tensor whose channel count is\n",
    "        filters_1x1 + filters_3x3 + filters_5x5 + filters_pooling.\n",
    "    \"\"\"\n",
    "    def conv_relu(filters, kernel_size):\n",
    "        # All convolutions in the module share 'same' padding and ReLU.\n",
    "        return layers.Conv2D(filters, kernel_size,\n",
    "                             padding='same',\n",
    "                             activation='relu')\n",
    "\n",
    "    # Branch 1: plain 1x1 convolution.\n",
    "    branch_1x1 = conv_relu(filters_1x1, (1, 1))(x)\n",
    "\n",
    "    # Branch 2: 1x1 channel reduction, then 3x3 convolution.\n",
    "    branch_3x3 = conv_relu(filters_3x3_reduce, (1, 1))(x)\n",
    "    branch_3x3 = conv_relu(filters_3x3, (3, 3))(branch_3x3)\n",
    "\n",
    "    # Branch 3: 1x1 channel reduction, then 5x5 convolution.\n",
    "    branch_5x5 = conv_relu(filters_5x5_reduce, (1, 1))(x)\n",
    "    branch_5x5 = conv_relu(filters_5x5, (5, 5))(branch_5x5)\n",
    "\n",
    "    # Branch 4: size-preserving 3x3 max pooling, then 1x1 projection.\n",
    "    branch_pool = layers.MaxPool2D((3, 3),\n",
    "                                   strides=(1, 1),\n",
    "                                   padding='same')(x)\n",
    "    branch_pool = conv_relu(filters_pooling, (1, 1))(branch_pool)\n",
    "\n",
    "    # Merge with a Keras layer instead of the raw tf.concat op so the module\n",
    "    # is built purely from Keras layers in the functional graph.\n",
    "    return layers.Concatenate(axis=-1)(\n",
    "        [branch_1x1, branch_3x3, branch_5x5, branch_pool])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fd785af3",
   "metadata": {
    "id": "fd785af3"
   },
   "outputs": [],
   "source": [
    "def auxiliary_classifier(inputs, num_classes=10):\n",
    "    \"\"\"Attach an auxiliary softmax classification head to `inputs`.\n",
    "\n",
    "    The head is: 5x5 average pooling (stride 3) -> 1x1 conv with 128 filters\n",
    "    -> flatten -> Dense(1024, ReLU) -> Dropout(0.7) -> Dense(softmax).\n",
    "\n",
    "    Args:\n",
    "        inputs: Intermediate feature map the head is attached to.\n",
    "        num_classes: Number of output classes. Defaults to 10, the value\n",
    "            that was previously hard-coded.\n",
    "\n",
    "    Returns:\n",
    "        Tensor of per-class softmax probabilities with `num_classes` units.\n",
    "    \"\"\"\n",
    "    aux = layers.AveragePooling2D((5, 5), strides=3)(inputs)\n",
    "    aux = layers.Conv2D(128, 1, padding='same', activation='relu')(aux)\n",
    "    aux = layers.Flatten()(aux)\n",
    "    aux = layers.Dense(1024, activation='relu')(aux)\n",
    "    aux = layers.Dropout(0.7)(aux)\n",
    "    outputs = layers.Dense(num_classes, activation='softmax')(aux)\n",
    "\n",
    "    return outputs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8d42d6d1",
   "metadata": {
    "id": "8d42d6d1"
   },
   "outputs":
[],
   "source": [
    "labels_num = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1c8b695e",
   "metadata": {
    "id": "1c8b695e"
   },
   "outputs": [],
   "source": [
    "# Reset Keras' global state so re-running this cell rebuilds the graph\n",
    "# from scratch with fresh auto-generated layer names.\n",
    "tf.keras.backend.clear_session()\n",
    "inputs = layers.Input(shape=x_train.shape[1:])\n",
    "# The images are fed to the model exactly as loaded (no scaling happens in\n",
    "# the data cell), so map the raw pixel values to [0, 1] here; this assumes\n",
    "# the usual 0-255 integer range of MNIST.\n",
    "x = layers.Rescaling(1.0 / 255)(inputs)\n",
    "# Upsample to the 224x224 resolution the stem below is laid out for.\n",
    "x = layers.Resizing(224, 224, interpolation=\"bilinear\")(x)\n",
    "\n",
    "# Stem: 7x7/2 conv -> pool -> 1x1 conv -> 3x3 conv -> pool.\n",
    "x = layers.Conv2D(64, (7, 7), strides=2,\n",
    "                  padding='same',\n",
    "                  activation='relu')(x)\n",
    "x = layers.MaxPooling2D((3, 3), strides=2)(x)\n",
    "x = layers.Conv2D(64, (1, 1), strides=1,\n",
    "                  padding='same',\n",
    "                  activation='relu')(x)\n",
    "x = layers.Conv2D(192, (3, 3), strides=1,\n",
    "                  padding='same',\n",
    "                  activation='relu')(x)\n",
    "x = layers.MaxPooling2D((3, 3), strides=2)(x)\n",
    "\n",
    "# Stage 3\n",
    "inception_3a = inception_block(x, filters_1x1=64,\n",
    "                               filters_3x3_reduce=96, filters_3x3=128,\n",
    "                               filters_5x5_reduce=16, filters_5x5=32,\n",
    "                               filters_pooling=32)\n",
    "\n",
    "inception_3b = inception_block(inception_3a, filters_1x1=128,\n",
    "                               filters_3x3_reduce=128, filters_3x3=192,\n",
    "                               filters_5x5_reduce=32, filters_5x5=96,\n",
    "                               filters_pooling=64)\n",
    "inception_3b = layers.MaxPooling2D((3, 3), strides=2)(inception_3b)\n",
    "\n",
    "# Stage 4, with an auxiliary classifier after 4a and after 4d\n",
    "inception_4a = inception_block(inception_3b, filters_1x1=192,\n",
    "                               filters_3x3_reduce=96, filters_3x3=208,\n",
    "                               filters_5x5_reduce=16, filters_5x5=48,\n",
    "                               filters_pooling=64)\n",
    "\n",
    "aux_1 = auxiliary_classifier(inception_4a)\n",
    "\n",
    "inception_4b = inception_block(inception_4a, filters_1x1=160,\n",
    "                               filters_3x3_reduce=112, filters_3x3=224,\n",
    "                               filters_5x5_reduce=24, filters_5x5=64,\n",
    "                               filters_pooling=64)\n",
    "\n",
    "inception_4c = inception_block(inception_4b, filters_1x1=128,\n",
    "                               filters_3x3_reduce=128, filters_3x3=256,\n",
    "                               filters_5x5_reduce=24, filters_5x5=64,\n",
    "                               filters_pooling=64)\n",
    "\n",
    "inception_4d = inception_block(inception_4c, filters_1x1=112,\n",
    "                               filters_3x3_reduce=144, filters_3x3=288,\n",
    "                               filters_5x5_reduce=32, filters_5x5=64,\n",
    "                               filters_pooling=64)\n",
    "\n",
    "aux_2 = auxiliary_classifier(inception_4d)\n",
    "\n",
    "inception_4e = inception_block(inception_4d, filters_1x1=256,\n",
    "                               filters_3x3_reduce=160, filters_3x3=320,\n",
    "                               filters_5x5_reduce=32, filters_5x5=128,\n",
    "                               filters_pooling=128)\n",
    "\n",
    "inception_4e = layers.MaxPooling2D((3, 3), strides=2)(inception_4e)\n",
    "\n",
    "# Stage 5\n",
    "inception_5a = inception_block(inception_4e, filters_1x1=256,\n",
    "                               filters_3x3_reduce=160, filters_3x3=320,\n",
    "                               filters_5x5_reduce=32, filters_5x5=128,\n",
    "                               filters_pooling=128)\n",
    "\n",
    "inception_5b = inception_block(inception_5a, filters_1x1=384,\n",
    "                               filters_3x3_reduce=192, filters_3x3=384,\n",
    "                               filters_5x5_reduce=48, filters_5x5=128,\n",
    "                               filters_pooling=128)\n",
    "\n",
    "# Main classification head. The explicit layer name gives the training\n",
    "# history stable metric keys ('main_output_accuracy') instead of keys that\n",
    "# depend on Keras' auto-generated 'dense_<n>' numbering.\n",
    "outputs = layers.GlobalAveragePooling2D()(inception_5b)\n",
    "outputs = layers.Dropout(0.4)(outputs)\n",
    "outputs = layers.Dense(labels_num,\n",
    "                       activation='softmax',\n",
    "                       name='main_output')(outputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "75c1b9f7",
   "metadata": {
    "id": "75c1b9f7"
   },
   "outputs": [],
   "source": [
    "# Output order matters: the loss and loss-weight lists passed to compile()\n",
    "# below are matched to [main, aux_1, aux_2] by position.\n",
    "GoogleNet_model = Model(inputs=inputs, outputs=[outputs, aux_1, aux_2])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "604fcf7d",
   "metadata": {
    "id": "604fcf7d"
   },
   "outputs": [],
   "source": [
    "GoogleNet_model.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6d1ff9e",
   "metadata": {
    "id": "e6d1ff9e"
   },
   "outputs": [],
   "source": [
    "batch_size = 256\n",
    "epochs = 10"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "583e462e",
   "metadata": {
    "id": "583e462e"
   },
   "outputs": [],
   "source": [
    "# Smoke test: push one constant batch through the network.\n",
    "# A dedicated name is used on purpose; rebinding `inputs` here would\n",
    "# overwrite the symbolic Input tensor and break a re-run of the\n",
    "# `Model(inputs=inputs, ...)` cell.\n",
    "dummy_batch = np.ones((batch_size, x_train.shape[1], x_train.shape[2], 3),\n",
    "                      dtype=np.float32)\n",
    "# Show only the shape of each of the three outputs, not the full tensors.\n",
    "[out.shape for out in GoogleNet_model(dummy_batch)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bdc52d71",
   "metadata": {
    "id": "bdc52d71"
   },
   "outputs": [],
   "source": [
    "# One loss per model output, in the order [main, aux_1, aux_2]; the two\n",
    "# auxiliary heads contribute to the total loss with a weight of 0.2 each.\n",
    "GoogleNet_model.compile(\n",
    "    optimizer='adam',\n",
    "    loss=[losses.sparse_categorical_crossentropy] * 3,\n",
    "    loss_weights=[1, 0.2, 0.2],\n",
    "    metrics=['accuracy'])\n",
    "\n",
    "history = GoogleNet_model.fit(x_train, y_train,\n",
    "                              validation_data=(x_val, y_val),\n",
    "                              batch_size=batch_size,\n",
    "                              epochs=epochs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "33305fff",
   "metadata": {
    "id": "33305fff"
   },
   "outputs": [],
   "source": [
    "fig, axs = plt.subplots(1, 2, figsize=(15,6))\n",
    "\n",
    "# Left panel: total (weighted) loss.\n",
    "axs[0].plot(history.history['loss'])\n",
    "axs[0].plot(history.history['val_loss'])\n",
    "axs[0].title.set_text('Training Loss vs Validation Loss')\n",
    "axs[0].set_xlabel('Epochs')\n",
    "axs[0].set_ylabel('Loss')\n",
    "axs[0].legend(['Train', 'Val'])\n",
    "\n",
    "# Right panel: accuracy of the main head only; the key is derived from the\n",
    "# 'main_output' layer name given to the final Dense layer.\n",
    "axs[1].plot(history.history['main_output_accuracy'])\n",
    "axs[1].plot(history.history['val_main_output_accuracy'])\n",
    "axs[1].title.set_text('Training Accuracy vs Validation Accuracy')\n",
    "axs[1].set_xlabel('Epochs')\n",
    "axs[1].set_ylabel('Accuracy')\n",
    "axs[1].legend(['Train', 'Val']);"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de8cc9a7",
   "metadata": {
    "id": "de8cc9a7"
   },
   "outputs": [],
   "source": [
    "GoogleNet_model.evaluate(x_test, y_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "12908910",
   "metadata": {
    "id": "12908910"
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "provenance": []
  },
  "gpuClass": "standard",
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}