{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "1cf8437b",
   "metadata": {
    "id": "1cf8437b"
   },
   "source": [
    "#### Download dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "09717ecc",
   "metadata": {
    "id": "09717ecc"
   },
   "outputs": [],
   "source": [
    "# %pip (rather than !pip) installs into the environment of the running kernel\n",
    "%pip install --upgrade gdown\n",
    "!gdown --fuzzy 1B6inr_JHXRTidFsSMqOGhJWJr_09hXdF"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9074dadc",
   "metadata": {
    "id": "9074dadc"
   },
   "outputs": [],
   "source": [
    "!tar xf where_am_i.tar"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "748447d1",
   "metadata": {
    "id": "748447d1"
   },
   "outputs": [],
   "source": [
    "import os\n",
    "from glob import glob\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import tensorflow as tf\n",
    "from PIL import Image\n",
    "from sklearn.model_selection import train_test_split"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "82cc5e13",
   "metadata": {
    "id": "82cc5e13"
   },
   "source": [
    "#### Hyperparameters"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ef739773",
   "metadata": {
    "id": "ef739773"
   },
   "outputs": [],
   "source": [
    "IMG_SIZE = 224  # images are resized to IMG_SIZE x IMG_SIZE pixels\n",
    "BS = 64         # batch size\n",
    "SEED = 5566     # random seed of the train / validation split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f314a4f",
   "metadata": {
    "id": "3f314a4f"
   },
   "outputs": [],
   "source": [
    "# Build the dictionary that maps a class name to its class index.\n",
    "# Every non-blank line of mapping.txt has the form: class_name,class_index\n",
    "CLASS_MAP = {}\n",
    "with open(\"mapping.txt\") as f:\n",
    "    for line in f:\n",
    "        line = line.strip().replace(\" \", \"\")\n",
    "        if not line:  # skip blank lines instead of failing on them\n",
    "            continue\n",
    "        fields = line.split(\",\")\n",
    "        CLASS_MAP[fields[0]] = int(fields[1])\n",
    "NUM_CLASS = len(CLASS_MAP)\n",
    "\n",
    "CLASS_MAP, NUM_CLASS"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5bde9d7d",
   "metadata": {
    "id": "5bde9d7d"
   },
   "source": [
    "#### Build Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ef2bdac6",
   "metadata": {
    "id": "ef2bdac6"
   },
   "outputs": [],
   "source": [
    "# Collect the paths of the labelled images and split them into train / val.\n",
    "# glob() returns its matches in arbitrary order, so the paths are sorted first;\n",
    "# otherwise the same random_state could still give a different split.\n",
    "paths = sorted(glob(\"train/*/*.jpg\"))\n",
    "assert paths, \"no images matched train/*/*.jpg\"\n",
    "train_paths, val_paths = train_test_split(paths,\n",
    "                                          test_size=0.2,\n",
    "                                          random_state=SEED)\n",
    "\n",
"len(train_paths), len(val_paths)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0f9fd26f",
   "metadata": {
    "id": "0f9fd26f"
   },
   "outputs": [],
   "source": [
    "# Visualise one randomly chosen training image\n",
    "path = np.random.choice(train_paths)\n",
    "img = np.array(Image.open(path).convert(\"RGB\"))\n",
    "plt.imshow(img)\n",
    "img.shape, path"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "331a428b",
   "metadata": {
    "id": "331a428b"
   },
   "outputs": [],
   "source": [
    "def paths2labels(paths):\n",
    "    \"\"\"Return the class index of every image path.\n",
    "\n",
    "    The name of the folder that directly contains the image is looked up\n",
    "    in CLASS_MAP. os.path is used so the path separator does not matter.\n",
    "    \"\"\"\n",
    "    return [CLASS_MAP[os.path.basename(os.path.dirname(p))] for p in paths]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "be3193b4",
   "metadata": {
    "id": "be3193b4"
   },
   "outputs": [],
   "source": [
    "train_paths[:10], paths2labels(train_paths)[:10]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "52dff95a",
   "metadata": {
    "id": "52dff95a"
   },
   "outputs": [],
   "source": [
    "def load_and_resize_image(path):\n",
    "    \"\"\"Read a JPEG file, decode it to 3 channels and resize it to IMG_SIZE x IMG_SIZE.\"\"\"\n",
    "    image = tf.io.read_file(path)\n",
    "    image = tf.image.decode_jpeg(image, channels=3)\n",
    "    image = tf.image.resize(image, [IMG_SIZE, IMG_SIZE])\n",
    "    return image\n",
    "\n",
    "\n",
    "def build_ds(paths, shuffle=False):\n",
    "    \"\"\"Build an unbatched tf.data.Dataset of (image, label) pairs from image paths.\n",
    "\n",
    "    Args:\n",
    "        paths: image file paths; the labels are derived with paths2labels().\n",
    "        shuffle: if True, shuffle the (path, label) pairs. The shuffle is applied\n",
    "            before the images are decoded, so the shuffle buffer holds short\n",
    "            strings instead of every decoded image of the dataset.\n",
    "    \"\"\"\n",
    "    labels = paths2labels(paths)\n",
    "    image_ds = tf.data.Dataset.from_tensor_slices((paths, labels))\n",
    "    if shuffle:\n",
    "        image_ds = image_ds.shuffle(buffer_size=len(paths))\n",
    "    # (path, label) to (image, label); files are read and decoded in parallel\n",
    "    image_ds = image_ds.map(lambda path, label: (load_and_resize_image(path), label),\n",
    "                            num_parallel_calls=tf.data.AUTOTUNE)\n",
    "    return image_ds"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5b62bb1b",
   "metadata": {
    "id": "5b62bb1b"
   },
   "outputs": [],
   "source": [
    "# Build the train and val datasets; prefetch overlaps data loading with training\n",
    "train_ds = build_ds(train_paths, shuffle=True).batch(BS).prefetch(tf.data.AUTOTUNE)\n",
    "val_ds = build_ds(val_paths).batch(BS).prefetch(tf.data.AUTOTUNE)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f250a877",
   "metadata": {
    "id": "f250a877"
   },
   "source": [
    "#### Build Model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "26a273bd",
   "metadata": {
    "id": "26a273bd"
   },
   "outputs": [],
   "source": [
    "class WAIModel(tf.keras.models.Model):\n",
    "    \"\"\"Classifier made of: preprocessing, augmentation, encoder, classification head.\n",
    "\n",
    "    Args:\n",
    "        encoder: feature extractor model whose output is fed to the head.\n",
    "        preprocess: callable applied to the input batch before the encoder.\n",
    "        freeze: if True, every layer of the encoder is marked non-trainable.\n",
    "    \"\"\"\n",
    "\n",
    "    def __init__(self, encoder, preprocess, freeze):\n",
    "        super().__init__()\n",
    "        # preprocessing function\n",
    "        self.preprocess = preprocess\n",
    "        # data augmentation\n",
    "        self.data_aug = tf.keras.Sequential([\n",
    "            tf.keras.layers.RandomFlip(\"horizontal\"),\n",
    "            tf.keras.layers.RandomRotation(0.1),\n",
    "        ])\n",
    "        # feature extractor\n",
    "        self.encoder = encoder\n",
    "        # classifier head; it ends with a softmax, so the model outputs probabilities\n",
    "        self.classifier = tf.keras.Sequential([\n",
    "            tf.keras.layers.GlobalAveragePooling2D(),\n",
    "            tf.keras.layers.Dropout(0.2),\n",
    "            tf.keras.layers.Dense(NUM_CLASS),\n",
    "            tf.keras.layers.Softmax(),\n",
    "        ])\n",
    "        # freeze: whether to lock the encoder parameters\n",
    "        if freeze:\n",
    "            for layer in self.encoder.layers:\n",
    "                layer.trainable = False\n",
    "\n",
    "    def call(self, inputs, training=None):\n",
    "        x = self.preprocess(inputs)\n",
    "        # data augmentation is only applied while training\n",
    "        if training:\n",
    "            x = self.data_aug(x)\n",
    "\n",
    "        x = self.encoder(x, training=training)\n",
    "        x = self.classifier(x, training=training)\n",
    "        return x"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Use pre-trained model\n",
    "\n",
    "https://keras.io/api/applications/\n",
    "\n",
    "https://www.tensorflow.org/api_docs/python/tf/keras/applications"
   ],
   "metadata": {
    "id": "10iKyL5xpmcl"
   },
   "id": "10iKyL5xpmcl"
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "bb4044db",
   "metadata": {
    "id": "bb4044db"
   },
   "outputs": [],
   "source": [
    "# Use the preprocessing function that belongs to the chosen encoder\n",
    "preprocess = tf.keras.applications.efficientnet.preprocess_input\n",
    "encoder = tf.keras.applications.efficientnet.EfficientNetB0(include_top=False,\n",
    "                                                            weights='imagenet')\n",
    "\n",
    "model = WAIModel(encoder,\n",
    "                 preprocess,\n",
    "                 freeze=True)"
   ]
  },
  {
   "cell_type": "code",
   "source": [
    "model.encoder.summary()"
   ],
   "metadata": {
    "id": "CT64sob5G2zO"
   },
   "id": "CT64sob5G2zO",
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b64930a5",
   "metadata": {
    "id": "b64930a5"
   },
   "outputs": [],
   "source": [
    "# The model already ends with a softmax, so the loss works on probabilities\n",
    "model.compile(optimizer=tf.keras.optimizers.Adam(),\n",
    "              loss=tf.keras.losses.SparseCategoricalCrossentropy(),\n",
    "              metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "7454fd36",
   "metadata": {
    "id": "7454fd36"
   },
   "source": [
    "#### Training"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2a048808",
   "metadata": {
    "id": "2a048808"
   },
   "outputs": [],
   "source": [
    "# Keep only the weights of the epoch with the lowest validation loss\n",
    "model.fit(train_ds,\n",
    "          epochs=20,\n",
    "          validation_data=val_ds,\n",
    "          callbacks=[\n",
    "              tf.keras.callbacks.ModelCheckpoint(\"best.h5\",\n",
    "                                                 monitor=\"val_loss\",\n",
    "                                                 save_best_only=True,\n",
    "                                                 save_weights_only=True)\n",
    "          ])"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b0fbb7b1",
   "metadata": {
    "id": "b0fbb7b1"
   },
   "source": [
    "#### Test submission\n",
    "\n",
    "繳交至:https://www.kaggle.com/competitions/aia-xt121-cv-kaggle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c2ce297c",
   "metadata": {
    "id": "c2ce297c"
   },
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c1b732be",
   "metadata": {
    "id": "c1b732be"
   },
   "outputs": [],
   "source": [
    "# Load the weights of the checkpoint with the best val_loss\n",
    "model.load_weights(\"best.h5\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "35fdf2ac",
   "metadata": {
    "id": "35fdf2ac"
   },
   "outputs": [],
   "source": [
    "df = pd.read_csv(\"sample_submission.csv\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "da310451",
   "metadata": {
    "id": "da310451"
   },
   "outputs": [],
   "source": [
    "df.head(), len(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e0663417",
   "metadata": {
    "id": "e0663417"
   },
   "outputs": [],
   "source": [
    "# Build the test dataset (images only, in the order of the submission file)\n",
    "test_paths = [os.path.join(\"test\", f\"{name}.jpg\") for name in df[\"id\"].tolist()]\n",
    "test_ds = tf.data.Dataset.from_tensor_slices(test_paths)\n",
    "test_ds = test_ds.map(load_and_resize_image,\n",
    "                      num_parallel_calls=tf.data.AUTOTUNE).batch(BS)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "249f059c",
   "metadata": {
    "id": "249f059c"
   },
   "outputs": [],
   "source": [
    "prediction = model.predict(test_ds)\n",
"prediction = np.argmax(prediction, axis=-1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "3f929694",
   "metadata": {
    "id": "3f929694"
   },
   "outputs": [],
   "source": [
    "# Write the predicted class indices without the DataFrame index column\n",
    "df[\"class\"] = prediction\n",
    "df.to_csv(\"submission.csv\", index=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "ceb9bc7c",
   "metadata": {
    "id": "ceb9bc7c"
   },
   "source": [
    "Public Private\n",
    "\n",
    "0.92264 0.90529"
   ]
  },
  {
   "cell_type": "markdown",
   "source": [
    "#### Freeze some layers"
   ],
   "metadata": {
    "id": "f-yPwAD4H_ut"
   },
   "id": "f-yPwAD4H_ut"
  },
  {
   "cell_type": "code",
   "source": [
    "# Start from a fresh pre-trained encoder\n",
    "tf.keras.backend.clear_session()\n",
    "encoder = tf.keras.applications.efficientnet.EfficientNetB0(include_top=False,\n",
    "                                                            weights='imagenet')"
   ],
   "metadata": {
    "id": "BupnvZxTHEZp"
   },
   "id": "BupnvZxTHEZp",
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# List every encoder layer with its index and trainable flag\n",
    "for i, layer in enumerate(encoder.layers):\n",
    "    print(i, layer.trainable, layer.name)"
   ],
   "metadata": {
    "id": "kM3DHE5vIDgU"
   },
   "id": "kM3DHE5vIDgU",
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "# Freeze the first NUM_FROZEN_LAYERS layers; the remaining layers stay trainable.\n",
    "# NOTE(review): the Keras fine-tuning guide recommends keeping BatchNormalization\n",
    "# layers frozen when part of a pre-trained encoder is unfrozen - confirm before\n",
    "# training with this encoder.\n",
    "NUM_FROZEN_LAYERS = 100\n",
    "for layer in encoder.layers[:NUM_FROZEN_LAYERS]:\n",
    "    layer.trainable = False"
   ],
   "metadata": {
    "id": "-R-3i6n4IKBs"
   },
   "id": "-R-3i6n4IKBs",
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [
    "encoder.summary()"
   ],
   "metadata": {
    "id": "HaaJ5PYXIPP-"
   },
   "id": "HaaJ5PYXIPP-",
   "execution_count": null,
   "outputs": []
  },
  {
   "cell_type": "code",
   "source": [],
   "metadata": {
    "id": "ffopJ24TO_c0"
   },
   "id": "ffopJ24TO_c0",
   "execution_count": null,
   "outputs": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  },
  "colab": {
   "provenance": []
  },
  "accelerator": "GPU",
  "gpuClass": "standard"
 },
 "nbformat": 4,
"nbformat_minor": 5 }