{ "cells": [ { "cell_type": "markdown", "id": "1f146166", "metadata": { "id": "1f146166" }, "source": [ "# **常見訓練設定**\n", "此份程式碼將會介紹隨著訓練過程,可以調整或者紀錄的函式。\n", "\n", "## 本章節內容大綱\n", "* ### EarlyStopping(已於 part3/2_Overfitting.ipynb 介紹)\n", "* ### [ModelCheckpoint](#ModelCheckpoint)\n", "* ### [LearningRateSchedular](#LearningRateSchedular)\n", "* ### [CSVLogger](#CSVLogger)" ] }, { "cell_type": "markdown", "id": "011871a9", "metadata": { "id": "011871a9" }, "source": [ "## 匯入套件" ] }, { "cell_type": "code", "execution_count": null, "id": "d8600dff", "metadata": { "id": "d8600dff" }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "from tqdm.auto import tqdm\n", "\n", "# PyTorch 相關套件\n", "import torch\n", "import torch.nn as nn\n", "import torch.nn.functional as F\n", "\n", "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n", "print(f'device: {device}')" ] }, { "cell_type": "markdown", "id": "33e4ec08", "metadata": { "id": "33e4ec08" }, "source": [ "## 創建資料集/載入資料集(Dataset Creating / Loading)" ] }, { "cell_type": "code", "execution_count": null, "id": "P8myXB-0vB_F", "metadata": { "id": "P8myXB-0vB_F" }, "outputs": [], "source": [ "# 上傳資料\n", "!wget -q https://github.com/TA-aiacademy/course_3.0/releases/download/DL/Data_part3.zip\n", "!unzip -q Data_part3.zip" ] }, { "cell_type": "code", "execution_count": null, "id": "d4f04ed4", "metadata": { "id": "d4f04ed4" }, "outputs": [], "source": [ "train_df = pd.read_csv('./Data/News_train.csv')\n", "test_df = pd.read_csv('./Data/News_test.csv')" ] }, { "cell_type": "code", "execution_count": null, "id": "092919ec", "metadata": { "id": "092919ec" }, "outputs": [], "source": [ "train_df.head()" ] }, { "cell_type": "code", "execution_count": null, "id": "c516c346", "metadata": { "id": "c516c346" }, "outputs": [], "source": [ "X_df = train_df.iloc[:, :-1].values\n", "y_df = train_df.y_category.values" ] }, { "cell_type": "code", "execution_count": 
null, "id": "2dfe2b2f", "metadata": { "id": "2dfe2b2f" }, "outputs": [], "source": [ "X_test = test_df.iloc[:, :-1].values\n", "y_test = test_df.y_category.values" ] }, { "cell_type": "markdown", "id": "063a839a", "metadata": { "id": "063a839a" }, "source": [ "## 資料前處理(Data Preprocessing)" ] }, { "cell_type": "code", "execution_count": null, "id": "04803f0e", "metadata": { "id": "04803f0e" }, "outputs": [], "source": [ "from sklearn.preprocessing import StandardScaler, MinMaxScaler\n", "# Feature scaling\n", "sc = StandardScaler()\n", "X_scale = sc.fit_transform(X_df, y_df)\n", "X_test_scale = sc.transform(X_test)" ] }, { "cell_type": "code", "execution_count": null, "id": "f6de8e01", "metadata": { "id": "f6de8e01" }, "outputs": [], "source": [ "# train, valid/test dataset split\n", "from sklearn.model_selection import train_test_split\n", "X_train, X_valid, y_train, y_valid = train_test_split(X_scale, y_df,\n", " test_size=0.2,\n", " random_state=5566,\n", " stratify=y_df)" ] }, { "cell_type": "code", "execution_count": null, "id": "c51092ac", "metadata": { "id": "c51092ac" }, "outputs": [], "source": [ "print(f'X_train shape: {X_train.shape}')\n", "print(f'X_valid shape: {X_valid.shape}')\n", "print(f'y_train shape: {y_train.shape}')\n", "print(f'y_valid shape: {y_valid.shape}')" ] }, { "cell_type": "code", "execution_count": null, "id": "jpBQ5B5llkUP", "metadata": { "id": "jpBQ5B5llkUP" }, "outputs": [], "source": [ "# build dataset and dataloader\n", "train_ds = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32),\n", " torch.tensor(y_train, dtype=torch.long))\n", "valid_ds = torch.utils.data.TensorDataset(torch.tensor(X_valid, dtype=torch.float32),\n", " torch.tensor(y_valid, dtype=torch.long))\n", "test_ds = torch.utils.data.TensorDataset(torch.tensor(X_test_scale, dtype=torch.float32),\n", " torch.tensor(y_test, dtype=torch.long))\n", "\n", "BATCH_SIZE = 64\n", "train_loader = torch.utils.data.DataLoader(train_ds, batch_size=BATCH_SIZE, 
shuffle=True)\n", "valid_loader = torch.utils.data.DataLoader(valid_ds, batch_size=BATCH_SIZE)\n", "test_loader = torch.utils.data.DataLoader(test_ds, batch_size=BATCH_SIZE)" ] }, { "cell_type": "markdown", "id": "670e0666", "metadata": { "id": "670e0666" }, "source": [ "## 模型建置(Model Building)" ] }, { "cell_type": "code", "execution_count": null, "id": "ce8aca19", "metadata": { "id": "ce8aca19" }, "outputs": [], "source": [ "NUM_CLASS = 11\n", "\n", "def build_model(input_shape, num_class):\n", " torch.manual_seed(5566)\n", " model = nn.Sequential(\n", " nn.Linear(input_shape, 64),\n", " nn.Tanh(),\n", " nn.Linear(64, 64),\n", " nn.Tanh(),\n", " nn.Linear(64, num_class),\n", " )\n", " return model" ] }, { "cell_type": "markdown", "id": "5046649f", "metadata": { "id": "5046649f" }, "source": [ "\n", "* ## ModelCheckpoint" ] }, { "cell_type": "code", "execution_count": null, "id": "a1ccd11e", "metadata": { "id": "a1ccd11e" }, "outputs": [], "source": [ "model = build_model(X_train.shape[1], NUM_CLASS)\n", "model = model.to(device)\n", "\n", "optimizer = torch.optim.NAdam(model.parameters(), lr=0.001)\n", "loss_fn = nn.CrossEntropyLoss() # 多元分類損失函數" ] }, { "cell_type": "code", "execution_count": null, "id": "4a70dc9f", "metadata": { "id": "4a70dc9f" }, "outputs": [], "source": [ "def train_epoch(model, optimizer, loss_fn, train_dataloader, val_dataloader):\n", " # 訓練一輪\n", " model.train()\n", " total_train_loss = 0\n", " total_train_correct = 0\n", " for x, y in tqdm(train_dataloader, leave=False):\n", " optimizer.zero_grad() # 梯度歸零\n", " x, y = x.to(device), y.to(device) # 將資料移至GPU\n", " y_pred = model(x) # 計算預測值\n", " loss = loss_fn(y_pred, y) # 計算誤差\n", " loss.backward() # 反向傳播計算梯度\n", " optimizer.step() # 更新模型參數\n", " total_train_loss += loss.item()\n", " # 利用argmax計算最大值是第n個類別,與解答比對是否相同\n", " total_train_correct += ((y_pred.argmax(dim=1) == y).sum().item())\n", "\n", " avg_train_loss = total_train_loss / len(train_dataloader)\n", " avg_train_acc = 
total_train_correct / len(train_dataloader.dataset)\n", "\n", " return avg_train_loss, avg_train_acc\n", "\n", "def test_epoch(model, loss_fn, val_dataloader):\n", " # 驗證一輪\n", " model.eval()\n", " total_val_loss = 0\n", " total_val_correct = 0\n", " # 關閉梯度計算以加速\n", " with torch.no_grad():\n", " for x, y in val_dataloader:\n", " x, y = x.to(device), y.to(device)\n", " y_pred = model(x)\n", " loss = loss_fn(y_pred, y)\n", " total_val_loss += loss.item()\n", " # 利用argmax計算最大值是第n個類別,與解答比對是否相同\n", " total_val_correct += ((y_pred.argmax(dim=1) == y).sum().item())\n", "\n", " avg_val_loss = total_val_loss / len(val_dataloader)\n", " avg_val_acc = total_val_correct / len(val_dataloader.dataset)\n", "\n", " return avg_val_loss, avg_val_acc\n", "\n", "BEST_MODEL_PATH = './Data/best.pth' # 最佳模型位置\n", "LAST_MODEL_PATH = './Data/last.pth' # 最後模型位置\n", "\n", "def run(model, optimizer, loss_fn, train_loader, valid_loader, verbose=1):\n", " train_loss_log = []\n", " val_loss_log = []\n", " train_acc_log = []\n", " val_acc_log = []\n", " best_val_loss = np.inf\n", "\n", " for epoch in tqdm(range(20)):\n", " avg_train_loss, avg_train_acc = train_epoch(model, optimizer, loss_fn, train_loader, valid_loader)\n", " avg_val_loss, avg_val_acc = test_epoch(model, loss_fn, valid_loader)\n", " train_loss_log.append(avg_train_loss)\n", " val_loss_log.append(avg_val_loss)\n", " train_acc_log.append(avg_train_acc)\n", " val_acc_log.append(avg_val_acc)\n", " if verbose == 1:\n", " print(f'Epoch: {epoch}, Train Loss: {avg_train_loss:.3f}, Val Loss: {avg_val_loss:.3f} | Train Acc: {avg_train_acc:.3f}, Val Acc: {avg_val_acc:.3f}')\n", " # Model check point\n", " if avg_val_loss < best_val_loss:\n", " best_val_loss = avg_val_loss\n", " torch.save(model.state_dict(), BEST_MODEL_PATH)\n", " torch.save(model.state_dict(), LAST_MODEL_PATH)\n", " return train_loss_log, train_acc_log, val_loss_log, val_acc_log" ] }, { "cell_type": "code", "execution_count": null, "id": "Rimvx-Lgnly1", "metadata": { "id": 
"Rimvx-Lgnly1" }, "outputs": [], "source": [ "_ = run(model, optimizer, loss_fn, train_loader, valid_loader)" ] }, { "cell_type": "markdown", "id": "815412b3", "metadata": { "id": "815412b3" }, "source": [ "\n", "* ## torch.optim.lr_scheduler: https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate" ] }, { "cell_type": "code", "execution_count": null, "id": "80bbc0ab", "metadata": { "id": "80bbc0ab" }, "outputs": [], "source": [ "class CustomLRScheduler:\n", " def __init__(self, optimizer):\n", " self.optimizer = optimizer\n", "\n", " def step(self, epoch):\n", " if epoch < 10:\n", " lr = 0.001\n", " elif epoch < 15:\n", " lr = 0.0001\n", " else:\n", " lr = 0.00001\n", "\n", " for param_group in self.optimizer.param_groups:\n", " param_group['lr'] = lr\n", "\n", "def run(model, optimizer, loss_fn, train_loader, valid_loader,\n", " scheduler=None,\n", " verbose=1):\n", " train_loss_log = []\n", " val_loss_log = []\n", " train_acc_log = []\n", " val_acc_log = []\n", " best_val_loss = np.inf\n", "\n", " for epoch in tqdm(range(20)):\n", " avg_train_loss, avg_train_acc = train_epoch(model, optimizer, loss_fn, train_loader, valid_loader)\n", " avg_val_loss, avg_val_acc = test_epoch(model, loss_fn, valid_loader)\n", " train_loss_log.append(avg_train_loss)\n", " val_loss_log.append(avg_val_loss)\n", " train_acc_log.append(avg_train_acc)\n", " val_acc_log.append(avg_val_acc)\n", " if verbose == 1:\n", " print(f'Epoch: {epoch}, Train Loss: {avg_train_loss:.3f}, Val Loss: {avg_val_loss:.3f} | Train Acc: {avg_train_acc:.3f}, Val Acc: {avg_val_acc:.3f}')\n", " # Model check point\n", " if avg_val_loss < best_val_loss:\n", " best_val_loss = avg_val_loss\n", " torch.save(model.state_dict(), BEST_MODEL_PATH)\n", " torch.save(model.state_dict(), LAST_MODEL_PATH)\n", " if type(scheduler) == CustomLRScheduler:\n", " scheduler.step(epoch)\n", " elif type(scheduler) == torch.optim.lr_scheduler.ReduceLROnPlateau:\n", " scheduler.step(avg_val_loss)\n", " else:\n", " 
pass\n", " return train_loss_log, train_acc_log, val_loss_log, val_acc_log" ] }, { "cell_type": "code", "execution_count": null, "id": "24de7e6a", "metadata": { "id": "24de7e6a" }, "outputs": [], "source": [ "# 建立兩個 list 記錄選用不同 learing rate schedular 的訓練結果\n", "train_loss_list = []\n", "train_acc_list = []\n", "\n", "# 建立兩個 list 記錄選用不同 learning rate schedular 的驗證結果\n", "valid_loss_list = []\n", "valid_acc_list = []\n", "\n", "callback_l = {\n", " 'None': None,\n", " 'CustomLRScheduler': CustomLRScheduler,\n", " 'ReduceLROnPlateau': torch.optim.lr_scheduler.ReduceLROnPlateau,\n", "}\n", "# for cb in callback_l:\n", "for _, scheduler_type in callback_l.items():\n", " print('Training a model with callbacks: {}'\n", " .format(scheduler_type))\n", " model = build_model(X_train.shape[1], NUM_CLASS)\n", " model = model.to(device)\n", " optimizer = torch.optim.NAdam(model.parameters(), lr=0.001)\n", " if scheduler_type is not None:\n", " scheduler = scheduler_type(optimizer)\n", " else:\n", " scheduler = None\n", " loss_fn = nn.CrossEntropyLoss()\n", "\n", " history = run(model, optimizer, loss_fn, train_loader, valid_loader, scheduler, verbose=0)\n", "\n", " # 將訓練過程記錄下來\n", " train_loss_list.append(history[0])\n", " train_acc_list.append(history[1])\n", " valid_loss_list.append(history[2])\n", " valid_acc_list.append(history[3])\n", "print('----------------- training done! 
-----------------')" ] }, { "cell_type": "code", "execution_count": null, "id": "5b56d666", "metadata": { "id": "5b56d666" }, "outputs": [], "source": [ "# 視覺化訓練過程\n", "plt.figure(figsize=(15, 5))\n", "\n", "train_line = ()\n", "valid_line = ()\n", "\n", "# 繪製 Training loss\n", "plt.subplot(121)\n", "for k, cb in enumerate(callback_l):\n", " loss = train_loss_list[k]\n", " val_loss = valid_loss_list[k]\n", " train_l = plt.plot(\n", " range(len(loss)), loss,\n", " label=f'Training callback:{cb}')\n", " valid_l = plt.plot(\n", " range(len(val_loss)), val_loss, '--',\n", " label=f'Validation callback:{cb}')\n", "\n", " train_line += tuple(train_l)\n", " valid_line += tuple(valid_l)\n", "plt.title('Loss')\n", "\n", "# 繪製 Training accuracy\n", "plt.subplot(122)\n", "train_acc_line = []\n", "valid_acc_line = []\n", "for k, cb in enumerate(callback_l):\n", " acc = train_acc_list[k]\n", " val_acc = valid_acc_list[k]\n", " plt.plot(range(len(acc)), acc,\n", " label=f'Training callback:{cb}')\n", " plt.plot(range(len(val_acc)), val_acc, '--',\n", " label=f'Validation callback:{cb}')\n", "plt.title('Accuracy')\n", "\n", "first_legend = plt.legend(handles=train_line,\n", " bbox_to_anchor=(1.05, 1))\n", "\n", "plt.gca().add_artist(first_legend)\n", "plt.legend(handles=valid_line,\n", " bbox_to_anchor=(1.05, 0.7))" ] }, { "cell_type": "markdown", "id": "33e55106", "metadata": { "id": "33e55106" }, "source": [ "\n", "* ## CSVLogger" ] }, { "cell_type": "code", "execution_count": null, "id": "jnoJSeLFsMe7", "metadata": { "id": "jnoJSeLFsMe7" }, "outputs": [], "source": [ "model = build_model(X_train.shape[1], NUM_CLASS)\n", "model = model.to(device)\n", "optimizer = torch.optim.NAdam(model.parameters(), lr=0.001)\n", "loss_fn = nn.CrossEntropyLoss()\n", "history = run(model, optimizer, loss_fn, train_loader, valid_loader,\n", " scheduler=None, verbose=0)" ] }, { "cell_type": "code", "execution_count": null, "id": "ceLRva-gsym0", "metadata": { "id": "ceLRva-gsym0" }, "outputs": [], 
"source": [ "# 將訓練過程記錄下來\n", "import pandas as pd\n", "df = pd.DataFrame()\n", "df['train_loss'] = history[0]\n", "df['train_acc'] = history[1]\n", "df['valid_loss'] = history[2]\n", "df['valid_acc'] = history[3]\n", "df.to_csv('logs.csv', index=False)\n", "df" ] }, { "cell_type": "markdown", "id": "12554d04", "metadata": { "id": "12554d04" }, "source": [ "---\n", "wandb(補充教材): https://docs.wandb.ai/v/zh-hans/quickstart" ] } ], "metadata": { "accelerator": "GPU", "colab": { "provenance": [] }, "gpuClass": "standard", "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.12" } }, "nbformat": 4, "nbformat_minor": 5 }