{
"cells": [
{
"cell_type": "markdown",
"id": "1f146166",
"metadata": {
"id": "1f146166"
},
"source": [
"# **常見訓練設定**\n",
"此份程式碼將會介紹隨著訓練過程,可以調整或者紀錄的函式。\n",
"\n",
"## 本章節內容大綱\n",
"* ### EarlyStopping(已於 part3/2_Overfitting.ipynb 介紹)\n",
"* ### [ModelCheckpoint](#ModelCheckpoint)\n",
"* ### [LearningRateSchedular](#LearningRateSchedular)\n",
"* ### [CSVLogger](#CSVLogger)"
]
},
{
"cell_type": "markdown",
"id": "011871a9",
"metadata": {
"id": "011871a9"
},
"source": [
"## 匯入套件"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8600dff",
"metadata": {
"id": "d8600dff"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from tqdm.auto import tqdm\n",
"\n",
"# PyTorch 相關套件\n",
"import torch\n",
"import torch.nn as nn\n",
"import torch.nn.functional as F\n",
"\n",
"device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
"print(f'device: {device}')"
]
},
{
"cell_type": "markdown",
"id": "33e4ec08",
"metadata": {
"id": "33e4ec08"
},
"source": [
"## 創建資料集/載入資料集(Dataset Creating / Loading)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "P8myXB-0vB_F",
"metadata": {
"id": "P8myXB-0vB_F"
},
"outputs": [],
"source": [
"# 上傳資料\n",
"!wget -q https://github.com/TA-aiacademy/course_3.0/releases/download/DL/Data_part3.zip\n",
"!unzip -q Data_part3.zip"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d4f04ed4",
"metadata": {
"id": "d4f04ed4"
},
"outputs": [],
"source": [
"train_df = pd.read_csv('./Data/News_train.csv')\n",
"test_df = pd.read_csv('./Data/News_test.csv')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "092919ec",
"metadata": {
"id": "092919ec"
},
"outputs": [],
"source": [
"train_df.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c516c346",
"metadata": {
"id": "c516c346"
},
"outputs": [],
"source": [
"X_df = train_df.iloc[:, :-1].values\n",
"y_df = train_df.y_category.values"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2dfe2b2f",
"metadata": {
"id": "2dfe2b2f"
},
"outputs": [],
"source": [
"X_test = test_df.iloc[:, :-1].values\n",
"y_test = test_df.y_category.values"
]
},
{
"cell_type": "markdown",
"id": "063a839a",
"metadata": {
"id": "063a839a"
},
"source": [
"## 資料前處理(Data Preprocessing)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "04803f0e",
"metadata": {
"id": "04803f0e"
},
"outputs": [],
"source": [
"from sklearn.preprocessing import StandardScaler, MinMaxScaler\n",
"# Feature scaling\n",
"sc = StandardScaler()\n",
"X_scale = sc.fit_transform(X_df, y_df)\n",
"X_test_scale = sc.transform(X_test)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f6de8e01",
"metadata": {
"id": "f6de8e01"
},
"outputs": [],
"source": [
"# train, valid/test dataset split\n",
"from sklearn.model_selection import train_test_split\n",
"X_train, X_valid, y_train, y_valid = train_test_split(X_scale, y_df,\n",
" test_size=0.2,\n",
" random_state=5566,\n",
" stratify=y_df)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c51092ac",
"metadata": {
"id": "c51092ac"
},
"outputs": [],
"source": [
"print(f'X_train shape: {X_train.shape}')\n",
"print(f'X_valid shape: {X_valid.shape}')\n",
"print(f'y_train shape: {y_train.shape}')\n",
"print(f'y_valid shape: {y_valid.shape}')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "jpBQ5B5llkUP",
"metadata": {
"id": "jpBQ5B5llkUP"
},
"outputs": [],
"source": [
"# build dataset and dataloader\n",
"train_ds = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float32),\n",
" torch.tensor(y_train, dtype=torch.long))\n",
"valid_ds = torch.utils.data.TensorDataset(torch.tensor(X_valid, dtype=torch.float32),\n",
" torch.tensor(y_valid, dtype=torch.long))\n",
"test_ds = torch.utils.data.TensorDataset(torch.tensor(X_test_scale, dtype=torch.float32),\n",
" torch.tensor(y_test, dtype=torch.long))\n",
"\n",
"BATCH_SIZE = 64\n",
"train_loader = torch.utils.data.DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)\n",
"valid_loader = torch.utils.data.DataLoader(valid_ds, batch_size=BATCH_SIZE)\n",
"test_loader = torch.utils.data.DataLoader(test_ds, batch_size=BATCH_SIZE)"
]
},
{
"cell_type": "markdown",
"id": "670e0666",
"metadata": {
"id": "670e0666"
},
"source": [
"## 模型建置(Model Building)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ce8aca19",
"metadata": {
"id": "ce8aca19"
},
"outputs": [],
"source": [
"NUM_CLASS = 11\n",
"\n",
"def build_model(input_shape, num_class):\n",
" torch.manual_seed(5566)\n",
" model = nn.Sequential(\n",
" nn.Linear(input_shape, 64),\n",
" nn.Tanh(),\n",
" nn.Linear(64, 64),\n",
" nn.Tanh(),\n",
" nn.Linear(64, num_class),\n",
" )\n",
" return model"
]
},
{
"cell_type": "markdown",
"id": "5046649f",
"metadata": {
"id": "5046649f"
},
"source": [
"\n",
"* ## ModelCheckpoint"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a1ccd11e",
"metadata": {
"id": "a1ccd11e"
},
"outputs": [],
"source": [
"model = build_model(X_train.shape[1], NUM_CLASS)\n",
"model = model.to(device)\n",
"\n",
"optimizer = torch.optim.NAdam(model.parameters(), lr=0.001)\n",
"loss_fn = nn.CrossEntropyLoss() # 多元分類損失函數"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4a70dc9f",
"metadata": {
"id": "4a70dc9f"
},
"outputs": [],
"source": [
"def train_epoch(model, optimizer, loss_fn, train_dataloader, val_dataloader):\n",
" # 訓練一輪\n",
" model.train()\n",
" total_train_loss = 0\n",
" total_train_correct = 0\n",
" for x, y in tqdm(train_dataloader, leave=False):\n",
" optimizer.zero_grad() # 梯度歸零\n",
" x, y = x.to(device), y.to(device) # 將資料移至GPU\n",
" y_pred = model(x) # 計算預測值\n",
" loss = loss_fn(y_pred, y) # 計算誤差\n",
" loss.backward() # 反向傳播計算梯度\n",
" optimizer.step() # 更新模型參數\n",
" total_train_loss += loss.item()\n",
" # 利用argmax計算最大值是第n個類別,與解答比對是否相同\n",
" total_train_correct += ((y_pred.argmax(dim=1) == y).sum().item())\n",
"\n",
" avg_train_loss = total_train_loss / len(train_dataloader)\n",
" avg_train_acc = total_train_correct / len(train_dataloader.dataset)\n",
"\n",
" return avg_train_loss, avg_train_acc\n",
"\n",
"def test_epoch(model, loss_fn, val_dataloader):\n",
" # 驗證一輪\n",
" model.eval()\n",
" total_val_loss = 0\n",
" total_val_correct = 0\n",
" # 關閉梯度計算以加速\n",
" with torch.no_grad():\n",
" for x, y in val_dataloader:\n",
" x, y = x.to(device), y.to(device)\n",
" y_pred = model(x)\n",
" loss = loss_fn(y_pred, y)\n",
" total_val_loss += loss.item()\n",
" # 利用argmax計算最大值是第n個類別,與解答比對是否相同\n",
" total_val_correct += ((y_pred.argmax(dim=1) == y).sum().item())\n",
"\n",
" avg_val_loss = total_val_loss / len(val_dataloader)\n",
" avg_val_acc = total_val_correct / len(val_dataloader.dataset)\n",
"\n",
" return avg_val_loss, avg_val_acc\n",
"\n",
"BEST_MODEL_PATH = './Data/best.pth' # 最佳模型位置\n",
"LAST_MODEL_PATH = './Data/last.pth' # 最佳模型位置\n",
"\n",
"def run(model, optimizer, loss_fn, train_loader, valid_loader, verbose=1):\n",
" train_loss_log = []\n",
" val_loss_log = []\n",
" train_acc_log = []\n",
" val_acc_log = []\n",
" best_val_loss = np.inf\n",
"\n",
" for epoch in tqdm(range(20)):\n",
" avg_train_loss, avg_train_acc = train_epoch(model, optimizer, loss_fn, train_loader, valid_loader)\n",
" avg_val_loss, avg_val_acc = test_epoch(model, loss_fn, valid_loader)\n",
" train_loss_log.append(avg_train_loss)\n",
" val_loss_log.append(avg_val_loss)\n",
" train_acc_log.append(avg_train_acc)\n",
" val_acc_log.append(avg_val_acc)\n",
" if verbose == 1:\n",
" print(f'Epoch: {epoch}, Train Loss: {avg_train_loss:.3f}, Val Loss: {avg_val_loss:.3f} | Train Acc: {avg_train_acc:.3f}, Val Acc: {avg_val_acc:.3f}')\n",
" # Model check point\n",
" if avg_val_loss < best_val_loss:\n",
" best_val_loss = avg_val_loss\n",
" torch.save(model.state_dict(), BEST_MODEL_PATH)\n",
" torch.save(model.state_dict(), LAST_MODEL_PATH)\n",
" return train_loss_log, train_acc_log, val_loss_log, val_acc_log"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "Rimvx-Lgnly1",
"metadata": {
"id": "Rimvx-Lgnly1"
},
"outputs": [],
"source": [
"_ = run(model, optimizer, loss_fn, train_loader, valid_loader)"
]
},
{
"cell_type": "markdown",
"id": "815412b3",
"metadata": {
"id": "815412b3"
},
"source": [
"\n",
"* ## torch.optim.lr_scheduler: https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "80bbc0ab",
"metadata": {
"id": "80bbc0ab"
},
"outputs": [],
"source": [
"class CustomLRScheduler:\n",
" def __init__(self, optimizer):\n",
" self.optimizer = optimizer\n",
"\n",
" def step(self, epoch):\n",
" if epoch < 10:\n",
" lr = 0.001\n",
" elif epoch < 15:\n",
" lr = 0.0001\n",
" else:\n",
" lr = 0.00001\n",
"\n",
" for param_group in self.optimizer.param_groups:\n",
" param_group['lr'] = lr\n",
"\n",
"def run(model, optimizer, loss_fn, train_loader, valid_loader,\n",
" scheduler=None,\n",
" verbose=1):\n",
" train_loss_log = []\n",
" val_loss_log = []\n",
" train_acc_log = []\n",
" val_acc_log = []\n",
" best_val_loss = np.inf\n",
"\n",
" for epoch in tqdm(range(20)):\n",
" avg_train_loss, avg_train_acc = train_epoch(model, optimizer, loss_fn, train_loader, valid_loader)\n",
" avg_val_loss, avg_val_acc = test_epoch(model, loss_fn, valid_loader)\n",
" train_loss_log.append(avg_train_loss)\n",
" val_loss_log.append(avg_val_loss)\n",
" train_acc_log.append(avg_train_acc)\n",
" val_acc_log.append(avg_val_acc)\n",
" if verbose == 1:\n",
" print(f'Epoch: {epoch}, Train Loss: {avg_train_loss:.3f}, Val Loss: {avg_val_loss:.3f} | Train Acc: {avg_train_acc:.3f}, Val Acc: {avg_val_acc:.3f}')\n",
" # Model check point\n",
" if avg_val_loss < best_val_loss:\n",
" best_val_loss = avg_val_loss\n",
" torch.save(model.state_dict(), BEST_MODEL_PATH)\n",
" torch.save(model.state_dict(), LAST_MODEL_PATH)\n",
" if type(scheduler) == CustomLRScheduler:\n",
" scheduler.step(epoch)\n",
" elif type(scheduler) == torch.optim.lr_scheduler.ReduceLROnPlateau:\n",
" scheduler.step(avg_val_loss)\n",
" else:\n",
" pass\n",
" return train_loss_log, train_acc_log, val_loss_log, val_acc_log"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "24de7e6a",
"metadata": {
"id": "24de7e6a"
},
"outputs": [],
"source": [
"# 建立兩個 list 記錄選用不同 learing rate schedular 的訓練結果\n",
"train_loss_list = []\n",
"train_acc_list = []\n",
"\n",
"# 建立兩個 list 記錄選用不同 learning rate schedular 的驗證結果\n",
"valid_loss_list = []\n",
"valid_acc_list = []\n",
"\n",
"callback_l = {\n",
" 'None': None,\n",
" 'CustomLRScheduler': CustomLRScheduler,\n",
" 'ReduceLROnPlateau': torch.optim.lr_scheduler.ReduceLROnPlateau,\n",
"}\n",
"# for cb in callback_l:\n",
"for _, scheduler_type in callback_l.items():\n",
" print('Training a model with callbacks: {}'\n",
" .format(scheduler_type))\n",
" model = build_model(X_train.shape[1], NUM_CLASS)\n",
" model = model.to(device)\n",
" optimizer = torch.optim.NAdam(model.parameters(), lr=0.001)\n",
" if scheduler_type is not None:\n",
" scheduler = scheduler_type(optimizer)\n",
" else:\n",
" scheduler = None\n",
" loss_fn = nn.CrossEntropyLoss()\n",
"\n",
" history = run(model, optimizer, loss_fn, train_loader, valid_loader, scheduler, verbose=0)\n",
"\n",
" # 將訓練過程記錄下來\n",
" train_loss_list.append(history[0])\n",
" train_acc_list.append(history[1])\n",
" valid_loss_list.append(history[2])\n",
" valid_acc_list.append(history[3])\n",
"print('----------------- training done! -----------------')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5b56d666",
"metadata": {
"id": "5b56d666"
},
"outputs": [],
"source": [
"# 視覺化訓練過程\n",
"plt.figure(figsize=(15, 5))\n",
"\n",
"train_line = ()\n",
"valid_line = ()\n",
"\n",
"# 繪製 Training loss\n",
"plt.subplot(121)\n",
"for k, cb in enumerate(callback_l):\n",
" loss = train_loss_list[k]\n",
" val_loss = valid_loss_list[k]\n",
" train_l = plt.plot(\n",
" range(len(loss)), loss,\n",
" label=f'Training callback:{cb}')\n",
" valid_l = plt.plot(\n",
" range(len(val_loss)), val_loss, '--',\n",
" label=f'Validation callback:{cb}')\n",
"\n",
" train_line += tuple(train_l)\n",
" valid_line += tuple(valid_l)\n",
"plt.title('Loss')\n",
"\n",
"# 繪製 Training accuracy\n",
"plt.subplot(122)\n",
"train_acc_line = []\n",
"valid_acc_line = []\n",
"for k, cb in enumerate(callback_l):\n",
" acc = train_acc_list[k]\n",
" val_acc = valid_acc_list[k]\n",
" plt.plot(range(len(acc)), acc,\n",
" label=f'Training callback:{cb}')\n",
" plt.plot(range(len(val_acc)), val_acc, '--',\n",
" label=f'Validation callback:{cb}')\n",
"plt.title('Accuracy')\n",
"\n",
"first_legend = plt.legend(handles=train_line,\n",
" bbox_to_anchor=(1.05, 1))\n",
"\n",
"plt.gca().add_artist(first_legend)\n",
"plt.legend(handles=valid_line,\n",
" bbox_to_anchor=(1.05, 0.7))"
]
},
{
"cell_type": "markdown",
"id": "33e55106",
"metadata": {
"id": "33e55106"
},
"source": [
"\n",
"* ## CSVLogger"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "jnoJSeLFsMe7",
"metadata": {
"id": "jnoJSeLFsMe7"
},
"outputs": [],
"source": [
"model = build_model(X_train.shape[1], NUM_CLASS)\n",
"model = model.to(device)\n",
"optimizer = torch.optim.NAdam(model.parameters(), lr=0.001)\n",
"loss_fn = nn.CrossEntropyLoss()\n",
"history = run(model, optimizer, loss_fn, train_loader, valid_loader,\n",
" scheduler=None, verbose=0)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "ceLRva-gsym0",
"metadata": {
"id": "ceLRva-gsym0"
},
"outputs": [],
"source": [
"# 將訓練過程記錄下來\n",
"import pandas as pd\n",
"df = pd.DataFrame()\n",
"df['train_loss'] = history[0]\n",
"df['train_acc'] = history[1]\n",
"df['valid_loss'] = history[2]\n",
"df['valid_acc'] = history[3]\n",
"df.to_csv('logs.csv', index=False)\n",
"df"
]
},
{
"cell_type": "markdown",
"id": "12554d04",
"metadata": {
"id": "12554d04"
},
"source": [
"---\n",
"wandb(補充教材): https://docs.wandb.ai/v/zh-hans/quickstart"
]
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"provenance": []
},
"gpuClass": "standard",
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}