{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "77421006",
   "metadata": {},
   "source": [
    "# 下載課程所需檔案 (YOLOv7, Dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7722f2df",
   "metadata": {},
   "outputs": [],
   "source": [
    "!wget https://github.com/TA-aiacademy/course_3.0/releases/download/CVCNN_Data/dog_cat_for_YOLO.zip\n",
    "!unzip dog_cat_for_YOLO.zip\n",
    "!wget https://github.com/TA-aiacademy/course_3.0/releases/download/CVCNN_Data/yolov7.zip\n",
    "!unzip yolov7.zip"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "09403475",
   "metadata": {},
   "source": [
    "# 下載課程所需檔案(先訓練好的權重)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6b88f96",
   "metadata": {},
   "outputs": [],
   "source": [
    "!wget https://github.com/TA-aiacademy/course_3.0/releases/download/CVCNN_Data/pre_train.pt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f058c51a",
   "metadata": {},
   "source": [
    "# YOLOv7 預測教學"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a0a9ce67",
   "metadata": {},
   "source": [
    "## 匯入所需要的套件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8bdf3af0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import argparse\n",
    "import time\n",
    "from pathlib import Path\n",
    "\n",
    "import cv2\n",
    "import torch\n",
    "import torch.backends.cudnn as cudnn\n",
    "from numpy import random\n",
    "\n",
    "from models.experimental import attempt_load\n",
    "from utils.datasets import LoadStreams, LoadImages\n",
    "from utils.general import (check_img_size, check_requirements,\n",
    "                           check_imshow, non_max_suppression, apply_classifier,\n",
    "                           scale_coords, xyxy2xywh, strip_optimizer,\n",
    "                           set_logging, increment_path)\n",
    "from utils.plots import plot_one_box\n",
    "from utils.torch_utils import select_device, load_classifier, time_synchronized"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5eeab535",
   "metadata": {},
   "source": [
    "## 參數與初始值設定"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a1bfc0a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 用 parser 來設定參數，參數不能重複設置\n",
    "parser = argparse.ArgumentParser()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "11b74c90",
   "metadata": {},
   "source": [
    "### 模型相關參數"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a2479394",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 模型存放的路徑\n",
    "parser.add_argument('--weights', nargs='+', type=str, default='yolov7.pt')\n",
    "# 要預測的目標 file/folder, 0 for webcam\n",
    "parser.add_argument('--source', type=str, default='inference/images')\n",
    "# 預測的圖片大小\n",
    "parser.add_argument('--img-size', type=int, default=640)\n",
    "# 是物件的閾值調整\n",
    "parser.add_argument('--conf-thres', type=float, default=0.25)\n",
    "# 執行 NMS 的 IOU 閾值調整\n",
    "parser.add_argument('--iou-thres', type=float, default=0.45)\n",
    "# 設定 CPU/GPU 的裝置，正常電腦使用 GPU 會設 0\n",
    "parser.add_argument('--device', default='cpu')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e1a54c59",
   "metadata": {},
   "source": [
    "### 存檔相關參數"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15d295fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 是否把結果儲存成文字文件\n",
    "parser.add_argument('--save-txt', action='store_true')\n",
    "# 是否把信心分數存下來\n",
    "parser.add_argument('--save-conf', action='store_true')\n",
    "# 是否不儲存結果（圖片或影像的預測結果）\n",
    "parser.add_argument('--nosave', action='store_true')\n",
    "# 資料儲存位置，會儲存在 project/name 底下\n",
    "parser.add_argument('--project', default='runs/detect')\n",
    "# 資料儲存位置的子資料夾名稱，會儲存在 project/name 底下\n",
    "parser.add_argument('--name', default='exp')\n",
    "# 如果資料夾已存在，是否放進同個資料夾，或要新增資料夾來放\n",
    "# 預設是 False，資料夾存在就會新增一個資料夾放結果\n",
    "parser.add_argument('--exist-ok', action='store_true')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8e4f4e37",
   "metadata": {},
   "source": [
    "### NMS 相關參數"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "14b52cc8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 可以指定只顯示特定的類別，--class 0, or --class 0 2 3\n",
    "parser.add_argument('--classes', nargs='+', type=int)\n",
    "# NMS 預設是相同類別會取一個框，可以設定同個位置只選一個框\n",
    "# 跨類別的執行 NMS （目的是希望不要有很多框重複在同個位置，預設是 False）\n",
    "parser.add_argument('--agnostic-nms', action='store_true')\n",
    "# 預測時是否對目標做資料擴增\n",
    "parser.add_argument('--augment', action='store_true')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8ce7f59",
   "metadata": {},
   "outputs": [],
   "source": [
    "# 基本上使用預設即可，僅需調整權重位置即要預測的目標\n",
    "opt = parser.parse_args(\n",
    "    args=['--weights', './pre_train.pt',\n",
    "          '--source', './datasets/pet/valid/images/Bengal_130.jpg',\n",
    "          '--device', '0',\n",
    "          '--nosave'])\n",
    "print(opt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f478b713",
   "metadata": {},
   "outputs": [],
   "source": [
    "source, weights = opt.source, opt.weights\n",
    "save_txt, imgsz = opt.save_txt, opt.img_size\n",
    "save_img = not opt.nosave and not source.endswith('.txt')  # 是否要儲存預測結果\n",
    "\n",
    "# 如果輸入是 webcam，會讀取 webcam 位置\n",
    "webcam = source.isnumeric() or source.endswith('.txt') or \\\n",
    "    source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))\n",
    "\n",
    "# 設定資料夾\n",
    "# save_dir => 'runs/{project}/{name}'\n",
    "save_dir = Path(increment_path(Path(opt.project) / opt.name,\n",
    "                               exist_ok=opt.exist_ok))  \n",
    "(save_dir / 'labels' if save_txt else save_dir).mkdir(\n",
    "    parents=True, exist_ok=True)  # make dir\n",
    "\n",
    "# 初始化設定\n",
    "set_logging()\n",
    "device = select_device(opt.device)\n",
    "half = device.type != 'cpu'  # 如果是GPU的話，就使用半精度的方式做預測(速度會快一些)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5e94af84",
   "metadata": {},
   "outputs": [],
   "source": [
    "with torch.no_grad():\n",
    "\n",
    "    # Load model\n",
    "    model = attempt_load(weights, map_location=device)  # load FP32 model\n",
    "    vid_path, vid_writer = None, None\n",
    "    stride = int(model.stride.max())  # model stride\n",
    "\n",
    "    # 設定 Dataloader\n",
    "    if webcam:\n",
    "        view_img = check_imshow()\n",
    "        cudnn.benchmark = True  # 如果輸入的圖片大小都相同，設定這個可以增加推論速度，若輸入大小不同，反而會降低速度\n",
    "        dataset = LoadStreams(source, img_size=imgsz, stride=stride)\n",
    "    else:\n",
    "        dataset = LoadImages(source, img_size=imgsz, stride=stride)\n",
    "\n",
    "    if half:\n",
    "        model.half()  # to FP16\n",
    "\n",
    "    # Get names and colors\n",
    "    names = model.module.names if hasattr(model, 'module') else model.names\n",
    "    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]\n",
    "\n",
    "    # Run inference, 讓模型初始化一次\n",
    "    if device.type != 'cpu':\n",
    "        model(torch.zeros(1, 3, imgsz, imgsz).to(device).\n",
    "              type_as(next(model.parameters())))  # run once\n",
    "\n",
    "    t0 = time.time()\n",
    "    for path, img, im0s, vid_cap in dataset:\n",
    "        img = torch.from_numpy(img).to(device)\n",
    "        img = img.half() if half else img.float()  # uint8 to fp16/32\n",
    "        img /= 255.0  # 0 - 255 to 0.0 - 1.0\n",
    "        if img.ndimension() == 3:\n",
    "            img = img.unsqueeze(0)\n",
    "\n",
    "        # Inference\n",
    "        t1 = time_synchronized()\n",
    "        with torch.no_grad():\n",
    "            pred = model(img, augment=opt.augment)[0]\n",
    "        t2 = time_synchronized()\n",
    "\n",
    "        # Apply NMS\n",
    "        pred = non_max_suppression(pred, opt.conf_thres,\n",
    "                                   opt.iou_thres,\n",
    "                                   classes=opt.classes,\n",
    "                                   agnostic=opt.agnostic_nms)\n",
    "        t3 = time_synchronized()\n",
    "\n",
    "        # Process detections\n",
    "        for i, det in enumerate(pred):  # detections per image\n",
    "            if webcam:  # batch_size >= 1\n",
    "                p, s,  = path[i], '%g: ' % i\n",
    "                im0, frame = im0s[i].copy(), dataset.count\n",
    "            else:\n",
    "                p, s,  = path, ''\n",
    "                im0, frame = im0s, getattr(dataset, 'frame', 0)\n",
    "            p = Path(p)  # to Path\n",
    "            save_path = str(save_dir / p.name)  # img.jpg\n",
    "            # img.txt\n",
    "            txt_path = str(save_dir / 'labels' / p.stem) + \\\n",
    "                ('' if dataset.mode == 'image' else f'_{frame}')\n",
    "\n",
    "            # normalization gain whwh\n",
    "            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]\n",
    "            if len(det):\n",
    "                # Rescale boxes from img_size to im0 size\n",
    "                det[:, :4] = scale_coords(img.shape[2:],\n",
    "                                          det[:, :4], im0.shape).round()\n",
    "\n",
    "                # Print results\n",
    "                for c in det[:, -1].unique():\n",
    "                    # detections per class\n",
    "                    n = (det[:, -1] == c).sum()\n",
    "                    # add to string\n",
    "                    s += f\"{n} {names[int(c)]}{'s' * (n > 1)}, \"\n",
    "\n",
    "                # Write results\n",
    "                for *xyxy, conf, cls in reversed(det):\n",
    "                    if save_txt:  # Write to file\n",
    "                        # normalized xywh\n",
    "                        xywh = (xyxy2xywh(torch.tensor(xyxy)\n",
    "                                          .view(1, 4)) / gn).view(-1).tolist()\n",
    "                        # label format\n",
    "                        if opt.save_conf:\n",
    "                            line = (cls, *xywh, conf)\n",
    "                        else:\n",
    "                            line = (cls, *xywh)\n",
    "                        with open(txt_path + '.txt', 'a') as f:\n",
    "                            f.write(('%g ' * len(line)).rstrip() % line + '\\n')\n",
    "\n",
    "                    if save_img:  # Add bbox to image\n",
    "                        label = f'{names[int(cls)]} {conf:.2f}'\n",
    "                        plot_one_box(xyxy, im0, label=label,\n",
    "                                     color=colors[int(cls)],\n",
    "                                     line_thickness=1)\n",
    "\n",
    "            # Print time (inference + NMS)\n",
    "            print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}',\n",
    "                  f'ms) Inference, ({(1E3 * (t3 - t2)):.1f}ms) NMS')\n",
    "\n",
    "\n",
    "            # 儲存預測完的結果\n",
    "            if save_img:\n",
    "                if dataset.mode == 'image':\n",
    "                    cv2.imwrite(save_path, im0)\n",
    "                    print(\"The image with the result is saved in:\",\n",
    "                          {save_path})\n",
    "                else:  # 'video' or 'stream'\n",
    "                    if vid_path != save_path:  # new video\n",
    "                        vid_path = save_path\n",
    "                        if isinstance(vid_writer, cv2.VideoWriter):\n",
    "                            # release previous video writer\n",
    "                            vid_writer.release()\n",
    "                        if vid_cap:  # video\n",
    "                            fps = vid_cap.get(cv2.CAP_PROP_FPS)\n",
    "                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))\n",
    "                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))\n",
    "                        else:  # stream\n",
    "                            fps, w, h = 30, im0.shape[1], im0.shape[0]\n",
    "                            save_path += '.mp4'\n",
    "                        vid_writer = cv2.VideoWriter(\n",
    "                            save_path,\n",
    "                            cv2.VideoWriter_fourcc(*'mp4v'),\n",
    "                            fps, (w, h))\n",
    "                    vid_writer.write(im0)\n",
    "\n",
    "    if save_txt or save_img:\n",
    "        s = f\"\\n{len(list(save_dir.glob('labels/*.txt')))} labels saved to {save_dir / 'labels'}\" if save_txt else ''\n",
    "        print(f\"Results saved to {save_dir}{s}\")\n",
    "\n",
    "    print(f'Done. ({time.time() - t0:.3f}s)')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e505349f",
   "metadata": {},
   "outputs": [],
   "source": [
    "!python detect.py --weights pre_train.pt --source ./datasets/pet/valid/images/Bengal_130.jpg"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}