{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "77421006",
   "metadata": {},
   "source": [
    "# 下載課程所需檔案 (YOLOv7, Dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "7722f2df",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download and unpack the dataset and the YOLOv7 code.\n",
    "# `wget -nc` and `unzip -n` skip files that already exist, so re-running this\n",
    "# cell neither duplicates the archives nor stops at an overwrite prompt.\n",
    "!wget -nc https://github.com/TA-aiacademy/course_3.0/releases/download/CVCNN_Data/dog_cat_for_YOLO.zip\n",
    "!unzip -n dog_cat_for_YOLO.zip\n",
    "!wget -nc https://github.com/TA-aiacademy/course_3.0/releases/download/CVCNN_Data/yolov7.zip\n",
    "!unzip -n yolov7.zip"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "09403475",
   "metadata": {},
   "source": [
    "# 下載課程所需檔案(先訓練好的權重)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b6b88f96",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Download the pre-trained weights (skipped when the file is already present).\n",
    "!wget -nc https://github.com/TA-aiacademy/course_3.0/releases/download/CVCNN_Data/pre_train.pt"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "f058c51a",
   "metadata": {},
   "source": [
    "# YOLOv7 預測教學"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a0a9ce67",
   "metadata": {},
   "source": [
    "## 匯入所需要的套件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8bdf3af0",
   "metadata": {},
   "outputs": [],
   "source": [
    "import argparse\n",
    "import time\n",
    "from pathlib import Path\n",
    "\n",
    "import cv2\n",
    "import torch\n",
    "import torch.backends.cudnn as cudnn\n",
    "from numpy import random\n",
    "\n",
    "from models.experimental import attempt_load\n",
    "from utils.datasets import LoadStreams, LoadImages\n",
    "from utils.general import (check_img_size, check_requirements,\n",
    "                           check_imshow, non_max_suppression, apply_classifier,\n",
    "                           scale_coords, xyxy2xywh, strip_optimizer,\n",
    "                           set_logging, increment_path)\n",
    "from utils.plots import plot_one_box\n",
    "from utils.torch_utils import select_device, load_classifier, time_synchronized"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "5eeab535",
   "metadata": {},
   "source": [
    "## 參數與初始值設定"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "9a1bfc0a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Options are collected with argparse. An option may only be registered once\n",
    "# per parser, so re-run this cell before re-running any add_argument cell.\n",
    "parser = argparse.ArgumentParser()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "11b74c90",
   "metadata": {},
   "source": [
    "### 模型相關參數"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a2479394",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Path to the model weights\n",
    "parser.add_argument('--weights', nargs='+', type=str, default='yolov7.pt')\n",
    "# Prediction target: file/folder, 0 for webcam\n",
    "parser.add_argument('--source', type=str, default='inference/images')\n",
    "# Image size used for prediction\n",
    "parser.add_argument('--img-size', type=int, default=640)\n",
    "# Confidence threshold for treating a detection as an object\n",
    "parser.add_argument('--conf-thres', type=float, default=0.25)\n",
    "# IoU threshold used by NMS\n",
    "parser.add_argument('--iou-thres', type=float, default=0.45)\n",
    "# CPU/GPU device; a machine with one GPU normally uses 0\n",
    "parser.add_argument('--device', default='cpu')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "e1a54c59",
   "metadata": {},
   "source": [
    "### 存檔相關參數"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "15d295fd",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Whether to save the results as text files\n",
    "parser.add_argument('--save-txt', action='store_true')\n",
    "# Whether to also store the confidence scores\n",
    "parser.add_argument('--save-conf', action='store_true')\n",
    "# Whether to skip saving the predicted images/videos\n",
    "parser.add_argument('--nosave', action='store_true')\n",
    "# Output location; results are stored under project/name\n",
    "parser.add_argument('--project', default='runs/detect')\n",
    "# Sub-folder of the output location; results are stored under project/name\n",
    "parser.add_argument('--name', default='exp')\n",
    "# If the folder already exists: reuse it, or create a new folder instead.\n",
    "# Default is False, i.e. an existing folder leads to a new one being created.\n",
    "parser.add_argument('--exist-ok', action='store_true')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8e4f4e37",
   "metadata": {},
   "source": [
    "### NMS 相關參數"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "14b52cc8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Restrict the output to specific classes: --classes 0, or --classes 0 2 3\n",
    "parser.add_argument('--classes', nargs='+', type=int)\n",
    "# By default NMS keeps one box per class. With this flag NMS runs across\n",
    "# classes so that only one box is kept at a given location (default False).\n",
    "parser.add_argument('--agnostic-nms', action='store_true')\n",
    "# Whether to apply augmentation to the input at prediction time\n",
    "parser.add_argument('--augment', action='store_true')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d8ce7f59",
   "metadata": {},
   "outputs": [],
   "source": [
    "# The defaults are generally fine; only the weights path and the prediction\n",
    "# target need to be adjusted.\n",
    "# Use GPU 0 when CUDA is available and fall back to the CPU otherwise, so the\n",
    "# notebook also runs on machines without a GPU.\n",
    "device_arg = '0' if torch.cuda.is_available() else 'cpu'\n",
    "opt = parser.parse_args(\n",
    "    args=['--weights', './pre_train.pt',\n",
    "          '--source', './datasets/pet/valid/images/Bengal_130.jpg',\n",
    "          '--device', device_arg,\n",
    "          '--nosave'])\n",
    "print(opt)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f478b713",
   "metadata": {},
   "outputs": [],
   "source": [
    "source, weights = opt.source, opt.weights\n",
    "save_txt, imgsz = opt.save_txt, opt.img_size\n",
    "# Whether the annotated images/videos are written to disk\n",
    "save_img = not opt.nosave and not source.endswith('.txt')\n",
    "\n",
    "# Stream-style input: a numeric source, a .txt list, or a URL\n",
    "webcam = (source.isnumeric() or source.endswith('.txt') or\n",
    "          source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://')))\n",
    "\n",
    "# Output folder, derived from '{project}/{name}'\n",
    "save_dir = Path(increment_path(Path(opt.project) / opt.name,\n",
    "                               exist_ok=opt.exist_ok))\n",
    "(save_dir / 'labels' if save_txt else save_dir).mkdir(\n",
    "    parents=True, exist_ok=True)  # make dir\n",
    "\n",
    "# Initialisation\n",
    "set_logging()\n",
    "device = select_device(opt.device)\n",
    "# On a GPU, predict in half precision (somewhat faster)\n",
    "half = device.type != 'cpu'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5e94af84",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Everything below is inference only, so gradients are disabled for the\n",
    "# whole cell.\n",
    "with torch.no_grad():\n",
    "\n",
    "    # Load model\n",
    "    model = attempt_load(weights, map_location=device)  # load FP32 model\n",
    "    vid_path, vid_writer = None, None\n",
    "    stride = int(model.stride.max())  # model stride\n",
    "\n",
    "    # Set up the dataloader\n",
    "    if webcam:\n",
    "        view_img = check_imshow()\n",
    "        # Speeds up inference when all inputs have the same size; with\n",
    "        # varying input sizes it slows inference down instead.\n",
    "        cudnn.benchmark = True\n",
    "        dataset = LoadStreams(source, img_size=imgsz, stride=stride)\n",
    "    else:\n",
    "        dataset = LoadImages(source, img_size=imgsz, stride=stride)\n",
    "\n",
    "    if half:\n",
    "        model.half()  # to FP16\n",
    "\n",
    "    # Get names and colors\n",
    "    names = model.module.names if hasattr(model, 'module') else model.names\n",
    "    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]\n",
    "\n",
    "    # Run the model once so that it is initialised before timing starts\n",
    "    if device.type != 'cpu':\n",
    "        model(torch.zeros(1, 3, imgsz, imgsz).to(device).\n",
    "              type_as(next(model.parameters())))  # run once\n",
    "\n",
    "    t0 = time.time()\n",
    "    for path, img, im0s, vid_cap in dataset:\n",
    "        img = torch.from_numpy(img).to(device)\n",
    "        img = img.half() if half else img.float()  # uint8 to fp16/32\n",
    "        img /= 255.0  # 0 - 255 to 0.0 - 1.0\n",
    "        if img.ndimension() == 3:\n",
    "            img = img.unsqueeze(0)\n",
    "\n",
    "        # Inference\n",
    "        t1 = time_synchronized()\n",
    "        pred = model(img, augment=opt.augment)[0]\n",
    "        t2 = time_synchronized()\n",
    "\n",
    "        # Apply NMS\n",
    "        pred = non_max_suppression(pred, opt.conf_thres,\n",
    "                                   opt.iou_thres,\n",
    "                                   classes=opt.classes,\n",
    "                                   agnostic=opt.agnostic_nms)\n",
    "        t3 = time_synchronized()\n",
    "\n",
    "        # Process detections\n",
    "        for i, det in enumerate(pred):  # detections per image\n",
    "            if webcam:  # batch_size >= 1\n",
    "                p, s = path[i], '%g: ' % i\n",
    "                im0, frame = im0s[i].copy(), dataset.count\n",
    "            else:\n",
    "                p, s = path, ''\n",
    "                im0, frame = im0s, getattr(dataset, 'frame', 0)\n",
    "            p = Path(p)  # to Path\n",
    "            save_path = str(save_dir / p.name)  # img.jpg\n",
    "            # img.txt (the frame index is appended for non-image sources)\n",
    "            txt_path = (str(save_dir / 'labels' / p.stem) +\n",
    "                        ('' if dataset.mode == 'image' else f'_{frame}'))\n",
    "\n",
    "            # normalization gain whwh\n",
    "            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]\n",
    "            if len(det):\n",
    "                # Rescale boxes from img_size to im0 size\n",
    "                det[:, :4] = scale_coords(img.shape[2:],\n",
    "                                          det[:, :4], im0.shape).round()\n",
    "\n",
    "                # Print results\n",
    "                for c in det[:, -1].unique():\n",
    "                    # detections per class\n",
    "                    n = (det[:, -1] == c).sum()\n",
    "                    # add to string\n",
    "                    s += f\"{n} {names[int(c)]}{'s' * (n > 1)}, \"\n",
    "\n",
    "                # Write results\n",
    "                for *xyxy, conf, cls in reversed(det):\n",
    "                    if save_txt:  # Write to file\n",
    "                        # normalized xywh\n",
    "                        xywh = (xyxy2xywh(torch.tensor(xyxy)\n",
    "                                          .view(1, 4)) / gn).view(-1).tolist()\n",
    "                        # label format\n",
    "                        if opt.save_conf:\n",
    "                            line = (cls, *xywh, conf)\n",
    "                        else:\n",
    "                            line = (cls, *xywh)\n",
    "                        with open(txt_path + '.txt', 'a') as f:\n",
    "                            f.write(('%g ' * len(line)).rstrip() % line + '\\n')\n",
    "\n",
    "                    if save_img:  # Add bbox to image\n",
    "                        label = f'{names[int(cls)]} {conf:.2f}'\n",
    "                        plot_one_box(xyxy, im0, label=label,\n",
    "                                     color=colors[int(cls)],\n",
    "                                     line_thickness=1)\n",
    "\n",
    "            # Print time (inference + NMS)\n",
    "            print(f'{s}Done. ({(1E3 * (t2 - t1)):.1f}ms) Inference, '\n",
    "                  f'({(1E3 * (t3 - t2)):.1f}ms) NMS')\n",
    "\n",
    "            # Save the annotated result\n",
    "            if save_img:\n",
    "                if dataset.mode == 'image':\n",
    "                    cv2.imwrite(save_path, im0)\n",
    "                    print(f'The image with the result is saved in: {save_path}')\n",
    "                else:  # 'video' or 'stream'\n",
    "                    if vid_path != save_path:  # new video\n",
    "                        vid_path = save_path\n",
    "                        if isinstance(vid_writer, cv2.VideoWriter):\n",
    "                            # release previous video writer\n",
    "                            vid_writer.release()\n",
    "                        if vid_cap:  # video\n",
    "                            fps = vid_cap.get(cv2.CAP_PROP_FPS)\n",
    "                            w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))\n",
    "                            h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))\n",
    "                        else:  # stream\n",
    "                            fps, w, h = 30, im0.shape[1], im0.shape[0]\n",
    "                            save_path += '.mp4'\n",
    "                        vid_writer = cv2.VideoWriter(\n",
    "                            save_path,\n",
    "                            cv2.VideoWriter_fourcc(*'mp4v'),\n",
    "                            fps, (w, h))\n",
    "                    vid_writer.write(im0)\n",
    "\n",
    "    # Close the last video writer. The notebook kernel keeps running after\n",
    "    # this cell, so the writer would otherwise stay open indefinitely.\n",
    "    if isinstance(vid_writer, cv2.VideoWriter):\n",
    "        vid_writer.release()\n",
    "\n",
    "    if save_txt or save_img:\n",
    "        if save_txt:\n",
    "            n_labels = len(list(save_dir.glob('labels/*.txt')))\n",
    "            s = f\"\\n{n_labels} labels saved to {save_dir / 'labels'}\"\n",
    "        else:\n",
    "            s = ''\n",
    "        print(f'Results saved to {save_dir}{s}')\n",
    "\n",
    "    print(f'Done. ({time.time() - t0:.3f}s)')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e505349f",
   "metadata": {},
   "outputs": [],
   "source": [
    "!python detect.py --weights pre_train.pt --source ./datasets/pet/valid/images/Bengal_130.jpg"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}