{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "from sklearn import metrics\n",
    "from sklearn.datasets import load_digits\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "digits = load_digits()\n",
    "\n",
    "# split the data into training and test sets; random_state is fixed so the\n",
    "# hard-coded predictions further down line up with y_test on every run\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    digits.data, digits.target, test_size=0.25, random_state=0\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Real Digits"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "actual = y_test\n",
    "actual"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Prediction\n",
    "這個prediction的結果是利用logistic regression對X_test預測出的標籤(用來和y_test比較),\n",
    "後面會讓大家練習到,這邊直接先幫大家把結果列出來。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "predicted = np.array([\n",
    "    2, 8, 2, 6, 6, 7, 1, 9, 8, 5, 2, 8, 6, 6, 6, 6, 1, 0, 5, 8, 8, 7, 8, 4, 7,\n",
    "    5, 4, 9, 2, 9, 4, 7, 6, 8, 9, 4, 3, 1, 0, 1, 8, 6, 7, 7, 1, 0, 7, 6, 2, 1,\n",
    "    9, 6, 7, 9, 0, 0, 9, 1, 6, 3, 0, 2, 3, 4, 1, 9, 2, 6, 9, 1, 8, 3, 5, 1, 2,\n",
    "    8, 2, 2, 9, 7, 2, 3, 6, 0, 5, 3, 7, 5, 1, 2, 8, 9, 3, 1, 4, 7, 4, 8, 5, 8,\n",
    "    5, 5, 2, 5, 9, 0, 7, 1, 4, 7, 3, 4, 8, 9, 7, 9, 8, 2, 1, 5, 2, 5, 8, 4, 1,\n",
    "    7, 0, 6, 1, 5, 5, 9, 9, 5, 9, 9, 5, 7, 5, 6, 2, 8, 6, 9, 6, 1, 5, 1, 5, 9,\n",
    "    9, 1, 5, 3, 6, 1, 8, 9, 8, 7, 6, 7, 6, 5, 6, 0, 8, 8, 9, 8, 6, 1, 0, 4, 1,\n",
    "    6, 3, 8, 6, 7, 4, 9, 6, 3, 0, 3, 3, 3, 0, 7, 7, 5, 7, 8, 0, 7, 1, 9, 6, 4,\n",
    "    5, 0, 1, 4, 6, 4, 3, 3, 0, 9, 5, 9, 2, 1, 4, 2, 1, 6, 8, 9, 2, 4, 9, 3, 7,\n",
    "    6, 2, 3, 3, 1, 6, 9, 3, 6, 3, 2, 2, 0, 7, 6, 1, 1, 9, 7, 2, 7, 8, 5, 5, 7,\n",
    "    5, 3, 3, 7, 2, 7, 5, 5, 7, 0, 9, 1, 6, 5, 9, 7, 4, 3, 8, 0, 3, 6, 4, 6, 3,\n",
    "    2, 6, 8, 8, 8, 4, 6, 7, 5, 2, 4, 5, 3, 2, 4, 6, 9, 4, 5, 4, 3, 4, 6, 2, 9,\n",
    "    0, 6, 7, 2, 0, 9, 6, 0, 4, 2, 0, 7, 9, 8, 5, 7, 8, 2, 8, 4, 3, 7, 2, 6, 9,\n",
    "    9, 5, 1, 0, 8, 2, 8, 9, 5, 6, 2, 2, 7, 2, 1, 5, 1, 6, 4, 5, 0, 9, 4, 1, 1,\n",
    "    7, 0, 8, 9, 0, 5, 4, 3, 8, 8, 6, 5, 3, 4, 4, 4, 8, 8, 7, 0, 9, 6, 3, 5, 2,\n",
    "    3, 0, 8, 8, 3, 1, 3, 3, 0, 0, 4, 6, 0, 7, 7, 6, 2, 0, 4, 4, 2, 3, 7, 1, 9,\n",
    "    8, 6, 8, 5, 6, 2, 2, 3, 1, 7, 7, 8, 0, 3, 3, 2, 1, 5, 5, 9, 1, 3, 7, 0, 0,\n",
    "    3, 0, 4, 5, 8, 3, 3, 4, 3, 1, 8, 9, 8, 3, 6, 3, 1, 6, 2, 1, 7, 5, 5, 1, 9\n",
    "])\n",
    "\n",
    "# the hard-coded predictions are only meaningful if there is exactly one\n",
    "# prediction per test sample\n",
    "assert predicted.shape == actual.shape, 'predicted and actual must have the same length'"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Plot Result\n",
    "將手寫數字及對應的預測結果畫出來,圖上標為綠色的數字代表預測正確,紅色的代表預測錯誤。\n",
    "總共有450筆資料,這邊只畫出前64張。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# view every flattened test sample as an 8x8 pixel image (done once, not per subplot)\n",
    "images = X_test.reshape(-1, 8, 8)\n",
    "\n",
    "fig = plt.figure(figsize=(6, 6))  # figure size in inches\n",
    "fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)\n",
    "\n",
    "# plot the first 64 digits on an 8x8 grid of subplots\n",
    "for i in range(64):\n",
    "    ax = fig.add_subplot(8, 8, i + 1, xticks=[], yticks=[])\n",
    "    ax.imshow(images[i], cmap=plt.cm.binary, interpolation='nearest')\n",
    "\n",
    "    # label the image with the predicted digit:\n",
    "    # green when it matches the actual label, red otherwise\n",
    "    color = 'green' if predicted[i] == actual[i] else 'red'\n",
    "    ax.text(0, 7, str(predicted[i]), color=color)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Quantitative Measurement on the Performance\n",
    "在分類問題上,所有的模型評估方法基本上都可以由confusion matrix得出來。\n",
    "y方向(也就是row方向)代表的是actual 0 到 9,x方向(也就是column方向)代表的是predicted 0 到 9。\n",
    "\n",
    "```text\n",
    "actual/predicted    0  1  2  3  4  5  6  7  8  9\n",
    "               0 [[37  0  0  0  0  0  0  0  0  0]\n",
    "               1  [ 0 40  0  0  0  0  1  0  1  1]\n",
    "               2  [ 0  0 42  2  0  0  0  0  0  0]\n",
    "               3  [ 0  0  0 44  0  0  0  0  1  0]\n",
    "               4  [ 0  0  0  0 37  0  0  1  0  0]\n",
    "               5  [ 0  0  0  0  0 46  0  0  0  2]\n",
    "               6  [ 0  1  0  0  0  0 51  0  0  0]\n",
    "               7  [ 0  0  0  1  1  0  0 46  0  0]\n",
    "               8  [ 0  3  1  0  0  0  0  0 44  0]\n",
    "               9  [ 0  0  0  0  0  1  0  0  2 44]]\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(metrics.confusion_matrix(actual, predicted))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Accuracy\n",
    "想想要如何從confusion matrix算出accuracy。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print('accuracy:%.3f' % (metrics.accuracy_score(actual, predicted)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Precision, Recall & F1-score\n",
    "想想如何從confusion matrix算出這些值。除了accuracy,\n",
    "這些值是否可以幫我們判斷哪些數字預測得比較準、哪些預測得比較不準?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(metrics.classification_report(actual, predicted))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.2"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}