{ "cells": [
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "import numpy as np\n",
  "import pandas as pd\n",
  "import matplotlib.pyplot as plt\n",
  "from matplotlib.pylab import rcParams\n",
  "from sklearn.datasets import make_blobs\n",
  "from sklearn.linear_model import LogisticRegression\n",
  "from sklearn.metrics import accuracy_score, confusion_matrix\n",
  "from sklearn.svm import SVC\n",
  "\n",
  "%matplotlib inline\n",
  "rcParams['figure.figsize'] = 9, 6" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [
  "# Generate Imbalanced Datasets" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "# Build a 2-class (binary), imbalanced dataset for classification.\n",
  "# X1: 800 points around (0, 0); a single center means every label is 0.\n",
  "center1 = [[0, 0]]\n",
  "X1, y1 = make_blobs(n_samples=800, centers=center1, random_state=42)\n",
  "# X2: 200 points split evenly across two centers, i.e. 100 more label-0\n",
  "# points at (0, 0) and the only 100 label-1 points at (1, 1).\n",
  "center2 = [[0, 0], [1, 1]]\n",
  "X2, y2 = make_blobs(n_samples=200,\n",
  "                    centers=center2,\n",
  "                    random_state=42,\n",
  "                    cluster_std=0.3)\n",
  "X = np.vstack((X1, X2))\n",
  "y = np.hstack((y1, y2))\n",
  "# Sanity check: 900 samples of class 0 and 100 of class 1 (10% positives).\n",
  "assert np.bincount(y).tolist() == [900, 100]" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [
  "* 在圖上紅色點表示為壞人(1),藍色點是好人(0)\n",
  "* 今天身為一位警察該如何透過兩個 feature 從好人中去抓出壞人呢?\n",
  "* 這邊產生的資料只有 10% 是壞人,90% 是好人" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
  "# Colour each sample by its label: class 0 -> blue, class 1 -> red.\n",
  "palette = \"br\"\n",
  "color = [palette[label] for label in y]\n",
  "fig, ax = plt.subplots()\n",
  "ax.scatter(X[:, 0], X[:, 1], c=color, alpha=.5)\n",
  "ax.set_xlabel('Feature1')\n",
  "ax.set_ylabel('Feature2')\n",
  "ax.set_title('Imbalanced dataset: class 0 (blue) vs. class 1 (red)')\n",
  "# plt.show() renders the figure without echoing a Text(...) repr as cell output.\n",
  "plt.show()" ] },
 { "cell_type": "markdown", "metadata": {}, "source": [
  "# Predict with Logistic Regression \n",
  "* 試試看預測結果 accuracy 有沒有辦法超過 90%?"
] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Print F1_Score, Precision, Recall" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 如何讓F1_Score更好?\n", "* 大家可以試著 print 出 logistic regression 預測的 probability\n", "* 試著去調整 threshold 看看如何把 f1_score 做得更好。" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }