{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Logistic Regression: Classification of Handwritten Digits\n",
    "\n",
    "Fit a logistic-regression classifier to the digits returned by `sklearn.datasets.load_digits`, then check its predictions on a held-out test split, both visually and with standard metrics."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "from sklearn import metrics\n",
    "from sklearn.datasets import load_digits\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Settings a reader may want to tune, gathered in one place.\n",
    "TEST_SIZE = 0.25     # fraction of the samples held out for testing\n",
    "RANDOM_STATE = 42    # fixed seed so the train/test split is reproducible"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load and inspect the data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "digits = load_digits()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "digits.images.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "digits.data.shape"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def plot_digit_grid(images, labels, colors=None, n_rows=8, n_cols=8):\n",
    "    \"\"\"Show the first ``n_rows * n_cols`` images in a grid, each tagged with its label.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    images : indexable of 2-D arrays\n",
    "        ``images[i]`` is handed to ``imshow`` for grid position ``i``.\n",
    "    labels : indexable\n",
    "        ``str(labels[i])`` is written on top of image ``i``.\n",
    "    colors : indexable of str, optional\n",
    "        Text colour for each label. When omitted, no colour is passed and\n",
    "        matplotlib's default text colour applies.\n",
    "    n_rows, n_cols : int\n",
    "        Grid dimensions.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    IndexError\n",
    "        If fewer than ``n_rows * n_cols`` images, labels or colours are supplied.\n",
    "    \"\"\"\n",
    "    fig = plt.figure(figsize=(6, 6))  # figure size in inches\n",
    "    fig.subplots_adjust(left=0, right=1, bottom=0, top=1, hspace=0.05, wspace=0.05)\n",
    "\n",
    "    for i in range(n_rows * n_cols):\n",
    "        ax = fig.add_subplot(n_rows, n_cols, i + 1, xticks=[], yticks=[])\n",
    "        ax.imshow(images[i], cmap=plt.cm.binary, interpolation='nearest')\n",
    "\n",
    "        # The label sits at data coordinates (0, 7): the bottom-left pixel of an 8x8 image.\n",
    "        # Only override the text colour when the caller supplied one.\n",
    "        text_kwargs = {} if colors is None else {'color': colors[i]}\n",
    "        ax.text(0, 7, str(labels[i]), **text_kwargs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# plot the digits: each image is 8x8 pixels, labelled with its target value\n",
    "plot_digit_grid(digits.images, digits.target)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Logistic Regression\n",
    "\n",
    "**Exercise:** build your LR model here and specify the value of `C`. Also try the `'multinomial'` classifier with the `'sag'` solver."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# split the data into training and test sets\n",
    "X_train, X_test, y_train, y_test = train_test_split(digits.data,\n",
    "                                                    digits.target,\n",
    "                                                    test_size=TEST_SIZE,\n",
    "                                                    random_state=RANDOM_STATE)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# configure the model: one-vs-rest logistic regression with the liblinear solver\n",
    "# NOTE(review): newer scikit-learn releases deprecate `multi_class` -- confirm against the installed version\n",
    "clf = LogisticRegression(C=1e-2, multi_class=\"ovr\", solver='liblinear')\n",
    "\n",
    "# fit on the training dataset\n",
    "clf.fit(X_train, y_train)\n",
    "\n",
    "# use the model to predict the labels of the test data\n",
    "predicted = clf.predict(X_test)\n",
    "expected = y_test"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# reshape once: each flat test row becomes an 8x8 image again\n",
    "test_images = X_test.reshape(-1, 8, 8)\n",
    "\n",
    "# green label = correct prediction, red label = wrong prediction\n",
    "label_colors = ['green' if hit else 'red' for hit in (predicted == expected)]\n",
    "\n",
    "plot_digit_grid(test_images, predicted, colors=label_colors)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Quantitative Measurement on the Performance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# fraction of test samples whose predicted label equals the true label\n",
    "matches = (predicted == expected)\n",
    "print('accuracy:%.3f' % matches.mean())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(metrics.classification_report(expected, predicted))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "print(metrics.confusion_matrix(expected, predicted))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## A closer look at a single sample\n",
    "\n",
    "The first image of the dataset, rendered and then as its raw pixel array."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.imshow(digits.images[0], cmap=plt.cm.binary, interpolation='nearest');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "digits.images[0]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.2"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}