{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Import Library" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import random\n", "import matplotlib.pyplot as plt\n", "from matplotlib.pylab import rcParams\n", "\n", "%matplotlib inline\n", "rcParams['figure.figsize'] = 8, 5" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Generate Sin(x) Dataset" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# define input array with angles from 60deg to 300deg converted to radians\n", "x = np.array([i * np.pi / 180 for i in range(60, 300, 4)])\n", "np.random.seed(100) # Setting seed for reproducability\n", "y = np.sin(x) + np.random.normal(0, 0.15, len(x))\n", "\n", "data = pd.DataFrame(np.column_stack([x, y]), columns=['x', 'y'])\n", "plt.plot(data['x'], data['y'], '.')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Generate New Features with higher power " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for i in range(2, 16): # power of 1 is already there\n", " colname = 'x_%d' % i # new var will be x_power\n", " data[colname] = data['x']**i\n", "data.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 0. Function definition" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import LinearRegression, Lasso, Ridge\n", "# 調整參數造成收斂不完全,省略警告\n", "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def example_regression(data, power, plot_dict, reg_type, alpha, var):\n", " # define estimator object\n", " type_dict = {\n", " 'Linear': LinearRegression(),\n", " 'Lasso': Lasso(alpha=alpha),\n", " 'Ridge': Ridge(alpha=alpha)\n", " }\n", " if var == 'power':\n", " # generate X in order\n", " X = ['x']\n", " if power >= 2:\n", " X.extend(['x_%d' % i for i in range(2, power + 1)])\n", "\n", " # fit the model\n", " if reg_type in type_dict:\n", " model = type_dict[reg_type]\n", " model.fit(data[X], data['y'])\n", " y_pred = model.predict(data[X])\n", "\n", " # check if a plot is to be made for the entered power\n", " if power in plot_dict:\n", " plt.subplot(plot_dict[power])\n", " plt.tight_layout()\n", " plt.plot(data['x'], data['y'], '.')\n", " plt.plot(data['x'], y_pred)\n", " plt.title('Plot for power: %d' % power)\n", "\n", " # return the result in pre-defined format\n", " rss = sum((y_pred - data['y'])**2)\n", " ret = [rss]\n", " ret.extend([model.intercept_])\n", " ret.extend(model.coef_)\n", " elif var == 'alpha':\n", " # generate X in order\n", " X = ['x']\n", " if power >= 2:\n", " X.extend(['x_%d' % i for i in range(2, power + 1)])\n", "\n", " # fit the model\n", " if reg_type in type_dict:\n", " model = type_dict[reg_type]\n", " model.fit(data[X], data['y'])\n", " y_pred = model.predict(data[X])\n", "\n", " # check if a plot is to be made for the entered power\n", " if alpha in plot_dict:\n", " plt.subplot(plot_dict[alpha])\n", " plt.tight_layout()\n", " plt.plot(data['x'], data['y'], '.')\n", " plt.plot(data['x'], y_pred)\n", " plt.title('Plot for alpha: %.3f' % alpha)\n", "\n", " # return the result in pre-defined format\n", " rss = sum((y_pred - data['y'])**2)\n", " ret = [rss]\n", " ret.extend([model.intercept_])\n", " ret.extend(model.coef_)\n", " return ret" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Lasso Regularization with Varied power" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# initialize a dataframe to store the results:\n", "col = ['rss', 'intercept'] + ['coef_x_%d' % i for i in range(1, 16)]\n", "ind = ['power_%d' % i for i in range(1, 16)]\n", "\n", "perf_Lasso = pd.DataFrame(index=ind, columns=col)\n", "\n", "# define the powers for which a plot is required: {power:where}\n", "plot_dict = {1: 231, 3: 232, 6: 233, 9: 234, 12: 235, 15: 236}\n", "\n", "# iterate through all powers and assimilate results\n", "for i in range(1, 16):\n", " perf_Lasso.iloc[i - 1, 0:i + 2] = example_regression(data,\n", " power=i,\n", " plot_dict=plot_dict,\n", " reg_type='Lasso',\n", " alpha=1e-3,\n", " var='power')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "scrolled": true }, "outputs": [], "source": [ "pd.options.display.float_format = '{:,.2g}'.format\n", "perf_Lasso" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Lasso Regularization with Varied Alpha" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# initialize a dataframe to store the results:\n", "col = ['rss', 'intercept'] + ['coef_x_%d' % i for i in range(1, 16)]\n", "ind = ['alpha_%de-3' % i for i in range(1, 16)]\n", "\n", "perf_Lasso = pd.DataFrame(index=ind, columns=col)\n", "\n", "# define the alpha for which a plot is required: {alpha:where}\n", "plot_dict = {\n", " 1e-3: 231,\n", " 3e-3: 232,\n", " 6e-3: 233,\n", " 7e-3: 234,\n", " 12e-3: 235,\n", " 15e-3: 236\n", "} # key值為alpha的值\n", "\n", "# iterate through all powers and assimilate results\n", "for i in range(1, 16):\n", " perf_Lasso.iloc[i - 1, 0:18] = example_regression(data,\n", " power=15,\n", " plot_dict=plot_dict,\n", " reg_type='Lasso',\n", " alpha=i * 1e-3,\n", " var='alpha')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pd.options.display.float_format = '{:,.2g}'.format\n", "perf_Lasso" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Ridge Regularization with Varied power" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# initialize a dataframe to store the results:\n", "col = ['rss', 'intercept'] + ['coef_x_%d' % i for i in range(1, 16)]\n", "ind = ['power_%d' % i for i in range(1, 16)]\n", "\n", "perf_Ridge = pd.DataFrame(index=ind, columns=col)\n", "\n", "# define the powers for which a plot is required: {power:where}\n", "plot_dict = {1: 231, 3: 232, 6: 233, 9: 234, 12: 235, 15: 236}\n", "\n", "# iterate through all powers and assimilate results\n", "for i in range(1, 16):\n", " perf_Ridge.iloc[i - 1, 0:i + 2] = example_regression(data,\n", " power=i,\n", " plot_dict=plot_dict,\n", " reg_type='Ridge',\n", " alpha=1e-3,\n", " var='power')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pd.options.display.float_format = '{:,.2g}'.format\n", "perf_Ridge" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Ridge Regularization with Varied Alpha¶" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# initialize a dataframe to store the results:\n", "col = ['rss', 'intercept'] + ['coef_x_%d' % i for i in range(1, 16)]\n", "ind = ['alpha_%de-3' % i for i in range(1, 16)]\n", "\n", "perf_Ridge = pd.DataFrame(index=ind, columns=col)\n", "\n", "# define the alpha for which a plot is required: {power:where}\n", "plot_dict = {\n", " 1e-3: 231,\n", " 3e-3: 232,\n", " 6e-3: 233,\n", " 7e-3: 234,\n", " 12e-3: 235,\n", " 15e-3: 236\n", "} # key值為alpha的值\n", "# iterate through all alphas and assimilate results\n", "for i in range(1, 16):\n", " perf_Ridge.iloc[i - 1, 0:18] = example_regression(data,\n", " power=15,\n", " plot_dict=plot_dict,\n", " reg_type='Ridge',\n", " alpha=i * 1e-3,\n", " var='alpha')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "pd.options.display.float_format = '{:,.2g}'.format\n", "perf_Ridge" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.2" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }