{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import Library"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import random\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.pylab import rcParams\n",
    "from sklearn.metrics import mean_absolute_error\n",
    "\n",
    "%matplotlib inline\n",
    "rcParams['figure.figsize'] = 8, 5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Generate Sin(x) Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# define input array with angles from 60deg to 296deg (4deg steps) converted to radians\n",
    "x = np.array([i*np.pi/180 for i in range(60, 300, 4)])\n",
    "np.random.seed(100) # Setting seed for reproducibility\n",
    "y = np.sin(x) + np.random.normal(0, 0.15, len(x)) # add Gaussian noise (mean 0, std 0.15)\n",
    "\n",
    "data = pd.DataFrame(np.column_stack([x, y]), columns=['x', 'y'])\n",
    "plt.plot(data['x'], data['y'], '.')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Generate New Features with higher power \n",
    "自行產生新的feature,x^2~x^15"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in range(2, 16): # power of 1 is already there\n",
    "    colname = 'x_%d' % i # new var will be x_power\n",
    "    data[colname] = data['x']**i\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 0. 
Function definition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LinearRegression, Lasso, Ridge"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def example_regression(data, power, plot_dict, reg_type, alpha = 0):\n",
    "    \"\"\"Fit a polynomial regression of degree `power` and summarise the fit.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    data : pandas.DataFrame\n",
    "        Must contain the target column 'y', the feature column 'x' and,\n",
    "        for power >= 2, the columns 'x_2' ... 'x_{power}'.\n",
    "    power : int\n",
    "        Highest power of x used as a feature.\n",
    "    plot_dict : dict\n",
    "        Maps a power to a matplotlib subplot code (e.g. 231). The fitted\n",
    "        curve is drawn only when `power` is a key of this mapping.\n",
    "    reg_type : str\n",
    "        One of 'Linear', 'Lasso' or 'Ridge'.\n",
    "    alpha : float, default 0\n",
    "        Passed as `alpha` to Lasso / Ridge; not used for 'Linear'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list\n",
    "        [rss, intercept, coef_x_1, ..., coef_x_{power}]\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If `reg_type` is not one of the supported estimator names.\n",
    "    \"\"\"\n",
    "    # Map each name to a factory so that only the requested estimator is built.\n",
    "    estimators = {'Linear': LinearRegression,\n",
    "                  'Lasso': lambda: Lasso(alpha=alpha),\n",
    "                  'Ridge': lambda: Ridge(alpha=alpha)}\n",
    "    if reg_type not in estimators:\n",
    "        raise ValueError('Unknown reg_type %r; expected one of %s' % (reg_type, sorted(estimators)))\n",
    "\n",
    "    # Feature columns in ascending power: x, x_2, ..., x_{power}\n",
    "    features = ['x'] + ['x_%d' % i for i in range(2, power + 1)]\n",
    "\n",
    "    # fit the model and predict on the training points\n",
    "    model = estimators[reg_type]()\n",
    "    model.fit(data[features], data['y'])\n",
    "    y_pred = model.predict(data[features])\n",
    "\n",
    "    # Score against the target stored in `data` (not the notebook-level `y`),\n",
    "    # so the function gives the right error for whatever frame is passed in.\n",
    "    mae = mean_absolute_error(data['y'], y_pred)\n",
    "\n",
    "    # check if a plot is to be made for the entered power\n",
    "    if power in plot_dict:\n",
    "        plt.subplot(plot_dict[power])\n",
    "        plt.tight_layout()\n",
    "        plt.plot(data['x'], data['y'], '.')\n",
    "        plt.plot(data['x'], y_pred)\n",
    "        plt.title('Plot for power: %d'%power + '\\n' + 'mae:%.2f'%mae)\n",
    "\n",
    "    # return the result in pre-defined format: rss, intercept, then coefficients\n",
    "    rss = ((y_pred - data['y']) ** 2).sum()\n",
    "    return [rss, model.intercept_, *model.coef_]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# initialize a dataframe to store the results:\n",
    "col = ['rss', 'intercept'] + ['coef_x_%d' % i for i in range(1, 16)]\n",
    "ind = ['pow_%d' % i for i in range(1, 16)]\n",
    "\n",
    "perf_Linear = pd.DataFrame(index=ind, columns=col)\n",
    "\n",
    "# define the powers for which a plot is required: {power:where}\n",
    "plot_dict = {1:231, 3:232, 6:233, 9:234, 12:235, 15:236}\n",
    "\n",
    "# iterate through all powers and assimilate results\n",
    "for i in range(1, 16):\n",
    "    perf_Linear.iloc[i-1, 0:i+2] = example_regression(data, power=i, plot_dict=plot_dict, reg_type='Linear')"
   ]
},
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# render floats in the results table with two significant digits\n",
    "pd.set_option('display.float_format', '{:,.2g}'.format)\n",
    "perf_Linear"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.2"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}