{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import Library"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import random\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.pylab import rcParams\n",
    "\n",
    "%matplotlib inline\n",
    "rcParams['figure.figsize'] = 8, 5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Generate Sin(x) Dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# define input array with angles from 60deg to 300deg converted to radians\n",
    "x = np.array([i * np.pi / 180 for i in range(60, 300, 4)])\n",
    "np.random.seed(100)  # Setting seed for reproducability\n",
    "y = np.sin(x) + np.random.normal(0, 0.15, len(x))\n",
    "\n",
    "data = pd.DataFrame(np.column_stack([x, y]), columns=['x', 'y'])\n",
    "plt.plot(data['x'], data['y'], '.')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Generate New Features with higher power "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for i in range(2, 16):  # power of 1 is already there\n",
    "    colname = 'x_%d' % i  # new var will be x_power\n",
    "    data[colname] = data['x']**i\n",
    "data.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 0. Function definition"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.linear_model import LinearRegression, Lasso, Ridge\n",
    "#　調整參數造成收斂不完全，省略警告\n",
    "import warnings\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def example_regression(data, power, plot_dict, reg_type, alpha, var):\n",
    "    # define estimator object\n",
    "    type_dict = {\n",
    "        'Linear': LinearRegression(),\n",
    "        'Lasso': Lasso(alpha=alpha),\n",
    "        'Ridge': Ridge(alpha=alpha)\n",
    "    }\n",
    "    if var == 'power':\n",
    "        # generate X in order\n",
    "        X = ['x']\n",
    "        if power >= 2:\n",
    "            X.extend(['x_%d' % i for i in range(2, power + 1)])\n",
    "\n",
    "        # fit the model\n",
    "        if reg_type in type_dict:\n",
    "            model = type_dict[reg_type]\n",
    "        model.fit(data[X], data['y'])\n",
    "        y_pred = model.predict(data[X])\n",
    "\n",
    "        # check if a plot is to be made for the entered power\n",
    "        if power in plot_dict:\n",
    "            plt.subplot(plot_dict[power])\n",
    "            plt.tight_layout()\n",
    "            plt.plot(data['x'], data['y'], '.')\n",
    "            plt.plot(data['x'], y_pred)\n",
    "            plt.title('Plot for power: %d' % power)\n",
    "\n",
    "        # return the result in pre-defined format\n",
    "        rss = sum((y_pred - data['y'])**2)\n",
    "        ret = [rss]\n",
    "        ret.extend([model.intercept_])\n",
    "        ret.extend(model.coef_)\n",
    "    elif var == 'alpha':\n",
    "        # generate X in order\n",
    "        X = ['x']\n",
    "        if power >= 2:\n",
    "            X.extend(['x_%d' % i for i in range(2, power + 1)])\n",
    "\n",
    "        # fit the model\n",
    "        if reg_type in type_dict:\n",
    "            model = type_dict[reg_type]\n",
    "        model.fit(data[X], data['y'])\n",
    "        y_pred = model.predict(data[X])\n",
    "\n",
    "        # check if a plot is to be made for the entered power\n",
    "        if alpha in plot_dict:\n",
    "            plt.subplot(plot_dict[alpha])\n",
    "            plt.tight_layout()\n",
    "            plt.plot(data['x'], data['y'], '.')\n",
    "            plt.plot(data['x'], y_pred)\n",
    "            plt.title('Plot for alpha: %.3f' % alpha)\n",
    "\n",
    "        # return the result in pre-defined format\n",
    "        rss = sum((y_pred - data['y'])**2)\n",
    "        ret = [rss]\n",
    "        ret.extend([model.intercept_])\n",
    "        ret.extend(model.coef_)\n",
    "    return ret"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Lasso Regularization with Varied power"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# initialize a dataframe to store the results:\n",
    "col = ['rss', 'intercept'] + ['coef_x_%d' % i for i in range(1, 16)]\n",
    "ind = ['power_%d' % i for i in range(1, 16)]\n",
    "\n",
    "perf_Lasso = pd.DataFrame(index=ind, columns=col)\n",
    "\n",
    "# define the powers for which a plot is required: {power:where}\n",
    "plot_dict = {1: 231, 3: 232, 6: 233, 9: 234, 12: 235, 15: 236}\n",
    "\n",
    "# iterate through all powers and assimilate results\n",
    "for i in range(1, 16):\n",
    "    perf_Lasso.iloc[i - 1, 0:i + 2] = example_regression(data,\n",
    "                                                         power=i,\n",
    "                                                         plot_dict=plot_dict,\n",
    "                                                         reg_type='Lasso',\n",
    "                                                         alpha=1e-3,\n",
    "                                                         var='power')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "pd.options.display.float_format = '{:,.2g}'.format\n",
    "perf_Lasso"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Lasso Regularization with Varied Alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# initialize a dataframe to store the results:\n",
    "col = ['rss', 'intercept'] + ['coef_x_%d' % i for i in range(1, 16)]\n",
    "ind = ['alpha_%de-3' % i for i in range(1, 16)]\n",
    "\n",
    "perf_Lasso = pd.DataFrame(index=ind, columns=col)\n",
    "\n",
    "# define the alpha for which a plot is required: {alpha:where}\n",
    "plot_dict = {\n",
    "    1e-3: 231,\n",
    "    3e-3: 232,\n",
    "    6e-3: 233,\n",
    "    7e-3: 234,\n",
    "    12e-3: 235,\n",
    "    15e-3: 236\n",
    "}  # key值為alpha的值\n",
    "\n",
    "# iterate through all powers and assimilate results\n",
    "for i in range(1, 16):\n",
    "    perf_Lasso.iloc[i - 1, 0:18] = example_regression(data,\n",
    "                                                      power=15,\n",
    "                                                      plot_dict=plot_dict,\n",
    "                                                      reg_type='Lasso',\n",
    "                                                      alpha=i * 1e-3,\n",
    "                                                      var='alpha')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.options.display.float_format = '{:,.2g}'.format\n",
    "perf_Lasso"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ridge Regularization with Varied power"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# initialize a dataframe to store the results:\n",
    "col = ['rss', 'intercept'] + ['coef_x_%d' % i for i in range(1, 16)]\n",
    "ind = ['power_%d' % i for i in range(1, 16)]\n",
    "\n",
    "perf_Ridge = pd.DataFrame(index=ind, columns=col)\n",
    "\n",
    "# define the powers for which a plot is required: {power:where}\n",
    "plot_dict = {1: 231, 3: 232, 6: 233, 9: 234, 12: 235, 15: 236}\n",
    "\n",
    "# iterate through all powers and assimilate results\n",
    "for i in range(1, 16):\n",
    "    perf_Ridge.iloc[i - 1, 0:i + 2] = example_regression(data,\n",
    "                                                         power=i,\n",
    "                                                         plot_dict=plot_dict,\n",
    "                                                         reg_type='Ridge',\n",
    "                                                         alpha=1e-3,\n",
    "                                                         var='power')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.options.display.float_format = '{:,.2g}'.format\n",
    "perf_Ridge"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ridge Regularization with Varied Alpha¶"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# initialize a dataframe to store the results:\n",
    "col = ['rss', 'intercept'] + ['coef_x_%d' % i for i in range(1, 16)]\n",
    "ind = ['alpha_%de-3' % i for i in range(1, 16)]\n",
    "\n",
    "perf_Ridge = pd.DataFrame(index=ind, columns=col)\n",
    "\n",
    "# define the alpha for which a plot is required: {power:where}\n",
    "plot_dict = {\n",
    "    1e-3: 231,\n",
    "    3e-3: 232,\n",
    "    6e-3: 233,\n",
    "    7e-3: 234,\n",
    "    12e-3: 235,\n",
    "    15e-3: 236\n",
    "}  # key值為alpha的值\n",
    "# iterate through all alphas and assimilate results\n",
    "for i in range(1, 16):\n",
    "    perf_Ridge.iloc[i - 1, 0:18] = example_regression(data,\n",
    "                                                      power=15,\n",
    "                                                      plot_dict=plot_dict,\n",
    "                                                      reg_type='Ridge',\n",
    "                                                      alpha=i * 1e-3,\n",
    "                                                      var='alpha')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "pd.options.display.float_format = '{:,.2g}'.format\n",
    "perf_Ridge"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.2"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}