{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import Libraries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "import random\n",
    "import matplotlib.pyplot as plt\n",
    "from matplotlib.pylab import rcParams\n",
    "from sklearn.metrics import mean_absolute_error\n",
    "\n",
    "%matplotlib inline\n",
    "rcParams['figure.figsize'] = 8, 5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Generate Noisy sin(x) Data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Noisy sine sample: angles 60, 64, ..., 296 degrees (range() excludes 300), in radians\n",
    "X = np.array([i * np.pi / 180 for i in range(60, 300, 4)])\n",
    "np.random.seed(100)  # fixed seed so the generated noise is reproducible\n",
    "y = np.sin(X) + np.random.normal(0, 0.15, len(X))\n",
    "# Column vectors of shape (n_samples, 1); -1 infers the row count instead of hard-coding 60\n",
    "X = X.reshape(-1, 1)\n",
    "y = y.reshape(-1, 1)\n",
    "data = pd.DataFrame(np.column_stack([X, y]), columns=['X', 'y'])\n",
    "plt.plot(data['X'], data['y'], '.');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.svm import SVR\n",
    "\n",
    "\n",
    "def example_svm_regression(X, y, plot_dict, kernel, C=1):\n",
    "    \"\"\"Fit one SVR per swept value and draw each fit in its own subplot.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X, y : array-like\n",
    "        Training data handed to ``SVR.fit``; also drawn as the scatter in every subplot.\n",
    "    plot_dict : dict\n",
    "        Maps each swept hyper-parameter value to the position passed to ``plt.subplot``.\n",
    "        The value is used as ``C`` for 'linear', ``degree`` for 'poly' and ``gamma`` for 'rbf'.\n",
    "    kernel : {'linear', 'poly', 'rbf'}\n",
    "        Kernel of the SVR, which also decides which hyper-parameter is swept.\n",
    "    C : float, default 1\n",
    "        Regularisation parameter for 'poly' and 'rbf'. Not used for 'linear',\n",
    "        where the swept value itself is C.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``kernel`` is not one of the supported names.\n",
    "    \"\"\"\n",
    "    # One factory per kernel so only the estimator that is actually needed gets built.\n",
    "    builders = {\n",
    "        'linear': lambda value: SVR(kernel='linear', C=value),\n",
    "        'poly': lambda value: SVR(kernel='poly', C=C, degree=value),\n",
    "        'rbf': lambda value: SVR(kernel='rbf', C=C, gamma=value),\n",
    "    }\n",
    "    swept_name = {'linear': 'C', 'poly': 'degree', 'rbf': 'gamma'}\n",
    "\n",
    "    # Fail early with a clear message instead of hitting an unbound y_pred while plotting.\n",
    "    if kernel not in builders:\n",
    "        raise ValueError('Unsupported kernel %r; expected one of %s'\n",
    "                         % (kernel, sorted(builders)))\n",
    "\n",
    "    for params, position in plot_dict.items():\n",
    "        model = builders[kernel](params)\n",
    "        model.fit(X, y)\n",
    "        y_pred = model.predict(X)\n",
    "        mae = mean_absolute_error(y, y_pred)  # in-sample error, (y_true, y_pred) order\n",
    "\n",
    "        plt.subplot(position)\n",
    "        plt.plot(X, y, '.')\n",
    "        plt.plot(X, y_pred)\n",
    "        plt.title('Plot for %s: %.2f' % (swept_name[kernel], params) + '\\n' +\n",
    "                  'mae:%.2f' % mae)\n",
    "\n",
    "    # Lay out once, after every subplot has its title, so all titles are accounted for.\n",
    "    plt.tight_layout()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RBF SVR：看圖是否看得出來gamma多少是比較好的選擇？"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# gamma value -> subplot position on a 2x3 grid (231 is the first panel, 236 the last)\n",
    "gamma_values = [0.01, 0.1, 1, 10, 100, 1000]\n",
    "plot_dict = {value: 231 + offset for offset, value in enumerate(gamma_values)}\n",
    "example_svm_regression(X, np.squeeze(y), plot_dict, kernel='rbf')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 利用5-folds Cross Validation算不同gamma的MAE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.model_selection import KFold\n",
    "\n",
    "\n",
    "def cross_validation(X, y, folds, gamma, C=1, shuffle=False, random_state=None):\n",
    "    \"\"\"Return the per-fold test MAE of an RBF-kernel SVR under K-fold cross-validation.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X, y : array-like\n",
    "        Features and targets; rows are selected with the index arrays produced by ``KFold``.\n",
    "    folds : int\n",
    "        Number of splits passed to ``KFold``.\n",
    "    gamma : float\n",
    "        Value passed to ``SVR(gamma=...)``.\n",
    "    C : float, default 1\n",
    "        Value passed to ``SVR(C=...)``.\n",
    "    shuffle : bool, default False\n",
    "        Forwarded to ``KFold``. With the default, the split is not shuffled.\n",
    "    random_state : int or None, default None\n",
    "        Forwarded to ``KFold``; only meaningful together with ``shuffle=True``.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list\n",
    "        One mean absolute error per fold, in the order the folds are produced.\n",
    "    \"\"\"\n",
    "    kf = KFold(n_splits=folds, shuffle=shuffle, random_state=random_state)\n",
    "    evaluation = []\n",
    "    for train_index, test_index in kf.split(X):\n",
    "        # A fresh estimator per fold so no fitted state carries over between folds.\n",
    "        model = SVR(kernel='rbf', C=C, gamma=gamma)\n",
    "        model.fit(X[train_index], y[train_index])\n",
    "        y_pred = model.predict(X[test_index])\n",
    "        evaluation.append(mean_absolute_error(y[test_index], y_pred))\n",
    "    return evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "folds = 5\n",
    "gamma_grid = np.logspace(-2, 3, 6).tolist()  # six gammas: 1e-2, 1e-1, ..., 1e3\n",
    "\n",
    "# Collect the per-fold MAE list of every gamma, then build the table in one go\n",
    "fold_mae = {}\n",
    "for gamma in gamma_grid:\n",
    "    fold_mae[str(gamma)] = cross_validation(X, np.squeeze(y), folds, gamma)\n",
    "cv = pd.DataFrame(fold_mae)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Draw the box plot first, then label the Axes it returns\n",
    "ax = cv.boxplot()\n",
    "ax.set_title('5-folds Cross Validation')\n",
    "ax.set_xlabel('Gamma')\n",
    "ax.set_ylabel('MAE')\n",
    "ax"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Per-fold MAE table: one row per fold, one column per gamma (column label is str(gamma))\n",
    "cv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.2"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}