{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Import Library"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import random\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from matplotlib.pylab import rcParams\n",
    "from sklearn.metrics import mean_absolute_error\n",
    "from sklearn.model_selection import KFold\n",
    "from sklearn.svm import SVR\n",
    "\n",
    "%matplotlib inline\n",
    "rcParams['figure.figsize'] = 8, 5"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Generate Sin(x)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Angles from 60deg up to (not including) 300deg in 4deg steps, in radians\n",
    "X = np.array([i * np.pi / 180 for i in range(60, 300, 4)])\n",
    "np.random.seed(100)  # fix the seed for reproducibility\n",
    "y = np.sin(X) + np.random.normal(0, 0.15, len(X))\n",
    "# Column vectors; -1 lets NumPy infer the sample count instead of hard-coding it\n",
    "X = X.reshape(-1, 1)\n",
    "y = y.reshape(-1, 1)\n",
    "data = pd.DataFrame(np.column_stack([X, y]), columns=['X', 'y'])\n",
    "plt.plot(data['X'], data['y'], '.');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def example_svm_regression(X, y, plot_dict, kernel, C=1):\n",
    "    \"\"\"Fit one SVR per entry of ``plot_dict`` and draw each fit in its own subplot.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X : array-like\n",
    "        Training inputs passed to ``SVR.fit``; also used as the x-axis of\n",
    "        every subplot.\n",
    "    y : array-like\n",
    "        Training targets.\n",
    "    plot_dict : dict\n",
    "        Maps each swept hyper-parameter value to a matplotlib subplot code\n",
    "        (e.g. ``{0.1: 231}``). The swept value is ``C`` for the linear\n",
    "        kernel, ``degree`` for poly and ``gamma`` for rbf.\n",
    "    kernel : {'linear', 'poly', 'rbf'}\n",
    "        SVR kernel to use.\n",
    "    C : float, default=1\n",
    "        Value of ``C`` for the poly and rbf kernels. Unused for the linear\n",
    "        kernel, where ``C`` is the swept value.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    ValueError\n",
    "        If ``kernel`` is not one of the supported names.\n",
    "    \"\"\"\n",
    "    # Name of the hyper-parameter swept for each kernel (also the title label)\n",
    "    swept = {'linear': 'C', 'poly': 'degree', 'rbf': 'gamma'}\n",
    "    if kernel not in swept:\n",
    "        raise ValueError(\n",
    "            'Unsupported kernel %r; expected one of %s' % (kernel, sorted(swept)))\n",
    "\n",
    "    for params, position in plot_dict.items():\n",
    "        # Build only the estimator that is actually needed\n",
    "        if kernel == 'linear':\n",
    "            model = SVR(kernel='linear', C=params)\n",
    "        elif kernel == 'poly':\n",
    "            model = SVR(kernel='poly', C=C, degree=params)\n",
    "        else:  # 'rbf'\n",
    "            model = SVR(kernel='rbf', C=C, gamma=params)\n",
    "\n",
    "        model.fit(X, y)\n",
    "        y_pred = model.predict(X)\n",
    "        mae = mean_absolute_error(y, y_pred)  # training error: (y_true, y_pred)\n",
    "\n",
    "        plt.subplot(position)\n",
    "        plt.plot(X, y, '.')\n",
    "        plt.plot(X, y_pred)\n",
    "        plt.title('Plot for %s: %.2f\\nmae:%.2f' % (swept[kernel], params, mae))\n",
    "\n",
    "    # Lay out once, after every title exists, so two-line titles are accounted for\n",
    "    plt.tight_layout()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# RBF SVR:看圖是否看得出來gamma多少是比較好的選擇?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_dict = {0.01: 231, 0.1: 232, 1: 233, 10: 234, 100: 235, 1000: 236}\n",
    "example_svm_regression(X, np.squeeze(y), plot_dict, kernel='rbf')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 利用5-folds Cross Validation算不同gamma的MAE"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def cross_validation(X, y, folds, gamma, shuffle=False, random_state=None):\n",
    "    \"\"\"Return the per-fold test MAE of an RBF SVR (C=1) for one gamma.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    X, y : numpy.ndarray\n",
    "        Inputs and targets; both are indexed with the integer index arrays\n",
    "        produced by ``KFold.split``.\n",
    "    folds : int\n",
    "        Number of K-fold splits.\n",
    "    gamma : float\n",
    "        RBF kernel coefficient passed to ``SVR``.\n",
    "    shuffle : bool, default=False\n",
    "        Forwarded to ``KFold``. With the default, each fold is a block of\n",
    "        consecutive rows.\n",
    "    random_state : int or None, default=None\n",
    "        Forwarded to ``KFold``; only meaningful when ``shuffle`` is True.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list of float\n",
    "        One mean absolute error per fold, in fold order.\n",
    "    \"\"\"\n",
    "    kf = KFold(n_splits=folds, shuffle=shuffle, random_state=random_state)\n",
    "    evaluation = []\n",
    "    for train_index, test_index in kf.split(X):\n",
    "        model = SVR(kernel='rbf', C=1, gamma=gamma)\n",
    "        model.fit(X[train_index], y[train_index])\n",
    "        y_pred = model.predict(X[test_index])\n",
    "        evaluation.append(mean_absolute_error(y[test_index], y_pred))\n",
    "    return evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "folds = 5\n",
    "gammas = np.logspace(-2, 3, 6).tolist()  # 1e-2, 1e-1, ..., 1e3\n",
    "# NOTE: X is sorted and the folds are unshuffled, so every test fold is a\n",
    "# contiguous angle range the model never saw; pass shuffle=True (with a\n",
    "# random_state) to cross_validation to evaluate interpolation instead.\n",
    "cv = pd.DataFrame(\n",
    "    {str(gamma): cross_validation(X, np.squeeze(y), folds, gamma) for gamma in gammas}\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "fig, ax = plt.subplots()\n",
    "cv.boxplot(ax=ax)\n",
    "ax.set(title='5-folds Cross Validation', xlabel='Gamma', ylabel='MAE');"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "cv"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.2"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}