{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# MDI 720 : Statistiques\n", "## Ridge\n", "### *Joseph Salmon*\n", "\n", "This notebook reproduces the pictures for the course \"Ridge_fr\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt # for plots\n", "from matplotlib import rc\n", "from sklearn.linear_model import RidgeCV\n", "import seaborn as sns\n", "from os import mkdir, path\n", "from mpl_toolkits.mplot3d import Axes3D\n", "# interaction mode better for 3D\n", "%matplotlib notebook" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "dirname = \"../prebuiltimages/\"\n", "if not path.exists(dirname):\n", " mkdir(dirname)\n", "\n", "imageformat = '.pdf'\n", "rc('font', **{'family': 'sans-serif', 'sans-serif': ['Computer Modern Roman']})\n", "params = {'axes.labelsize': 12,\n", " 'font.size': 16,\n", " 'legend.fontsize': 16,\n", " 'text.usetex': True,\n", " 'figure.figsize': (8, 6)}\n", "plt.rcParams.update(params)\n", "plt.close(\"all\")\n", "\n", "# sns.set_context(\"poster\")\n", "sns.set_context(\"poster\")\n", "sns.set_palette(\"colorblind\")\n", "sns.set_style(\"white\")\n", "np.random.seed(666)\n", "\n", "###############################################################################\n", "# display function:\n", "\n", "saving = False\n", "\n", "\n", "def my_saving_display(fig, dirname, filename, imageformat):\n", " \"\"\"\"Saving with personal function.\"\"\"\n", " filename = filename.replace('.', 'pt') # remove \".\" to avoid floats issues\n", " if saving is True:\n", " dirname + filename + imageformat\n", " image_name = dirname + filename + imageformat\n", " fig.savefig(image_name)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Ridge path / Ridge CV" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "n_features = 50\n", "n_samples = 51\n", "\n", "eps = 1e-6\n", "\n", "alpha_max = 1e4\n", "n_alphas = 100\n", "alphas = np.logspace(np.log10(alpha_max * eps),\n", " np.log10(alpha_max), num=n_alphas)\n", "\n", "# observe the weird phenomenon with such a design matrix X:\n", "# x = 10. ** (-np.arange(n_samples,))\n", "# X = (np.column_stack([x ** (i) for i in range(n_features)]))\n", "\n", "X = np.random.randn(n_samples, n_features)\n", "theta_true = np.zeros([n_features, ])\n", "theta_true[0:5] = 2\n", "y_true = np.dot(X, theta_true)\n", "sigma = 1.\n", "noise = sigma * np.random.randn(n_samples,)\n", "y = np.dot(X, theta_true) + noise\n", "\n", "\n", "def ridge_path(X, y, alphas):\n", " \"\"\" compute the ridge path for a list of tuning parameters \"\"\"\n", " U, s, Vt = np.linalg.svd(X, full_matrices=False)\n", " theta_ridge = np.zeros((n_features, n_alphas))\n", " mat_d = np.zeros((n_features, n_alphas))\n", " UTy = np.dot(U.T, y)\n", " for index, alpha in enumerate(alphas):\n", " mat_d = np.diag((s / (s ** 2 + alpha)))\n", " coef_alpha = np.dot(Vt.T, np.dot(mat_d, UTy))\n", " theta_ridge[:, index] = coef_alpha\n", "\n", " return theta_ridge, s\n", "\n", "# Possible alternative\n", "# from sklearn.linear_model import _solve_svd\n", "\n", "theta_ridge, s = ridge_path(X, y, np.asarray(alphas).ravel())\n", "\n", "fig2 = plt.figure(figsize=(10, 8))\n", "sns.despine()\n", "plt.title(\"Ridge path: \" +\n", " r\"$p={0}, n={1} $\".format(n_features, n_samples), fontsize=16)\n", "ax1 = fig2.add_subplot(111)\n", "ax1.plot(alphas, np.transpose(theta_ridge), linewidth=3)\n", "ax1.set_xscale('log')\n", "ax1.set_xlabel(r\"$\\lambda$\")\n", "ax1.set_ylabel(\"Coefficients values\")\n", "plt.tight_layout()\n", "plt.show()\n", "\n", "my_saving_display(fig2, dirname, \"Ridge_path\", imageformat)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "###############################################################################\n", "# Cross Validation for Ridge\n", "\n", "# If cv not specified GCV (=leave-one-out) is used... seems really bad,\n", "cv_fold = 5\n", "clf = RidgeCV(alphas=alphas, fit_intercept=False, normalize=False, cv=cv_fold)\n", "clf.fit(X, y)\n", "\n", "ax1.axvline(clf.alpha_, color='K', linestyle='-', linewidth=3, label=\"$a$\")\n", "plt.annotate('$CV=5$', xy=(3 * clf.alpha_, 0.5), xycoords='data',\n", " xytext=(0, 150), textcoords='offset points', fontsize=18)\n", "\n", "my_saving_display(fig2, dirname, \"Ridge_path_CV\", imageformat)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Bias / Variance trade-off" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "# Bias contribution to the prediction risk\n", "Biais2_tab = np.zeros(np.shape(alphas))\n", "Gram = np.dot(X.T, X)\n", "for index, alpha in enumerate(alphas):\n", " # Biais2 = np.dot(np.dot(Gram, theta_true),\n", " # np.linalg.solve(np.dot(Gram + alpha * np.eye(n_features),\n", " # Gram + alpha * np.eye(n_features)),\n", " # theta_true))\n", " intermed = alpha * X.dot(np.linalg.solve(Gram + alpha * np.eye(n_features),\n", " theta_true))\n", " Biais2 = np.linalg.norm(intermed) ** 2\n", " Biais2_tab[index] = Biais2\n", "\n", "# Variance contribution to the prediction risk\n", "Var_tab = np.zeros(np.shape(alphas))\n", "\n", "for index, alpha in enumerate(alphas):\n", " Var_tab[index] = np.sum(s ** 4 / (s ** 2 + alpha) ** 2) * sigma ** 2\n", "\n", "Risk = Var_tab + Biais2_tab\n", "fig1 = plt.figure(figsize=(10, 8))\n", "\n", "plt.title(\"Bias-Variance trade-off: \" +\n", " r\"$p={0}, n={1} $\".format(n_features, n_samples), fontsize=16)\n", "ax1 = fig1.add_subplot(111)\n", "ax1.set_ylim([5 * sigma ** 2, Risk[-1] * 1.3])\n", "plt.loglog(alphas, Var_tab, label=\"Variance\", linewidth=6)\n", "plt.loglog(alphas, Biais2_tab, label=\"Squared Bias\", linewidth=6)\n", "plt.loglog(alphas, Risk, label=\"Risk\", linewidth=6)\n", "plt.xlabel('$\\lambda$')\n", "plt.ylabel(\"Risk\")\n", "plt.legend(loc=\"upper left\", fontsize=20)\n", "plt.tight_layout()\n", "plt.show()\n", "my_saving_display(fig1, dirname, \"Bias_variance_trade_off\", imageformat)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "plt.axvline(clf.alpha_, color='K', linestyle='-', linewidth=8, label=\"$a$\")\n", "plt.annotate('$CV=5$', xy=(3 * clf.alpha_, 3), xycoords='data',\n", " xytext=(0, 80), textcoords='offset points', fontsize=18)\n", "\n", "my_saving_display(fig1, dirname, \"Bias_variance_trade_off_C\", imageformat)\n" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } }, "nbformat": 4, "nbformat_minor": 2 }