{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# MDI 720 : Statistiques\n", "## SVD\n", "### *Joseph Salmon*\n", "\n", "This notebook reproduces the pictures for the course \"SVD\"" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import statsmodels.api as sm\n", "import matplotlib.pyplot as plt\n", "from matplotlib import rc\n", "import seaborn as sns\n", "from os import mkdir, path\n", "from sklearn import decomposition\n", "import time\n", "\n", "%matplotlib notebook" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Plot initialization\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "dirname = \"../prebuiltimages/\"\n", "if not path.exists(dirname):\n", " mkdir(dirname)\n", "\n", "imageformat = '.pdf'\n", "rc('font', **{'family': 'sans-serif', 'sans-serif': ['Computer Modern Roman']})\n", "params = {'axes.labelsize': 12,\n", " 'font.size': 12,\n", " 'legend.fontsize': 12,\n", " 'xtick.labelsize': 10,\n", " 'ytick.labelsize': 10,\n", " 'text.usetex': True,\n", " 'figure.figsize': (8, 6)}\n", "plt.rcParams.update(params)\n", "\n", "sns.set_context(\"poster\")\n", "sns.set_palette(\"colorblind\")\n", "sns.axes_style()\n", "sns.set_style({'legend.frameon': True})\n", "plt.close(\"all\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Saving display function" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "saving = True\n", "\n", "\n", "def my_saving_display(fig, dirname, filename, imageformat):\n", " \"\"\"\"Saving with personal function.\"\"\"\n", " filename = filename.replace('.', 'pt') # remove \".\" to avoid floats issues\n", " if saving is True:\n", " dirname + filename + imageformat\n", " image_name = dirname + filename + imageformat\n", " fig.savefig(image_name)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Compare " ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "X = np.random.randn(9, 6)\n", "\n", "# Full SVD\n", "U, s, V = np.linalg.svd(X, full_matrices=True)\n", "U.shape, V.shape, s.shape\n", "S = np.zeros((9, 6), dtype=float)\n", "S[:6, :6] = np.diag(s)\n", "\n", "# test to numerical precision if 2 arguments are equal\n", "print(np.allclose(X, U.dot(S.dot(V))))\n", "\n", "# Partial SVD\n", "U, s, V = np.linalg.svd(X, full_matrices=False)\n", "U.shape, V.shape, s.shape\n", "S = np.diag(s) # reshape to get a diagonal matrix\n", "\n", "# test to numerical precision if 2 arguments are equal\n", "print(np.allclose(X, U.dot(S.dot(V))))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# SVD/OLS (un)stability" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [ "n_features = 6\n", "n_samples = 10\n", "x = 10. ** (-np.arange(n_samples,))\n", "X = (np.column_stack([x ** (i) for i in range(n_features)]))\n", "U, s, V = np.linalg.svd(X, full_matrices=False)\n", "theta_true = np.ones([n_features, ])\n", "y_true = np.dot(X, theta_true)\n", "err = np.zeros(n_samples,)\n", "err_delta = np.zeros(n_samples,)\n", "for i in range(1, n_samples):\n", " delta = 10. ** (-i) * (0.5 - np.random.rand(n_samples, )) * y_true\n", " y = y_true + delta\n", " w = np.dot(np.transpose(U), y)\n", " theta_hat = np.dot(V, w / s)\n", " err[i] = np.sqrt(np.sum((theta_hat - theta_true) ** 2))\n", " err_delta[i] = np.sqrt(np.sum(delta ** 2))\n", "\n", "sns.set_context(\"poster\", font_scale=1.5)\n", "sns.set_style(\"ticks\")\n", "fig1 = plt.figure(figsize=(14, 8))\n", "ax1 = fig1.add_subplot(111)\n", "ax1.plot(err_delta, err, '*', markersize=20)\n", "ax1.set_yscale('log')\n", "ax1.set_xscale('log')\n", "sns.despine()\n", "ax1.set_xlabel(r\"$\\|\\Delta\\|_2$\")\n", "ax1.set_ylabel(r\"$\\|\\hat\\theta^{\\Delta}-\\hat\\theta\\|_2$\")\n", "plt.title(r\"$s_1={0: .1e}, s_2={1:.1e}, s_3={2:.1e}, s_4={3: .1e}, s_5={4: .1e}, s_6={5: .1e}$\".format(s[0], s[1], s[2], s[3], s[4], s[5]), fontsize=22)\n", "plt.tight_layout()\n", "plt.show()\n", "\n", "filename = \"amplification_erreur\"\n", "image_name = dirname + filename + imageformat\n", "fig1.savefig(image_name)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } }, "nbformat": 4, "nbformat_minor": 2 }