{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import numpy.linalg as lin\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "from mpl_toolkits import mplot3d\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Name</th>\n",
       "      <th>Once upon a time in Hollywood (2019)</th>\n",
       "      <th>Dark Knight (2008)</th>\n",
       "      <th>Farenheit 9/11 (2004)</th>\n",
       "      <th>Gladiator (2000)</th>\n",
       "      <th>No country for old men (2007)</th>\n",
       "      <th>Ocean's 8</th>\n",
       "      <th>Call me by your name (2018)</th>\n",
       "      <th>Singin' in the Rain (1952)</th>\n",
       "      <th>Groundhog Day (1993)</th>\n",
       "      <th>...</th>\n",
       "      <th>Parks and Recreation (TV)</th>\n",
       "      <th>Friends (TV)</th>\n",
       "      <th>Veronica Mars (TV)</th>\n",
       "      <th>Battlestar Galactica (TV, 2004)</th>\n",
       "      <th>Brokeback Mountain (2005)</th>\n",
       "      <th>Sense8 (TV)</th>\n",
       "      <th>Bohemian Rhapsody (2019)</th>\n",
       "      <th>Godfather</th>\n",
       "      <th>Ghost in the shell</th>\n",
       "      <th>Attack on Titan</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>F2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>S3</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>5.0</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>D4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>...</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>2.0</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>K5</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>C6</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2.0</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>56</th>\n",
       "      <td>A58</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>57</th>\n",
       "      <td>G59</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>3.0</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>4.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>58</th>\n",
       "      <td>B60</td>\n",
       "      <td>4</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>4.0</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>5.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>59</th>\n",
       "      <td>M61</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>5.0</td>\n",
       "      <td>4</td>\n",
       "      <td>4</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>60</th>\n",
       "      <td>J62</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>4</td>\n",
       "      <td>...</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>4.0</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>3.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>61 rows × 28 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "   Name  Once upon a time in Hollywood (2019)  Dark Knight (2008)  \\\n",
       "0    F2                                     3                   3   \n",
       "1    S3                                     1                   1   \n",
       "2    D4                                     3                   4   \n",
       "3    K5                                     3                   4   \n",
       "4    C6                                     4                   5   \n",
       "..  ...                                   ...                 ...   \n",
       "56  A58                                     2                   5   \n",
       "57  G59                                     4                   5   \n",
       "58  B60                                     4                   5   \n",
       "59  M61                                     3                   4   \n",
       "60  J62                                     3                   2   \n",
       "\n",
       "    Farenheit 9/11 (2004)  Gladiator (2000)  No country for old men (2007)  \\\n",
       "0                       5                 3                              1   \n",
       "1                       5                 1                              1   \n",
       "2                       3                 4                              2   \n",
       "3                       3                 4                              3   \n",
       "4                       3                 3                              3   \n",
       "..                    ...               ...                            ...   \n",
       "56                      1                 2                              3   \n",
       "57                      1                 3                              2   \n",
       "58                      5                 5                              2   \n",
       "59                      2                 4                              2   \n",
       "60                      3                 2                              3   \n",
       "\n",
       "    Ocean's 8  Call me by your name (2018)  Singin' in the Rain (1952)  \\\n",
       "0           3                            3                           4   \n",
       "1           3                            3                           2   \n",
       "2           4                            3                           2   \n",
       "3           3                            3                           2   \n",
       "4           4                            3                           3   \n",
       "..        ...                          ...                         ...   \n",
       "56          2                            3                           3   \n",
       "57          1                            1                           2   \n",
       "58          3                            2                           3   \n",
       "59          3                            1                           3   \n",
       "60          3                            2                           3   \n",
       "\n",
       "    Groundhog Day (1993)  ...  Parks and Recreation (TV)  Friends (TV)  \\\n",
       "0                      3  ...                          4             5   \n",
       "1                      5  ...                          3             4   \n",
       "2                      5  ...                          5             4   \n",
       "3                      2  ...                          3             3   \n",
       "4                      3  ...                          2             3   \n",
       "..                   ...  ...                        ...           ...   \n",
       "56                     3  ...                          2             4   \n",
       "57                     3  ...                          2             2   \n",
       "58                     4  ...                          2             5   \n",
       "59                     4  ...                          2             3   \n",
       "60                     4  ...                          3             2   \n",
       "\n",
       "    Veronica Mars (TV)  Battlestar Galactica (TV, 2004)  \\\n",
       "0                    1                                3   \n",
       "1                    1                                1   \n",
       "2                    3                                3   \n",
       "3                    2                                5   \n",
       "4                    2                                2   \n",
       "..                 ...                              ...   \n",
       "56                   1                                2   \n",
       "57                   1                                4   \n",
       "58                   4                                4   \n",
       "59                   1                                4   \n",
       "60                   2                                3   \n",
       "\n",
       "    Brokeback Mountain (2005)  Sense8 (TV)  Bohemian Rhapsody (2019)  \\\n",
       "0                           4            3                       4.0   \n",
       "1                           1            1                       5.0   \n",
       "2                           4            4                       2.0   \n",
       "3                           3            3                       3.0   \n",
       "4                           3            2                       2.0   \n",
       "..                        ...          ...                       ...   \n",
       "56                          2            3                       4.0   \n",
       "57                          2            4                       3.0   \n",
       "58                          4            4                       4.0   \n",
       "59                          3            4                       5.0   \n",
       "60                          3            3                       4.0   \n",
       "\n",
       "    Godfather  Ghost in the shell  Attack on Titan  \n",
       "0           3                   3              5.0  \n",
       "1           3                   1              1.0  \n",
       "2           3                   3              4.0  \n",
       "3           2                   2              2.0  \n",
       "4           4                   4              4.0  \n",
       "..        ...                 ...              ...  \n",
       "56          3                   4              5.0  \n",
       "57          5                   2              4.0  \n",
       "58          5                   5              5.0  \n",
       "59          4                   4              NaN  \n",
       "60          2                   4              3.0  \n",
       "\n",
       "[61 rows x 28 columns]"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Load the ratings table: one row per rater, a Name column followed by one column per title.\n",
    "dataframe = pd.read_csv(filepath_or_buffer=\"movieratings.csv\")\n",
    "# Asking for 100 rows covers the whole table; pandas elides the middle when displaying it.\n",
    "dataframe.head(100)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Keep only the numeric block: the [1:-1, 1:-1] slice drops the first and last rows\n",
    "# as well as the first column (names) and the last column of the table.\n",
    "# NOTE(review): the earlier note only mentioned skipping an empty first row and the name\n",
    "# column -- confirm that dropping the last row and last column is intended too.\n",
    "ratings = dataframe.to_numpy()[1:-1, 1:-1].astype(np.float32)\n",
    "\n",
    "# Stray unrated entries show up as NaN; replace them with 3, the \"meh\" rating.\n",
    "unrated = np.isnan(ratings)\n",
    "ratings[unrated] = 3\n",
    "\n",
    "# Centre every column (movie) on its mean rating before factorising.\n",
    "means = ratings.mean(axis=0)\n",
    "X = ratings - means\n",
    "\n",
    "# Full SVD, so that X = u[:, :len(s)] @ diag(s) @ vt.\n",
    "u, s, vt = lin.svd(X)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59, 26) (59, 59) (26,) (26, 26)\n",
      "[[1. 1. 5. ... 5. 3. 1.]\n",
      " [3. 4. 3. ... 2. 3. 3.]\n",
      " [3. 4. 3. ... 3. 2. 2.]\n",
      " ...\n",
      " [4. 5. 1. ... 3. 5. 2.]\n",
      " [4. 5. 5. ... 4. 5. 5.]\n",
      " [3. 4. 2. ... 5. 4. 4.]]\n"
     ]
    }
   ],
   "source": [
    "print(X.shape, u.shape, s.shape, vt.shape)\n",
    "\n",
    "# Sanity check: rebuilding X from every singular triplet and adding the column means back\n",
    "# should reproduce the ratings matrix. The number of singular values is read from s\n",
    "# instead of being hard-coded, so the check keeps working if the table changes shape.\n",
    "n_sv = s.shape[0]\n",
    "reconstruction = u[:, :n_sv] @ np.diag(s) @ vt[:n_sv, :] + means\n",
    "print(np.round(reconstruction, 2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59, 26)\n",
      "[18.85 12.87 11.89 10.81 10.24  9.14  8.91  8.62  8.01  7.46  7.27  6.67\n",
      "  6.36  6.17  5.85  5.59  5.27  4.84  4.77  4.38  3.97  3.63  3.41  3.26\n",
      "  2.79  2.47]\n"
     ]
    }
   ],
   "source": [
    "# Dimensions of the ratings matrix, then the singular values to two decimals.\n",
    "print(np.shape(ratings))\n",
    "print(np.around(s, decimals=2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[18.85  12.871 11.89  10.81  10.241  9.14   8.906  8.619  8.01   7.459\n",
      "  7.267  6.672  6.36   6.17   5.848  5.593  5.268  4.835  4.769  4.382\n",
      "  3.969  3.632  3.413  3.256  2.79   2.469]\n",
      "(26,)\n",
      "[18.85 12.87 11.89 10.81 10.24  0.    0.    0.    0.    0.    0.    0.\n",
      "  0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.\n",
      "  0.    0.  ]\n"
     ]
    }
   ],
   "source": [
    "print(np.round(s,3))\n",
    "print(s.shape)\n",
    "\n",
    "# Rank-5 truncation: keep the first five singular values and zero the rest.\n",
    "# Work on a copy -- `sapprox = s` only creates a second name for the same array, so\n",
    "# zeroing the tail would also wipe out s for every other cell (and for re-runs of this one).\n",
    "sapprox = s.copy()\n",
    "sapprox[5:] = 0\n",
    "print(np.round(sapprox,2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(26, 26)\n"
     ]
    }
   ],
   "source": [
    "# Columns of v are the rows of vt, i.e. the right singular vectors of X.\n",
    "v = np.transpose(vt)\n",
    "print(np.shape(v))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[[ 1. -0.  0.  0. -0. -0. -0. -0.  0.  0.  0. -0.  0. -0. -0. -0. -0. -0.\n",
      "  -0. -0.  0. -0. -0.  0.  0.  0.]\n",
      " [-0.  1.  0. -0. -0.  0. -0. -0. -0.  0.  0.  0. -0. -0.  0. -0. -0. -0.\n",
      "   0.  0.  0. -0. -0. -0. -0.  0.]\n",
      " [ 0.  0.  1. -0. -0.  0. -0.  0.  0. -0. -0. -0. -0. -0. -0.  0.  0. -0.\n",
      "   0. -0.  0. -0. -0.  0.  0. -0.]\n",
      " [ 0. -0. -0.  1. -0. -0. -0.  0. -0.  0. -0. -0.  0. -0. -0. -0.  0.  0.\n",
      "   0. -0. -0.  0.  0.  0.  0.  0.]\n",
      " [-0. -0. -0. -0.  1. -0.  0.  0. -0. -0. -0.  0. -0.  0.  0.  0. -0.  0.\n",
      "   0. -0. -0.  0.  0.  0. -0. -0.]\n",
      " [-0.  0.  0. -0. -0.  1.  0. -0. -0. -0. -0. -0. -0.  0.  0. -0.  0.  0.\n",
      "  -0. -0. -0. -0.  0.  0.  0. -0.]\n",
      " [-0. -0. -0. -0.  0.  0.  1.  0.  0. -0.  0.  0.  0. -0.  0.  0.  0. -0.\n",
      "   0.  0.  0.  0.  0.  0.  0.  0.]\n",
      " [-0. -0.  0.  0.  0. -0.  0.  1. -0. -0.  0. -0.  0. -0.  0. -0. -0. -0.\n",
      "  -0. -0.  0. -0.  0. -0. -0.  0.]\n",
      " [ 0. -0.  0. -0. -0. -0.  0. -0.  1. -0. -0. -0.  0. -0.  0.  0.  0.  0.\n",
      "   0. -0.  0. -0. -0.  0.  0.  0.]\n",
      " [ 0.  0. -0.  0. -0. -0. -0. -0. -0.  1.  0. -0.  0. -0.  0.  0.  0.  0.\n",
      "  -0.  0.  0. -0. -0. -0.  0.  0.]\n",
      " [ 0.  0. -0. -0. -0. -0.  0.  0. -0.  0.  1. -0. -0. -0. -0. -0.  0.  0.\n",
      "   0.  0. -0. -0.  0. -0. -0. -0.]\n",
      " [-0.  0. -0. -0.  0. -0.  0. -0. -0. -0. -0.  1.  0. -0.  0. -0.  0. -0.\n",
      "  -0. -0.  0. -0. -0.  0.  0.  0.]\n",
      " [ 0. -0. -0.  0. -0. -0.  0.  0.  0.  0. -0.  0.  1. -0.  0. -0. -0. -0.\n",
      "   0. -0. -0.  0. -0. -0. -0.  0.]\n",
      " [-0. -0. -0. -0.  0.  0. -0. -0. -0. -0. -0. -0. -0.  1. -0. -0. -0. -0.\n",
      "   0. -0.  0. -0.  0.  0. -0.  0.]\n",
      " [-0.  0. -0. -0.  0.  0.  0.  0.  0.  0. -0.  0.  0. -0.  1.  0.  0.  0.\n",
      "  -0.  0.  0. -0.  0.  0. -0. -0.]\n",
      " [-0. -0.  0. -0.  0. -0.  0. -0.  0.  0. -0. -0. -0. -0.  0.  1.  0. -0.\n",
      "   0.  0. -0. -0. -0. -0.  0. -0.]\n",
      " [-0. -0.  0.  0. -0.  0.  0. -0.  0.  0.  0.  0. -0. -0.  0.  0.  1. -0.\n",
      "  -0.  0.  0. -0.  0.  0. -0. -0.]\n",
      " [-0. -0. -0.  0.  0.  0. -0. -0.  0.  0.  0. -0. -0. -0.  0. -0. -0.  1.\n",
      "   0. -0. -0.  0.  0. -0. -0.  0.]\n",
      " [-0.  0.  0.  0.  0. -0.  0. -0.  0. -0.  0. -0.  0.  0. -0.  0. -0.  0.\n",
      "   1.  0.  0. -0.  0. -0.  0.  0.]\n",
      " [-0.  0. -0. -0. -0. -0.  0. -0. -0.  0.  0. -0. -0. -0.  0.  0.  0. -0.\n",
      "   0.  1.  0. -0.  0.  0.  0. -0.]\n",
      " [ 0.  0.  0. -0. -0. -0.  0.  0.  0.  0. -0.  0. -0.  0.  0. -0.  0. -0.\n",
      "   0.  0.  1.  0. -0.  0. -0.  0.]\n",
      " [-0. -0. -0.  0.  0. -0.  0. -0. -0. -0. -0. -0.  0. -0. -0. -0. -0.  0.\n",
      "  -0. -0.  0.  1. -0. -0. -0. -0.]\n",
      " [-0. -0. -0.  0.  0.  0.  0.  0. -0. -0.  0. -0. -0.  0.  0. -0.  0.  0.\n",
      "   0.  0. -0. -0.  1. -0. -0.  0.]\n",
      " [ 0. -0.  0.  0.  0.  0.  0. -0.  0. -0. -0.  0. -0.  0.  0. -0.  0. -0.\n",
      "  -0.  0.  0. -0. -0.  1. -0. -0.]\n",
      " [ 0. -0.  0.  0. -0.  0.  0. -0.  0.  0. -0.  0. -0. -0. -0.  0. -0. -0.\n",
      "   0.  0. -0. -0. -0. -0.  1. -0.]\n",
      " [ 0.  0. -0.  0. -0. -0.  0.  0.  0.  0. -0.  0.  0.  0. -0. -0. -0.  0.\n",
      "   0. -0.  0. -0.  0. -0. -0.  1.]]\n"
     ]
    }
   ],
   "source": [
    "# v is orthogonal, so v @ v.T should print as the identity matrix (up to rounding).\n",
    "print(np.around(np.matmul(v, v.T), decimals=1))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(59, 26)\n",
      "Approximate:  [[0.88718534 1.101284   2.8596613  1.4945695  0.35503674]\n",
      " [2.7006295  3.620013   3.2670913  3.412057   2.3522089 ]\n",
      " [2.8339202  3.9221613  3.0754366  3.6236403  3.101586  ]\n",
      " [3.1373506  4.505858   2.949022   4.10042    3.5543702 ]\n",
      " [2.301337   3.4716864  3.2791522  3.2286947  3.1189191 ]]\n",
      "True:  [[1. 1. 5. 1. 1.]\n",
      " [3. 4. 3. 4. 2.]\n",
      " [3. 4. 3. 4. 3.]\n",
      " [4. 5. 3. 3. 3.]\n",
      " [1. 4. 3. 3. 4.]]\n"
     ]
    }
   ],
   "source": [
    "# Low-rank reconstruction: scale the leading left singular vectors by the truncated\n",
    "# singular values, then map back to movie space with vt. The number of components is\n",
    "# taken from sapprox instead of a hard-coded 26 so the cell follows the data's shape.\n",
    "n_sv = sapprox.shape[0]\n",
    "usigma = u[:, :n_sv] @ np.diag(sapprox)\n",
    "\n",
    "print(usigma.shape)\n",
    "Xapprox = usigma @ vt[:n_sv, :]\n",
    "\n",
    "# X was centred column-wise, so add the per-movie means back to return to the rating scale.\n",
    "meanratings = np.mean(ratings, axis=0)\n",
    "predratings = Xapprox + meanratings\n",
    "print('Approximate: ',predratings[:5,:5])\n",
    "print('True: ',ratings[:5,:5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(-0.37406564, 'Dark Knight (2008)')\n",
      "(-0.31349522, 'No country for old men (2007)')\n",
      "(0.47848177, \"Ocean's 8\")\n",
      "(0.24769787, 'Planet Earth (documentary)')\n",
      "(0.23609659, 'Friends (TV)')\n",
      "(-0.28989413, 'Battlestar Galactica (TV, 2004)')\n",
      "(0.26575094, 'Bohemian Rhapsody (2019)')\n",
      "(-0.30667964, 'Ghost in the shell')\n",
      "\n"
     ]
    }
   ],
   "source": [
    "# Column `ind` (0-based) of v is an eigenvector of X^T X, i.e. a direction in movie space\n",
    "# (the notebook's \"most typical user\" reading).\n",
    "# NOTE(review): ind = 1 selects the SECOND column of v; set ind = 0 for the top eigenvector.\n",
    "ind = 1\n",
    "\n",
    "# Pair each coefficient with its movie title. zip stops at the shorter input, so the\n",
    "# extra trailing column name in dataframe.columns[1:] is simply ignored.\n",
    "attachedv = zip(v[:,ind], dataframe.columns[1:])\n",
    "movies = []\n",
    "for weight, title in attachedv:\n",
    "    # keep only the titles with a large coefficient (in absolute value) on this direction\n",
    "    if np.abs(weight) > .2:\n",
    "        print((weight, title))\n",
    "        movies.append(title)\n",
    "\n",
    "print()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(0.29549047, 'F2') \n",
      " Dark Knight (2008)                 3.0\n",
      "No country for old men (2007)      1.0\n",
      "Ocean's 8                          3.0\n",
      "Planet Earth (documentary)         3.0\n",
      "Friends (TV)                       5.0\n",
      "Battlestar Galactica (TV, 2004)    3.0\n",
      "Bohemian Rhapsody (2019)           4.0\n",
      "Ghost in the shell                 3.0\n",
      "Name: 0, dtype: float64 \n",
      " ---------------\n",
      "(-0.22748804, 'K7') \n",
      " Dark Knight (2008)                 4.0\n",
      "No country for old men (2007)      4.0\n",
      "Ocean's 8                          3.0\n",
      "Planet Earth (documentary)         5.0\n",
      "Friends (TV)                       3.0\n",
      "Battlestar Galactica (TV, 2004)    3.0\n",
      "Bohemian Rhapsody (2019)           5.0\n",
      "Ghost in the shell                 3.0\n",
      "Name: 5, dtype: float64 \n",
      " ---------------\n",
      "(0.2385178, 'R12') \n",
      " Dark Knight (2008)                 4.0\n",
      "No country for old men (2007)      2.0\n",
      "Ocean's 8                          3.0\n",
      "Planet Earth (documentary)         3.0\n",
      "Friends (TV)                       4.0\n",
      "Battlestar Galactica (TV, 2004)    3.0\n",
      "Bohemian Rhapsody (2019)           3.0\n",
      "Ghost in the shell                 3.0\n",
      "Name: 10, dtype: float64 \n",
      " ---------------\n",
      "(0.2217204, 'E41') \n",
      " Dark Knight (2008)                 4.0\n",
      "No country for old men (2007)      3.0\n",
      "Ocean's 8                          5.0\n",
      "Planet Earth (documentary)         5.0\n",
      "Friends (TV)                       1.0\n",
      "Battlestar Galactica (TV, 2004)    2.0\n",
      "Bohemian Rhapsody (2019)           4.0\n",
      "Ghost in the shell                 4.0\n",
      "Name: 39, dtype: float64 \n",
      " ---------------\n",
      "(-0.2834699, 'K44') \n",
      " Dark Knight (2008)                 4.0\n",
      "No country for old men (2007)      2.0\n",
      "Ocean's 8                          4.0\n",
      "Planet Earth (documentary)         4.0\n",
      "Friends (TV)                       5.0\n",
      "Battlestar Galactica (TV, 2004)    1.0\n",
      "Bohemian Rhapsody (2019)           2.0\n",
      "Ghost in the shell                 2.0\n",
      "Name: 42, dtype: float64 \n",
      " ---------------\n",
      "(-0.20115133, 'J47') \n",
      " Dark Knight (2008)                 5.0\n",
      "No country for old men (2007)      2.0\n",
      "Ocean's 8                          5.0\n",
      "Planet Earth (documentary)         5.0\n",
      "Friends (TV)                       4.0\n",
      "Battlestar Galactica (TV, 2004)    5.0\n",
      "Bohemian Rhapsody (2019)           5.0\n",
      "Ghost in the shell                 5.0\n",
      "Name: 45, dtype: float64 \n",
      " ---------------\n",
      "(0.33471763, 'I48') \n",
      " Dark Knight (2008)                 5.0\n",
      "No country for old men (2007)      4.0\n",
      "Ocean's 8                          2.0\n",
      "Planet Earth (documentary)         1.0\n",
      "Friends (TV)                       3.0\n",
      "Battlestar Galactica (TV, 2004)    3.0\n",
      "Bohemian Rhapsody (2019)           3.0\n",
      "Ghost in the shell                 3.0\n",
      "Name: 46, dtype: float64 \n",
      " ---------------\n",
      "(-0.2017926, 'A56') \n",
      " Dark Knight (2008)                 4.0\n",
      "No country for old men (2007)      5.0\n",
      "Ocean's 8                          3.0\n",
      "Planet Earth (documentary)         2.0\n",
      "Friends (TV)                       3.0\n",
      "Battlestar Galactica (TV, 2004)    3.0\n",
      "Bohemian Rhapsody (2019)           2.0\n",
      "Ghost in the shell                 5.0\n",
      "Name: 54, dtype: float64 \n",
      " ---------------\n"
     ]
    }
   ],
   "source": [
    "# uses ind and movies from the previous cell\n",
    "selectedmovies = dataframe[movies]\n",
    "\n",
    "# Row i of u belongs to row i of `ratings`, and `ratings` was cut from dataframe rows\n",
    "# [1:-1]. Pairing u with the unsliced Name column shifts every label (and every printed\n",
    "# row of ratings) by one person, so the same row slice is applied to the names here.\n",
    "names = dataframe['Name'].iloc[1:-1]\n",
    "assert len(names) == u.shape[0], 'u rows and rater names are out of step'\n",
    "attachedu = zip(u[:,ind], names)\n",
    "\n",
    "# num is the dataframe row position of the current rater, hence the start at 1\n",
    "for num, elt in enumerate(attachedu, start=1):\n",
    "    if np.abs(elt[0]) > .2:\n",
    "        print(elt,'\\n', selectedmovies.iloc[num], '\\n ---------------')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# The top eigenvector is a linear combination of all the raters; the coefficients of that\n",
    "# combination are referred to as w1 (w1 itself is not defined in this notebook).\n",
    "# As an approximate gauge of each contribution we look at the unit vector along w1,\n",
    "# which is the first column of the matrix u.\n",
    "\n",
    "u1 = u[:, 0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Squared entries of the unit vector u1: each row's share of the first component.\n",
    "print(np.around(np.square(u1), decimals=3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same breakdown for the second column of u.\n",
    "u2 = u[:, 1]\n",
    "print(np.around(np.square(u2), decimals=3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# top eigenvector of XX^T === first column of u\n",
    "# \"Most typical genre\"\n",
    "\n",
    "u1 = u[:,0]\n",
    "print(u1)\n",
    "\n",
    "# Recall u_1 = X v_1/sqrt(lambda_1), where v_1 is the unit eigenvector of X^TX with eigenvalue lambda_1.\n",
    "# u_1 is the first col of the matrix u, the unit eigenvector of XX^T with the same eigenvalue,\n",
    "# and sqrt(lambda_1) is the first singular value s[0].\n",
    "# v1 was never assigned anywhere in the notebook, so the check below raised a NameError;\n",
    "# it is defined here as the first column of v.\n",
    "v1 = v[:,0]\n",
    "\n",
    "# The difference should print as (approximately) all zeros.\n",
    "print(np.round(u1-X @ (v1 / s[0]),3))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Titles lined up with the columns of `ratings`: drop the leading Name column and the last column.\n",
    "# NOTE(review): this rebinds `movies`, which an earlier cell used for a filtered list of titles.\n",
    "movies = dataframe.columns[1:-1]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Let us examine the contribution of each movie to the eigenvector u_i. Because\n",
    "# u_i = X v_i / sqrt(lambda_i), u_i is a linear combination of the columns of X, and\n",
    "# the components of the vector v_i can be interpreted as\n",
    "# the coefficients of the movies contributing to the eigenvector u_i.\n",
    "# Since the length of v_i is 1, we interpret the square of each component of\n",
    "# v_i as the strength of contribution of the corresponding movie to u_i.\n",
    "\n",
    "# Change the number in the line below to access different eigenvectors.\n",
    "v2 = v[:,4]\n",
    "\n",
    "p = len(dataframe.columns[1:-1])\n",
    "\n",
    "# Square the coefficients once, rather than recomputing v2*v2 on every sort-key call.\n",
    "contrib = v2*v2\n",
    "\n",
    "# movie indices sorted by ascending squared coefficient\n",
    "ind = sorted(range(p), key=lambda k: contrib[k])\n",
    "\n",
    "# Print the 10 movies with the highest contributions to the eigenvector u_i (the last\n",
    "# movie in the list has the highest contribution), then the total squared weight they\n",
    "# account for, then their signed coefficients.\n",
    "top = ind[-10:]\n",
    "print(movies[top])\n",
    "print(np.sum(contrib[top]))\n",
    "print(v2[top])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Full ordering of movie indices as left by the previous cell (weakest contribution first).\n",
    "print(ind)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# All ratings for a single movie: column 23 of `ratings` is dataframe.columns[24],\n",
    "# because the Name column was dropped when `ratings` was built.\n",
    "print(ratings[:, 23])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}