{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import numpy.linalg as lin\n", "\n", "import matplotlib.pyplot as plt\n", "from mpl_toolkits import mplot3d\n", "\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameOnce upon a time in Hollywood (2019)Dark Knight (2008)Farenheit 9/11 (2004)Gladiator (2000)No country for old men (2007)Ocean's 8Call me by your name (2018)Singin' in the Rain (1952)Groundhog Day (1993)...Parks and Recreation (TV)Friends (TV)Veronica Mars (TV)Battlestar Galactica (TV, 2004)Brokeback Mountain (2005)Sense8 (TV)Bohemian Rhapsody (2019)GodfatherGhost in the shellAttack on Titan
0F2335313343...4513434.0335.0
1S3115113325...3411115.0311.0
2D4343424325...5433442.0334.0
3K5343433322...3325333.0222.0
4C6453334333...2322322.0444.0
..................................................................
56A58251232333...2412234.0345.0
57G59451321123...2214243.0524.0
58B60455523234...2544444.0555.0
59M61342423134...2314345.044NaN
60J62323233234...3223334.0243.0
\n", "

61 rows × 28 columns

\n", "
" ], "text/plain": [ " Name Once upon a time in Hollywood (2019) Dark Knight (2008) \\\n", "0 F2 3 3 \n", "1 S3 1 1 \n", "2 D4 3 4 \n", "3 K5 3 4 \n", "4 C6 4 5 \n", ".. ... ... ... \n", "56 A58 2 5 \n", "57 G59 4 5 \n", "58 B60 4 5 \n", "59 M61 3 4 \n", "60 J62 3 2 \n", "\n", " Farenheit 9/11 (2004) Gladiator (2000) No country for old men (2007) \\\n", "0 5 3 1 \n", "1 5 1 1 \n", "2 3 4 2 \n", "3 3 4 3 \n", "4 3 3 3 \n", ".. ... ... ... \n", "56 1 2 3 \n", "57 1 3 2 \n", "58 5 5 2 \n", "59 2 4 2 \n", "60 3 2 3 \n", "\n", " Ocean's 8 Call me by your name (2018) Singin' in the Rain (1952) \\\n", "0 3 3 4 \n", "1 3 3 2 \n", "2 4 3 2 \n", "3 3 3 2 \n", "4 4 3 3 \n", ".. ... ... ... \n", "56 2 3 3 \n", "57 1 1 2 \n", "58 3 2 3 \n", "59 3 1 3 \n", "60 3 2 3 \n", "\n", " Groundhog Day (1993) ... Parks and Recreation (TV) Friends (TV) \\\n", "0 3 ... 4 5 \n", "1 5 ... 3 4 \n", "2 5 ... 5 4 \n", "3 2 ... 3 3 \n", "4 3 ... 2 3 \n", ".. ... ... ... ... \n", "56 3 ... 2 4 \n", "57 3 ... 2 2 \n", "58 4 ... 2 5 \n", "59 4 ... 2 3 \n", "60 4 ... 3 2 \n", "\n", " Veronica Mars (TV) Battlestar Galactica (TV, 2004) \\\n", "0 1 3 \n", "1 1 1 \n", "2 3 3 \n", "3 2 5 \n", "4 2 2 \n", ".. ... ... \n", "56 1 2 \n", "57 1 4 \n", "58 4 4 \n", "59 1 4 \n", "60 2 3 \n", "\n", " Brokeback Mountain (2005) Sense8 (TV) Bohemian Rhapsody (2019) \\\n", "0 4 3 4.0 \n", "1 1 1 5.0 \n", "2 4 4 2.0 \n", "3 3 3 3.0 \n", "4 3 2 2.0 \n", ".. ... ... ... \n", "56 2 3 4.0 \n", "57 2 4 3.0 \n", "58 4 4 4.0 \n", "59 3 4 5.0 \n", "60 3 3 4.0 \n", "\n", " Godfather Ghost in the shell Attack on Titan \n", "0 3 3 5.0 \n", "1 3 1 1.0 \n", "2 3 3 4.0 \n", "3 2 2 2.0 \n", "4 4 4 4.0 \n", ".. ... ... ... \n", "56 3 4 5.0 \n", "57 5 2 4.0 \n", "58 5 5 5.0 \n", "59 4 4 NaN \n", "60 2 4 3.0 \n", "\n", "[61 rows x 28 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dataframe = pd.read_csv(\"movieratings.csv\")\n", "dataframe.head(n=100)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# we skip the first rwo (empty, full of nans) and the first col (names).\n", "ratings = dataframe.to_numpy()[1:-1,1:-1].astype(np.float32)\n", "\n", "# there may be stray examples not rated, get rid of them by replacing nan by 3, the \"meh\" rating\n", "x = np.isnan(ratings)\n", "ratings[x]=3\n", "means = np.mean(ratings, axis=0)\n", "X = ratings - means\n", "\n", "u,s,vt = lin.svd(X)\n", "\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(59, 26) (59, 59) (26,) (26, 26)\n", "[[1. 1. 5. ... 5. 3. 1.]\n", " [3. 4. 3. ... 2. 3. 3.]\n", " [3. 4. 3. ... 3. 2. 2.]\n", " ...\n", " [4. 5. 1. ... 3. 5. 2.]\n", " [4. 5. 5. ... 4. 5. 5.]\n", " [3. 4. 2. ... 5. 4. 4.]]\n" ] } ], "source": [ "print(X.shape, u.shape, s.shape, vt.shape)\n", "\n", "print(np.round(u[:,:26] @ np.diag(s[:26]) @vt[:26,:] +means,2))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(59, 26)\n", "[18.85 12.87 11.89 10.81 10.24 9.14 8.91 8.62 8.01 7.46 7.27 6.67\n", " 6.36 6.17 5.85 5.59 5.27 4.84 4.77 4.38 3.97 3.63 3.41 3.26\n", " 2.79 2.47]\n" ] } ], "source": [ "print(ratings.shape)\n", "print(np.round(s,2))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[18.85 12.871 11.89 10.81 10.241 9.14 8.906 8.619 8.01 7.459\n", " 7.267 6.672 6.36 6.17 5.848 5.593 5.268 4.835 4.769 4.382\n", " 3.969 3.632 3.413 3.256 2.79 2.469]\n", "(26,)\n", "[18.85 12.87 11.89 10.81 10.24 0. 0. 0. 0. 0. 0. 0.\n", " 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", " 0. 0. ]\n" ] } ], "source": [ "print(np.round(s,3))\n", "print(s.shape)\n", "\n", "sapprox = s\n", "sapprox[5:] =0\n", "print(np.round(sapprox,2))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(26, 26)\n" ] } ], "source": [ "v = vt.T\n", "print(v.shape)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 1. -0. 0. 0. -0. -0. -0. -0. 0. 0. 0. -0. 0. -0. -0. -0. -0. -0.\n", " -0. -0. 0. -0. -0. 0. 0. 0.]\n", " [-0. 1. 0. -0. -0. 0. -0. -0. -0. 0. 0. 0. -0. -0. 0. -0. -0. -0.\n", " 0. 0. 0. -0. -0. -0. -0. 0.]\n", " [ 0. 0. 1. -0. -0. 0. -0. 0. 0. -0. -0. -0. -0. -0. -0. 0. 0. -0.\n", " 0. -0. 0. -0. -0. 0. 0. -0.]\n", " [ 0. -0. -0. 1. -0. -0. -0. 0. -0. 0. -0. -0. 0. -0. -0. -0. 0. 0.\n", " 0. -0. -0. 0. 0. 0. 0. 0.]\n", " [-0. -0. -0. -0. 1. -0. 0. 0. -0. -0. -0. 0. -0. 0. 0. 0. -0. 0.\n", " 0. -0. -0. 0. 0. 0. -0. -0.]\n", " [-0. 0. 0. -0. -0. 1. 0. -0. -0. -0. -0. -0. -0. 0. 0. -0. 0. 0.\n", " -0. -0. -0. -0. 0. 0. 0. -0.]\n", " [-0. -0. -0. -0. 0. 0. 1. 0. 0. -0. 0. 0. 0. -0. 0. 0. 0. -0.\n", " 0. 0. 0. 0. 0. 0. 0. 0.]\n", " [-0. -0. 0. 0. 0. -0. 0. 1. -0. -0. 0. -0. 0. -0. 0. -0. -0. -0.\n", " -0. -0. 0. -0. 0. -0. -0. 0.]\n", " [ 0. -0. 0. -0. -0. -0. 0. -0. 1. -0. -0. -0. 0. -0. 0. 0. 0. 0.\n", " 0. -0. 0. -0. -0. 0. 0. 0.]\n", " [ 0. 0. -0. 0. -0. -0. -0. -0. -0. 1. 0. -0. 0. -0. 0. 0. 0. 0.\n", " -0. 0. 0. -0. -0. -0. 0. 0.]\n", " [ 0. 0. -0. -0. -0. -0. 0. 0. -0. 0. 1. -0. -0. -0. -0. -0. 0. 0.\n", " 0. 0. -0. -0. 0. -0. -0. -0.]\n", " [-0. 0. -0. -0. 0. -0. 0. -0. -0. -0. -0. 1. 0. -0. 0. -0. 0. -0.\n", " -0. -0. 0. -0. -0. 0. 0. 0.]\n", " [ 0. -0. -0. 0. -0. -0. 0. 0. 0. 0. -0. 0. 1. -0. 0. -0. -0. -0.\n", " 0. -0. -0. 0. -0. -0. -0. 0.]\n", " [-0. -0. -0. -0. 0. 0. -0. -0. -0. -0. -0. -0. -0. 1. -0. -0. -0. -0.\n", " 0. -0. 0. -0. 0. 0. -0. 0.]\n", " [-0. 0. -0. -0. 0. 0. 0. 0. 0. 0. -0. 0. 0. -0. 1. 0. 0. 0.\n", " -0. 0. 0. -0. 0. 0. -0. -0.]\n", " [-0. -0. 0. -0. 0. -0. 0. -0. 0. 0. -0. -0. -0. -0. 0. 1. 0. -0.\n", " 0. 0. -0. -0. -0. -0. 0. -0.]\n", " [-0. -0. 0. 0. -0. 0. 0. -0. 0. 0. 0. 0. -0. -0. 0. 0. 1. -0.\n", " -0. 0. 0. -0. 0. 0. -0. -0.]\n", " [-0. -0. -0. 0. 0. 0. -0. -0. 0. 0. 0. -0. -0. -0. 0. -0. -0. 1.\n", " 0. -0. -0. 0. 0. -0. -0. 0.]\n", " [-0. 0. 0. 0. 0. -0. 0. -0. 0. -0. 0. -0. 0. 0. -0. 0. -0. 0.\n", " 1. 0. 0. -0. 0. -0. 0. 0.]\n", " [-0. 0. -0. -0. -0. -0. 0. -0. -0. 0. 0. -0. -0. -0. 0. 0. 0. -0.\n", " 0. 1. 0. -0. 0. 0. 0. -0.]\n", " [ 0. 0. 0. -0. -0. -0. 0. 0. 0. 0. -0. 0. -0. 0. 0. -0. 0. -0.\n", " 0. 0. 1. 0. -0. 0. -0. 0.]\n", " [-0. -0. -0. 0. 0. -0. 0. -0. -0. -0. -0. -0. 0. -0. -0. -0. -0. 0.\n", " -0. -0. 0. 1. -0. -0. -0. -0.]\n", " [-0. -0. -0. 0. 0. 0. 0. 0. -0. -0. 0. -0. -0. 0. 0. -0. 0. 0.\n", " 0. 0. -0. -0. 1. -0. -0. 0.]\n", " [ 0. -0. 0. 0. 0. 0. 0. -0. 0. -0. -0. 0. -0. 0. 0. -0. 0. -0.\n", " -0. 0. 0. -0. -0. 1. -0. -0.]\n", " [ 0. -0. 0. 0. -0. 0. 0. -0. 0. 0. -0. 0. -0. -0. -0. 0. -0. -0.\n", " 0. 0. -0. -0. -0. -0. 1. -0.]\n", " [ 0. 0. -0. 0. -0. -0. 0. 0. 0. 0. -0. 0. 0. 0. -0. -0. -0. 0.\n", " 0. -0. 0. -0. 0. -0. -0. 1.]]\n" ] } ], "source": [ "print(np.round(v @ v.T,1))" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(59, 26)\n", "Approximate: [[0.88718534 1.101284 2.8596613 1.4945695 0.35503674]\n", " [2.7006295 3.620013 3.2670913 3.412057 2.3522089 ]\n", " [2.8339202 3.9221613 3.0754366 3.6236403 3.101586 ]\n", " [3.1373506 4.505858 2.949022 4.10042 3.5543702 ]\n", " [2.301337 3.4716864 3.2791522 3.2286947 3.1189191 ]]\n", "True: [[1. 1. 5. 1. 1.]\n", " [3. 4. 3. 4. 2.]\n", " [3. 4. 3. 4. 3.]\n", " [4. 5. 3. 3. 3.]\n", " [1. 4. 3. 3. 4.]]\n" ] } ], "source": [ "usigma = u[:,:26] @ np.diag(sapprox)\n", "\n", "print(usigma.shape)\n", "Xapprox = usigma @ vt\n", "\n", "meanratings = np.mean(ratings, axis=0)\n", "predratings = Xapprox + meanratings\n", "print('Approximate: ',predratings[:5,:5])\n", "print('True: ',ratings[:5,:5])" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(-0.37406564, 'Dark Knight (2008)')\n", "(-0.31349522, 'No country for old men (2007)')\n", "(0.47848177, \"Ocean's 8\")\n", "(0.24769787, 'Planet Earth (documentary)')\n", "(0.23609659, 'Friends (TV)')\n", "(-0.28989413, 'Battlestar Galactica (TV, 2004)')\n", "(0.26575094, 'Bohemian Rhapsody (2019)')\n", "(-0.30667964, 'Ghost in the shell')\n", "\n" ] } ], "source": [ "# top eigenvector of X^TX=== first column of v \n", "# \"Most typical user\"\n", "ind = 1\n", "#print(v[:,ind])\n", "\n", "attachedv = zip(v[:,ind], dataframe.columns[1:])\n", "movies = []\n", "for elt in iter(attachedv):\n", " if np.abs(elt[0]) > .2:\n", " print(elt)\n", " movies.append(elt[1])\n", " \n", "print()" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(0.29549047, 'F2') \n", " Dark Knight (2008) 3.0\n", "No country for old men (2007) 1.0\n", "Ocean's 8 3.0\n", "Planet Earth (documentary) 3.0\n", "Friends (TV) 5.0\n", "Battlestar Galactica (TV, 2004) 3.0\n", "Bohemian Rhapsody (2019) 4.0\n", "Ghost in the shell 3.0\n", "Name: 0, dtype: float64 \n", " ---------------\n", "(-0.22748804, 'K7') \n", " Dark Knight (2008) 4.0\n", "No country for old men (2007) 4.0\n", "Ocean's 8 3.0\n", "Planet Earth (documentary) 5.0\n", "Friends (TV) 3.0\n", "Battlestar Galactica (TV, 2004) 3.0\n", "Bohemian Rhapsody (2019) 5.0\n", "Ghost in the shell 3.0\n", "Name: 5, dtype: float64 \n", " ---------------\n", "(0.2385178, 'R12') \n", " Dark Knight (2008) 4.0\n", "No country for old men (2007) 2.0\n", "Ocean's 8 3.0\n", "Planet Earth (documentary) 3.0\n", "Friends (TV) 4.0\n", "Battlestar Galactica (TV, 2004) 3.0\n", "Bohemian Rhapsody (2019) 3.0\n", "Ghost in the shell 3.0\n", "Name: 10, dtype: float64 \n", " ---------------\n", "(0.2217204, 'E41') \n", " Dark Knight (2008) 4.0\n", "No country for old men (2007) 3.0\n", "Ocean's 8 5.0\n", "Planet Earth (documentary) 5.0\n", "Friends (TV) 1.0\n", "Battlestar Galactica (TV, 2004) 2.0\n", "Bohemian Rhapsody (2019) 4.0\n", "Ghost in the shell 4.0\n", "Name: 39, dtype: float64 \n", " ---------------\n", "(-0.2834699, 'K44') \n", " Dark Knight (2008) 4.0\n", "No country for old men (2007) 2.0\n", "Ocean's 8 4.0\n", "Planet Earth (documentary) 4.0\n", "Friends (TV) 5.0\n", "Battlestar Galactica (TV, 2004) 1.0\n", "Bohemian Rhapsody (2019) 2.0\n", "Ghost in the shell 2.0\n", "Name: 42, dtype: float64 \n", " ---------------\n", "(-0.20115133, 'J47') \n", " Dark Knight (2008) 5.0\n", "No country for old men (2007) 2.0\n", "Ocean's 8 5.0\n", "Planet Earth (documentary) 5.0\n", "Friends (TV) 4.0\n", "Battlestar Galactica (TV, 2004) 5.0\n", "Bohemian Rhapsody (2019) 5.0\n", "Ghost in the shell 5.0\n", "Name: 45, dtype: float64 \n", " ---------------\n", "(0.33471763, 'I48') \n", " Dark Knight (2008) 5.0\n", "No country for old men (2007) 4.0\n", "Ocean's 8 2.0\n", "Planet Earth (documentary) 1.0\n", "Friends (TV) 3.0\n", "Battlestar Galactica (TV, 2004) 3.0\n", "Bohemian Rhapsody (2019) 3.0\n", "Ghost in the shell 3.0\n", "Name: 46, dtype: float64 \n", " ---------------\n", "(-0.2017926, 'A56') \n", " Dark Knight (2008) 4.0\n", "No country for old men (2007) 5.0\n", "Ocean's 8 3.0\n", "Planet Earth (documentary) 2.0\n", "Friends (TV) 3.0\n", "Battlestar Galactica (TV, 2004) 3.0\n", "Bohemian Rhapsody (2019) 2.0\n", "Ghost in the shell 5.0\n", "Name: 54, dtype: float64 \n", " ---------------\n" ] } ], "source": [ "# uses ind from previous cell\n", "#print(u[:,ind])\n", "selectedmovies = dataframe[movies]\n", "attachedu = zip(u[:,ind], dataframe['Name'])\n", "\n", "num= 0\n", "for elt in iter(attachedu):\n", " if np.abs(elt[0]) > .2:\n", " print(elt,'\\n', selectedmovies.iloc[num], '\\n ---------------')\n", " num +=1" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Top eigenvector is a linear combination of all of you. The coefficients of the linear combination are in w1. \n", "# As an approximate gauge of the contribution, we look at the unit vector along w1 (ie, the first column of \n", "# the matrix u)\n", "\n", "u1= u[:,0]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(np.round(u1*u1,3))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "u2 = u[:,1]\n", "print(np.round(u2*u2,3))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# top eigenvector of XX^T=== first column of u \n", "# \"Most typical genre\"\n", "\n", "u1 = u[:,0]\n", "print(u[:,0])\n", "\n", "# Recall u_1 = X v_1/sqrt(lambda_1) where v_1 was the unit eigenvector of X^TX with eigenvalue lambda_1\n", "# u_1 is the first col of the matrix u, the eigenvector of length 1 of XX^T with eigenvalue lambda_1\n", "\n", "print(np.round(u1-X @ (v1 / s[0]),3))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "movies=(dataframe.columns[1:-1])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Let us examine the contribution of each movie to the eigenvector u_i. Because\n", "# u_i = X v_i /\\sqrt{lambda}, u_i is a linear combination of the cols of X, and\n", "# that the components of the vector v_i can be interpreted as\n", "# the coefficients of the movies contributing to the eigenvector u_i.\n", "# Since length of v_i is 1, we will interpret the square of each component of\n", "# v_i as the strength of contribution of the corresponding movie to u_i\n", "\n", "# Change the number in the line below to access different eigenvectors.\n", "v2= v[:,4]\n", "\n", "p = len(dataframe.columns[1:-1])\n", "\n", "# the following line sorts the squared coefficients of v2\n", "ind = sorted(range(p), key = lambda k: (v2*v2)[k])\n", "\n", "# and prints the movies with the 5 highest contributions to the eigenvector u\n", "# the last movie in the list has the highest contribution.\n", "print(movies[ind[-10:]])\n", "print(np.sum((v2*v2)[ind[-10:]]))\n", "print(v2[ind[-10:]])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(ind)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "print(ratings[:,23])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }