{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f0fa5808-3393-4d51-98de-687aa0fe98fe",
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "from math import sqrt\n",
    "from numpy.random import normal\n",
    "import plotly.graph_objs as go\n",
    "import plotly.express as px\n",
    "import csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "797519fa-1175-4e69-a236-49b82eecc991",
   "metadata": {},
   "outputs": [],
   "source": [
    "def indicesOfRatings(matrix):\n",
    "    non_nan_indices = np.where(~np.isnan(matrix))\n",
    "    return list(zip(non_nan_indices[0],non_nan_indices[1]))\n",
    "    \n",
    "def rmse(original,reproduction,indexSet):\n",
    "    s=0\n",
    "    for row,col in indexSet:\n",
    "        s+=pow(original[row,col]-reproduction[row,col],2)\n",
    "    s=s/len(indexSet)\n",
    "    return sqrt(s)\n",
    "    \n",
    "def initializePQ(size,k):\n",
    "    n,m=size\n",
    "    P=normal(scale=.5,size=(n,k))\n",
    "    Q=normal(scale=.5,size=(k,m))\n",
    "    return P,Q\n",
    "\n",
    "def epoch(original,p,q,stepsize,train,test):\n",
    "    \"\"\"Run one stochastic-gradient pass over the training entries.\n",
    "\n",
    "    For every (row, col) in train, row `row` of p is updated from the current\n",
    "    prediction error, the error is recomputed with the updated row, and then\n",
    "    column `col` of q is updated. p and q are modified in place.\n",
    "\n",
    "    Args:\n",
    "        original: matrix of known values, indexed as original[row, col].\n",
    "        p: left factor; p @ q is the approximation of original.\n",
    "        q: right factor.\n",
    "        stepsize: multiplier applied to every update.\n",
    "        train: collection of (row, col) pairs used for the updates and for\n",
    "            the training error.\n",
    "        test: collection of (row, col) pairs used only for the test error.\n",
    "\n",
    "    Returns:\n",
    "        Tuple (p, q, trainerror, testerror); both errors come from rmse on\n",
    "        the product p @ q after all updates.\n",
    "    \"\"\"\n",
    "    for row,col in train:\n",
    "        err=original[row,col]-p[row,:]@q[:,col]\n",
    "        p[row,:]=p[row,:]+stepsize*q[:,col].T*err\n",
    "        # the error is refreshed with the new row of p before q is touched\n",
    "        err=original[row,col]-p[row,:]@q[:,col]\n",
    "        q[:,col]=q[:,col]+stepsize*p[row,:].T*err\n",
    "    # build the dense product once and score both index sets against it\n",
    "    reproduction=p@q\n",
    "    trainerror=rmse(original,reproduction,train)\n",
    "    testerror=rmse(original,reproduction,test)\n",
    "    return p,q,trainerror,testerror"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5a9f4c7a-d535-44f9-9133-5ff1eca4429a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Directory that holds the CSV files; getmovies and getratings read\n",
    "# movies.csv and ratings.csv from inside it.\n",
    "dataset='ml-latest-small'\n",
    "\n",
    "def getmovies():\n",
    "    movies=dict() #make a dictionary that goes from movieID-1 to the movie name\n",
    "    with open(dataset+'/movies.csv') as f:\n",
    "        f.readline()\n",
    "        reader=csv.reader(f,delimiter=',')\n",
    "        for row in reader:\n",
    "            key=int(row[0])\n",
    "            movies[key]=row[1]\n",
    "    return movies\n",
    "\n",
    "def getratings():\n",
    "    df=pd.read_csv(dataset+'/ratings.csv')\n",
    "    df2=df.pivot(columns=['movieId'],index=['userId'],values=['rating'])\n",
    "    return df2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d9f1dc4d-46e9-42e0-a6b0-81abff2c77d8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build the userId-by-movieId rating table and the movieId-to-title lookup.\n",
    "ratings=getratings()\n",
    "movies=getmovies()\n",
    "# Bare last expression so the notebook renders the rating table.\n",
    "ratings"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "126e6ded-3131-450b-b97f-ab6128ed06a4",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
