from fastai.collab import *
from fastai.tabular.all import *

# Fetch the MovieLens 100k dataset through fastai and list the files in the
# directory it returns.
path = untar_data(URLs.ML_100k)
path.ls()

(#23) [Path('/home/doyu/.fastai/data/ml-100k/ub.test'),Path('/home/doyu/.fastai/data/ml-100k/u5.test'),Path('/home/doyu/.fastai/data/ml-100k/u4.test'),Path('/home/doyu/.fastai/data/ml-100k/u.occupation'),Path('/home/doyu/.fastai/data/ml-100k/u.genre'),Path('/home/doyu/.fastai/data/ml-100k/u.item'),Path('/home/doyu/.fastai/data/ml-100k/ub.base'),Path('/home/doyu/.fastai/data/ml-100k/u2.test'),Path('/home/doyu/.fastai/data/ml-100k/README'),Path('/home/doyu/.fastai/data/ml-100k/ua.test')...]

# Load the tab-separated ratings file, which has no header row. The four
# columns are named at parse time and only the three that are used are kept,
# so the unused `timestamp` column never enters the frame.
ratings = pd.read_csv(
    path/'u.data',
    sep='\t',
    header=None,
    names=['user', 'movie', 'rating', 'timestamp'],
    usecols=['user', 'movie', 'rating'],
)
# Preview the first rows as the cell output.
ratings.head()

# Build collaborative-filtering DataLoaders from the ratings frame with a
# batch size of 64. `item_name='movie'` names the item column explicitly; the
# user and rating columns are left to `from_df`'s defaults (presumably the
# first and third columns of the frame — confirm against the fastai docs).
dls = CollabDataLoaders.from_df(ratings, item_name='movie', bs=64)
# Display a sample batch to sanity-check the loaded rows.
dls.show_batch()

# Sizes of the user and movie vocabularies as reported by the DataLoaders
# (these may include a fastai placeholder class in addition to the raw ids —
# TODO confirm).
n_users = len(dls.classes['user']) 
n_movie = len(dls.classes['movie'])

# Randomly initialised latent-factor tables: one row of `n_factors` values
# per user and per movie.
n_factors = 5
user_factors = torch.randn(n_users, n_factors)
movie_factors = torch.randn(n_movie, n_factors)
# Shown as the cell result to confirm both tables are (count, n_factors).
user_factors.shape, movie_factors.shape

(torch.Size([944, 5]), torch.Size([1653, 5]))

# One-hot vectors of length `n_users` with a single 1 at index 3 and at
# index 5 respectively, converted to float so they can be matrix-multiplied
# with the float factor table.
one_hot_3 = one_hot(3, n_users).float()
one_hot_5 = one_hot(5, n_users).float()

# Stack the two vectors as columns into an (n_users, 2) selector matrix.
# It is deliberately NOT named `one_hot`: rebinding that name would shadow
# fastai's `one_hot` function used just above, and re-running this cell would
# then fail with "'Tensor' object is not callable".
one_hot_pair = torch.stack([one_hot_3, one_hot_5], dim=1)
one_hot_pair.shape

torch.Size([944, 2])

# Transposing gives a (2, n_users) matrix; multiplying by the factor table
# picks out the factor rows for users 3 and 5.
one_hot_pair.t() @ user_factors

tensor([[ 0.1457,  0.3334, -1.6283, -1.3542, -1.1832],
        [-0.8224,  0.4091,  1.4074,  0.2336, -0.8085]])

# Index rows 3 and 5 of the factor table directly. The recorded output matches
# the one-hot matrix product from the previous cell, so the lookup can be done
# without building a one-hot matrix at all.
user_factors[[3,5],:]

tensor([[ 0.1457,  0.3334, -1.6283, -1.3542, -1.1832],
        [-0.8224,  0.4091,  1.4074,  0.2336, -0.8085]])

	user	movie	rating
0	196	242	3
1	186	302	3
2	22	377	1
3	244	51	2
4	166	346	1

	user	movie	rating
0	821	181	4
1	495	140	5
2	922	699	3
3	548	472	2
4	276	262	4
5	432	3	3
6	311	1222	3
7	618	125	3
8	715	735	4
9	45	597	3