from fastai.collab import *
from fastai.tabular.all import *
# Resolve the MovieLens 100k dataset to a local directory and list its
# contents so the available files (u.data, u.item, ...) can be inspected.
path = untar_data(url=URLs.ML_100k)
path.ls()
(#23) [Path('/home/doyu/.fastai/data/ml-100k/ub.test'),Path('/home/doyu/.fastai/data/ml-100k/u5.test'),Path('/home/doyu/.fastai/data/ml-100k/u4.test'),Path('/home/doyu/.fastai/data/ml-100k/u.occupation'),Path('/home/doyu/.fastai/data/ml-100k/u.genre'),Path('/home/doyu/.fastai/data/ml-100k/u.item'),Path('/home/doyu/.fastai/data/ml-100k/ub.base'),Path('/home/doyu/.fastai/data/ml-100k/u2.test'),Path('/home/doyu/.fastai/data/ml-100k/README'),Path('/home/doyu/.fastai/data/ml-100k/ua.test')...]
# u.data is tab-separated with no header row, so column names are supplied
# at read time; the timestamp column is dropped immediately afterwards.
ratings = (
    pd.read_csv(
        path/'u.data',
        sep='\t',
        header=None,
        names=['user', 'movie', 'rating', 'timestamp'],
    )
    .drop(columns=['timestamp'])
)
ratings.head()
user movie rating
0 196 242 3
1 186 302 3
2 22 377 1
3 244 51 2
4 166 346 1
# Wrap the ratings frame in collaborative-filtering DataLoaders (batch size
# 64), spelling out which column plays which role, then preview one batch.
dls = CollabDataLoaders.from_df(
    ratings,
    user_name='user',
    item_name='movie',
    rating_name='rating',
    bs=64,
)
dls.show_batch()
user movie rating
0 821 181 4
1 495 140 5
2 922 699 3
3 548 472 2
4 276 262 4
5 432 3 3
6 311 1222 3
7 618 125 3
8 715 735 4
9 45 597 3
# Size each factor table from the number of classes the DataLoaders hold
# for the corresponding column.
n_users, n_movie = (len(dls.classes[col]) for col in ('user', 'movie'))

# Width of every latent-factor vector.
n_factors = 5

# Randomly initialised factor matrices: one row per user class / movie class.
# user_factors is drawn first so the random stream is consumed in the same
# order as before.
user_factors = torch.randn(n_users, n_factors)
movie_factors = torch.randn(n_movie, n_factors)
tuple(factors.shape for factors in (user_factors, movie_factors))
(torch.Size([944, 5]), torch.Size([1653, 5]))
# One-hot vectors of length n_users for indices 3 and 5, cast to float so
# they can take part in the matrix product with user_factors below.
one_hot_3 = one_hot(3, n_users).float()
one_hot_5 = one_hot(5, n_users).float()

# Stack the two vectors as columns (dim=1), giving an (n_users, 2) matrix.
# The result deliberately gets its own name: rebinding `one_hot` here would
# replace the helper function called above with a tensor, so re-running this
# cell (or any later `one_hot(...)` call) would fail with
# "'Tensor' object is not callable".
one_hot_pair = torch.stack([one_hot_3, one_hot_5], dim=1)
one_hot_pair.shape
torch.Size([944, 2])
# Transposing gives a (2, n_users) selector; multiplying it by user_factors
# picks out the factor rows for the two chosen indices.
one_hot_pair.t() @ user_factors
tensor([[ 0.1457,  0.3334, -1.6283, -1.3542, -1.1832],
        [-0.8224,  0.4091,  1.4074,  0.2336, -0.8085]])
# Direct row lookup for indices 3 and 5; the printed values match the
# one-hot matrix product shown above.
user_factors[[3, 5]]
tensor([[ 0.1457,  0.3334, -1.6283, -1.3542, -1.1832],
        [-0.8224,  0.4091,  1.4074,  0.2336, -0.8085]])