MovieLens with Collaborative Filtering from fastbook 08
from fastai.collab import *
from fastai.tabular.all import *
# Fetch the MovieLens 100k dataset location via fastai's untar_data helper.
path = untar_data(URLs.ML_100k)

# u.data is read as tab-separated with no header row, so column names are
# supplied here; the timestamp column is dropped immediately after loading.
ratings = (
    pd.read_csv(
        path/'u.data',
        delimiter='\t',
        header=None,
        names=['user', 'movie', 'rating', 'timestamp'],
    )
    .drop(columns='timestamp')
)
ratings.head()

# Build the collaborative-filtering DataLoaders, treating 'movie' as the item column.
dls = CollabDataLoaders.from_df(ratings, item_name='movie', bs=64)
dls.show_batch()
L(dls.classes)

# Embedding table sizes are taken from the class vocabularies held by the DataLoaders.
n_users, n_movies = (len(dls.classes[col]) for col in ('user', 'movie'))

# Randomly initialised factor matrices with 5 latent factors each.
# NOTE(review): these two tensors are not referenced again in the visible code;
# the model classes below allocate their own embeddings.
n_factors = 5
user_factors = torch.randn(n_users, n_factors)
movie_factors = torch.randn(n_movies, n_factors)
class DotProduct(Module):
    """Score a (user, movie) pair as the dot product of their embeddings.

    Args:
        n_users: number of rows in the user embedding table.
        n_movies: number of rows in the movie embedding table.
        n_factors: width of each embedding vector.
    """

    def __init__(self, n_users, n_movies, n_factors):
        self.user_factors = Embedding(n_users, n_factors)
        self.movie_factors = Embedding(n_movies, n_factors)

    def forward(self, X):
        """Return one score per row of ``X``.

        Column 0 of ``X`` indexes the user table and column 1 the movie table.
        """
        user_idx, movie_idx = X[:, 0], X[:, 1]
        user_vecs = self.user_factors(user_idx)
        movie_vecs = self.movie_factors(movie_idx)
        # Element-wise product summed over the factor axis == row-wise dot product.
        return (user_vecs * movie_vecs).sum(dim=1)
# Plain dot-product model with 50 factors, trained for 5 one-cycle epochs on MSE.
model = DotProduct(n_users=n_users, n_movies=n_movies, n_factors=50)
learn = Learner(dls=dls, model=model, loss_func=MSELossFlat())
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3)
class DotProductRange(DotProduct):
    """``DotProduct`` whose output is passed through ``sigmoid_range``.

    Args:
        n_users: number of rows in the user embedding table.
        n_movies: number of rows in the movie embedding table.
        n_factors: width of each embedding vector.
        y_range: bounds unpacked into ``sigmoid_range`` after the raw score.
    """

    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        super().__init__(n_users, n_movies, n_factors)
        self.y_range = y_range

    def forward(self, X):
        """Return the parent's dot-product score squashed into ``y_range``."""
        raw_scores = super().forward(X)
        return sigmoid_range(raw_scores, *self.y_range)
# Same training recipe, now with predictions constrained by sigmoid_range.
model = DotProductRange(n_users=n_users, n_movies=n_movies, n_factors=50)
learn = Learner(dls=dls, model=model, loss_func=MSELossFlat())
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3)
class DotProductBias(DotProductRange):
    """Dot-product model with an additive bias per user and per movie.

    The prediction is ``sigmoid_range(dot(user, movie) + user_bias + movie_bias)``.

    Args:
        n_users: number of rows in the user embedding and bias tables.
        n_movies: number of rows in the movie embedding and bias tables.
        n_factors: width of each latent-factor embedding vector.
        y_range: bounds unpacked into ``sigmoid_range`` for the final output.
    """

    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        # Pass the caller's y_range through instead of a hard-coded literal,
        # otherwise a custom range would be silently ignored.
        super().__init__(n_users, n_movies, n_factors, y_range=y_range)
        self.user_bias = Embedding(n_users, 1)
        self.movie_bias = Embedding(n_movies, 1)

    def forward(self, X):
        """Return one bounded score per row of ``X``.

        Column 0 of ``X`` indexes the user tables and column 1 the movie tables.
        """
        # Use the raw dot product from DotProduct: DotProductRange.forward
        # already applies sigmoid_range, and the biases must be added before
        # (not after) the single squashing step.
        y = DotProduct.forward(self, X)
        # Bias lookups have a trailing size-1 axis; drop it so they line up
        # with the one-score-per-row tensor instead of broadcasting.
        y = y + self.user_bias(X[:,0]).squeeze(-1)
        y = y + self.movie_bias(X[:,1]).squeeze(-1)
        return sigmoid_range(y, *self.y_range)
# Bias model, first without an explicit weight-decay argument...
model = DotProductBias(n_users=n_users, n_movies=n_movies, n_factors=50)
learn = Learner(dls=dls, model=model, loss_func=MSELossFlat())
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3)

# ...then a freshly initialised copy trained with wd=0.1.
model = DotProductBias(n_users=n_users, n_movies=n_movies, n_factors=50)
learn = Learner(dls=dls, model=model, loss_func=MSELossFlat())
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3, wd=0.1)

# fastai's own collaborative-filtering learner with the same hyper-parameters.
learn = collab_learner(dls=dls, n_factors=50, y_range=(0, 5.5))
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3, wd=0.1)
learn.model

# Embedding sizes derived from the DataLoaders by get_emb_sz.
embs = get_emb_sz(dls)
embs
class CollabNN(Module):
    """Collaborative-filtering net: concatenated embeddings fed to a small MLP.

    Args:
        user_sz: arguments unpacked into the user ``Embedding``; index 1 is
            read as its output width.
        item_sz: arguments unpacked into the item ``Embedding``; index 1 is
            read as its output width.
        y_range: bounds unpacked into ``sigmoid_range`` for the final output.
        n_act: number of hidden activations in the single hidden layer.
    """

    def __init__(self, user_sz, item_sz, y_range=(0, 5.5), n_act=100):
        self.user_factors = Embedding(*user_sz)
        self.item_factors = Embedding(*item_sz)
        # The MLP input is the two embedding vectors laid side by side.
        concat_width = user_sz[1] + item_sz[1]
        stages = [
            nn.Linear(concat_width, n_act),
            nn.ReLU(),
            nn.Linear(n_act, 1),
        ]
        self.layers = nn.Sequential(*stages)
        self.y_range = y_range

    def forward(self, X):
        """Return the bounded MLP output for each row of ``X``.

        Column 0 of ``X`` indexes the user table and column 1 the item table.
        """
        user_emb = self.user_factors(X[:, 0])
        item_emb = self.item_factors(X[:, 1])
        joined = torch.cat((user_emb, item_emb), dim=1)
        return sigmoid_range(self.layers(joined), *self.y_range)
# Hand-written neural collaborative-filtering model, sized from get_emb_sz's output.
model = CollabNN(*embs)
learn = Learner(dls=dls, model=model, loss_func=MSELossFlat())
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3, wd=0.01)

# The library's neural variant, with a single hidden layer of 100 units.
learn = collab_learner(dls=dls, use_nn=True, y_range=(0, 5.5), layers=[100])
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3, wd=0.01)