# MovieLens with Collaborative Filtering from fastbook 08
from fastai.collab import *
from fastai.tabular.all import *
# Fetch the MovieLens 100k dataset through fastai's `untar_data` helper.
path = untar_data(URLs.ML_100k)

# `u.data` is read as a tab-separated file with no header row, so the column
# names are supplied explicitly.
ratings = pd.read_csv(
    path/'u.data',
    sep='\t',
    header=None,
    names=['user', 'movie', 'rating', 'timestamp'],
)
# The timestamp column is not used by anything below.
ratings = ratings.drop(labels='timestamp', axis='columns')
ratings.head()

# Build the collaborative-filtering DataLoaders, with `movie` as the item column.
dls = CollabDataLoaders.from_df(ratings, item_name='movie', bs=64)
dls.show_batch()
L(dls.classes)

# Vocabulary sizes for the user and movie columns.
n_users, n_movies = (len(dls.classes[col]) for col in ('user', 'movie'))

# Randomly initialised latent-factor matrices, one row per user / per movie.
n_factors = 5
user_factors = torch.randn(n_users, n_factors)
movie_factors = torch.randn(n_movies, n_factors)
class DotProduct(Module):
    """Collaborative-filtering model that scores a (user, movie) pair by the
    dot product of their embedding vectors.

    Args:
        n_users: number of rows in the user embedding table.
        n_movies: number of rows in the movie embedding table.
        n_factors: width of each embedding vector.
    """
    def __init__(self, n_users, n_movies, n_factors):
        self.user_factors = Embedding(n_users, n_factors)
        self.movie_factors = Embedding(n_movies, n_factors)
    def forward(self, X):
        """Return one score per row of `X`.

        Column 0 of `X` indexes the user embedding and column 1 indexes the
        movie embedding.
        """
        user_ids, movie_ids = X[:,0], X[:,1]
        user_vecs = self.user_factors(user_ids)
        movie_vecs = self.movie_factors(movie_ids)
        # Element-wise product summed over the factor axis == row-wise dot product.
        interaction = user_vecs * movie_vecs
        return interaction.sum(dim=1)
# Baseline: plain dot-product model with 50 latent factors, trained with MSE
# loss for 5 epochs using the one-cycle schedule (peak learning rate 5e-3).
model = DotProduct(n_users, n_movies, n_factors=50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, lr_max=5e-3)
class DotProductRange(DotProduct):
    """`DotProduct` whose output is squashed into `y_range` with `sigmoid_range`.

    Args:
        n_users: number of rows in the user embedding table.
        n_movies: number of rows in the movie embedding table.
        n_factors: width of each embedding vector.
        y_range: `(low, high)` bounds passed to `sigmoid_range`.
    """
    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        super().__init__(n_users, n_movies, n_factors)
        self.y_range = y_range
    def forward(self, X):
        """Return the parent's dot-product scores mapped into `self.y_range`."""
        raw_scores = super().forward(X)
        low, high = self.y_range
        return sigmoid_range(raw_scores, low, high)
# Same training recipe as the baseline, but with predictions constrained to
# the model's default y_range.
model = DotProductRange(n_users, n_movies, n_factors=50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, lr_max=5e-3)
class DotProductBias(DotProductRange):
    """Dot-product collaborative-filtering model with per-user and per-movie
    bias terms.

    The score for a (user, movie) pair is
    ``sigmoid_range(user_vec . movie_vec + user_bias + movie_bias, *y_range)``.

    Args:
        n_users: number of rows in the user embedding and user bias tables.
        n_movies: number of rows in the movie embedding and movie bias tables.
        n_factors: width of each latent-factor embedding vector.
        y_range: `(low, high)` bounds passed to `sigmoid_range`.
    """
    def __init__(self, n_users, n_movies, n_factors, y_range=(0,5.5)):
        # Forward the caller's y_range instead of a hard-coded default, so a
        # custom range is actually honoured.
        super().__init__(n_users, n_movies, n_factors, y_range=y_range)
        self.user_bias = Embedding(n_users, 1)
        self.movie_bias = Embedding(n_movies, 1)
    def forward(self, X):
        """Return one score per row of `X`, mapped into `self.y_range`.

        Column 0 of `X` indexes the user tables and column 1 indexes the
        movie tables.
        """
        # Start from the *raw* dot product. DotProductRange.forward would
        # already apply sigmoid_range, and the biases must be added before
        # the (single) squashing step.
        y = DotProduct.forward(self, X)
        # The bias embeddings have a trailing dimension of size 1; drop it so
        # they line up with the one-score-per-row dot product.
        user_bias = self.user_bias(X[:,0]).squeeze(-1)
        movie_bias = self.movie_bias(X[:,1]).squeeze(-1)
        y = y + user_bias + movie_bias
        return sigmoid_range(y, *self.y_range)
# Fit DotProductBias with the same recipe as the earlier models.
model = DotProductBias(n_users, n_movies, n_factors=50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, lr_max=5e-3)

# Start again from a freshly initialised DotProductBias, this time passing a
# weight decay of 0.1 to the training loop.
model = DotProductBias(n_users, n_movies, n_factors=50)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, lr_max=5e-3, wd=0.1)

# Equivalent setup using fastai's built-in `collab_learner` helper.
learn = collab_learner(dls, n_factors=50, y_range=(0, 5.5))
learn.fit_one_cycle(5, lr_max=5e-3, wd=0.1)
learn.model

# Embedding sizes computed by fastai for the DataLoaders; consumed by
# CollabNN below.
embs = get_emb_sz(dls)
embs
class CollabNN(Module):
    """Neural collaborative-filtering model: user and item embeddings are
    concatenated and fed through a small fully connected network.

    Args:
        user_sz: arguments unpacked into `Embedding` for users; element 1 is
            used as the user embedding width.
        item_sz: arguments unpacked into `Embedding` for items; element 1 is
            used as the item embedding width.
        y_range: `(low, high)` bounds passed to `sigmoid_range`.
        n_act: number of units in the hidden linear layer.
    """
    def __init__(self, user_sz, item_sz, y_range=(0, 5.5), n_act=100):
        self.user_factors = Embedding(*user_sz)
        self.item_factors = Embedding(*item_sz)
        # The first linear layer consumes the concatenated embeddings, so its
        # input width is the sum of both embedding widths.
        concat_width = user_sz[1] + item_sz[1]
        self.layers = nn.Sequential(
            nn.Linear(concat_width, n_act),
            nn.ReLU(),
            nn.Linear(n_act, 1),
        )
        self.y_range = y_range
    def forward(self, X):
        """Return the network output for each row of `X`, mapped into `self.y_range`.

        Column 0 of `X` indexes the user embedding and column 1 indexes the
        item embedding.
        """
        user_vecs = self.user_factors(X[:,0])
        item_vecs = self.item_factors(X[:,1])
        joined = torch.cat((user_vecs, item_vecs), dim=1)
        low, high = self.y_range
        return sigmoid_range(self.layers(joined), low, high)
# Train the hand-written neural model; `embs` supplies the user and item
# embedding sizes as the first two positional arguments.
model = CollabNN(*embs)
learn = Learner(dls, model, loss_func=MSELossFlat())
learn.fit_one_cycle(5, lr_max=5e-3, wd=0.01)

# The same kind of model via fastai's `collab_learner` with `use_nn=True` and
# a single hidden layer of 100 units.
learn = collab_learner(dls, use_nn=True, y_range=(0, 5.5), layers=[100])
learn.fit_one_cycle(5, lr_max=5e-3, wd=0.01)