[ Recommender System ]

15. [code] DeepFM : A Factorization-Machine based Neural Network for CTR prediction

( 참고 : Fastcampus 추천시스템 강의 )

paper : DeepFM : A Factorization-Machine based Neural Network for CTR prediction ( Guo et al., 2017 )

( https://arxiv.org/pdf/1703.04247.pdf )



Torchfm 중 DeepFM 에 focus

#!pip install torchfm
import numpy as np
import torch
import torch.nn.functional as F
import torchfm


(sigmoid 들어가기전의) output :

self.linear(x) + self.fm(embed_x) + self.mlp(embed_x.view(-1, self.embed_output_dim))

  • raw input이 Linear한 함수를 통과
  • embedded input이 FM을 통과
  • embedded input이 MLP를 통과

$\rightarrow$ 위 세개의 합이 sigmoid activation function을 통과해서 최종적인 click 확률 예측!


1. feat_linear : linear 함수

class feat_linear(torch.nn.Module):
    """First-order (linear) term of DeepFM.

    Stores one scalar weight per categorical value of every field in a single
    shared Embedding table, and returns the sum of the looked-up weights plus
    a global bias.

    Args:
        field_dim_list: number of distinct categorical values in each field.
        output_dim: width of the linear output (default 1, a single logit).
    """

    def __init__(self, field_dim_list, output_dim=1):
        super().__init__()
        self.fc = torch.nn.Embedding(sum(field_dim_list), output_dim)
        self.bias = torch.nn.Parameter(torch.zeros((output_dim,)))
        # Cumulative offsets map each field's local index into the shared table.
        # NOTE: np.long was removed in NumPy 1.24 — use the explicit int64 dtype.
        self.offsets = np.array((0, *np.cumsum(field_dim_list)[:-1]), dtype=np.int64)

    def forward(self, x):
        """x: LongTensor (batch, num_fields) of per-field category ids.

        Returns: (batch, output_dim) linear term.
        """
        # Shift each field's local id by its offset into the shared embedding.
        x = x + x.new_tensor(self.offsets).unsqueeze(0)
        # Sum the per-field weights over the field axis, then add the bias.
        return torch.sum(self.fc(x), dim=1) + self.bias


2. feat_embedding : Embedding을 수행하는 함수

class feat_embedding(torch.nn.Module):
    """Shared dense embedding for all categorical fields.

    All fields share a single Embedding table; per-field offsets map each
    field's local category id to its row in the table. The embedded vectors
    feed both the FM and MLP components of DeepFM.

    Args:
        field_dim_list: number of distinct categorical values in each field.
        embed_dim: dimensionality of each embedding vector.
    """

    def __init__(self, field_dim_list, embed_dim):
        super().__init__()
        self.embedding = torch.nn.Embedding(sum(field_dim_list), embed_dim)
        # NOTE: np.long was removed in NumPy 1.24 — use the explicit int64 dtype.
        self.offsets = np.array((0, *np.cumsum(field_dim_list)[:-1]), dtype=np.int64)
        torch.nn.init.xavier_uniform_(self.embedding.weight.data)

    def forward(self, x):
        """x: LongTensor (batch, num_fields) of per-field category ids.

        Returns: (batch, num_fields, embed_dim) embedded features.
        """
        # Shift each field's local id by its offset into the shared table.
        x = x + x.new_tensor(self.offsets).unsqueeze(0)
        return self.embedding(x)


3. FM : Factorization Machine

class FM(torch.nn.Module):
    """Second-order factorization-machine interaction term.

    Computes 0.5 * ((sum_i v_i)^2 - sum_i v_i^2) over the field axis — the
    standard O(n*k) reformulation of all pairwise embedding interactions.

    Args:
        reduce_sum: if True, sum the interaction over the embedding dimension
            to return a (batch, 1) scalar term; otherwise keep (batch, embed_dim).
    """

    def __init__(self, reduce_sum=True):
        super().__init__()
        self.reduce_sum = reduce_sum

    def forward(self, x):
        # x: (batch, num_fields, embed_dim)
        square_of_sum = x.sum(dim=1).pow(2)
        sum_of_square = x.pow(2).sum(dim=1)
        interaction = square_of_sum - sum_of_square
        if self.reduce_sum:
            interaction = interaction.sum(dim=1, keepdim=True)
        return 0.5 * interaction


4. MLP : Multi-Layer Perceptron

class MLP(torch.nn.Module):
    """Deep component of DeepFM: a feed-forward tower ending in a single logit.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> (optional Dropout),
    followed by a final Linear projection to 1 output.

    Args:
        input_dim: width of the flattened embedded input.
        embed_dims: hidden-layer widths, one stage per entry.
        dropout: dropout probability; a falsy value (0/None) skips the
            Dropout layer entirely.
    """

    def __init__(self, input_dim, embed_dims, dropout):
        super().__init__()
        layers = []
        for embed_dim in embed_dims:
            layers.append(torch.nn.Linear(input_dim, embed_dim))
            layers.append(torch.nn.BatchNorm1d(embed_dim))
            layers.append(torch.nn.ReLU())
            if dropout:
                # BUG FIX: this line was indented with a tab in a space-indented
                # file — mixed tabs/spaces raise TabError in Python 3.
                layers.append(torch.nn.Dropout(p=dropout))
            input_dim = embed_dim
        layers.append(torch.nn.Linear(input_dim, 1))
        self.layers = torch.nn.Sequential(*layers)

    def forward(self, x):
        """x: (batch, input_dim) -> (batch, 1) raw (pre-sigmoid) logit."""
        return self.layers(x)


5. DeepFM : 최종적인 Deep Factorization Machine

class DeepFM(torch.nn.Module):
    """DeepFM (Guo et al., 2017): linear + FM + deep parts with a shared embedding.

    The predicted click probability is
    sigmoid( linear(x) + FM(embed(x)) + MLP(flatten(embed(x))) ).

    Args:
        field_dim_list: number of distinct categorical values per field.
        embed_dim: embedding dimensionality shared by FM and MLP.
        mlp_dims: hidden-layer widths of the deep component.
        dropout: dropout probability for the deep component.
    """

    def __init__(self, field_dim_list, embed_dim, mlp_dims, dropout):
        super().__init__()
        self.linear = feat_linear(field_dim_list)
        self.fm = FM(reduce_sum=True)
        self.embedding = feat_embedding(field_dim_list, embed_dim)
        self.embed_output_dim = len(field_dim_list) * embed_dim
        self.mlp = MLP(self.embed_output_dim, mlp_dims, dropout)

    def forward(self, x):
        """x: LongTensor (batch, num_fields) of category ids -> (batch,) probabilities."""
        embedded = self.embedding(x)
        flat = embedded.view(-1, self.embed_output_dim)
        logit = self.linear(x) + self.fm(embedded) + self.mlp(flat)
        return torch.sigmoid(logit.squeeze(1))

Categories:

Updated: