[ Recommender System ]
15. [code] DeepFM : A Factorization-Machine based Neural Network for CTR prediction
( 참고 : Fastcampus 추천시스템 강의 )
paper : DeepFM : A Factorization-Machine based Neural Network for CTR prediction ( Guo et al., 2017 )
( https://arxiv.org/pdf/1703.04247.pdf )
Torchfm 中 DeepFM 에 focus
#!pip install torchfm
import numpy as np
import torch
import torch.nn.functional as F
import torchfm
(sigmoid 들어가기전의) output :
self.linear(x) + self.fm(embed_x) + self.mlp(embed_x.view(-1, self.embed_output_dim))
- raw input이 Linear한 함수를 통과
- embedded input이 FM을 통과
- embedded input이 MLP를 통과
$\rightarrow$ 위 세개의 합이 sigmoid activation function을 통과해서 최종적인 click 확률 예측!
1. feat_linear
: linear 함수
class feat_linear(torch.nn.Module):
    """First-order (linear) term over a set of categorical fields.

    All fields share one embedding table with ``sum(field_dim_list)`` rows;
    each row stores the ``output_dim`` weights of one (field, value) pair.
    The forward pass sums the looked-up weights over the fields and adds a
    learned bias.

    Args:
        field_dim_list: Number of distinct values of each field, in the same
            order as the columns of the input passed to ``forward``.
        output_dim: Number of outputs produced per sample.
    """

    def __init__(self, field_dim_list, output_dim=1):
        super().__init__()
        self.fc = torch.nn.Embedding(sum(field_dim_list), output_dim)
        self.bias = torch.nn.Parameter(torch.zeros((output_dim,)))
        # First row of each field inside the shared table: (0, d0, d0+d1, ...).
        # np.int64 is spelled out because the ``np.long`` alias is missing
        # from some NumPy releases and has a platform-dependent width in
        # others.
        self.offsets = np.array(
            (0, *np.cumsum(field_dim_list)[:-1]), dtype=np.int64
        )

    def forward(self, x):
        """Return the summed per-field weights plus the bias.

        Args:
            x: Integer tensor of per-field value indices, expected to be of
                shape ``(batch_size, num_fields)``.

        Returns:
            Tensor of shape ``(batch_size, output_dim)``.
        """
        # Shift each column so that every field addresses its own row range.
        x = x + x.new_tensor(self.offsets).unsqueeze(0)
        return torch.sum(self.fc(x), dim=1) + self.bias
2. feat_embedding
: Embedding을 수행하는 함수
class feat_embedding(torch.nn.Module):
    """Embed every categorical field into an ``embed_dim``-sized vector.

    All fields share one embedding table with ``sum(field_dim_list)`` rows;
    per-field offsets map each field's local index to its row range.

    Args:
        field_dim_list: Number of distinct values of each field, in the same
            order as the columns of the input passed to ``forward``.
        embed_dim: Size of each embedding vector.
    """

    def __init__(self, field_dim_list, embed_dim):
        super().__init__()
        self.embedding = torch.nn.Embedding(sum(field_dim_list), embed_dim)
        # First row of each field inside the shared table: (0, d0, d0+d1, ...).
        # np.int64 is spelled out because the ``np.long`` alias is missing
        # from some NumPy releases and has a platform-dependent width in
        # others.
        self.offsets = np.array(
            (0, *np.cumsum(field_dim_list)[:-1]), dtype=np.int64
        )
        # xavier_uniform_ fills the parameter in place without tracking
        # gradients, so going through ``.data`` is not needed.
        torch.nn.init.xavier_uniform_(self.embedding.weight)

    def forward(self, x):
        """Look up the embedding of every field value.

        Args:
            x: Integer tensor of per-field value indices, expected to be of
                shape ``(batch_size, num_fields)``.

        Returns:
            Tensor of shape ``(batch_size, num_fields, embed_dim)``.
        """
        # Shift each column so that every field addresses its own row range.
        x = x + x.new_tensor(self.offsets).unsqueeze(0)
        return self.embedding(x)
3. FM
: Factorization Machine
class FM(torch.nn.Module):
    """Second-order interaction term of a factorization machine.

    Computes ``0.5 * ((sum_i v_i)^2 - sum_i v_i^2)`` along dimension 1 of the
    input, which equals the sum of element-wise products over all pairs of
    slices taken along that dimension.

    Args:
        reduce_sum: When true, the result is additionally summed over
            dimension 1 (keeping that dimension with size 1).
    """

    def __init__(self, reduce_sum=True):
        super().__init__()
        self.reduce_sum = reduce_sum

    def forward(self, x):
        """Return the pairwise-interaction term for ``x``.

        Args:
            x: Tensor whose dimension 1 indexes the vectors to interact;
                presumably ``(batch_size, num_fields, embed_dim)``.

        Returns:
            ``(batch_size, 1)`` when ``reduce_sum`` is set, otherwise the
            unreduced tensor with dimension 1 of ``x`` summed out.
        """
        square_of_sum = x.sum(dim=1).pow(2)
        sum_of_square = x.pow(2).sum(dim=1)
        interaction = square_of_sum - sum_of_square
        if not self.reduce_sum:
            return 0.5 * interaction
        return 0.5 * interaction.sum(dim=1, keepdim=True)
4. MLP
: Multi-Layer Perceptron
class MLP(torch.nn.Module):
    """Feed-forward network ending in a single-output linear layer.

    Every hidden stage is ``Linear -> BatchNorm1d -> ReLU``, followed by a
    ``Dropout`` layer when ``dropout`` is truthy.

    Args:
        input_dim: Number of input features.
        embed_dims: Iterable with the width of each hidden layer, in order.
        dropout: Dropout probability; a falsy value disables dropout layers.
    """

    def __init__(self, input_dim, embed_dims, dropout):
        super().__init__()
        stack = []
        in_features = input_dim
        for out_features in embed_dims:
            stack += [
                torch.nn.Linear(in_features, out_features),
                torch.nn.BatchNorm1d(out_features),
                torch.nn.ReLU(),
            ]
            if dropout:
                stack.append(torch.nn.Dropout(p=dropout))
            # The next stage consumes what this one produced.
            in_features = out_features
        # Final projection down to one output per sample.
        stack.append(torch.nn.Linear(in_features, 1))
        self.layers = torch.nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the result."""
        return self.layers(x)
5. DeepFM
: 최종적인 Deep Factorization Machine
class DeepFM(torch.nn.Module):
    """DeepFM model: linear term + FM interaction term + MLP, then sigmoid.

    The FM and MLP components both consume the same field embeddings; the
    linear component consumes the raw field indices.

    Args:
        field_dim_list: Number of distinct values of each input field.
        embed_dim: Size of each field embedding.
        mlp_dims: Hidden layer widths passed to the MLP component.
        dropout: Dropout setting passed to the MLP component.
    """

    def __init__(self, field_dim_list, embed_dim, mlp_dims, dropout):
        super().__init__()
        num_fields = len(field_dim_list)
        self.linear = feat_linear(field_dim_list)
        self.fm = FM(reduce_sum=True)
        self.embedding = feat_embedding(field_dim_list, embed_dim)
        # Width of the flattened embeddings that feed the MLP.
        self.embed_output_dim = num_fields * embed_dim
        self.mlp = MLP(self.embed_output_dim, mlp_dims, dropout)

    def forward(self, x):
        """Return the sigmoid of the summed component outputs.

        Args:
            x: Tensor of per-field value indices; presumably of shape
                ``(batch_size, num_fields)``.

        Returns:
            Tensor with dimension 1 squeezed out, holding values in (0, 1).
        """
        field_vectors = self.embedding(x)
        first_order = self.linear(x)
        second_order = self.fm(field_vectors)
        # Flatten all field embeddings into one feature vector per sample.
        deep = self.mlp(field_vectors.view(-1, self.embed_output_dim))
        logit = first_order + second_order + deep
        return torch.sigmoid(logit.squeeze(1))