Restricted Boltzmann Machine (RBM)

Referring to https://github.com/GabrielBianconi/pytorch-rbm, I implemented an RBM in PyTorch.

(Momentum and weight decay (L2 regularization), which were not covered in the RBM theory post, are added here.)

Using the implemented model, we will recommend movies based on the ratings each user has left for individual movies.
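For reference, the weight update implemented in CD_k below combines the contrastive divergence gradient with a momentum buffer and weight decay. Using the code's names ($m$ = mom_coef, $\eta$ = lr, $\lambda$ = w_decay, $B$ = mini-batch size), each step roughly performs

$$\Delta W \leftarrow m\,\Delta W + \left(\langle v h^{\top}\rangle_{\text{data}} - \langle v h^{\top}\rangle_{\text{recon}}\right), \qquad W \leftarrow W + \frac{\eta}{B}\,\Delta W - \lambda W$$

and the visible/hidden biases are updated analogously from the data-versus-reconstruction differences.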

1. Import Data & Libraries

import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

# MovieLens 100K "u1" split: tab-separated files with user, movie, rating, timestamp columns
train = pd.read_csv('C:\\Users\\samsung\\Downloads\\u1.base.csv', sep='\t', header=None)
test = pd.read_csv('C:\\Users\\samsung\\Downloads\\u1.test.csv', sep='\t', header=None)
train.columns = ['user', 'movie', 'rating', 'time']
test.columns = ['user', 'movie', 'rating', 'time']

2. Data Preprocessing

def convert_table(data):
    # Pivot to a user x movie matrix, then binarize the ratings:
    # ratings >= 3 become 1 ("liked"), ratings below 3 become 0, and unrated movies are filled with 0.
    table = pd.pivot_table(data, values='rating', index=['user'], columns=['movie'])
    table[table < 3] = 0
    table[table >= 3] = 1
    table.fillna(0, inplace=True)
    return table

train_t = convert_table(train)
test_t = convert_table(test)
train_t.head()
movie 1 2 3 4 5 6 7 8 9 10 ... 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682
user
1 1.0 1.0 1.0 1.0 1.0 0.0 1.0 0.0 1.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
5 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

5 rows × 1650 columns


Train = np.array(train_t,dtype='int')
Test = np.array(test_t,dtype='int')
Train.shape, Test.shape
((943, 1650), (459, 1410))
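Note that the test pivot only contains the 459 users and 1,410 movies that appear in u1.test, so its columns do not line up with the 1,650 training columns. If the test matrix is later compared against the trained model, one way to align it is a pandas reindex; a minimal sketch (Test_aligned is a name introduced here, not part of the original code):

# Give the test table the same movie columns as the training table,
# filling movies that are absent from the test split with 0.
test_t_aligned = test_t.reindex(columns=train_t.columns, fill_value=0)
Test_aligned = np.array(test_t_aligned, dtype='int')
Test_aligned.shape    # (459, 1650)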

3. Restricted Boltzmann Machine

import torch
from torch.utils.data import DataLoader

batch_size = 32
train_tensor = torch.Tensor(Train)                             # (943, 1650) binary user x movie matrix
train_data = DataLoader(train_tensor, batch_size=batch_size)   # iterate over users in mini-batches

class RBM():
    def __init__(self,num_v,num_h,k,lr,mom_coef,w_decay,cuda=False):
        self.num_v = num_v
        self.num_h = num_h
        self.k  = k
        self.lr = lr
        self.mom_coef = mom_coef
        self.w_decay = w_decay
        self.cuda = cuda
        
        # weight(w) & bias(b)
        self.w = torch.randn(num_v,num_h)*0.1
        self.b_v = torch.ones(num_v) *0.5
        self.b_h = torch.zeros(num_h)
        
        # momentum(mom)
        self.w_mom = torch.zeros(num_v,num_h)        
        self.b_v_mom = torch.zeros(num_v)
        self.b_h_mom = torch.zeros(num_h)
        
        if self.cuda:
            self.w = self.w.cuda()
            self.b_v = self.b_v.cuda()
            self.b_h = self.b_h.cuda()

            self.w_mom = self.w_mom.cuda()
            self.b_v_mom = self.b_v_mom.cuda()
            self.b_h_mom = self.b_h_mom.cuda()
    
    def sig(self,x):
        return 1 / (1+torch.exp(-x))
    
    def rand_prob(self,num):
        rand_prob = torch.rand(num)
        if self.cuda :
            rand_prob = rand_prob.cuda()
        return rand_prob
    
    def sample_h(self, prob_v):
        # P(h = 1 | v): hidden unit probabilities given the visible units
        h_act = torch.matmul(prob_v, self.w) + self.b_h
        h_prob = self.sig(h_act)
        return h_prob
    
    def sample_v(self, prob_h):
        # P(v = 1 | h): visible unit probabilities given the hidden units
        v_act = torch.matmul(prob_h, self.w.t()) + self.b_v
        v_prob = self.sig(v_act)
        return v_prob
    
    def CD_k(self, x):
        # Positive phase: hidden probabilities/activations driven by the data
        pos_h_prob = self.sample_h(x)
        pos_h_act = (pos_h_prob >= self.rand_prob(self.num_h)).float()
        pos = torch.matmul(x.t(), pos_h_act)
        
        # Negative phase: k steps of Gibbs sampling, starting from the data-driven hidden states
        h_act = pos_h_act
        for _ in range(self.k):
            v_prob = self.sample_v(h_act)
            h_prob = self.sample_h(v_prob)
            h_act = (h_prob >= self.rand_prob(self.num_h)).float()
        
        neg_v_prob = v_prob
        neg_h_prob = h_prob
        neg = torch.matmul(neg_v_prob.t(), neg_h_prob)
        
        # Momentum-smoothed gradients: <v h>_data - <v h>_recon for the weights,
        # and the summed visible/hidden differences for the biases
        self.w_mom *= self.mom_coef
        self.w_mom += (pos - neg)
        
        self.b_v_mom *= self.mom_coef
        self.b_v_mom += torch.sum(x - neg_v_prob, dim=0)
        self.b_h_mom *= self.mom_coef
        self.b_h_mom += torch.sum(pos_h_prob - neg_h_prob, dim=0)
        
        # Parameter update (scaled by mini-batch size), then weight decay (L2 regularization)
        batch_size = x.size(0)
        self.w += self.w_mom * self.lr / batch_size
        self.b_v += self.b_v_mom * self.lr / batch_size
        self.b_h += self.b_h_mom * self.lr / batch_size
        self.w -= self.w * self.w_decay
        
        # Squared reconstruction error, used only for monitoring training
        error = torch.sum((x - neg_v_prob) ** 2)
        return error
        
num_v = len(train_tensor[0])    # number of visible units = number of movies (1650)
num_h = 200                     # number of hidden units
k = 5                           # Gibbs sampling steps per CD-k update
lr = 0.001
mom_coef = 0.9                  # momentum coefficient
w_decay = 0.001                 # weight decay (L2 regularization) strength
epochs = 30
cuda = False

rbm = RBM(num_v, num_h, k, lr, mom_coef, w_decay, cuda)
for epoch in range(1,epochs+1):
    epoch_error = 0
    for batch in train_data:        
        batch = batch.view(len(batch),num_v)
        if cuda:
            batch = batch.cuda()

        batch_error = rbm.CD_k(batch)
        epoch_error += batch_error
    if epoch%5==0:
        print('Error (epoch=%d): %.4f' % (epoch, epoch_error))
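After training, the RBM can be used for the recommendation step described at the top: reconstruct a user's visible vector and rank the movies they have not already marked as liked by their reconstruction probability. A minimal sketch (user_idx and the top-10 cutoff are arbitrary choices introduced here, not part of the reference code; assumes cuda=False as above):

# Score one user's movies by the RBM's reconstruction probabilities.
user_idx = 0                                   # arbitrary user (row of the training matrix)
v = train_tensor[user_idx].unsqueeze(0)        # (1, num_v) binary rating vector
h_prob = rbm.sample_h(v)                       # hidden probabilities for this user
v_prob = rbm.sample_v(h_prob)                  # reconstructed visible probabilities

scores = v_prob.squeeze(0).clone()
scores[v.squeeze(0) == 1] = -1                 # mask movies already marked 1 (rated >= 3)
top10 = torch.topk(scores, 10).indices         # column positions of the 10 highest scores
recommended = train_t.columns[top10.numpy()]   # map column positions back to movie ids
print(recommended.tolist())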