Deep Walk with Softmax

# Skip-Gram # SGD # Embedding Vector

  • Goal: take the Karate Club graph as input and represent each node as a 2-dimensional embedding vector.

1. Import Dataset

import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
import random
import pandas as pd
from random import shuffle
from copy import copy

%matplotlib inline
edge = pd.read_csv('karate_club.edgelist', sep=' ', names=['x','y','w'])

Drawing the graph with networkx gives the following shape.

graph = nx.Graph()
for i in range(edge.shape[0]):
    graph.add_node(node_for_adding = edge['x'][i])
    graph.add_node(node_for_adding = edge['y'][i])
    graph.add_edge(edge['x'][i], edge['y'][i])
nx.draw(graph, with_labels=True)

(figure: the Karate Club graph drawn with networkx)

1) Adjacency Matrix

Adjacency matrix (1: connected, 0: not connected)

A = nx.to_numpy_matrix(graph, nodelist=sorted(graph.nodes()))
A
matrix([[0., 1., 1., ..., 1., 0., 0.],
        [1., 0., 1., ..., 0., 0., 0.],
        [1., 1., 0., ..., 0., 1., 0.],
        ...,
        [1., 0., 0., ..., 0., 1., 1.],
        [0., 0., 1., ..., 1., 0., 1.],
        [0., 0., 0., ..., 1., 1., 0.]])
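
A quick sanity check (not part of the original post): the club has 34 members and the graph is undirected, so A should be a symmetric 34 x 34 matrix.

print(A.shape)            # (34, 34)
print((A == A.T).all())   # True : symmetric adjacency matrix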

2) Input Node Vector ( One-Hot encoded )

The input vectors (the 34-dimensional identity matrix, i.e. one one-hot vector per node)

OH = np.identity(34)
OH
array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]])



2. Define Functions

  • 1) random step
  • 2) softmax
  • 3) feed forward
  • 4) back propagation

1) Random Walk

  • take w random steps starting from node i
def random_step(i,w):
    walk_list = []
    walk_list.append(i)
    for k in range(w):
        ad = np.nonzero(A[i])[1] # list of vertices adjacent to i
        rand = random.choice(ad) # pick one of them at random
        walk_list.append(rand)
        i = rand
    return walk_list
  • ex) take 10 steps, starting from node index ‘3’
random_step(3,10)
[3, 12, 3, 0, 31, 33, 19, 1, 21, 0, 19]
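
As a small hedged check (this snippet is not in the original post), every pair of consecutive nodes in a walk should be connected by an edge:

walk = random_step(3, 10)
print(all(A[walk[k], walk[k+1]] == 1 for k in range(len(walk)-1)))   # True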

2) Softmax

def softmax(x):
    c = np.max(x)               # subtract the max for numerical stability
    b = x - c
    exp_x = np.exp(b)
    sum_exp_x = np.sum(exp_x)
    y = exp_x / sum_exp_x
    return y
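
Subtracting the maximum before exponentiating keeps np.exp from overflowing while leaving the result unchanged. A quick check with illustrative values (not from the original post):

x = np.array([1000., 1001., 1002.])
print(softmax(x))                          # [0.09003057 0.24472847 0.66524096], same as softmax of [0, 1, 2]
print(np.isclose(softmax(x).sum(), 1.0))   # True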

3) Feed Forward

def feedforward(input_word,index,w1,w2):
    h = np.matmul(w1.T, input_word[index])   # hidden layer: embedding of the center node
    u = np.matmul(w2.T, h)                   # scores over all 34 nodes
    y = softmax(u)                           # predicted context distribution
    return h,u,y
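
For reference, with 34 nodes and a 2-dimensional hidden layer the shapes work out as follows (w1_toy and w2_toy are illustrative stand-ins for the weights trained below):

w1_toy = np.random.random((34, 2))   # input -> hidden
w2_toy = np.random.random((2, 34))   # hidden -> output
h, u, y = feedforward(OH, 0, w1_toy, w2_toy)
print(h.shape, u.shape, y.shape)     # (2,) (34,) (34,)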

4) Back Propagation

  • update weights using backpropagation
def backprop(input_word,w1,w2,lr,h,y_pred,index,window_size):
    # one-hot vectors of the context nodes around position `index`
    front = input_word[max(index-window_size, 0) : index]   # max() keeps the slice valid near the start of the walk
    back = input_word[index+1 : index+window_size+1]
    window_OH = np.concatenate([front,back])

    # output -> hidden
    for j in range(w2.shape[1]):
        adjust = (y_pred-window_OH)[:,j].sum()*h
        w2[:,j] -= lr*adjust

    # hidden -> input
    adjust2 = ((y_pred-window_OH).sum(axis=0)*w2).T
    w1 -= lr*adjust2
    return w1,w2



3. Deep Walk

  • Implement DeepWalk with the functions defined above.
def Deepwalk(input_word, reduced_dim, lr, walk_size, window_size, epoch):
    W1 = np.random.random((input_word.shape[0], reduced_dim))
    W2 = np.random.random((reduced_dim, input_word.shape[0]))

    nodes = list(range(input_word.shape[0]))
    for _ in range(epoch):
        shuffle(nodes)   # visit the start nodes in a random order each epoch
        for index in nodes:
            RW = input_word[random_step(index, walk_size)]   # one-hot vectors of the walked nodes
            for i in range(len(RW)):
                h, u, y = feedforward(RW, i, W1, W2)
                W1, W2 = backprop(RW, W1, W2, lr, h, y, i, window_size)

    return W1, W2



4. Result

  • reduced_dim (target embedding dimension) : 2
  • lr (learning rate) : 0.001
  • walk_size (length of each random walk) : 15
  • window_size (context window size on each side) : 5
  • epoch (number of training epochs) : 5
w1,w2 = Deepwalk(OH,reduced_dim=2, lr=0.001,
                 walk_size=15,window_size=5,epoch=5)
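
The 2-dimensional embedding of node i is the i-th row of w1. A minimal way to look at the result (the exact layout depends on the random initialization and the sampled walks, so it differs from run to run):

plt.figure(figsize=(6, 6))
plt.scatter(w1[:, 0], w1[:, 1])
for node in range(w1.shape[0]):
    plt.annotate(str(node), (w1[node, 0], w1[node, 1]))
plt.title('DeepWalk embedding of the Karate Club graph')
plt.show()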