[ CS224W - Colab 0 ]

( 참고 : CS224W: Machine Learning with Graphs )

import networkx as nx

1. Graph

Directed & Undirected graph

G_undirected = nx.Graph()
G_directed = nx.DiGraph()

Graph Level attribute

G_undirected.graph['graph_attr1']="A"

2. Node

Add node ( + Node Level attribute )

# Build an undirected graph and give each of its ten nodes two attributes.
G = nx.Graph()

num_nodes = 10
node_attr1s = [1, 3, 5, 7, 9, 11, 13, 15, 17, 19]
node_attr2s = [10, 20, 30, 40, 50, 60, 70, 80, 90, 100]

# Walk both attribute lists in lockstep; the position doubles as the node id.
for idx, (first_attr, second_attr) in enumerate(zip(node_attr1s, node_attr2s)):
    G.add_node(idx, attr1=first_attr, attr2=second_attr)

Add multiple nodes

G.add_nodes_from([
  (10, {"attr1": 21, "attr2": 110}),
  (11, {"attr1": 23, "attr2": 120})
]) 

Get node attributes

node_0_attr = G.nodes[0]
print(node_0_attr)

# Node 0 has the attributes {'attr1': 1, 'attr2': 10}

Print all nodes

# With data=True each item is a (node id, attribute dict) pair.
for node in G.nodes(data=True):
  print(node)
  
# Without it only the node ids are yielded.
for node in G.nodes():
  print(node)

(0, {'attr1': 1, 'attr2': 10})
(1, {'attr1': 3, 'attr2': 20})
(2, {'attr1': 5, 'attr2': 30})
....

0
1
2
...

Number of nodes :

num_nodes = G.number_of_nodes()

3. Edge

Add edge

G.add_edge(0, 1, weight=0.5)

Add multiple edges

G.add_edges_from([
  (1, 2, {"weight": 0.3}),
  (2, 0, {"weight": 0.1})
])

Print all edges

for edge in G.edges():
  print(edge)

(0, 1)
(0, 2)
(1, 2)

Get edge attributes

print(G.edges[(0, 1)])

# Edge (0, 1) has the attributes {'weight': 0.5}

Number of edges :

num_edges = G.number_of_edges()

4. Visualization

nx.draw(G, with_labels = True)

5. Degree & Neighbors

node_id = 2

# degree of the chosen node (the original line had a stray closing parenthesis)
G.degree[node_id]

# neighbors of the chosen node
G.neighbors(node_id)

6. Pagerank

Pagerank of nodes

num_nodes = 4
# A 4-node path graph wrapped in a DiGraph, so PageRank is run on a directed graph.
G = nx.DiGraph(nx.path_graph(num_nodes))
# NOTE(review): per the networkx docs `alpha` is the damping parameter and the
# result maps each node to its score — confirm against the installed version.
pr = nx.pagerank(G, alpha=0.8)

7. Dataset

ex) KarateClub

from torch_geometric.datasets import KarateClub

dataset = KarateClub()

describe dataset

len(dataset) # 1 graph
dataset.num_features  # 34 features
dataset.num_classes   # 4 classes

get one graph ( + node & edges )

G1 = dataset[0]

num_nodes = G1.num_nodes
num_edges = G1.num_edges

Average node degrees

# PyG's `num_edges` counts the columns of `edge_index`, so an undirected edge is
# already counted once per direction; the mean degree is edges per node.
avg_degree = num_edges / num_nodes

check other properties

G1.has_isolated_nodes()
G1.is_undirected()
G1.has_self_loops()

get edge indices

G1.edge_index.T

8. GNN with pytorch

Converting a torch_geometric graph to a networkx graph

from torch_geometric.utils import to_networkx

# `data` was never bound before this point; take the dataset's single graph so
# this call and the later model / training snippets have something to use.
data = dataset[0]
G = to_networkx(data, to_undirected=True)

Import packages

import torch
from torch.nn import Linear
from torch_geometric.nn import GCNConv

GNN model with Pytorch

class GCN(torch.nn.Module):
    """Graph convolutional network producing node embeddings and class scores.

    The model applies ``num_layers - 1`` hidden ``GCNConv`` layers (each
    followed by ``tanh``), one final ``GCNConv`` that projects to
    ``embed_dim``, and a linear classifier on top of the embeddings.
    Assumes ``num_layers >= 2``; with fewer layers no hidden convolution is
    applied before the final one.

    Args:
        num_classes: Output size of the linear classifier.
        input_dim: Size of the input node features.
        embed_dim: Size of the embedding produced by the final convolution.
        hidden_dim: Width of the hidden convolution layers.
        num_layers: Total number of graph convolutions, final one included.
    """

    def __init__(self, num_classes, input_dim, embed_dim, hidden_dim, num_layers):
        super().__init__()
        # Fixed seed so the weight initialisation below is reproducible.
        torch.manual_seed(12345)

        # 0) attributes
        self.num_classes = num_classes
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.embed_dim = embed_dim
        self.num_layers = num_layers

        # 1) classifier (created first: layer creation order determines which
        #    random numbers each layer receives from the seeded generator)
        self.classifier = Linear(embed_dim, self.num_classes)

        # 2) graph convolution layers
        self.convs = torch.nn.ModuleList()
        self.convs.append(GCNConv(self.input_dim, self.hidden_dim))
        for _ in range(self.num_layers - 2):
            self.convs.append(GCNConv(self.hidden_dim, self.hidden_dim))
        self.convs_final = GCNConv(self.hidden_dim, embed_dim)

        # 3) activation function
        self.relu = torch.nn.ReLU()

    def forward(self, x, edge_idx):
        """Compute class scores and embeddings for every node.

        Args:
            x: Node feature input passed to the first convolution.
            edge_idx: Edge index passed unchanged to every convolution.

        Returns:
            A ``(out, embeddings)`` tuple: classifier output and the
            ``tanh``-activated output of the final convolution.
        """
        # 1) pass hidden convolution layers
        for layer_idx in range(self.num_layers - 1):
            x = self.convs[layer_idx](x, edge_idx)
            x = x.tanh()

        # 2) pass final convolution layer & make embedding
        h = self.relu(x)
        # Functional dropout follows self.training, so it is switched off by
        # model.eval(); a freshly built Dropout module would always be active.
        h = torch.nn.functional.dropout(h, p=0.2, training=self.training)
        h = self.convs_final(h, edge_idx)
        embeddings = h.tanh()  # Final GNN embedding space.

        # 3) pass final classifier
        out = self.classifier(embeddings)

        return out, embeddings

Hyperparameters

input_dim = dataset.num_features
hidden_dim = 16
embed_dim = 2

num_classes = dataset.num_classes
num_layers = 3

GNN model

model = GCN(num_classes,input_dim,embed_dim,hidden_dim,num_layers)
print(model)

GCN(
  (classifier): Linear(in_features=2, out_features=4, bias=True)
  (convs): ModuleList(
    (0): GCNConv(34, 16)
    (1): GCNConv(16, 16)
  )
  (convs_final): GCNConv(16, 2)
  (relu): ReLU()
)

model = GCN(num_classes,input_dim,embed_dim,hidden_dim,num_layers)

# The model returns (class scores, embeddings); only the embeddings are kept here.
_, h = model(data.x, data.edge_index)

print(f'Embedding shape: {list(h.shape)}')
# Embedding shape: [34, 2]

# NOTE(review): `visualize` is not defined in this file — presumably the
# plotting helper from the original notebook; confirm before running.
visualize(h, color=data.y)

Train Model

Model / Loss Function / Optimizer

# Fresh model, plus the loss and optimiser that the training step relies on.
model = GCN(
    num_classes=num_classes,
    input_dim=input_dim,
    embed_dim=embed_dim,
    hidden_dim=hidden_dim,
    num_layers=num_layers,
)
loss_fn = torch.nn.CrossEntropyLoss()
opt = torch.optim.Adam(model.parameters(), lr=0.01)

Training Function

def train(data):
    """Run one full-batch optimisation step and report loss, embeddings, accuracy.

    Uses the module-level ``model``, ``loss_fn`` and ``opt`` objects.

    Args:
        data: Graph object exposing ``x``, ``edge_index``, ``y`` and
            ``train_mask``.

    Returns:
        A ``(loss, h, accuracy)`` tuple: the loss on the masked nodes, the
        embeddings returned by the model, and a dict with ``'train'``
        (accuracy on the masked nodes) and ``'val'`` (accuracy over all
        nodes, the masked ones included).
    """
    train_idx = data.train_mask
    opt.zero_grad()

    # Feed forward: only the nodes selected by the mask contribute to the loss.
    y_hat, h = model(data.x, data.edge_index)
    loss = loss_fn(y_hat[train_idx], data.y[train_idx])
    loss.backward()
    opt.step()

    # Prediction (computed from the scores obtained before the optimiser step)
    accuracy = {}

    ## train data
    y_pred = torch.argmax(y_hat[train_idx], dim=1)
    # Index with the mask itself; `data` has no `train_idx` attribute.
    y_true = data.y[train_idx]
    accuracy['train'] = (y_pred == y_true).float().mean()

    ## whole data
    y_pred_total = torch.argmax(y_hat, dim=1)
    y_true_total = data.y
    accuracy['val'] = (y_pred_total == y_true_total).float().mean()

    return loss, h, accuracy

Train Model & Visualize

import time  # the loop pauses between plots; `time` was never imported

num_epochs = 300
print_epoch = 10

for epoch in range(num_epochs):
    loss, h, accuracy = train(data)
    # Plot only every `print_epoch`-th epoch.
    if epoch % print_epoch == 0:
        # NOTE(review): `visualize` is not defined in this file — presumably the
        # plotting helper from the original notebook; confirm before running.
        visualize(h, color=data.y, epoch=epoch, loss=loss, accuracy=accuracy)
        time.sleep(0.3)

Twitter Facebook LinkedIn

(CS224W) Colab 0

Seunghan Lee