```
import networkx as nx
import random
import numpy as np
from typing import List
from tqdm import tqdm
from gensim.models.word2vec import Word2Vec
import torch
class DeepWalk:
    """DeepWalk node-embedding trainer.

    Samples truncated random walks over a graph and feeds them to gensim's
    Word2Vec (skip-gram over node sequences) to learn one vector per node.
    """

    def __init__(self, window_size: int, embedding_size: int, walk_length: int,
                 walks_per_node: int, device: str = 'cuda'):
        """
        :param window_size: skip-gram context window passed to Word2Vec.
        :param embedding_size: dimensionality of the learned node vectors.
        :param walk_length: number of steps attempted from each start node.
        :param walks_per_node: how many walks are started from every node.
        :param device: torch device the final embedding tensor is moved to.
        """
        self.window_size = window_size
        self.embedding_size = embedding_size
        self.walk_length = walk_length
        # Attribute name kept as `walk_per_node` (singular) for backward
        # compatibility with existing callers.
        self.walk_per_node = walks_per_node
        self.device = device

    def random_walk(self, g: nx.Graph, start: str, use_probabilities: bool = False) -> List[str]:
        """Sample one random walk starting at `start`.

        Returns the visited nodes including `start` (so at most
        walk_length + 1 entries). The walk stops early at a node with no
        neighbours instead of raising, so isolated/sink nodes are safe.

        :param g: graph to walk on; edges need a "weight" attribute when
            `use_probabilities` is True.
        :param use_probabilities: if True, step to a neighbour with
            probability proportional to the connecting edge's weight;
            otherwise step uniformly at random.
        """
        walk = [start]
        for i in range(self.walk_length):
            neighs = list(g.neighbors(walk[i]))
            if not neighs:
                # Dead end: no outgoing neighbours, terminate the walk early
                # (the original code crashed here on isolated nodes).
                break
            if use_probabilities:
                weights = np.array(
                    [g.get_edge_data(walk[i], neig)["weight"] for neig in neighs],
                    dtype=float,
                )
                # Sample an *index* rather than passing `neighs` to
                # np.random.choice directly: that would coerce node labels to
                # numpy scalars (e.g. numpy.str_) instead of keeping the
                # original node objects in the walk.
                idx = np.random.choice(len(neighs), p=weights / weights.sum())
                next_node = neighs[idx]
            else:
                next_node = random.choice(neighs)
            walk.append(next_node)
        return walk

    def get_walks(self, g: nx.Graph, use_probabilities: bool = False) -> List[List[str]]:
        """Generate `walk_per_node` walks from every node of `g`.

        Node order is shuffled independently for each pass so consecutive
        walks are not correlated by graph insertion order.
        """
        random_walks = []
        for _ in range(self.walk_per_node):
            random_nodes = list(g.nodes)
            random.shuffle(random_nodes)
            for node in tqdm(random_nodes):
                random_walks.append(
                    self.random_walk(g=g, start=node, use_probabilities=use_probabilities)
                )
        return random_walks

    def compute_embeddings(self, walks: List[List[str]]):
        """Train Word2Vec on the walks and return its KeyedVectors.

        NOTE(review): the vectors are replaced in-place with a torch tensor on
        `self.device`; callers are expected to move them back to numpy (see the
        usage script) before using gensim similarity APIs, which assume numpy
        arrays.
        """
        model = Word2Vec(sentences=walks, window=self.window_size,
                         vector_size=self.embedding_size, epochs=3)
        embeddings = model.wv.vectors
        device_embeddings = torch.tensor(embeddings, device=self.device)
        model.wv.vectors = device_embeddings
        return model.wv
```
```
import networkx as nx
import numpy as np

# Learn DeepWalk embeddings for Zachary's karate-club graph.
G = nx.karate_club_graph()
deepwalk = DeepWalk(window_size=5, embedding_size=16, walk_length=10, walks_per_node=80)
walks = deepwalk.get_walks(G, use_probabilities=False)
embeddings = deepwalk.compute_embeddings(walks)

# compute_embeddings leaves the vectors as a torch tensor on the configured
# device; bring them back to a numpy array before persisting.
embeddings.vectors = embeddings.vectors.cpu().detach().numpy()
np.savetxt("karate.embeddings", embeddings.vectors)

# Map each graph node to its learned vector.
node_embedding_dic = {node: embeddings[node] for node in G.nodes}
node_embedding_dic
```
```
import random
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
class MyDataset(Dataset):
    """BPR-style triplet sampler over a user-item interaction table.

    Each `__getitem__` ignores `index` and instead draws a random user, one
    positive item that user interacted with, and one negative item they did
    not — the standard sampling scheme for pairwise ranking losses.

    :param data: DataFrame with 'user_id_idx' and 'item_id_idx' columns.
    :param n_users: total number of users (ids are 0 .. n_users - 1).
    :param n_items: total number of items (ids are 0 .. n_items - 1).
    :param batch_size: only used to scale `__len__` (samples per epoch).
    """

    def __init__(self, data, n_users, n_items, batch_size):
        self.data = data
        self.n_users = n_users
        self.n_items = n_items
        self.batch_size = batch_size
        # Precompute user -> interacted-items once; the original filtered the
        # whole DataFrame on every __getitem__ call (O(len(data)) per sample).
        self._user_items = (
            data.groupby('user_id_idx')['item_id_idx'].apply(list).to_dict()
        )

    def __len__(self):
        return len(self.data) // self.batch_size

    def __getitem__(self, index):
        # Draw a random user that has at least one interaction; a user id
        # absent from `data` would otherwise crash random.choice([]).
        # Assumes at least one user has interactions (otherwise no sample
        # exists to draw) — same implicit assumption as the original.
        items = []
        while not items:
            user_id = random.randint(0, self.n_users - 1)
            items = self._user_items.get(user_id, [])
        pos_item_id = random.choice(items)
        # Rejection-sample a negative item; terminates quickly as long as each
        # user interacts with far fewer than n_items items.
        neg_item_id = random.randint(0, self.n_items - 1)
        while neg_item_id in items:
            neg_item_id = random.randint(0, self.n_items - 1)
        return (torch.tensor(user_id).long(),
                torch.tensor(pos_item_id).long(),
                torch.tensor(neg_item_id).long())
```
```
import random
import numpy as np
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader
class MyDataset(Dataset):
    """BPR-style triplet sampler with precomputed per-user interaction lists.

    Each `__getitem__` ignores `index` and draws a random user, a positive
    item they interacted with, and a negative item they did not.

    :param data: DataFrame with 'user_id_idx' and 'item_id_idx' columns.
    :param n_users: total number of users (ids are 0 .. n_users - 1).
    :param n_items: total number of items (ids are 0 .. n_items - 1).
    :param batch_size: stored but unused by this class's own logic.
    """

    def __init__(self, data, n_users, n_items, batch_size):
        self.data = data
        self.n_users = n_users
        self.n_items = n_items
        self.batch_size = batch_size
        # BUG FIX: the previous version kept the groupby result as a DataFrame
        # and looked it up with `iloc[user_id]`. `iloc` is *positional*, so
        # whenever some user ids in [0, n_users) are missing from `data`, the
        # sampled items belonged to a different user than the returned
        # user_id (or it raised IndexError). Keying a dict by the actual user
        # id makes the lookup correct regardless of gaps.
        self.inter = (
            data.groupby('user_id_idx')['item_id_idx'].apply(list).to_dict()
        )

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        # Resample until we hit a user that actually has interactions, so a
        # user id absent from `data` cannot crash random.choice([]).
        items = []
        while not items:
            user_id = random.randint(0, self.n_users - 1)
            items = self.inter.get(user_id, [])
        pos_item_id = random.choice(items)
        # Rejection-sample a negative item; fast while users interact with
        # far fewer than n_items items.
        neg_item_id = random.randint(0, self.n_items - 1)
        while neg_item_id in items:
            neg_item_id = random.randint(0, self.n_items - 1)
        return (torch.tensor(user_id).long(),
                torch.tensor(pos_item_id).long(),
                torch.tensor(neg_item_id).long())
```
```