# NLP in PyTorch for Monkey: 2. Feed-forward Networks for NLP
This tutorial is a cheat sheet for the book "Natural Language Processing with PyTorch: Build Intelligent Language Applications Using Deep Learning".
https://github.com/delip/PyTorchNLPBook
Outline
---
1. [PyTorch Basics](https://hackmd.io/@martinliu/Hkt4VBggi)
2. Feed-forward Networks for NLP
3. Embedding Words and Types
4. Sequence Modeling for NLP
5. Intermediate Sequence Modeling for NLP
6. Advanced Sequence Modeling for NLP
7. My Note
---
### Perceptron in binary classification task
---
Setup: imports and random seeds for reproducibility
```
import numpy as np
import torch
import torch.nn as nn
seed = 1337
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
%matplotlib inline
```
Sigmoid activation
```
import torch
import matplotlib.pyplot as plt
x = torch.arange(-5., 5., 0.1)
y = torch.sigmoid(x)
plt.plot(x.numpy(), y.detach().numpy())
plt.show()
```
Tanh activation
```
import torch
import matplotlib.pyplot as plt
x = torch.arange(-5., 5., 0.1)
y = torch.tanh(x)
plt.plot(x.numpy(), y.detach().numpy())
plt.show()
```
ReLU activation
```
import torch
import matplotlib.pyplot as plt
relu = torch.nn.ReLU()
x = torch.arange(-5., 5., 0.1)
y = relu(x)
plt.plot(x.numpy(), y.detach().numpy())
plt.show()
```
PReLU activation
```
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
prelu = nn.PReLU(num_parameters=1)
x = torch.arange(-5., 5., 0.1)
y = prelu(x)
plt.plot(x.numpy(), y.detach().numpy())
plt.show()
```
Softmax activation
```
import torch
import torch.nn as nn
softmax = nn.Softmax(dim=1)
x_input = torch.randn(1, 3)
y_output = softmax(x_input)
print(x_input)
tensor([[-2.0260, -2.0655, -1.2054]])
print(y_output)
tensor([[0.2362, 0.2271, 0.5367]])
print(torch.sum(y_output, dim=1))
tensor([1.])
```
MSE loss
```
import torch
import torch.nn as nn
mse_loss = nn.MSELoss()
outputs = torch.randn(3, 5, requires_grad=True)
targets = torch.randn(3, 5)
loss = mse_loss(outputs, targets)
loss.backward()
print(loss)
tensor(1.6031, grad_fn=<MseLossBackward>)
```
Cross-entropy loss
```
import torch
import torch.nn as nn
ce_loss = nn.CrossEntropyLoss()
outputs = torch.randn(3, 5, requires_grad=True)
targets = torch.tensor([1, 0, 3], dtype=torch.int64)
loss = ce_loss(outputs, targets)
loss.backward()
print (loss)
tensor(2.5949, grad_fn=<NllLossBackward>)
```
Binary cross-entropy loss
```
import torch
import torch.nn as nn
bce_loss = nn.BCELoss()
sigmoid = nn.Sigmoid()
probabilities = sigmoid(torch.randn(4, 1, requires_grad=True))
print(probabilities)
tensor([[0.7411],
[0.5622],
[0.8286],
[0.6569]], grad_fn=<SigmoidBackward>)
targets = torch.tensor([1, 0, 1, 0], dtype=torch.float32).view(4, 1)
loss = bce_loss(probabilities, targets)
loss.backward()
print(loss)
tensor(0.5958, grad_fn=<BinaryCrossEntropyBackward>)
```
---
### Diving Deep into Supervised Training
#### Instantiating the Adam optimizer
Global Settings
```
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
%matplotlib inline
LEFT_CENTER = (3, 3)
RIGHT_CENTER = (3, -2)
```
Defining the Model
```
class Perceptron(nn.Module):
    """ A Perceptron is one Linear layer """

    def __init__(self, input_dim):
        """
        Args:
            input_dim (int): size of the input features
        """
        super(Perceptron, self).__init__()
        self.fc1 = nn.Linear(input_dim, 1)

    def forward(self, x_in):
        """The forward pass of the Perceptron

        Args:
            x_in (torch.Tensor): an input data tensor.
                x_in.shape should be (batch, input_dim)
        Returns:
            the resulting tensor. tensor.shape should be (batch, 1)
        """
        return torch.sigmoid(self.fc1(x_in))
```
Get Data Function
```
def get_toy_data(batch_size, left_center=LEFT_CENTER, right_center=RIGHT_CENTER):
    x_data = []
    y_targets = np.zeros(batch_size)
    for batch_i in range(batch_size):
        if np.random.random() > 0.5:
            x_data.append(np.random.normal(loc=left_center))
        else:
            x_data.append(np.random.normal(loc=right_center))
            y_targets[batch_i] = 1
    return torch.tensor(x_data, dtype=torch.float32), torch.tensor(y_targets, dtype=torch.float32)
```
Visualizing Results Function
```
def visualize_results(perceptron, x_data, y_truth, n_samples=1000, ax=None, epoch=None,
                      title='', levels=[0.3, 0.4, 0.5], linestyles=['--', '-', '--']):
    y_pred = perceptron(x_data)
    y_pred = (y_pred > 0.5).long().data.numpy().astype(np.int32)
    x_data = x_data.data.numpy()
    y_truth = y_truth.data.numpy().astype(np.int32)
    n_classes = 2
    all_x = [[] for _ in range(n_classes)]
    all_colors = [[] for _ in range(n_classes)]
    colors = ['black', 'white']
    markers = ['o', '*']
    for x_i, y_pred_i, y_true_i in zip(x_data, y_pred, y_truth):
        all_x[y_true_i].append(x_i)
        if y_pred_i == y_true_i:
            all_colors[y_true_i].append("white")
        else:
            all_colors[y_true_i].append("black")
        #all_colors[y_true_i].append(colors[y_pred_i])
    all_x = [np.stack(x_list) for x_list in all_x]
    if ax is None:
        _, ax = plt.subplots(1, 1, figsize=(10,10))
    for x_list, color_list, marker in zip(all_x, all_colors, markers):
        ax.scatter(x_list[:, 0], x_list[:, 1], edgecolor="black", marker=marker, facecolor=color_list, s=300)
    xlim = (min([x_list[:,0].min() for x_list in all_x]),
            max([x_list[:,0].max() for x_list in all_x]))
    ylim = (min([x_list[:,1].min() for x_list in all_x]),
            max([x_list[:,1].max() for x_list in all_x]))
    # hyperplane
    xx = np.linspace(xlim[0], xlim[1], 30)
    yy = np.linspace(ylim[0], ylim[1], 30)
    YY, XX = np.meshgrid(yy, xx)
    xy = np.vstack([XX.ravel(), YY.ravel()]).T
    Z = perceptron(torch.tensor(xy, dtype=torch.float32)).detach().numpy().reshape(XX.shape)
    ax.contour(XX, YY, Z, colors='k', levels=levels, linestyles=linestyles)
    plt.suptitle(title)
    if epoch is not None:
        plt.text(xlim[0], ylim[1], "Epoch = {}".format(str(epoch)))
```
#### A supervised training loop for a perceptron and binary classification
Initial Data Plot
```
seed = 1337
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
x_data, y_truth = get_toy_data(batch_size=1000)
x_data = x_data.data.numpy()
y_truth = y_truth.data.numpy()
left_x = []
right_x = []
left_colors = []
right_colors = []
for x_i, y_true_i in zip(x_data, y_truth):
    color = 'black'
    if y_true_i == 0:
        left_x.append(x_i)
        left_colors.append(color)
    else:
        right_x.append(x_i)
        right_colors.append(color)
left_x = np.stack(left_x)
right_x = np.stack(right_x)
_, ax = plt.subplots(1, 1, figsize=(10,4))
ax.scatter(left_x[:, 0], left_x[:, 1], color=left_colors, marker='*', s=100)
ax.scatter(right_x[:, 0], right_x[:, 1], facecolor='white', edgecolor=right_colors, marker='o', s=100)
plt.axis('off');
```
Training loop with intermittent data plots
```
lr = 0.01
input_dim = 2
batch_size = 1000
n_epochs = 12
n_batches = 5
seed = 1337
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
perceptron = Perceptron(input_dim=input_dim)
optimizer = optim.Adam(params=perceptron.parameters(), lr=lr)
bce_loss = nn.BCELoss()
losses = []
x_data_static, y_truth_static = get_toy_data(batch_size)
fig, ax = plt.subplots(1, 1, figsize=(10,5))
visualize_results(perceptron, x_data_static, y_truth_static, ax=ax, title='Initial Model State')
plt.axis('off')
#plt.savefig('initial.png')
change = 1.0
last = 10.0
epsilon = 1e-3
epoch = 0
while change > epsilon or epoch < n_epochs or last > 0.3:
#for epoch in range(n_epochs):
    for _ in range(n_batches):
        optimizer.zero_grad()
        x_data, y_target = get_toy_data(batch_size)
        y_pred = perceptron(x_data).squeeze()
        loss = bce_loss(y_pred, y_target)
        loss.backward()
        optimizer.step()

        loss_value = loss.item()
        losses.append(loss_value)

        change = abs(last - loss_value)
        last = loss_value

    fig, ax = plt.subplots(1, 1, figsize=(10,5))
    visualize_results(perceptron, x_data_static, y_truth_static, ax=ax, epoch=epoch,
                      title=f"{loss_value}; {change}")
    plt.axis('off')
    epoch += 1
    #plt.savefig('epoch{}_toylearning.png'.format(epoch))
```
Final model and plotted results
```
_, axes = plt.subplots(1,2,figsize=(12,4))
axes[0].scatter(left_x[:, 0], left_x[:, 1], facecolor='white',edgecolor='black', marker='o', s=300)
axes[0].scatter(right_x[:, 0], right_x[:, 1], facecolor='white', edgecolor='black', marker='*', s=300)
axes[0].axis('off');
visualize_results(perceptron, x_data_static, y_truth_static, epoch=None, levels=[0.5], ax=axes[1])
axes[1].axis('off');
plt.savefig('perceptron_final.png')
plt.savefig('perceptron_final.pdf')
```
---
### Classifying Sentiment of Restaurant Reviews
Dataset preprocessing
```
```
```
```
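The original preprocessing code is not included above. As a rough sketch (the file path `data/yelp/raw_train.csv` and the `rating`/`review` column layout are assumptions, not the book's exact code), loading and subsetting the raw Yelp reviews could look like this:
```
import collections
import pandas as pd

# Hypothetical path and column layout for the raw Yelp review CSV
train_reviews = pd.read_csv('data/yelp/raw_train.csv',
                            header=None, names=['rating', 'review'])

# Group by rating so the subset stays class-balanced
by_rating = collections.defaultdict(list)
for _, row in train_reviews.iterrows():
    by_rating[row.rating].append(row.to_dict())

# Keep a small fraction of each class for faster experimentation
review_subset = []
proportion_subset = 0.1
for rating, reviews in by_rating.items():
    n_subset = int(proportion_subset * len(reviews))
    review_subset.extend(reviews[:n_subset])

review_subset = pd.DataFrame(review_subset)
```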
Example 3-12. Creating training, validation, and testing splits
```
```
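A minimal sketch of per-class splitting, continuing from the `review_subset` DataFrame above (the 70/15/15 proportions are an assumption):
```
import collections
import numpy as np
import pandas as pd

train_proportion, val_proportion = 0.7, 0.15  # the remainder goes to the test split

by_rating = collections.defaultdict(list)
for _, row in review_subset.iterrows():
    by_rating[row.rating].append(row.to_dict())

final_list = []
np.random.seed(1337)
for rating, item_list in sorted(by_rating.items()):
    np.random.shuffle(item_list)
    n_total = len(item_list)
    n_train = int(train_proportion * n_total)
    n_val = int(val_proportion * n_total)

    # Tag each item with the split it belongs to
    for item in item_list[:n_train]:
        item['split'] = 'train'
    for item in item_list[n_train:n_train + n_val]:
        item['split'] = 'val'
    for item in item_list[n_train + n_val:]:
        item['split'] = 'test'

    final_list.extend(item_list)

final_reviews = pd.DataFrame(final_list)
```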
Example 3-13. Minimally cleaning the data
```
```
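A sketch of the minimal cleaning step: lowercase, pad punctuation with spaces, drop other symbols, and (assuming the raw ratings are 1 = negative, 2 = positive) map them to label names:
```
import re

def preprocess_text(text):
    """Lowercase, pad punctuation with spaces, and strip other symbols."""
    text = text.lower()
    text = re.sub(r"([.,!?])", r" \1 ", text)
    text = re.sub(r"[^a-zA-Z.,!?]+", " ", text)
    return text

final_reviews.review = final_reviews.review.apply(preprocess_text)
# Assumption: raw star ratings 1/2 become 'negative'/'positive'
final_reviews.rating = final_reviews.rating.apply({1: 'negative', 2: 'positive'}.get)
```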
Example 3-14. A PyTorch Dataset class for the Yelp Review dataset
```
```
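A simplified sketch of a `Dataset` subclass for the review data; it assumes a `ReviewVectorizer` with a `vectorize` method and a `rating_vocab` (sketched below), and a DataFrame with `review`, `rating`, and `split` columns:
```
from torch.utils.data import Dataset

class ReviewDataset(Dataset):
    def __init__(self, review_df, vectorizer):
        """
        Args:
            review_df (pandas.DataFrame): the dataset with a 'split' column
            vectorizer (ReviewVectorizer): vectorizer built from the dataset
        """
        self.review_df = review_df
        self._vectorizer = vectorizer
        self.set_split('train')

    def get_vectorizer(self):
        return self._vectorizer

    def set_split(self, split='train'):
        """Select which split ('train', 'val', or 'test') __getitem__ reads from."""
        self._target_split = split
        self._target_df = self.review_df[self.review_df.split == split]

    def __len__(self):
        return len(self._target_df)

    def __getitem__(self, index):
        """Return a dict with the vectorized review and its label index."""
        row = self._target_df.iloc[index]
        review_vector = self._vectorizer.vectorize(row.review)
        rating_index = self._vectorizer.rating_vocab.lookup_token(row.rating)
        return {'x_data': review_vector, 'y_target': rating_index}
```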
Example 3-15. The Vocabulary class maintains token to integer mapping needed for the rest of the machine learning pipeline
```
```
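A minimal sketch of a token-to-index `Vocabulary`:
```
class Vocabulary(object):
    """Maps tokens to integer indices and back."""

    def __init__(self, add_unk=True, unk_token="<UNK>"):
        self._token_to_idx = {}
        self._idx_to_token = {}
        self._add_unk = add_unk
        self._unk_token = unk_token
        self.unk_index = self.add_token(unk_token) if add_unk else -1

    def add_token(self, token):
        """Add a token and return its index (reuse the index if already present)."""
        if token in self._token_to_idx:
            return self._token_to_idx[token]
        index = len(self._token_to_idx)
        self._token_to_idx[token] = index
        self._idx_to_token[index] = token
        return index

    def lookup_token(self, token):
        """Return the index of a token, falling back to the UNK index."""
        if self._add_unk:
            return self._token_to_idx.get(token, self.unk_index)
        return self._token_to_idx[token]

    def lookup_index(self, index):
        """Return the token stored at an index."""
        return self._idx_to_token[index]

    def __len__(self):
        return len(self._token_to_idx)
```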
Example 3-16. The Vectorizer class converts text to numeric vectors
```
```
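A sketch of a vectorizer that collapses a review into a one-hot bag-of-words vector; it builds on the `Vocabulary` sketch above, and the frequency `cutoff` of 25 is an assumption:
```
import string
from collections import Counter
import numpy as np

class ReviewVectorizer(object):
    """Turns a review string into a collapsed one-hot vector over the vocabulary."""

    def __init__(self, review_vocab, rating_vocab):
        self.review_vocab = review_vocab
        self.rating_vocab = rating_vocab

    def vectorize(self, review):
        """Return a float32 vector with 1s at the indices of the review's words."""
        one_hot = np.zeros(len(self.review_vocab), dtype=np.float32)
        for token in review.split(" "):
            if token not in string.punctuation:
                one_hot[self.review_vocab.lookup_token(token)] = 1
        return one_hot

    @classmethod
    def from_dataframe(cls, review_df, cutoff=25):
        """Build vocabularies, keeping words seen more than `cutoff` times."""
        review_vocab = Vocabulary(add_unk=True)
        rating_vocab = Vocabulary(add_unk=False)

        for rating in sorted(set(review_df.rating)):
            rating_vocab.add_token(rating)

        word_counts = Counter()
        for review in review_df.review:
            for word in review.split(" "):
                word_counts[word] += 1

        for word, count in word_counts.items():
            if count > cutoff:
                review_vocab.add_token(word)

        return cls(review_vocab, rating_vocab)
```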
Example 3-17. Generating minibatches from a dataset
```
```
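A sketch of a generator that wraps PyTorch's `DataLoader` and moves every tensor in the batch dictionary onto the target device:
```
from torch.utils.data import DataLoader

def generate_batches(dataset, batch_size, shuffle=True,
                     drop_last=True, device="cpu"):
    """Yield dictionaries of batch tensors, each moved onto `device`."""
    dataloader = DataLoader(dataset=dataset, batch_size=batch_size,
                            shuffle=shuffle, drop_last=drop_last)
    for data_dict in dataloader:
        out_data_dict = {}
        for name, tensor in data_dict.items():
            out_data_dict[name] = tensor.to(device)
        yield out_data_dict
```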
Example 3-18. A perceptron classifier for classifying Yelp reviews
```
```
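A sketch of the single-layer classifier. It returns raw logits by default so training can use `nn.BCEWithLogitsLoss`; passing `apply_sigmoid=True` yields probabilities for inference:
```
import torch
import torch.nn as nn

class ReviewClassifier(nn.Module):
    """A single-layer perceptron over the one-hot review vector."""

    def __init__(self, num_features):
        super(ReviewClassifier, self).__init__()
        self.fc1 = nn.Linear(in_features=num_features, out_features=1)

    def forward(self, x_in, apply_sigmoid=False):
        """Return raw logits; squash with sigmoid when predicting probabilities."""
        y_out = self.fc1(x_in).squeeze()
        if apply_sigmoid:
            y_out = torch.sigmoid(y_out)
        return y_out
```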
Example 3-19. Hyperparameters and program options for the perceptron-based Yelp review classifier
```
```
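A sketch of collecting hyperparameters in an `argparse.Namespace`; the file paths and values below are illustrative assumptions, not the book's exact settings:
```
import torch
from argparse import Namespace

# Illustrative values and paths only
args = Namespace(
    review_csv='data/yelp/reviews_with_splits_lite.csv',
    save_dir='model_storage/yelp/',
    frequency_cutoff=25,
    learning_rate=0.001,
    batch_size=128,
    num_epochs=100,
    seed=1337,
    device='cuda' if torch.cuda.is_available() else 'cpu',
)
```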
Example 3-20. Instantiating the dataset, model, loss, optimizer, and training state
```
```
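A sketch of wiring the pieces together, using the classes sketched above and the `args` namespace:
```
import torch.nn as nn
import torch.optim as optim

# Build the vectorizer from the data and wrap everything in a Dataset
vectorizer = ReviewVectorizer.from_dataframe(final_reviews, cutoff=args.frequency_cutoff)
dataset = ReviewDataset(final_reviews, vectorizer)

# Model, loss, and optimizer
classifier = ReviewClassifier(num_features=len(vectorizer.review_vocab)).to(args.device)
loss_func = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(classifier.parameters(), lr=args.learning_rate)
```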
Example 3-21. A bare-bones training loop
```
```
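A bare-bones sketch of the loop over epochs, with a training pass and a gradient-free validation pass:
```
import torch

for epoch_index in range(args.num_epochs):
    # --- training pass ---
    dataset.set_split('train')
    classifier.train()
    for batch_dict in generate_batches(dataset, batch_size=args.batch_size,
                                       device=args.device):
        optimizer.zero_grad()
        y_pred = classifier(x_in=batch_dict['x_data'].float())
        loss = loss_func(y_pred, batch_dict['y_target'].float())
        loss.backward()
        optimizer.step()

    # --- validation pass (no gradient updates) ---
    dataset.set_split('val')
    classifier.eval()
    with torch.no_grad():
        for batch_dict in generate_batches(dataset, batch_size=args.batch_size,
                                           device=args.device):
            y_pred = classifier(x_in=batch_dict['x_data'].float())
            val_loss = loss_func(y_pred, batch_dict['y_target'].float())
```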
Example 3-22. Test set evaluation
```
```
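A sketch of evaluating on the held-out test split, accumulating a running loss and accuracy:
```
import torch

dataset.set_split('test')
classifier.eval()

running_loss = 0.
running_acc = 0.
with torch.no_grad():
    for batch_index, batch_dict in enumerate(
            generate_batches(dataset, batch_size=args.batch_size, device=args.device)):
        y_pred = classifier(x_in=batch_dict['x_data'].float())
        loss = loss_func(y_pred, batch_dict['y_target'].float())
        running_loss += (loss.item() - running_loss) / (batch_index + 1)

        # Accuracy: threshold the sigmoid of the logits at 0.5
        y_pred_labels = (torch.sigmoid(y_pred) > 0.5).long()
        correct = torch.eq(y_pred_labels, batch_dict['y_target'].long()).sum().item()
        acc = correct / len(y_pred_labels) * 100
        running_acc += (acc - running_acc) / (batch_index + 1)

print("Test loss: {:.3f}".format(running_loss))
print("Test accuracy: {:.2f}".format(running_acc))
```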
Example 3-23. Printing the prediction for a sample review
```
```
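A sketch of classifying a raw review string end to end, reusing `preprocess_text`, the vectorizer, and the trained classifier (the sample review text is made up):
```
import torch

def predict_rating(review, classifier, vectorizer, decision_threshold=0.5):
    """Predict 'positive'/'negative' for a raw review string."""
    review = preprocess_text(review)
    # Move the input onto the same device as the classifier
    vectorized = torch.tensor(vectorizer.vectorize(review)).view(1, -1).to(args.device)
    probability = classifier(vectorized, apply_sigmoid=True).item()
    index = 1 if probability >= decision_threshold else 0
    return vectorizer.rating_vocab.lookup_index(index)

test_review = "this is a pretty awesome book"
print("{} -> {}".format(test_review,
                        predict_rating(test_review, classifier, vectorizer)))
```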
Example 3-24. Inspecting what the classifier learned
```
```
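Because the model is a single linear layer, its weights can be read off directly: sorting them and mapping indices back through the vocabulary shows which words push a review towards positive or negative. A sketch:
```
import torch

# Weights of the single linear layer, one per vocabulary word
fc1_weights = classifier.fc1.weight.detach().cpu()[0]
_, sorted_indices = torch.sort(fc1_weights, dim=0, descending=True)
sorted_indices = sorted_indices.numpy().tolist()

print("Words most influential towards a positive review:")
for i in sorted_indices[:20]:
    print(vectorizer.review_vocab.lookup_index(i))

print("\nWords most influential towards a negative review:")
for i in sorted_indices[-20:]:
    print(vectorizer.review_vocab.lookup_index(i))
```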
###### tags: `Python` `pytorch` `NLP`