# Aim on AIConsole
## An easy-to-use & supercharged open-source experiment tracker

Aim is an open-source, self-hosted ML experiment tracking tool. It handles thousands of training runs and lets you compare them with a performant, beautiful UI.
[TOC]
## 1. Create Aim Site on AIConsole
1. On the Container Service page, create a site using the aim solution.
2. Fill in the inputs; the most important one is the NFS storage.
:::info
The NFS server hosts your Aim repository; in later steps we will write our tracking data into this repo.
:::

## 2. Create PyTorch Site on AIConsole
1. On the Container Service page, create a site using the pytorch solution.
:::info
Other frameworks work too; in this example we use PyTorch.
:::
2. Again, the most important part is the NFS storage.
:::info
1. The server IP and shared directory should be the same as the ones used for the Aim server.
2. Set the mount directory (Repo path) to wherever you want to store your tracking data while training the model. A quick mount sanity check is sketched right after this note.
:::
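Before launching any training, it can help to confirm from a notebook cell that the shared repo path is actually mounted and writable inside the PyTorch container. This is only a sketch; `/aim` is a placeholder for whatever mount directory ({Repo Path}) you configured.
```python
import os

# Placeholder path -- replace with the mount directory ({Repo Path}) you configured above
repo_path = "/aim"

# The directory must exist and be writable from this container,
# otherwise Aim cannot persist tracking data to the shared NFS storage.
assert os.path.isdir(repo_path), f"{repo_path} is not mounted"
assert os.access(repo_path, os.W_OK), f"{repo_path} is not writable"
print(f"{repo_path} looks good")
```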

## 3. Write your Machine Learning code
1. Open Jupyter from the Entry Point.

2. Write your training code with Aim tracking, or use the [example code](#Example-code) provided below.
:::info
Set the repo argument to the **{Repo Path}** you defined before, so the Aim server can pick up your tracking data. A minimal smoke test is sketched right after this note.
```python
aim_run = Run(
    repo='{Repo Path}'
)
```
:::
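Before a long training job, you may want to verify the wiring end to end by opening a throwaway run and tracking a single value; if the run appears on the Aim site shortly afterwards, the NFS repo is shared correctly. A minimal sketch, assuming `/aim` as the repo path and `smoke-test` as a placeholder experiment name:
```python
from aim import Run

# Placeholder values -- use your own {Repo Path} and experiment name
smoke_run = Run(repo="/aim", experiment="smoke-test")

# Track one metric point; if this run shows up in the Aim UI,
# the repo is shared correctly between the two sites.
smoke_run.track(1.0, name="sanity", step=0, context={"subset": "debug"})
smoke_run.close()
```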
## 4. Watch your training live on Aim
Go to the Aim server's web UI and browse your training metadata as it is tracked.
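Besides the web UI, you can also pull tracked metrics back out of the same repo with Aim's Python SDK, which is handy for post-hoc analysis in a notebook. A minimal sketch, assuming the Aim 3.x query API and the `/aim` repo path used in the example code:
```python
from aim import Repo

# Placeholder path -- the same NFS-backed repo the training runs wrote to
repo = Repo("/aim")

# Query every tracked "loss" series and print the last few values per run
for run_metrics in repo.query_metrics("metric.name == 'loss'").iter_runs():
    for metric in run_metrics:
        steps, values = metric.values.sparse_numpy()
        print(metric.run.hash, metric.name, values[-5:])
```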



#### Example code
```python
!pip install aim ipywidgets
```
```python
from aim import Run
from aim.pytorch import track_gradients_dists, track_params_dists
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
```
```python
# Initialize a new Run
aim_run = Run(
    repo='/aim',
    experiment='aim-demo-2'
)

# Device configuration
device = torch.device("cpu")

# Hyper parameters
num_epochs = 5
num_classes = 10
batch_size = 16
learning_rate = 0.01

# aim - Track hyper parameters
aim_run["hparams"] = {
    "num_epochs": num_epochs,
    "num_classes": num_classes,
    "batch_size": batch_size,
    "learning_rate": learning_rate,
}

# MNIST dataset
train_dataset = torchvision.datasets.MNIST(
    root="./data/", train=True, transform=transforms.ToTensor(), download=True
)
test_dataset = torchvision.datasets.MNIST(
    root="./data/", train=False, transform=transforms.ToTensor()
)

# Data loader
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False
)

# Convolutional neural network (two convolutional layers)
class ConvNet(nn.Module):
    def __init__(self, num_classes=10):
        super(ConvNet, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(16),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.layer2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        self.fc = nn.Linear(7 * 7 * 32, num_classes)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.reshape(out.size(0), -1)
        out = self.fc(out)
        return out

model = ConvNet(num_classes).to(device)

# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

# Train the model
total_step = len(train_loader)
for epoch in range(num_epochs):
    for i, (images, labels) in enumerate(train_loader):
        images = images.to(device)
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 30 == 0:
            print(
                "Epoch [{}/{}], Step [{}/{}], "
                "Loss: {:.4f}".format(
                    epoch + 1, num_epochs, i + 1, total_step, loss.item()
                )
            )

            # aim - Track model loss function
            aim_run.track(
                loss.item(), name="loss", epoch=epoch, context={"subset": "train"}
            )

            correct = 0
            total = 0
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            acc = 100 * correct / total

            # aim - Track metrics
            aim_run.track(
                acc, name="accuracy", epoch=epoch, context={"subset": "train"}
            )

            # aim - Track weights and gradients distributions
            track_params_dists(model, aim_run)
            track_gradients_dists(model, aim_run)

        # TODO: Do actual validation
        if i % 300 == 0:
            aim_run.track(
                loss.item(), name="loss", epoch=epoch, context={"subset": "val"}
            )
            aim_run.track(
                acc, name="accuracy", epoch=epoch, context={"subset": "val"}
            )

# Test the model
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    print("Test Accuracy: {} %".format(100 * correct / total))
```