## Food101 Machine Learning
Learning journey inspired by...
<i class="fa fa-file-text"></i> [Roadmap to Machine Learning ](https://hackmd.io/@ml-roadmap/SkCmMNPH0#Roadmap-to-Machine-Learning-%F0%9F%A6%9D)
:::info
:information_source: Note
This larger project follows the HACKMD Roadmap to Machine Learning. Split train database into 80 train and 20 validate... Added validation function + val/train visual graphs... Included early stopper...
**Reflection:** Following a HACKMD Roadmap to Machine Learning, I built onto my basic knowledge of coding ML and adopted better coding practices to maximize my ML performance. There were many new and advanced concepts that I learned along the way, helping me build a stronger and more structured model. I experimented with a lot of optional features and applied analytic visuals to help better understand the model's training process. Over this course, my computer still ran really slowly, which gave me a hard time debugging and raising the abnormally low accuracy %. These concepts were a challenge for me, but they allowed me to grow and touch advanced ML material. Overall, this learning schedule gave me a great boost in my journey and pushed me outside of the basic ML skeleton. These newly learned concepts taught me how to really customize a model to fit specific tasks and helped me discover many alternatives to coding algorithms.
:::
:::spoiler Image Gallery !
**📷 Extremely slow training...**

^It took 3 days to train 10 epochs and ended with 6.41% accuracy... still working on that training accuracy :(
**📷 New vs. old progress tracking**

^Copilot recommended me tqdm progress bar (i love it)
**📷 Random visualizing classes**

^My favorite (and simplest) visual so far... normalizing the image data makes them so funny looking
**📷 Classification Report**

^I realized this report is very similar to per-class matrix, with less customization... It's easier to code CR, so I think i'll be using this more.
**📷 Per-class Matrix**

^So compact looking
**📷 Confusion Matrix**

^Isn't the blue gradient so pretty? I had to change the y-tick font size multiple times so that the labels don't overlap
**📷 Grad-CAM**

^This is such a cool visual; I'm still a little fuzzy on how it works though
**📷 Val Vs. Train**

^Finally got my graphs to print after two long epochs... the first time it was blank b/c I only ran one epoch #LearnedTheHardWay
**📷 Code overflow**

^I ran out of space on HackMD notes and now I deeply regret not starting off with GitHub
:::
### Set Single Training Loop
```python=
import torch
import random
import os
import numpy as np
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision import transforms, datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, random_split
#Define transformations
train_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet stats
])
eval_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
#Load the training split TWICE so train/val can carry different transforms.
#BUG FIX: the original split one dataset object and then assigned
#`train_ds.dataset.transform = train_transform` followed by
#`val_ds.dataset.transform = eval_transform`; both Subsets share the same
#underlying dataset, so the second assignment overwrote the first and the
#training set silently lost its augmentation.
base_train = datasets.Food101(
    root="Food101/data",
    split="train",
    download=True,
    transform=train_transform
)
base_val = datasets.Food101(
    root="Food101/data",
    split="train",
    download=False,  # already downloaded above
    transform=eval_transform
)
#split train and validation datasets
total = len(base_train)
train_size = int(0.8 * total)
val_size = total - train_size
#Seeding each generator identically makes both random_split calls produce
#the SAME index permutation, so train/val indices are disjoint and consistent.
train_ds, _ = random_split(
    base_train,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)
_, val_ds = random_split(
    base_val,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)
test_ds = datasets.Food101(
    root="Food101/data",
    split="test",
    download=False,
    transform=eval_transform
)
#Data loaders
#NOTE(review): num_workers > 0 at module top level may need an
#`if __name__ == "__main__":` guard on spawn-based platforms — confirm.
train_dataloader = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=4)
test_dataloader = DataLoader(test_ds, batch_size=64, shuffle=False, num_workers=4)
val_dataloader = DataLoader(val_ds, batch_size=64, shuffle=False, num_workers=4)
#List of classes: index -> Food101 class name, in the dataset's label order.
#Built from an ordered list instead of a 101-entry literal dict.
_class_names = [
    "apple_pie", "baby_back_ribs", "baklava", "beef_carpaccio", "beef_tartare",
    "beet_salad", "beignets", "bibimbap", "bread_pudding", "breakfast_burrito",
    "bruschetta", "caesar_salad", "cannoli", "caprese_salad", "carrot_cake",
    "ceviche", "cheesecake", "cheese_plate", "chicken_curry", "chicken_quesadilla",
    "chicken_wings", "chocolate_cake", "chocolate_mousse", "churros", "clam_chowder",
    "club_sandwich", "crab_cakes", "creme_brulee", "croque_madame", "cup_cakes",
    "deviled_eggs", "donuts", "dumplings", "edamame", "eggs_benedict",
    "escargots", "falafel", "filet_mignon", "fish_and_chips", "foie_gras",
    "french_fries", "french_onion_soup", "french_toast", "fried_calamari", "fried_rice",
    "frozen_yogurt", "garlic_bread", "gnocchi", "greek_salad", "grilled_cheese_sandwich",
    "grilled_salmon", "guacamole", "gyoza", "hamburger", "hot_and_sour_soup",
    "hot_dog", "huevos_rancheros", "hummus", "ice_cream", "lasagna",
    "lobster_bisque", "lobster_roll_sandwich", "macaroni_and_cheese", "macarons", "miso_soup",
    "mussels", "nachos", "omelette", "onion_rings", "oysters",
    "pad_thai", "paella", "pancakes", "panna_cotta", "peking_duck",
    "pho", "pizza", "pork_chop", "poutine", "prime_rib",
    "pulled_pork_sandwich", "ramen", "ravioli", "red_velvet_cake", "risotto",
    "samosa", "sashimi", "scallops", "seaweed_salad", "shrimp_and_grits",
    "spaghetti_bolognese", "spaghetti_carbonara", "spring_rolls", "steak", "strawberry_shortcake",
    "sushi", "tacos", "takoyaki", "tiramisu", "tuna_tartare",
    "waffles",
]
labels_map = dict(enumerate(_class_names))
#visualize classes: four 5x5 grids of random training images, one class per cell.
#(Removed the unused `current_class` counter from the original.)
print("=== Welcome to Machine Learning Food101 ===")
num_classes = len(labels_map)
row, col = 5, 5
for x in range(4):
    figs, axes = plt.subplots(row, col, figsize=(row*2, col*2))
    figs.canvas.manager.set_window_title("Food101 Class Visuals")
    figs.suptitle(f"Food101 Classes (Group {x+1} of 4)", fontsize=16)
    axes = axes.flatten()
    shown_classes = set()  # uniqueness is per figure; classes may repeat across groups
    for i in range(row*col):
        #rejection-sample until we draw a class not yet shown in this figure
        while True:
            idx = random.randint(0, len(train_ds) - 1)
            img, label = train_ds[idx]
            if label not in shown_classes:
                shown_classes.add(label)
                break
        img_np = img.permute(1, 2, 0).numpy()  # CHW tensor -> HWC for imshow
        img_np = np.clip(img_np, 0, 1)  # normalized values fall outside [0, 1]
        ax = axes[i]
        ax.imshow(img_np)
        ax.set_title(labels_map[label], fontsize=8)
        ax.axis('off')
    print(f"Showing figure...{x+1}")
    plt.tight_layout()
    plt.show()
#Check if GPU is available; prefer CUDA, fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
#Define model
class cnn_model(nn.Module):
    """Small CNN for 128x128 RGB inputs.

    Three conv -> batchnorm -> ReLU -> maxpool stages (channels 3->32->64->128,
    each pool halving spatial size, ending at 16x16), then a
    512 -> 256 -> num_classes fully connected head with dropout.
    Attribute names match the original so saved state_dicts stay loadable.
    """
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.batchnorm1 = nn.BatchNorm2d(32, momentum=0.05)
        self.batchnorm2 = nn.BatchNorm2d(64)
        self.batchnorm3 = nn.BatchNorm2d(128)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(128*16*16, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        #convolutional feature extractor
        for conv, bn in ((self.conv1, self.batchnorm1),
                         (self.conv2, self.batchnorm2),
                         (self.conv3, self.batchnorm3)):
            x = self.pool(self.relu(bn(conv(x))))
        #fully connected classifier head
        x = self.flatten(x)
        for fc in (self.fc1, self.fc2):
            x = self.dropout(self.relu(fc(x)))
        return self.fc3(x)
#Instantiate the network on the chosen device and show its layer summary.
model = cnn_model().to(device)
print("\n Model Summary:")
print(model)
#optimizer and loss function
#SGD with momentum and a small weight decay; label smoothing softens targets.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0001)
loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)
#Cut the learning rate 10x when validation loss plateaus for 3 epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode = "min",
    factor = 0.1,
    patience = 3,
    min_lr= 1e-6
)
#Keep track of model performance
def validate(model, dataloader, loss_fn, device):
    """Run one full evaluation pass.

    Returns (mean per-batch loss, accuracy in percent) for use by the
    LR scheduler and the training-history plots.
    """
    model.eval()  # evaluation mode: dropout off, batchnorm uses running stats
    total_loss, n_correct, n_seen = 0.0, 0, 0
    with torch.no_grad():  # no gradients needed for evaluation
        for batch_imgs, batch_labels in dataloader:
            batch_imgs = batch_imgs.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_imgs)
            total_loss += loss_fn(logits, batch_labels).item()
            n_correct += (logits.argmax(1) == batch_labels).sum().item()
            n_seen += batch_labels.size(0)
    return total_loss / len(dataloader), n_correct * 100 / n_seen
#Training loop: per-epoch train pass, validation, LR scheduling, and
#checkpoint/early-stopping bookkeeping. Order matters: validate() runs
#before scheduler.step(val_loss), and the best checkpoint is saved only
#when validation loss improves.
train_losses, train_accs = [], []  # per-epoch history for plotting
val_losses, val_accs = [], []
epochs = 10
current_batch = 0
best_val_loss = float("inf")
patience, wait = 5, 0  # early-stopping budget / epochs without improvement
checkpoint_path = "Food101/best_model.pth"
for e in range(epochs):
    print("Starting epoch:", e+1)
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    for imgs, labels in train_dataloader:
        if current_batch % 50 == 0: # Print only every 50 batches
            print(f" Batch {current_batch}/{len(train_dataloader)} ({current_batch/len(train_dataloader)*100:.1f}%)")
        current_batch += 1
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        preds = torch.argmax(outputs,1)
        total += labels.size(0)
        correct += (preds == labels).sum().item()
    current_batch = 0  # reset batch counter for the next epoch
    val_loss, val_accuracy = validate(model, val_dataloader, loss_fn, device)
    val_losses.append(val_loss)
    val_accs.append(val_accuracy)
    scheduler.step(val_loss)  # ReduceLROnPlateau keys off validation loss
    accuracy = correct*100 / total
    loss = running_loss / len(train_dataloader)  # mean batch loss this epoch
    train_losses.append(loss)
    train_accs.append(accuracy)
    print(f"Epoch [{e+1}/{epochs}], Loss: {loss:.4f}, Accuracy: {accuracy:.2f}%")
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%\n")
    #Early stopping /checkpoint: save on any improvement, stop after
    #`patience` consecutive epochs without one.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        wait = 0
        torch.save(model.state_dict(), checkpoint_path)
        print (f" New best val loss: {val_loss:.4f}, checkpoint saved.")
    else:
        wait += 1
        print (f" No improvement for {wait}/{patience} epochs.")
        if wait >= patience:
            print(" Early stopping triggered. Stopping training.")
            break
#Testing the model
def evaluate(model, dataLoader):
    """Print and return top-1 accuracy (%) of `model` on `dataLoader`.

    Uses the module-level `device`. Returning the accuracy (the original
    returned None) is backward compatible and lets callers log the value.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for imgs, labels in dataLoader:
            imgs, labels = imgs.to(device), labels.to(device)
            preds = torch.argmax(model(imgs), 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    accuracy = correct * 100 / total if total else 0.0  # guard empty loader
    print (f"Test Accuracy: {accuracy:.2f}%")
    return accuracy
evaluate(model, test_dataloader)
#Save and load model
torch.save(model.state_dict(), "Food101/Food101_model.pth")
#Round-trip through a fresh instance to confirm the weights reload cleanly.
model = cnn_model().to(device)
model.load_state_dict(torch.load( "Food101/Food101_model.pth"))
model.eval()  # inference mode: disables dropout, freezes batchnorm stats
#Plotting training and validation loss and accuracy
#BUG FIX: size the x-axis from the RECORDED history, not the configured
#`epochs`: early stopping can end training with fewer entries, and plotting
#lists of unequal length against epochs_range raises a ValueError.
epochs_range = range(1, len(train_losses) + 1)
plt.figure()
plt.plot(epochs_range, train_losses, label = "Training loss")
plt.plot(epochs_range, val_losses, label = "Validation loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Training vs. Validation loss")
plt.legend()
plt.show()
plt.figure()
plt.plot(epochs_range, train_accs, label = "Training accuracy")
plt.plot(epochs_range, val_accs, label = "Validation accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy (%)")
plt.title("Training vs. Validation accuracy")
plt.legend()
plt.show()
```
>[time=Fri, Jun 13, 2025]
### Grid Search
:::info
:information_source: Note
Implemented hyper-param search algorithms to find best combos...
:::
```python=
import torch
import random
import os
import itertools
import numpy as np
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision import transforms, datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, random_split
#Define transformations
train_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet stats
])
eval_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
#Load the training split TWICE so train/val can carry different transforms.
#BUG FIX: the original assigned `train_ds.dataset.transform` and then
#`val_ds.dataset.transform`; both Subsets share one underlying dataset, so
#the second assignment overwrote the first and training lost augmentation.
base_train = datasets.Food101(
    root="Food101/data",
    split="train",
    download=True,
    transform=train_transform
)
base_val = datasets.Food101(
    root="Food101/data",
    split="train",
    download=False,  # already downloaded above
    transform=eval_transform
)
#split train and validation datasets
total = len(base_train)
train_size = int(0.8 * total)
val_size = total - train_size
#Identical seeds -> identical permutation -> disjoint, consistent indices.
train_ds, _ = random_split(
    base_train,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)
_, val_ds = random_split(
    base_val,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)
test_ds = datasets.Food101(
    root="Food101/data",
    split="test",
    download=False,
    transform=eval_transform
)
#Data loaders
train_dataloader = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=4)
test_dataloader = DataLoader(test_ds, batch_size=64, shuffle=False, num_workers=4)
val_dataloader = DataLoader(val_ds, batch_size=64, shuffle=False, num_workers=4)
#List of classes: index -> Food101 class name, in the dataset's label order.
#Built from an ordered list instead of a 101-entry literal dict.
_class_names = [
    "apple_pie", "baby_back_ribs", "baklava", "beef_carpaccio", "beef_tartare",
    "beet_salad", "beignets", "bibimbap", "bread_pudding", "breakfast_burrito",
    "bruschetta", "caesar_salad", "cannoli", "caprese_salad", "carrot_cake",
    "ceviche", "cheesecake", "cheese_plate", "chicken_curry", "chicken_quesadilla",
    "chicken_wings", "chocolate_cake", "chocolate_mousse", "churros", "clam_chowder",
    "club_sandwich", "crab_cakes", "creme_brulee", "croque_madame", "cup_cakes",
    "deviled_eggs", "donuts", "dumplings", "edamame", "eggs_benedict",
    "escargots", "falafel", "filet_mignon", "fish_and_chips", "foie_gras",
    "french_fries", "french_onion_soup", "french_toast", "fried_calamari", "fried_rice",
    "frozen_yogurt", "garlic_bread", "gnocchi", "greek_salad", "grilled_cheese_sandwich",
    "grilled_salmon", "guacamole", "gyoza", "hamburger", "hot_and_sour_soup",
    "hot_dog", "huevos_rancheros", "hummus", "ice_cream", "lasagna",
    "lobster_bisque", "lobster_roll_sandwich", "macaroni_and_cheese", "macarons", "miso_soup",
    "mussels", "nachos", "omelette", "onion_rings", "oysters",
    "pad_thai", "paella", "pancakes", "panna_cotta", "peking_duck",
    "pho", "pizza", "pork_chop", "poutine", "prime_rib",
    "pulled_pork_sandwich", "ramen", "ravioli", "red_velvet_cake", "risotto",
    "samosa", "sashimi", "scallops", "seaweed_salad", "shrimp_and_grits",
    "spaghetti_bolognese", "spaghetti_carbonara", "spring_rolls", "steak", "strawberry_shortcake",
    "sushi", "tacos", "takoyaki", "tiramisu", "tuna_tartare",
    "waffles",
]
labels_map = dict(enumerate(_class_names))
#visualize classes: four 5x5 grids of random training images, one class per cell.
#(Removed the unused `current_class` counter from the original.)
print("=== Welcome to Machine Learning Food101 ===")
num_classes = len(labels_map)
row, col = 5, 5
for x in range(4):
    figs, axes = plt.subplots(row, col, figsize=(row*2, col*2))
    figs.canvas.manager.set_window_title("Food101 Class Visuals")
    figs.suptitle(f"Food101 Classes (Group {x+1} of 4)", fontsize=16)
    axes = axes.flatten()
    shown_classes = set()  # uniqueness is per figure; classes may repeat across groups
    for i in range(row*col):
        #rejection-sample until we draw a class not yet shown in this figure
        while True:
            idx = random.randint(0, len(train_ds) - 1)
            img, label = train_ds[idx]
            if label not in shown_classes:
                shown_classes.add(label)
                break
        img_np = img.permute(1, 2, 0).numpy()  # CHW tensor -> HWC for imshow
        img_np = np.clip(img_np, 0, 1)  # normalized values fall outside [0, 1]
        ax = axes[i]
        ax.imshow(img_np)
        ax.set_title(labels_map[label], fontsize=8)
        ax.axis('off')
    print(f"Showing figure...{x+1}")
    plt.tight_layout()
    plt.show()
#Check if GPU is available; prefer CUDA, fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
#Define model
class cnn_model(nn.Module):
    """Small CNN for 128x128 RGB inputs with a configurable dropout rate.

    Three conv -> batchnorm -> ReLU -> maxpool stages (3->32->64->128 channels,
    ending at 16x16 spatial), then a 512 -> 256 -> num_classes head.
    Attribute names match the original so saved state_dicts stay loadable.
    """
    def __init__(self, dropout_rate=0.5):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.batchnorm1 = nn.BatchNorm2d(32, momentum=0.05)
        self.batchnorm2 = nn.BatchNorm2d(64)
        self.batchnorm3 = nn.BatchNorm2d(128)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(128*16*16, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        #convolutional feature extractor
        for conv, bn in ((self.conv1, self.batchnorm1),
                         (self.conv2, self.batchnorm2),
                         (self.conv3, self.batchnorm3)):
            x = self.pool(self.relu(bn(conv(x))))
        #fully connected classifier head
        x = self.flatten(x)
        for fc in (self.fc1, self.fc2):
            x = self.dropout(self.relu(fc(x)))
        return self.fc3(x)
#Instantiate with the default dropout rate and show the layer summary;
#grid search below rebuilds the model per hyper-parameter config.
model = cnn_model().to(device)
print("\n Model Summary:")
print(model)
#evaluate and validate functions
def validate(model, dataloader, loss_fn, device):
    """Run one full evaluation pass.

    Returns (mean per-batch loss, accuracy in percent) for use by the
    LR scheduler and the grid-search comparisons.
    """
    model.eval()  # evaluation mode: dropout off, batchnorm uses running stats
    total_loss, n_correct, n_seen = 0.0, 0, 0
    with torch.no_grad():  # no gradients needed for evaluation
        for batch_imgs, batch_labels in dataloader:
            batch_imgs = batch_imgs.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_imgs)
            total_loss += loss_fn(logits, batch_labels).item()
            n_correct += (logits.argmax(1) == batch_labels).sum().item()
            n_seen += batch_labels.size(0)
    return total_loss / len(dataloader), n_correct * 100 / n_seen
def evaluate(model, dataLoader):
    """Print and return top-1 accuracy (%) of `model` on `dataLoader`.

    Uses the module-level `device`. Returning the accuracy (the original
    returned None) is backward compatible and lets callers log the value.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for imgs, labels in dataLoader:
            imgs, labels = imgs.to(device), labels.to(device)
            preds = torch.argmax(model(imgs), 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    accuracy = correct * 100 / total if total else 0.0  # guard empty loader
    print (f"Test Accuracy: {accuracy:.2f}%")
    return accuracy
# Train & Early stopper
def train_and_eval(model, optimizer, scheduler, loss_fn, train_loader, val_loader, device, epochs = 10, patience = 5):
    """Train `model` for up to `epochs` epochs with per-epoch validation.

    Steps the (ReduceLROnPlateau-style) scheduler on validation loss and
    early-stops after `patience` epochs without improvement.
    Returns the best validation loss seen, for grid-search ranking.
    """
    best_val, wait = float("inf"), 0
    for epoch in range(1, epochs + 1):
        #train for one epoch
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for imgs, labels in train_loader:
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            #Tracking metrics
            running_loss += loss.item()
            preds = torch.argmax(outputs, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()
        #validate (before scheduler.step, which keys off val_loss)
        val_loss, val_accuracy = validate(model, val_loader, loss_fn, device)
        scheduler.step(val_loss)
        #early stop check
        #NOTE(review): breaking here skips the final epoch's print below.
        if val_loss < best_val:
            best_val = val_loss
            wait = 0
        else:
            wait+=1
            if wait >= patience:
                break
        train_acc = correct*100/ total
        train_loss = running_loss / len(train_loader)
        print(f"Epoch [{epoch}/{epochs}], Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.2f}%, Val Loss: {val_loss:.4f}, Val Acc: {val_accuracy:.2f}%")
    return best_val
#Grid Search over learning rate, weight decay, and dropout (8 configs).
param_grid = {
    'lr': [1e-2, 1e-3],
    'wd': [1e-4, 1e-5],
    'dropout': [0.3, 0.5]
}
best_overall = {'val_loss': float('inf')}  # best config found so far
for lr, wd, dropout in itertools.product(param_grid['lr'], param_grid['wd'], param_grid['dropout']):
    print(f"\n Config: lr: {lr}, wd: {wd}, dropout: {dropout}")
    #BUG FIX: the constructor parameter is `dropout_rate`; the original
    #passed `dropout=dropout`, which raises TypeError on the first config.
    model = cnn_model(dropout_rate=dropout).to(device)
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=wd)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',factor=0.1, patience=3,min_lr=1e-6)
    loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)
    val_loss = train_and_eval(model, optimizer, scheduler, loss_fn, train_dataloader, val_dataloader, device, epochs = 10, patience =5)
    if val_loss < best_overall['val_loss']:
        best_overall.update({'val_loss': val_loss, 'lr': lr, 'wd': wd, 'dropout': dropout})
        torch.save(model.state_dict(), "Food101/best_model.pth")  # checkpoint new best
        print(f" New overall best (val_loss={val_loss:.4f})")
print(f"\n Grid Search Complete... Best Config: {best_overall}")
#Load and Evaluate Best Model
best_model = cnn_model(dropout_rate=best_overall['dropout']).to(device)
best_model.load_state_dict(torch.load("Food101/best_model.pth"))
evaluate(best_model, test_dataloader)
```
>[time=Mon, Jun 16, 2025]
### Evaluation & Analysis
:::info
:information_source: Note
Added and experimented with generating reports and analysis for model work... Confusion matrix and classification report, Per-Class Metrics, Grad-CAM / Saliency Maps... Added visual progress bar...
**Reflections:** Working with graphs and visuals proved to be a learning curve, but it was nice to see how the models train and make their predictions underneath the surface. I dealt with a lot of problems debugging reports and analysis, especially since the training ran really slowly. Making sure every run didn't go to waste was today's greatest challenge. I faced multiprocessing issues and formatting issues, but in the end, I got a lot of new photos to put in my image gallery. Seeing these colorful image analyses inspired me to push on and go further into my ML learning journey.
:::
```python
import torch
import random
import os
import cv2
import numpy as np
import torch.nn as nn
import seaborn as sns
import torch.optim as optim
import matplotlib.pyplot as plt
from tqdm import tqdm
from torchvision import transforms, datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import confusion_matrix, classification_report, precision_recall_fscore_support
#Define transformations
train_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # ImageNet stats
])
eval_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
#Load the training split TWICE so train/val can carry different transforms.
#BUG FIX: the original assigned `train_ds.dataset.transform` and then
#`val_ds.dataset.transform`; both Subsets share one underlying dataset, so
#the second assignment overwrote the first and training lost augmentation.
base_train = datasets.Food101(
    root="Food101/data",
    split="train",
    download=True,
    transform=train_transform
)
base_val = datasets.Food101(
    root="Food101/data",
    split="train",
    download=False,  # already downloaded above
    transform=eval_transform
)
#split train and validation datasets
total = len(base_train)
train_size = int(0.8 * total)
val_size = total - train_size
#Identical seeds -> identical permutation -> disjoint, consistent indices.
train_ds, _ = random_split(
    base_train,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)
_, val_ds = random_split(
    base_val,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42)
)
test_ds = datasets.Food101(
    root="Food101/data",
    split="test",
    download=False,
    transform=eval_transform
)
#Data loaders
#Disabled multiprocessing because of issues with matplotlib
train_dataloader = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=0)
test_dataloader = DataLoader(test_ds, batch_size=64, shuffle=False, num_workers=0)
val_dataloader = DataLoader(val_ds, batch_size=64, shuffle=False, num_workers=0)
#List of classes: index -> Food101 class name, in the dataset's label order.
#Built from an ordered list instead of a 101-entry literal dict.
_class_names = [
    "apple_pie", "baby_back_ribs", "baklava", "beef_carpaccio", "beef_tartare",
    "beet_salad", "beignets", "bibimbap", "bread_pudding", "breakfast_burrito",
    "bruschetta", "caesar_salad", "cannoli", "caprese_salad", "carrot_cake",
    "ceviche", "cheesecake", "cheese_plate", "chicken_curry", "chicken_quesadilla",
    "chicken_wings", "chocolate_cake", "chocolate_mousse", "churros", "clam_chowder",
    "club_sandwich", "crab_cakes", "creme_brulee", "croque_madame", "cup_cakes",
    "deviled_eggs", "donuts", "dumplings", "edamame", "eggs_benedict",
    "escargots", "falafel", "filet_mignon", "fish_and_chips", "foie_gras",
    "french_fries", "french_onion_soup", "french_toast", "fried_calamari", "fried_rice",
    "frozen_yogurt", "garlic_bread", "gnocchi", "greek_salad", "grilled_cheese_sandwich",
    "grilled_salmon", "guacamole", "gyoza", "hamburger", "hot_and_sour_soup",
    "hot_dog", "huevos_rancheros", "hummus", "ice_cream", "lasagna",
    "lobster_bisque", "lobster_roll_sandwich", "macaroni_and_cheese", "macarons", "miso_soup",
    "mussels", "nachos", "omelette", "onion_rings", "oysters",
    "pad_thai", "paella", "pancakes", "panna_cotta", "peking_duck",
    "pho", "pizza", "pork_chop", "poutine", "prime_rib",
    "pulled_pork_sandwich", "ramen", "ravioli", "red_velvet_cake", "risotto",
    "samosa", "sashimi", "scallops", "seaweed_salad", "shrimp_and_grits",
    "spaghetti_bolognese", "spaghetti_carbonara", "spring_rolls", "steak", "strawberry_shortcake",
    "sushi", "tacos", "takoyaki", "tiramisu", "tuna_tartare",
    "waffles",
]
labels_map = dict(enumerate(_class_names))
num_classes = len(labels_map)  # 101 Food101 categories
#visualize classes
def visualize_classes():
    """Show four 5x5 grids of random training images, one class per cell.

    Class uniqueness is enforced per figure only; classes may repeat
    across the four groups. (Removed the unused `current_class` counter.)
    """
    print("=== Welcome to Machine Learning Food101 ===")
    row, col = 5, 5
    for x in range(4):
        figs, axes = plt.subplots(row, col, figsize=(row*2, col*2))
        figs.canvas.manager.set_window_title("Food101 Class Visuals")
        figs.suptitle(f"Food101 Classes (Group {x+1} of 4)", fontsize=16)
        axes = axes.flatten()
        shown_classes = set()
        for i in range(row*col):
            #rejection-sample until we draw a class not yet shown in this figure
            while True:
                idx = random.randint(0, len(train_ds) - 1)
                img, label = train_ds[idx]
                if label not in shown_classes:
                    shown_classes.add(label)
                    break
            img_np = img.permute(1, 2, 0).numpy()  # CHW tensor -> HWC for imshow
            img_np = np.clip(img_np, 0, 1)  # normalized values fall outside [0, 1]
            ax = axes[i]
            ax.imshow(img_np)
            ax.set_title(labels_map[label], fontsize = 8)
            ax.axis('off')
        print(f"Showing figure...{x+1}")
        plt.tight_layout()
        plt.show()
    plt.close('all')  # free figure memory before training starts
visualize_classes() #optional
#Check if GPU is available; prefer CUDA, fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
#Define model
class cnn_model(nn.Module):
    """Small CNN for 128x128 RGB inputs.

    Three conv -> batchnorm -> ReLU -> maxpool stages (channels 3->32->64->128,
    each pool halving spatial size, ending at 16x16), then a
    512 -> 256 -> num_classes fully connected head with dropout.
    Attribute names match the original so saved state_dicts stay loadable.
    """
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.batchnorm1 = nn.BatchNorm2d(32, momentum=0.05)
        self.batchnorm2 = nn.BatchNorm2d(64)
        self.batchnorm3 = nn.BatchNorm2d(128)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(128*16*16, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        #convolutional feature extractor
        for conv, bn in ((self.conv1, self.batchnorm1),
                         (self.conv2, self.batchnorm2),
                         (self.conv3, self.batchnorm3)):
            x = self.pool(self.relu(bn(conv(x))))
        #fully connected classifier head
        x = self.flatten(x)
        for fc in (self.fc1, self.fc2):
            x = self.dropout(self.relu(fc(x)))
        return self.fc3(x)
#Instantiate the network on the chosen device and show its layer summary.
model = cnn_model().to(device)
print("\n Model Summary:")
print(model)
#optimizer and loss function
#SGD with momentum and a small weight decay; label smoothing softens targets.
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0001)
loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)
#Cut the learning rate 10x when validation loss plateaus for 3 epochs.
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode = "min",
    factor = 0.1,
    patience = 3,
    min_lr= 1e-6
)
#Keep track of model performance
def validate(model, dataloader, loss_fn, device):
    """Run one full evaluation pass.

    Returns (mean per-batch loss, accuracy in percent) for use by the
    LR scheduler and the training-history plots.
    """
    model.eval()  # evaluation mode: dropout off, batchnorm uses running stats
    total_loss, n_correct, n_seen = 0.0, 0, 0
    with torch.no_grad():  # no gradients needed for evaluation
        for batch_imgs, batch_labels in dataloader:
            batch_imgs = batch_imgs.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_imgs)
            total_loss += loss_fn(logits, batch_labels).item()
            n_correct += (logits.argmax(1) == batch_labels).sum().item()
            n_seen += batch_labels.size(0)
    return total_loss / len(dataloader), n_correct * 100 / n_seen
#Training loop: per-epoch train pass (with tqdm progress bar), validation,
#LR scheduling, and checkpoint/early-stopping bookkeeping.
train_losses, train_accs = [], []  # per-epoch history for plotting
val_losses, val_accs = [], []
epochs = 2
current_batch = 0  # NOTE(review): unused since the tqdm bar replaced manual batch prints
best_val_loss = float("inf")
patience, wait = 5, 0  # early-stopping budget / epochs without improvement
checkpoint_path = "Food101/best_model.pth"
for e in range(epochs):
    print("Starting epoch:", e+1)
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    progress_bar = tqdm(train_dataloader, desc="Training", leave=False)#initialize progressbar
    for imgs, labels in progress_bar: #changed to progress_bar to show progress
        imgs, labels = imgs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(imgs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        preds = torch.argmax(outputs,1)
        total += labels.size(0)
        correct += (preds == labels).sum().item()
        #Update progress bar with running loss and accuracy
        progress_bar.set_postfix(loss =f"{loss.item():.4f}", accuracy = f"{correct*100/total:.2f}%")
    val_loss, val_accuracy = validate(model, val_dataloader, loss_fn, device)
    val_losses.append(val_loss)
    val_accs.append(val_accuracy)
    scheduler.step(val_loss)  # ReduceLROnPlateau keys off validation loss
    accuracy = correct*100 / total
    loss = running_loss / len(train_dataloader)  # mean batch loss this epoch
    train_losses.append(loss)
    train_accs.append(accuracy)
    print(f"Epoch [{e+1}/{epochs}], Loss: {loss:.4f}, Accuracy: {accuracy:.2f}%")
    print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%\n")
    #Early stopping /checkpoint: save on any improvement, stop after
    #`patience` consecutive epochs without one.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        wait = 0
        torch.save(model.state_dict(), checkpoint_path)
        print (f" New best val loss: {val_loss:.4f}, checkpoint saved.")
    else:
        wait += 1
        print (f" No improvement for {wait}/{patience} epochs.")
        if wait >= patience:
            print(" Early stopping triggered. Stopping training.")
            break
#Testing the model
def evaluate(model, dataLoader):
    """Print and return top-1 accuracy (%) of `model` on `dataLoader`.

    Uses the module-level `device`. Returning the accuracy (the original
    returned None) is backward compatible and lets callers log the value.
    """
    print("Testing started...")
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for imgs, labels in dataLoader:
            imgs, labels = imgs.to(device), labels.to(device)
            preds = torch.argmax(model(imgs), 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    accuracy = correct * 100 / total if total else 0.0  # guard empty loader
    print (f"Test Accuracy: {accuracy:.2f}%")
    return accuracy
evaluate(model, test_dataloader)
#Save and load [best] model
print("Saving model...")
torch.save(model.state_dict(), "Food101/Food101_model.pth")
#Round-trip through a fresh instance to confirm the weights reload cleanly.
model = cnn_model().to(device)
print("Loading model...")
model.load_state_dict(torch.load( "Food101/Food101_model.pth"))
model.eval()  # inference mode: disables dropout, freezes batchnorm stats
#-----Confusion matrix and classification report-----
print("Generating confusion matrix and classification report...")
#Shows where the model gets confused, structural patterns in errors, and
#imbalance in predicted classes.
y_true, y_pred = [], []
#y_true will store the true labels
#y_pred will store predicted labels
with torch.no_grad():
    for imgs, labels in test_dataloader:
        imgs = imgs.to(device)
        #forward pass -> take highest-scoring class -> move to cpu -> python list
        preds = model(imgs).argmax(dim=1).cpu().tolist()
        y_pred.extend(preds) #append this batch's predicted labels
        y_true.extend(labels.tolist()) #append this batch's true labels
#creates a fitted 2D numpy array:
#cm[i, j] counts how often a sample of true class i was predicted as class j
#labels=list(range(num_classes)) fixes row/column order to the class indices
cm = confusion_matrix(y_true, y_pred, labels=list(range(num_classes)))
plt.figure(figsize=(12, 10))
#creating heatmap
sns.heatmap(
    cm,
    cmap = "Blues", #blue gradient map
    fmt = "d", #NOTE(review): fmt only takes effect with annot=True; inert here
    cbar = False, #hides color bar legend
    #Replaces the 0-100 tick marks with class names
    xticklabels=[labels_map[i] for i in range(num_classes)],
    yticklabels=[labels_map[i] for i in range(num_classes)]
)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.title("Confusion Matrix on Test Set")
plt.xticks(rotation=90) #rotates x-axis labels to avoid overlap
plt.yticks(rotation=0, fontsize=5)
plt.tight_layout()
plt.show()
plt.close('all')
#per-class summary report printed to the console
print("Classification Report:")
print(classification_report(
    y_true, y_pred,
    target_names=[labels_map[i] for i in range(num_classes)] #replaces numbers with names of classes
))
#-----Per-class matrix-----
print("Computing per-class precision, recall, F1, and support...")
#precision_recall_fscore_support returns one array per metric, indexed by class
precision, recall, f1, support = precision_recall_fscore_support(
    y_true, y_pred, labels = list(range(num_classes))
)
print("Per-class matrices")
for i in range(num_classes):
    cls = labels_map[i]
    #BUG FIX: the original mixed implicit f-string concatenation with a stray
    #trailing comma (which made one piece a separate print argument), so the
    #separators were inconsistent. One concatenated f-string keeps them uniform.
    print(f"{cls:20s}" #left-justified class name in a 20-character field
          f"Precision: {precision[i]:.2f}, " #of samples predicted as class i, fraction actually class i
          f"Recall: {recall[i]:.2f}, " #of true class i samples, fraction correctly predicted
          f"F1: {f1[i]:.2f}, " #harmonic mean of precision & recall for class i
          f"Support= {support[i]}") #number of true samples in class i
#-----Grad-CAM / Saliency Maps-----
print("Generating Grad-CAM visualizations...")
#Gradient-weighted Class Activation Mapping: visualizes where the model is
#"looking" when it predicts a class for a single image.
def grad_cam(model, img_tensor, target_layer):
    """Compute a Grad-CAM heatmap for one image.

    Args:
        model: network to run; target_layer must lie on its forward path.
        img_tensor: single CHW image tensor (no batch dimension).
        target_layer: conv layer whose activations/gradients are inspected.

    Returns:
        (pred_class, cam): predicted class index and an HxW heatmap in [0, 1].

    Raises:
        RuntimeError: if target_layer never fired during the forward pass
            (e.g. it belongs to a different model instance).
    """
    model.eval()
    activations, gradients = {}, {}
    def forward_hook(module, input, output):
        #capture the layer's feature maps (detached: no grad tracking needed)
        activations['value'] = output.detach()
    def backward_hook(module, grad_in, grad_out):
        #capture d(score)/d(activations) during backprop
        gradients['value'] = grad_out[0].detach()
    handle_f = target_layer.register_forward_hook(forward_hook)
    handle_b = target_layer.register_full_backward_hook(backward_hook)
    try:
        img = img_tensor.unsqueeze(0).to(device) #add batch dim -> [1, C, H, W]
        logits = model(img)
        pred_class = logits.argmax(dim=1).item()
        model.zero_grad()
        #backpropagate from the single scalar score of the predicted class
        logits[0, pred_class].backward()
    finally:
        #BUG FIX: hooks are now removed even if the forward/backward pass
        #raises, so repeated calls cannot accumulate stale hooks
        handle_f.remove()
        handle_b.remove()
    if 'value' not in activations or 'value' not in gradients:
        #explicit diagnostic instead of the original cryptic KeyError
        raise RuntimeError("target_layer was not executed by model's forward pass")
    act = activations['value'][0].cpu().numpy() #feature maps for this image
    grad = gradients['value'][0].cpu().numpy() #matching gradient maps
    weights = grad.mean(axis=(1, 2)) #per-channel importance (mean over spatial dims)
    if len(weights) == 0:
        print("Warning: No gradients captured. Using fallback visualization.")
        cam = np.ones(act.shape[1:], dtype=np.float32) #uniform heatmap fallback
    else:
        cam = np.zeros(act.shape[1:], dtype=np.float32)
        for i, w in enumerate(weights):
            cam += w * act[i] #weighted sum of channel activation maps
    cam = np.maximum(cam, 0) #keep only positive influence
    cam = cv2.resize(cam, (img_tensor.shape[2], img_tensor.shape[1])) #cv2 takes (width, height)
    #BUG FIX: guard against a flat map (max == min), which previously
    #produced NaNs via division by zero
    span = cam.max() - cam.min()
    cam = (cam - cam.min()) / span if span > 0 else np.zeros_like(cam)
    return pred_class, cam
#Visualizing 3 random test images
for _ in range(3):
    idx = random.randint(0, len(test_ds)-1) #random test-set index
    img_t, img_lbl = test_ds[idx]
    pred_lbl, cam = grad_cam(model, img_t, model.conv3) #hook the last conv layer
    #CHW tensor -> HWC numpy image, clipped to [0,1] for display
    img_np = img_t.permute(1,2,0).cpu().numpy().clip(0,1)
    plt.figure(figsize=(6,3))
    plt.subplot(1,2,1) #left panel: plain image
    plt.imshow(img_np)
    plt.title(f"Actual: {labels_map[img_lbl]}\n Predicted: {labels_map[pred_lbl]}")
    plt.axis('off')
    plt.subplot(1,2,2) #right panel: same image with attention overlay
    plt.imshow(img_np)
    plt.imshow(cam, cmap = 'jet', alpha =0.5) #semi-transparent heatmap on top
    plt.title("Grad-CAM overlay")
    plt.axis('off')
    plt.show()
    plt.close('all')
#Plotting training and validation loss and accuracy
print("Plotting training and validation loss and accuracy...")
#assumes train_losses/val_losses/train_accs/val_accs hold one entry per epoch -- TODO confirm lengths match `epochs`
epochs_range = range(1, epochs + 1)
plt.figure()
plt.plot(epochs_range, train_losses, label = "Training loss")
plt.plot(epochs_range, val_losses, label = "Validation loss")
plt.xlabel("Epochs")
plt.ylabel("Loss")
plt.title("Training vs. Validation loss")
plt.legend()
plt.show()
plt.close('all')
#Second figure: accuracy curves on the same epoch axis
plt.figure()
plt.plot(epochs_range, train_accs, label = "Training accuracy")
plt.plot(epochs_range, val_accs, label = "Validation accuracy")
plt.xlabel("Epochs")
plt.ylabel("Accuracy (%)")
plt.title("Training vs. Validation accuracy")
plt.legend()
plt.show()
plt.close('all')
```
>[time=Tues, Jun 17, 2025]
### Traditional Ensemble
:::info
:information_source: Note
Implemented traditional training model ensembles...
**Reflection:** Today, I learned an apparently very common math/ML abbreviation: wrt. It means "with respect to" -- I thought it was interesting that I didn't know it before. Programming-wise, the code is slowly getting more advanced and I've had to do increasingly more debugging. Although it's annoying, debugging actually helped me verify my knowledge and exercised my problem-solving skills with applied ML. I almost dismissed a draft of snapshot ensemble code that never even called the training loop. When my boss came to check up on my progress, he told me that traditional ensembles weren't practical and were too expensive. That led to researching the concepts I've learned so far and what programmers actually adopt in the real world. Even though some of these algorithms may be impractical, they are still valuable skills, especially in my learning journey. Many life lessons were learned today and I obtained two more model training algorithms for my personal ML toolbox (traditional ensemble & snapshot ensemble).
:::
```python=
import torch
import random
import cv2
import numpy as np
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
import seaborn as sns
from tqdm import tqdm
from torchvision import transforms, datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import confusion_matrix, classification_report, precision_recall_fscore_support
#Define transformations
#Augmentations apply to training images only; evaluation uses a deterministic
#resize + normalize so metrics are reproducible.
train_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
eval_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
#BUG FIX: the original split one base dataset and then assigned
#train_ds.dataset.transform and val_ds.dataset.transform in turn -- but both
#Subsets share the SAME underlying dataset object, so the second assignment
#(eval_transform) silently overwrote the training augmentations too.
#Use two dataset instances over the same files so each split keeps its own transform.
base_train = datasets.Food101(
    root = "Food101/data",
    split = "train",
    download = True,
    transform = train_transform
)
base_val = datasets.Food101(
    root = "Food101/data",
    split = "train",
    download = False, #already downloaded above
    transform = eval_transform
)
#Split into train and validation sets. The identical seeded generator gives
#both random_split calls the same permutation, so the subsets are disjoint
#and consistent across the two dataset instances.
total = len(base_train)
train_size = int(0.8 * total)
val_size = total - train_size
train_ds, _ = random_split(
    base_train,
    [train_size, val_size],
    generator = torch.Generator().manual_seed(42)
)
_, val_ds = random_split(
    base_val,
    [train_size, val_size],
    generator = torch.Generator().manual_seed(42)
)
#Load separate test dataset
test_ds = datasets.Food101(
    root = "Food101/data",
    split = "test",
    download = False,
    transform = eval_transform
)
#Data loaders
train_dataloader = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=0)
test_dataloader = DataLoader(test_ds, batch_size=64, shuffle=False, num_workers=0)
val_dataloader = DataLoader(val_ds, batch_size=64, shuffle=False, num_workers=0)
#List of classes: index -> Food101 class name, in the dataset's fixed order
_class_names = [
    "apple_pie", "baby_back_ribs", "baklava", "beef_carpaccio", "beef_tartare",
    "beet_salad", "beignets", "bibimbap", "bread_pudding", "breakfast_burrito",
    "bruschetta", "caesar_salad", "cannoli", "caprese_salad", "carrot_cake",
    "ceviche", "cheesecake", "cheese_plate", "chicken_curry", "chicken_quesadilla",
    "chicken_wings", "chocolate_cake", "chocolate_mousse", "churros", "clam_chowder",
    "club_sandwich", "crab_cakes", "creme_brulee", "croque_madame", "cup_cakes",
    "deviled_eggs", "donuts", "dumplings", "edamame", "eggs_benedict",
    "escargots", "falafel", "filet_mignon", "fish_and_chips", "foie_gras",
    "french_fries", "french_onion_soup", "french_toast", "fried_calamari", "fried_rice",
    "frozen_yogurt", "garlic_bread", "gnocchi", "greek_salad", "grilled_cheese_sandwich",
    "grilled_salmon", "guacamole", "gyoza", "hamburger", "hot_and_sour_soup",
    "hot_dog", "huevos_rancheros", "hummus", "ice_cream", "lasagna",
    "lobster_bisque", "lobster_roll_sandwich", "macaroni_and_cheese", "macarons", "miso_soup",
    "mussels", "nachos", "omelette", "onion_rings", "oysters",
    "pad_thai", "paella", "pancakes", "panna_cotta", "peking_duck",
    "pho", "pizza", "pork_chop", "poutine", "prime_rib",
    "pulled_pork_sandwich", "ramen", "ravioli", "red_velvet_cake", "risotto",
    "samosa", "sashimi", "scallops", "seaweed_salad", "shrimp_and_grits",
    "spaghetti_bolognese", "spaghetti_carbonara", "spring_rolls", "steak", "strawberry_shortcake",
    "sushi", "tacos", "takoyaki", "tiramisu", "tuna_tartare",
    "waffles",
]
#Same mapping as the original literal dict: {0: "apple_pie", ..., 100: "waffles"}
labels_map = dict(enumerate(_class_names))
num_classes = len(labels_map)
#Visualize classes function
def visualize_classes():
    """Show four 5x5 grids of random training images, one unique class per tile.

    Reads the module-level train_ds and labels_map; purely a visual aid.
    """
    print("=== Welcome to Machine Learning Food101 ===")
    row, col = 5, 5 #grid shape per figure (removed the unused current_class counter)
    for x in range(4):
        figs, axes = plt.subplots(row, col, figsize=(row*2, col*2))
        figs.canvas.manager.set_window_title("Food101 Class Visuals")
        figs.suptitle(f"Food101 Classes (Group {x+1} of 4)", fontsize=16)
        axes = axes.flatten()
        shown_classes = set() #labels already drawn in this figure, to avoid duplicates
        for i in range(row*col):
            #resample until an unseen class appears (25 tiles < 101 classes, so this terminates)
            while True:
                idx = random.randint(0, len(train_ds) - 1)
                img, label = train_ds[idx]
                if label not in shown_classes:
                    shown_classes.add(label)
                    break
            #CHW tensor -> HWC numpy, clipped since normalized values leave [0,1]
            img_np = np.clip(img.permute(1, 2, 0).numpy(), 0, 1)
            ax = axes[i]
            ax.imshow(img_np)
            ax.set_title(labels_map[label], fontsize = 8)
            ax.axis('off')
        print(f"Showing figure...{x+1}")
        plt.tight_layout()
        plt.show()
        plt.close('all')
#visualize_classes()
#Check if GPU is available; every model/tensor below is moved to this device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#Define model
class cnn_model(nn.Module):
    """Three conv/batch-norm/pool stages followed by a three-layer classifier.

    Input: (N, 3, 128, 128) images; output: (N, num_classes) raw logits.
    Each pooling stage halves the spatial size: 128 -> 64 -> 32 -> 16,
    which is where the 128*16*16 flattened size comes from.
    """
    def __init__(self):
        super().__init__() #modern zero-arg super() instead of super(cnn_model, self)
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        #NOTE(review): momentum=0.05 only on the first batch-norm layer looks
        #unintentional -- confirm whether all three should match
        self.batchnorm1 = nn.BatchNorm2d(32, momentum=0.05)
        self.batchnorm2 = nn.BatchNorm2d(64)
        self.batchnorm3 = nn.BatchNorm2d(128)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(128*16*16, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return unnormalized class logits for a batch of images."""
        x = self.pool(self.relu(self.batchnorm1(self.conv1(x))))
        x = self.pool(self.relu(self.batchnorm2(self.conv2(x))))
        x = self.pool(self.relu(self.batchnorm3(self.conv3(x))))
        x = self.flatten(x)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.dropout(self.relu(self.fc2(x)))
        return self.fc3(x)
#Instantiate the network on the selected device and print its layer summary
model = cnn_model().to(device)
print("\n Model Summary:")
print(model)
#Validate Function
def validate(model, dataloader, loss_fn, device):
    """Score `model` on `dataloader`; return (mean batch loss, accuracy %)."""
    model.eval() #evaluation mode: no dropout, frozen batch-norm stats
    running_loss = 0.0
    seen, hits = 0, 0
    with torch.no_grad(): #inference only -- skip autograd bookkeeping
        for batch_imgs, batch_labels in dataloader:
            batch_imgs = batch_imgs.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_imgs)
            running_loss += loss_fn(logits, batch_labels).item()
            hits += (torch.argmax(logits, 1) == batch_labels).sum().item()
            seen += batch_labels.size(0)
    #mean loss per batch (what the LR scheduler consumes) and percent accuracy
    return running_loss / len(dataloader), hits * 100 / seen
#Ensemble prediction function
def ensemble_predict(models, input):
    """Average the softmax probabilities of all ensemble members on one batch."""
    print("Ensemble prediction...")
    member_probs = []
    with torch.no_grad():
        for member in models:
            #softmax over the class dimension -> per-member probability rows
            member_probs.append(F.softmax(member(input), dim=1))
    #element-wise mean across members; shape stays [batch, num_classes]
    return torch.mean(torch.stack(member_probs), dim=0)
#Training loop
#NOTE(review): these module-level values are shadowed by identically named
#locals inside train_single_model; only the local copies are actually used
epochs = 1
best_val_loss = float("inf")
patience, wait = 5, 0
#Trains a single model
def train_single_model(seed, save_path):
    """Train one cnn_model from scratch with the given RNG seed.

    Checkpoints the best-validation-loss weights to save_path and returns the
    model in its final (not necessarily best) state; callers reload from the
    checkpoint file to get the best weights.
    """
    #random seed
    torch.manual_seed(seed) #sets seed for PyTorch's RNG
    random.seed(seed) #sets seed for Python's built-in random module
    np.random.seed(seed) #sets the seed for NumPy's RNG
    #model, optimizer, loss function, scheduler
    model = cnn_model().to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0001)
    loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)
    #ReduceLROnPlateau: cut LR by 10x after 3 epochs without val-loss improvement
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode = "min", factor = 0.1, patience = 3, min_lr= 1e-6)
    best_val_loss = float("inf")
    patience, wait = 5, 0 #early-stopping budget (locals; shadow the module globals)
    epochs = 1 #local epoch count used by this loop
    #Training loop (basically same as before)
    for e in range(epochs):
        model.train()
        running_loss, correct, total = 0.0, 0, 0
        progress_bar = tqdm(train_dataloader, desc=f"Model {seed} Epoch {e+1}", leave=False)#initialize progressbar
        for imgs, labels in progress_bar: #changed to progress_bar to show progress
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            preds = torch.argmax(outputs,1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()
            progress_bar.set_postfix(loss =f"{loss.item():.4f}", accuracy = f"{correct*100/total:.2f}%")
        val_loss, val_accuracy = validate(model, val_dataloader, loss_fn, device)
        scheduler.step(val_loss) #plateau scheduler watches validation loss
        accuracy = correct*100 / total
        loss = running_loss / len(train_dataloader)
        print(f"Epoch [{e+1}/{epochs}], Loss: {loss:.4f}, Accuracy: {accuracy:.2f}%")
        print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%\n")
        #Early stopping /checkpoint
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            wait = 0
            torch.save(model.state_dict(), save_path) #checkpoint best weights so far
            print (f" New best val loss: {val_loss:.4f}, checkpoint saved.")
        else:
            wait += 1
            print (f" No improvement for {wait}/{patience} epochs.")
            if wait >= patience:
                print(" Early stopping triggered. Stopping training.")
                break
    return model
#Training multiple models with different seeds
ensemble_models = []
n_models = 3
for i in range(n_models):
    path = f"Food101/Food101_model_{i}.pth"
    print(f"\n Training model {i+1}/{n_models}...")
    #return value unused on purpose: the best weights come from the checkpoint file
    train_single_model(i, path) #call single model training function
    #load best ver of this model
    model = cnn_model().to(device)
    model.load_state_dict(torch.load(path))
    model.eval() #inference mode for ensemble evaluation
    ensemble_models.append(model)
#Evaluate ensemble function
#Get collective predictions from ensemble models
def evaluate_ensemble(models, dataloader):
    """Run the averaged-softmax ensemble over a dataloader and report metrics.

    Prints a classification report and draws a confusion-matrix heatmap.
    """
    all_preds, all_labels = [], []
    for imgs, labels in tqdm(dataloader, desc= "Evaluating Ensemble"):
        imgs = imgs.to(device)
        avg_preds = ensemble_predict(models, imgs) #mean softmax across members
        preds = torch.argmax(avg_preds, dim=1).cpu().tolist()
        all_preds.extend(preds)
        all_labels.extend(labels.tolist())
    #classification report
    print("Classification Report... ")
    print(classification_report(all_labels, all_preds, target_names=[labels_map[i] for i in range(num_classes)]))
    #Confusion matrix
    print("Confusion Matrix...")
    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(12,10))
    sns.heatmap(
        cm,
        cmap = "Blues",
        fmt = "d", #NOTE(review): fmt only takes effect with annot=True, which is not set here
        cbar = False,
        xticklabels = [labels_map[i] for i in range(num_classes)],
        yticklabels = [labels_map[i] for i in range(num_classes)]
    )
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.xticks(rotation=90)
    plt.yticks(fontsize =5)
    plt.tight_layout()
    plt.show()
    plt.close('all')
#Run evaluation !!
evaluate_ensemble(ensemble_models, test_dataloader)
#Grad CAM
print("Generating Grad-CAM visualizations...")
def grad_cam(model, img_tensor, target_layer):
    """Compute a Grad-CAM heatmap for one image.

    Args:
        model: network to run; target_layer must lie on its forward path.
        img_tensor: single CHW image tensor (no batch dimension).
        target_layer: conv layer whose activations/gradients are inspected.

    Returns:
        (pred_class, cam): predicted class index and an HxW heatmap in [0, 1].

    Raises:
        RuntimeError: if target_layer never fired during the forward pass
            (e.g. it belongs to a different model instance).
    """
    model.eval()
    activations, gradients = {}, {}
    def forward_hook(module, input, output):
        #capture the layer's feature maps (detached: no grad tracking needed)
        activations['value'] = output.detach()
    def backward_hook(module, grad_in, grad_out):
        #capture d(score)/d(activations) during backprop
        gradients['value'] = grad_out[0].detach()
    handle_f = target_layer.register_forward_hook(forward_hook)
    handle_b = target_layer.register_full_backward_hook(backward_hook)
    try:
        img = img_tensor.unsqueeze(0).to(device) #add batch dim -> [1, C, H, W]
        logits = model(img)
        pred_class = logits.argmax(dim=1).item()
        model.zero_grad()
        #backpropagate from the single scalar score of the predicted class
        logits[0, pred_class].backward()
    finally:
        #BUG FIX: hooks are now removed even if the forward/backward pass
        #raises, so repeated calls cannot accumulate stale hooks
        handle_f.remove()
        handle_b.remove()
    if 'value' not in activations or 'value' not in gradients:
        #explicit diagnostic instead of the original cryptic KeyError
        raise RuntimeError("target_layer was not executed by model's forward pass")
    act = activations['value'][0].cpu().numpy() #feature maps for this image
    grad = gradients['value'][0].cpu().numpy() #matching gradient maps
    weights = grad.mean(axis=(1, 2)) #per-channel importance (mean over spatial dims)
    if len(weights) == 0:
        print("Warning: No gradients captured. Using fallback visualization.")
        cam = np.ones(act.shape[1:], dtype=np.float32) #uniform heatmap fallback
    else:
        cam = np.zeros(act.shape[1:], dtype=np.float32)
        for i, w in enumerate(weights):
            cam += w * act[i] #weighted sum of channel activation maps
    cam = np.maximum(cam, 0) #keep only positive influence
    cam = cv2.resize(cam, (img_tensor.shape[2], img_tensor.shape[1])) #cv2 takes (width, height)
    #BUG FIX: guard against a flat map (max == min), which previously
    #produced NaNs via division by zero
    span = cam.max() - cam.min()
    cam = (cam - cam.min()) / span if span > 0 else np.zeros_like(cam)
    return pred_class, cam
#Visualizing 3 random test images with random models
for _ in range(3):
    random_model = random.choice(ensemble_models) #pick a random ensemble member
    idx = random.randint(0, len(test_ds)-1)
    img_t, img_lbl = test_ds[idx]
    #BUG FIX: the original called grad_cam(model, ..., random_model.conv3).
    #The forward pass ran through `model`, so the hooks registered on
    #random_model.conv3 never fired and the CAM lookup failed (KeyError).
    #Run and hook the SAME randomly chosen model.
    pred_lbl, cam = grad_cam(random_model, img_t, random_model.conv3)
    img_np = img_t.permute(1,2,0).cpu().numpy().clip(0,1) #CHW -> HWC for display
    plt.figure(figsize=(6,3))
    plt.subplot(1,2,1) #left panel: plain image
    plt.imshow(img_np)
    plt.title(f"Actual: {labels_map[img_lbl]}\n Predicted: {labels_map[pred_lbl]}")
    plt.axis('off')
    plt.subplot(1,2,2) #right panel: heatmap overlay
    plt.imshow(img_np)
    plt.imshow(cam, cmap = 'jet', alpha =0.5)
    plt.title("Grad-CAM overlay")
    plt.axis('off')
    plt.show()
    plt.close('all')
#Save and load [best] model
#NOTE(review): `model` here is simply the last ensemble member loaded in the
#loop above, not necessarily the best-performing one -- confirm intent
print("Saving model...")
torch.save(model.state_dict(), "Food101/Food101_model.pth")
model = cnn_model().to(device)
print("Loading model...")
model.load_state_dict(torch.load( "Food101/Food101_model.pth"))
model.eval() #inference mode: disables dropout, uses running batch-norm stats
```
### Snapshot Ensemble
:::info
:information_source: Note
Learned a faster alternative to traditional ensembles... replaced it with snapshot ensemble
:::
```python=
import torch
import random
import cv2
import copy
import numpy as np
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
import seaborn as sns
from tqdm import tqdm
from torchvision import transforms, datasets
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader, random_split
from sklearn.metrics import confusion_matrix, classification_report, precision_recall_fscore_support
#Define transformations
#Augmentations apply to training images only; evaluation uses a deterministic
#resize + normalize so metrics are reproducible.
train_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
eval_transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])
#BUG FIX: the original split one base dataset and then assigned
#train_ds.dataset.transform and val_ds.dataset.transform in turn -- but both
#Subsets share the SAME underlying dataset object, so the second assignment
#(eval_transform) silently overwrote the training augmentations too.
#Use two dataset instances over the same files so each split keeps its own transform.
base_train = datasets.Food101(
    root = "Food101/data",
    split = "train",
    download = True,
    transform = train_transform
)
base_val = datasets.Food101(
    root = "Food101/data",
    split = "train",
    download = False, #already downloaded above
    transform = eval_transform
)
#Split into train and validation sets. The identical seeded generator gives
#both random_split calls the same permutation, so the subsets are disjoint
#and consistent across the two dataset instances.
total = len(base_train)
train_size = int(0.8 * total)
val_size = total - train_size
train_ds, _ = random_split(
    base_train,
    [train_size, val_size],
    generator = torch.Generator().manual_seed(42)
)
_, val_ds = random_split(
    base_val,
    [train_size, val_size],
    generator = torch.Generator().manual_seed(42)
)
#Load separate test dataset
test_ds = datasets.Food101(
    root = "Food101/data",
    split = "test",
    download = False,
    transform = eval_transform
)
#Data loaders
train_dataloader = DataLoader(train_ds, batch_size=64, shuffle=True, num_workers=0)
test_dataloader = DataLoader(test_ds, batch_size=64, shuffle=False, num_workers=0)
val_dataloader = DataLoader(val_ds, batch_size=64, shuffle=False, num_workers=0)
#List of classes: index -> Food101 class name, in the dataset's fixed order
_class_names = [
    "apple_pie", "baby_back_ribs", "baklava", "beef_carpaccio", "beef_tartare",
    "beet_salad", "beignets", "bibimbap", "bread_pudding", "breakfast_burrito",
    "bruschetta", "caesar_salad", "cannoli", "caprese_salad", "carrot_cake",
    "ceviche", "cheesecake", "cheese_plate", "chicken_curry", "chicken_quesadilla",
    "chicken_wings", "chocolate_cake", "chocolate_mousse", "churros", "clam_chowder",
    "club_sandwich", "crab_cakes", "creme_brulee", "croque_madame", "cup_cakes",
    "deviled_eggs", "donuts", "dumplings", "edamame", "eggs_benedict",
    "escargots", "falafel", "filet_mignon", "fish_and_chips", "foie_gras",
    "french_fries", "french_onion_soup", "french_toast", "fried_calamari", "fried_rice",
    "frozen_yogurt", "garlic_bread", "gnocchi", "greek_salad", "grilled_cheese_sandwich",
    "grilled_salmon", "guacamole", "gyoza", "hamburger", "hot_and_sour_soup",
    "hot_dog", "huevos_rancheros", "hummus", "ice_cream", "lasagna",
    "lobster_bisque", "lobster_roll_sandwich", "macaroni_and_cheese", "macarons", "miso_soup",
    "mussels", "nachos", "omelette", "onion_rings", "oysters",
    "pad_thai", "paella", "pancakes", "panna_cotta", "peking_duck",
    "pho", "pizza", "pork_chop", "poutine", "prime_rib",
    "pulled_pork_sandwich", "ramen", "ravioli", "red_velvet_cake", "risotto",
    "samosa", "sashimi", "scallops", "seaweed_salad", "shrimp_and_grits",
    "spaghetti_bolognese", "spaghetti_carbonara", "spring_rolls", "steak", "strawberry_shortcake",
    "sushi", "tacos", "takoyaki", "tiramisu", "tuna_tartare",
    "waffles",
]
#Same mapping as the original literal dict: {0: "apple_pie", ..., 100: "waffles"}
labels_map = dict(enumerate(_class_names))
num_classes = len(labels_map)
#Visualize classes function
def visualize_classes():
    """Show four 5x5 grids of random training images, one unique class per tile.

    Reads the module-level train_ds and labels_map; purely a visual aid.
    """
    print("=== Welcome to Machine Learning Food101 ===")
    row, col = 5, 5 #grid shape per figure (removed the unused current_class counter)
    for x in range(4):
        figs, axes = plt.subplots(row, col, figsize=(row*2, col*2))
        figs.canvas.manager.set_window_title("Food101 Class Visuals")
        figs.suptitle(f"Food101 Classes (Group {x+1} of 4)", fontsize=16)
        axes = axes.flatten()
        shown_classes = set() #labels already drawn in this figure, to avoid duplicates
        for i in range(row*col):
            #resample until an unseen class appears (25 tiles < 101 classes, so this terminates)
            while True:
                idx = random.randint(0, len(train_ds) - 1)
                img, label = train_ds[idx]
                if label not in shown_classes:
                    shown_classes.add(label)
                    break
            #CHW tensor -> HWC numpy, clipped since normalized values leave [0,1]
            img_np = np.clip(img.permute(1, 2, 0).numpy(), 0, 1)
            ax = axes[i]
            ax.imshow(img_np)
            ax.set_title(labels_map[label], fontsize = 8)
            ax.axis('off')
        print(f"Showing figure...{x+1}")
        plt.tight_layout()
        plt.show()
        plt.close('all')
#visualize_classes()
#Check if GPU is available; every model/tensor below is moved to this device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#Define model
class cnn_model(nn.Module):
    """Three conv/batch-norm/pool stages followed by a three-layer classifier.

    Input: (N, 3, 128, 128) images; output: (N, num_classes) raw logits.
    Each pooling stage halves the spatial size: 128 -> 64 -> 32 -> 16,
    which is where the 128*16*16 flattened size comes from.
    """
    def __init__(self):
        super().__init__() #modern zero-arg super() instead of super(cnn_model, self)
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        #NOTE(review): momentum=0.05 only on the first batch-norm layer looks
        #unintentional -- confirm whether all three should match
        self.batchnorm1 = nn.BatchNorm2d(32, momentum=0.05)
        self.batchnorm2 = nn.BatchNorm2d(64)
        self.batchnorm3 = nn.BatchNorm2d(128)
        self.relu = nn.ReLU()
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(128*16*16, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        """Return unnormalized class logits for a batch of images."""
        x = self.pool(self.relu(self.batchnorm1(self.conv1(x))))
        x = self.pool(self.relu(self.batchnorm2(self.conv2(x))))
        x = self.pool(self.relu(self.batchnorm3(self.conv3(x))))
        x = self.flatten(x)
        x = self.dropout(self.relu(self.fc1(x)))
        x = self.dropout(self.relu(self.fc2(x)))
        return self.fc3(x)
#Instantiate the network on the selected device and print its layer summary
model = cnn_model().to(device)
print("\n Model Summary:")
print(model)
#Validate Function
def validate(model, dataloader, loss_fn, device):
    """Score `model` on `dataloader`; return (mean batch loss, accuracy %)."""
    model.eval() #evaluation mode: no dropout, frozen batch-norm stats
    running_loss = 0.0
    seen, hits = 0, 0
    with torch.no_grad(): #inference only -- skip autograd bookkeeping
        for batch_imgs, batch_labels in dataloader:
            batch_imgs = batch_imgs.to(device)
            batch_labels = batch_labels.to(device)
            logits = model(batch_imgs)
            running_loss += loss_fn(logits, batch_labels).item()
            hits += (torch.argmax(logits, 1) == batch_labels).sum().item()
            seen += batch_labels.size(0)
    #mean loss per batch (what the LR scheduler consumes) and percent accuracy
    return running_loss / len(dataloader), hits * 100 / seen
#Ensemble prediction function
def ensemble_predict(models, input):
    """Average the softmax probabilities of all ensemble members on one batch."""
    print("Ensemble prediction...")
    member_probs = []
    with torch.no_grad():
        for member in models:
            #softmax over the class dimension -> per-member probability rows
            member_probs.append(F.softmax(member(input), dim=1))
    #element-wise mean across members; shape stays [batch, num_classes]
    return torch.mean(torch.stack(member_probs), dim=0)
#Training loop
epochs = 30 #30 to reach all the restart points and run through 3 cycles (read by train_single_model)
best_val_loss = float("inf") #NOTE(review): shadowed by a local inside train_single_model
patience, wait = 5, 0 #NOTE(review): likewise shadowed by locals
snapshot_models = [] #to store models at each cycle (appended to inside train_single_model)
#Trains a single model
def train_single_model(seed, save_path):
    """Train one cnn_model with cosine warm restarts, snapshotting each cycle.

    Appends a deep copy of the model to the global snapshot_models list at the
    end of each cosine cycle (epochs 9, 19, 29) and checkpoints the best
    validation loss to save_path. Returns the model in its final state.
    """
    #random seed
    torch.manual_seed(seed) #sets seed for PyTorch's RNG
    random.seed(seed) #sets seed for Python's built-in random module
    np.random.seed(seed) #sets the seed for NumPy's RNG
    #model, optimizer, loss function, scheduler
    model = cnn_model().to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9, weight_decay=0.0001)
    loss_fn = nn.CrossEntropyLoss(label_smoothing=0.1)
    #Changed to cyclic learning rate
    #To jump between different local minima
    scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(
        optimizer,
        T_0 = 10, #number of epochs in first cycle
        T_mult = 1 #cycle length stays the same
        #T_mult = 2 doubles cycle length each time (logic)
    )
    best_val_loss = float("inf")
    patience, wait = 5, 0 #early-stopping budget (locals; shadow the module globals)
    snapshot_epochs = [9, 19, 29] #save at the end of each cycle (assumes the global epochs == 30)
    #Training loop (basically same as before); `epochs` is the module-level value
    for e in range(epochs):
        model.train()
        running_loss, correct, total = 0.0, 0, 0
        progress_bar = tqdm(train_dataloader, desc=f"Model {seed} Epoch {e+1}", leave=False)#initialize progressbar
        for imgs, labels in progress_bar: #changed to progress_bar to show progress
            imgs, labels = imgs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(imgs)
            loss = loss_fn(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            preds = torch.argmax(outputs,1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()
            progress_bar.set_postfix(loss =f"{loss.item():.4f}", accuracy = f"{correct*100/total:.2f}%")
        val_loss, val_accuracy = validate(model, val_dataloader, loss_fn, device)
        scheduler.step(e) #step based on epoch index
        #save model's state at each restart
        if e in snapshot_epochs:
            path = f"snapshot_model_epoch{e}.pth"
            torch.save(model.state_dict(), path)
            snapshot_models.append(copy.deepcopy(model)) #deep copy so later training can't mutate the snapshot
        accuracy = correct*100 / total
        loss = running_loss / len(train_dataloader)
        print(f"Epoch [{e+1}/{epochs}], Loss: {loss:.4f}, Accuracy: {accuracy:.2f}%")
        print(f"Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%\n")
        #Early stopping /checkpoint
        #NOTE(review): early stopping (patience 5) can fire before the later
        #snapshot epochs, leaving fewer than 3 snapshots -- confirm intended
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            wait = 0
            torch.save(model.state_dict(), save_path)
            print (f" New best val loss: {val_loss:.4f}, checkpoint saved.")
        else:
            wait += 1
            print (f" No improvement for {wait}/{patience} epochs.")
            if wait >= patience:
                print(" Early stopping triggered. Stopping training.")
                break
    return model
#Evaluate ensemble function
#Get collective predictions from ensemble models
def evaluate_ensemble(models, dataloader):
    """Run the averaged-softmax ensemble over a dataloader and report metrics.

    Prints a classification report and draws a confusion-matrix heatmap.
    """
    all_preds, all_labels = [], []
    for imgs, labels in tqdm(dataloader, desc= "Evaluating Ensemble"):
        imgs = imgs.to(device)
        avg_preds = ensemble_predict(models, imgs) #mean softmax across snapshots
        preds = torch.argmax(avg_preds, dim=1).cpu().tolist()
        all_preds.extend(preds)
        all_labels.extend(labels.tolist())
    #classification report
    print("Classification Report... ")
    print(classification_report(all_labels, all_preds, target_names=[labels_map[i] for i in range(num_classes)]))
    #Confusion matrix
    print("Confusion Matrix...")
    cm = confusion_matrix(all_labels, all_preds)
    plt.figure(figsize=(12,10))
    sns.heatmap(
        cm,
        cmap = "Blues",
        fmt = "d", #NOTE(review): fmt only takes effect with annot=True, which is not set here
        cbar = False,
        xticklabels = [labels_map[i] for i in range(num_classes)],
        yticklabels = [labels_map[i] for i in range(num_classes)]
    )
    plt.title("Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("Actual")
    plt.xticks(rotation=90)
    plt.yticks(fontsize =5)
    plt.tight_layout()
    plt.show()
    plt.close('all')
#Run evaluation !!
#Training populates the global snapshot_models list, which the ensemble then uses
trained_model = train_single_model(seed=42, save_path="Food101/best_snapshot_model.pth")
evaluate_ensemble(snapshot_models, test_dataloader)
#Grad CAM
print("Generating Grad-CAM visualizations...")
def grad_cam(model, img_tensor, target_layer):
    """Compute a Grad-CAM heatmap for one image.

    Args:
        model: network to run; target_layer must lie on its forward path.
        img_tensor: single CHW image tensor (no batch dimension).
        target_layer: conv layer whose activations/gradients are inspected.

    Returns:
        (pred_class, cam): predicted class index and an HxW heatmap in [0, 1].

    Raises:
        RuntimeError: if target_layer never fired during the forward pass
            (e.g. it belongs to a different model instance).
    """
    model.eval()
    activations, gradients = {}, {}
    def forward_hook(module, input, output):
        #capture the layer's feature maps (detached: no grad tracking needed)
        activations['value'] = output.detach()
    def backward_hook(module, grad_in, grad_out):
        #capture d(score)/d(activations) during backprop
        gradients['value'] = grad_out[0].detach()
    handle_f = target_layer.register_forward_hook(forward_hook)
    handle_b = target_layer.register_full_backward_hook(backward_hook)
    try:
        img = img_tensor.unsqueeze(0).to(device) #add batch dim -> [1, C, H, W]
        logits = model(img)
        pred_class = logits.argmax(dim=1).item()
        model.zero_grad()
        #backpropagate from the single scalar score of the predicted class
        logits[0, pred_class].backward()
    finally:
        #BUG FIX: hooks are now removed even if the forward/backward pass
        #raises, so repeated calls cannot accumulate stale hooks
        handle_f.remove()
        handle_b.remove()
    if 'value' not in activations or 'value' not in gradients:
        #explicit diagnostic instead of the original cryptic KeyError
        raise RuntimeError("target_layer was not executed by model's forward pass")
    act = activations['value'][0].cpu().numpy() #feature maps for this image
    grad = gradients['value'][0].cpu().numpy() #matching gradient maps
    weights = grad.mean(axis=(1, 2)) #per-channel importance (mean over spatial dims)
    if len(weights) == 0:
        print("Warning: No gradients captured. Using fallback visualization.")
        cam = np.ones(act.shape[1:], dtype=np.float32) #uniform heatmap fallback
    else:
        cam = np.zeros(act.shape[1:], dtype=np.float32)
        for i, w in enumerate(weights):
            cam += w * act[i] #weighted sum of channel activation maps
    cam = np.maximum(cam, 0) #keep only positive influence
    cam = cv2.resize(cam, (img_tensor.shape[2], img_tensor.shape[1])) #cv2 takes (width, height)
    #BUG FIX: guard against a flat map (max == min), which previously
    #produced NaNs via division by zero
    span = cam.max() - cam.min()
    cam = (cam - cam.min()) / span if span > 0 else np.zeros_like(cam)
    return pred_class, cam
#Visualizing 3 random test images with random models
for _ in range(3):
    random_model = random.choice(snapshot_models) #pick a random snapshot member
    idx = random.randint(0, len(test_ds)-1)
    img_t, img_lbl = test_ds[idx]
    #run and hook the SAME randomly chosen model so its conv3 hooks fire
    pred_lbl, cam = grad_cam(random_model, img_t, random_model.conv3)
    img_np = img_t.permute(1,2,0).cpu().numpy().clip(0,1) #CHW -> HWC for display
    plt.figure(figsize=(6,3))
    plt.subplot(1,2,1) #left panel: plain image
    plt.imshow(img_np)
    plt.title(f"Actual: {labels_map[img_lbl]}\n Predicted: {labels_map[pred_lbl]}")
    plt.axis('off')
    plt.subplot(1,2,2) #right panel: heatmap overlay
    plt.imshow(img_np)
    plt.imshow(cam, cmap = 'jet', alpha =0.5)
    plt.title("Grad-CAM overlay")
    plt.axis('off')
    plt.show()
    plt.close('all')
```
>[time=Wed, Jun 18, 2025]