# Accelerate + LoRA fine-tuning
## Installation
- nvidia/pytorch configuration
```bash=
cd project_root_path
docker run --rm --shm-size=16G --gpus all -v `pwd`:/workspace -it nvcr.io/nvidia/pytorch:23.02-py3
```
- accelerate configuration:
```bash=
pip install accelerate
accelerate config
------------ hardware configuration (adjust to your environment) ------------
In which compute environment are you running?
This machine
Which type of machine are you using?
multi-GPU
How many different machines will you use (use more than 1 for multi-node training)? [1]: 1
Do you wish to optimize your script with torch dynamo?[yes/NO]:NO
Do you want to use DeepSpeed? [yes/NO]: NO
Do you want to use FullyShardedDataParallel? [yes/NO]: NO
Do you want to use Megatron-LM ? [yes/NO]: NO
How many GPU(s) should be used for distributed training? [1]:4
What GPU(s) (by id) should be used for training on this machine as a comma-seperated list? [all]:all
--------------------------------------------------------------
# double-check the resulting config
accelerate env
```
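Once the configuration looks right, start the training script with `accelerate launch` rather than plain `python`, so that one process per selected GPU is spawned (the file name `train.py` is an assumption; use whatever script holds the code below):
```bash=
# launches one process per GPU selected during `accelerate config`
accelerate launch train.py
```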
## Code
- requirements.txt
```txt=
accelerate==0.20.3
transformers==4.30.2
```
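Inside the container, these pinned versions can be installed before running the script:
```bash=
pip install -r requirements.txt
```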
- imports & setup
```python=
import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForCausalLM
from accelerate import Accelerator
# Set up the environment
accelerator = Accelerator()
```
- custom dataset class
```python=
class MyCustomDataset(torch.utils.data.Dataset):
    def __init__(self, texts, tokenizer, max_length):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, index):
        text = self.texts[index]
        encoding = self.tokenizer.encode_plus(
            text,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        input_ids = encoding["input_ids"].squeeze()
        attention_mask = encoding["attention_mask"].squeeze()
        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": input_ids
        }
```
- data load
```python=
# Prepare the dataset (a tiny toy corpus for demonstration)
texts = [
    "Hello, how are you?",
    "I'm doing great, thank you!",
    "What are your plans for the weekend?",
    "I'm going to the beach with friends.",
    "That sounds like a lot of fun!"
]
tokenizer = AutoTokenizer.from_pretrained('beomi/KoAlpaca-Polyglot-5.8B')
# some causal-LM tokenizers ship without a pad token; fall back to EOS if so
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
max_length = 4096
train_dataset = MyCustomDataset(texts, tokenizer, max_length)
valid_dataset = MyCustomDataset(texts[:2], tokenizer, max_length)
batch_size = 4
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
```
- model preparation
```python=
# Define the model architecture
model = AutoModelForCausalLM.from_pretrained('beomi/KoAlpaca-Polyglot-5.8B')
# Configure the training process; the model returns its own LM loss when
# `labels` are provided, so no separate criterion is needed
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
# Wrap model, optimizer, and data loaders for distributed training
model, optimizer, train_loader, valid_loader = accelerator.prepare(model, optimizer, train_loader, valid_loader)
```
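The title mentions LoRA, but the block above fine-tunes every parameter. Below is a minimal sketch of attaching LoRA adapters with the `peft` library before the `accelerator.prepare(...)` call; it assumes `peft` is installed (`pip install peft`) and that `query_key_value` is the attention projection name for GPT-NeoX-based models such as Polyglot.
```python=
from peft import LoraConfig, TaskType, get_peft_model

# hypothetical LoRA hyperparameters; adjust r / alpha / dropout for your workload
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["query_key_value"],  # assumed module name for GPT-NeoX attention
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the adapter weights remain trainable
```
With the adapters attached, the training loop below stays unchanged; gradients flow only through the small adapter matrices, which is what makes LoRA fine-tuning memory-efficient.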
- train
```python=
def train_epoch(model, loader, optimizer):
    model.train()
    total_loss = 0
    for batch in loader:
        optimizer.zero_grad()
        outputs = model(**batch)    # loss comes from the `labels` key
        loss = outputs.loss
        total_loss += loss.item()
        accelerator.backward(loss)  # use Accelerate's backward, not loss.backward()
        optimizer.step()
    return total_loss / len(loader)
```
- evaluate
```python=
def evaluate(model, loader):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in loader:
            # batches are already on the right device after accelerator.prepare
            outputs = model(**batch)
            loss = outputs.loss
            total_loss += loss.item()
    avg_loss = total_loss / len(loader)
    return avg_loss
```
- training & saving result
```python=
num_epochs = 5
for epoch in range(num_epochs):
    train_loss = train_epoch(model, train_loader, optimizer)
    valid_loss = evaluate(model, valid_loader)
    # gather() expects tensors, so wrap the per-process averages before reducing
    train_loss = accelerator.gather(torch.tensor([train_loss], device=accelerator.device)).mean().item()
    valid_loss = accelerator.gather(torch.tensor([valid_loss], device=accelerator.device)).mean().item()
    if accelerator.is_local_main_process:
        print(f"Epoch {epoch + 1}: Train Loss={train_loss:.4f}, Valid Loss={valid_loss:.4f}")

# Save the fine-tuned model (unwrap first so the state dict has no wrapper prefixes)
accelerator.wait_for_everyone()
accelerator.save(accelerator.unwrap_model(model).state_dict(), "fine_tuned_model.pt")
```
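To reuse the weights later, the saved state dict can be loaded back into a freshly instantiated model. A minimal sketch, assuming the full-parameter setup above (if the optional LoRA adapters were used instead, the adapter weights would be saved and loaded via `peft`'s own utilities):
```python=
# reload the fine-tuned weights for inference
model = AutoModelForCausalLM.from_pretrained('beomi/KoAlpaca-Polyglot-5.8B')
state_dict = torch.load("fine_tuned_model.pt", map_location="cpu")
model.load_state_dict(state_dict)
model.eval()
```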