# Accelerate + LoRA fine-tuning

## Installation

- nvidia/pytorch configuration

```bash=
cd project_root_path
docker run --rm --shm-size=16G --gpus all -v `pwd`:/workspace -it nvcr.io/nvidia/pytorch:23.02-py3
```

- accelerate configuration:

```bash=
pip install accelerate
accelerate config
------------ hardware configuration (adjust to your environment) ------------
In which compute environment are you running? This machine
Which type of machine are you using? multi-GPU
How many different machines will you use (use more than 1 for multi-node training)? [1]: 1
Do you wish to optimize your script with torch dynamo?[yes/NO]:NO
Do you want to use DeepSpeed? [yes/NO]: NO
Do you want to use FullyShardedDataParallel? [yes/NO]: NO
Do you want to use Megatron-LM ? [yes/NO]: NO
How many GPU(s) should be used for distributed training? [1]:4
What GPU(s) (by id) should be used for training on this machine as a comma-seperated list? [all]:all
--------------------------------------------------------------

# double-check the resulting configuration
accelerate env
```
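- launching: once the configuration is saved, the training script from the next section can be started on all configured GPUs with `accelerate launch`. The filename `train.py` below is an assumption; save the code under any name you like.

```bash=
# run the script below on the GPUs selected during `accelerate config`
accelerate launch train.py
```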
## Code

- requirements.txt

```txt=
accelerate==0.20.3
transformers==4.30.2
```

- imports & setup

```python=
import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModelForCausalLM
from accelerate import Accelerator

# Set up the environment; Accelerate handles device placement and process setup
accelerator = Accelerator()
```

- custom dataset class

```python=
class MyCustomDataset(torch.utils.data.Dataset):
    def __init__(self, texts, tokenizer, max_length):
        self.texts = texts
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, index):
        text = self.texts[index]
        encoding = self.tokenizer(
            text,
            max_length=self.max_length,
            padding="max_length",
            truncation=True,
            return_tensors="pt"
        )
        input_ids = encoding["input_ids"].squeeze()
        attention_mask = encoding["attention_mask"].squeeze()
        # Causal LM: the inputs double as labels, but padded positions are
        # set to -100 so the loss ignores them
        labels = input_ids.clone()
        labels[attention_mask == 0] = -100
        return {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
            "labels": labels
        }
```

- data load

```python=
# Prepare the dataset
texts = [
    "Hello, how are you?",
    "I'm doing great, thank you!",
    "What are your plans for the weekend?",
    "I'm going to the beach with friends.",
    "That sounds like a lot of fun!"
]

tokenizer = AutoTokenizer.from_pretrained('beomi/KoAlpaca-Polyglot-5.8B')
# Some causal LM tokenizers ship without a pad token; fall back to EOS so padding works
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
max_length = 4096

train_dataset = MyCustomDataset(texts, tokenizer, max_length)
valid_dataset = MyCustomDataset(texts[:2], tokenizer, max_length)

batch_size = 4
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size=batch_size)
```

- model preparation

```python=
# Define the model architecture
model = AutoModelForCausalLM.from_pretrained('beomi/KoAlpaca-Polyglot-5.8B')

# Configure the training process; no separate criterion is needed because the
# model computes the cross-entropy loss internally when labels are passed
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

# Wrap the model, optimizer, and data loaders for distributed training
model, optimizer, train_loader, valid_loader = accelerator.prepare(
    model, optimizer, train_loader, valid_loader
)
```

- train

```python=
def train_epoch(model, loader, optimizer):
    model.train()
    total_loss = 0
    for batch in loader:
        optimizer.zero_grad()
        outputs = model(**batch)
        loss = outputs.loss
        total_loss += loss.item()
        # accelerator.backward handles gradient scaling and synchronization
        accelerator.backward(loss)
        optimizer.step()
    return total_loss / len(loader)
```

- evaluate

```python=
def evaluate(model, loader):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch in loader:
            # Accelerate already placed the batch on the right device,
            # so no manual .to(device) calls are needed
            outputs = model(**batch)
            loss = outputs.loss
            total_loss += loss.item()
    avg_loss = total_loss / len(loader)
    return avg_loss
```

- training & saving result

```python=
num_epochs = 5

for epoch in range(num_epochs):
    train_loss = train_epoch(model, train_loader, optimizer)
    valid_loss = evaluate(model, valid_loader)

    # gather expects tensors, so wrap the per-process averages before reducing them
    train_loss = accelerator.gather(torch.tensor([train_loss], device=accelerator.device)).mean().item()
    valid_loss = accelerator.gather(torch.tensor([valid_loss], device=accelerator.device)).mean().item()

    if accelerator.is_local_main_process:
        print(f"Epoch {epoch + 1}: Train Loss={train_loss:.4f}, Valid Loss={valid_loss:.4f}")

# Save the fine-tuned model (unwrap first to strip the distributed wrapper)
accelerator.wait_for_everyone()
unwrapped_model = accelerator.unwrap_model(model)
accelerator.save(unwrapped_model.state_dict(), "fine_tuned_model.pt")
```
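- loading the result: a minimal sketch of reloading the checkpoint saved above for inference, assuming the full fine-tuning path shown here (the weights were stored as a plain state dict).

```python=
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Rebuild the base model, then load the fine-tuned weights saved above
tokenizer = AutoTokenizer.from_pretrained('beomi/KoAlpaca-Polyglot-5.8B')
model = AutoModelForCausalLM.from_pretrained('beomi/KoAlpaca-Polyglot-5.8B')
state_dict = torch.load("fine_tuned_model.pt", map_location="cpu")
model.load_state_dict(state_dict)
model.eval()
```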
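- LoRA (optional): the title mentions LoRA, but the walkthrough above fine-tunes all model parameters. Below is a minimal sketch of how the model could be wrapped with LoRA adapters using the `peft` library (an extra dependency, not listed in `requirements.txt`) right after loading it and before `accelerator.prepare`. The `query_key_value` name targets the attention projections of GPT-NeoX-based models such as Polyglot and may need adjusting for other architectures.

```python=
from peft import LoraConfig, TaskType, get_peft_model

# Wrap the base model with low-rank adapters; only the adapter weights are trained
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules=["query_key_value"],
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()

# The rest of the pipeline (optimizer, accelerator.prepare, training loop, saving) stays the same
```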