# Train Diffusers LoRA
### Setup environment
### Please set up Intel Extension for PyTorch first. Reference: https://hackmd.io/@chungyeh/BkkPR5Hxp
```bash
python3 -m venv sd_env
source sd_env/bin/activate
python -m pip install torch==2.0.1a0 torchvision==0.15.2a0 intel_extension_for_pytorch==2.0.110+xpu -f https://developer.intel.com/ipex-whl-stable-xpu
pip install diffusers["torch"] transformers
```
### accelerate config
```bash
accelerate config
/home/eapet/sd_env/lib/python3.10/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: ''If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source?
warn(
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------In which compute environment are you running?
Please select a choice using the arrow or number keys, and selecting with enter
➔ This machine
AWS (Amazon SageMaker)
No distributed training
Do you want to run your training on CPU only (even if a GPU / Apple Silicon device is available)? [yes/NO]:
Do you want to use XPU plugin to speed up training on XPU? [yes/NO]:yes
Do you wish to optimize your script with torch dynamo?[yes/NO]:
Do you want to use DeepSpeed? [yes/NO]:
What GPU(s) (by id) should be used for training on this machine as a comma-seperated list? [all]:
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------Do you wish to use FP16 or BF16 (mixed precision)?
bf16
```
```bash
source /opt/intel/oneapi/setvars.sh
source sd_env/bin/activate
export MODEL_NAME="runwayml/stable-diffusion-v1-5"
export OUTPUT_DIR="/home/eapet/finetune/lora/pokemon"
export HUB_MODEL_ID="pokemon-lora"
export DATASET_NAME="lambdalabs/pokemon-blip-captions"
huggingface-cli login
cd /home/eapet/lora/diffusers/examples/text_to_image/
accelerate launch --mixed_precision="bf16" train_text_to_image_lora.py --pretrained_model_name_or_path=$MODEL_NAME --dataset_name=$DATASET_NAME --dataloader_num_workers=8 --resolution=512 --center_crop --random_flip --train_batch_size=1 --gradient_accumulation_steps=4 --max_train_steps=15000 --learning_rate=1e-04 --max_grad_norm=1 --lr_scheduler="cosine" --lr_warmup_steps=0 --output_dir=${OUTPUT_DIR} --checkpointing_steps=500 --validation_prompt="A pokemon with blue eyes." --seed=1337
```
```diff
eapet@eapet-NUC12SNKi72:~/lora/diffusers$ git diff
diff --git a/examples/text_to_image/train_text_to_image.py b/examples/text_to_image/train_text_to_image.py
index 542ee61d..72fa552a 100644
--- a/examples/text_to_image/train_text_to_image.py
+++ b/examples/text_to_image/train_text_to_image.py
@@ -163,7 +163,8 @@ def log_validation(vae, text_encoder, tokenizer, unet, args, accelerator, weight
images = []
for i in range(len(args.validation_prompts)):
- with torch.autocast("cuda"):
+ #with torch.autocast("cuda"):
+ with torch.xpu.amp.autocast(enabled=True, dtype=torch.bfloat16):
image = pipeline(args.validation_prompts[i], num_inference_steps=20, generator=generator).images[0]
images.append(image)
@@ -185,7 +186,8 @@ def log_validation(vae, text_encoder, tokenizer, unet, args, accelerator, weight
logger.warn(f"image logging not implemented for {tracker.name}")
del pipeline
- torch.cuda.empty_cache()
+ #torch.cuda.empty_cache()
+ torch.xpu.empty_cache()
return images
@@ -1068,7 +1070,8 @@ def main():
generator = torch.Generator(device=accelerator.device).manual_seed(args.seed)
for i in range(len(args.validation_prompts)):
- with torch.autocast("cuda"):
+ #with torch.autocast("cuda"):
+ with torch.xpu.amp.autocast(enabled=True, dtype=torch.bfloat16):
image = pipeline(args.validation_prompts[i], num_inference_steps=20, generator=generator).images[0]
images.append(image)
diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py
index 3155eb3a..7246ccc7 100644
--- a/examples/text_to_image/train_text_to_image_lora.py
+++ b/examples/text_to_image/train_text_to_image_lora.py
@@ -862,7 +862,8 @@ def main():
pipeline.set_progress_bar_config(disable=True)
# run inference
- generator = torch.Generator(device=accelerator.device)
+ #generator = torch.Generator(device=accelerator.device)
+ generator = torch.Generator()
if args.seed is not None:
generator = generator.manual_seed(args.seed)
images = []
@@ -920,7 +921,8 @@ def main():
pipeline.unet.load_attn_procs(args.output_dir)
# run inference
- generator = torch.Generator(device=accelerator.device)
+ #generator = torch.Generator(device=accelerator.device)
+ generator = torch.Generator()
if args.seed is not None:
generator = generator.manual_seed(args.seed)
images = []
```