# Train Diffusers LoRA ### Setup environment ### Please set up Intel PyTorch Extension first. Reference: https://hackmd.io/@chungyeh/BkkPR5Hxp ```=bash python3 -m venv sd_env source sd_env/bin/activate python -m pip install torch==2.0.1a0 torchvision==0.15.2a0 intel_extension_for_pytorch==2.0.110+xpu -f https://developer.intel.com/ipex-whl-stable-xpu pip install diffusers["torch"] transformers ``` ### accelerate config ```=bash accelerate config /home/eapet/sd_env/lib/python3.10/site-packages/torchvision/io/image.py:13: UserWarning: Failed to load image Python extension: ''If you don't plan on using image functionality from `torchvision.io`, you can ignore this warning. Otherwise, there might be something wrong with your environment. Did you have `libjpeg` or `libpng` installed before building `torchvision` from source? warn( -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------In which compute environment are you running? Please select a choice using the arrow or number keys, and selecting with enter ➔ This machine AWS (Amazon SageMaker) No distributed training Do you want to run your training on CPU only (even if a GPU / Apple Silicon device is available)? [yes/NO]: Do you want to use XPU plugin to speed up training on XPU? [yes/NO]:yes Do you wish to optimize your script with torch dynamo?[yes/NO]: Do you want to use DeepSpeed? [yes/NO]: What GPU(s) (by id) should be used for training on this machine as a comma-seperated list? [all]: -----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------Do you wish to use FP16 or BF16 (mixed precision)? 
bf16 ``` ```=bash source /opt/intel/oneapi/setvars.sh source sd_env/bin/activate export MODEL_NAME="runwayml/stable-diffusion-v1-5" export OUTPUT_DIR="/home/eapet/finetune/lora/pokemon" export HUB_MODEL_ID="pokemon-lora" export DATASET_NAME="lambdalabs/pokemon-blip-captions" huggingface-cli login cd /home/eapet/lora/diffusers/examples/text_to_image/ accelerate launch --mixed_precision="bf16" train_text_to_image_lora.py --pretrained_model_name_or_path=$MODEL_NAME --dataset_name=$DATASET_NAME --dataloader_num_workers=8 --resolution=512 --center_crop --random_flip --train_batch_size=1 --gradient_accumulation_steps=4 --max_train_steps=15000 --learning_rate=1e-04 --max_grad_norm=1 --lr_scheduler="cosine" --lr_warmup_steps=0 --output_dir=${OUTPUT_DIR} --checkpointing_steps=500 --validation_prompt="A pokemon with blue eyes." --seed=1337 ``` ```=patch eapet@eapet-NUC12SNKi72:~/lora/diffusers$ git diff diff --git a/examples/text_to_image/train_text_to_image.py b/examples/text_to_image/train_text_to_image.py index 542ee61d..72fa552a 100644 --- a/examples/text_to_image/train_text_to_image.py +++ b/examples/text_to_image/train_text_to_image.py @@ -163,7 +163,8 @@ def log_validation(vae, text_encoder, tokenizer, unet, args, accelerator, weight images = [] for i in range(len(args.validation_prompts)): - with torch.autocast("cuda"): + #with torch.autocast("cuda"): + with torch.xpu.amp.autocast(enabled=True, dtype=torch.bfloat16): image = pipeline(args.validation_prompts[i], num_inference_steps=20, generator=generator).images[0] images.append(image) @@ -185,7 +186,8 @@ def log_validation(vae, text_encoder, tokenizer, unet, args, accelerator, weight logger.warn(f"image logging not implemented for {tracker.name}") del pipeline - torch.cuda.empty_cache() + #torch.cuda.empty_cache() + torch.xpu.empty_cache() return images @@ -1068,7 +1070,8 @@ def main(): generator = torch.Generator(device=accelerator.device).manual_seed(args.seed) for i in range(len(args.validation_prompts)): - with 
torch.autocast("cuda"): + #with torch.autocast("cuda"): + with torch.xpu.amp.autocast(enabled=True, dtype=torch.bfloat16): image = pipeline(args.validation_prompts[i], num_inference_steps=20, generator=generator).images[0] images.append(image) diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py index 3155eb3a..7246ccc7 100644 --- a/examples/text_to_image/train_text_to_image_lora.py +++ b/examples/text_to_image/train_text_to_image_lora.py @@ -862,7 +862,8 @@ def main(): pipeline.set_progress_bar_config(disable=True) # run inference - generator = torch.Generator(device=accelerator.device) + #generator = torch.Generator(device=accelerator.device) + generator = torch.Generator() if args.seed is not None: generator = generator.manual_seed(args.seed) images = [] @@ -920,7 +921,8 @@ def main(): pipeline.unet.load_attn_procs(args.output_dir) # run inference - generator = torch.Generator(device=accelerator.device) + #generator = torch.Generator(device=accelerator.device) + generator = torch.Generator() if args.seed is not None: generator = generator.manual_seed(args.seed) images = [] ```