# Finetune YOLO v7 on i7-1370PE iGPU with 64GB system SDRAM

### Ref: https://learnopencv.com/fine-tuning-yolov7-on-custom-dataset/
### Setup Intel PyTorch Extension on 1370PE: https://hackmd.io/@chungyeh/BkkPR5Hxp

YOLO v7 Tiny
![](https://hackmd.io/_uploads/r1YNCCVWa.png)

YOLO v7
![](https://hackmd.io/_uploads/SJrRPbSZp.png)

Below are the steps.

Get the custom data:
```
wget https://learnopencv.s3.us-west-2.amazonaws.com/pothole_dataset.zip
unzip -q pothole_dataset.zip
```

Clone the YOLOv7 repository from GitHub and then patch it for XPU:
```
git clone https://github.com/WongKinYiu/yolov7.git
cd yolov7
pip install -r requirements.txt
patch -p1 < yolov7_xpu.patch
```
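Before going further, it is worth checking that the IPEX install from the setup link above really exposes the iGPU as an `xpu` device. A minimal sanity-check sketch (assuming `intel_extension_for_pytorch` is already installed; the tensor round-trip is only a smoke test):

```
import torch
import intel_extension_for_pytorch as ipex  # registers the 'xpu' device with PyTorch

print(f"torch {torch.__version__}, ipex {ipex.__version__}")
print("XPU available:", torch.xpu.is_available())

if torch.xpu.is_available():
    # Name of the iGPU as seen by the XPU runtime
    print("Device:", torch.xpu.get_device_name(0))
    # Allocate on the iGPU, compute, and copy back to the CPU
    x = torch.randn(2, 3).to("xpu")
    print((x * 2).cpu())
```

If `torch.xpu.is_available()` returns False, revisit the driver and oneAPI setup from the link above before training.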
Here is yolov7_xpu.patch:
```
diff --git a/train.py b/train.py
index 86c7e48..d60a0f8 100644
--- a/train.py
+++ b/train.py
@@ -286,7 +286,9 @@ def train(hyp, opt, device, tb_writer=None):
     model.nc = nc # attach number of classes to model
     model.hyp = hyp # attach hyperparameters to model
     model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou)
-    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights
+#    model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights
+    model.class_weights = labels_to_class_weights(dataset.labels, nc) * nc # attach class weights
+    model.class_weights = model.class_weights.to(device)
     model.names = names
 
     # Start training
diff --git a/utils/autoanchor.py b/utils/autoanchor.py
index f491032..744722c 100644
--- a/utils/autoanchor.py
+++ b/utils/autoanchor.py
@@ -49,7 +49,9 @@ def check_anchors(dataset, model, thr=4.0, imgsz=640):
             print(f'{prefix}ERROR: {e}')
         new_bpr = metric(anchors)[0]
         if new_bpr > bpr: # replace anchors
-            anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors)
+            #anchors = torch.tensor(anchors, device=m.anchors.device).type_as(m.anchors)
+            anchors = torch.tensor(anchors).type_as(m.anchors)
+            anchors = anchors.to(m.anchors.device)
             m.anchor_grid[:] = anchors.clone().view_as(m.anchor_grid) # for inference
             check_anchor_order(m)
             m.anchors[:] = anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index 1e631b5..27dad00 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -17,6 +17,8 @@ import torch.nn as nn
 import torch.nn.functional as F
 import torchvision
 
+import intel_extension_for_pytorch as ipex
+
 try:
     import thop # for FLOPS computation
 except ImportError:
@@ -64,13 +66,18 @@ def select_device(device='', batch_size=None):
     # device = 'cpu' or '0' or '0,1,2,3'
     s = f'YOLOR 🚀 {git_describe() or date_modified()} torch {torch.__version__} ' # string
     cpu = device.lower() == 'cpu'
+    xpu = device.lower() == 'xpu'
     if cpu:
         os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False
+    elif xpu: # non-cpu device requested
+        os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False
+        assert torch.xpu.is_available(), f'XPU unavailable, invalid device {device} requested' # check availability
     elif device: # non-cpu device requested
         os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable
         assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availability
 
-    cuda = not cpu and torch.cuda.is_available()
+    #cuda = not cpu and torch.cuda.is_available()
+    cuda = not cpu and not xpu and torch.cuda.is_available()
     if cuda:
         n = torch.cuda.device_count()
         if n > 1 and batch_size: # check that batch_size is compatible with device_count
@@ -79,11 +86,18 @@ def select_device(device='', batch_size=None):
         for i, d in enumerate(device.split(',') if device else range(n)):
             p = torch.cuda.get_device_properties(i)
             s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n" # bytes to MB
+    elif xpu:
+        s += 'XPU\n'
     else:
         s += 'CPU\n'
 
     logger.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s) # emoji-safe
-    return torch.device('cuda:0' if cuda else 'cpu')
+    if cuda:
+        return torch.device('cuda:0')
+    elif xpu:
+        return torch.device('xpu')
+    else:
+        return torch.device('cpu')
 
 
 def time_synchronized():
@@ -371,4 +385,4 @@ class TracedModel(nn.Module):
     def forward(self, x, augment=False, profile=False):
         out = self.model(x)
         out = self.detect_layer(out)
-        return out
\ No newline at end of file
+        return out
```

Download the yolov7 and yolov7-tiny models:
```
wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-tiny.pt
wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7_training.pt
```

Add the custom data and model configuration files for training:
```
vim data/pothole.yaml
```

## Begin content of data/pothole.yaml ##
```
train: ../pothole_dataset/images/train
val: ../pothole_dataset/images/valid
test: ../pothole_dataset/images/test

# Classes
nc: 1 # number of classes
names: ['pothole'] # class names
```
## End of content of data/pothole.yaml ##

YOLOv7 Tiny training configuration:
```
cp cfg/training/yolov7-tiny.yaml cfg/training/yolov7_pothole-tiny.yaml
## modify "nc: 80" to "nc: 1" in cfg/training/yolov7_pothole-tiny.yaml ##
```

YOLOv7 training configuration:
```
cp cfg/training/yolov7.yaml cfg/training/yolov7_pothole.yaml
## modify "nc: 80" to "nc: 1" in cfg/training/yolov7_pothole.yaml ##
```

Train commands:

### YOLO V7 Tiny
```
python train.py --epochs 10 --workers 4 --device xpu --batch-size 32 --data data/pothole.yaml --img 640 640 --cfg cfg/training/yolov7_pothole-tiny.yaml --weights 'yolov7-tiny.pt' --name yolov7_tiny_pothole_fixed_res --hyp data/hyp.scratch.tiny.yaml
```

### YOLO V7
```
python train.py --epochs 10 --workers 4 --device xpu --batch-size 16 --data data/pothole.yaml --img 640 640 --cfg cfg/training/yolov7_pothole.yaml --weights 'yolov7_training.pt' --name yolov7_pothole_fixed_res --hyp data/hyp.scratch.custom.yaml
```
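After training, the best checkpoint can be tried on the test images with the repo's detect.py. A sketch of the command (the weights path follows from the `--name` used above; confirm the actual directory under `runs/train/`, and note that only `select_device` was patched for XPU, so `--device cpu` is a quick fallback if `xpu` inference misbehaves):

```
python detect.py --device xpu --weights runs/train/yolov7_tiny_pothole_fixed_res/weights/best.pt --conf-thres 0.25 --img-size 640 --source ../pothole_dataset/images/test
```

For mAP numbers on the test split, test.py can be pointed at the same weights with `--data data/pothole.yaml --task test`.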