owned this note
owned this note
Published
Linked with GitHub
# 32GB memory GV100 Benchmark
以下測試採用混合精度(float16),分別以 *ResNet-50* 與 *ResNet-152* 訓練 *ImageNet 2012* 資料集。
#### Resnet-50
```
cd $HOME/examples
batch_size=64
num_gpu=1
precision=float16
root@CHTTL:/examples/examples# batch_size=64
root@CHTTL:/examples/examples# nvidia-docker run -it --rm -v $PWD:/notebooks honghu/keras:mx-cu9-dnn7-py3-18.03 python3 models/mxnet-examples/image-classification/train_imagenet.py --network resnet --num-layers 50 --data-nthreads 40 --batch-size $(($batch_size*$num_gpu)) --gpus $(seq -s , 0 $(($num_gpu-1))) --num-epochs 1 --dtype $precision --data-train /notebooks/datasets/ILSVRC2012/ILSVRC2012TRAIN.rec --data-val /notebooks/datasets/ILSVRC2012/ILSVRC2012VAL.rec
INFO:root:start with arguments Namespace(batch_size=64, benchmark=0, data_nthreads=40, data_train='/notebooks/datasets/ILSVRC2012/ILSVRC2012TRAIN.rec', data_train_idx='', data_val='/notebooks/datasets/ILSVRC2012/ILSVRC2012VAL.rec', data_val_idx='', disp_batches=20, dtype='float16', gc_threshold=0.5, gc_type='none', gpus='0', image_shape='3,224,224', initializer='default', kv_store='device', load_epoch=None, lr=0.1, lr_factor=0.1, lr_step_epochs='30,60', macrobatch_size=0, max_random_aspect_ratio=0.25, max_random_h=36, max_random_l=50, max_random_rotate_angle=10, max_random_s=50, max_random_scale=1, max_random_shear_ratio=0.1, min_random_scale=1, model_prefix=None, mom=0.9, monitor=0, network='resnet', num_classes=1000, num_epochs=1, num_examples=1281167, num_layers=50, optimizer='sgd', pad_size=0, random_crop=1, random_mirror=1, rgb_mean='123.68,116.779,103.939', test_io=0, top_k=0, warmup_epochs=5, warmup_strategy='linear', wd=0.0001)
[05:06:13] src/io/iter_image_recordio_2.cc:170: ImageRecordIOParser2: /notebooks/datasets/ILSVRC2012/ILSVRC2012TRAIN.rec, use 9 threads for decoding..
[05:06:15] src/io/iter_image_recordio_2.cc:170: ImageRecordIOParser2: /notebooks/datasets/ILSVRC2012/ILSVRC2012VAL.rec, use 9 threads for decoding..
[05:06:24] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:107: Running performance tests to find the best convolution algorithm, this can take a while... (setting env variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable)
INFO:root:Epoch[0] Batch [20] Speed: 441.32 samples/sec accuracy=0.000000
INFO:root:Epoch[0] Batch [40] Speed: 445.53 samples/sec accuracy=0.000000
INFO:root:Epoch[0] Batch [60] Speed: 449.80 samples/sec accuracy=0.000781
INFO:root:Epoch[0] Batch [80] Speed: 448.48 samples/sec accuracy=0.001563
INFO:root:Epoch[0] Batch [100] Speed: 446.49 samples/sec accuracy=0.000781
INFO:root:Epoch[0] Batch [120] Speed: 449.15 samples/sec accuracy=0.002344
INFO:root:Epoch[0] Batch [140] Speed: 453.30 samples/sec accuracy=0.000000
INFO:root:Epoch[0] Batch [160] Speed: 450.57 samples/sec accuracy=0.002344
INFO:root:Epoch[0] Batch [180] Speed: 446.83 samples/sec accuracy=0.000781
INFO:root:Epoch[0] Batch [200] Speed: 448.16 samples/sec accuracy=0.002344
INFO:root:Epoch[0] Batch [220] Speed: 448.98 samples/sec accuracy=0.000000
INFO:root:Epoch[0] Batch [240] Speed: 442.49 samples/sec accuracy=0.001563
INFO:root:Epoch[0] Batch [260] Speed: 457.45 samples/sec accuracy=0.000000
INFO:root:Epoch[0] Batch [280] Speed: 452.77 samples/sec accuracy=0.001563
INFO:root:Epoch[0] Batch [300] Speed: 456.34 samples/sec accuracy=0.003906
INFO:root:Epoch[0] Batch [320] Speed: 453.76 samples/sec accuracy=0.000781
INFO:root:Epoch[0] Batch [340] Speed: 444.20 samples/sec accuracy=0.001563
INFO:root:Epoch[0] Batch [360] Speed: 446.13 samples/sec accuracy=0.003125
INFO:root:Epoch[0] Batch [380] Speed: 453.61 samples/sec accuracy=0.000781
INFO:root:Epoch[0] Batch [400] Speed: 449.45 samples/sec accuracy=0.000781
INFO:root:Epoch[0] Batch [420] Speed: 436.11 samples/sec accuracy=0.003906
INFO:root:Epoch[0] Batch [440] Speed: 449.06 samples/sec accuracy=0.002344
```
#### Resnet-152
```
cd $HOME/examples
batch_size=128
num_gpu=1
precision=float16
root@CHTTL:/examples# batch_size=128
root@CHTTL:/examples# nvidia-docker run -it --rm -v $PWD:/notebooks honghu/keras:mx-cu9-dnn7-py3-18.03 python3 models/mxnet-examples/image-classification/train_imagenet.py --network resnet --num-layers 152 --data-nthreads 40 --batch-size $(($batch_size*$num_gpu)) --gpus $(seq -s , 0 $(($num_gpu-1))) --num-epochs 1 --dtype $precision --data-train /notebooks/datasets/ILSVRC2012/ILSVRC2012TRAIN.rec --data-val /notebooks/datasets/ILSVRC2012/ILSVRC2012VAL.rec
INFO:root:start with arguments Namespace(batch_size=128, benchmark=0, data_nthreads=40, data_train='/notebooks/datasets/ILSVRC2012/ILSVRC2012TRAIN.rec', data_train_idx='', data_val='/notebooks/datasets/ILSVRC2012/ILSVRC2012VAL.rec', data_val_idx='', disp_batches=20, dtype='float16', gc_threshold=0.5, gc_type='none', gpus='0', image_shape='3,224,224', initializer='default', kv_store='device', load_epoch=None, lr=0.1, lr_factor=0.1, lr_step_epochs='30,60', macrobatch_size=0, max_random_aspect_ratio=0.25, max_random_h=36, max_random_l=50, max_random_rotate_angle=10, max_random_s=50, max_random_scale=1, max_random_shear_ratio=0.1, min_random_scale=1, model_prefix=None, mom=0.9, monitor=0, network='resnet', num_classes=1000, num_epochs=1, num_examples=1281167, num_layers=152, optimizer='sgd', pad_size=0, random_crop=1, random_mirror=1, rgb_mean='123.68,116.779,103.939', test_io=0, top_k=0, warmup_epochs=5, warmup_strategy='linear', wd=0.0001)
[06:25:39] src/io/iter_image_recordio_2.cc:170: ImageRecordIOParser2: /notebooks/datasets/ILSVRC2012/ILSVRC2012TRAIN.rec, use 9 threads for decoding..
[06:25:41] src/io/iter_image_recordio_2.cc:170: ImageRecordIOParser2: /notebooks/datasets/ILSVRC2012/ILSVRC2012VAL.rec, use 9 threads for decoding..
[06:25:51] src/operator/nn/./cudnn/./cudnn_algoreg-inl.h:107: Running performance tests to find the best convolution algorithm, this can take a while... (setting env variable MXNET_CUDNN_AUTOTUNE_DEFAULT to 0 to disable)
INFO:root:Epoch[0] Batch [20] Speed: 236.62 samples/sec accuracy=0.000372
INFO:root:Epoch[0] Batch [40] Speed: 237.22 samples/sec accuracy=0.000781
INFO:root:Epoch[0] Batch [60] Speed: 249.52 samples/sec accuracy=0.000781
INFO:root:Epoch[0] Batch [80] Speed: 235.62 samples/sec accuracy=0.001953
INFO:root:Epoch[0] Batch [100] Speed: 239.80 samples/sec accuracy=0.003516
INFO:root:Epoch[0] Batch [120] Speed: 233.57 samples/sec accuracy=0.004687
INFO:root:Epoch[0] Batch [140] Speed: 232.65 samples/sec accuracy=0.001953
INFO:root:Epoch[0] Batch [160] Speed: 235.10 samples/sec accuracy=0.001953
INFO:root:Epoch[0] Batch [180] Speed: 235.33 samples/sec accuracy=0.002344
INFO:root:Epoch[0] Batch [200] Speed: 233.81 samples/sec accuracy=0.001953
INFO:root:Epoch[0] Batch [220] Speed: 223.41 samples/sec accuracy=0.004297
```