# 碩士論文資料整理
###### tags: `論文` `experiment`
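idle cpu used: 3.29GB (baseline; the `cpu used` columns in the tables below are the raw readings listed in the code blocks minus this value)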








openai/checkpoints
| layers | cpu used | gpu used | per/sec |
| :---: | :---: | :---: | :---: |
| 18 | 3.20GB | 514.40MB | 0.191 |
| 34 | 3.26GB | 799.98MB | 0.371 |
| 50 | 3.32GB | 1381.65MB | 0.573 |
| 101 | 3.56GB | 1956.81MB | 1.118 |
| 152 | 3.84GB | 2083.61MB | 1.682 |
| 200 | 4.14GB | 2135.42MB | 2.287 |
```
openai/checkpoints
18 6.49 514.40 0.191
34 6.55 799.98 0.371
50 6.61 1381.65 0.573
101 6.85 1956.81 1.118
152 7.13 2083.61 1.682
200 7.43 2135.42 2.287
```
original tensorflow
| layers | cpu used | gpu used | per/sec |
| :---: | :---: | :---: | :---: |
| 18 | 4.20GB | 843.57MB | 0.233 |
| 34 | 4.13GB | 1323.28MB | 0.371 |
| 50 | 4.11GB | 3015.52MB | 0.538 |
| 101 | 4.19GB | 4671.01MB | 0.934 |
| 152 | 4.31GB | 6577.46MB | 1.381 |
| 200 | oom | oom | oom |
```
ori
18 7.49 843.57 0.233
34 7.42 1323.28 0.371
50 7.40 3015.52 0.538
101 7.48 4671.01 0.934
152 7.60 6577.46 1.381
200 oom
```
large model support tensorflow
| layers | cpu used | gpu used | per/sec |
| :---: | :---: | :---: | :---: |
| 18 | 4.90GB | 829.03MB | 0.351 |
| 34 | 5.33GB | 1220.52MB | 0.539 |
| 50 | 6.48GB | 2764.48MB | 0.940 |
| 101 | 8.61GB | 4108.10MB | 1.576 |
| 152 | 8.81GB | 5596.63MB | 2.290 |
| 200 | 13.11GB | 7559.23MB | 3.087 |
```
lms
18 8.19 829.03 0.351
34 8.62 1220.52 0.539
50 9.77 2764.48 0.940
101 11.9 4108.10 1.576
152 12.1 5596.63 2.290
200 16.4 7559.23 3.087
```
Optimizer memory efficiency
| layers | cpu used | gpu used | per/sec |
| :---: | :---: | :---: | :---: |
| 18 | 3.26GB | 509.10MB | 0.197 |
| 34 | 3.40GB | 762.15MB | 0.383 |
| 50 | 3.86GB | 1230.80MB | 0.600 |
| 101 | 4.12GB | 1726.87MB | 1.139 |
| 152 | 4.94GB | 1924.51MB | 1.699 |
| 200 | 5.31GB | 1952.25MB | 2.323 |
```
ome
18 6.55 509.10 0.197
34 6.69 762.15 0.383
50 7.15 1230.80 0.600
101 7.41 1726.87 1.139
152 8.23 1924.51 1.699
200 8.60 1952.25 2.323
```
```
device: CUDA_ERROR_OUT_OF_MEMORY: out of memory
Running with checkpoints
('max_mem:', 0.00128, 'mem_used:', 0.00128)
('max_mem:', 151.00672, 'mem_used:', 141.913088)
('max_mem:', 514.143232, 'mem_used:', 141.918208)
('max_mem:', 514.400256, 'mem_used:', 141.918208)
('max_mem:', 514.400256, 'mem_used:', 141.918208)
('max_mem:', 514.400256, 'mem_used:', 141.918208)
('max_mem:', 514.400256, 'mem_used:', 141.918208)
('max_mem:', 514.400256, 'mem_used:', 141.918208)
('max_mem:', 514.400256, 'mem_used:', 141.918208)
('max_mem:', 514.400256, 'mem_used:', 141.918208)
('max_mem:', 514.400256, 'mem_used:', 141.918208)
--- Size:18
|______ memory_cost:372.48224, execute_time:0.183
|______ memory_cost:372.48224, execute_time:0.188
|______ memory_cost:372.48224, execute_time:0.201
|______ memory_cost:372.48224, execute_time:0.188
|______ memory_cost:372.48224, execute_time:0.187
|______ memory_cost:372.48224, execute_time:0.196
|______ memory_cost:372.48224, execute_time:0.192
|______ memory_cost:372.48224, execute_time:0.192
|______ memory_cost:372.48224, execute_time:0.197
sleep 5 sec
('max_mem:', 514.400256, 'mem_used:', 0.00128)
('max_mem:', 514.400256, 'mem_used:', 264.635392)
('max_mem:', 799.981312, 'mem_used:', 264.640512)
('max_mem:', 799.981312, 'mem_used:', 264.640512)
('max_mem:', 799.981312, 'mem_used:', 264.640512)
('max_mem:', 799.981312, 'mem_used:', 264.640512)
('max_mem:', 799.981312, 'mem_used:', 264.640512)
('max_mem:', 799.981312, 'mem_used:', 264.640512)
('max_mem:', 799.981312, 'mem_used:', 264.640512)
('max_mem:', 799.981312, 'mem_used:', 264.640512)
('max_mem:', 799.981312, 'mem_used:', 264.640512)
--- Size:34
|______ memory_cost:535.341024, execute_time:0.365
|______ memory_cost:535.341024, execute_time:0.370
|______ memory_cost:535.341024, execute_time:0.358
|______ memory_cost:535.341024, execute_time:0.377
|______ memory_cost:535.341024, execute_time:0.383
|______ memory_cost:535.341024, execute_time:0.378
|______ memory_cost:535.341024, execute_time:0.378
|______ memory_cost:535.341024, execute_time:0.373
|______ memory_cost:535.341024, execute_time:0.365
sleep 5 sec
('max_mem:', 799.981312, 'mem_used:', 0.00128)
('max_mem:', 799.981312, 'mem_used:', 323.772672)
('max_mem:', 1381.657344, 'mem_used:', 323.777792)
('max_mem:', 1381.657344, 'mem_used:', 323.777792)
('max_mem:', 1381.657344, 'mem_used:', 323.777792)
('max_mem:', 1381.657344, 'mem_used:', 323.777792)
('max_mem:', 1381.657344, 'mem_used:', 323.777792)
('max_mem:', 1381.657344, 'mem_used:', 323.777792)
('max_mem:', 1381.657344, 'mem_used:', 323.777792)
('max_mem:', 1381.657344, 'mem_used:', 323.777792)
('max_mem:', 1381.657344, 'mem_used:', 323.777792)
--- Size:50
|______ memory_cost:1057.879808, execute_time:0.558
|______ memory_cost:1057.879808, execute_time:0.584
|______ memory_cost:1057.879808, execute_time:0.580
|______ memory_cost:1057.879808, execute_time:0.574
|______ memory_cost:1057.879808, execute_time:0.572
|______ memory_cost:1057.879808, execute_time:0.566
|______ memory_cost:1057.879808, execute_time:0.581
|______ memory_cost:1057.879808, execute_time:0.579
|______ memory_cost:1057.879808, execute_time:0.565
sleep 5 sec
('max_mem:', 1381.657344, 'mem_used:', 0.00128)
('max_mem:', 1381.657344, 'mem_used:', 551.831296)
('max_mem:', 1956.819968, 'mem_used:', 551.836416)
('max_mem:', 1956.819968, 'mem_used:', 551.836416)
('max_mem:', 1956.819968, 'mem_used:', 551.836416)
('max_mem:', 1956.819968, 'mem_used:', 551.836416)
('max_mem:', 1956.819968, 'mem_used:', 551.836416)
('max_mem:', 1956.819968, 'mem_used:', 551.836416)
('max_mem:', 1956.819968, 'mem_used:', 551.836416)
('max_mem:', 1956.819968, 'mem_used:', 551.836416)
('max_mem:', 1956.819968, 'mem_used:', 551.836416)
--- Size:101
|______ memory_cost:1404.983856, execute_time:1.098
|______ memory_cost:1404.983856, execute_time:1.148
|______ memory_cost:1404.983856, execute_time:1.113
|______ memory_cost:1404.983856, execute_time:1.110
|______ memory_cost:1404.983856, execute_time:1.112
|______ memory_cost:1404.983856, execute_time:1.126
|______ memory_cost:1404.983856, execute_time:1.107
|______ memory_cost:1404.983856, execute_time:1.131
|______ memory_cost:1404.983856, execute_time:1.122
sleep 5 sec
('max_mem:', 1956.819968, 'mem_used:', 0.00128)
('max_mem:', 1956.819968, 'mem_used:', 739.487744)
('max_mem:', 2083.61728, 'mem_used:', 739.492864)
('max_mem:', 2083.61728, 'mem_used:', 739.492864)
('max_mem:', 2083.61728, 'mem_used:', 739.492864)
('max_mem:', 2083.61728, 'mem_used:', 739.492864)
('max_mem:', 2083.61728, 'mem_used:', 739.492864)
('max_mem:', 2083.61728, 'mem_used:', 739.492864)
('max_mem:', 2083.61728, 'mem_used:', 739.492864)
('max_mem:', 2083.61728, 'mem_used:', 739.492864)
('max_mem:', 2083.61728, 'mem_used:', 739.492864)
--- Size:152
|______ memory_cost:1344.12472, execute_time:1.689
|______ memory_cost:1344.12472, execute_time:1.680
|______ memory_cost:1344.12472, execute_time:1.683
|______ memory_cost:1344.12472, execute_time:1.662
|______ memory_cost:1344.12472, execute_time:1.681
|______ memory_cost:1344.12472, execute_time:1.671
|______ memory_cost:1344.12472, execute_time:1.700
|______ memory_cost:1344.12472, execute_time:1.679
|______ memory_cost:1344.12472, execute_time:1.699
sleep 5 sec
('max_mem:', 2083.61728, 'mem_used:', 0.00128)
('max_mem:', 2083.61728, 'mem_used:', 793.60512)
('max_mem:', 2135.42144, 'mem_used:', 793.61024)
('max_mem:', 2135.42144, 'mem_used:', 793.61024)
('max_mem:', 2135.42144, 'mem_used:', 793.61024)
('max_mem:', 2135.42144, 'mem_used:', 793.61024)
('max_mem:', 2135.42144, 'mem_used:', 793.61024)
('max_mem:', 2135.42144, 'mem_used:', 793.61024)
('max_mem:', 2135.42144, 'mem_used:', 793.61024)
('max_mem:', 2135.42144, 'mem_used:', 793.61024)
('max_mem:', 2135.42144, 'mem_used:', 793.61024)
--- Size:200
|______ memory_cost:1341.811504, execute_time:2.250
|______ memory_cost:1341.811504, execute_time:2.264
|______ memory_cost:1341.811504, execute_time:2.231
|______ memory_cost:1341.811504, execute_time:2.305
|______ memory_cost:1341.811504, execute_time:2.289
|______ memory_cost:1341.811504, execute_time:2.267
|______ memory_cost:1341.811504, execute_time:2.346
|______ memory_cost:1341.811504, execute_time:2.367
|______ memory_cost:1341.811504, execute_time:2.267
sleep 5 sec
Running without checkpoints
('max_mem:', 2135.42144, 'mem_used:', 0.00128)
('max_mem:', 2135.42144, 'mem_used:', 143.172608)
('max_mem:', 2135.42144, 'mem_used:', 143.177472)
('max_mem:', 2135.42144, 'mem_used:', 143.177472)
('max_mem:', 2135.42144, 'mem_used:', 143.177472)
('max_mem:', 2135.42144, 'mem_used:', 143.177472)
('max_mem:', 2135.42144, 'mem_used:', 143.177472)
('max_mem:', 2135.42144, 'mem_used:', 143.177472)
('max_mem:', 2135.42144, 'mem_used:', 143.177472)
('max_mem:', 2135.42144, 'mem_used:', 143.177472)
('max_mem:', 2135.42144, 'mem_used:', 143.177472)
--- Size:18
|______ memory_cost:700.400352, execute_time:0.228
|______ memory_cost:700.400352, execute_time:0.238
|______ memory_cost:700.400352, execute_time:0.228
|______ memory_cost:700.400352, execute_time:0.237
|______ memory_cost:700.400352, execute_time:0.232
|______ memory_cost:700.400352, execute_time:0.230
|______ memory_cost:700.400352, execute_time:0.241
|______ memory_cost:700.400352, execute_time:0.231
|______ memory_cost:700.400352, execute_time:0.237
sleep 5 sec
('max_mem:', 2135.42144, 'mem_used:', 0.00128)
('max_mem:', 2135.42144, 'mem_used:', 265.10208)
('max_mem:', 2135.42144, 'mem_used:', 265.106944)
('max_mem:', 2135.42144, 'mem_used:', 265.106944)
('max_mem:', 2135.42144, 'mem_used:', 265.106944)
('max_mem:', 2135.42144, 'mem_used:', 265.106944)
('max_mem:', 2135.42144, 'mem_used:', 265.106944)
('max_mem:', 2135.42144, 'mem_used:', 265.106944)
('max_mem:', 2135.42144, 'mem_used:', 265.106944)
('max_mem:', 2135.42144, 'mem_used:', 265.106944)
('max_mem:', 2135.42144, 'mem_used:', 265.106944)
--- Size:34
|______ memory_cost:1058.181344, execute_time:0.380
|______ memory_cost:1058.181344, execute_time:0.361
|______ memory_cost:1058.181344, execute_time:0.351
|______ memory_cost:1058.181344, execute_time:0.362
|______ memory_cost:1058.181344, execute_time:0.368
|______ memory_cost:1058.181344, execute_time:0.362
|______ memory_cost:1058.181344, execute_time:0.373
|______ memory_cost:1058.181344, execute_time:0.407
|______ memory_cost:1058.181344, execute_time:0.382
sleep 5 sec
('max_mem:', 2135.42144, 'mem_used:', 0.00128)
('max_mem:', 2135.42144, 'mem_used:', 323.613696)
('max_mem:', 3015.52, 'mem_used:', 323.61856)
('max_mem:', 3015.52, 'mem_used:', 323.61856)
('max_mem:', 3015.52, 'mem_used:', 323.61856)
('max_mem:', 3015.52, 'mem_used:', 323.61856)
('max_mem:', 3015.52, 'mem_used:', 323.61856)
('max_mem:', 3015.52, 'mem_used:', 323.61856)
('max_mem:', 3015.52, 'mem_used:', 323.61856)
('max_mem:', 3015.52, 'mem_used:', 323.61856)
('max_mem:', 3015.52, 'mem_used:', 323.61856)
--- Size:50
|______ memory_cost:2691.903216, execute_time:0.536
|______ memory_cost:2691.903216, execute_time:0.533
|______ memory_cost:2691.903216, execute_time:0.548
|______ memory_cost:2691.903216, execute_time:0.540
|______ memory_cost:2691.903216, execute_time:0.541
|______ memory_cost:2691.903216, execute_time:0.548
|______ memory_cost:2691.903216, execute_time:0.529
|______ memory_cost:2691.903216, execute_time:0.537
|______ memory_cost:2691.903216, execute_time:0.533
sleep 5 sec
('max_mem:', 3015.52, 'mem_used:', 0.00128)
('max_mem:', 3015.52, 'mem_used:', 550.951424)
('max_mem:', 4671.013376, 'mem_used:', 550.956288)
('max_mem:', 4671.013376, 'mem_used:', 550.956288)
('max_mem:', 4671.013376, 'mem_used:', 550.956288)
('max_mem:', 4671.013376, 'mem_used:', 550.956288)
('max_mem:', 4671.013376, 'mem_used:', 550.956288)
('max_mem:', 4671.013376, 'mem_used:', 550.956288)
('max_mem:', 4671.013376, 'mem_used:', 550.956288)
('max_mem:', 4671.013376, 'mem_used:', 550.956288)
('max_mem:', 4671.013376, 'mem_used:', 550.956288)
--- Size:101
|______ memory_cost:4120.060496, execute_time:0.961
|______ memory_cost:4120.060496, execute_time:0.916
|______ memory_cost:4120.060496, execute_time:0.926
|______ memory_cost:4120.060496, execute_time:0.908
|______ memory_cost:4120.060496, execute_time:0.940
|______ memory_cost:4120.060496, execute_time:0.933
|______ memory_cost:4120.060496, execute_time:0.949
|______ memory_cost:4120.060496, execute_time:0.937
|______ memory_cost:4120.060496, execute_time:0.940
sleep 5 sec
('max_mem:', 4671.013376, 'mem_used:', 0.00128)
('max_mem:', 4671.013376, 'mem_used:', 743.486464)
('max_mem:', 6577.462784, 'mem_used:', 743.491328)
('max_mem:', 6577.462784, 'mem_used:', 743.491328)
('max_mem:', 6577.462784, 'mem_used:', 743.491328)
('max_mem:', 6577.462784, 'mem_used:', 743.491328)
('max_mem:', 6577.462784, 'mem_used:', 743.491328)
('max_mem:', 6577.462784, 'mem_used:', 743.491328)
('max_mem:', 6577.462784, 'mem_used:', 743.491328)
('max_mem:', 6577.462784, 'mem_used:', 743.491328)
('max_mem:', 6577.462784, 'mem_used:', 743.491328)
--- Size:152
|______ memory_cost:5833.976496, execute_time:1.360
|______ memory_cost:5833.976496, execute_time:1.369
|______ memory_cost:5833.976496, execute_time:1.364
|______ memory_cost:5833.976496, execute_time:1.376
|______ memory_cost:5833.976496, execute_time:1.387
|______ memory_cost:5833.976496, execute_time:1.379
|______ memory_cost:5833.976496, execute_time:1.387
|______ memory_cost:5833.976496, execute_time:1.440
|______ memory_cost:5833.976496, execute_time:1.373
sleep 5 sec
('max_mem:', 6577.462784, 'mem_used:', 0.00128)
('max_mem:', 6577.462784, 'mem_used:', 795.145472)
--- Size:200
sleep 5 sec
Running with tensorflow_large_model_support
INFO:tensorflow:[LMS][0] Editing model for LMS
INFO:tensorflow:[LMS][0] n_tensors: all tensors
INFO:tensorflow:[LMS][0] lb: 1
INFO:tensorflow:[LMS][0] Edited model is valid and logically equivalent to the original one
INFO:tensorflow:[LMS][0] Added 150 ops into the model
INFO:tensorflow:[LMS][0] Editing model for LMS, took: 998.296976089 ms
INFO:tensorflow:[LMS][0] 68 tensors will be swapped out(in) to(from) the host
('max_mem:', 7606.362368, 'mem_used:', 0.00128)
('max_mem:', 7606.362368, 'mem_used:', 143.172608)
('max_mem:', 7606.362368, 'mem_used:', 143.177472)
('max_mem:', 7606.362368, 'mem_used:', 143.177472)
('max_mem:', 7606.362368, 'mem_used:', 143.177472)
('max_mem:', 7606.362368, 'mem_used:', 143.177472)
('max_mem:', 7606.362368, 'mem_used:', 143.177472)
('max_mem:', 7606.362368, 'mem_used:', 143.177472)
('max_mem:', 7606.362368, 'mem_used:', 143.177472)
('max_mem:', 7606.362368, 'mem_used:', 143.177472)
('max_mem:', 7606.362368, 'mem_used:', 143.177472)
--- Size:18
|_____ memory_cost:726.358936, execute_time:0.354
|_____ memory_cost:719.486424, execute_time:0.371
|_____ memory_cost:720.993752, execute_time:0.349
|_____ memory_cost:715.423192, execute_time:0.348
|_____ memory_cost:685.861816, execute_time:0.351
|_____ memory_cost:716.93052, execute_time:0.344
|_____ memory_cost:702.184408, execute_time:0.348
|_____ memory_cost:742.227416, execute_time:0.349
|_____ memory_cost:703.879576, execute_time:0.353
sleep 5 sec
INFO:tensorflow:[LMS][0] Editing model for LMS
INFO:tensorflow:[LMS][0] n_tensors: all tensors
INFO:tensorflow:[LMS][0] lb: 1
INFO:tensorflow:[LMS][0] Edited model is valid and logically equivalent to the original one
INFO:tensorflow:[LMS][0] Added 278 ops into the model
INFO:tensorflow:[LMS][0] Editing model for LMS, took: 2858.55698586 ms
INFO:tensorflow:[LMS][0] 124 tensors will be swapped out(in) to(from) the host
('max_mem:', 7606.362368, 'mem_used:', 0.00128)
('max_mem:', 7606.362368, 'mem_used:', 265.412352)
('max_mem:', 7606.362368, 'mem_used:', 265.417216)
('max_mem:', 7606.362368, 'mem_used:', 265.417216)
('max_mem:', 7606.362368, 'mem_used:', 265.417216)
('max_mem:', 7606.362368, 'mem_used:', 265.417216)
('max_mem:', 7606.362368, 'mem_used:', 265.417216)
('max_mem:', 7606.362368, 'mem_used:', 265.417216)
('max_mem:', 7606.362368, 'mem_used:', 265.417216)
('max_mem:', 7606.362368, 'mem_used:', 265.417216)
('max_mem:', 7606.362368, 'mem_used:', 265.417216)
--- Size:34
|_____ memory_cost:955.10396, execute_time:0.520
|_____ memory_cost:979.089624, execute_time:0.533
|_____ memory_cost:985.316056, execute_time:0.538
|_____ memory_cost:1000.061656, execute_time:0.556
|_____ memory_cost:977.771504, execute_time:0.540
|_____ memory_cost:969.185432, execute_time:0.543
|_____ memory_cost:980.79356, execute_time:0.535
|_____ memory_cost:1009.040088, execute_time:0.546
|_____ memory_cost:967.481496, execute_time:0.544
sleep 5 sec
INFO:tensorflow:[LMS][0] Editing model for LMS
INFO:tensorflow:[LMS][0] n_tensors: all tensors
INFO:tensorflow:[LMS][0] lb: 1
INFO:tensorflow:[LMS][0] Edited model is valid and logically equivalent to the original one
INFO:tensorflow:[LMS][0] Added 422 ops into the model
INFO:tensorflow:[LMS][0] Editing model for LMS, took: 6043.99204254 ms
INFO:tensorflow:[LMS][0] 188 tensors will be swapped out(in) to(from) the host
('max_mem:', 7606.362368, 'mem_used:', 0.00128)
('max_mem:', 7606.362368, 'mem_used:', 324.299008)
('max_mem:', 7606.362368, 'mem_used:', 324.303872)
('max_mem:', 7606.362368, 'mem_used:', 324.303872)
('max_mem:', 7606.362368, 'mem_used:', 324.303872)
('max_mem:', 7606.362368, 'mem_used:', 324.303872)
('max_mem:', 7606.362368, 'mem_used:', 324.303872)
('max_mem:', 7606.362368, 'mem_used:', 324.303872)
('max_mem:', 7606.362368, 'mem_used:', 324.303872)
('max_mem:', 7606.362368, 'mem_used:', 324.303872)
('max_mem:', 7606.362368, 'mem_used:', 324.303872)
--- Size:50
|_____ memory_cost:2532.262584, execute_time:0.922
|_____ memory_cost:2565.386224, execute_time:0.949
|_____ memory_cost:2440.190872, execute_time:0.929
|_____ memory_cost:2571.678704, execute_time:0.932
|_____ memory_cost:2562.503152, execute_time:1.000
|_____ memory_cost:2590.02776, execute_time:0.922
|_____ memory_cost:2568.53144, execute_time:0.936
|_____ memory_cost:2566.303216, execute_time:0.934
|_____ memory_cost:2572.59672, execute_time:0.937
sleep 5 sec
INFO:tensorflow:[LMS][0] Editing model for LMS
INFO:tensorflow:[LMS][0] n_tensors: all tensors
INFO:tensorflow:[LMS][0] lb: 1
INFO:tensorflow:[LMS][0] Edited model is valid and logically equivalent to the original one
INFO:tensorflow:[LMS][0] Added 847 ops into the model
INFO:tensorflow:[LMS][0] Editing model for LMS, took: 25653.2928944 ms
INFO:tensorflow:[LMS][0] 375 tensors will be swapped out(in) to(from) the host
('max_mem:', 7606.362368, 'mem_used:', 0.00128)
('max_mem:', 7606.362368, 'mem_used:', 550.808064)
('max_mem:', 7606.362368, 'mem_used:', 550.812928)
('max_mem:', 7606.362368, 'mem_used:', 550.812928)
('max_mem:', 7606.362368, 'mem_used:', 550.812928)
('max_mem:', 7606.362368, 'mem_used:', 550.812928)
('max_mem:', 7606.362368, 'mem_used:', 550.812928)
('max_mem:', 7606.362368, 'mem_used:', 550.812928)
('max_mem:', 7606.362368, 'mem_used:', 550.812928)
('max_mem:', 7606.362368, 'mem_used:', 550.812928)
('max_mem:', 7606.362368, 'mem_used:', 550.812928)
--- Size:101
|_____ memory_cost:3612.274256, execute_time:1.653
|_____ memory_cost:3702.5222, execute_time:1.566
|_____ memory_cost:3557.289552, execute_time:1.562
|_____ memory_cost:3806.918736, execute_time:1.587
|_____ memory_cost:3751.309784, execute_time:1.558
|_____ memory_cost:3651.932536, execute_time:1.539
|_____ memory_cost:3704.488792, execute_time:1.625
|_____ memory_cost:3648.83564, execute_time:1.550
|_____ memory_cost:3582.783056, execute_time:1.543
sleep 5 sec
INFO:tensorflow:[LMS][0] Editing model for LMS
INFO:tensorflow:[LMS][0] n_tensors: all tensors
INFO:tensorflow:[LMS][0] lb: 1
INFO:tensorflow:[LMS][0] Edited model is valid and logically equivalent to the original one
INFO:tensorflow:[LMS][0] Added 1272 ops into the model
INFO:tensorflow:[LMS][0] Editing model for LMS, took: 65844.5680141 ms
INFO:tensorflow:[LMS][0] 562 tensors will be swapped out(in) to(from) the host
('max_mem:', 7606.362368, 'mem_used:', 0.00128)
('max_mem:', 7606.362368, 'mem_used:', 740.937984)
('max_mem:', 7606.362368, 'mem_used:', 740.942848)
('max_mem:', 7606.362368, 'mem_used:', 740.942848)
('max_mem:', 7606.362368, 'mem_used:', 740.942848)
('max_mem:', 7606.362368, 'mem_used:', 740.942848)
('max_mem:', 7606.362368, 'mem_used:', 740.942848)
('max_mem:', 7606.362368, 'mem_used:', 740.942848)
('max_mem:', 7606.362368, 'mem_used:', 740.942848)
('max_mem:', 7606.362368, 'mem_used:', 740.942848)
('max_mem:', 7606.362368, 'mem_used:', 740.942848)
--- Size:152
|_____ memory_cost:5022.180272, execute_time:2.389
|_____ memory_cost:5140.406192, execute_time:2.230
|_____ memory_cost:5061.041368, execute_time:2.302
|_____ memory_cost:5047.917464, execute_time:2.292
|_____ memory_cost:5020.41464, execute_time:2.293
|_____ memory_cost:4983.648944, execute_time:2.236
|_____ memory_cost:4994.26908, execute_time:2.269
|_____ memory_cost:4970.006808, execute_time:2.225
|_____ memory_cost:4855.69372, execute_time:2.377
sleep 5 sec
INFO:tensorflow:[LMS][0] Editing model for LMS
INFO:tensorflow:[LMS][0] n_tensors: all tensors
INFO:tensorflow:[LMS][0] lb: 1
INFO:tensorflow:[LMS][0] Edited model is valid and logically equivalent to the original one
INFO:tensorflow:[LMS][0] Added 1672 ops into the model
INFO:tensorflow:[LMS][0] Editing model for LMS, took: 130430.437088 ms
INFO:tensorflow:[LMS][0] 738 tensors will be swapped out(in) to(from) the host
('max_mem:', 7606.362368, 'mem_used:', 0.00128)
('max_mem:', 7606.362368, 'mem_used:', 792.36608)
('max_mem:', 7606.362368, 'mem_used:', 792.370944)
('max_mem:', 7606.362368, 'mem_used:', 792.370944)
('max_mem:', 7606.362368, 'mem_used:', 792.370944)
('max_mem:', 7606.362368, 'mem_used:', 792.370944)
('max_mem:', 7606.362368, 'mem_used:', 792.370944)
('max_mem:', 7606.362368, 'mem_used:', 792.370944)
('max_mem:', 7606.362368, 'mem_used:', 792.370944)
('max_mem:', 7606.362368, 'mem_used:', 792.370944)
('max_mem:', 7606.362368, 'mem_used:', 792.370944)
--- Size:200
|_____ memory_cost:6813.241208, execute_time:3.055
|_____ memory_cost:6813.265016, execute_time:3.110
|_____ memory_cost:6811.99548, execute_time:3.019
|_____ memory_cost:6813.27628, execute_time:3.065
|_____ memory_cost:6813.302392, execute_time:3.039
|_____ memory_cost:6813.25324, execute_time:3.149
|_____ memory_cost:6811.634096, execute_time:3.171
|_____ memory_cost:6813.255768, execute_time:3.097
|_____ memory_cost:6766.866232, execute_time:3.082
sleep 5 sec
Running with OME
('max_mem:', 0.00128, 'mem_used:', 0.00128)
('max_mem:', 151.00672, 'mem_used:', 141.913088)
('max_mem:', 490.304512, 'mem_used:', 141.918208)
('max_mem:', 509.105408, 'mem_used:', 141.918208)
('max_mem:', 509.105408, 'mem_used:', 141.918208)
('max_mem:', 509.105408, 'mem_used:', 141.918208)
('max_mem:', 509.105408, 'mem_used:', 141.918208)
('max_mem:', 509.105408, 'mem_used:', 141.918208)
('max_mem:', 509.105408, 'mem_used:', 141.918208)
('max_mem:', 520.549376, 'mem_used:', 141.918208)
('max_mem:', 520.549376, 'mem_used:', 141.918208)
--- Size:18
|_____ memory_cost:367.187216, execute_time:0.193
|_____ memory_cost:367.187216, execute_time:0.193
|_____ memory_cost:367.187216, execute_time:0.197
|_____ memory_cost:367.187216, execute_time:0.204
|_____ memory_cost:367.187216, execute_time:0.193
|_____ memory_cost:367.187216, execute_time:0.198
|_____ memory_cost:367.187216, execute_time:0.200
|_____ memory_cost:367.187216, execute_time:0.190
|_____ memory_cost:367.187216, execute_time:0.210
sleep 5 sec
('max_mem:', 520.549376, 'mem_used:', 0.00128)
('max_mem:', 520.549376, 'mem_used:', 264.72064)
('max_mem:', 787.848448, 'mem_used:', 264.72576)
('max_mem:', 787.848448, 'mem_used:', 264.72576)
('max_mem:', 787.848448, 'mem_used:', 264.72576)
('max_mem:', 787.848448, 'mem_used:', 264.72576)
('max_mem:', 787.848448, 'mem_used:', 264.72576)
('max_mem:', 787.848448, 'mem_used:', 264.72576)
('max_mem:', 787.848448, 'mem_used:', 264.72576)
('max_mem:', 787.848448, 'mem_used:', 264.72576)
('max_mem:', 787.848448, 'mem_used:', 264.72576)
--- Size:34
|_____ memory_cost:497.4328, execute_time:0.364
|_____ memory_cost:497.4328, execute_time:0.381
|_____ memory_cost:497.4328, execute_time:0.387
|_____ memory_cost:497.4328, execute_time:0.378
|_____ memory_cost:497.4328, execute_time:0.385
|_____ memory_cost:497.4328, execute_time:0.393
|_____ memory_cost:497.4328, execute_time:0.392
|_____ memory_cost:497.4328, execute_time:0.389
|_____ memory_cost:497.4328, execute_time:0.382
sleep 5 sec
('max_mem:', 787.848448, 'mem_used:', 0.00128)
('max_mem:', 787.848448, 'mem_used:', 323.49952)
('max_mem:', 1432.658176, 'mem_used:', 323.50464)
('max_mem:', 1432.658176, 'mem_used:', 323.50464)
('max_mem:', 1432.658176, 'mem_used:', 323.50464)
('max_mem:', 1432.658176, 'mem_used:', 323.50464)
('max_mem:', 1432.658176, 'mem_used:', 323.50464)
('max_mem:', 1432.658176, 'mem_used:', 323.50464)
('max_mem:', 1432.658176, 'mem_used:', 323.50464)
('max_mem:', 1432.658176, 'mem_used:', 323.50464)
('max_mem:', 1432.658176, 'mem_used:', 323.50464)
--- Size:50
|_____ memory_cost:929.323008, execute_time:0.598
|_____ memory_cost:929.323008, execute_time:0.581
|_____ memory_cost:925.128704, execute_time:0.613
|_____ memory_cost:929.323008, execute_time:0.609
|_____ memory_cost:929.323008, execute_time:0.609
|_____ memory_cost:907.302912, execute_time:0.608
|_____ memory_cost:929.323008, execute_time:0.610
|_____ memory_cost:929.323008, execute_time:0.593
|_____ memory_cost:929.323008, execute_time:0.586
sleep 5 sec
('max_mem:', 1432.658176, 'mem_used:', 0.00128)
('max_mem:', 1432.658176, 'mem_used:', 552.507904)
('max_mem:', 1970.934016, 'mem_used:', 552.513024)
('max_mem:', 1970.934016, 'mem_used:', 552.513024)
('max_mem:', 1970.934016, 'mem_used:', 552.513024)
('max_mem:', 1970.934016, 'mem_used:', 552.513024)
('max_mem:', 1970.934016, 'mem_used:', 552.513024)
('max_mem:', 1970.934016, 'mem_used:', 552.513024)
('max_mem:', 1970.934016, 'mem_used:', 552.513024)
('max_mem:', 1970.934016, 'mem_used:', 552.513024)
('max_mem:', 1970.934016, 'mem_used:', 552.513024)
--- Size:101
|_____ memory_cost:1190.035504, execute_time:1.113
|_____ memory_cost:1199.821872, execute_time:1.173
|_____ memory_cost:1174.365232, execute_time:1.145
|_____ memory_cost:1174.365232, execute_time:1.135
|_____ memory_cost:1238.590512, execute_time:1.139
|_____ memory_cost:1257.858096, execute_time:1.135
|_____ memory_cost:1264.280624, execute_time:1.138
|_____ memory_cost:1264.280624, execute_time:1.144
|_____ memory_cost:1277.12568, execute_time:1.130
sleep 5 sec
('max_mem:', 1970.934016, 'mem_used:', 0.00128)
('max_mem:', 1970.934016, 'mem_used:', 742.20032)
('max_mem:', 2120.408576, 'mem_used:', 742.20544)
('max_mem:', 2120.408576, 'mem_used:', 742.20544)
('max_mem:', 2120.408576, 'mem_used:', 742.20544)
('max_mem:', 2120.408576, 'mem_used:', 742.20544)
('max_mem:', 2120.408576, 'mem_used:', 742.20544)
('max_mem:', 2131.482112, 'mem_used:', 742.20544)
('max_mem:', 2131.482112, 'mem_used:', 742.20544)
('max_mem:', 2131.482112, 'mem_used:', 742.20544)
('max_mem:', 2131.482112, 'mem_used:', 742.20544)
--- Size:152
|_____ memory_cost:1218.968368, execute_time:1.689
|_____ memory_cost:1182.314288, execute_time:1.691
|_____ memory_cost:1241.558832, execute_time:1.685
|_____ memory_cost:1233.694512, execute_time:1.678
|_____ memory_cost:1226.485552, execute_time:1.709
|_____ memory_cost:1182.314288, execute_time:1.693
|_____ memory_cost:1203.761968, execute_time:1.709
|_____ memory_cost:1197.561648, execute_time:1.703
|_____ memory_cost:1182.314288, execute_time:1.737
sleep 5 sec
('max_mem:', 2131.482112, 'mem_used:', 0.00128)
('max_mem:', 2131.482112, 'mem_used:', 794.918912)
('max_mem:', 2142.523136, 'mem_used:', 794.924032)
('max_mem:', 2142.523136, 'mem_used:', 794.924032)
('max_mem:', 2142.523136, 'mem_used:', 794.924032)
('max_mem:', 2142.523136, 'mem_used:', 794.924032)
('max_mem:', 2142.523136, 'mem_used:', 794.924032)
('max_mem:', 2142.523136, 'mem_used:', 794.924032)
('max_mem:', 2142.523136, 'mem_used:', 794.924032)
('max_mem:', 2142.523136, 'mem_used:', 794.924032)
('max_mem:', 2142.523136, 'mem_used:', 794.924032)
--- Size:200
|_____ memory_cost:1177.276464, execute_time:2.279
|_____ memory_cost:1190.12152, execute_time:2.304
|_____ memory_cost:1188.021296, execute_time:2.287
|_____ memory_cost:1157.328432, execute_time:2.316
|_____ memory_cost:1216.228912, execute_time:2.316
|_____ memory_cost:1216.228912, execute_time:2.305
|_____ memory_cost:1206.304816, execute_time:2.360
|_____ memory_cost:1188.021296, execute_time:2.400
|_____ memory_cost:1190.12152, execute_time:2.348
sleep 5 sec
```
batch size 128
openai/gradients
| layers | cpu used | gpu used | per/sec |
| :---: | :---: | :---: | :---: |
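| 18 | 3.21GB | 1560.40MB | 0.450 |
| 34 | 3.28GB | 2304.05MB | 0.857 |
| 50 | 3.34GB | 4255.02MB | 1.540 |
| 101 | 3.48GB | 5740.29MB | 2.744 |
| 152 | 3.85GB | 5864.70MB | 3.942 |
| 200 | 4.12GB | 5872.75MB | 5.373 |
<!-- NOTE(review): two cells disagree with the raw numbers listed right after this table; confirm against the original measurements: (1) 34 layers gpu used is 2304.05 here but 2301.05 in the raw list; (2) 101 layers cpu used is 3.48GB here but raw 6.87 - idle 3.29 = 3.58. -->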
```
18 6.50 1560.40 0.450
34 6.57 2301.05 0.857
50 6.63 4255.02 1.540
101 6.87 5740.29 2.744
152 7.14 5864.70 3.942
200 7.41 5872.75 5.373
```
ome
| layers | cpu used | gpu used | per/sec |
| :---: | :---: | :---: | :---: |
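| 18 | 4.23GB | 1487.52MB | 0.546 |
| 34 | 4.69GB | 2250.48MB | 0.979 |
| 50 | 6.34GB | 3842.24MB | 1.717 |
| 101 | 6.36GB | 5102.24MB | 2.927 |
| 152 | 8.61GB | 5449.61MB | 4.227 |
| 200 | 8.91GB | 5381.50MB | 5.720 |
<!-- NOTE(review): 18 layers cpu used is 4.23GB here, but the raw list right after this table gives 7.72 - idle 3.29 = 4.43; confirm which value is correct. -->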
```
18 7.72 1487.52 0.546
34 7.98 2250.48 0.979
50 9.63 3842.24 1.717
101 9.65 5102.24 2.927
152 11.9 5449.61 4.227
200 12.2 5381.50 5.720
```
max batch on resnet50
| original | swap | recomputing | ome |
| :---: | :---: | :---: | :---: |
| 87 <br> 7109.55MB <br> 1.146sec | 111 <br> 7282.41MB <br> 2.622sec | 212 <br> 6609.38MB <br> 2.831sec | 214 <br> 7017.42MB <br> 3.043sec |





--------------------------------------










| Strategy| gpu_mem | cpu_mem | image/per |
| :---: | :---: | :---: | :---: |
| original | 4268.21 | 3643 | 1.9747 |
| openai/checkpoints | 2528.45 | 3593 | 2.5983 |
| large model support | 4170.94 | 7853 | 2.9012 |
| ome | 2564.43 | 3723 | 2.7220 |
```
root@8dc52534a323:/home/gradient-checkpointing/test# python benchmark.py
Running with checkpoints
('max_mem:', 0.00128, 'mem_used:', 0.00128)
('max_mem:', 580.18048, 'mem_used:', 570.95552)
('max_mem:', 2528.455936, 'mem_used:', 570.961408)
2.50658798218
('max_mem:', 2528.455936, 'mem_used:', 570.961408)
2.70896601677
('max_mem:', 2528.455936, 'mem_used:', 570.961408)
2.6566298008
('max_mem:', 2528.455936, 'mem_used:', 570.961408)
2.62326598167
('max_mem:', 2528.455936, 'mem_used:', 570.961408)
2.53667402267
('max_mem:', 2528.455936, 'mem_used:', 570.961408)
2.50002789497
('max_mem:', 2528.455936, 'mem_used:', 570.961408)
2.51719403267
('max_mem:', 2528.455936, 'mem_used:', 570.961408)
2.65550398827
('max_mem:', 2528.455936, 'mem_used:', 570.961408)
2.68048596382
--- Size:50
|______ memory_cost:1957.494912, execute_time:2.507
|______ memory_cost:1957.494912, execute_time:2.709
|______ memory_cost:1957.494912, execute_time:2.657
|______ memory_cost:1957.494912, execute_time:2.623
|______ memory_cost:1957.494912, execute_time:2.537
|______ memory_cost:1957.494912, execute_time:2.500
|______ memory_cost:1957.494912, execute_time:2.517
|______ memory_cost:1957.494912, execute_time:2.656
|______ memory_cost:1957.494912, execute_time:2.680
sleep 5 sec
Running without checkpoints
('max_mem:', 2528.455936, 'mem_used:', 0.00128)
('max_mem:', 2528.455936, 'mem_used:', 570.35008)
('max_mem:', 4268.211456, 'mem_used:', 570.355712)
2.20537495613
('max_mem:', 4268.211456, 'mem_used:', 570.355712)
2.00065994263
('max_mem:', 4268.211456, 'mem_used:', 570.355712)
1.84263706207
('max_mem:', 4268.211456, 'mem_used:', 570.355712)
1.99153804779
('max_mem:', 4268.211456, 'mem_used:', 570.355712)
1.9814119339
('max_mem:', 4268.211456, 'mem_used:', 570.355712)
1.86698818207
('max_mem:', 4268.211456, 'mem_used:', 570.355712)
1.86092019081
('max_mem:', 4268.211456, 'mem_used:', 570.355712)
1.98590898514
('max_mem:', 4268.211456, 'mem_used:', 570.355712)
2.03736209869
--- Size:50
|______ memory_cost:3697.861472, execute_time:2.205
|______ memory_cost:3697.861472, execute_time:2.001
|______ memory_cost:3697.861472, execute_time:1.843
|______ memory_cost:3697.861472, execute_time:1.992
|______ memory_cost:3697.861472, execute_time:1.981
|______ memory_cost:3697.861472, execute_time:1.867
|______ memory_cost:3697.861472, execute_time:1.861
|______ memory_cost:3697.861472, execute_time:1.986
|______ memory_cost:3697.861472, execute_time:2.037
sleep 5 sec
Running with tensorflow_large_model_support
INFO:tensorflow:[LMS][0] Editing model for LMS
INFO:tensorflow:[LMS][0] n_tensors: all tensors
INFO:tensorflow:[LMS][0] lb: 1
INFO:tensorflow:[LMS][0] Edited model is valid and logically equivalent to the original one
INFO:tensorflow:[LMS][0] Added 727 ops into the model
INFO:tensorflow:[LMS][0] Editing model for LMS, took: 17805.366993 ms
INFO:tensorflow:[LMS][0] 335 tensors will be swapped out(in) to(from) the host
('max_mem:', 4268.211456, 'mem_used:', 0.00128)
('max_mem:', 4268.211456, 'mem_used:', 577.42208)
('max_mem:', 4268.211456, 'mem_used:', 577.427712)
2.77239322662
('max_mem:', 4268.211456, 'mem_used:', 577.427712)
2.97816681862
('max_mem:', 4268.211456, 'mem_used:', 577.427712)
2.92871713638
('max_mem:', 4268.211456, 'mem_used:', 577.427712)
3.04731798172
('max_mem:', 4268.211456, 'mem_used:', 577.427712)
2.83289790154
('max_mem:', 4268.211456, 'mem_used:', 577.427712)
2.81667685509
('max_mem:', 4268.211456, 'mem_used:', 577.427712)
2.92796897888
('max_mem:', 4268.211456, 'mem_used:', 577.427712)
2.90156507492
('max_mem:', 4268.211456, 'mem_used:', 577.427712)
2.90543580055
--- Size:50
|_____ memory_cost:3593.520296, execute_time:2.772
|_____ memory_cost:3598.443688, execute_time:2.978
|_____ memory_cost:3600.147624, execute_time:2.929
|_____ memory_cost:3577.9354, execute_time:3.047
|_____ memory_cost:3594.863784, execute_time:2.833
|_____ memory_cost:3593.33188, execute_time:2.817
|_____ memory_cost:3593.520296, execute_time:2.928
|_____ memory_cost:3594.380456, execute_time:2.902
|_____ memory_cost:3599.10724, execute_time:2.905
sleep 5 sec
Running with OME
total checkpoints disconnected count : 12
checkpoints but not swap out to /cpu: Tensor("InceptionV4/InceptionV4/Mixed_5d/concat:0", shape=(32, 35, 35, 384), dtype=float32)
swap out to /cpu: Tensor("InceptionV4/InceptionV4/Mixed_5a/Branch_0/Conv2d_1a_3x3/Relu:0", shape=(32, 35, 35, 192), dtype=float32)
checkpoints but not swap out to /cpu: Tensor("InceptionV4/InceptionV4/Mixed_7c/concat:0", shape=(32, 8, 8, 1536), dtype=float32)
checkpoints but not swap out to /cpu: Tensor("InceptionV4/InceptionV4/Mixed_6c/concat:0", shape=(32, 17, 17, 1024), dtype=float32)
checkpoints but not swap out to /cpu: Tensor("InceptionV4/InceptionV4/Conv2d_2b_3x3/Relu:0", shape=(32, 147, 147, 64), dtype=float32)
checkpoints but not swap out to /cpu: Tensor("InceptionV4/InceptionV4/Mixed_6a/concat:0", shape=(32, 17, 17, 1024), dtype=float32)
checkpoints but not swap out to /cpu: Tensor("InceptionV4/InceptionV4/Mixed_5b/concat:0", shape=(32, 35, 35, 384), dtype=float32)
checkpoints but not swap out to /cpu: Tensor("InceptionV4/InceptionV4/Mixed_6e/concat:0", shape=(32, 17, 17, 1024), dtype=float32)
swap out to /cpu: Tensor("InceptionV4/Logits/PreLogitsFlatten/flatten/Reshape:0", shape=(32, 1536), dtype=float32)
checkpoints but not swap out to /cpu: Tensor("InceptionV4/InceptionV4/Mixed_3a/concat:0", shape=(32, 73, 73, 160), dtype=float32)
checkpoints but not swap out to /cpu: Tensor("InceptionV4/InceptionV4/Mixed_7a/concat:0", shape=(32, 8, 8, 1536), dtype=float32)
checkpoints but not swap out to /cpu: Tensor("InceptionV4/InceptionV4/Mixed_6g/concat:0", shape=(32, 17, 17, 1024), dtype=float32)
('max_mem:', 4268.211456, 'mem_used:', 0.00128)
('max_mem:', 4268.211456, 'mem_used:', 572.137216)
('max_mem:', 4268.211456, 'mem_used:', 572.143104)
2.76119303703
('max_mem:', 4268.211456, 'mem_used:', 572.143104)
2.72861289978
('max_mem:', 4268.211456, 'mem_used:', 572.143104)
2.77925801277
('max_mem:', 4268.211456, 'mem_used:', 572.143104)
2.59219789505
('max_mem:', 4268.211456, 'mem_used:', 572.143104)
2.60928583145
('max_mem:', 4268.211456, 'mem_used:', 572.143104)
2.74376797676
('max_mem:', 4268.211456, 'mem_used:', 572.143104)
2.76235890388
('max_mem:', 4268.211456, 'mem_used:', 572.143104)
2.73930001259
('max_mem:', 4268.211456, 'mem_used:', 572.143104)
2.78264713287
--- Size:50
|_____ memory_cost:1992.294016, execute_time:2.761
|_____ memory_cost:1992.294016, execute_time:2.729
|_____ memory_cost:1992.294016, execute_time:2.779
|_____ memory_cost:1992.294016, execute_time:2.592
|_____ memory_cost:1992.294016, execute_time:2.609
|_____ memory_cost:1992.294016, execute_time:2.744
|_____ memory_cost:1992.294016, execute_time:2.762
|_____ memory_cost:1992.294016, execute_time:2.739
|_____ memory_cost:1992.294016, execute_time:2.783
sleep 5 sec
```