因為 fork 出新的 process 時,會連同 buffered I/O 也一併複製過去。所以總 '-' 數量會是所有 process 數量 * NNN。
在此例題的例子,NNN == 12,總 process 的數量為 4096 ==> 總 '-' 數量 == 49152。
可以用簡易的程式來計算出不同的 NNN 會印出多少的 '-' 以及 fork 出多少個 process。
#include <stdio.h>
#include <stdlib.h>
void dfs(int n, int lim, int *p2num) {
for(int i = n;i < lim;i++) {
(*p2num)++;
dfs(i + 1, lim, p2num);
}
}
int main(int argc, char *argv[]) {
if(argc < 2) {
fprintf(stderr, "Usage: %s <NNN>\n",
argv[0]);
return -1;
}
int NNN = atoi(argv[1]);
int num = 1;
dfs(0, NNN, &num);
printf("num : %d\n", num * NNN);
return 0;
}
範例執行輸出:
$ ./count 9
num of process : 512
num of '-' : 4608
$ ./count 10
num of process : 1024
num of '-' : 10240
$ ./count 11
num of process : 2048
num of '-' : 22528
$ ./count 12
num of process : 4096
num of '-' : 49152
typedef struct {
int cnt; /* actual pool count */
int pal; /* pool array length (2^x ceil of cnt) */
int min_pool, max_pool; /* minimum/maximum pool size */
void **ps; /* pools */
int *sizes; /* chunk size for each pool */
void *hs[1]; /* heads for pools' free lists */
} mpool;
每一個 ps 會指向一個 memory pool 的開頭。
每一個 hs 會指向 memory pool 中 free-list 的第一個 chunk。
/* Initialize a memory pool for allocations between 2^min2 and 2^max2,
* inclusive. Larger allocations will be directly allocated and freed
* via mmap / munmap.
* Returns NULL on error.
*/
mpool *mpool_init(int min2, int max2)
{
int cnt = max2 - min2 + 1; /* pool array count */
int palen = iceil2(cnt);
assert(cnt > 0);
mpool *mp = malloc(sizeof(mpool) + (cnt - 1) * sizeof(void *));
void *pools = malloc(palen * sizeof(void **));
int *sizes = malloc(palen * sizeof(int));
if (!mp || !pools || !sizes)
return NULL;
mp->cnt = cnt;
mp->ps = pools;
mp->pal = palen;
mp->sizes = sizes;
mp->min_pool = (1 << min2), mp->max_pool = (1 << max2);
memset(sizes, 0, palen * sizeof(int));
memset(pools, 0, palen * sizeof(void *));
memset(mp->hs, 0, cnt * sizeof(void *));
return mp;
}
初始化 mpool 結構,但此時還不會向作業系統要求 memory pool 所需要的空間。
/* Allocate memory out of the relevant memory pool.
* If larger than max_pool, just mmap it. If pool is full, mmap a new one and
* link it to the end of the current one.
* Returns NULL on error.
*/
void *mpool_alloc(mpool *mp, int sz)
{
void **cur;
int i, p;
assert(mp);
if (sz >= mp->max_pool) {
cur = get_mmap(sz); /* just mmap it */
if (!cur)
return NULL;
return cur;
}
long szceil = 0;
for (i = 0, p = mp->min_pool;; i++, p *= 2) {
if (p > sz) {
szceil = p;
break;
}
}
assert(szceil > 0);
cur = mp->hs[i]; /* get current head */
if (!cur) { /* lazily allocate and initialize pool */
void **pool = mpool_new_pool(szceil, PAGE_SIZE);
printf("mpool alloc, pool from mpool_new_pool : %p\n", pool);
if (!pool)
return NULL;
mp->ps[i] = mp->hs[i] = pool;
mp->sizes[i] = szceil;
cur = mp->hs[i];
}
assert(cur);
if (!(*cur)) { /* if at end, attach to a new page */
void **np = mpool_new_pool(szceil, PAGE_SIZE);
if (!np)
return NULL;
*cur = &np[0];
assert(*cur);
if (add_pool(mp, np, szceil) < 0)
return NULL;
}
assert(*cur > (void *) PAGE_SIZE);
mp->hs[i] = *cur; /* set head to next head */
printf("mpool alloc, mp->hs[%d] = %p\n", i, *cur);
return cur;
}
假如原本沒有 memory pool ,或是該 memory pool 已經用完了,就會再向作業系統要一段空間作為 memory pool 使用。
最後從 mp->hs[i] 拿取 free-list 的第一個 chunk,並返回給 caller 做使用。並將 mp->hs[i] 指向新的 free-list header。
/* mmap a new memory pool of TOTAL_SZ bytes, then build an internal
* freelist of SZ-byte cells, with the head at (result)[0].
* Returns NULL on error.
*/
void **mpool_new_pool(unsigned int sz, unsigned int total_sz)
{
int o = 0; /* o=offset */
void *p = get_mmap(sz > total_sz ? sz : total_sz);
if (!p)
return NULL;
int **pool = (int **) p;
assert(pool);
assert(sz > sizeof(void *));
void *last = NULL;
int lim = (total_sz / sz);
for (int i = 0; i < lim; i++) {
if (last)
assert(last == pool[o]);
o = (i * sz) / sizeof(void *);
pool[o] = (int *) &pool[o + (sz / sizeof(void *))];
last = pool[o];
}
pool[o] = NULL;
return p;
}
使用 mmap 向作業系統要一塊空間,並使用 linked-list 將 free chunk 連結起來。
/* Push an individual pointer P back on the freelist for the pool with size
* SZ cells. if SZ is > the max pool size, just munmap it.
* Return pointer P (SZ bytes in size) to the appropriate pool.
*/
void mpool_repool(mpool *mp, void *p, int sz)
{
int i = 0;
if (sz > mp->max_pool) {
if (munmap(p, sz) == -1)
fprintf(stderr, "Failed to unmap %d bytes at %p\n", sz, p);
return;
}
void **ip = (void **) p;
*ip = mp->hs[i];
assert(ip);
mp->hs[i] = ip;
printf("mpool_repool, mp->hs[%d] = %p\n", i, ip);
}
假若該 chunk 的 size > mp->max_pool,則直接用 munmap 將此塊記憶體返回給作業系統 ( 因為當初也是直接用 mmap 從作業系統取得該塊記憶體 )。
假若 chuck 的 size 在 mp->min_pool, mp->max_pool 之間,則將此 chuck 返回給 mp->hs[0],插入 mp->hs[0] 所指向的 linked-list。
mpool 這個 structure 多一個欄位來儲存 page_size
typedef struct {
long int page_size; /* size of page */
int cnt; /* actual pool count */
int pal; /* pool array length (2^x ceil of cnt) */
int min_pool, max_pool; /* minimum/maximum pool size */
void **ps; /* pools */
int *sizes; /* chunk size for each pool */
void *hs[1]; /* heads for pools' free lists */
} mpool;
在 mpool 初始化的時候,將 page_size 填入
/* Initialize a memory pool for allocations between 2^min2 and 2^max2,
* inclusive. Larger allocations will be directly allocated and freed
* via mmap / munmap.
* Returns NULL on error.
*/
mpool *mpool_init(int min2, int max2)
{
.
.
.
int *sizes = malloc(palen * sizeof(int));
if (!mp || !pools || !sizes)
return NULL;
mp->page_size = getpagesize();
mp->cnt = cnt;
mp->ps = pools;
mp->pal = palen;
mp->sizes = sizes;
.
.
.
很浪費記憶體,但可以加速約 5~10 %,page size 要夠小才有辦法這樣做。
在初始化 mpool 的時候,直接建立一個可以用 sz 直接查詢 index 的陣列,sz2index。並也把 sizes 陣列順手建立好。
/* Initialize a memory pool for allocations between 2^min2 and 2^max2,
* inclusive. Larger allocations will be directly allocated and freed
* via mmap / munmap.
* Returns NULL on error.
*/
mpool *mpool_init(int min2, int max2)
{
int cnt = max2 - min2 + 1; /* pool array count */
int palen = iceil2(cnt);
int i, p;
assert(cnt > 0);
mpool *mp = malloc(sizeof(mpool) + (cnt - 1) * sizeof(void *));
void *pools = malloc(palen * sizeof(void **));
int *sizes = malloc(palen * sizeof(int));
if (!mp || !pools || !sizes)
return NULL;
int *sz2index = (int *)malloc(sizeof(int) * getpagesize());
mp->page_size = getpagesize();
mp->cnt = cnt;
mp->ps = pools;
mp->pal = palen;
mp->sizes = sizes;
mp->sz2index = sz2index;
mp->min_pool = (1 << min2), mp->max_pool = (1 << max2);
memset(sizes, 0, palen * sizeof(int));
memset(pools, 0, palen * sizeof(void *));
memset(mp->hs, 0, cnt * sizeof(void *));
memset(mp->sz2index, 0, getpagesize() * sizeof(int));
int index = 0;
for(int i = 0, p = mp->min_pool;i < mp->page_size;i++) {
if(i == p) {
mp->sizes[index] = p;
index++;
p *= 2;
}
sz2index[i] = index;
}
return mp;
}
在 mpool_alloc 的時候,直接查表得出 i 以及 szceil。
/* Allocate memory out of the relevant memory pool.
* If larger than max_pool, just mmap it. If pool is full, mmap a new one and
* link it to the end of the current one.
* Returns NULL on error.
*/
void *mpool_alloc(mpool *mp, int sz)
{
void **cur;
int i, p;
long szceil;
assert(mp);
if (sz >= mp->max_pool) {
cur = get_mmap(sz); /* just mmap it */
if (!cur)
return NULL;
return cur;
}
i = mp->sz2index[sz];
szceil = mp->sizes[i];
assert(szceil > 0);
cur = mp->hs[i]; /* get current head */
if (!cur) { /* lazily allocate and initialize pool */
void **pool = mpool_new_pool(szceil, mp->page_size);
if (!pool)
return NULL;
mp->ps[i] = mp->hs[i] = pool;
cur = mp->hs[i];
}
assert(cur);
if (!(*cur)) { /* if at end, attach to a new page */
void **np = mpool_new_pool(szceil, mp->page_size);
if (!np)
return NULL;
*cur = &np[0];
assert(*cur);
if (add_pool(mp, np, szceil) < 0)
return NULL;
}
assert(*cur > (void *) mp->page_size);
mp->hs[i] = *cur; /* set head to next head */
return cur;
}
省略了 branch 以及除法,可減少需要使用的 instruction 數目。
/* mmap a new memory pool of TOTAL_SZ bytes, then build an internal
* freelist of SZ-byte cells, with the head at (result)[0].
* Returns NULL on error.
*/
void **mpool_new_pool(unsigned int sz, unsigned int total_sz)
{
int o = 0; /* o=offset */
void *p = get_mmap(sz > total_sz ? sz : total_sz);
if (!p)
return NULL;
int **pool = (int **) p;
assert(pool);
assert(sz > sizeof(void *));
void *last = NULL;
int lim = (total_sz / sz);
int sz_d_void = sz / sizeof(void *);
pool[o] = (int *) &pool[o + sz_d_void];
last = pool[o];
for (int i = 1; i < lim; i++) {
assert(last == pool[o]);
o += sz_d_void;
pool[o] = (int *) &pool[o + sz_d_void];
last = pool[o];
}
pool[o] = NULL;
return p;
}
在 mpool_repool 的時候,放到相對應的 index 中,可以減少 mpool_new_pool 的使用次數,除了可以加快程式效能,也能節省記憶體的消耗。
/* Push an individual pointer P back on the freelist for the pool with size
* SZ cells. if SZ is > the max pool size, just munmap it.
* Return pointer P (SZ bytes in size) to the appropriate pool.
*/
void mpool_repool(mpool *mp, void *p, int sz)
{
int i = 0;
if (sz > mp->max_pool) {
if (munmap(p, sz) == -1)
fprintf(stderr, "Failed to unmap %d bytes at %p\n", sz, p);
return;
}
void **ip = (void **) p;
int now_size;
for (i = 0, now_size = mp->min_pool;; i++, now_size *= 2) {
if (now_size > sz) {
break;
}
}
*ip = mp->hs[i];
assert(ip);
mp->hs[i] = ip;
}
這個優化當 test.c 所使用的 sz 越大,程式加速以及記憶體節省的效果越明顯。
nt main(void)
{
/* Initialize a new mpool for values 2^4 to 2^PMAX */
mpool *mp = mpool_init(4, PMAX);
srandom(getpid() ^ (intptr_t) &main); /* Assume ASLR */
for (int i = 0; i < 50000000; i++) {
//int sz = random() % 64;
//int sz = random() % 128;
int sz = random() % 256;
//int sz = random() % 512;
/* also alloc some larger chunks */
if (random() % 100 == 0)
sz = random() % 10000;
if (!sz)
sz = 1; /* avoid zero allocations */
int *ip = (int *) mpool_alloc(mp, sz);
*ip = 7;
/* randomly repool some of them */
if (random() % 10 == 0) /* repool, known size */
mpool_repool(mp, ip, sz);
if (i % 10000 == 0 && i > 0) {
putchar('.');
if (i % 700000 == 0)
putchar('\n');
}
}
mpool_free(mp);
putchar('\n');
return 0;
}
在 alloc 的地方,可以加上一個簡易的 magic number,這裡我們使用 "0xdead",共 2 byte 的 magic number。
/* Allocate memory out of the relevant memory pool.
* If larger than max_pool, just mmap it. If pool is full, mmap a new one and
* link it to the end of the current one.
* Returns NULL on error.
*/
void *mpool_alloc(mpool *mp, int sz)
{
.
.
.
/* add a simple pattern to detec overflow */
void *temp_cur = cur;
temp_cur += ori_sz;
*((unsigned short *)temp_cur) = 0xdead;
.
.
.
在 repool 的地方,檢查 magic number 有沒有被改變了。
/* Push an individual pointer P back on the freelist for the pool with size
* SZ cells. if SZ is > the max pool size, just munmap it.
* Return pointer P (SZ bytes in size) to the appropriate pool.
*/
void mpool_repool(mpool *mp, void *p, int sz)
{
int ori_sz = sz;
sz += 2;
void *temp_cur = p;
temp_cur += ori_sz;
assert(*((unsigned short *)(temp_cur)) == 0xdead);
.
.
.
使用範例:
先要求一個 sizeof(int) * 100 大小的 chunk,並故意用一個會 out-of-bound 的 for 迴圈來賦值,最後會在 repool 時檢查出溢位的問題。
int main(void)
{
/* Initialize a new mpool for values 2^4 to 2^PMAX */
mpool *mp = mpool_init(4, PMAX);
int *ip = (int *) mpool_alloc(mp, sizeof(int) * 100);
for(int i = 0;i < 105;i++) {
ip[i] = 0xaaaa;
}
mpool_repool(mp, ip, sizeof(int) * 100);
mpool_free(mp);
putchar('\n');
return 0;
}
範例輸出:
a.out: ./main2.c:251: mpool_repool: Assertion `*((unsigned short *)(temp_cur)) == 0xdead' failed.
Aborted (core dumped)