contributed by < twzjwang
>
作業說明: B01: phonebook
github: twzjwang/phonebook
zhanyangch
這個 distribution 太舊,之後的實驗可能會有問題,請升級到 Ubuntu 16.04 之後 "jserv"
cpu
version: Intel® Core™ i5-3337U CPU @ 1.80GHz
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
CPU(s): 4
L1d cache: 32K
L1i cache: 32K
L2 cache: 256K
L3 cache: 3072K
memory
size: 4GiB
$ echo 1 | sudo tee /proc/sys/vm/drop_caches
$ ./phonebook_orig
size of entry : 136 bytes
execution time of append() : 0.108467 sec
execution time of findName() : 0.006022 sec
$ make cache-test
Performance counter stats for './phonebook_orig' (100 runs):
2,080,686 cache-misses # 96.031 % of all cache refs ( +- 0.16% )
2,166,682 cache-references ( +- 0.18% )
262,557,158 instructions # 1.41 insns per cycle ( +- 0.02% )
185,753,200 cycles ( +- 0.12% )
0.075103558 seconds time elapsed ( +- 0.94% )
不要濫用「優化」(optimize) 這詞,在你有具體改進之前,都只是實驗(experiment) "jserv"
避免用 version 1, 2, 3, … 等寫法,明確標注你的嘗試途徑,尤其在 Git commit messages 更該如此,原始程式碼和技術報告都是寫給人看的! "jserv"
strcmp
判斷,left < parent, right > parent
int strcmp(const char *s1, const char *s2)
{
while(*s1 && (*s1 == *s2))
s1++, s2++;
return *(const unsigned char*) s1 - *(const unsigned char*) s2;
}
注意一致的 coding style,應該寫作
char *s1
,而非char *s1
,然後是*s1 == *s2
(有空白) 而非*s1==*s2
"jserv"
all-names.txt
find zora
測試 Performance counter stats for './phonebook_orig' (100 runs):
69,435 cache-misses # 77.038 % of all cache refs ( +- 1.23% )
90,131 cache-references ( +- 1.26% )
12,058,151 instructions # 1.13 insns per cycle ( +- 0.03% )
10,699,046 cycles ( +- 1.63% )
0.004715377 seconds time elapsed ( +- 1.86% )
Performance counter stats for './phonebook_opt_bst' (100 runs):
97,979 cache-misses # 4.795 % of all cache refs ( +- 2.68% )
2,043,183 cache-references ( +- 1.37% )
971,374,950 instructions # 2.01 insns per cycle ( +- 0.00% )
482,156,625 cycles ( +- 1.04% )
0.198216919 seconds time elapsed ( +- 1.31% )
上面這行是…?課程助教
已刪除 twzjwang
int entry_cmp(char a[], char b[])
{
if(strlen(a) == strlen(b))
return strcmp(a, b);
return strlen(a) - strlen(b);
}
沒必要多次呼叫
strlen
,請避免 "jserv"
int entry_cmp(char a[], char b[])
{
int i = strlen(a) - strlen(b);
return i ? i : strcmp(a, b);
}
Performance counter stats for './phonebook_orig' (100 runs):
74,670 cache-misses # 78.183 % of all cache refs ( +- 0.70% )
95,508 cache-references ( +- 1.00% )
12,061,787 instructions # 1.02 insns per cycle ( +- 0.02% )
11,865,179 cycles ( +- 1.95% )
0.005664565 seconds time elapsed ( +- 3.60% )
Performance counter stats for './phonebook_opt_bst' (100 runs):
134,457 cache-misses # 5.148 % of all cache refs ( +- 2.42% )
2,611,651 cache-references ( +- 0.88% )
449,076,938 instructions # 1.88 insns per cycle ( +- 0.15% )
238,755,941 cycles ( +- 1.50% )
0.098018759 seconds time elapsed ( +- 1.83% )
int entry_cmp(char a[], char b[])
{
if (strlen(a) == strlen(b)) {
int l = strlen(a);
for (int i = l - 1; i >= 0; i--) {
if (*(a + i) != *(b + i))
return *(a + i) - *(b + i);
}
}
return strlen(a) - strlen(b);
}
縮減
int i
和int l
的 scope。
strlen
多次呼叫,可以簡化 "jserv"
int entry_cmp(char a[], char b[])
{
int m = strlen(a);
int n = m - strlen(b);
if (!n) {
while (m--) {
if (*(a + m) != *(b + m))
return *(a + m) - *(b + m);
}
}
return n;
}
如何解釋 cache miss 的變化? jserv
Performance counter stats for './phonebook_orig' (100 runs):
61,477 cache-misses # 74.388 % of all cache refs ( +- 1.54% )
82,643 cache-references ( +- 1.34% )
12,059,048 instructions # 1.26 insns per cycle ( +- 0.03% )
9,593,850 cycles ( +- 1.84% )
0.004231672 seconds time elapsed ( +- 2.34% )
Performance counter stats for './phonebook_opt_bst' (100 runs):
200,986 cache-misses # 44.641 % of all cache refs ( +- 1.99% )
450,229 cache-references ( +- 1.23% )
55,910,271 instructions # 0.95 insns per cycle ( +- 0.25% )
58,829,146 cycles ( +- 1.87% )
0.025019251 seconds time elapsed ( +- 2.06% )
更改字串 cmp 方法比較 (用all-names.txt
find zora
)
版本 | 方法 1 | 方法 2 | 方法 3 |
---|---|---|---|
append | 0.189335 | 0.089585 | 0.013539 |
findName | 0.000022 | 0.000007 | 0.000001 |
words.txt
find zyxel
可用
sort -R
建立新的輸入資料集 "jserv"
//search by bst
entry *findName(char lastName[], entry *pHead)
{
entry *temp = pHead;
int cmp;
while (temp) {
cmp = strcmp(lastName, temp->lastName);
if (cmp == 0)
return temp;
else if (cmp < 0)
temp = temp->pLeft;
else
temp = temp->pRight;
}
return NULL;
}
//orig append
entry *append(char lastName[], entry *e)
{
/* allocate memory for the new entry and put lastName */
e->pNext = (entry *) malloc(sizeof(entry));
e = e->pNext;
strcpy(e->lastName, lastName);
e->pNext = NULL;
e->pRight = NULL;
e->pLeft = NULL;
return e;
}
entry *new_balance_bst(entry *root, int num)
{
if(!num)
return NULL;
entry *bst_root = NULL;
bst_root = balance_bst(bst_root, num, root);
free(root);
return bst_root;
}
entry *balance_bst(entry *e, int num, entry *root)
{
static entry *bst_build_cur = NULL;
if(!num)
return NULL;
if(!bst_build_cur)
bst_build_cur = root;
if(!e){
e = (entry *) malloc(sizeof(entry));
e->pNext = NULL;
e->pRight = NULL;
e->pLeft = NULL;
}
int mid = (num +1)/2;
e->pLeft = balance_bst(e->pLeft, mid - 1, root);
strcpy(e->lastName, bst_build_cur->lastName);
bst_build_cur = bst_build_cur->pNext;
e->pRight = balance_bst(e->pRight, num - mid, root);
return e;
}
void free_bst(entry *e)
{
if (e) {
free_bst(e->pRight);
free_bst(e->pLeft);
free(e);
}
}
上述的
findName
可改寫為以下:
{ entry *temp = pHead; while (temp) { if (!strcmp(lastName, temp->lastName)) return temp; temp = (cmp < 0) ? temp->pLeft : temp->pRight; } return NULL; }
"jserv"
Performance counter stats for './phonebook_opt_bst' (100 runs):
2,431,096 cache-misses # 94.463 % of all cache refs ( +- 0.10% )
2,573,593 cache-references ( +- 0.20% )
481,113,045 instructions # 1.34 insns per cycle ( +- 0.01% )
358,634,304 cycles ( +- 0.29% )
0.142062161 seconds time elapsed ( +- 0.62% )
while (fgets(line, sizeof(line), fp)) {
while (line[i] != '\0')
i++;
count_node++;
}
Performance counter stats for './phonebook_opt_bst' (100 runs):
1,425,612 cache-misses # 92.262 % of all cache refs ( +- 0.04% )
1,545,179 cache-references ( +- 0.11% )
434,912,471 instructions # 1.38 insns per cycle ( +- 0.00% )
314,507,544 cycles ( +- 0.10% )
0.124844305 seconds time elapsed ( +- 0.54% )!
Algo. | 優化前(singly-linked list) | 優化後(balanced binary search tree) |
---|---|---|
space | O(n) | O(n) |
insert | O(1) | O(n) (讀資料數) + O(log n) (插入) |
search | O(n) | O(log n) |
lastName
插入及搜尋entry_info
/* original version */
typedef struct __PHONE_BOOK_ENTRY {
char lastName[MAX_LAST_NAME_SIZE];
char firstName[16];
char email[16];
char phone[10];
char cell[10];
char addr1[16];
char addr2[16];
char city[16];
char state[2];
char zip[5];
struct __PHONE_BOOK_ENTRY *pNext;
} entry;
改成以下
typedef struct __PHONE_BOOK_ENTRY_INFO {
char firstName[16];
char email[16];
char phone[10];
char cell[10];
char addr1[16];
char addr2[16];
char city[16];
char state[2];
char zip[5];
} entry_info;
typedef struct __PHONE_BOOK_ENTRY {
char lastName[MAX_LAST_NAME_SIZE];
entry_info *pInfo;
struct __PHONE_BOOK_ENTRY *pNext;
} entry;
Performance counter stats for './phonebook_orig' (100 runs):
2,042,217 cache-misses # 95.007 % of all cache refs ( +- 0.12% )
2,149,550 cache-references ( +- 0.16% )
262,784,639 instructions # 1.43 insns per cycle ( +- 0.02% )
183,943,466 cycles ( +- 0.37% )
0.073711683 seconds time elapsed ( +- 0.90% )
Performance counter stats for './phonebook_opt_struct' (100 runs):
378,199 cache-misses # 68.577 % of all cache refs ( +- 0.20% )
551,497 cache-references ( +- 1.13% )
245,480,764 instructions # 1.71 insns per cycle ( +- 0.02% )
143,394,756 cycles ( +- 1.12% )
0.056445606 seconds time elapsed ( +- 1.34% )
Performance counter stats for './phonebook_orig' (100 runs):
1,987,476 cache-misses # 95.591 % of all cache refs ( +- 0.67% )
2,079,154 cache-references ( +- 0.70% )
262,484,896 instructions # 1.43 insns per cycle ( +- 0.02% )
184,098,351 cycles ( +- 0.58% )
0.084555741 seconds time elapsed ( +- 2.78% )
Performance counter stats for './phonebook_opt' (100 runs):
469,681 cache-misses # 78.364 % of all cache refs ( +- 0.35% )
599,356 cache-references ( +- 1.20% )
314,544,912 instructions # 1.49 insns per cycle ( +- 0.00% )
211,290,940 cycles ( +- 0.75% )
0.094401048 seconds time elapsed ( +- 2.10% )
Performance counter stats for './phonebook_opt_bst' (100 runs):
1,387,546 cache-misses # 90.434 % of all cache refs ( +- 0.14% )
1,534,318 cache-references ( +- 0.47% )
434,348,761 instructions # 1.37 insns per cycle ( +- 0.00% )
317,930,025 cycles ( +- 0.59% )
0.137107756 seconds time elapsed ( +- 1.86% )
Performance counter stats for './phonebook_opt_struct' (100 runs):
374,466 cache-misses # 71.274 % of all cache refs ( +- 0.25% )
525,392 cache-references ( +- 0.86% )
245,306,198 instructions # 1.78 insns per cycle ( +- 0.02% )
137,518,542 cycles ( +- 0.87% )
0.057796514 seconds time elapsed ( +- 2.39% )
git rebase -i HEAD~5
修改前五次 commit messagepick
改成 edit
git commit --amend
git commit --amend --author="twzjwang <twzjwang@gmail.com>"
git rebase --continue
git push --force-with-lease
twzjwang