contributed by <ZixinYang
>
Linux 4.10.0-35-generic
Ubuntu 16.04.1 LTS
L1d cache: 32K
L1i cache: 32K
L2 cache: 256K
L3 cache: 6144K
暖身
$ sudo su
切換到 root 權限下, 以取得所有量測資訊背景知識 review
perf stat 練習
static char array[100000000];
int main(){
int i,j;
for(i = 0; i < 10; i++)
for(j = 0; j < 100000000; j++)
array[j]++;
return 0;
}
結果
Learn More →
static char array[100000000];
int main(){
int i,j;
for(j = 0; j < 1000000000; i++)
for(i = 0; i < 10; j++)
array[j]++;
return 0;
}
結果
Learn More →
cache-references 前後相差了6倍
phonebook_orig.h
typedef struct __PHONE_BOOK_ENTRY {
char lastName[MAX_LAST_NAME_SIZE];
char firstName[16];
char email[16];
char phone[10];
char cell[10];
char addr1[16];
char addr2[16];
char city[16];
char state[2];
char zip[5];
struct __PHONE_BOOK_ENTRY *pNext;
} entry;
entry *findName(char lastname[], entry *pHead);
entry *append(char lastName[], entry *e);
phonebook_orig.c
entry *findName(char lastname[], entry *pHead)
{
while (pHead != NULL) {
if (strcasecmp(lastname, pHead->lastName) == 0)
return pHead;
pHead = pHead->pNext;
}
return NULL;
}
entry *append(char lastName[], entry *e)
{
/* allocate memory for the new entry and put lastName */
e->pNext = (entry *) malloc(sizeof(entry));
e = e->pNext;
strcpy(e->lastName, lastName);
e->pNext = NULL;
return e;
}
每次搜尋皆從頭搜尋一一比對, 複雜度為
Performance counter stats for './phonebook_orig' (100 runs):
464,8577 cache-misses # 93.742 % of all cache refs ( +- 0.17% )
495,8902 cache-references ( +- 0.04% )
2,6220,2416 instructions # 1.45 insn per cycle ( +- 0.02% )
1,8034,3116 cycles ( +- 0.11% )
0.056852745 seconds time elapsed ( +- 0.37% )
搜尋過程只用到 lastName 的資訊, 所以其他資訊就放在另一個 struct, 這樣可以減少存取不必要的資訊, 降低 cache misses
phonebook_struct.h
typedef struct __PHONE_BOOK_ENTRY {
char lastName[MAX_LAST_NAME_SIZE];
char firstName[16];
struct __PHONE_BOOK_ENTRY *pNext;
} entry;
typedef struct __PHONE_BOOK_UNUSED {
char firstName[16];
char email[16];
char phone[10];
char cell[10];
char addr1[16];
char addr2[16];
char city[16];
char state[2];
char zip[5];
} unused;
entry *findName(char lastname[], entry *pHead);
entry *append(char lastName[], entry *e);
Performance counter stats for './phonebook_opt' (100 runs):
105,6083 cache-misses # 65.584 % of all cache refs ( +- 0.59% )
161,0266 cache-references ( +- 0.10% )
2,4115,1425 instructions # 2.17 insn per cycle ( +- 0.02% )
1,1106,5061 cycles ( +- 0.08% )
0.035205177 seconds time elapsed ( +- 0.21% )
改寫 Makefile
若需要更改標頭檔內容, 同時就要更改 main.c, 而執行 $make
指令時, phonebook_orig.c, phonebook_orig.h 會重新編譯, 就會因為原始程式沒有定義新的變數而發生 error, 因此需要更改Makefile (需確定之前已經產生過原始程式的執行檔)
註解以下這行:
phonebook_orig: $(SRCS_common) phonebook_orig.c phonebook_orig.h
$(CC) $(CFLAGS_common) $(CFLAGS_orig) \
-DIMPL="\"$@.h\"" -o $@ \
$(SRCS_common) $@.c
run 的目標改為 phonebook_opt:
run: $(EXEC)
echo 3 | sudo tee /proc/sys/vm/drop_caches
watch -d -t "./phonebook_opt && echo 3 | sudo tee /proc/sys/vm/drop_caches"
直覺想到可以一次翻一半的電話簿進行比對, 但如果用 linked list 無法快速找到中間節點, 所以我們需要建一棵 BST, 新增每筆資料時依照 lastName 長出 BST, 第一次搜尋先比對 root, 再繼續往子節點搜尋。
phonebook_BST.h
typedef struct __PHONE_BOOK_ENTRY {
char lastName[MAX_LAST_NAME_SIZE];
char firstName[16];
char email[16];
char phone[10];
char cell[10];
char addr1[16];
char addr2[16];
char city[16];
char state[2];
char zip[5];
struct __PHONE_BOOK_ENTRY *pLeft;
struct __PHONE_BOOK_ENTRY *pRight;
} entry;
entry *findName(char lastname[], entry *pHead);
entry *append(char lastName[], entry *e);
phonebook_BST.c
entry *findName(char lastName[], entry *pHead)
{
if (pHead != NULL){
if(strcmp(lastName, pHead->lastName) == 0) return pHead;
else if (strcmp(lastName, pHead->lastName) < 0) return findName(lastName, pHead->pLeft);
else { return findName(lastName, pHead->pRight); }
}
return NULL;
}
entry *append(char lastName[], entry *e)
{
if(strcmp(e->lastName, "")==0 && !e->pRight){strcpy(e->lastName, lastName); return e;}
entry *current;
current = e;
entry *ptr;
while(current != NULL){
if(strcmp(lastName, current->lastName)==0){
return e;
}
else if(strcmp(lastName, current->lastName)<0){
ptr = current; current = current->pLeft;
}
else{
ptr = current; current = current->pRight;
}
}
entry *Node = (entry *)malloc(sizeof(entry));
strcpy(Node->lastName, lastName);
Node->pLeft = NULL;
Node->pRight = NULL;
if(strcmp(lastName, ptr->lastName)<0){
ptr->pLeft = Node;
}
else{
ptr->pRight = Node;
}
return e;
}
Performance counter stats for './phonebook_opt' (100 runs):
241,3916 cache-misses # 79.843 % of all cache refs ( +- 0.07% )
302,3343 cache-references ( +- 0.09% )
2,4345,1900 instructions # 1.52 insn per cycle ( +- 0.09% )
1,5988,7185 cycles ( +- 0.18% )
0.150832553 seconds time elapsed ( +- 7.75% )
跟原始程式相比 cache-misses下降許多, 但花了更多時間。
Performance counter stats for './phonebook_opt' (100 runs):
99,3860 cache-misses # 77.827 % of all cache refs ( +- 0.22% )
127,7015 cache-references ( +- 0.13% )
2,2206,5066 instructions # 1.77 insn per cycle ( +- 0.06% )
1,2528,9025 cycles ( +- 0.30% )
0.147768829 seconds time elapsed ( +- 7.12% )
參考st9007a同學的共筆, 使用BKDR hash function
phonebook_HASH.h
typedef struct __PHONE_BOOK_INFO {
char firstName[16];
char email[16];
char phone[10];
char cell[10];
char addr1[16];
char addr2[16];
char city[16];
char state[2];
char zip[5];
} info;
typedef struct __PHONE_BOOK_ENTRY {
char lastName[MAX_LAST_NAME_SIZE];
struct __PHONE_BOOK_INFO *pInfo;
struct __PHONE_BOOK_ENTRY *pNext;
} entry;
typedef struct {
entry *cell[HASH_TABLE_SIZE];
} hashTable;
unsigned int BKDRHash(char *str);
entry *findName(char lastName[], hashTable *table);
void append(char lastName[], hashTable *table);
phonebook_HASH.c
unsigned int BKDRHash(char *str)
{
unsigned int seed = 131;
unsigned int hash = 0;
while (*str) {
hash = hash * seed + (*str++);
}
return (hash & 0x7FFFFFFF);
}
entry *findName(char lastName[], hashTable *table)
{
unsigned int i = BKDRHash(lastName) % HASH_TABLE_SIZE;
entry *head = table->cell[i];
while (strcmp(head->lastName, lastName) != 0) {
head = head->pNext;
if (head == NULL) break;
}
return head;
}
void append(char lastName[], hashTable *table)
{
unsigned int i = BKDRHash(lastName) % HASH_TABLE_SIZE;
entry *e = (entry *) malloc(sizeof(entry));
e->pInfo = (info *) malloc(sizeof(entry));
e->pNext = NULL;
strcpy(e->lastName, lastName);
if (table->cell[i] == NULL) {
table->cell[i] = e;
return;
}
entry *head = table->cell[i];
while (head->pNext != NULL) {
head = head->pNext;
}
head->pNext = e;
}
遇到的問題:
執行$make run
error: recipe for target 'run' failed
error:
make: *** wait: No child processes. Stop.
make: *** Waiting for unfinished jobs....
make: *** wait: No child processes. Stop.
微心得:
花的時間比想像中多, 完成度不足, perf, gnuplot, git, makefile 都不熟, 寫個 c 還有 bug, 有點慘…orz, 不過受到很多刺激感覺滿興奮的, 要再多花時間努力追上。
or
By clicking below, you agree to our terms of service.
New to HackMD? Sign up