課堂練習 ===== ### Q1:撰寫函式 is_little_endian,在 Little endian 機器上返回 1,反之返回 0,應能再任意機器上,無論幾位元的架構都適用 A1: 先以 typedef 定義 unsigned char* 讓資料型態看起來更加直觀。接著以一個 0xff ,強制轉型assign 到資料型態為 byte_pointer 的變數去儲存,最後在以第一組位元組去判別這是 Little endian 還是 Big endian 的系統 。 * Little endian 最低位元組在最低位元。 e.g. 有一個值 uint32_t 0x12345678 那麼在 Little endian 的系統,存到記憶體的排序就會變成是 0x78[0] 0x56[1] 0x34[2] 0x12[3]。 * Big endian 最高位元組在最低位元。 e.g. 有一個值 uint32_t 0x12345678 那麼在 Little endian 的系統,存到記憶體的排序就會變成是 0x12[0] 0x34[1] 0x56[2] 0x78[3]。 ```clike= /**************************************************************************** FileName [ cirFraig.cpp ] PackageName [ cir ] Synopsis [ Define cir FRAIG functions ] Author [ Chung-Yang (Ric) Huang ] Copyright [ Copyleft(c) 2012-present LaDs(III), GIEE, NTU, Taiwan ] ****************************************************************************/ #include <cassert> #include "cirMgr.h" #include "cirGate.h" #include "sat.h" #include "myHashMap.h" #include "util.h" using namespace std; // TODO: Please keep "CirMgr::strash()" and "CirMgr::fraig()" for cir cmd. // Feel free to define your own variables or functions /*******************************/ /* Global variable and enum */ /*******************************/ typedef HashMap<StrashKey, CirGate*> StrashMap; /**************************************/ /* Static varaibles and functions */ /**************************************/ static void PrintMerge(size_t lhs, size_t rhs, bool iv){ cout << "Fraig: " << lhs << " merging " ; if(iv) cout << '!' ; cout << rhs << "..." << endl; } /*******************************************/ /* Public member functions about fraig */ /*******************************************/ // _floatList may be changed. // _unusedList and _undefList won't be changed void CirMgr::strash() { size_t i, size = _dfsList.size(); StrashMap map(getHashSize(size)); StrashKey key; bool change = false; for(i = 0; i < size; i++){ if(_dfsList[i]->getStrashKey(key)){ if(!map.insert(key, _dfsList[i])){ StrashMap::iterator itr = map.find(key); // Strashing: 6 merging 7... cout << "Strashing: " << (*itr).second->getId() << " merging " << _dfsList[i]->getId() << "..." << endl; _dfsList[i]->replace((*itr).second); _totalList[_dfsList[i]->getId()] = 0; change = true; } } } if(change) dfsTraversal(); } void CirMgr::fraig() { SatSolver solver; solver.initialize(); genProofModel(solver); bool SAT; size_t grp_size, Grps_size, tmp_size, i, k, j; fecGrp* grp; Var newV = solver.newVar(); bool inv, change = false; for(i = 0, Grps_size = fecGrps.size(); i < Grps_size; i ++){ vector<CirGate*> tmp; grp = fecGrps[i]; grp_size = grp->size(); tmp.reserve(grp_size); tmp.push_back(grp->at(0)); for(k = 1; k < grp_size; k++){ for(j = 0, tmp_size = tmp.size(); j < tmp_size; j++){ newV = solver.newVar(); inv = (tmp[j]->Value() == grp->at(k)->Value()) ? false : true ; solver.addXorCNF(newV, tmp[j]->getVar(), false, grp->at(k)->getVar(), inv); solver.assumeRelease(); solver.assumeProperty(newV, true); // k = 1 SAT = solver.assumpSolve(); if(!SAT){ PrintMerge(tmp[j]->getId(), grp->at(k)->getId(), inv); grp->at(k)->replace(tmp[j], inv); //solver.reset(); //genProofModel(solver); //dfsTraversal(); //grp->at(k)->genProofModel(solver); _totalList[grp->at(k)->getId()] = 0; change = true; break; } } if(SAT) tmp.push_back(grp->at(k)); } } if(change) { dfsTraversal(); cout << "Updating by UNSAT... #Total FEC Group = 0" << endl; } for(size_t i = 0, n = _dfsList.size(); i < n; i++){ /*_dfsList[i]->Value() = 0;*/ _dfsList[i]->setFECgrp(0); } for(size_t i = 0, n = fecGrps.size(); i < n; i++) delete fecGrps[i]; for(size_t i = 0, n = manager.size(); i < n; i++) delete manager[i]; manager.clear(); fecGrps.clear(); } /********************************************/ /* Private member functions about fraig */ /********************************************/ void CirMgr::genProofModel(SatSolver &s) { Var v; for(size_t i = 0, n = _dfsList.size(); i < n; i++){ v = s.newVar(); _dfsList[i]->setVar(s); } v = s.newVar(); s.addAigCNF(_const0->getVar(), v, false, v, true); for(size_t i = 0, n = _dfsList.size(); i < n; i++){ _dfsList[i]->genProofModel(s); } } ``` ### Q2:撰寫函式 leftmost_one,產生得以找出數值 x 最高位 1 的位元遮罩 A2: 把變數 x 移位,並OR自己,進而把最高位元以下的數字都變為 1 ,最後在以 x&(~x>>1)的技巧,把最高位元以下的值都變為0。 * e.g. x = 00111111, ~x = 11000000, ~x>>1 = 11100000, x&(~x>>1) = 00100000。 ```clike= #include<stdio.h> int leftmost_one(unsigned x){ x = x | (x >> 1); x = x | (x >> 2); x = x | (x >> 4); x = x | (x >> 8); x = x | (x >> 16); return x&(~x>>1); } int main(){ printf("%X",leftmost_one(8000)); } ``` ### Q3:撰寫函式 float_negate,給定浮點數 f,回傳 -f,倘若 f 是 NaN,直接回傳 f。輸入數值範圍為有效的 2^32 個數值 * NaN(Not a Number,非數)exp=255並且frac≠0 * sig = sign(符號),以移位方式直接得出。 * exp = exponent(指數),先移位去除後23位的 frac 再用遮罩的方式去除sig的部分 * frac = fraction(尾數),以遮罩方式去除 sig 以及 exp部分 * 單精度的s,exp和frac字段分別是1位元,8位元,23位元。 根據以上概念,分別找出 sig 、 exp 、frac 再判別是否為NaN,最後返回負數時再把 ~sig+exp+frac移位組合回去。 A3: ````clike= #include <stdio.h> typedef unsigned float_bits; float_bits float_negate(float_bits f) { unsigned sig = f >> 31; unsigned exp = f >> 23 & 0xFF; unsigned frac = f & 0x7FFFFF; int is_NAN = (exp == 0xFF) && (frac != 0); if (is_NAN) { return f; } return ~sig << 31 | exp << 23 | frac; } ```` ### Q4:以 C 語言指標改寫原本使用陣列表示法的氣泡排序程式碼 (Page 327) / 延伸題目: 最佳化 / Analysis on Bubble Sort Algorithm Optimization A4: 直接把 array 變成指標 e.g. data[1] 等價 *(data+1) , 以 sizeof(a)/sizeof(a[0]) 去計算 array 裡面的元素數目,在交換的時候以 bitwise xor 去實現,省下一個變數,不過根據[此網友](http://www.voidcn.com/article/p-wvujgsum-oy.html)去實作,這個方式比最簡單的temp的方式還慢,我想是因為temp多一個變數,以空間換取時間的原因。 ``` clike= #include <stdio.h> void bubble_AAA(long *data, long count); int main() { long a[5]={1,-8,5,-7,15}; long c = sizeof(a)/sizeof(a[0]); bubble_AAA(a,size); for(long j=0;j<size;j++) printf("%ld\n",*(a+j)); } void bubble_AAA(long *data, long count){ for(long last = 0;last < count ; last++){ for( long i = last + 1 ; i < count + 1; i++ ){ if(*(data+last)>*(data+i)){ *(data+last) = *(data+last) ^ *(data+i); *(data+i) = *(data+i) ^ *(data+last); *(data+last) =*(data+last) ^ *(data+i); } } } } ``` ### Q5 計算 N=64 和 N=60 的 cache miss rate A5: 從題目可得知 C=4KB=4096 B=16 E=1 S=256 在 N = 64情況 ### Q6 撰寫更快的轉置矩陣實作 ##### 看書思考中 ```clike= #include <stdio.h> #include <stdlib.h> #include <assert.h> #define LOOP 1000 #define MAX 1024 #define LEN MAX*MAX #define BLOCK 16 void transpose(int* dst, int* src, int N) { int i, j; for (i = 0; i <= N-1; i++) for (j = 0; j <= N-1; j++) dst[j*N+i] = src[i*N+j]; } void effective_transpose(int* dst, int* src, int N) { int i, j, a, b; for (i = 0; i <= N-BLOCK; i+=BLOCK) for (j = 0; j <= N-BLOCK; j+=BLOCK) for (a = i; a < i+BLOCK; a++) for (b = j; b < j+BLOCK; b++) dst[b*N+a] = src[a*N+b]; for (; i <= N-1; i++) for (; j <= N-1; j++) dst[j*N+i] = src[i*N+j]; } void test(void) { int* d = (int*)malloc(sizeof(int)*LEN); int* s = (int*)malloc(sizeof(int)*LEN); transpose(d, s, MAX); for (int i = 0; i < MAX; i++) for (int j = 0; j < MAX; j++) assert(s[i*MAX+j] == d[j*MAX+i]); effective_transpose(d, s, MAX); for (int i = 0; i < MAX; i++) for (int j = 0; j < MAX; j++) assert(s[i*MAX+j] == d[j*MAX+i]); free((void*)d); free((void*)s); } void prof(void) { int* d = (int*)malloc(sizeof(int)*LEN); int* s = (int*)malloc(sizeof(int)*LEN); for (int c = 0; c < LOOP; c++) transpose(d, s, MAX); free((void*)d); free((void*)s); } void prof_effect(void) { int* d = (int*)malloc(sizeof(int)*LEN); int* s = (int*)malloc(sizeof(int)*LEN); for (int c = 0; c < LOOP; c++) effective_transpose(d, s, MAX); free((void*)d); free((void*)s); } int main(int argc, char* argv[]) { /*test();*/ /*prof();*/ prof_effect(); return 0; } ``` ### Q7 有向圖轉換為無向圖 ##### 看書思考中 ````clike= void convert(int* src, int N) { int i, j; for (i = 0; i <= N-1; i++) for (j = 0; j <= N-1; j++) src[j*N+i] = src[i*N+j] || src[j*N+i]; } void effective_convert(int* src, int N) { int i, j, a, b, tmp; for (i = 0; i <= N-BLOCK; i+=BLOCK) /* brilliant! not j = 0 here */ for (j = i; j <= N-BLOCK; j+=BLOCK) for (a = i; a < i+BLOCK; a++) for (b = j; b < j+BLOCK; b++) { /* brilliant! store two value in one loop */ tmp = src[b*N+a] || src[a*N+b]; src[b*N+a] = tmp; src[a*N+b] = tmp; } for (; i <= N-1; i++) for (; j <= N-1; j++) src[j*N+i] = src[i*N+j] || src[j*N+i]; } void test(void) { int* s = (int*)malloc(sizeof(int)*LEN); int* e = (int*)malloc(sizeof(int)*LEN); convert(s, MAX); effective_convert(e, MAX); for (int i = 0; i < MAX; i++) for (int j = 0; j < MAX; j++) assert(s[i*MAX+j] == e[i*MAX+j]); free((void*)s); free((void*)e); } void prof(void) { int* s = (int*)malloc(sizeof(int)*LEN); for (int c = 0; c < LOOP; c++) convert(s, MAX); free((void*)s); } void prof_effect(void) { int* s = (int*)malloc(sizeof(int)*LEN); for (int c = 0; c < LOOP; c++) effective_convert(s, MAX); free((void*)s); } int main(int argc, char* argv[]) { /*test();*/ /*prof();*/ prof_effect(); return 0; } ```