課堂練習
=====
### Q1:撰寫函式 is_little_endian,在 Little endian 機器上返回 1,反之返回 0,應能再任意機器上,無論幾位元的架構都適用
A1:
先以 typedef 定義 unsigned char* 讓資料型態看起來更加直觀。接著以一個 0xff ,強制轉型assign 到資料型態為 byte_pointer 的變數去儲存,最後在以第一組位元組去判別這是 Little endian 還是 Big endian 的系統 。
* Little endian 最低位元組在最低位元。
e.g. 有一個值 uint32_t 0x12345678 那麼在 Little endian 的系統,存到記憶體的排序就會變成是 0x78[0] 0x56[1] 0x34[2] 0x12[3]。
* Big endian 最高位元組在最低位元。
e.g. 有一個值 uint32_t 0x12345678 那麼在 Little endian 的系統,存到記憶體的排序就會變成是 0x12[0] 0x34[1] 0x56[2] 0x78[3]。
```clike=
/****************************************************************************
FileName [ cirFraig.cpp ]
PackageName [ cir ]
Synopsis [ Define cir FRAIG functions ]
Author [ Chung-Yang (Ric) Huang ]
Copyright [ Copyleft(c) 2012-present LaDs(III), GIEE, NTU, Taiwan ]
****************************************************************************/
#include <cassert>
#include "cirMgr.h"
#include "cirGate.h"
#include "sat.h"
#include "myHashMap.h"
#include "util.h"
using namespace std;
// TODO: Please keep "CirMgr::strash()" and "CirMgr::fraig()" for cir cmd.
// Feel free to define your own variables or functions
/*******************************/
/* Global variable and enum */
/*******************************/
typedef HashMap<StrashKey, CirGate*> StrashMap;
/**************************************/
/* Static varaibles and functions */
/**************************************/
static void PrintMerge(size_t lhs, size_t rhs, bool iv){
cout << "Fraig: " << lhs << " merging " ;
if(iv) cout << '!' ;
cout << rhs << "..." << endl;
}
/*******************************************/
/* Public member functions about fraig */
/*******************************************/
// _floatList may be changed.
// _unusedList and _undefList won't be changed
void
CirMgr::strash()
{
size_t i, size = _dfsList.size();
StrashMap map(getHashSize(size));
StrashKey key;
bool change = false;
for(i = 0; i < size; i++){
if(_dfsList[i]->getStrashKey(key)){
if(!map.insert(key, _dfsList[i])){
StrashMap::iterator itr = map.find(key);
// Strashing: 6 merging 7...
cout << "Strashing: " << (*itr).second->getId()
<< " merging " << _dfsList[i]->getId() << "..." << endl;
_dfsList[i]->replace((*itr).second);
_totalList[_dfsList[i]->getId()] = 0;
change = true;
}
}
}
if(change) dfsTraversal();
}
void
CirMgr::fraig()
{
SatSolver solver;
solver.initialize();
genProofModel(solver);
bool SAT;
size_t grp_size, Grps_size, tmp_size, i, k, j;
fecGrp* grp;
Var newV = solver.newVar();
bool inv, change = false;
for(i = 0, Grps_size = fecGrps.size(); i < Grps_size; i ++){
vector<CirGate*> tmp;
grp = fecGrps[i];
grp_size = grp->size();
tmp.reserve(grp_size);
tmp.push_back(grp->at(0));
for(k = 1; k < grp_size; k++){
for(j = 0, tmp_size = tmp.size(); j < tmp_size; j++){
newV = solver.newVar();
inv = (tmp[j]->Value() == grp->at(k)->Value()) ? false : true ;
solver.addXorCNF(newV, tmp[j]->getVar(), false, grp->at(k)->getVar(), inv);
solver.assumeRelease();
solver.assumeProperty(newV, true); // k = 1
SAT = solver.assumpSolve();
if(!SAT){
PrintMerge(tmp[j]->getId(), grp->at(k)->getId(), inv);
grp->at(k)->replace(tmp[j], inv);
//solver.reset();
//genProofModel(solver);
//dfsTraversal();
//grp->at(k)->genProofModel(solver);
_totalList[grp->at(k)->getId()] = 0;
change = true;
break;
}
}
if(SAT) tmp.push_back(grp->at(k));
}
}
if(change) {
dfsTraversal();
cout << "Updating by UNSAT... #Total FEC Group = 0" << endl;
}
for(size_t i = 0, n = _dfsList.size(); i < n; i++){
/*_dfsList[i]->Value() = 0;*/ _dfsList[i]->setFECgrp(0);
}
for(size_t i = 0, n = fecGrps.size(); i < n; i++)
delete fecGrps[i];
for(size_t i = 0, n = manager.size(); i < n; i++)
delete manager[i];
manager.clear();
fecGrps.clear();
}
/********************************************/
/* Private member functions about fraig */
/********************************************/
void
CirMgr::genProofModel(SatSolver &s)
{
Var v;
for(size_t i = 0, n = _dfsList.size(); i < n; i++){
v = s.newVar();
_dfsList[i]->setVar(s);
}
v = s.newVar();
s.addAigCNF(_const0->getVar(), v, false, v, true);
for(size_t i = 0, n = _dfsList.size(); i < n; i++){
_dfsList[i]->genProofModel(s);
}
}
```
### Q2:撰寫函式 leftmost_one,產生得以找出數值 x 最高位 1 的位元遮罩
A2:
把變數 x 移位,並OR自己,進而把最高位元以下的數字都變為 1 ,最後在以 x&(~x>>1)的技巧,把最高位元以下的值都變為0。
* e.g. x = 00111111, ~x = 11000000, ~x>>1 = 11100000, x&(~x>>1) = 00100000。
```clike=
#include<stdio.h>
int leftmost_one(unsigned x){
x = x | (x >> 1);
x = x | (x >> 2);
x = x | (x >> 4);
x = x | (x >> 8);
x = x | (x >> 16);
return x&(~x>>1);
}
int main(){
printf("%X",leftmost_one(8000));
}
```
### Q3:撰寫函式 float_negate,給定浮點數 f,回傳 -f,倘若 f 是 NaN,直接回傳 f。輸入數值範圍為有效的 2^32 個數值
* NaN(Not a Number,非數)exp=255並且frac≠0
* sig = sign(符號),以移位方式直接得出。
* exp = exponent(指數),先移位去除後23位的 frac 再用遮罩的方式去除sig的部分
* frac = fraction(尾數),以遮罩方式去除 sig 以及 exp部分
* 單精度的s,exp和frac字段分別是1位元,8位元,23位元。
根據以上概念,分別找出 sig 、 exp 、frac 再判別是否為NaN,最後返回負數時再把 ~sig+exp+frac移位組合回去。
A3:
````clike=
#include <stdio.h>
typedef unsigned float_bits;
float_bits float_negate(float_bits f) {
unsigned sig = f >> 31;
unsigned exp = f >> 23 & 0xFF;
unsigned frac = f & 0x7FFFFF;
int is_NAN = (exp == 0xFF) && (frac != 0);
if (is_NAN) {
return f;
}
return ~sig << 31 | exp << 23 | frac;
}
````
### Q4:以 C 語言指標改寫原本使用陣列表示法的氣泡排序程式碼 (Page 327) / 延伸題目: 最佳化 / Analysis on Bubble Sort Algorithm Optimization
A4: 直接把 array 變成指標 e.g. data[1] 等價 *(data+1) , 以 sizeof(a)/sizeof(a[0]) 去計算 array 裡面的元素數目,在交換的時候以 bitwise xor 去實現,省下一個變數,不過根據[此網友](http://www.voidcn.com/article/p-wvujgsum-oy.html)去實作,這個方式比最簡單的temp的方式還慢,我想是因為temp多一個變數,以空間換取時間的原因。
``` clike=
#include <stdio.h>
void bubble_AAA(long *data, long count);
int main()
{
long a[5]={1,-8,5,-7,15};
long c = sizeof(a)/sizeof(a[0]);
bubble_AAA(a,size);
for(long j=0;j<size;j++)
printf("%ld\n",*(a+j));
}
void bubble_AAA(long *data, long count){
for(long last = 0;last < count ; last++){
for( long i = last + 1 ; i < count + 1; i++ ){
if(*(data+last)>*(data+i)){
*(data+last) = *(data+last) ^ *(data+i);
*(data+i) = *(data+i) ^ *(data+last);
*(data+last) =*(data+last) ^ *(data+i);
}
}
}
}
```
### Q5 計算 N=64 和 N=60 的 cache miss rate
A5: 從題目可得知 C=4KB=4096 B=16 E=1 S=256
在 N = 64情況
### Q6 撰寫更快的轉置矩陣實作
##### 看書思考中
```clike=
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#define LOOP 1000
#define MAX 1024
#define LEN MAX*MAX
#define BLOCK 16
void transpose(int* dst, int* src, int N) {
int i, j;
for (i = 0; i <= N-1; i++)
for (j = 0; j <= N-1; j++)
dst[j*N+i] = src[i*N+j];
}
void effective_transpose(int* dst, int* src, int N) {
int i, j, a, b;
for (i = 0; i <= N-BLOCK; i+=BLOCK)
for (j = 0; j <= N-BLOCK; j+=BLOCK)
for (a = i; a < i+BLOCK; a++)
for (b = j; b < j+BLOCK; b++)
dst[b*N+a] = src[a*N+b];
for (; i <= N-1; i++)
for (; j <= N-1; j++)
dst[j*N+i] = src[i*N+j];
}
void test(void) {
int* d = (int*)malloc(sizeof(int)*LEN);
int* s = (int*)malloc(sizeof(int)*LEN);
transpose(d, s, MAX);
for (int i = 0; i < MAX; i++)
for (int j = 0; j < MAX; j++)
assert(s[i*MAX+j] == d[j*MAX+i]);
effective_transpose(d, s, MAX);
for (int i = 0; i < MAX; i++)
for (int j = 0; j < MAX; j++)
assert(s[i*MAX+j] == d[j*MAX+i]);
free((void*)d);
free((void*)s);
}
void prof(void) {
int* d = (int*)malloc(sizeof(int)*LEN);
int* s = (int*)malloc(sizeof(int)*LEN);
for (int c = 0; c < LOOP; c++)
transpose(d, s, MAX);
free((void*)d);
free((void*)s);
}
void prof_effect(void) {
int* d = (int*)malloc(sizeof(int)*LEN);
int* s = (int*)malloc(sizeof(int)*LEN);
for (int c = 0; c < LOOP; c++)
effective_transpose(d, s, MAX);
free((void*)d);
free((void*)s);
}
int main(int argc, char* argv[]) {
/*test();*/
/*prof();*/
prof_effect();
return 0;
}
```
### Q7 有向圖轉換為無向圖
##### 看書思考中
````clike=
void convert(int* src, int N) {
int i, j;
for (i = 0; i <= N-1; i++)
for (j = 0; j <= N-1; j++)
src[j*N+i] = src[i*N+j] || src[j*N+i];
}
void effective_convert(int* src, int N) {
int i, j, a, b, tmp;
for (i = 0; i <= N-BLOCK; i+=BLOCK)
/* brilliant! not j = 0 here */
for (j = i; j <= N-BLOCK; j+=BLOCK)
for (a = i; a < i+BLOCK; a++)
for (b = j; b < j+BLOCK; b++) {
/* brilliant! store two value in one loop */
tmp = src[b*N+a] || src[a*N+b];
src[b*N+a] = tmp;
src[a*N+b] = tmp;
}
for (; i <= N-1; i++)
for (; j <= N-1; j++)
src[j*N+i] = src[i*N+j] || src[j*N+i];
}
void test(void) {
int* s = (int*)malloc(sizeof(int)*LEN);
int* e = (int*)malloc(sizeof(int)*LEN);
convert(s, MAX);
effective_convert(e, MAX);
for (int i = 0; i < MAX; i++)
for (int j = 0; j < MAX; j++)
assert(s[i*MAX+j] == e[i*MAX+j]);
free((void*)s);
free((void*)e);
}
void prof(void) {
int* s = (int*)malloc(sizeof(int)*LEN);
for (int c = 0; c < LOOP; c++)
convert(s, MAX);
free((void*)s);
}
void prof_effect(void) {
int* s = (int*)malloc(sizeof(int)*LEN);
for (int c = 0; c < LOOP; c++)
effective_convert(s, MAX);
free((void*)s);
}
int main(int argc, char* argv[]) {
/*test();*/
/*prof();*/
prof_effect();
return 0;
}
```