# 2018q3 第 6 週測驗題
### 測驗 `1`
考慮以下透過 [Huffman coding](https://en.wikipedia.org/wiki/Huffman_coding) 進行資料壓縮的程式碼 (`huff.c`):
```C
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#define ASCII_SIZE 128
typedef struct _node {
struct _node *left, *right;
unsigned weight;
char data;
} node_t;
typedef struct {
unsigned size;
node_t *array[ASCII_SIZE];
} min_heap_t;
typedef struct {
int arr[ASCII_SIZE / 4];
int length;
char data;
} pair_t;
static inline void swap_node(node_t **a, node_t **b) {
node_t *t = *a;
*a = *b;
*b = t;
}
static void shift_up_2(min_heap_t *heap) {
int i = 0;
while (i < heap->size) {
(heap->array)[i] = (heap->array)[i + 2];
(heap->array)[i + 1] = (heap->array)[i + 3];
i++;
}
heap->size -= XXC;
}
static void add_to_heap(node_t *to, min_heap_t *heap) {
int pos = heap->size++;
heap->array[pos] = to;
if (heap->size > XXE) {
while ((heap->array[pos - 1])->weight > (heap->array[pos])->weight) {
swap_node(&(heap->array[pos - 1]), &(heap->array[pos]));
if (--pos == 0)
break;
}
}
}
static node_t *combine_node_ts(node_t *lighter, node_t *heavier) {
node_t *new_node = calloc(1, sizeof(node_t));
new_node->left = lighter;
new_node->right = heavier;
new_node->weight = lighter->weight + heavier->weight;
return new_node;
}
static node_t *build_hufftree(min_heap_t *heap) {
while (heap->size > 1) {
add_to_heap(combine_node_ts(heap->array[0], heap->array[1]), heap);
shift_up_2(heap);
}
return heap->array[XXD];
}
void encode(FILE *in, FILE *out, pair_t *pairs) {
int curr_size = 0;
unsigned buffer = 0;
for (;;) {
int ch = fgetc(in);
if (ch == EOF)
break;
int i = 0;
while (i++ < pairs[ch].length) {
buffer <<= 1;
buffer += (pairs[ch].arr)[i];
curr_size++;
if (curr_size == 8) {
fwrite(&buffer, 1, 1, out);
curr_size = 0;
buffer = 0;
}
}
}
while (curr_size < 8) {
buffer <<= 1;
curr_size++;
}
rewind(in);
fwrite(&buffer, 1, 1, out);
fclose(out);
}
void build_pairings(node_t *root, int arr[], int top, pair_t *pairs) {
if (root->left) {
arr[top] = 0;
XXA;
}
if (root->right) {
arr[top] = 1;
XXB;
}
if (!(root->left) && !(root->right)) {
uint8_t index = root->data;
for (int i = 0; i < top; i++)
pairs[index].arr[i] = arr[i];
pairs[index].length = top;
pairs[index].data = root->data;
}
}
min_heap_t *scan_file(FILE *in) {
node_t *dictionary = calloc(ASCII_SIZE, sizeof(node_t));
min_heap_t *heap = calloc(1, sizeof(min_heap_t));
int ch;
for (;;) {
if ((ch = fgetc(in)) == EOF)
break;
dictionary[ch].weight++;
}
for (ch = 0; ch < ASCII_SIZE; ch++) {
if (dictionary[ch].weight == 0)
continue;
dictionary[ch].data = ch;
add_to_heap(&(dictionary[ch]), heap);
}
rewind(in);
return heap;
}
int main(int argc, char *argv[])
{
if (argc != 2) {
printf("Usage: %s <input file>\n", argv[0]);
return -1;
}
FILE *in = fopen(argv[1], "r");
FILE *out = fopen("out", "wb");
int arr[ASCII_SIZE];
min_heap_t *data_heap = scan_file(in);
pair_t *pairs = calloc(ASCII_SIZE, sizeof(pair_t));
build_pairings(build_hufftree(data_heap), arr, 0, pairs);
printf("Compressing...\n");
encode(in, out, pairs);
FILE *read_out = fopen("out", "r");
fseek(in, 0L, SEEK_END);
fseek(read_out, 0L, SEEK_END);
int before = ftell(in);
int after = ftell(read_out);
double efficiency = 100 - (((double) after / (double) before) * 100);
printf("Achieved %f %% compression.\n", efficiency);
fclose(read_out);
fclose(in);
free(data_heap);
free(pairs);
return 0;
}
```
參考執行結果為:
```shell
$ ./huff huff.c
Compressing...
Achieved 40.820361 % compression.
```
請嘗試補完上述 `build_pairings` 函式裡頭透過遞迴呼叫的敘述 `XXA` 和 `XXB`,連帶 `XXC`, `XXD`, `XXE` 等數字
==作答區==
XXA = ?
* `(a)` build_pairings(root->left, arr, top - 2, pairs);
* `(b)` build_pairings(root->left, arr, top - 1, pairs);
* `(c)` build_pairings(root->left, arr, top, pairs);
* `(d)` build_pairings(root->left, arr + 1, top, pairs);
* `(e)` build_pairings(root->left, arr, top + 2, pairs);
* `(f)` build_pairings(root->left, arr, top + 1, pairs);
XXB = ?
* `(a)` build_pairings(root->right, arr, top - 2, pairs);
* `(b)` build_pairings(root->right, arr, top - 1, pairs);
* `(c)` build_pairings(root->right, arr, top, pairs);
* `(d)` build_pairings(root->right, arr + 1, top, pairs);
* `(e)` build_pairings(root->right, arr, top + 2, pairs);
* `(f)` build_pairings(root->right, arr, top + 1, pairs);
XXC = ?
* `(a)` 0
* `(b)` 1
* `(c)` 2
* `(d)` 3
* `(e)` 4
* `(f)` 5
XXD = ?
* `(a)` 0
* `(b)` 1
* `(c)` 2
* `(d)` 3
* `(e)` 4
* `(f)` 5
XXE = ?
* `(a)` 0
* `(b)` 1
* `(c)` 2
* `(d)` 3
* `(e)` 4
* `(f)` 5
:::success
延伸題目:
1. 回顧 dict 作業,提出資料壓縮的方案;
2. 以上述程式碼為基礎,實作對應的 decompressor/decoder,並探究改善壓縮率的方案;
3. 實作程式正確驗證和效能分析機制;
:::