2018q3 第 6 週測驗題

測驗 `1`

考慮以下透過 Huffman coding 進行資料壓縮的程式碼 (huff.c):

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

#define ASCII_SIZE 128

typedef struct _node {
    struct _node *left, *right;
    unsigned weight;
    char data;
} node_t;

typedef struct {
    unsigned size;
    node_t *array[ASCII_SIZE];
} min_heap_t;

typedef struct {
    int arr[ASCII_SIZE / 4];
    int length;
    char data;
} pair_t;

static inline void swap_node(node_t **a, node_t **b) {
    node_t *t = *a;
    *a = *b;
    *b = t;
}

static void shift_up_2(min_heap_t *heap) {
    int i = 0;
    while (i < heap->size) {
        (heap->array)[i] = (heap->array)[i + 2];
        (heap->array)[i + 1] = (heap->array)[i + 3];
        i++;
    }
    heap->size -= XXC;
}

static void add_to_heap(node_t *to, min_heap_t *heap) {
    int pos = heap->size++;
    heap->array[pos] = to;
    if (heap->size > XXE) {
        while ((heap->array[pos - 1])->weight > (heap->array[pos])->weight) {
            swap_node(&(heap->array[pos - 1]), &(heap->array[pos]));
            if (--pos == 0)
                break;
        }
    }
}

static node_t *combine_node_ts(node_t *lighter, node_t *heavier) {
    node_t *new_node = calloc(1, sizeof(node_t));
    new_node->left = lighter;
    new_node->right = heavier;
    new_node->weight = lighter->weight + heavier->weight;
    return new_node;
}

static node_t *build_hufftree(min_heap_t *heap) {
    while (heap->size > 1) {
        add_to_heap(combine_node_ts(heap->array[0], heap->array[1]), heap);
        shift_up_2(heap);
    }
    return heap->array[XXD];
}

void encode(FILE *in, FILE *out, pair_t *pairs) {
    int curr_size = 0;
    unsigned buffer = 0;

    for (;;) {
        int ch = fgetc(in);
        if (ch == EOF)
            break;
        int i = 0;
        while (i++ < pairs[ch].length) {
            buffer <<= 1;
            buffer += (pairs[ch].arr)[i];
            curr_size++;
            if (curr_size == 8) {
                fwrite(&buffer, 1, 1, out);
                curr_size = 0;
                buffer = 0;
            }
        }
    }

    while (curr_size < 8) {
        buffer <<= 1;
        curr_size++;
    }

    rewind(in);
    fwrite(&buffer, 1, 1, out);
    fclose(out);
}

void build_pairings(node_t *root, int arr[], int top, pair_t *pairs) {
    if (root->left) {
        arr[top] = 0;
        XXA;
    }
    if (root->right) {
        arr[top] = 1;
        XXB;
    }
    if (!(root->left) && !(root->right)) {
        uint8_t index = root->data;
        for (int i = 0; i < top; i++)
            pairs[index].arr[i] = arr[i];
        pairs[index].length = top;
        pairs[index].data = root->data;
    }
}

min_heap_t *scan_file(FILE *in) {
    node_t *dictionary = calloc(ASCII_SIZE, sizeof(node_t));
    min_heap_t *heap = calloc(1, sizeof(min_heap_t));
    int ch;
    for (;;) {
        if ((ch = fgetc(in)) == EOF)
            break;
        dictionary[ch].weight++;
    }

    for (ch = 0; ch < ASCII_SIZE; ch++) {
        if (dictionary[ch].weight == 0)
            continue;
        dictionary[ch].data = ch;
        add_to_heap(&(dictionary[ch]), heap);
    }

    rewind(in);
    return heap;
}

int main(int argc, char *argv[])
{
    if (argc != 2) {
        printf("Usage: %s <input file>\n", argv[0]);
        return -1;
    }
    FILE *in = fopen(argv[1], "r");
    FILE *out = fopen("out", "wb");
    int arr[ASCII_SIZE];

    min_heap_t *data_heap = scan_file(in);
    pair_t *pairs = calloc(ASCII_SIZE, sizeof(pair_t));
    build_pairings(build_hufftree(data_heap), arr, 0, pairs);

    printf("Compressing...\n");
    encode(in, out, pairs);

    FILE *read_out = fopen("out", "r");
    fseek(in, 0L, SEEK_END);
    fseek(read_out, 0L, SEEK_END);
    int before = ftell(in);
    int after = ftell(read_out);

    double efficiency = 100 - (((double) after / (double) before) * 100);
    printf("Achieved %f %% compression.\n", efficiency);

    fclose(read_out);
    fclose(in);
    free(data_heap);
    free(pairs);
    return 0;
}

參考執行結果為:

$ ./huff huff.c 
Compressing...
Achieved 40.820361 % compression.

請嘗試補完上述 build_pairings 函式裡頭透過遞迴呼叫的敘述 XXA 和 XXB，連帶 XXC, XXD, XXE 等數字

作答區

XXA = ?

(a) build_pairings(root->left, arr, top - 2, pairs);
(b) build_pairings(root->left, arr, top - 1, pairs);
(c) build_pairings(root->left, arr, top, pairs);
(d) build_pairings(root->left, arr + 1, top, pairs);
(e) build_pairings(root->left, arr, top + 2, pairs);
(f) build_pairings(root->left, arr, top + 1, pairs);

XXB = ?

(a) build_pairings(root->right, arr, top - 2, pairs);
(b) build_pairings(root->right, arr, top - 1, pairs);
(c) build_pairings(root->right, arr, top, pairs);
(d) build_pairings(root->right, arr + 1, top, pairs);
(e) build_pairings(root->right, arr, top + 2, pairs);
(f) build_pairings(root->right, arr, top + 1, pairs);

XXC = ?

(a) 0
(b) 1
(c) 2
(d) 3
(e) 4
(f) 5

XXD = ?

(a) 0
(b) 1
(c) 2
(d) 3
(e) 4
(f) 5

XXE = ?

(a) 0
(b) 1
(c) 2
(d) 3
(e) 4
(f) 5

延伸題目：

回顧 dict 作業，提出資料壓縮的方案;
以上述程式碼為基礎，實作對應的 decompressor/decoder，並探究改善壓縮率的方案;
實作程式正確驗證和效能分析機制;

2018q3 第 6 週測驗題

測驗 1

測驗 `1`