Try   HackMD

2019q3 第 16 週測驗題

目的: 檢驗學員對記憶體管理的認知


測驗 1

Gap Buffer 是種動態陣列的實作方式,廣泛用於文字編輯器一類的程式。其手法是將「空白」(gap, 指字串沒有填滿緩衝區的部分) 移動到緩衝區中間,如下圖:

Image Not Showing Possible Reasons
  • The image file may be corrupted
  • The server hosting the image is unavailable
  • The image path is incorrect
  • The image format is not supported
Learn More →

當我們執行插入或刪除動作時,緩衝區中的 gap 會隨之移動。例如,下面一句話中,初始情況下空白在緩衝區的最後:

This is a smple txt.[        ]

這裡我們使用中括號 (即 [ ]) 表示空白

當我們想要在 smple 中插入 a 字元時,編輯器首先把空白移動到 s 後面:

This is a s[        ]mple txt.

然後在 s 後面插入 a

This is a sa[       ]mple txt.  

若我們想在 txt 中插入一個 e,則需要同樣的步驟:

This is a sample t[       ]xt.
This is a sample te[      ]xt.  

這種方法所基於的假設是,編輯文本時通常為連續接受輸入,其他時間則較少更動。

部分 Emacs 變種使用 Gap Buffer,包括古老的 Emacs on TECO、現代的 GNU/Emacs 及早期 Gosling Emacs (Java 之父 James Gosling 在 1981 年用 C 語言撰寫的 Emacs 編輯器,後來 Richard Stallman 取用部分程式碼打造出 GNU Emacs),和 Scintilla (包括 Code::Blocks 許多整合開發環境裡頭使用的程式碼編輯器元件) 也使用 Gap Buffer。某些 Emacs 版本改用 Linked Line

一個實作案例:假設文字編輯器的編輯區已有 Hi, world! 字串,然後游標 (cursor) 在 , 前面,我們用下方表示法:

Hi<cursor>, world!。

gap buffer 會先建立下方的陣列,在此我們假設 gap 的長度是 4 且僅有一個 (事實上可能有不少這種 gap),\xx 表示 ASCII 編碼為 16 進位 xx 的字元:

[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [a] [b] [c] [d]
'H' 'i' \00 \00 \00 \00 ',' ' ' 'w' 'o' 'r' 'l' 'd' '!'

當使用者刪去一個字元後,變成這樣:

[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [a] [b] [c] [d]
'H' \00 \00 \00 \00 \00 ',' ' ' 'w' 'o' 'r' 'l' 'd' '!'

使用者輸入 ello,變成這樣:

[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [a] [b] [c] [d]
'H' 'e' 'l' 'l' 'o' \00 ',' ' ' 'w' 'o' 'r' 'l' 'd' '!'

然後使用者右移游標,變成這樣:

[0] [1] [2] [3] [4] [5] [6] [7] [8] [9] [a] [b] [c] [d]
'H' 'e' 'l' 'l' 'o' ',' \00 ' ' 'w' 'o' 'r' 'l' 'd' '!'

需要注意的是,當遇到緩衝區填滿的情況,需要重新配置記憶體再複製資料內容,所以 gap buffer 對於大檔案的處理會有效率疑慮。

走訪上述的編輯過程:

Hi<cursor>[    ], world!
H<cursor>[     ], world!
Hello<cursor>[ ], world!
Hello,<cursor>[ ] world!

考慮以下 Gap Buffer 實作程式碼:

#include <stdlib.h>

typedef struct {
    char *buf;
    size_t size;
    size_t lb, rb;
} gapbuf_buffer_t;

typedef enum {
    GAPBUF_SUCCESS = 0,
    GAPBUF_ENOMEM, /* Out of memory    */
    GAPBUF_ERANGE, /* Range error      */
    GAPBUF_EARG,   /* Invalid argument */
    GAPBUF_EMAX    /* Max error number */
} gapbuf_error_t;

gapbuf_error_t gapbuf_insert(gapbuf_buffer_t *gb,
                             const char *buf,
                             const size_t n);

gapbuf_error_t gapbuf_delete(gapbuf_buffer_t *gap, size_t n);

#include <limits.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Calculates the capacity of the gap buffer given a requested size. */
static size_t _gapbuf_calc_buf_capacity(const size_t n)
{
    size_t v = n - 1;
    v |= v >> 1;
    v |= v >> 2;
    v |= v >> 4;
    v |= v >> 8;
    v |= v >> 16;
#if (__WORDSIZE == 64)
    v |= v >> 32;
#endif
    v++;
    return v;
}

/**
 * Grows the buffer such that `n` more characters fit in the gap.
 *
 * The functions tests if adding `n` chars to the buffer would
 * overflow and then reallocates buffer capacity if necessary.
 *
 * @param gap Pointer to the gap buffer instance.
 * @param n   The additional number of chars to provision for.
 *
 * @return Nonzero on any failure. Consult `errno`.
 */
static int _gapbuf_provision_buf_capacity(gapbuf_buffer_t *gap, const size_t n)
{
    /* check if we need to extend the size of the buffer */
    if (gap->lb + n >= gap->rb) {
        /* allocate new buffer */
        size_t new_siz = _gapbuf_calc_buf_capacity(gap->size + n);
        char *new_buf;
        if (!(new_buf = malloc(new_siz)))
            return -1; /* ENOMEM */
        /* copy contents */
        size_t new_rb = KCCC - gap->size;
        size_t rlen = gap->size - KDDD;
        if (gap->buf) {
            memcpy(new_buf, gap->buf, gap->lb);
            memcpy(new_buf + new_rb, gap->buf + gap->rb, rlen);
        }
        /* update gap buffer struct */
        char *tmp = gap->buf;
        gap->buf = new_buf;
        gap->rb = new_rb;
        gap->size = new_siz;
        free(tmp);
    }
    return 0;
}

gapbuf_error_t gapbuf_alloc(gapbuf_buffer_t **gb,
                            const char *content,
                            const size_t len)
{
    if (!(*gb = malloc(sizeof(gapbuf_buffer_t))))
        return GAPBUF_ENOMEM;
    (*gb)->size = 0;
    (*gb)->lb = 0, (*gb)->rb = 0;
    (*gb)->buf = NULL;
    KAAA;
    return GAPBUF_SUCCESS;
}

gapbuf_error_t gapbuf_free(gapbuf_buffer_t *gb)
{
    if (gb) {
        free(gb->buf);
        free(gb);
    }
    return GAPBUF_SUCCESS;
}

size_t gapbuf_read(const gapbuf_buffer_t *gb, char *buf, const size_t bufsiz)
{
    if (!(gb && buf) || (buf && !bufsiz))
        return GAPBUF_EARG;
    /* copy lhs */
    size_t lsiz = gb->lb;
    size_t n = (bufsiz < lsiz) ? bufsiz : lsiz;
    memcpy(buf, gb->buf, n);
    /* copy rhs */
    size_t rsiz = bufsiz > 0 ? KBBB : 0;
    n = (n < rsiz) ? n : rsiz;
    memcpy(buf + gb->lb, gb->buf + gb->rb, n);
    /* terminate string */
    size_t total_len = gb->lb + rsiz;
    size_t term_index = total_len < bufsiz ? total_len : bufsiz;
    buf[term_index] = '\0';
    return term_index;
}

gapbuf_error_t gapbuf_insert(gapbuf_buffer_t *gb,
                             const char *buf,
                             const size_t n)
{
    if (!(gb && buf))
        return GAPBUF_EARG;
    if (_gapbuf_provision_buf_capacity(gb, n) != 0)
        return GAPBUF_ENOMEM;
    memcpy(gb->buf + gb->lb, buf, n);
    gb->lb += n;
    return GAPBUF_SUCCESS;
}

gapbuf_error_t gapbuf_delete(gapbuf_buffer_t *gb, size_t n)
{
    if (n > gb->lb) /* cannot move beyond left boundary */
        return GAPBUF_ERANGE;
    gb->lb -= n;
    return GAPBUF_SUCCESS;
}

gapbuf_error_t gapbuf_fwd(gapbuf_buffer_t *gb, size_t n)
{
    if (n + gb->rb > gb->size)
        return GAPBUF_ERANGE;
    memmove(gb->buf + gb->lb, gb->buf + gb->rb, n);
    gb->lb += n;
    gb->rb += n;
    return GAPBUF_SUCCESS;
}

gapbuf_error_t gapbuf_rwd(gapbuf_buffer_t *gb, size_t n)
{
    if (n > gb->lb)
        return GAPBUF_ERANGE;
    memmove(gb->buf + gb->rb - n, gb->buf + gb->lb - n, n);
    gb->lb -= n;
    gb->rb -= n;
    return GAPBUF_SUCCESS;
}

#define my_assert(test, message) \
    do {                         \
        if (!(test))             \
            return message;      \
    } while (0)
#define my_run_test(test)       \
    do {                        \
        char *message = test(); \
        tests_run++;            \
        if (message)            \
            return message;     \
    } while (0)

int tests_run = 0;

static char *test_gapbuf_alloc_happy_path()
{
    printf("%s... ", __func__);
    char *expected = "0123456789";
    gapbuf_buffer_t *gap;
    my_assert(gapbuf_alloc(&gap, expected, strlen(expected)) == GAPBUF_SUCCESS,
              "Could not allocate buffer");
    my_assert(gap->size == 16, "Unexpected allocation size");
    char data[32];
    size_t n = gapbuf_read(gap, data, 32);
    data[n] = '\0';
    gapbuf_free(gap);
    my_assert(strncmp(expected, data, n) == 0,
              "Initialization string is not consistent");
    printf("OK\n");
    return 0;
}

static char *test_gapbuf_alloc_zero_length()
{
    printf("%s... ", __func__);
    gapbuf_buffer_t *gap;
    my_assert(gapbuf_alloc(&gap, "", 0) == GAPBUF_SUCCESS,
              "Could not allocate empty buffer");
    my_assert(gap, "Empty buffer allocation failed");
    my_assert(gap->size == 0, "Unexpected allocation size");
    my_assert(gapbuf_insert(gap, "asdf", 4) == GAPBUF_SUCCESS,
              "Could not insert into empty buffer");
    char data[8];
    size_t n = gapbuf_read(gap, data, 8);
    my_assert(strncmp("asdf", data, n) == 0, "String inconsistency");
    gapbuf_free(gap);
    printf("OK\n");
    return 0;
}

static char *test_gapbuf_read_into_insufficient_buffer()
{
    printf("%s... ", __func__);
    gapbuf_buffer_t *gap;
    my_assert(gapbuf_alloc(&gap, "0123456789", 10) == GAPBUF_SUCCESS,
              "Could not allocate buffer");
    char too_short[4];
    size_t n = gapbuf_read(gap, too_short, 4);
    my_assert(strncmp("0123456789", too_short, n) == 0,
              "String inconsistency for short buffer");
    printf("OK\n");
    return 0;
}

static char *test_suite()
{
    my_run_test(test_gapbuf_alloc_happy_path);
    my_run_test(test_gapbuf_alloc_zero_length);
    my_run_test(test_gapbuf_read_into_insufficient_buffer);
    return 0;
}

int main()
{
    char *result = test_suite();
    if (result != 0) {
        printf("%s\n", result);
    } else {
        printf("ALL TESTS PASSED\n");
    }
    printf("Tests run: %d\n", tests_run);

    return result != 0;
}

參考的執行輸出如下:

test_gapbuf_alloc_happy_path... OK
test_gapbuf_alloc_zero_length... OK
test_gapbuf_read_into_insufficient_buffer... OK
ALL TESTS PASSED
Tests run: 3

請參照程式碼註解,補完程式碼。

作答區

KAAA = ?

  • (a) 空白即可
  • (b) if (content) return gapbuf_insert(*gb, content, len)
  • (c) if (len > 0) return gapbuf_insert(*gb, content, len)
  • (d) if (content && len) return gapbuf_insert(*gb, content, len)

KBBB = ?

  • (a) gb->size - gb->rb - 1
  • (b) gb->size - gb->rb
  • (c) gb->size - gb->lb
  • (d) gb->size - gb->lb - 1
  • (e) gb->rb
  • (f) gb->lb

KCCC = ?

  • (a) gap->lb
  • (b) gap->lb + gap->rb
  • (c) gap->lb + new_siz
  • (d) gap->rb + new_siz

KDDD = ?

  • (a) gap->lb
  • (b) gap->rb
  • (c) (gap->rb - 1)
  • (d) (gap->rb + 1)
  • (e) (gap->lb - 1)
  • (f) (gap->lb + 1)

延伸問題:

  1. 解釋上述程式碼運作原理並指出實作的缺失;
  2. 透過 gcc builtins 和硬體指令去改進 _gapbuf_calc_buf_capacity 的效能;
  3. 以上述程式碼為基礎,開發簡易的文字編輯器;
  4. 學習 Mazu-Editor 實作手法,探究其原理;