2021q1 Homework3 (quiz3)

contributed by < cyrong >

第 3 週測驗題

測驗1

避免列出完整程式碼，你應該事先拆解並分析。

Image Not Showing Possible Reasons

The image file may be corrupted
The server hosting the image is unavailable
The image path is incorrect
The image format is not supported

Learn More →

jserv

已經將完整程式碼換成測驗題網址

補完程式碼

OFF = ?

(d)

LLL = ?

(b) 32 - __builtin_clz(n) - 1

NNN = ?

(a) 16

CCC = ?

(d) mask[(uint8_t) byte / 8] & 1 << (uint8_t) byte % 8

SSS = ?

(c) mask[(uint8_t) byte / 8] |= 1 << (uint8_t) byte % 8

程式碼原理解釋

xs 資料結構

typedef union {
    /* allow strings up to 15 bytes to stay on the stack
     * use the last byte as a null terminator and to store flags
     * much like fbstring:
     * https://github.com/facebook/folly/blob/master/folly/docs/FBString.md
     */
    char data[16];

    struct {
        uint8_t filler[15],
            /* how many free bytes in this stack allocated string
             * same idea as fbstring
             */
            space_left : 4,
            /* if it is on heap, set to 1 */
            is_ptr : 1, is_large_string : 1, flag2 : 1, flag3 : 1;
    };

    /* heap allocated */
    struct {
        char *ptr;
        /* supports strings up to 2^MAX_STR_LEN_BITS - 1 bytes */
        size_t size : MAX_STR_LEN_BITS,
                      /* capacity is always a power of 2 (unsigned)-1 */
                      capacity : 6;
        /* the last 4 bits are important flags */
    };
} xs;

和 fbstring 一樣的想法

字串長度小於或等於 15 個位元組，則放在 stack。
- filler 用於放置最多 15 個字元
- space_left 存放 15 - length ，當 length 最大來到 15 時存放的值會是 0 也就是 0x0 ，此時後 2 個 bits is_ptr 和 is_large_string 也都會是 0 ，再後 2 個 bits 未使用也會是 0 ，因此 15 個字元後接的是 0x00 也就是 \0 ，這樣一來可以就用來表示 15 個字元
- is_ptr 如果字串被分配在 heap 就會是 1，否則為 0
- is_large_string 判斷字串是否過大，也就是長度有沒有超過 LARGE_STRING_LEN
- flag2 flag3 未使用
字串長度大於或等於 16 個位元組，則放在 heap (透過 malloc 系統呼叫配置所需字串大小)
- ptr 字串指標，若是字傳為 large string ， ptr 的前 4 bytes 會用於作 reference count
- size 表示字串 siz，通過設定 MAX_STR_LENBI_BITS 最大可至 2⁵⁴ - 1
- capacity 用來表示字串在 heap 的使用量，使用 6 個 bit 就可以覆蓋上面設定的字串大小

/* Memory leaks happen if the string is too long but it is still useful for
 * short strings.
 */
#define xs_tmp(x)                                                   \
    ((void) ((struct {                                              \
         _Static_assert(sizeof(x) <= MAX_STR_LEN, "it is too big"); \
         int dummy;                                                 \
     }){1}),                                                        \
     xs_new(&xs_literal_empty(), x))

一開使用的 xs_tmp 使用 _Static_assert 判斷字串會不會太大，不過 _Static_assert 不能放進 expression 中，因此使用一種 trick 把它放進 struct 中，struct 不能沒有成員因此多加一個 dummy 成員

檢查結束後跳到 x_new

xs *xs_new(xs *x, const void *p)
{
    *x = xs_literal_empty();
    size_t len = strlen(p) + 1;
    if (len > NNN) {
        x->capacity = ilog2(len) + 1;
        x->size = len - 1;
        x->is_ptr = true;
        xs_allocate_data(x, x->size, 0);
        memcpy(xs_data(x), p, len);
    } else {
        memcpy(x->data, p, len);
        x->space_left = 15 - (len - 1);
    }
    return x;
}

先初始化一個空的 xs ，xs_literal_empty 使用的技巧是 designated initializers
是小字串的話就這樣放在 stack

長度若超過 16 (字串 + \0) 就分配 heap

xs_allocate_data

static void xs_allocate_data(xs *x, size_t len, bool reallocate)
{
    /* Medium string */
    if (len < LARGE_STRING_LEN) {
        x->ptr = reallocate ? realloc(x->ptr, (size_t) 1 << x->capacity)
                            : malloc((size_t) 1 << x->capacity);
        return;
    }

    /* Large string */
    x->is_large_string = 1;

    /* The extra 4 bytes are used to store the reference count */
    x->ptr = reallocate ? realloc(x->ptr, (size_t)(1 << x->capacity) + 4)
                        : malloc((size_t)(1 << x->capacity) + 4);

    xs_set_refcnt(x, 1);
}

分成 Medium 和 Large string ，如果是 Large 的話，結構中的 ptr 前 4 個 bytes 會是用於作 reference count ，所以在 Large 的 malloc 中會需要多分配 4 bytes

xs_trim

xs *xs_trim(xs *x, const char *trimset)
{
    if (!trimset[0])
        return x;

    char *dataptr = xs_data(x), *orig = dataptr;

    if (xs_cow_lazy_copy(x, &dataptr))
        orig = dataptr;

    /* similar to strspn/strpbrk but it operates on binary data */
    uint8_t mask[32] = {0};

#define check_bit(byte) (mask[(uint8_t) byte / 8] & 1 << (uint8_t) byte % 8)
#define set_bit(byte) (mask[(uint8_t) byte / 8] |= 1 << (uint8_t) byte % 8)
    size_t i, slen = xs_size(x), trimlen = strlen(trimset);

    for (i = 0; i < trimlen; i++)
        set_bit(trimset[i]);
    for (i = 0; i < slen; i++)
        if (!check_bit(dataptr[i]))
            break;
    for (; slen > 0; slen--)
        if (!check_bit(dataptr[slen - 1]))
            break;
    dataptr += i;
    slen -= i;

    /* reserved space as a buffer on the heap.
     * Do not reallocate immediately. Instead, reuse it as possible.
     * Do not shrink to in place if < 16 bytes.
     */
    memmove(orig, dataptr, slen);
    /* do not dirty memory unless it is needed */
    if (orig[slen])
        orig[slen] = 0;

    if (xs_is_ptr(x))
        x->size = slen;
    else
        x->space_left = 15 - slen;
    return x;
#undef check_bit
#undef set_bit
}

中間遇到 xs_cow_lazy_copy

static bool xs_cow_lazy_copy(xs *x, char **data)
{
    if (xs_get_refcnt(x) <= 1)
        return false;

    /* Lazy copy */
    xs_dec_refcnt(x);
    xs_allocate_data(x, x->size, 0);

    if (data) {
        memcpy(xs_data(x), *data, x->size);

        /* Update the newly allocated pointer */
        *data = xs_data(x);
    }
    return true;
}

此段程式實作的是 CoW ，降低 refcnt 1 並新分配記憶體給 x
因為原本 x 在做的是 reference ，所以要新分配記憶體給它
若是原本不是在做 reference 則回傳 false，因為沒有必要

回到 xs_trim ，程式中用 mask 紀錄 trim_set 中的資料，紀錄方式為字元的前 5 bits 作為 mask index 後 3 bits 作為 mask[index] 的值，再用 mask 跟 dataptr 頭尾進行對比，找到要消除的對象 index

並設定 dataptr 與 slen 紀錄中間要保留的字串對 orig 進行修改

xs_concat

xs *xs_concat(xs *string, const xs *prefix, const xs *suffix)
{
    size_t pres = xs_size(prefix), sufs = xs_size(suffix),
           size = xs_size(string), capacity = xs_capacity(string);

    char *pre = xs_data(prefix), *suf = xs_data(suffix),
         *data = xs_data(string);

    xs_cow_lazy_copy(string, &data);

    if (size + pres + sufs <= capacity) {
        memmove(data + pres, data, size);
        memcpy(data, pre, pres);
        memcpy(data + pres + size, suf, sufs + 1);

        if (xs_is_ptr(string))
            string->size = size + pres + sufs;
        else
            string->space_left = 15 - (size + pres + sufs);
    } else {
        xs tmps = xs_literal_empty();
        xs_grow(&tmps, size + pres + sufs);
        char *tmpdata = xs_data(&tmps);
        memcpy(tmpdata + pres, data, size);
        memcpy(tmpdata, pre, pres);
        memcpy(tmpdata + pres + size, suf, sufs + 1);
        xs_free(string);
        *string = tmps;
        string->size = size + pres + sufs;
    }
    return string;
}

這段程式實做 xs 的連接，若是加上 prefix 和 suffix 後會超出原本的 capacity 則新創建一個 xs

此時會需要用到 xs_grow

xs_grow
























/* grow up to specified size */
xs *xs_grow(xs *x, size_t len)
{
    char buf[16];

    if (len <= xs_capacity(x))
        return x;

    /* Backup first */
    if (!xs_is_ptr(x))
        memcpy(buf, x->data, 16);

    //x->is_ptr = true;
    x->capacity = ilog2(len) + 1;

    if (xs_is_ptr(x)) {
        xs_allocate_data(x, len, 1);
    } else {
        x->is_ptr = true;
        xs_allocate_data(x, len, 0);
        memcpy(xs_data(x), buf, 16);
    }
    return x;
}

這裡程式實作讓 input x ，可以根據 len 進行 size 增長，實作完後 x 分配的空間會增加
若是 x 原本是短字串，將其轉成中長字串，再將原字串內容複製進去

在原程式中 13 行 x->is_ptr = true 的設定會讓下面的 if 判斷一直是 true，因此進行修改

回到 xs_concat
作完 xs_grow 後，複製 prefix , data , suffix 進去 tmp ，將原先字串內容釋放，換上 tmp

字串複製

若是要考慮 CoW，則要先判斷要複製的字串是否需要 reference ，也就是判斷是不是 Large String

xs *xs_copy(xs *dest, xs* src)
{
    xs_free(dest);
    if(xs_is_large_string(src)) {
        dest->ptr = src->ptr;
        dest->size = src->size;
        dest->capacity = src->capacity;
        dest->is_ptr = true;
        dest->is_large_string = true;
        xs_inc_refcnt(dest);
    } else {
        xs_new(dest, xs_data(src));
    }
    return dest;
}

先清除 dest 原資料，接著判斷 src 是否為 large string
是就作 reference
否就新創一個新的非 large string

2021q1 Homework3 (quiz3)

測驗1

程式碼原理解釋

字串複製

空間效率