2020q3 第 15 週測驗題

tags: `sysprog2020`

目的: 引導學員挑戰 LeetCode 和複習記憶體管理機制

測驗 `1`

LeetCode 835. Image Overlap 給定二個影像 A 和 B，二者皆為大小相同的二維正方形矩陣，內部以二進位表示。我們轉換其中一個影像，向左、右、上，或下滑動任意數量的單位，並將其置於另一個影像之上，隨後，該轉換的重疊 (overlap) 指二個影像都有 1 的位置數量，注意：轉換不包含向任一方向旋轉。於是，最大可能的重疊為何？

範例 1:

輸入

$A = (\begin{array}{ccc} 1 & 1 & 0 \\ 0 & 1 & 0 \\ 0 & 1 & 0 \end{array})$

$B = (\begin{array}{ccc} 0 & 0 & 0 \\ 0 & 1 & 1 \\ 0 & 0 & 1 \end{array})$
輸出:
- 3
解釋
- 將 A 向右移動 1 個單位，然後向下移動 1 個單位
- 二張影像中都存在 1 的位置總數為 3，見下圖紅色標記

思路：找出所有 1 的位置，對兩個矩陣的所有這些位置進行求差，再統計這些差出現最多的次數。兩個坐標的差表示什麼？即是將其中一個坐標移動到另一個坐標需要移動的向量。於是，在走訪個別單元的過程中，我們找出 A 中所有值為 1 的坐標，移動到 B 中所有值為 1 的坐標需要移動的向量。於是乎，在這些向量中出現次數最多的向量，就是我們要求的整個矩陣應該移動的向量。

以下程式碼針對 LeetCode 835. Image Overlap，提出可能的解法：

#include <string.h>
#define max(a, b) (((a) > (b)) ? (a) : (b))

static inline int overlap_count(unsigned int *AA, unsigned int *BB, int size)
{
    int res = 0;
    for (int i = 0; i < size; ++i)
        res += __builtin_popcount(AA[i] & BB[i]);
    return res;
}

static void translate(unsigned int *v,
                      int vsize,
                      int i,
                      int j,
                      unsigned int *res)
{
    memcpy(res, v, sizeof(unsigned int) * vsize);
    int mask[32];
    for (int k = 0; k < 32; k++)
        mask[k] = (1U << k) - 1;
    int n = vsize;
    if (i >= 0) {
        for (int k = 0; k < n; k++)
            res[k] >>= i;
    } else {
        i = -i;
        for (int k = 0; k < n; k++)
            res[k] = ((res[k] << i) & mask[n]);
    }

    if (j >= 0) {
        ITER1;
        for (int k = 0; k < j; k++)
            res[k] = 0;
    } else {
        j = -j;
        ITER2;
        for (int k = n - j; k < n; k++)
            res[k] = 0;
    }
}

int largestOverlap(int **img1, int img1Size, int *img1ColSize,
                   int **img2, int img2Size, int *img2ColSize)
{
    int n = img1Size;
    unsigned int A[n], B[n];
    memset(A, 0, sizeof(unsigned int) * n);
    memset(B, 0, sizeof(unsigned int) * n);
    for (int i = 0; i < n; ++i) {
        for (int j = 0; j < n; ++j) {
            if (img1[i][j] == 1)
                A[i] |= 1 << j;
            if (img2[i][j] == 1)
                B[i] |= 1 << j;
        }
    }

    int res = 0;
    for (int i = -(n - 1); i <= (n - 1); ++i) {
        for (int j = -(n - 1); j <= (n - 1); ++j) {
            unsigned int AA[n];
            translate(A, n, i, j, AA);
            res = max(res, overlap_count(AA, B, n));
        }
    }
    return res;
}

請補完程式碼，使其符合 LeetCode 題目要求。

參考資料:

LeetCode 835. Image Overlap

作答區

ITER1 = ?

(a) for (int k = 0; k < n - j; k++) res[k + j] = res[k]
(b) for (int k = 0; k <= n - j; k++) res[k + j] = res[k]
(c) for (int k = n - j - 1; k >= 0; k--) res[k + j] = res[k]
(d) for (int k = n - j; k >= 0; k--) res[k + j] = res[k]

ITER2 = ?

(a) for (int k = j; k < n; k++) res[k + j] = res[k]
(b) for (int k = j; k < n; k++) res[k - j] = res[k]
(c) for (int k = j - 1; k < n; k++) res[k + j] = res[k]
(d) for (int k = j - 1; k < n; k++) res[k - j] = res[k]

延伸題目:

解釋上述程式碼運作原理
提出效率更高，且空間佔用更少的 C 程式碼實作

測驗 `2`

LeetCode 1569. Number of Ways to Reorder Array to Get Same BST 給定一個陣列 nums 表示

1

到

n

的排列，依據元素在 nums 中的順序，依次插入一個初始為空的二元搜尋樹 (BST)。請你統計將 nums 重新排序後，滿足以下條件的個數：

重排後得到的 BST 與 nums 原本數值順序得到的 BST 相同。

例如，給定

n u m s = [2, 1, 3]

，我們得到一棵 2 為 root、1 為 left 及 3 為 right 的樹。

陣列

[2, 3, 1]

也能得到相同的 BST，但

[3, 2, 1]

會得到一棵不同的 BST 。

由於答案可能會很大，請將結果對

10^{9} + 7

取餘數。

為何是
$10^{9} + 7$ 呢？請見 Modulo 10^9+7 (1000000007)

範例 1

輸入
- $n u m s = [2, 1, 3]$
輸出
- 1
解釋
- 我們將 nums 重排，
  $[2, 3, 1]$ 可得到相同的 BST
- 沒有其他得到相同 BST 的方法

範例 2

輸入
- $n u m s = [3, 4, 5, 1, 2]$
輸出
- 5
解釋
- 下面 5 個陣列可得到相同的 BST
- $[3, 1, 2, 4, 5]$
- $[3, 1, 4, 2, 5]$
- $[3, 1, 4, 5, 2]$
- $[3, 4, 1, 2, 5]$
- $[3, 4, 1, 5, 2]$

範例 3

輸入
- $n u m s = [1, 2, 3]$
輸出
- 0
解釋
- 沒有其他排列順序能得到相同的 BST

範例 4

輸入
- $n u m s = [3, 1, 2, 5, 4, 6]$
輸出
- 19

範例 5

輸入
$n u m s = [9, 4, 2, 1, 3, 6, 5, 7, 8, 14, 11, 10, 12, 13, 16, 15, 17, 18]$
輸出
- 216212978
解釋
- 得到相同 BST 的個數是 3216212999
- 將其對
  $10^{9} + 7$ 取餘後得到 216212978

根節點是陣列第一個數，分為左右兩個子樹，左右子樹之間的順序若不變即可
假設左子樹

L

長度

n L

，右子樹

R

長度

n R

，則有效組合數為

C_{n L + n R}^{n L} \times f (L) \times f (R)

#include <string.h>

static const int MOD = 1e9 + 7; /* prime */

static int recursive(int *nums, int nums_size, int N, DECL)
{
    if (nums_size <= 2)
        return 1;
    int left[N + 1], left_size = 0, right[N + 1], right_size = 0;
    for (int i = 1; i < nums_size; i++)
        nums[i] > nums[0] ? (right[right_size++] = nums[i])
                          : (left[left_size++] = nums[i]);
    int l = recursive(left, left_size, N, cache),
        r = recursive(right, right_size, N, cache);
    return (((long) cache[nums_size - 1][left_size] * l % MOD) * r) % MOD;
}

int numOfWays(int *nums, int N)
{
    int comb[N + 1][N + 1]; /* combination table */
    memset(comb, 0, sizeof(comb));

    comb[0][0] = 1;
    for (int i = 1; i <= N; i++) {
        comb[i][0] = 1;
        for (int j = 1; j <= N; j++)
            INIT;
    }
    return recursive(nums, N, N, comb) - 1;
}

參考資訊:

花花醬 LeetCode 1569

注意：DECL 若是 int **cache，會在 undefined behavior sanitizer (UBsan) 的執行階段遇到 Load of misaligned address

請補完程式碼，使其符合 LeetCode 題目要求。

作答區

DECL = ?

(a) int cache[N][N]
(b) int cache[N - 1][N - 1]
(c) int cache[N + 1][N]
(d) int (*cache)[N + 1]
(e) int (*cache)[N]

INT = ?

(a) comb[i][j] = (comb[i - 1][j] + comb[i - 1][j - 1]) % MOD
(b) comb[i][j] = (comb[i - 1][j] + comb[i - 1][j - 1])
(c) comb[i][j] = (comb[i][j] + comb[i - 1][j - 1]) % MOD
(d) comb[i][j] = (comb[i][j] + comb[i - 1][j - 1])
(e) comb[i][j] = (comb[i][j - 1] + comb[i - 1][j - 1]) % MOD
(f) comb[i][j] = (comb[i][j - 1] + comb[i - 1][j - 1])

延伸題目:

解釋上述程式碼運作原理
提出效率更高，且空間佔用更少的 C 程式碼實作

測驗 `3`

在 Ubuntu/Debian Linux 中，可執行 sudo apt install dictionaries-common 以安裝辭典套件，於是可得到 /usr/share/dict/words 檔案，後者包含超過 10 萬個單字/詞:

$ wc -l /usr/share/dict/words

考慮以下 C 程式 (strsearch.c) 可從給定的詞典中檢索:

/*
 * Hash table is represented as `htable` array of size N (N = number of lines in
 * dict), where each element either points to the end of a singly-linked list or
 * is equal to zero. Lists are stored in pre-allocated array `clist` of size N.
 *
 * This implementation is memory-efficient and cache-friendly, requiring only
 * 12N + O(1) bytes of "real" memory which can be smaller than the size of
 * dict, sacrificing however for request processing speed, which is
 * O(req_size) in most cases, but may be up to O(dict_size) due to hash collision.
 */

#include <fcntl.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

/* 2 additional bytes for '\n' and '\0' */
#define MAX_REQUEST_SIZE (128 * 1024 * 1024 + 2)

struct htable_entry {
    uint32_t ptr;
};

struct collision_list {
    uint32_t fpos;
    uint32_t len;
    uint32_t prev;
};

struct compr_collision_list {
    uint32_t fpos;
    uint32_t len;
};

typedef struct htable_entry htable_entry_t;
typedef struct collision_list collision_list_t;
typedef struct compr_collision_list compr_collision_list_t;

static char *dict;
static htable_entry_t *htable;
static collision_list_t *clist;
static compr_collision_list_t *cclist;
static size_t num_of_bytes, num_of_lines;
static size_t num_of_buckets;
static int clist_size = 1;

static uint32_t hash(char *s, size_t len)
{
    uint32_t hash = 5381;
    for (size_t i = 0; i < len; i++)
        hash = ((hash << 5) + hash) + (s[i] - 32);
    return hash % num_of_buckets;
}

static void htable_insert(size_t fpos, size_t len)
{
    uint32_t h = hash(dict + fpos, len);
    clist[clist_size].fpos = fpos;
    clist[clist_size].len = len;
    clist[clist_size].prev = htable[h].ptr;
    htable[h].ptr = clist_size;
    ++clist_size;
}

static bool htable_lookup(char *s, size_t len)
{
    uint32_t h = hash(s, len);
    uint32_t ptr = htable[h].ptr;
    uint32_t nextptr = htable[h + 1].ptr;
    for (; ptr < nextptr; ptr++) {
        if ((len == cclist[ptr].len) &&
            !memcmp(s, dict + cclist[ptr].fpos, len))
            return true;
    }
    return false;
}

int main(int argc, char **argv)
{
    if (argc != 2) {
        printf("usage: %s dictionary_file\n", argv[0]);
        return 2;
    }

    /* Map dictionary file directly to memory for easier access from program */
    int dictfd;
    if ((dictfd = open(argv[1], O_RDONLY)) < 0)
        return 1;

    struct stat st;
    fstat(dictfd, &st);
    num_of_bytes = st.st_size;
    if ((dict = mmap(0, num_of_bytes, PROT_READ, MAP_PRIVATE, dictfd, 0)) ==
        MAP_FAILED)
        return 1;

    /* Count number of lines in file */
    for (size_t i = 0; i < num_of_bytes; i++) {
        if (dict[i] == '\n')
            num_of_lines++;
    }
    num_of_lines++;

    num_of_buckets = num_of_lines;
    htable = calloc(num_of_buckets + 1, sizeof(htable_entry_t));
    clist = malloc((num_of_lines + 1) * sizeof(collision_list_t));

    size_t prev_start = 0;
    for (size_t i = 0; i < num_of_bytes; i++) {
        if (dict[i] == '\n') {
            htable_insert(prev_start, P1);
            prev_start = P2;
        }
    }

    /* Compress collision list and update ptrs in htable accordingly */
    htable[num_of_buckets].ptr = num_of_lines;
    cclist = malloc((num_of_lines + 1) * sizeof(compr_collision_list_t));
    size_t clines = 0;
    for (size_t i = 0; i < num_of_buckets; i++) {
        uint32_t ptr = htable[i].ptr;
        htable[i].ptr = clines;
        while (ptr) {
            cclist[clines].fpos = clist[ptr].fpos;
            cclist[clines].len = clist[ptr].len;
            ptr = clist[ptr].prev;
            clines++;
        }
    }
    free(clist);

    /* Ready to accept requests */
    char *req_buf = malloc(MAX_REQUEST_SIZE);

    while (!feof(stdin)) {
        fgets(req_buf, MAX_REQUEST_SIZE, stdin);
        size_t len = strlen(req_buf);
        /* Exit on "exit" command */
        if (!strncmp(req_buf, "exit\n", len))
            break;
        printf("%s: %s\n", req_buf, htable_lookup(req_buf, len) ? "YES" : "NO");
    }

    free(req_buf);
    free(htable);
    free(cclist);
    munmap(dict, num_of_bytes);
    close(dictfd);
    return 0;
}

給定測試輸入 (檔名: inputs) 如下:

gnu
linux
kernel
X
XX
exit

測試方式:

$ ./strsearch /usr/share/dict/words < inputs

預期執行結果:

gnu
: YES
linux
: NO
kernel
: YES
X
: YES
XX
: NO

請補完程式碼。

作答區

P1 = ?

(a) prev_start + i
(b) prev_start + i + 1
(c) i - prev_start
(d) i - prev_start + 1

P2 = ?

(a) i
(b) i + 1
(c) i - 1

延伸題目:

解釋上述程式碼運作原理
提出效率更高，且空間佔用更少的 C 程式碼實作

2020q3 第 15 週測驗題

tags: sysprog2020

測驗 1

測驗 2

測驗 3

Read more

你所不知道的 C 語言：數值系統

建構 User-Mode Linux 的實驗環境

Linux 核心設計: Scheduler(8): Energy Aware Scheduling

你所不知道的 C 語言: linked list 和非連續記憶體

tags: `sysprog2020`

測驗 `1`

測驗 `2`

測驗 `3`