目的:
注意須知:
1
考慮下方檔案 4thought.c
是 ACM-ICPC 題目 4 thought 的一個解法,假設程式的輸入符合 4 thought 的描述,請補完程式碼:
#include <stdbool.h>
#include <stdio.h>
enum {
opType1 = 0x1 << 0, opType2 = 0x1 << 1,
opType3 = 0x1 << 4, opType4 = 0x1 << 5,
};
static int operate(int op, int a, int b) {
switch (op) {
case opType1: return a + b;
case opType2: return a - b;
case opType3: return a * b;
case opType4: return (int) a / b;
}
return 0;
}
static char op_to_char(int op) {
return "+-*/?"[op - 1];
}
static int op_to_prio(int op) {
return ((int[]){opType1, opType2, opType3, opType4, -1})[op - 1];
}
static int calc(int op1, int op2, int op3) {
op1 = op_to_prio(op1);
op2 = op_to_prio(op2);
op3 = op_to_prio(op3);
bool p1 = (op1 & 0x0F) == 0; // = 1 for * or /
bool p2 = (op2 & 0x0F) == 0; // else = 0
bool p3 = (op3 & 0x0F) == 0;
// (4 + 4 + 4 + 4) or (4 / 4 / 4 / 4)
if ((p1 == p2) && (p2 == p3))
return operate(op3, operate(op2, operate(op1, 4, 4), 4), 4);
/* Write your code here */
return 0;
}
int main(void) {
int n;
scanf("%d", &n);
int sol[n];
for (int i = 0; i < n; i++)
scanf("%d", &sol[i]);
bool validSolution = false;
for (int i = 0; i < n; i++) {
for (int op1 = 4; op1 > 0; op1--) {
for (int op2 = 4; op2 > 0; op2--) {
for (int op3 = 4; op3 > 0; op3--) {
int sol_checked = calc(op1, op2, op3);
if (sol_checked == sol[i]) {
validSolution = true;
char op1char = op_to_char(op1);
char op2char = op_to_char(op2);
char op3char = op_to_char(op3);
printf("4 %c 4 %c 4 %c 4 = %d\n", op1char, op2char,
op3char, sol[i]);
op1 = -1; op2 = -1; op3 = -1;
break;
}
}
}
}
if (!validSolution)
printf("no solution\n");
validSolution = false;
}
return 0;
}
注意: 你應該要實作 calc
函式中標註 /* Write your code here */
之後的程式碼。除了撰寫程式,你應該提供對應的程式碼註解。
延伸問題:
2
考慮以下程式碼 (fitbits.c
) 可檢驗輸入的整數 x
是否可用 n
個位元來表示,例如 (x = 4, n = 9) 要回傳 true
, 當 (x = 4, n = 2) 回傳 false
。
#include <stdbool.h>
bool fit_bits(int x, int n) {
/* Write your code here */
return (bool) x;
}
實作的程式碼不能有任何邏輯條件判斷 (如 if
, else
, ?
) 或迴圈 (如 for
, while
, goto
, switch
, case
, break
, continue
),可用的運算子是 >>
, <<
, -
, +
, !
, ~
, &
, |
請補完程式碼,作答時需要包含函式宣告及定義。除了撰寫程式,你應該提供對應的程式碼註解。
延伸問題: 在重視資訊安全的專案中,找出類似用法的程式碼,予以解說並進行相關 information leaks 的實驗
是不是覺得有點挫折呢?原因很簡單:你沒有如期寫作業 review,趕快動手!
3
考慮以下程式碼 (is-less-equal.c
) 可檢驗輸入的整數 x
和 y
,是否存在 true
, 當 (x = 14, n = 9) 回傳 false
。
#include <stdbool.h>
bool is_leq(int x, int y) {
int s;
/* Write your code here */
return (bool) s;
}
實作的程式碼不能有任何邏輯條件判斷 (如 if
, else
, ?
) 或迴圈 (如 for
, while
, goto
, switch
, case
, break
, continue
),當然也不能用 >=
, >
, <
, <=
, -
等運算子。可用的運算子是 >>
, <<
, +
, ~
請補完程式碼,作答時需要包含函式宣告及定義。除了撰寫程式,你應該提供對應的程式碼註解。
延伸問題: 在重視資訊安全的專案中,找出類似用法的程式碼,予以解說並進行相關 information leaks 的實驗
4
考慮一種針對短字串的最佳化操作,假設字串總是小於等於 8 bytes,標的硬體是像 x86_64 這樣的 64-bit 架構而且是 little endian,於是我們可實作類似以下的程式碼 (ministr.c
):
#include <stdint.h>
#include <stdio.h>
#include <string.h>
typedef union {
uint64_t integer;
char array[8];
} mini_str;
static unsigned BitScanReverse(uint64_t x) {
return 63 - __builtin_clzll(x);
}
/**
* Find the length of the given mini_str.
* @param str string to find length of
* @return length of the given string
*/
unsigned mini_strlen(mini_str str) {
// Special case for empty string.
if (str.integer == 0) return 0;
// Otherwise find first non-zero bit (which will be in the first non-zero
// byte), and find which byte it is in.
// FIXME: Assumes little-endian.
unsigned msb = BitScanReverse(str.integer);
return msb / 8 + 1;
}
/**
* Create a new mini_str with length 0.
* @return newly created mini_str
*/
mini_str mini_str_new(void) {
// Create string of all null bytes.
mini_str str = {.integer = 0};
return str;
}
/**
* Append str2 to the end of str1 and return the reult.
* @param str1 first string
* @param str2 second string
* @return combined string
*/
mini_str mini_strcat(mini_str str1, mini_str str2) {
// Shift str2 along by str1Len characters to move it into position.
unsigned str1Len = mini_strlen(str1);
str2.integer <<= str1Len * 8; // FIXME: Assumes little-endian.
/* Write your code here */
return str1;
}
#define mini_str_to_c(mini_str) ((const char *) (mini_str).array)
#define mini_str_to_cNoConst(mini_str) ((char *) (mini_str).array)
/**
* Create a mini_str from a standard C character array.
* @param cstr Null-terminated C-string to use as input
* @return newly created mini_str
*/
mini_str mini_str_from_c(const char *cstr) {
// Create empty string.
mini_str mini_str = mini_str_new();
// Copy string.
strncpy(mini_str_to_cNoConst(mini_str), cstr, 7);
return mini_str;
}
int main(int argc, char **argv) {
mini_str all = mini_str_from_c("All ");
mini_str red = mini_str_from_c("red");
mini_str cat = mini_strcat(all, red);
printf("%s\n", mini_str_to_c(cat));
return 0;
}
這裡的 __builtin_clzll
是 GCC builtin function,作用是 bit scan,程式預期輸出為:
All red
你應該要實作 calc
函式中標註 /* Write your code here */
之後的程式碼。除了撰寫程式,你應該提供對應的程式碼註解。
注意: 實作的程式碼不能有任何邏輯條件判斷 (如 if
, else
, ?
) 或迴圈 (如 for
, while
, goto
, switch
, case
, break
, continue
),也不能用 >=
, >
, <
, <=
, -
等運算子。
延伸問題:
5
population count 簡稱 popcount 或叫 sideways sum,是計算數值的二進位表示中,有多少位元是 1
,在一些場合下很有用,例如計算 0-1 稀疏矩陣 (sparse matrix)或 bit array 中非 0
元素個數、計算兩個字串的 Hamming distance。Intel 在 2008 年 11 月 Nehalem 架構的處理器 Core i7 引入 SSE4.2 指令集,其中就有 CRC32
和 POPCNT
指令,POPCNT
可處理 16-bit, 32-bit, 64-bit 整數。
GCC 提供對應的 builtin function:
x
總共有幾個 1
。使用示範:
int x = 5328; // 00000000000000000001010011010000
printf("%d\n", __builtin_popcount(x)); // 5
以下是個存在實作缺陷的版本:
int popcnt_naive(int n) {
int count = 0;
while (n) {
if (n & 1)
++count;
n = n >> 1;
}
return count;
}
呼叫 popcnt_naive(-1)
時,會造成無窮迴圈,請指出錯誤所在,並且重寫為正確的版本。
6
延伸測驗 5
,實作 branchless 的 popcnt
並附上對應的程式原理解說。
延伸問題:
popcnt
的應用場景;挫折到此就不是只有一點了吧?原因很簡單:你沒有如期寫作業 review,趕快動手!
7
考慮到以下程式 (alloc.c
) 是 aligned_alloc 的一種簡易實作:
#include <stdlib.h>
// Number of bytes used for storing the aligned pointer offset.
// up to 64KB alignment, a size which is already unlikely to be
// used for alignment.
typedef uint16_t offset_t;
#define PTR_OFFSET_SIZE sizeof(offset_t)
#define align_up(num, align) \
(((num) + ((align) - 1)) & ~((align) - 1))
void *aligned_malloc(size_t align, size_t size) {
void *ptr = NULL;
// size must be a power of two.
if (!((align & (align - 1)) == 0))
return ptr;
if (align && size) {
// allocate extra bytes to meet the alignment
uint32_t header_size = PTR_OFFSET_SIZE + (align - 1);
void *p = malloc(size + header_size);
/* Write your code here */
}
return ptr;
}
其作用是配置針對 align
個 bytes 對齊的記憶體空間,可對照閱讀 Introduction & Allocators 以掌握原理。你應該要實作 aligned_malloc
函式中標註 /* Write your code here */
之後的程式碼。除了撰寫程式,你應該提供對應的程式碼註解。
注意: 輸入的 align
應該要是 2N (power of 2),否則就回傳 NULL
。
延伸問題:
8
延伸測驗 7
,實作 aligned_free
,其原型宣告如下:
void aligned_free(void *ptr);
除了撰寫程式,你應該提供對應的程式碼註解。
9
考慮以下 64-bit GCD (greatest common divisor, 最大公因數) 求值函式:
#include <stdint.h>
uint64_t gcd64(uint64_t u, uint64_t v) {
if (!u || !v) return u | v;
while (v) {
uint64_t t = v;
v = u % v;
u = t;
}
return u;
}
改寫為以下等價實作:
#include <stdint.h>
uint64_t gcd64(uint64_t u, uint64_t v) {
if (!u || !v) return u | v;
int shift;
for (shift = 0; !((u | v) & 1); shift++) {
u /= 2, v /= 2;
}
while (!(u & 1))
u /= 2;
do {
while (!(v & 1))
v /= 2;
if (u < v) {
v -= u;
} else {
uint64_t t = u - v;
u = v;
v = t;
}
} while (/* Write your code here */);
return /* Write your code here */;
}
補完以上程式碼,即標注 /* Write your code here */
的部分,需要抄寫 while
和 return
所在的程式碼。
10
承測驗 9
, 透過 gcc 內建的 __builtin_ctz (Returns the number of trailing 0-bits in x, starting at the least significant bit position) 改寫程式碼如下:
#include <stdint.h>
uint64_t gcd64(uint64_t u, uint64_t v) {
if (!u || !v) return u | v;
int shift = __builtin_ctzll(/* Write your code here */);
u >>= __builtin_ctzll(u);
while (v) {
v >>= __builtin_ctzll(v);
if (u < v) {
/* Write your code here */
} else {
uint64_t t = u - v;
u = v, v = t;
}
}
return /* Write your code here */;
}
請補完程式碼,作答時需要一併包含原本函式內容。除了撰寫程式,你應該提供對應的程式碼註解。
延伸問題: 解釋上述程式程式運作原理,以及在 x86_64 上透過 __builtin_ctz 改寫 GCD 對效能的提升
嚇到吃手手了嗎?原因很簡單:你沒有如期寫作業 review,趕快動手!
11
考慮到 memcmp 一種實作如下: (行為和 ISO C90 有出入)
#include <stdint.h>
#include <stddef.h>
int memcmp(const uint8_t *m1, const uint8_t *m1, size_t n) {
for (size_t i = 0; i < n; ++i ) {
int diff = m1[i] - m2[i];
if (diff != 0) return (diff < 0) ? -1 : +1;
}
return 0;
}
我們可能因此承受 information leakage 的風險,於是著手避免使用 conditional branching 一類的指令,從而避免 side-channel attack。
為了避免 conditional branch 指令的出現,我們可將 (res > 0) - (res < 0)
替換為 ((res - 1) >> 8) + (res >> 8) + 1
。隨後我們實作下方功能等價但避免 branch 的 cst_memcmp
:
#include <stdint.h>
#include <stddef.h>
int cst_memcmp(const void *m1, const void *m2, size_t n) {
const uint8_t *pm1 = (const uint8_t *) m1 + n;
const uint8_t *pm2 = (const uint8_t *) m2 + n;
int res = 0;
if (n) {
do {
int diff = *--pm1 - *--pm2;
/* Write your code here */
} while (pm1 != m1);
}
return ((res - 1) >> 8) + (res >> 8) + 1;
}
注意: 在 Linux 核心內部的實作方式可見:
請補完程式碼,作答時需要一併包含原本函式內容。除了撰寫程式,你應該提供對應的程式碼註解。
注意: 在 /* Write your code here */
所在的程式碼作用區域 (scope) 中,不得存任何邏輯條件判斷 (如 if
, else
, ?
) 或迴圈 (如 for
, while
, goto
, switch
, case
, break
, continue
)
延伸問題:
12
給定一個 circular linked list 實作如下: (檔案 list.h
)
typedef struct __list_t {
struct __list_t *prev, *next;
} list_t;
/*
* Initialize a list to empty. Because these are circular lists, an "empty"
* list is an entry where both links point to itself. This makes insertion
* and removal simpler because they do not need any branches.
*/
static inline void list_init(list_t *list) {
list->prev = list;
list->next = list;
}
/*
* Append the provided entry to the end of the list. This assumes the entry
* is not in a list already because it overwrites the linked list pointers.
*/
static inline void list_push(list_t *list, list_t *entry) {
list_t *prev = list->prev;
entry->prev = prev;
entry->next = list;
prev->next = entry;
list->prev = entry;
}
/*
* Remove the provided entry from whichever list it is currently in. This
* assumes that the entry is in a list. You do not need to provide the list
* because the lists are circular, so the list's pointers will automatically
* be updated if the first or last entries are removed.
*/
static inline void list_remove(list_t *entry) {
list_t *prev = entry->prev;
list_t *next = entry->next;
prev->next = next;
next->prev = prev;
}
/*
* Remove and return the first entry in the list or NULL if the list is empty.
*/
static inline list_t *list_pop(list_t *list) {
/* Write your code here */
}
請依循程式註解的描述,參照 list_push
, 實作可正確運作的 list_pop
。作答時需要一併包含原本函式內容。除了撰寫程式,你應該提供對應的程式碼註解。
注意: 應善用 list_remove
和已實作的函式。
延伸問題:
13
考慮一個 bit array 的實作 (bit-array.c
) 如下:
#include <errno.h>
#include <inttypes.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#define MAX(a, b) (((a) >= (b)) ? (a) : (b))
#define trailing_zeros(x) \
((x) ? (__typeof(x)) __builtin_ctzll(x) : (__typeof(x)) sizeof(x) * 8)
#define leading_zeros(x) \
((x) ? (__typeof(x)) __builtin_clzll(x) : (__typeof(x)) sizeof(x) * 8)
// 64 bit words
typedef uint64_t word_t, word_addr_t, bit_index_t;
typedef struct {
word_t *words;
bit_index_t num_of_bits;
// Number of words used -- this is just round_up(num_of_bits / 64)
// if num_of_bits == 0, this is 0
word_addr_t num_of_words;
// For more efficient allocation we use realloc only to double size --
// not for adding every word. Initial size is INIT_CAPACITY_WORDS.
word_addr_t capacity_in_words;
} BIT_ARRAY;
#define roundup_bits2words64(bits) (((bits) + 63) / 64)
// Round a number up to the nearest number that is a power of two
#define roundup2pow(x) (1UL << (64 - leading_zeros(x)))
// Bit array (bitset)
#define _TYPESHIFT(arr, word, shift) \
((__typeof(*(arr)))((__typeof(*(arr)))(word) << (shift)))
#define bitsetX_wrd(wrdbits, pos) ((pos) / (wrdbits))
#define bitsetX_idx(wrdbits, pos) ((pos) % (wrdbits))
#define bitset_wrd(arr, pos) bitsetX_wrd(sizeof(*(arr)) * 8, pos)
#define bitset_idx(arr, pos) bitsetX_idx(sizeof(*(arr)) * 8, pos)
#define bitset64_wrd(pos) ((pos) >> 6)
#define bitset64_idx(pos) ((pos) &63)
#define bitmask(nbits, type) \
((nbits) ? ~(type) 0 >> (sizeof(type) * 8 - (nbits)) : (type) 0)
#define bitmask32(nbits) bitmask(nbits, uint32_t)
#define bitmask64(nbits) bitmask(nbits, uint64_t)
#define bitset2_get(arr, wrd, idx) (((arr)[wrd] >> (idx)) & 0x1)
#define bitset2_set(arr, wrd, idx) ((arr)[wrd] |= _TYPESHIFT(arr, 1, idx))
#define bitset_op(func, arr, pos) \
func(arr, bitset_wrd(arr, pos), bitset_idx(arr, pos))
#define bitset_op2(func, arr, pos, bit) \
func(arr, bitset_wrd(arr, pos), bitset_idx(arr, pos), bit)
#define bitset_get(arr, pos) bitset_op(bitset2_get, arr, pos)
#define bitset_set(arr, pos) bitset_op(bitset2_set, arr, pos)
#define bit_array_get(arr, i) bitset_get((arr)->words, i)
#define bit_array_set(arr, i) bitset_set((arr)->words, i)
#define POPCOUNT(x) (unsigned) __builtin_popcountll(x)
// word of all 1s
#define WORD_MAX (~(word_t) 0)
// If cannot allocate memory, set errno to ENOMEM, return NULL
BIT_ARRAY *bit_array_alloc(BIT_ARRAY *bitarr, bit_index_t nbits) {
bitarr->num_of_bits = nbits;
bitarr->num_of_words = roundup_bits2words64(nbits);
bitarr->capacity_in_words = MAX(8, roundup2pow(bitarr->num_of_words));
bitarr->words =
(word_t *) calloc(bitarr->capacity_in_words, sizeof(word_t));
if (bitarr->words == NULL) {
errno = ENOMEM;
return NULL;
}
return bitarr;
}
// If cannot allocate memory, set errno to ENOMEM, return NULL
BIT_ARRAY *bit_array_create(bit_index_t nbits) {
BIT_ARRAY *bitarr = (BIT_ARRAY *) malloc(sizeof(BIT_ARRAY));
if (bitarr == NULL || bit_array_alloc(bitarr, nbits) == NULL) {
if (bitarr != NULL)
free(bitarr);
errno = ENOMEM;
return NULL;
}
return bitarr;
}
// Print this array to a file stream. Prints '0's and '1'. Doesn't print
// newline.
void bit_array_print(const BIT_ARRAY *bitarr, FILE *fout) {
for (bit_index_t i = 0; i < bitarr->num_of_bits; i++)
fprintf(fout, "%c", bit_array_get(bitarr, i) ? '1' : '0');
}
// set a bit (to 1) at position b
void bit_array_set_bit(BIT_ARRAY *bitarr, bit_index_t b) {
bit_array_set(bitarr, b);
}
// Set multiple bits at once.
// e.g. set bits 1, 20 & 31: bit_array_set_bits(bitarr, 3, 1,20,31);
void bit_array_set_bits(BIT_ARRAY *bitarr, size_t n, ...) {
va_list argptr;
va_start(argptr, n);
for (size_t i = 0; i < n; i++) {
unsigned int bit_index = va_arg(argptr, unsigned int);
bit_array_set_bit(bitarr, bit_index);
}
va_end(argptr);
}
static word_t _next_permutation(word_t v) {
// http://graphics.stanford.edu/~seander/bithacks.html#NextBitPermutation
word_t t = v | (v - 1); // t gets v's least significant 0 bits set to 1
// Next set to 1 the most significant bit to change,
// set to 0 the least significant ones, and add the necessary 1 bits.
return (t + 1) | (((~t & (t + 1)) - 1) >> (trailing_zeros(v) + 1));
}
// Get the next permutation of an array with a fixed size and given number of
// bits set. Also known as next lexicographic permutation.
// Given a bit array find the next lexicographic orginisation of the bits
// Number of possible combinations given by (size choose bits_set) i.e. nCk
// 00011 -> 00101 -> 00110 -> 01001 -> 01010 ->
// 01100 -> 10001 -> 10010 -> 10100 -> 11000 -> 00011 (back to start)
void bit_array_next_permutation(BIT_ARRAY *bitarr) {
/* Write your code here */
}
int main(void) {
BIT_ARRAY *bitarr = bit_array_create(10);
bit_array_print(bitarr, stdout);
fputc('\n', stdout);
bit_array_set_bits(bitarr, 3, 1, 2, 5);
bit_array_print(bitarr, stdout);
fputc('\n', stdout);
return 0;
}
其中函式 bit_array_next_permutation
可將指定的 bit array 所有排列組合予以列舉 (
延伸問題:
你能看到這裡,算你厲害,恭喜你,本週有新作業了!