#include <stdint.h>
#define __DECLARE_ROTATE(bits, type) \
static inline type rotl##bits(const type v, int c) \
{ \
const int mask = (bits) - (1); \
c &= mask; \
\
return (v << c) | (v >> ((-c) & mask)); \
} \
\
static inline type rotr##bits(const type v, int c) \
{ \
const int mask = (bits) - (1); \
c &= mask; \
\
return (v >> c) | (v << ((-c) & mask)); \
}
#define DECLARE_ROTATE(bits) __DECLARE_ROTATE(bits, uint##bits##_t)
bits
必須為 2 的指數, 也就是 2, 4, 8 …bits
=8, mask=7=0b111, 目的是產生一個上限遮罩, 也就是說:
c &= mask
: 當 c = 1 時: c & mask = 1; 會等價於 c = 9 = 0b1001 & 0b0111 = 1, 因為總共為 8 個 bits, 所以 circular-shift x bits = circular-shift x+8 bits轉向看 linux kernel 是怎麼寫的.
看到 include/linux/bitops.h
的
/**
* rol64 - rotate a 64-bit value left
* @word: value to rotate
* @shift: bits to roll
*/
static inline __u64 rol64(__u64 word, unsigned int shift)
{
return (word << (shift & 63)) | (word >> ((-shift) & 63));
}
/**
* ror64 - rotate a 64-bit value right
* @word: value to rotate
* @shift: bits to roll
*/
static inline __u64 ror64(__u64 word, unsigned int shift)
{
return (word >> (shift & 63)) | (word << ((-shift) & 63));
}
#include <stdint.h>
static inline uintptr_t align_up(uintptr_t sz, size_t alignment)
{
// aliment = 4 = 0b100; mask = 0b11
// aliment = 7 = 0b111; mask = 0b110 = 6
uintptr_t mask = alignment - 1;
if ((alignment & mask) == 0) { /* power of two? */
return (sz & ~mask) + (((sz & mask) + mask) & (mask + 1));
}
// sz = 7 * k + r
// (7 * k + r + 6) / 7 * 7
// = ( k + (r+6) / 7)* 7
// = 7 (k+1)
return (((sz + mask) / alignment) * alignment);
}
align_up
給定 uintptr_t sz
回傳對齊 size_t alignment
, 也就是可以被size_t alignment
整除的, 回傳值 (uintptr_t
, 以下稱作 ret
)
並且滿足下列條件
考慮以下狀況:
alignment
= 7, 令 sz = 7 * k + r (0 mask
= 6 = alignment
- 1再考慮以下狀況
alignment
= 4 = 0b100; mask
= 3 = 0b110: 進位, + alignment = & alignment = & (mask + 1); (* + mask) & alignment = 1 * alignment
這就帶到了 測驗 β - 2: 甚麼時候會用到 bit rotation
首先看到 crypto/sha3_generic.c
static SHA3_INLINE void keccakf_round(u64 st[25])
{
u64 t[5], tt, bc[5];
/* Theta */
bc[0] = st[0] ^ st[5] ^ st[10] ^ st[15] ^ st[20];
bc[1] = st[1] ^ st[6] ^ st[11] ^ st[16] ^ st[21];
bc[2] = st[2] ^ st[7] ^ st[12] ^ st[17] ^ st[22];
bc[3] = st[3] ^ st[8] ^ st[13] ^ st[18] ^ st[23];
bc[4] = st[4] ^ st[9] ^ st[14] ^ st[19] ^ st[24];
t[0] = bc[4] ^ rol64(bc[1], 1);
t[1] = bc[0] ^ rol64(bc[2], 1);
t[2] = bc[1] ^ rol64(bc[3], 1);
t[3] = bc[2] ^ rol64(bc[4], 1);
t[4] = bc[3] ^ rol64(bc[0], 1);
...
#include <stdio.h>
#include <unistd.h>
int main(void)
{
// 2^N * N = 49152 = 4 * 4 * 4 * 4 * 4 * 4 * 12
// N = 12
// because child proccess share same fd with parent proccess
// but child holds their own buffer
// therefore each proccess holds "-" * N
// there will be 2^N proccess
// = 2^N * N
int N = 12;
for (int i = 0; i < N; i++) {
fork();
printf("-");
}
fflush(stdout);
return 0;
}
根據 CS:APP 第十章, 以及 man page. FORK(2)
The child inherits copies of the parent's set of open file descriptors. Each file descriptor in the child refers to the same open file description (see open(2)) as the corresponding file descriptor in the parent.
也就是說 child proccess 跟 parent proccess refer 到同一個 file descriptor table entry
但是 buffer 卻不是相同的, 也就是說, 每個 proccess write 到自己的 buffer
對於這題來說, 每個 proccess 會寫 "-" * N 到 buffer
並且因為 fflush(stdout) 指向同一個 fd.
於是就會寫
於是 f(12)=49152
更進一步, 我們可以透過 strace 來觀察
execve("./main", ["./main"], 0x7ffe0e14e190 /* 21 vars */) = 0
... 讀 cache
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f911e113810) = 14238
fstat(1, {st_mode=S_IFIFO|0600, st_size=0, ...}) = 0
brk(NULL) = 0x55e31a2a6000
brk(0x55e31a2c7000) = 0x55e31a2c7000
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f911e113810) = 14261
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f911e113810) = 14270
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f911e113810) = 14282
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f911e113810) = 14295
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f911e113810) = 14305
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f911e113810) = 14315
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=14238, si_uid=1000, si_status=0, si_utime=0, si_stime=1} ---
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f911e113810) = 14324
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f911e113810) = 14537
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=14315, si_uid=1000, si_status=0, si_utime=0, si_stime=0} ---
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f911e113810) = 14815
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f911e113810) = 14821
clone(child_stack=NULL, flags=CLONE_CHILD_CLEARTID|CLONE_CHILD_SETTID|SIGCHLD, child_tidptr=0x7f911e113810) = 14826
write(1, "------------", 12) = 12
exit_group(0) = ?
+++ exited with 0 +++
49152
可以看到單個 proccess 寫了 12 個 "-" 到 stdout, write(1, "------------", 12)
為了理解這段程式邏輯, 首先先看 queue 裡面的 element, 也就是 node 的定義
typedef struct { /* Queue node */
void *value;
void *next;
} node_t;
這是一個 singly-linked list, value 的部分為一個 pointer of void 指向一段記憶體位置, 也是 data 儲存的地方.
這樣設計的好處是可以把 data 存儲在 node 以外的記憶體位置, 增加分配的自由度.
typedef struct { /* Two lock queue */
node_t *first, *last;
mtx_t *first_mutex, *last_mutex;
} con_queue_t;
看到這個 structure 我們只能知道有兩個 pointer of node_t 跟兩把對應的 mutex, 實際上怎麼運行的需要看到之後的邏輯
inline static node_t *_con_node_init(void *value)
{
node_t *node = malloc(sizeof(node_t));
if (!node)
return NULL;
node->value = value;
node->next = NULL;
return node;
}
我覺得這段比較有意思的是: 會在 queue 裡面先放一個 dummy node.
所有如果只有 dummy node 的時候就是空的狀態
con_queue_t *con_init()
{
...
node_t *dummy = _con_node_init(NULL);
if (!dummy) {
con_free(queue);
return NULL;
}
queue->first = queue->last = dummy;
return queue;
}
要把一個 node push 進一個 queue, 首先要先 acquire last
這把鎖, 可想而知, 當我們要 pop 的時候就是要 acqiuire first
這把鎖.
這樣做的目的是保證 push, pop 不會競爭.
int con_push(con_queue_t *restrict queue, void *restrict new_element)
{
/* Prepare new node */
node_t *node = _con_node_init(new_element);
if (!node)
return Q_ERROR;
/* Add to queue with lock */
mtx_lock(queue->last_mutex);
// AAA;
// FIXME: put the node to the end
queue->last->next = node; queue->last = node;
mtx_unlock(queue->last_mutex);
return Q_OK;
}
void *con_pop(con_queue_t *queue)
{
mtx_lock(queue->first_mutex);
node_t *node = queue->first; /* Node to be removed */
node_t *new_header = queue->first->next; /* become the first in the queue */
/* Queue is empty */
if (!new_header) {
mtx_unlock(queue->first_mutex);
return NULL;
}
/* Queue not empty: retrieve data and rewire */
void *return_value = node->next->value; // FIXME: BBB
queue->first = new_header; //FIXME: CCC
mtx_unlock(queue->first_mutex);
/* Free removed node and return */
free(node);
return return_value;
}
我在 Linux DESKTOP-S362NF0 5.4.72-microsoft-standard-WSL2 #1 SMP Wed Oct 28 23:40:43 UTC 2020 x86_64 x86_64 x86_64 GNU/Linux
環境下使用
gcc -Wall -std=c11 -o queue queue.c -lpthread
會發現在連結時期有 undefined reference
錯誤. 原因是因為找不到 libpthread.
於是修改 Makefile 如下
CC=gcc
CFLAGS=-g -Wall -std=c11
LIBS= -lpthread
SRCS=$(wildcard *.c)
OBJS=$(patsubst %.c,%.o,$(SRCS))
all: main
$(OBJS): %.o:%.c
$(CC) $(CFLAGS) $(LIBS) -c -o $@ $^
main: $(OBJS)
$(CC) $(CFLAGS) -static $(LIBS) -o $@ $^ /usr/lib/x86_64-linux-gnu/libpthread.a
clean:
$(RM) *.o main
透過靜態聯結直接指定 libpthread archive 檔案解決上述問題.