contributed by < paulpeng-popo
>
$ gcc --version
gcc (Ubuntu 11.3.0-1ubuntu1~22.04) 11.3.0
$ lscpu
Architecture: x86_64
CPU op-mode(s): 32-bit, 64-bit
Byte Order: Little Endian
Address sizes: 39 bits physical, 48 bits virtual
CPU(s): 12
On-line CPU(s) list: 0-11
Thread(s) per core: 2
Core(s) per socket: 6
Socket(s): 1
NUMA node(s): 1
Vendor ID: GenuineIntel
CPU family: 6
Model: 158
Model name: Intel(R) Core(TM) i7-8700 CPU @ 3.20GHz
Stepping: 10
CPU max MHz: 4600.0000
CPU min MHz: 800.0000
BogoMIPS: 6399.96
Virtualization: VT-x
L1d: 192 KiB (6 instances)
L1i: 192 KiB (6 instances)
L2: 1.5 MiB (6 instances)
L3: 12 MiB (1 instance)
NUMA node0 CPU(s): 0-11
q_new()
確認 queue.h 中對 q_new()
行為的定義,可利用 INIT_LIST_HEAD
簡化程式碼
struct list_head *head = malloc(sizeof(struct list_head));
if (head) {
INIT_LIST_HEAD(head);
}
return head;
好奇 LIST_HEAD
跟 INIT_LIST_HEAD
到底有什麼差,所以試著用 LIST_HEAD 改寫,結果遇到 function returns address of local variable,猜測是 variable 在 heap 空間被釋放後便跟著消失了,回傳的 struct list_head *
會變成 dangling pointer,因此編譯器回報錯誤
LIST_HEAD(head);
return &head;
目前想到的解決辦法是宣告成 static,但副作用就是在 q_free()
的時候不能加 free(l)
這行,否則會 Undefined behavior,另外,此寫法在 q_test 沒問題,但 make test 會卡住,原因不明
static LIST_HEAD(head);
return &head;
上方的 1.
, 2.
, 3.
, 4.
沒有存在的必要,儘量以簡潔清晰的方式展現。
paulpeng 收到
q_free()
可以利用 list_entry
找到 struct list_head
外層的 element_t
指標
list_entry
/ container_of
利用 offsetof
算出每個 element_t
中 member 與 element_t
起始位址的 offset,後續就能用傳入的 member 位址往前算 offset 個 bytes,得到 element_t
的起始位址
list_for_each
跟 list_for_each_safe
可以搭配 list_entry
的使用達成相似的功能 ; list_for_each_safe
跟 list_for_each_entry_safe
也是如此
struct list_head *node, *safe;
list_for_each_safe (node, safe, l) {
q_release_element(list_entry(node, element_t, list));
}
free(l);
發現 list_for_each_entry_safe
,會去抓下一個 entry 存到 safe,但如果下一個 entry,也就是 safe->member.next
本身就是 head node,其外層並沒有 element_t
pointer 指向它,好奇究竟為什麼不會報錯
safe = list_entry(safe->member.next, __typeof__(*entry), member))
做個小實驗,結果 first 與 amz 的確指到同一個 object,或是說,tmp->list
是可以執行通過的,代表魔法藏在 list_entry
裡面
element_t *tmp = list_entry(head, element_t, list);
element_t *first = list_first_entry(head, element_t, list);
element_t *amz = list_entry(tmp->list.next, element_t, list);
猜測,head node 經過 list_entry
的計算後,返回了一個暫時的 element_t
pointer,而裡面只會包含 list member,但若要嘗試 printf
出 tmp->value
便會 Segmentation fault
q_insert_[head|tail]
使用 List API 的 list_add
和 list_add_tail
在字串複製方面想到可以用 4 種方式實作
void * memcpy ( void * destination, const void * source, size_t num );
void * memmove ( void * destination, const void * source, size_t num );
char * strcpy ( char * destination, const char * source );
char * strncpy ( char * destination, const char * source, size_t num );
memmove
可以用來保證 src 跟 dst 不重疊strdup
的方法可以 malloc 跟 copy 一起做The stpncpy() and strncpy() functions shall copy not more than n bytes (bytes that follow a NUL character are not copied) from the array pointed to by s2 to the array pointed to by s1.
If the array pointed to by s2 is a string that is shorter than n bytes, NUL characters shall be appended to the copy in the array pointed to by s1, until n bytes in all are written.
If copying takes place between objects that overlap, the behavior is undefined.
The strdup() function returns a pointer to a new string which is a duplicate of the string s. Memory for the new string is obtained with malloc(3), and can be freed with free(3).
/* q_insert_head() */
element_t *entry = malloc(sizeof(element_t));
if (!entry) {
return false;
}
size_t len = strlen(s) + 1;
entry->value = malloc(sizeof(char) * len);
strncpy(entry->value, s, len - 1);
(entry->value)[len - 1] = '\0';
list_add(&entry->list, head);
/* q_insert_tail() */
element_t *entry = malloc(sizeof(element_t));
if (!entry) {
return false;
}
size_t len = strlen(s) + 1;
entry->value = malloc(sizeof(char) * len);
strncpy(entry->value, s, len - 1);
(entry->value)[len - 1] = '\0';
list_add_tail(&entry->list, head);
terry23304 跟 Paintako 提醒,entry->value
malloc 的部分需要對回傳值做檢查,因此改成如下
size_t len = strlen(s) + 1;
entry->value = malloc(sizeof(char) * len);
if (!entry->value) {
free(entry);
return false;
}
同時增加對參數 s
的檢查
if (!head || !s) {
return false;
}
後來參考到 25077667 的寫法,將兩個 functions 相似的功能寫在一起,使函式容易維護,而需要不同操作的地方利用 function pointer 的技巧來達成
static bool q_insert(struct list_head *head,
char *s,
void (*op)(struct list_head *, struct list_head *))
{
...
}
bool q_insert_head(struct list_head *head, char *s)
{
return q_insert(head, s, list_add);
}
bool q_insert_tail(struct list_head *head, char *s)
{
return q_insert(head, s, list_add_tail);
}
q_insert
沒有存在的必要,應在 q_insert_tail
中重用 q_insert_head
。
q_remove_[head|tail]()
注意到除了 unlink node 外,還會將 element 內的 value 複製到 sp 中,猜測此做法是為了方便讓呼叫端確認移除的 value 內容
/* q_remove_head() */
element_t *entry = list_first_entry(head, element_t, list);
list_del_init(&entry->list);
if (sp) {
strncpy(sp, entry->value, bufsize);
sp[bufsize - 1] = '\0';
}
/* q_remove_tail() */
element_t *entry = list_last_entry(head, element_t, list);
list_del_init(&entry->list);
if (sp) {
strncpy(sp, entry->value, bufsize);
sp[bufsize - 1] = '\0';
}
參考來自 Shiritai 的作法,使用前處理器的技巧,對程式碼做簡化
#define q_remove(suffix, list_api) \
element_t *q_remove_##suffix(struct list_head *head, char *sp, \
size_t bufsize) \
{ \
if (!head || list_empty(head)) \
return NULL; \
element_t *entry = list_api(head, element_t, list); \
list_del_init(&entry->list); \
if (sp) { \
strncpy(sp, entry->value, bufsize); \
sp[bufsize - 1] = '\0'; \
} \
return entry; \
}
/* q_remove_head() */
q_remove(head, list_first_entry);
/* q_remove_tail() */
q_remove(tail, list_last_entry);
撰寫更精簡的程式碼。
q_size()
int len = 0;
struct list_head *node;
list_for_each (node, head) {
len++;
}
q_delete_mid()
第一時間想到的方法是,從兩端向中間走訪,雖然效果與快慢指標法相同,但缺點就是只在 circular doubly-linked list 的資料結構上有效
struct list_head *front = head->next;
struct list_head *back = head->prev;
while (front != back && front->next != back) {
front = front->next;
back = back->prev;
}
// delete the node which is pointed by front
list_del_init(front);
element_t *entry = list_entry(front, element_t, list);
q_release_element(entry);
return true;
所以最後還是改回快慢指標,並獨立成一子函式,方便後續實作使用
這個「所以」的依據為何?
static void _find_mid(struct list_head **mid, struct list_head *head)
{
*mid = head->next;
struct list_head *fast = head->next->next;
while (fast != head && fast && fast->next != head && fast->next) {
*mid = (*mid)->next;
fast = fast->next->next;
}
}
q_delete_dup()
以 sorted list 為基礎,刪除具重複值的 node,只需要判斷相鄰兩節點是否值相同即可
bool dup = false;
struct list_head *del_list = q_new();
element_t *entry, *safe;
list_for_each_entry_safe (entry, safe, head, list) {
if (&safe->list != head && !strcmp(entry->value, safe->value)) {
list_move(&entry->list, del_list);
dup = true;
} else if (dup) {
list_move(&entry->list, del_list);
dup = false;
}
}
q_free(del_list);
}
後來想想,這個作法把欲刪除的節點都移到一條新的 list_head
上,不但多用記憶體空間,又在 q_free
的時候多跑一層迴圈,有點多此一舉的感覺
因此搭配後面的 cmpstr()
的函式,可改寫簡化
bool dup = false;
struct list_head *node, *safe;
list_for_each_safe (node, safe, head) {
if (safe != head && !cmpstr(&node, &safe)) {
list_del(node);
q_release_element(list_entry(node, element_t, list));
dup = true;
} else if (dup) {
list_del(node);
q_release_element(list_entry(node, element_t, list));
dup = false;
}
}
q_reverse[K]
and q_swap()
這邊參考 Shiritai 的思路,考慮 q_swap, q_reverse, q_reverseK 性質相似,可以把 q_swap
當 reverseK
的特例處理
list_move
裡面有呼叫到 list_del
做 unlink 的動作,因此使用 list_for_each_safe
/* q_reverse() */
LIST_HEAD(reverse_list);
struct list_head *node, *safe;
list_for_each_safe (node, safe, head) {
list_move(node, &reverse_list);
}
list_splice_init(&reverse_list, head);
reverse 延伸,但是多了 counter 計算是否到達 K group 的最後一個 node
參考 KHLee529 的程式架構
/* q_reverseK() */
int count = 0;
LIST_HEAD(rlist);
LIST_HEAD(tmp);
struct list_head *node, *safe;
struct list_head *start = head;
list_for_each_safe (node, safe, head) {
count++;
if (count == k) {
list_cut_position(&rlist, start, node);
q_reverse(&rlist);
list_splice_tail_init(&rlist, &tmp);
start = safe->prev;
count = 0;
}
}
list_splice_init(&tmp, head);
如前所述,swap 是 K=2 的 reverseK
/* q_swap() */
q_reverseK(head, 2);
q_sort()
使用 strcmp(3) 做字串比大小
The strcmp() and strncmp() functions return an integer less than, equal to, or greater than zero if s1 (or the first n bytes thereof) is found, respectively, to be less than, to match, or be greater than s2.
發現蠻 –- 許多部分需要用到 strcmp,索性寫一個 cmpstr,簡化一下程式碼篇幅
查閱教育部國語辭典「蠻」:
在這裡無助於溝通,儘量使用簡潔且清晰的詞彙。
static inline int cmpstr(const void *p1, const void *p2)
{
element_t *first =
list_entry(*(const struct list_head **) p1, element_t, list);
element_t *second =
list_entry(*(const struct list_head **) p2, element_t, list);
return strcmp(first->value, second->value);
}
參考 sysprog21/linux-list 裡面的 q_sort.c
並改寫
/* stable quick sort */
// pick head first as pivot
pivot = head->next;
list_del(pivot);
list_for_each_safe (node, safe, head) {
if (cmpstr(&node, &pivot) < 0)
list_move_tail(node, &list_less);
else
list_move_tail(node, &list_greater);
}
q_sort(&list_less);
q_sort(&list_greater);
list_add(pivot, head);
list_splice(&list_less, head);
list_splice_tail(&list_greater, head);
實作完,執行 qtest 測試 sort 功能沒問題,但 make test 卻過不了,研究後發現是題目有要求
最後換成 merge sort,參考 你所不知道的 C 語言: linked list 和非連續記憶體 中 LeetCode 21 的講解範例
/* merge sort 分割 */
if (!list || !list->next) {
return list;
}
LIST_HEAD(head);
head.next = list;
struct list_head *slow = NULL, *mid = NULL;
_find_mid(&slow, &head);
mid = slow->next;
slow->next = NULL;
struct list_head *left = merge_sort(list);
struct list_head *right = merge_sort(mid);
return merge_two_lists(left, right);
/* merge sort 合併 */
struct list_head *head = NULL, **ptr = &head, **node;
for (node = NULL; L1 && L2; *node = (*node)->next) {
if (cmpstr(&L1, &L2) < 0)
node = &L1;
else
node = &L2;
*ptr = *node;
ptr = &(*ptr)->next;
}
*ptr = (struct list_head *) ((uintptr_t) L1 | (uintptr_t) L2);
return head;
q_descend()
struct list_head *node, *safe;
list_for_each_safe (node, safe, head) {
element_t *pentry = list_entry(node->prev, element_t, list);
element_t *entry = list_entry(node, element_t, list);
if (&pentry->list != head && strcmp(pentry->value, entry->value) < 0) {
list_del(node);
q_release_element(entry);
}
}
return q_size(head);
這邊我誤解題目的意思,原意是 對任一節點 n,若其右邊有比它大的值,則刪除 n,而我實作成 對任一節點 n,刪除 n 右邊值大於 n 的所有節點
參考 terry23304 的作法後,使用反向迭代,改寫如下
struct list_head *node = head->prev;
struct list_head *pnode = node->prev;
char *max = NULL;
for (; node != head; node = pnode) {
element_t *entry = list_entry(node, element_t, list);
pnode = node->prev;
if (!max || strcmp(entry->value, max) > 0) {
max = entry->value;
} else {
list_del(node);
q_release_element(entry);
}
}
return q_size(head);
q_merge()
把所有 lists 都連接在一起,最後對整個 list 做 sort
queue_contex_t *qhead = list_first_entry(head, queue_contex_t, chain);
list_del_init(&qhead->chain);
queue_contex_t *cur = NULL;
list_for_each_entry (cur, head, chain) {
list_splice_init(cur->q, qhead->q);
qhead->size += cur->size;
}
list_add(&qhead->chain, head);
q_sort(qhead->q);
猜測或許此函式是對 sorted list 進行 merge,這樣只要在 merge 的時候同時處理排序問題就好,而不用在最後進行大型的排序工作,未來可以嘗試改進
Makefile 中有一段,可以判斷 SANITIZER 是否有被打開,接著會把 -fsanitize=address 加到 compile flag 中
# Enable sanitizer(s) or not
ifeq ("$(SANITIZER)","1")
# https://github.com/google/sanitizers/wiki/AddressSanitizerFlags
CFLAGS += -fsanitize=address -fno-omit-frame-pointer -fno-common
LDFLAGS += -fsanitize=address
endif
下命令的時候只要指定 SANITIZER=1
即可
$ make clean && make SANITIZER=1 test
執行後分數未改變
Dude, is my code constant time?