contributed by < BennyWang1007
>
$ gcc --version
gcc (Ubuntu 13.3.0-6ubuntu2~24.04) 13.3.0
$ lscpu
架構: x86_64
CPU 作業模式: 32-bit, 64-bit
Address sizes: 39 bits physical, 48 bits virtual
Byte Order: Little Endian
CPU(s): 20
On-line CPU(s) list: 0-19
供應商識別號: GenuineIntel
Model name: 12th Gen Intel(R) Core(TM) i7-12700H
CPU 家族: 6
型號: 154
每核心執行緒數: 2
每通訊端核心數: 14
Socket(s): 1
製程: 3
CPU(s) scaling MHz: 23%
CPU max MHz: 4700.0000
CPU min MHz: 400.0000
BogoMIPS: 5376.00
Virtualization features:
虛擬: VT-x
Caches (sum of all):
L1d: 544 KiB (14 instances)
L1i: 704 KiB (14 instances)
L2: 11.5 MiB (8 instances)
L3: 24 MiB (1 instance)
NUMA:
NUMA 節點: 1
NUMA node0 CPU(s): 0-19
AAAA = &l->head
BBBB = before
CCCC = &(*p)->next
DDDD = item->next
EEEE = (*pred_ptr)->r
FFFF = &(*pred_ptr)->r
void list_insert_before(list_t *l, list_item_t *before, list_item_t *item)
{
list_item_t **p;
for (p = &l->head; *p != before; p = &(*p)->next)
;
*p = item;
item->next = before;
}
上方程式碼首先創建indirect pointer p
指向 head 的地址(可以想像成 一個新的節點node的next = &head),接著走訪串列直到 p 指向的位置為要插入的目標節點before,此時將*p設為要插入的節點,並將節點指向的下一節點設為目標節點。
void list_merge_sort(list_t *l)
{
if (list_size(l) < 2)
return;
list_t left, right;
left.head = l->head;
/* Split the list into two halves */
list_item_t *mid = l->head, *fast = l->head;
while (fast->next && fast->next->next) {
mid = mid->next;
fast = fast->next->next;
}
right.head = mid->next;
mid->next = NULL;
list_merge_sort(&left);
list_merge_sort(&right);
/* Merge the two sorted lists */
list_t merged;
merged.head = NULL;
list_item_t **p = &merged.head;
while (left.head && right.head) {
list_t *min = left.head->value < right.head->value ? &left : &right;
*p = min->head;
min->head = min->head->next;
p = &(*p)->next;
}
/* Append the remaining elements */
if (left.head)
*p = left.head;
else
*p = right.head;
l->head = merged.head;
}
解釋:
首先我先使用快慢指標找出 list l
的中點,將 l
分成left
、right
兩半。接著分別對兩半呼叫 list_merge_sort
來遞迴地分割。分割完後,將left
、right
兩半頭部的值一一比較,並寫回 l
中,最後將剩餘的部份接到 l
的尾部,完成合併排序。
測試程式
#define print_list(l) \
printf("["); \
do { \
list_item_t *cur = l.head; \
while (cur) { \
printf("%d ", cur->value); \
cur = cur->next; \
} \
printf("]\n"); \
} while (0)
size_t list_size(list_t *l)
{
size_t size = 0;
list_item_t *cur = l->head;
while (cur) {
size++;
cur = cur->next;
}
return size;
}
int main()
{
srand(42); /* fixed seed for reproducibility */
list_reset();
/* Randomize the list */
for (size_t i = 0; i < N; i++) {
size_t j = rand() % N;
list_item_t tmp = items[i];
items[i] = items[j];
items[j] = tmp;
}
for (size_t i = 0; i < N; i++)
list_insert_before(&l, NULL, &items[i]);
printf("Suffled list:\n");
print_list(l);
list_merge_sort(&l);
printf("Sorted list:\n");
print_list(l);
/* Check if the list is sorted */
list_t l2 = l;
for (size_t i = 0; i < N; i++) {
if (l2.head->value != i) {
fprintf(stderr, "ERROR: list is not sorted\n");
return 1;
}
l2.head = l2.head->next;
}
printf("List is sorted\n");
return 0;
}
EEEE = (*pred_ptr)->r
FFFF = &(*pred_ptr)->r
/*
* Structure representing a free memory block in the memory allocator.
* The free tree is a binary search tree that organizes free blocks (of type block_t)
* to efficiently locate a block of appropriate size during memory allocation.
*/
void remove_free_tree(block_t **root, block_t *target)
{
/* Locate the pointer to the target node in the tree. */
block_t **node_ptr = find_free_tree(root, target);
/* If the target node has two children, we need to find a replacement. */
if ((*node_ptr)->l && (*node_ptr)->r) {
/* Find the in-order predecessor:
* This is the rightmost node in the left subtree.
*/
block_t **pred_ptr = &(*node_ptr)->l;
while ((*pred_ptr)->r)
pred_ptr = &(*pred_ptr)->r;
/* Verify the found predecessor using a helper function (for debugging).
*/
block_t *expected_pred = find_predecessor_free_tree(root, *node_ptr);
assert(expected_pred == *pred_ptr);
/* If the predecessor is the immediate left child. */
if (*pred_ptr == (*node_ptr)->l) {
block_t *old_right = (*node_ptr)->r;
*node_ptr = *pred_ptr; /* Replace target with its left child. */
(*node_ptr)->r = old_right; /* Attach the original right subtree. */
assert(*node_ptr != (*node_ptr)->l);
assert(*node_ptr != (*node_ptr)->r);
} else {
/* The predecessor is deeper in the left subtree. */
block_t *old_left = (*node_ptr)->l;
block_t *old_right = (*node_ptr)->r;
block_t *pred_node = *pred_ptr;
/* Remove the predecessor from its original location. */
remove_free_tree(&old_left, *pred_ptr);
/* Replace the target node with the predecessor. */
*node_ptr = pred_node;
(*node_ptr)->l = old_left;
(*node_ptr)->r = old_right;
assert(*node_ptr != (*node_ptr)->l);
assert(*node_ptr != (*node_ptr)->r);
}
}
/* If the target node has one child (or none), simply splice it out. */
else if ((*node_ptr)->l || (*node_ptr)->r) {
block_t *child = ((*node_ptr)->l) ? (*node_ptr)->l : (*node_ptr)->r;
*node_ptr = child;
} else {
/* No children: remove the node. */
*node_ptr = NULL;
}
/* Clear the removed node's child pointers to avoid dangling references. */
target->l = NULL;
target->r = NULL;
}
解釋:remove_free_tree
會將 node target
從二元搜尋樹(BST)中移除。首先先呼叫 find_free_tree
來獲得指向 target
的 pointer,此時分為三種情況:
NULL
完成移除。最後將 Target 的兩個child 指標皆設為 NULL,防止 dangling pointers 出現。
完整測試程式(不包括remove_free_tree
),依序將結點2, 3, 1, 4, 0 進行移除。
#include <assert.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include "list.h"
#define NUM_NODES 5
typedef struct block {
size_t size;
struct block *l, *r;
} block_t;
block_t** find_free_tree(block_t **root, block_t *target)
{
block_t **cur = root;
while (*cur && *cur != target) {
if (target->size < (*cur)->size)
cur = &(*cur)->l;
else
cur = &(*cur)->r;
}
return cur;
}
block_t* find_predecessor_free_tree(block_t **root, block_t *node)
{
block_t *pred = NULL;
block_t *cur = *root;
while (cur && cur != node) {
if (node->size < cur->size) {
cur = cur->l;
} else {
pred = cur;
cur = cur->r;
}
}
return pred;
}
void insert_free_tree(block_t **root, block_t *node)
{
block_t **cur = root;
while (*cur) {
if (node->size < (*cur)->size)
cur = &(*cur)->l;
else
cur = &(*cur)->r;
}
*cur = node;
}
void print_free_tree(block_t *root)
{
if (root) {
print_free_tree(root->l);
printf("%zu ", root->size);
print_free_tree(root->r);
}
}
int main()
{
block_t *root = NULL;
block_t *nodes[NUM_NODES];
for (size_t i = 0; i < NUM_NODES; i++) {
nodes[i] = malloc(sizeof(block_t));
nodes[i]->size = i;
nodes[i]->l = NULL;
nodes[i]->r = NULL;
insert_free_tree(&root, nodes[i]);
}
print_free_tree(root);
printf("\n");
block_t *targets[NUM_NODES] = {nodes[2], nodes[3], nodes[1], nodes[4], nodes[0]};
for (size_t i = 0; i < NUM_NODES; i++) {
remove_free_tree(&root, targets[i]);
print_free_tree(root);
printf("\n");
}
for (size_t i = 0; i < NUM_NODES; i++) {
free(nodes[i]);
}
return 0;
}
輸出符合預期也沒有觸發 assert。
0 1 2 3 4
0 1 3 4
0 1 4
0 4
0
GGGG = head->prev=prev
HHHH = list_entry(pivot, node_t,list)->value
IIII = list_entry(n,node_t,list)->value
JJJJ = pivot
KKKK = right
void quick_sort(struct list_head *list)
{
int n = list_length(list);
int value;
int i = 0;
int max_level = 2 * n;
struct list_head *begin[max_level];
struct list_head *result = NULL, *left = NULL, *right = NULL;
begin[0] = list->next;
list->prev->next = NULL;
while (i >= 0) {
struct list_head *L = begin[i], *R = list_tail(begin[i]);
if (L != R) {
struct list_head *pivot = L;
value = list_entry(pivot, node_t, list)->value;
struct list_head *p = pivot->next;
pivot->next = NULL; /* break the list */
while (p) {
struct list_head *n = p;
p = p->next;
int n_value = list_entry(n, node_t, list)->value;
if (n_value > value) {
n->next = right;
right = n;
} else {
n->next = left;
left = n;
}
}
begin[i] = left;
begin[i + 1] = pivot;
begin[i + 2] = right;
left = right = NULL;
i += 2;
} else {
if (L) {
L->next = result;
result = L;
}
i--;
}
}
list->next = result;
rebuild_list_link(list);
}
解釋上述程式碼運作原理
此程式碼使用迭代式(iterative)的方法實作quick_sort。
begin[]
用來儲存分割的頭和尾,因此大小為 2*list_length。rebuild_list_link(list)
將排好的子序列們連接起來,滿足雙向鏈結串列的特性。研讀〈A comparative study of linked list sorting algorithms〉,分析針對雙向鏈結串列的排序演算法考量,並運用 Linux 核心風格的 List API 予以實作。
AAAA = list_first_entry
BBBB = list_del
CCCC = list_move_tail
DDDD = list_add
EEEE = list_splice
FFFF = list_splice_tail
static void list_quicksort(struct list_head *head)
{
struct list_head list_less, list_greater;
struct listitem *pivot;
struct listitem *item = NULL, *is = NULL;
if (list_empty(head) || list_is_singular(head))
return;
INIT_LIST_HEAD(&list_less);
INIT_LIST_HEAD(&list_greater);
pivot = list_first_entry(head, struct listitem, list);
list_del(&pivot->list);
list_for_each_entry_safe(item, is, head, list) {
if (cmpint(&item->i, &pivot->i) < 0)
list_move_tail(&item->list, &list_less);
else
list_move_tail(&item->list, &list_greater);
}
list_quicksort(&list_less);
list_quicksort(&list_greater);
list_add(&pivot->list, head);
list_splice(&list_less, head);
list_splice_tail(&list_greater, head);
}
運作原理:
此程式碼使用 quick_sort 來對雙向鏈結串列進行排序,相較於 quiz01_3,此實作採用遞迴呼叫的方式。在每次呼叫時,創建兩個 list_head list_less
, list_greater
用來儲存兩個分割後的雙向鏈結串列。與一般 quick_sort 分割方式一致,使用第一個元素當成 pivot
並從原本串列移除,接著走訪整個串列,並一一將節點放入兩個串列,最後遞迴地對兩個分割後的串列進行 quick_sort。合併的部分,首先將 pivot
放到雙向鏈結串列的頭,接著將 list_less
拼接到串列的頭,最後將 list_greater
拼接至尾部,最終串列會是 head -> list_less -> pivot -> list_greater -> head 的順序。
static inline void random_shuffle_array(uint16_t *operations, uint16_t len)
{
for (uint16_t i = 0; i < len; i++) {
/* WARNING biased shuffling */
uint16_t j = get_unsigned16() % (i + 1);
operations[i] = operations[j];
operations[j] = i;
}
}
改進 random_shuffle_array:
首先證明此隨機法的機率。
證明:定義
發現可合併 k 的兩種情況,以及
但在實際測試時發現結果並不是隨機的,如下圖所示:
往下深究發現其實 get_unsigned16() 本身給的結果並不完全隨機,如下圖,橫軸為 [0, UINT16_MAX],總共隨機 100000000 次,其中非零的點只有 569 個,比 10 大的點更只有 373 個。
修改:使用內建的 rand() 函數即可解決問題。
static inline void random_shuffle_array(uint16_t *operations, uint16_t len)
{
for (uint16_t i = 0; i < len; i++) {
// uint16_t j = get_unsigned16() % (i + 1); // original
uint16_t j = rand() % (i + 1); // new
operations[i] = operations[j];
operations[j] = i;
}
}
GGGG = 14
HHHH = 2
IIII = 0
JJJJ = 3
KKKK = 2
LLLL = 1
MMMM = ~1
NNNN = 1
PPPP = 2
static const int mask[] = {0, 8, 12, 14};
static const int magic[] = {2, 1, 0, 0};
unsigned clz2(uint32_t x, int c)
{
if (!x && !c)
return 32;
uint32_t upper = (x >> (16 >> c));
uint32_t lower = (x & (0xFFFF >> mask[c]));
if (c == 3)
return upper ? magic[upper] : 2 + magic[lower];
return upper ? clz2(upper, c + 1) : (16 >> (c)) + clz2(lower, c + 1);
}
解釋:此函數中 c
表示的是遞迴的深度,初始為 0
,每次呼叫函式時首先將數字 x 分成 upper
和 lower
兩部分,當 c = 0, 1, 2, 3 時 lower
為右方16, 8, 4, 2 bits,因此 GGGG
為 magic[upper]
或 2 + magic[lower]
(此處因為是2 bits 2 bits 判斷,因此 +2)。而 magic
表示的是在該 2 bits 中最左方有多少個 0,因此 magic[] = {2, 1, 0, 0};
。
因為是向上取整,在函數結尾加上判斷是否為二的冪即可,如下程式碼。
uint64_t sqrti_ceiling(uint64_t x)
{
// same as sqrti()
...
uint64_t minus_leading1 = x - (1ULL << shift);
if (minus_leading1 > 0)
y++;
return y;
}
一旦發現效能改進的機會,可準備提交改進 int_sqrt 程式碼到 Linux 核心
AAAA = map->bits
BBBB = map->bits
CCCC = first->pprev
DDDD = n->pprev
EEEE = n->pprev
void map_add(map_t *map, int key, void *data)
{
struct hash_key *kn = find_key(map, key);
if (kn)
return;
kn = malloc(sizeof(struct hash_key));
kn->key = key, kn->data = data;
struct hlist_head *h = &map->ht[hash(key, map->bits)];
struct hlist_node *n = &kn->node, *first = h->first;
n->next = first;
if (first)
first->pprev = &n->next;
h->first = n;
n->pprev = &h->first;
}
針對 Linux 核心的 hash table 實作,提供更多圖例並揣摩 Linux 核心開發者
map_add
中,首先使用 find_key
嘗試獲取 key 是否存在在 hash map map
中,若存在則返回。不存在時,先創建一個全新的 hash_key 並將其節點 n
插到 bucket 的頭 h
(next
指向 h
中的第一個節點,若第一個節點不為空,則第一個節點的前一個節點指向節點 n->next
的地址,並將 n
設為 h
的第一個節點,以及n的前一節點指向 h->first
的地址來完成插入)。int main()
{
int nums[] = {1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67, 69, 71, 73, 75, 77, 79, 80, 83, 85, 87, 89, 91, 93, 95, 97, 99, 81};
int target = 161;
int returnSize;
int *ret = twoSum(nums, sizeof(nums) / sizeof(nums[0]), target, &returnSize);
if (ret) {
printf("[%d, %d]\n", ret[0], ret[1]);
assert(nums[ret[0]] + nums[ret[1]] == target);
free(ret);
printf("Test passed\n");
}
return 0;
}
注意: Linux 核心內部大量使用 hash table,但並非每處的雜湊函數都滿足減少碰撞及避免 clustering 現象的原則,也就是存在若干改進空間
註:vol 3 Section 6.4 閱讀需要付費,無法完成。
fncache
來儲存是否該檔案能被存取,其中 shash()
為該實作的雜湊函式。struct fncache {
struct hlist_node nd;
bool res;
char name[];
};
#define FNHSIZE 61
static struct hlist_head fncache_hash[FNHSIZE];
unsigned shash(const unsigned char *s)
{
unsigned h = 0;
while (*s)
h = 65599 * h + *s++;
return h ^ (h >> 16);
}
static bool lookup_fncache(const char *name, bool *res);
static void update_fncache(const char *name, bool res);
bool file_available(const char *name);
pnsocks.hlist
用來儲存、管理與查找 Linux 核心中的 Phonet sockets。其中在 pn_hash_list()
中,使用 Phonet socker object id obj
的索引來計算雜湊值。...
static int pn_socket_release(struct socket *sock)
{
struct sock *sk = sock->sk;
if (sk) {
sock->sk = NULL;
sk->sk_prot->close(sk, 0);
}
return 0;
}
#define PN_HASHSIZE 16
#define PN_HASHMASK (PN_HASHSIZE-1)
static struct {
struct hlist_head hlist[PN_HASHSIZE];
struct mutex lock;
} pnsocks;
void __init pn_sock_init(void)
{
unsigned int i;
for (i = 0; i < PN_HASHSIZE; i++)
INIT_HLIST_HEAD(pnsocks.hlist + i);
mutex_init(&pnsocks.lock);
}
static struct hlist_head *pn_hash_list(u16 obj)
{
return pnsocks.hlist + (obj & PN_HASHMASK);
}
...