# Lab0 `lib/list_sort.c` 研讀筆記 ::: spoiler `list_sort.c` src ```c // SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> #include <linux/export.h> #include <linux/list_sort.h> #include <linux/list.h> /* * Returns a list organized in an intermediate format suited * to chaining of merge() calls: null-terminated, no reserved or * sentinel head node, "prev" links not maintained. */ __attribute__((nonnull(2,3,4))) static struct list_head *merge(void *priv, list_cmp_func_t cmp, struct list_head *a, struct list_head *b) { struct list_head *head, **tail = &head; for (;;) { /* if equal, take 'a' -- important for sort stability */ if (cmp(priv, a, b) <= 0) { *tail = a; tail = &a->next; a = a->next; if (!a) { *tail = b; break; } } else { *tail = b; tail = &b->next; b = b->next; if (!b) { *tail = a; break; } } } return head; } /* * Combine final list merge with restoration of standard doubly-linked * list structure. This approach duplicates code from merge(), but * runs faster than the tidier alternatives of either a separate final * prev-link restoration pass, or maintaining the prev links * throughout. */ __attribute__((nonnull(2,3,4,5))) static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, struct list_head *a, struct list_head *b) { struct list_head *tail = head; u8 count = 0; for (;;) { /* if equal, take 'a' -- important for sort stability */ if (cmp(priv, a, b) <= 0) { tail->next = a; a->prev = tail; tail = a; a = a->next; if (!a) break; } else { tail->next = b; b->prev = tail; tail = b; b = b->next; if (!b) { b = a; break; } } } /* Finish linking remainder of list b on to tail */ tail->next = b; do { /* * If the merge is highly unbalanced (e.g. the input is * already sorted), this loop may run many iterations. * Continue callbacks to the client even though no * element comparison is needed, so the client's cmp() * routine can invoke cond_resched() periodically. */ if (unlikely(!++count)) cmp(priv, b, b); b->prev = tail; tail = b; b = b->next; } while (b); /* And the final links to make a circular doubly-linked list */ tail->next = head; head->prev = tail; } /** * list_sort - sort a list * @priv: private data, opaque to list_sort(), passed to @cmp * @head: the list to sort * @cmp: the elements comparison function * * The comparison function @cmp must return > 0 if @a should sort after * @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should * sort before @b *or* their original order should be preserved. It is * always called with the element that came first in the input in @a, * and list_sort is a stable sort, so it is not necessary to distinguish * the @a < @b and @a == @b cases. * * The comparison function must adhere to specific mathematical properties * to ensure correct and stable sorting: * - Antisymmetry: cmp(@a, @b) must return the opposite sign of * cmp(@b, @a). * - Transitivity: if cmp(@a, @b) <= 0 and cmp(@b, @c) <= 0, then * cmp(@a, @c) <= 0. * * This is compatible with two styles of @cmp function: * - The traditional style which returns <0 / =0 / >0, or * - Returning a boolean 0/1. * The latter offers a chance to save a few cycles in the comparison * (which is used by e.g. plug_ctx_cmp() in block/blk-mq.c). * * A good way to write a multi-word comparison is:: * * if (a->high != b->high) * return a->high > b->high; * if (a->middle != b->middle) * return a->middle > b->middle; * return a->low > b->low; * * * This mergesort is as eager as possible while always performing at least * 2:1 balanced merges. Given two pending sublists of size 2^k, they are * merged to a size-2^(k+1) list as soon as we have 2^k following elements. * * Thus, it will avoid cache thrashing as long as 3*2^k elements can * fit into the cache. Not quite as good as a fully-eager bottom-up * mergesort, but it does use 0.2*n fewer comparisons, so is faster in * the common case that everything fits into L1. * * * The merging is controlled by "count", the number of elements in the * pending lists. This is beautifully simple code, but rather subtle. * * Each time we increment "count", we set one bit (bit k) and clear * bits k-1 .. 0. Each time this happens (except the very first time * for each bit, when count increments to 2^k), we merge two lists of * size 2^k into one list of size 2^(k+1). * * This merge happens exactly when the count reaches an odd multiple of * 2^k, which is when we have 2^k elements pending in smaller lists, * so it's safe to merge away two lists of size 2^k. * * After this happens twice, we have created two lists of size 2^(k+1), * which will be merged into a list of size 2^(k+2) before we create * a third list of size 2^(k+1), so there are never more than two pending. * * The number of pending lists of size 2^k is determined by the * state of bit k of "count" plus two extra pieces of information: * * - The state of bit k-1 (when k == 0, consider bit -1 always set), and * - Whether the higher-order bits are zero or non-zero (i.e. * is count >= 2^(k+1)). * * There are six states we distinguish. "x" represents some arbitrary * bits, and "y" represents some arbitrary non-zero bits: * 0: 00x: 0 pending of size 2^k; x pending of sizes < 2^k * 1: 01x: 0 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k * 2: x10x: 0 pending of size 2^k; 2^k + x pending of sizes < 2^k * 3: x11x: 1 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k * 4: y00x: 1 pending of size 2^k; 2^k + x pending of sizes < 2^k * 5: y01x: 2 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k * (merge and loop back to state 2) * * We gain lists of size 2^k in the 2->3 and 4->5 transitions (because * bit k-1 is set while the more significant bits are non-zero) and * merge them away in the 5->2 transition. Note in particular that just * before the 5->2 transition, all lower-order bits are 11 (state 3), * so there is one list of each smaller size. * * When we reach the end of the input, we merge all the pending * lists, from smallest to largest. If you work through cases 2 to * 5 above, you can see that the number of elements we merge with a list * of size 2^k varies from 2^(k-1) (cases 3 and 5 when x == 0) to * 2^(k+1) - 1 (second merge of case 5 when x == 2^(k-1) - 1). */ __attribute__((nonnull(2,3))) void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp) { struct list_head *list = head->next, *pending = NULL; size_t count = 0; /* Count of pending */ if (list == head->prev) /* Zero or one elements */ return; /* Convert to a null-terminated singly-linked list. */ head->prev->next = NULL; /* * Data structure invariants: * - All lists are singly linked and null-terminated; prev * pointers are not maintained. * - pending is a prev-linked "list of lists" of sorted * sublists awaiting further merging. * - Each of the sorted sublists is power-of-two in size. * - Sublists are sorted by size and age, smallest & newest at front. * - There are zero to two sublists of each size. * - A pair of pending sublists are merged as soon as the number * of following pending elements equals their size (i.e. * each time count reaches an odd multiple of that size). * That ensures each later final merge will be at worst 2:1. * - Each round consists of: * - Merging the two sublists selected by the highest bit * which flips when count is incremented, and * - Adding an element from the input as a size-1 sublist. */ do { size_t bits; struct list_head **tail = &pending; /* Find the least-significant clear bit in count */ for (bits = count; bits & 1; bits >>= 1) tail = &(*tail)->prev; /* Do the indicated merge */ if (likely(bits)) { struct list_head *a = *tail, *b = a->prev; a = merge(priv, cmp, b, a); /* Install the merged result in place of the inputs */ a->prev = b->prev; *tail = a; } /* Move one element from input list to pending */ list->prev = pending; pending = list; list = list->next; pending->next = NULL; count++; } while (list); /* End of input; merge together all the pending lists. */ list = pending; pending = pending->prev; for (;;) { struct list_head *next = pending->prev; if (!next) break; list = merge(priv, cmp, pending, list); pending = next; } /* The final merge, rebuilding prev links */ merge_final(priv, cmp, head, pending, list); } EXPORT_SYMBOL(list_sort); ``` ::: ### 標頭研讀 ```c // SPDX-License-Identifier: GPL-2.0 #include <linux/compiler.h> #include <linux/export.h> #include <linux/list_sort.h> #include <linux/list.h> ``` `// SPDX-License-Identifier: GPL-2.0` SPDX(Software Package Data Exchange)是一種標準化的方式,用來標示軟體的授權許可資訊。`GPL-2.0` 則代表採用 GNU 通用公共授權(GNU General Public License)第 2 版 進行授權。這是一種開源許可證,允許使用者自由地使用、修改和分發代碼,但前提是必須遵守 GPL-2.0 的條款,例如分發修改後的代碼時也必須保持開源並提供原始碼。這行註解清楚地告訴開發者這段代碼的法律授權狀態,避免了冗長的版權聲明文字。 `#include <linux/compiler.h>` : 它確保代碼在不同環境下都能正確編譯和運行 `#include <linux/export.h>` : 此定義了用於導出符號的巨集,例如 `EXPORT_SYMBOL`。在 Linux 核心中,有些函數或變數需要被其他核心模組(module)使用,這時就需要透過這些巨集將它們標記為可導出,如果有函數需要被其他模組調用,就會依賴這個頭檔案提供的功能 ## 概述 `list_sort` 用於對linux kernel中的doubly linekd list進行排序。排序部分採用的是穩定的 merge sort algorithm,並支援自定義比較邏輯。 ### 為什麼使用合併排序? 1. 穩定性:合併排序保證相等元素的相對順序不變,這在核心中非常重要。 2. linked list友好:合併排序不需隨機存取,只需指針操作即可分割和合併鏈表。 3. 時間複雜度:O(nlogn),空間複雜度低,適合linked list結構。 ## 實現 實作方面 `list_sort` 仰賴兩個輔助函式 : `merge` 和 `merge_final` ### 輔函式1 `merge` ```C /* * Returns a list organized in an intermediate format suited * to chaining of merge() calls: null-terminated, no reserved or * sentinel head node, "prev" links not maintained. */ ``` 此註解為針對 `merge` 函式的作用 : 此函式回傳的中間格式架構有以下三特點 : 1. Null-terminated(以 NULL 結尾) 2. No reserved or sentinel head node(無保留或哨兵頭節點) 3. "Prev" links not maintained(不維護 "prev" 連結) 作用 : 合併兩個已排序的子列表 a 和 b,生成一個單向鏈表(以 NULL 結尾) src : ```c __attribute__((nonnull(2,3,4))) ``` 此是GCC編譯器提供的一個擴展屬性,用於告訴編譯器某些函數參數不應該為 NULL。具體來說,這裡的 (2,3,4) 表示函數的第 2、第 3 和第 4 個參數必須是非空的ptr ```c static struct list_head *merge(void *priv, list_cmp_func_t cmp, struct list_head *a, struct list_head *b) { struct list_head *head, **tail = &head; for (;;) { if (cmp(priv, a, b) <= 0) { // 穩定性:相等時選 a *tail = a; tail = &a->next; a = a->next; if (!a) { *tail = b; break; } } else { *tail = b; tail = &b->next; b = b->next; if (!b) { *tail = a; break; } } } return head; } ``` 引數 : * `void *priv` : 為一私有泛型資料指標,目的是讓使用者能設置比較邏輯 `cmp` 的對象 * `list_cmp_func_t cmp` : 是一個自訂的比較函數,用來決定兩個串列節點 a 和 b 的相對順序 * `list_cmp_func_t` 定義在 `<linux/list.h>` 中 ```c typedef int (*list_cmp_func_t)(void *priv, const struct list_head *a, const struct list_head *b); ``` * `struct list_head *a` : a 是第一個sorted的linked list head * `struct list_head *b` : b 是第二個sorted的linked list head 邏輯 : 用 `*head` 作為新的list之head,再用 `**tail` 根據 a、b list中element內容比較去選擇將該 element 串入新的list(若相等則取a保證stable)。若 a、b 誰先取完,就將另一個list串到新list後面 ### 輔函式2 `merge_final` 作用:最終合併兩個子列表,並恢復雙向鏈表的 `prev` 指針,形成循環結構。 src : ```c static void merge_final(void *priv, list_cmp_func_t cmp, struct list_head *head, struct list_head *a, struct list_head *b) { struct list_head *tail = head; u8 count = 0; for (;;) { if (cmp(priv, a, b) <= 0) { tail->next = a; a->prev = tail; tail = a; a = a->next; if (!a) break; } else { tail->next = b; b->prev = tail; tail = b; b = b->next; if (!b) { b = a; break; } } } tail->next = b; do { if (unlikely(!++count)) cmp(priv, b, b); // 避免過長迴圈,允許調度 b->prev = tail; tail = b; b = b->next; } while (b); tail->next = head; head->prev = tail; } ``` 邏輯 : 前面部分與 `merge` 相同,新的部分為多維護一個 `count` 與後面使用 do-while 迴圈處理剩餘節點,同時監控迴圈次數(`count`)以避免過長執行。最後閉合形成循環 * `unlikely()` 是一個巨集(macro),作為編譯器的提示,告訴編譯器這個條件`(!++count)`不太可能發生,優化程式執行效率。 * 又因 `count` 為 u8 ,值為255時再執行 `++count` 後,會溢出,`count` 變為 0 * `cmp(priv, b, b)` 為一 no-op ,通常包含一些副作用(side effect),觸發某些系統檢查點。這些副作用給予系統一個機會,讓操作系統的調度器(scheduler)介入,檢查是否需要切換到其他任務。 ### 主函式 `list_sort` : src : ```c void list_sort(void *priv, struct list_head *head, list_cmp_func_t cmp) { struct list_head *list = head->next, *pending = NULL; size_t count = 0; if (list == head->prev) // 0 或 1 個元素,直接返回 return; head->prev->next = NULL; // 斷開循環,轉為單向鏈表 do { size_t bits; struct list_head **tail = &pending; for (bits = count; bits & 1; bits >>= 1) tail = &(*tail)->prev; if (likely(bits)) { struct list_head *a = *tail, *b = a->prev; a = merge(priv, cmp, b, a); a->prev = b->prev; *tail = a; } list->prev = pending; pending = list; list = list->next; pending->next = NULL; count++; } while (list); list = pending; pending = pending->prev; for (;;) { struct list_head *next = pending->prev; if (!next) break; list = merge(priv, cmp, pending, list); pending = next; } merge_final(priv, cmp, head, pending, list); } ``` #### 註解研讀 ```c /** * list_sort - sort a list * @priv: private data, opaque to list_sort(), passed to @cmp * @head: the list to sort * @cmp: the elements comparison function */ ``` * `@priv`:私有資料,對 list_sort 來說是不透明的指標,會直接傳遞給比較函數 @cmp。這允許 @cmp 在比較元素時使用額外的上下文資料。 * `@head`:要排序的linked list的頭部,指標指向串列的起始節點。 * `@cmp`:自訂的比較函數,由使用者提供,用來定義兩個節點之間的順序關係。 ```c /* * The comparison function @cmp must return > 0 if @a should sort after * @b ("@a > @b" if you want an ascending sort), and <= 0 if @a should * sort before @b *or* their original order should be preserved. ``` 描述比較函數 @cmp 的行為 大於 0:表示 @a 應排在 @b 後面。例如,若要升序排序,則 @a > @b 時返回大於 0。 小於等於 0:表示 @a 應排在 @b 前面,或兩者相等時保持原始順序。`list_sort` 是一個穩定排序(stable sort),即相等元素的相對順序不會改變。@cmp 總是以輸入串列中較早出現的元素作為 @a,因此無需明確區分 @a < @b 和 @a == @b 的情況,只需返回 <= 0 即可。 ```c /* The comparison function must adhere to specific mathematical properties: - Antisymmetry: cmp(@a, @b) must return the opposite sign of cmp(@b, @a). - Transitivity: if cmp(@a, @b) <= 0 and cmp(@b, @c) <= 0, then cmp(@a, @c) <= 0. ``` 描述@cmp所需的數學屬性要求 * 反對稱性(Antisymmetry): cmp(@a, @b) 和 cmp(@b, @a) 的返回值符號必須相反。例如,若 cmp(@a, @b) > 0,則 cmp(@b, @a) < 0。這避免了邏輯矛盾(如 @a > @b 且 @b > @a)。 * 傳遞性(Transitivity): 若 cmp(@a, @b) <= 0 且 cmp(@b, @c) <= 0,則 cmp(@a, @c) <= 0 必須成立。這確保排序結果的一致性和正確性。 ```c /* This is compatible with two styles of @cmp function: - The traditional style which returns <0 / =0 / >0, or - Returning a boolean 0/1. ``` @cmp的兩種style * traditional style: 返回 < 0(@a < @b)、= 0(@a == @b)、> 0(@a > @b),類似標準 C 的 `strcmp`。 * boolean style: 返回 0(@a <= @b)或 1(@a > @b)。 這種方式更簡潔,可能節省少量 CPU 週期。例如,`block/blk-mq.c` 中的 `plug_ctx_cmp` 就採用此 ```c /* A good way to write a multi-word comparison is:: if (a->high != b->high) return a->high > b->high; if (a->middle != b->middle) return a->middle > b->middle; return a->low > b->low; ``` @cmp 多欄位比較範例 假設節點有 high、middle、low 三個欄位,依序比較。 先比較 high,若不同,根據 high 的值決定順序;若相同,再比較 middle,最後比較 low。這種方式實現了字典序(lexicographical order),常用於多條件排序。 ```c /* This mergesort is as eager as possible while always performing at least 2:1 balanced merges. Given two pending sublists of size 2^k, they are merged to a size-2^(k+1) list as soon as we have 2^k following elements. ``` 說明此list_sort的eager體現在條件允許時盡早合併子串列,以免最後很長一條merge一小條不balanced的case。且實施的準則為 `2:1 balanced merges` ,當有兩個大小為 2^k 的子串列,且後續有 2^k 個元素時,將這兩個子串列合併成一個大小為 2^(k+1) 的串列,旨在先merge小。 ```c /* Thus, it will avoid cache thrashing as long as 3*2^k elements can fit into the cache. Not quite as good as a fully-eager bottom-up mergesort, but it does use 0.2*n fewer comparisons. ``` 只要快取能容納 3*2^k 個元素,就能避免cache thrashing,相較於fully-eager bottom-up mergesort,這種方法減少了約 0.2*n 次比較。 ```c /* The merging is controlled by "count", the number of elements in the pending lists. Each time we increment "count", we set one bit (bit k) and clear bits k-1 .. 0. Each time this happens (except the very first time for each bit, when count increments to 2^k), we merge two lists of size 2^k into one list of size 2^(k+1). ``` 描述 `count` 的角色為表示待處理串列中的元素總數,用來決定何時進行合併。且描述了運作機制 : 位元操作: 每次 count 增加時,設置某個位元(bit k),並清除低於 k 的位元(k-1 至 0)。 合併觸發: 當 count 達到 2^k 的奇數倍(除了第一次達到 2^k),觸發兩個大小為 2^k 的串列合併,生成一個 2^(k+1) 的串列。 ```c /* This merge happens exactly when the count reaches an odd multiple of 2^k, which is when we have 2^k elements pending in smaller lists. ``` 描述 `count` 機制的merge時機 : 當 `count` 為 2^k 的奇數倍時,表示有 2^k 個元素在較小的待處理串列中,此時可以安全合併兩個 2^k 大小的串列。後續合併時,生成兩個 2^(k+1) 的串列,接著它們會被合併成 2^(k+2),確保待處理串列數量不超過兩個。 ```c /* This merge happens exactly when the count reaches an odd multiple of 2^k, which is when we have 2^k elements pending in smaller lists. ``` 描述pending list的狀態,大小為 2^k 的待處理串列數量取決於: 1. `count` 的第 k 位狀態。 2. 第 k-1 位狀態(k = 0 時,假設 bit -1 恆為 1)。 3. 高位是否為非零(即 `count` >= 2^(k+1))。 ```c /* There are six states we distinguish: 0: 00x: 0 pending of size 2^k; x pending of sizes < 2^k 1: 01x: 0 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k 2: x10x: 0 pending of size 2^k; 2^k + x pending of sizes < 2^k 3: x11x: 1 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k 4: y00x: 1 pending of size 2^k; 2^k + x pending of sizes < 2^k 5: y01x: 2 pending of size 2^k; 2^(k-1) + x pending of sizes < 2^k (merge and loop back to state 2) ``` 描述6種states `x` 表示任意位元,`y` 表示非零高位。 0:`00x` - 無 2^k 大小的串列,x 個小於 2^k 的串列。 1:`01x` - 無 2^k ,2^(k-1) + x 個小於 2^k。 2:`x10x` - 無 2^k ,2^k + x 個小於 2^k。 3:`x11x` - 1 個 2^k ,2^(k-1) + x 個小於 2^k。 4:`y00x` - 1 個 2^k ,2^k + x 個小於 2^k。 5:`y01x` - 2 個 2^k, 2^(k-1) + x 個小於 2^k,觸發合併後回到狀態 2。 ```c /* We gain lists of size 2^k in the 2->3 and 4->5 transitions and merge them away in the 5->2 transition. When we reach the end of the input, we merge all the pending lists, from smallest to largest. ``` 生成 : 2->3 和 4->5:當 bit k-1 為 1 且高位非零時,生成 2^k 大小的串列(state 5)。 合併 : 5->2:合併兩個 2^k 的串列,生成 2^(k+1),並回到state 2 最終合併:處理完所有輸入後,將所有待處理串列從小到大合併,完成排序。 總結 : `list_sort` 透過 `count` 控制合併時機,實現 2:1 balanced merge。它優化了cache使用(需容納 32^k 元素)和比較次數(減少 0.2n 次),特別適合資料能放入 L1 快取的情況。使用者可透過 @cmp 和 @priv 靈活定義排序邏輯 函式流程 1. guard clause與初始化 : * 若list只有 0 或 1 個元素,直接返回。 * 將circular list斷開,轉為單向鏈表。 * 初始化 `*pending`(待合併子列表)為 NULL,`count` 為 0。 2. 主循環:處理輸入 (merge時機為 `count` 達到某個 2^k 的奇數倍時,觸發大小為 2^k 的子列表合併) * 計算合併點: * 檢查 `count` 的二進位表示,找到最低位的 0(記為 bits)。 * 沿著 `pending` 的 `prev` 指針前進 `bits` 步,定位需要合併的子列表。 * 執行合併: * 若 `bits` != 0,取出 `pending` 中的兩個子列表 a 和 b,使用 merge 合併,並更新 `pending`。 * 加入新元素: * 從輸入 `list` 中取出一個元素,作為大小為 1 的子列表,加入 `pending`。 * 將 `count` 加 1。 3. 最終合併 * 從 `pending` 開始,逐對合併子列表,使用 merge * 使用 merge_final 合併剩餘兩個sublists,並恢復doubly linked list `list_sort` 優點特性 * 合併策略 : * 自底向上:從大小為 1 的子列表開始,逐步合併。 * 平衡合併:通過 count 控制,確保每次合併的子列表大小比例接近 2:1。 * 減少合併次數:僅在必要時合併,減少約 20% 的比較次數。 * stable * 在 merge 和 merge_final 中,當 cmp(a, b) <= 0 時優先選 a,保證相等元素的原始順序 * 效能考量 * cache友好:平衡合併使子列表大小適中,適合快取。 * 上下文切換:merge_final 中的計數器設計允許調度器介入 * count 設計 * 每次 count 增加,檢查其最低位的 0,觸發對應大小的子列表合併。bitwise 運算是非常快速的操作,尤其是在核心程式(如作業系統核心)中,對效能要求很高時,這種方法能顯著提升速度。