Tim sort 程式步驟詳解

void timsort(void *priv, struct list_head *head, list_cmp_func_t cmp)
{
    stk_size = 0;

    struct list_head *list = head->next, *tp = NULL;
    if (head == head->prev)
        return;

    /* Convert to a null-terminated singly-linked list. */
    head->prev->next = NULL;

    do {
        /* Find next run */
        struct pair result = find_run(priv, list, cmp);
        result.head->prev = tp;
        tp = result.head;
        list = result.next;
        stk_size++;
        tp = merge_collapse(priv, cmp, tp);
    } while (list);

    /* End of input; merge together all the runs. */
    tp = merge_force_collapse(priv, cmp, tp);

    /* The final merge; rebuild prev links */
    struct list_head *stk0 = tp, *stk1 = stk0->prev;
    while (stk1 && stk1->prev)
        stk0 = stk0->prev, stk1 = stk1->prev;

    if (stk_size <= 1) {
        build_prev_link(head, head, stk0);
        return;
    }    
    merge_final(priv, cmp, head, stk1, stk0);
}

tim sort
重點是看下列這幾行函式

find_run(priv, list, cmp);
merge_collapse(priv, cmp, tp);
merge_force_collapse(priv, cmp, tp);	
build_prev_link(head, head, stk0);
merge_final(priv, cmp, head, stk1, stk0);

	/* Find next run */
	struct pair result = find_run(priv, list, cmp);
	tp = merge_collapse(priv, cmp, tp);
	/* End of input; merge together all the runs. */
	tp = merge_force_collapse(priv, cmp, tp);		

	/* The final merge; rebuild prev links */
	struct list_head *stk0 = tp, *stk1 = stk0->prev;
	while (stk1 && stk1->prev)
		stk0 = stk0->prev, stk1 = stk1->prev;
	if (stk_size <= 1) {
		build_prev_link(head, head, stk0);
		return;
	}    
	merge_final(priv, cmp, head, stk1, stk0);

find_run(priv, list, cmp)：對 link list 分割成 run，其中
- if (cmp(priv, list, next) > 0) 代表節點值 list > next (判斷為降冪)，此時會一邊尋訪節點，一邊將鏈結串列反轉
- while (next && cmp(priv, list, next) <= 0);若節點資料為升冪，則一路紀錄len長度，歸納成一串 run 串列
- prev 被暫時借用，當成紀錄 run_len 變數
merge_collapse(priv, cmp, tp); 維持 run 長度的平衡
merge_force_collapse(priv, cmp, tp);在排序的最後階段使用，它會強制合併所有剩餘的 runs
build_prev_link(head, head, stk0);：尋訪鏈結，將每個節點的 prev 成員，重新接回前一個節點位址
merge_final(priv, cmp, head, stk1, stk0);將兩個主要分支的 runs 進行最終合併。

`find_run（）`

//Step 1: 紀錄鏈結串列的長度及降序/升序
static struct pair find_run(void *priv,
                            struct list_head *list,
                            list_cmp_func_t cmp)
{
    size_t len = 1;
    struct list_head *next = list->next, *head = list;
    struct pair result;

    // !next代表只有單節點
    // 則result 直接回傳 原來節點的 *head 和 *next 
    if (!next) {
        result.head = head, result.next = next;
        return result;
    }

    //接著判斷鏈結串列是降序還是升序。
	
    /*
    cmp(priv, list, next) > 0 代表節點值   list > next (判斷為降序)
    *priv：&count 用來計次find run次數
    */
    if (cmp(priv, list, next) > 0) {
	//節點值   list > next (判斷為降序)
        /* decending run, also reverse the list */
        struct list_head *prev = NULL;
        do {
            len++;
	    //若判斷為降序，則是一邊向前走訪，
	    //一邊透過 list-next = prev 將鏈結串列反轉
            list->next = prev;
            prev = list;//
            list = next;
            next = list->next;
            head = list;
        } while (next && cmp(priv, list, next) > 0);
        list->next = prev;
    } else {
	//節點值 list < next (判斷為升序)
        do {
            len++;
	    //判斷為升序，則一路尋訪節點即可
            list = next;
            next = list->next;
        } while (next && cmp(priv, list, next) <= 0);
        list->next = NULL;
    }
    head->prev = NULL;
    //分割完鏈結串列之後，
    //將 head-next->prev 指向強制轉型的 len ，
    //以此來保存每一個 run 的長度
    head->next->prev = (struct list_head *) len;
    result.head = head, result.next = next;
	
    //最後回傳一個 result ， 
    //result 為 pair 結構，
    // head 保存 run 的開頭， 
    //next 保存下一段要被分割的鏈結串列開頭。
    return result;
}

先解釋

/*
cmp(priv, list, next) > 0 代表節點值   list > next (判斷為降序)
*priv：&count 用來計次find run次數
*/
if (cmp(priv, list, next) > 0) {
//節點值   list > next (判斷為降序)
	/* decending run, also reverse the list */
	struct list_head *prev = NULL;
	do {
		len++;
	//若判斷為降序，則是一邊向前走訪，
	//一邊透過 list-next = prev 將鏈結串列反轉
		list->next = prev; //step1
		prev = list;//step2.prev先備份 list 值，以當作前1個節點位址
		list = next;//step3.list更新成下個節點位址
		next = list->next;//step4 更新 next節點
		head = list;//step5.更新head節點
	} while (next && cmp(priv, list, next) > 0);
	list->next = prev;//step6
}

在find_run（）函式中

static struct pair find_run(void *priv,
                            struct list_head *list,
                            list_cmp_func_t cmp)
{
       //將len常數值，強至轉型成 (struct list_head *) 節點位址值
        // 並將節點位址 儲存到 prev
	head->next->prev = (struct list_head *) len;
    
}

例如

len=10;
head->next->prev = (struct list_head *) len;

代表
head->next->prev = (struct list_head *) 10;
將(struct list_head *) prev 節點，儲存在記憶體位址10處

然後在 prev明明都是存 10 這類常數值
但是在

run_size(tp->prev->prev)
run_size(tp->prev)

卻能用 tp->prev->prev，tp->prev成功找到前一個節點位址的原因
是因為

static inline size_t run_size(struct list_head *head) {
    if (!head)
        return 0;
    if (!head->next)
        return 1;
    return (size_t) (head->next->prev); // 將 prev 重新轉型為 size_t
}

run_size時又強制轉型(size_t)，將這個節點記憶體位址10，當成 run_size長度10

prev 被暫時借用，當成紀錄 run_len 變數

然後在哪個步驟中將 prev 恢復成串接前一個link list節點呢？
答案是

merge_final 和 build_prev_link 函數中，prev 指針的恢復主要在 build_prev_link 函數中完成。

static void merge_final(void *priv,
                        list_cmp_func_t cmp,
                        struct list_head *head,
                        struct list_head *a,
                        struct list_head *b)
{

 ......
    /* Finish linking remainder of list b on to tail */
    build_prev_link(head, tail, b);
}

static void build_prev_link(struct list_head *head,
                            struct list_head *tail,
                            struct list_head *list)
{
    tail->next = list;
    do {
        list->prev = tail;  // 在這裡設置每個節點的 prev 指向前一個節點
        tail = list;
        list = list->next;
    } while (list);

    /* The final links to make a circular doubly-linked list */
    tail->next = head;  // 將最後一個節點的 next 指向頭節點
    head->prev = tail;  // 將頭節點的 prev 指向最後一個節點，完成雙向循環鏈表的連接
}

圖解

可以看到
經過 head->next->prev = (struct list_head *) len;後
next->prev成員已經替換成儲存 run的個數 0x2 個

在節點位址
5a490和 5a5f8 位址的prev 都儲存 0x02

prev 指針的恢復似乎發生在 merge_final 函數中，該函數負責最後的合併過程，並重建前驅鏈接。具體地，build_prev_link 函數被調用以重構整個雙向鏈表的前驅和後繼關係。這個函數將遍歷從頭到尾的所有節點，正確設置每個節點的 prev 和 next 指針，確保鏈表的完整性。

這種設計方法在某些場景下可以提高效率，但也增加了代碼的複雜度和錯誤發生的風險。在使用此類技術時，確保充分測試所有相關功能以防止數據結構損壞。

`merge_collapse()`

對應到 Timsort 的修正版的演算法合併策略

Timsort 的修正版

故在合併策略加入新的條件：

當加入新的 Run 或進行 Merge 後，若未合併的 Run 有 4 個以上，設 A, B, C, D 為最右邊尚未合併的四個 Runs ，若 A <= B + C 或 B <= C + D 時，則 C 與 B 或 D 選長度小的合併

C 與 B 或 D 選長度小的合併
意思是
C 獨立出來， B 和 D 取裡面較小的 ，和 C 合併

if( B < D)// 代表 B 較小
{
// C 和 B 合併

}else{  // 代表 D 較小

// C 和 D 合併
}

測驗2 題目描述

過程中，Timsort 運用 merge_collapse 函式來確保堆疊上的 run 長度保持平衡。該函式主要檢查堆疊頂端的 3 個 run 是否滿足以下原則：

A 的長度要大於 B 和 C 的長度總和。

B 的長度要大於 C 的長度。

當不符合這些條件時，函式會決定是否進行合併。
代表當

A <= B + C 長度

B <= C 長度
就要合併

總結 `merge_collapse` 合併策略

若 A <= B + C 或 B <= C + D 時，則 C 與 B 或 D 選長度小的合併
B <= C 長度==>則 B和C 合併

if( A <= B + C 或 B <= C + D)
{
 // 1. 若 A <= B + C 或 B <= C + D 時，則 C 與 B 或 D 選長度小的合併
	if( B < D)// 代表 B 較小
	{
		// C 和 B 合併

	}else{  // 代表 D 較小

		// C 和 D 合併
	}	
	
}else if(B <= C ){ 
// 2.B <= C 長度==>則 B和C 合併
	
	// run 只有2個的情況下，
	// tim sort提到，當 B > C 的長度 不用合併
	// 反之 若  B <= C 長度則需要合併
	
	// C 和 B 合併
}

對應到 `merge_collapse()` pseudo code

用 pseudo code 解釋程式原理

static struct list_head *merge_collapse(void *priv,
                                        list_cmp_func_t cmp,
                                        struct list_head *tp)
{

	while (run 個數 >= 2)
	{
         // run 個數 取前3個run點 A B C ，或是取4個run點 A B C D
		// if( A <= B + C 或 B <= C + D)
		if( (run 個數 >= 3 && run(A)<=run(B)+run(C)) 或是 (run 個數 >= 4 && run(B)<=run(C)+run(D)) )
		{
			//C 獨立出來 ， B 和 D 取裡面較小的 ，和 C 合併 
			if(run (B) <run (D)) // 代表 B 較小
			{
				// C （tp->prev 節點） 和 B 合併
				tp->prev = merge_at(priv, cmp, tp->prev);
			}else{ // 代表 D 較小
				// C 和 D（tp節點 ） 合併
				tp = merge_at(priv, cmp, tp);
			}
							

		}else if (run(B) <= run(C)){ // 2. B <= C 長度==>則 B 和 C 合併	 

			//C（tp節點 ）  和 B 合併。
			tp = merge_at(priv, cmp, tp);
		}else{
		
			break;
		} 


	}

	return tp;
}

對應到實際程式的節點

若 n >= 3
因為是取前三點 A B C
所以對應到節點位址會是
A:tp->prev->prev
B:tp->prev
C:tp

若 n >= 4
因為是取前四點 A B C D
所以對應到節點位址會是
A:tp->prev->prev->prev
B:tp->prev->prev
C:tp->prev
D:tp

static struct list_head *merge_collapse(void *priv,
                                        list_cmp_func_t cmp,
                                        struct list_head *tp)
{
    int n;
    while ((n = stk_size) >= 2) {
        if ((n >= 3 &&
             run_size(tp->prev->prev) <= run_size(tp->prev) + run_size(tp)) ||
            (n >= 4 && run_size(tp->prev->prev->prev) <=
                           run_size(tp->prev->prev) + run_size(tp->prev))) 
        {
            if (run_size(tp->prev->prev) < run_size(tp)) {
                tp->prev = merge_at(priv, cmp, tp->prev);
            } else {
                tp = merge_at(priv, cmp, tp);
            }
        } else if (run_size(tp->prev) <= run_size(tp)) {
            tp = merge_at(priv, cmp, tp);
        } else {
            break;
        }
    }

    return tp;
}

因為 merge_at()
是取 *at目前節點 和 at->prev 前一步節點，做合併

所以 merge_at() 輸入節點時，要輸入後面的節點位址

例如
B–>C 節點合併: 要輸入後面的C節點位址 merge_at(priv, cmp, C 節點);

C–>D 節點合併: 要輸入後面的D節點位址 merge_at(priv, cmp, D 節點);

// C （tp->prev 節點） 和 B 合併
tp->prev = merge_at(priv, cmp, tp->prev);

// C 和 D（tp節點 ） 合併
tp = merge_at(priv, cmp, tp);

`merge_at()`

根據輸入變數 struct list_head *at 處裡時

size_t len = run_size(at) + run_size(at->prev);
可發現合併時，是取 *at目前節點 和 at->prev 前一步節點，做合併

merge(priv, cmp, at->prev, at); ：取 *at目前節點 和 at->prev 前一步節點，做 merge 合併

static struct list_head *merge_at(void *priv,
                                  list_cmp_func_t cmp,
                                  struct list_head *at)
{
    size_t len = run_size(at) + run_size(at->prev);
    struct list_head *prev = at->prev->prev;
    struct list_head *list = merge(priv, cmp, at->prev, at);
    list->prev = prev;
    list->next->prev = (struct list_head *) len;//合併後，得更新紀錄該 run 的長度
    --stk_size;
    return list;
}

`stk_size` 變數

static size_t stk_size;變數：用來紀錄你的 run 個數

在 find_run(priv, list, cmp); 時，每分割出一組 run
就紀錄 stk_size++;

void timsort(void *priv, struct list_head *head, list_cmp_func_t cmp)
{
    stk_size = 0;

....
    do {
        /* Find next run */
        struct pair result = find_run(priv, list, cmp);
        result.head->prev = tp;
        tp = result.head;
        list = result.next;
        stk_size++;//find_run() 每分割出一組 run 就紀錄 stk_size++
        ...
    }

同理，在合併的最底層邏輯函數 merge_at（）
每合併一次，就會減少一組 run，
因此在merge_at（）紀錄 --stk_size;

static struct list_head *merge_at(void *priv,
                                  list_cmp_func_t cmp,
                                  struct list_head *at)
{
    size_t len = run_size(at) + run_size(at->prev);
    struct list_head *prev = at->prev->prev;
    struct list_head *list = merge(priv, cmp, at->prev, at);
    list->prev = prev;
    list->next->prev = (struct list_head *) len;
    --stk_size;//每合併一次，就會減少一組 run
				//因此在這裡做 --stk_size
    return list;
}

`merge_force_collapse()`

在 timsort 演算法裡
merge_force_collapse()是用在
Step 3: 合併堆疊剩餘的 run

void timsort(void *priv, struct list_head *head, list_cmp_func_t cmp)
{
    stk_size = 0;

    struct list_head *list = head->next, *tp = NULL;
    if (head == head->prev)
        return;

    /* Convert to a null-terminated singly-linked list. */
    head->prev->next = NULL;

    do {
        /* Find next run */
        struct pair result = find_run(priv, list, cmp);//Step 1: 紀錄鏈結串列的長度及降序/升序
        result.head->prev = tp;
        tp = result.head;
        list = result.next;
        stk_size++;
        tp = merge_collapse(priv, cmp, tp);// Step 2: 維持 run 長度的平衡
    } while (list);

    /* End of input; merge together all the runs. */
    tp = merge_force_collapse(priv, cmp, tp);//Step 3: 合併堆疊剩餘的 run

和前面 merge_collapse() 演算法，判斷選哪一個 run 合併的根據一樣

static struct list_head *merge_force_collapse(void *priv,
                                              list_cmp_func_t cmp,
                                              struct list_head *tp)
{
    while (run 個數 >= 3) {
	// run 個數 取前3個run點 A B C ，或是取4個run點 A B C D
	//C 獨立出來 ， B 和 D 取裡面較小的 ，和 C 合併 
	if(run (B) <run (D)) // 代表 B 較小
	{
		// C （tp->prev 節點） 和 B 合併
		tp->prev = merge_at(priv, cmp, tp->prev);
	}else{ // 代表 D 較小
		// C 和 D（tp節點 ） 合併
		tp = merge_at(priv, cmp, tp);
	}	
    }
    return tp;
}

static struct list_head *merge_force_collapse(void *priv,
                                              list_cmp_func_t cmp,
                                              struct list_head *tp)
{
    while (stk_size >= 3) {
        if (run_size(tp->prev->prev) < run_size(tp)) {
            tp->prev = merge_at(priv, cmp, tp->prev);
        } else {
            tp = merge_at(priv, cmp, tp);
        }
    }
    return tp;
}

在 TimSort 中，保持 runs 的長度平衡是非常重要的，這有助於最小化合併操作的深度和運行時間。

平衡合併：當 run_size(tp->prev->prev) < run_size(tp) 時，選擇合併 tp->prev 和 tp->prev->prev 這兩個較小的 runs，有助於保持合併後的新 run 的長度平衡。
防止不平衡：如果直接合併 tp 和 tp->prev，可能會導致新的 run 過大，從而在後續合併中引入更高的時間複雜度。

`build_prev_link()`

prev 變數，之前 find_run() 被暫時借用，當成紀錄 run_len 變數(紀錄每組 run 的長度)
因此得用 build_prev_link() 函式，將 prev 恢復成串接前一個 link list 節點位址

merge_final 和 build_prev_link 函數中，prev 指標的恢復主要在 build_prev_link 函數中完成。

static void merge_final(void *priv,
                        list_cmp_func_t cmp,
                        struct list_head *head,
                        struct list_head *a,
                        struct list_head *b)
{

 ......
    /* Finish linking remainder of list b on to tail */
    build_prev_link(head, tail, b);
}

static void build_prev_link(struct list_head *head,
                            struct list_head *tail,
                            struct list_head *list)
{
    tail->next = list;
    do {
        list->prev = tail;  // 在這裡設置每個節點的 prev 指向前一個節點
        tail = list;
        list = list->next;
    } while (list);

    /* The final links to make a circular doubly-linked list */
    tail->next = head;  // 將最後一個節點的 next 指向頭節點
    head->prev = tail;  // 將頭節點的 prev 指向最後一個節點，完成雙向鍊結的連接
}

`merge_final()`

merge 合併到最後只剩兩串 link list
設定輸入的兩串 link list struct list_head *a 和 struct list_head *b

static void merge_final(void *priv,
                        list_cmp_func_t cmp,
                        struct list_head *head,
                        struct list_head *a,
                        struct list_head *b)
{
    struct list_head *tail = head;

    for (;;) {
        /* if equal, take 'a' -- important for sort stability */
		// a b 取較小的 link list 接到 head 
        if (cmp(priv, a, b) <= 0) {
            // if a < b
	    // a link list 接到 head 
			
	    tail->next = a;
            a->prev = tail;
            tail = a;
            a = a->next;
            if (!a)
                break;
        } else {
	    // if a > b
	    // b link list 接到 head 
            tail->next = b;
            b->prev = tail;
            tail = b;
            b = b->next;
            if (!b) {
                b = a;
                break;
            }
        }
    }

    /* Finish linking remainder of list b on to tail */
    build_prev_link(head, tail, b);
}

find_run（）

prev 被暫時借用，當成紀錄 run_len 變數

merge_collapse()

Timsort 的修正版

總結 merge_collapse 合併策略

對應到 merge_collapse() pseudo code

merge_at()

stk_size 變數

merge_force_collapse()

build_prev_link()

merge_final()