# System Call states
## Hello.c
```c=
//Hello.c
#include <stdio.h>
/* Write a fixed greeting to standard output and report success. */
int main(void)
{
    fputs("Hello linux kernel\n", stdout);
    return 0;
}
```
利用gcc編譯後並使用strace
```c=
strace -o hello.log ./hello
```
會得到
```cmake=
execve("./hello", ["./hello"], 0x7ffec1847c90 /* 62 vars */) = 0
brk(NULL) = 0x555ff5ee0000
access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or directory)
access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=77067, ...}) = 0
mmap(NULL, 77067, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f3ff20ff000
close(3) = 0
access("/etc/ld.so.nohwcap", F_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0\240\35\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2030928, ...}) = 0
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f3ff20fd000
mmap(NULL, 4131552, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f3ff1af8000
mprotect(0x7f3ff1cdf000, 2097152, PROT_NONE) = 0
mmap(0x7f3ff1edf000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1e7000) = 0x7f3ff1edf000
mmap(0x7f3ff1ee5000, 15072, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f3ff1ee5000
close(3) = 0
arch_prctl(ARCH_SET_FS, 0x7f3ff20fe4c0) = 0
mprotect(0x7f3ff1edf000, 16384, PROT_READ) = 0
mprotect(0x555ff5c25000, 4096, PROT_READ) = 0
mprotect(0x7f3ff2112000, 4096, PROT_READ) = 0
munmap(0x7f3ff20ff000, 77067) = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0
brk(NULL) = 0x555ff5ee0000
brk(0x555ff5f01000) = 0x555ff5f01000
write(1, "Hello linux kernel\n", 19) = 19
exit_group(0) = ?
+++ exited with 0 +++
```
可以看到write指令會把字串進行輸出,測試python檔案會變成很長一串會有更多的system call。
## systemcall的包裝函數(Wrapper Function)
一般來說無法直接使用C等高階語言發出System call,必須藉助assembly language(MOV、ADD等指令)才可以,所以這時候就會透過Wrapper function(包裝函數)來代為發出System call,高階語言的程式只要呼叫包裝函數,就能間接使用那些instruction。以下以getppid()作為範例:
```c=
//loop.c
#include <sys/types.h>
#include <unistd.h>
/* Call getppid() over and over in an endless loop; this program never returns. */
int main(void)
{
    while (1) {
        getppid();
    }
}
```
getppid()會回傳父行程的pid。可以使用sar命令來得知各CPU的使用狀況
```c=
sar -P ALL 1
```
並且執行後可以使用下面指令來顯示pid
```
./loop &
```
```
廿二時四分十九秒 CPU %user %nice %system %iowait %steal %idle
廿二時四分廿秒 all 9.58 0.00 12.85 0.00 0.00 77.57
廿二時四分廿秒 0 0.00 0.00 0.00 0.00 0.00 100.00
廿二時四分廿秒 1 41.30 0.00 58.70 0.00 0.00 0.00
廿二時四分廿秒 2 2.68 0.00 0.89 0.00 0.00 96.43
廿二時四分廿秒 3 0.00 0.00 0.00 0.00 0.00 100.00
廿二時四分廿秒 CPU %user %nice %system %iowait %steal %idle
廿二時四分廿一秒 all 10.08 0.00 12.34 0.00 0.00 77.58
廿二時四分廿一秒 0 0.00 0.00 0.95 0.00 0.00 99.05
廿二時四分廿一秒 1 42.35 0.00 57.65 0.00 0.00 0.00
廿二時四分廿一秒 2 2.91 0.00 0.00 0.00 0.00 97.09
廿二時四分廿一秒 3 0.00 0.00 0.00 0.00 0.00 100.00
廿二時四分廿一秒 CPU %user %nice %system %iowait %steal %idle
廿二時四分廿二秒 all 9.62 0.00 13.67 0.00 0.00 76.71
廿二時四分廿二秒 0 0.00 0.00 0.00 0.00 0.00 100.00
廿二時四分廿二秒 1 40.00 0.00 60.00 0.00 0.00 0.00
廿二時四分廿二秒 2 1.96 0.00 0.00 0.00 0.00 98.04
廿二時四分廿二秒 3 0.00 0.00 0.00 0.00 0.00 100.00
廿二時四分廿二秒 CPU %user %nice %system %iowait %steal %idle
廿二時四分廿三秒 all 9.61 0.00 12.99 0.00 0.00 77.40
廿二時四分廿三秒 0 0.00 0.00 0.00 0.00 0.00 100.00
廿二時四分廿三秒 1 42.35 0.00 57.65 0.00 0.00 0.00
廿二時四分廿三秒 2 2.04 0.00 0.00 0.00 0.00 97.96
廿二時四分廿三秒 3 0.00 0.00 0.00 0.00 0.00 100.00
廿二時四分廿三秒 CPU %user %nice %system %iowait %steal %idle
廿二時四分廿四秒 all 11.81 0.00 16.83 0.00 0.00 71.36
廿二時四分廿四秒 0 0.00 0.00 0.00 0.00 0.00 100.00
廿二時四分廿四秒 1 38.78 0.00 61.22 0.00 0.00 0.00
廿二時四分廿四秒 2 8.00 0.00 6.00 0.00 0.00 86.00
廿二時四分廿四秒 3 0.00 0.00 2.04 0.00 0.00 97.96
```
## 標準C函式庫
可以使用ldd來查看程式與何種函式庫進行連結
```c=
$ ldd /bin/echo
linux-vdso.so.1 (0x00007ffd25943000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f9c95a5b000)
/lib64/ld-linux-x86-64.so.2 (0x00007f9c96055000)
```
以及剛剛使用的loop檔
```c=
$ ldd loop
linux-vdso.so.1 (0x00007ffc0d92e000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f833a263000)
/lib64/ld-linux-x86-64.so.2 (0x00007f833a856000)
```
也可以從這邊得知python其實也是引用了不少C語言的函式庫,所以才會說python還是基於C來跑。
```c=
$ldd /usr/bin/python3
linux-vdso.so.1 (0x00007ffc49924000)
libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f4640eb4000)
libpthread.so.0 => /lib/x86_64-linux-gnu/libpthread.so.0 (0x00007f4640c95000)
libdl.so.2 => /lib/x86_64-linux-gnu/libdl.so.2 (0x00007f4640a91000)
libutil.so.1 => /lib/x86_64-linux-gnu/libutil.so.1 (0x00007f464088e000)
libexpat.so.1 => /lib/x86_64-linux-gnu/libexpat.so.1 (0x00007f464065c000)
libz.so.1 => /lib/x86_64-linux-gnu/libz.so.1 (0x00007f464043f000)
libm.so.6 => /lib/x86_64-linux-gnu/libm.so.6 (0x00007f46400a1000)
/lib64/ld-linux-x86-64.so.2 (0x00007f46412a5000)
```
## OS提供的指令
OS所提供的可以簡單用下方幾種來表示:
* 系統初始化 init
* 變更 sysctl,nice,sync
* 檔案操作 touch,mkdir
* 文字資料處理 grep,sort,uniq
* 效能量測 sar,iostat
* 編譯器 gcc
* 腳本執行環境 perl,python,ruby
* shell bash,sh
* 視窗系統 X11
# 行程Process
## fork()
在系統中OS Kernel裡的PID(process identifier)0代表的是kernel來做SWAP交換分頁用,則1就是表示init,所以就在這邊會進行fork,例如Web server就可以fork出兩個child process,產生出web process和database process。
```c=
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <err.h>
static void child(){
printf("I'm child,my pid is %d\n",getpid());
exit(EXIT_SUCCESS);
}
/*
 * Parent side of the fork: print our own PID together with the child's PID
 * (pid_c), then terminate successfully.
 */
static void parent(pid_t pid_c)
{
    pid_t self = getpid();

    printf("I'm parent,my pid is %d.\n And my child pid is %d.\n", self, pid_c);
    exit(EXIT_SUCCESS);
}
/*
 * Fork once and branch on the result. fork() returns the child's PID to the
 * parent and 0 to the child; that difference is what lets the two processes
 * take separate paths.
 */
int main(void)
{
    pid_t process = fork();

    switch (process) {
    case -1:
        err(EXIT_FAILURE, "fork() failed");
        break;
    case 0:
        /* In the child: print its own PID and exit. */
        child();
        break;
    default:
        /* In the parent: print its own PID and the child's PID, then exit. */
        parent(process);
        break;
    }
    /* child() and parent() both call exit(), so control should not get here. */
    err(EXIT_FAILURE, "shouldn't reach here");
}
```
輸出結果如下
```c=
$ ./fork
I'm parent,my pid is 7774.
And my child pid is 7775.
I'm child,my pid is 7775
```
## execve()
當我們想執行完一個程式後依然保有控制權,如果要新增行程,就會在母行程fork()出來後再呼叫exec(),就是所謂的fork and exec的流程,母行程會建立完echo hello的程式後將自身pid及子行程pid輸出後便結束。
```c=
//ForkandExec.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <err.h>
/*
 * Child side of fork-and-exec: announce our PID, then replace this process
 * image with /bin/echo. execve() returns only on failure, in which case the
 * error is reported and the process exits with EXIT_FAILURE.
 */
static void child()
{
    /* argv[0] conventionally names the program being run; the list must end with NULL. */
    char *args[] = {"/bin/echo", "Hi Linux", NULL};
    /*
     * Explicit empty environment. Passing NULL as envp is a Linux-only
     * extension that execve(2) warns against; an empty NULL-terminated list
     * has the same effect on Linux and is portable.
     */
    char *envp[] = {NULL};

    printf("I'm child,my pid is %d\n", getpid());
    /* Flush now: anything still in the stdio buffer is lost once execve() succeeds. */
    fflush(stdout);
    execve("/bin/echo", args, envp);
    err(EXIT_FAILURE, "exec() failed");
}
/*
 * Parent side of fork-and-exec: print our own PID and the PID of the child
 * that was just forked (pid_c), then exit successfully without waiting for it.
 */
static void parent(pid_t pid_c)
{
    pid_t own_pid = getpid();

    printf("I'm parent,my pid is %d.\n And my child pid is %d.\n", own_pid, pid_c);
    exit(EXIT_SUCCESS);
}
/*
 * Fork-and-exec driver: fork once, run child() in the new process and
 * parent() in the original one. Neither helper returns, so reaching the end
 * of main() is reported as an error.
 */
int main(void)
{
    pid_t process = fork();

    if (process == -1)
        err(EXIT_FAILURE, "fork() failed");

    if (process == 0)
        child();
    else
        parent(process);

    err(EXIT_FAILURE, "shouldn't reach here");
}
```
執行結果
```c=
$ ./ForkandExec
I'm parent,my pid is 8656.
I'm child,my pid is 8657
And my child pid is 8657.
$ Hi Linux
```

## 排程器
行程會在各核心上執行,主要會利用下面的程式去計算一下幾個迴圈才會耗費到一毫秒。實驗中n代表要同時運作的行程數,total代表要讓程式運作的合計時間,resol統計資訊的採集間隔。
並使用taskset指定在某個CPU上執行。
```c=
taskset -c 0 ./sheduler 1 100 1 > core-process.txt
```
scheduler.c程式碼如下
```c=
#include <sys/types.h>
#include <sys/wait.h>
#include <err.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
/* Number of loop iterations timed when estimating the per-millisecond rate. */
#define NLOOP_FOR_ESTIMATION 10000000000UL
/* Nanoseconds in one millisecond. */
#define NSECS_PER_MSEC 1000000UL
/* Nanoseconds in one second. */
#define NSECS_PER_SEC 1000000000UL

/*
 * Return the time from `before` to `after` in nanoseconds.
 *
 * Each struct timespec (the type clock_gettime() fills in, commonly used to
 * measure how long code takes) is flattened to a single nanosecond count,
 * tv_sec * NSECS_PER_SEC + tv_nsec, and the two counts are subtracted.
 *
 * `inline` is only a hint that the compiler may ignore; it suits short
 * functions that are called very often.
 */
static inline long diff_nsec(struct timespec before, struct timespec after)
{
    unsigned long from = before.tv_sec * NSECS_PER_SEC + before.tv_nsec;
    unsigned long to = after.tv_sec * NSECS_PER_SEC + after.tv_nsec;

    return to - from;
}
/*
 * Estimate how many iterations of an empty counting loop fit in one
 * millisecond on this machine.
 *
 * Times NLOOP_FOR_ESTIMATION iterations with CLOCK_MONOTONIC and scales the
 * iteration count by NSECS_PER_MSEC / elapsed nanoseconds. Exits via err()
 * if the clock cannot be read.
 *
 * NOTE(review): the timed loop has an empty body, so an optimizing compiler
 * may shorten or remove it; the measurements in this document presumably come
 * from an unoptimized build - confirm the build flags.
 */
static unsigned long loops_per_msec()
{
    struct timespec before, after;
    unsigned long i;

    if (clock_gettime(CLOCK_MONOTONIC, &before) < 0)
        err(EXIT_FAILURE, "clock_gettime() failed");
    for (i = 0; i < NLOOP_FOR_ESTIMATION; i++)
        ;
    if (clock_gettime(CLOCK_MONOTONIC, &after) < 0)
        err(EXIT_FAILURE, "clock_gettime() failed");

    long elapsed = diff_nsec(before, after);
    /* Never divide by zero (or a negative span) if the loop took no measurable time. */
    if (elapsed < 1)
        elapsed = 1;
    return NLOOP_FOR_ESTIMATION * NSECS_PER_MSEC / elapsed;
}
/* Burn CPU time by counting from 0 up to nloop with no other work. */
static inline void load(unsigned long nloop)
{
    unsigned long done = 0;

    while (done < nloop)
        done++;
}
/*
 * Body of one worker process.
 *
 * Runs `nrecord` rounds of load(nloop_per_resol), storing a CLOCK_MONOTONIC
 * timestamp in buf[] after each round. Only after all rounds are done does it
 * print one line per round: "<id>\t<ms elapsed since start>\t<progress %>".
 * Never returns; the process exits with EXIT_SUCCESS.
 *
 * id              - label printed in the first column
 * buf             - caller-provided array with room for nrecord timestamps
 * nrecord         - number of rounds / timestamps
 * nloop_per_resol - loop count passed to load() for each round
 * start           - reference time that the elapsed column is measured from
 */
static void child_fn(int id, struct timespec *buf, int nrecord, unsigned long nloop_per_resol, struct timespec start)
{
    int i;

    for (i = 0; i < nrecord; i++) {
        load(nloop_per_resol);
        clock_gettime(CLOCK_MONOTONIC, &buf[i]);
    }
    for (i = 0; i < nrecord; i++) {
        /*
         * Divide as signed long: NSECS_PER_MSEC is unsigned long, which would
         * otherwise turn the quotient unsigned and mismatch the %ld below.
         */
        long elapsed_ms = diff_nsec(start, buf[i]) / (long)NSECS_PER_MSEC;

        printf("%d\t%ld\t%d\n", id, elapsed_ms, (i + 1) * 100 / nrecord);
    }
    exit(EXIT_SUCCESS);
}
/* Call wait() once for each of nproc children, discarding the exit status. */
static void parent_fn(int nproc)
{
    int remaining = nproc;

    while (remaining > 0) {
        wait(NULL);
        remaining--;
    }
}
static pid_t *pids;
/*
 * scheduler <nproc> <total[ms]> <resolution[ms]>
 *
 * Calibrates a busy loop, then forks <nproc> workers that each run
 * total/resolution rounds of load and print their timestamps (see child_fn).
 * The parent waits for every worker it created.
 *
 * Exit status: EXIT_SUCCESS when all workers were forked, EXIT_FAILURE on bad
 * arguments, allocation failure or fork failure. If a fork fails, the workers
 * created so far are sent SIGINT before being waited for.
 */
int main(int argc, char *argv[])
{
    int ret = EXIT_FAILURE;

    if (argc < 4) {
        fprintf(stderr, "usage: %s <nproc> <total[ms]> <resolution [ms]>\n", argv[0]);
        exit(EXIT_FAILURE);
    }
    int nproc = atoi(argv[1]);
    int total = atoi(argv[2]);
    int resol = atoi(argv[3]);
    if (nproc < 1) {
        fprintf(stderr, "<nproc>(%d) should be >= 1\n", nproc);
        exit(EXIT_FAILURE);
    }
    if (total < 1) {
        fprintf(stderr, "<total>(%d) should be >= 1\n", total);
        exit(EXIT_FAILURE);
    }
    if (resol < 1) {
        fprintf(stderr, "<resol>(%d) should be >= 1\n", resol);
        exit(EXIT_FAILURE);
    }
    if (total % resol) {
        fprintf(stderr, "<total>(%d) should be multiple of <resolution>(%d)\n", total, resol);
        exit(EXIT_FAILURE);
    }

    /* One timestamp slot per sampling interval. */
    int nrecord = total / resol;
    struct timespec *logbuf = malloc(nrecord * sizeof *logbuf);
    if (!logbuf)
        err(EXIT_FAILURE, "malloc(logbuf) failed");

    puts("estimation workload which takes just one millisecond");
    unsigned long nloop_per_resol = loops_per_msec() * resol;
    puts("end estimation");
    /* Flush before forking so the children do not inherit and re-print buffered text. */
    fflush(stdout);

    pids = malloc(nproc * sizeof *pids);
    if (pids == NULL) {
        warn("malloc(pids) failed");
        goto free_logbuf;
    }

    struct timespec start;
    clock_gettime(CLOCK_MONOTONIC, &start);

    int i, ncreated;
    for (i = 0, ncreated = 0; i < nproc; i++, ncreated++) {
        pids[i] = fork();
        if (pids[i] < 0) {
            warn("fork() failed");
            goto wait_children;
        } else if (pids[i] == 0) {
            /* Worker process: child_fn() exits and does not return. */
            child_fn(i, logbuf, nrecord, nloop_per_resol, start);
        }
    }
    ret = EXIT_SUCCESS;

wait_children:
    /* A fork failed: interrupt the workers already started instead of letting them run on. */
    if (ret == EXIT_FAILURE)
        for (i = 0; i < ncreated; i++)
            if (kill(pids[i], SIGINT) < 0)
                warn("kill(%d) failed", pids[i]);

    for (i = 0; i < ncreated; i++)
        if (wait(NULL) < 0)
            warn("wait() failed.");

    free(pids);
free_logbuf:
    free(logbuf);
    exit(ret);
}
```
可以藉由此程式進行不同行程數的結果比較,例如:1、2、4。
## Taskset指令
可以使用作業系統所提供的taskset的命令,並使用-c引數來指定在某個邏輯CPU上運作。
```c=
taskset -c 0 ./sheduler 1 100 1 > core-process.txt
```
可以將行程數為1、程式運作總時間100、統計資訊採樣間隔1(毫秒)的執行結果輸出到core-process.txt上查看,並使用繪圖軟體將結果繪製成圖。
## 行程狀態及轉換
| 狀態 | 意思 |
| -------- | -------- |
| 執行狀態 | 目前正在使用邏輯CPU |
| 待命狀態 | 等待CPU時間分配 |
| 休眠狀態 | 等待某事件的發生。事件發生前是不會使用到CPU時間 |
| 殭屍狀態 | 在行程結束後等待母行程接受結束狀態 |
在Linux中可以使用ps ax命令來看系統行程的顯示列表來供確認,例如以下指令可以看到有多少行程在執行。
```c=
$ ps ax| wc -l
247
```

## 吞吐量與延遲(Throughput & Latency)
吞吐量代表單位時間的總工作量。越高越好。公式是已完成的行程數量/經過時間。
延遲代表各處理開始到結束為止經過的時間。越短越好。公式是處理結束時刻-處理開始時刻。
簡單來說邏輯CPU的運算資源使用的越多吞吐量越高,代表CPU閒置的比率越低。則閒置的比率越低代表吞吐量越高。
我們可以透過以下的實驗來進行一下簡單測試,使用loop程式來進行實驗。
首先可以看到runq-sz欄位(執行中與待命中的行程數)顯示0,代表沒有待命的行程,表示CPU閒置中
```c
$ sar -q 1 1
Linux 5.4.0-150-generic (LinuxKernel18) 廿廿三年七月廿三日 _x86_64_ (4 CPU)
廿三時十九分廿一秒 runq-sz plist-sz ldavg-1 ldavg-5 ldavg-15 blocked
廿三時十九分廿二秒 0 614 0.00 0.00 0.00 0
Average: 0 614 0.00 0.00 0.00 0
```
再來執行loop
```c
$ taskset -c 0 ./loop &
[1] 25127
```
再查看一次,發現出現了一個待命行程
```c
$ sar -q 1 1
Linux 5.4.0-150-generic (LinuxKernel18) 廿廿三年七月廿三日 _x86_64_ (4 CPU)
廿三時十九分57秒 runq-sz plist-sz ldavg-1 ldavg-5 ldavg-15 blocked
廿三時十九分58秒 1 615 0.15 0.03 0.01 0
Average: 1 615 0.15 0.03 0.01 0
```
再執行第二個程式loop
```c
$ taskset -c 0 ./loop &
[2] 25131
```
可以發現有兩個待命程式
```c
$ sar -q 1 1
Linux 5.4.0-150-generic (LinuxKernel18) 廿廿三年七月廿三日 _x86_64_ (4 CPU)
廿三時廿分廿七秒 runq-sz plist-sz ldavg-1 ldavg-5 ldavg-15 blocked
廿三時廿分廿八秒 2 616 0.57 0.14 0.05 0
Average: 2 616 0.57 0.14 0.05 0
```
最後殺死兩個行程,完成實驗
```c
$ kill 25127 25131
```
總結來說,在一個邏輯CPU上同一時間進行處理的行程只有一個;在可執行複數行程的情況下,就是將各行程於適當的time slot在CPU上依順序處理。
所以在多核心的CPU環境下,若不同時運作複數行程,吞吐量不會升高;而在同一個邏輯CPU上,就算行程數增加到比邏輯CPU數多,吞吐量也不會升高。
## 經過時間與使用時間
在Linux底下可以使用time命令來取得開始到結束的時間,分別為經過時間,從行程開始之後到結束為止的經過時間,就像是使用碼表去量測開始到結束的時間。使用時間則是代表行程實際使用到邏輯cpu的時間。

我們使用total(處理的所需時間)10秒及resol(進度顯示)10秒來作為sheduler程式的測試
```c
$ time taskset -c 0 ./sheduler 1 10000 10000
estimation workload which takes just one millisecond
end estimation
0 16293 100
real 0m26.901s
user 0m26.895s
sys 0m0.004s
```
real的值代表經過時間。將user加上sys就可以得到使用時間。user值代表行程在user space執行時使用到的CPU時間,sys值則代表行程從user space委託kernel執行system call時所花的CPU時間。
行程數為2的執行結果
```c
$ time taskset -c 0 ./sheduler 2 10000 10000
estimation workload which takes just one millisecond
end estimation
0 33275 100
1 33283 100
real 0m43.875s
user 0m43.852s
sys 0m0.004s
```
那如果是兩個CPU一個行程的情況會得到與1個CPU差不多的結果
```c
$ time taskset -c 0,1 ./sheduler 1 10000 10000
estimation workload which takes just one millisecond
end estimation
0 16719 100
real 0m27.329s
user 0m27.328s
sys 0m0.000s
```
那如果是兩個CPU兩個行程的情況,兩個行程可以同時在不同的邏輯CPU上執行,因此經過時間(real)會與一個行程時差不多,但使用時間(user)則與一個CPU兩個行程時差不多,也就是user會大於real
```c
$ time taskset -c 0,1 ./sheduler 2 10000 10000
estimation workload which takes just one millisecond
end estimation
1 16773 100
0 16812 100
real 0m27.479s
user 0m44.204s
sys 0m0.029s
```
休眠行程就是我們可以指定秒數休眠然後並結束的行程
```c
$ time sleep 10
real 0m10.003s
user 0m0.001s
sys 0m0.000s
```
實際的行程會以非常複雜的方式轉換各種狀態,這邊也可以使用ps -eo命令裡的etime欄位跟time欄位所示可以將每個行程、命令名、經過時間、使用時間呈現出來看看
```c
$ ps -eo pid,comm,etime,time
PID COMMAND ELAPSED TIME
1 systemd 9-00:40:11 00:00:13
2 kthreadd 9-00:40:11 00:00:00
3 rcu_gp 9-00:40:11 00:00:00
4 rcu_par_gp 9-00:40:11 00:00:00
6 kworker/0:0H 9-00:40:11 00:00:00
8 mm_percpu_wq 9-00:40:11 00:00:00
9 ksoftirqd/0 9-00:40:11 00:00:01
10 rcu_sched 9-00:40:11 00:01:22
11 migration/0 9-00:40:11 00:00:12
12 idle_inject/0 9-00:40:11 00:00:00
14 cpuhp/0 9-00:40:11 00:00:00
15 cpuhp/1 9-00:40:11 00:00:00
16 idle_inject/1 9-00:40:11 00:00:00
17 migration/1 9-00:40:11 00:00:12
18 ksoftirqd/1 9-00:40:11 00:00:00
20 kworker/1:0H-kb 9-00:40:11 00:00:00
21 cpuhp/2 9-00:40:11 00:00:00
22 idle_inject/2 9-00:40:11 00:00:00
23 migration/2 9-00:40:11 00:00:11
24 ksoftirqd/2 9-00:40:11 00:00:00
26 kworker/2:0H 9-00:40:11 00:00:00
27 cpuhp/3 9-00:40:11 00:00:00
28 idle_inject/3 9-00:40:11 00:00:00
29 migration/3 9-00:40:11 00:00:12
30 ksoftirqd/3 9-00:40:11 00:00:05
32 kworker/3:0H-kb 9-00:40:11 00:00:00
33 kdevtmpfs 9-00:40:11 00:00:00
34 netns 9-00:40:11 00:00:00
35 rcu_tasks_kthre 9-00:40:11 00:00:00
36 kauditd 9-00:40:11 00:00:00
37 khungtaskd 9-00:40:11 00:00:01
38 oom_reaper 9-00:40:11 00:00:00
39 writeback 9-00:40:11 00:00:00
40 kcompactd0 9-00:40:11 00:00:00
41 ksmd 9-00:40:11 00:00:00
42 khugepaged 9-00:40:11 00:00:03
89 kintegrityd 9-00:40:11 00:00:00
90 kblockd 9-00:40:11 00:00:00
91 blkcg_punt_bio 9-00:40:11 00:00:00
92 tpm_dev_wq 9-00:40:11 00:00:00
93 ata_sff 9-00:40:11 00:00:00
94 md 9-00:40:11 00:00:00
95 edac-poller 9-00:40:11 00:00:00
96 devfreq_wq 9-00:40:11 00:00:00
97 watchdogd 9-00:40:11 00:00:00
102 kswapd0 9-00:40:10 00:00:00
103 ecryptfs-kthrea 9-00:40:10 00:00:00
105 kthrotld 9-00:40:10 00:00:00
106 acpi_thermal_pm 9-00:40:10 00:00:00
107 scsi_eh_0 9-00:40:10 00:00:00
108 scsi_tmf_0 9-00:40:10 00:00:00
109 scsi_eh_1 9-00:40:10 00:00:00
110 scsi_tmf_1 9-00:40:10 00:00:00
112 vfio-irqfd-clea 9-00:40:10 00:00:00
114 ipv6_addrconf 9-00:40:10 00:00:00
123 kstrp 9-00:40:10 00:00:00
126 kworker/u9:0 9-00:40:10 00:00:00
139 charger_manager 9-00:40:10 00:00:00
182 scsi_eh_2 9-00:40:10 00:00:00
183 scsi_tmf_2 9-00:40:10 00:00:00
185 kworker/0:1H-kb 9-00:40:10 00:00:05
206 kworker/3:1H-kb 9-00:40:09 00:00:05
208 jbd2/sda1-8 9-00:40:09 00:00:03
209 ext4-rsv-conver 9-00:40:09 00:00:00
223 kworker/1:1H-kb 9-00:40:09 00:00:08
239 kworker/2:1H-kb 9-00:40:09 00:00:08
244 systemd-journal 9-00:40:09 00:00:04
271 systemd-udevd 9-00:40:09 00:00:01
281 loop0 9-00:40:09 00:00:00
297 loop1 9-00:40:09 00:00:00
298 loop2 9-00:40:09 00:00:00
319 systemd-resolve 9-00:40:09 00:00:03
322 loop3 9-00:40:09 00:00:00
348 loop4 9-00:40:09 00:00:00
349 loop5 9-00:40:09 00:00:00
350 loop6 9-00:40:09 00:00:00
377 loop7 9-00:40:09 00:00:00
383 iprt-VBoxWQueue 9-00:40:09 00:00:00
398 irq/18-vmwgfx 9-00:40:09 00:00:01
402 ttm_swap 9-00:40:09 00:00:00
412 cryptd 9-00:40:08 00:00:00
415 loop9 9-00:40:08 00:00:00
522 dbus-daemon 9-00:40:08 00:00:29
573 cron 9-00:40:08 00:00:01
574 systemd-logind 9-00:40:08 00:00:01
575 acpid 9-00:40:08 00:00:00
576 ModemManager 9-00:40:08 00:00:00
577 networkd-dispat 9-00:40:08 00:00:00
579 wpa_supplicant 9-00:40:08 00:00:05
580 avahi-daemon 9-00:40:08 00:00:00
583 irqbalance 9-00:40:08 00:00:15
587 avahi-daemon 9-00:40:08 00:00:00
591 udisksd 9-00:40:08 00:00:01
595 rsyslogd 9-00:40:08 00:00:00
603 NetworkManager 9-00:40:08 00:00:20
604 accounts-daemon 9-00:40:08 00:00:15
636 polkitd 9-00:40:08 00:00:00
737 unattended-upgr 9-00:40:08 00:00:00
758 gdm3 9-00:40:08 00:00:00
763 VBoxDRMClient 9-00:40:08 00:04:23
766 VBoxService 9-00:40:08 00:01:34
806 gdm-session-wor 9-00:40:07 00:00:00
884 systemd 9-00:40:07 00:00:00
885 (sd-pam) 9-00:40:07 00:00:00
904 gdm-wayland-ses 9-00:40:07 00:00:00
906 dbus-daemon 9-00:40:07 00:00:00
916 gnome-session-b 9-00:40:07 00:00:00
933 gnome-shell 9-00:40:07 00:01:51
941 upowerd 9-00:40:07 00:00:00
959 Xwayland 9-00:40:06 00:00:00
970 at-spi-bus-laun 9-00:40:06 00:00:00
975 dbus-daemon 9-00:40:06 00:00:00
977 at-spi2-registr 9-00:40:06 00:00:00
981 pulseaudio 9-00:40:06 00:00:00
982 rtkit-daemon 9-00:40:06 00:00:14
997 ibus-daemon 9-00:40:06 00:00:00
1000 ibus-dconf 9-00:40:06 00:00:00
1003 ibus-x11 9-00:40:06 00:00:00
1007 ibus-portal 9-00:40:06 00:00:00
1013 xdg-permission- 9-00:40:06 00:00:00
1027 dhclient 9-00:40:05 00:00:00
1041 whoopsie 9-00:40:05 00:00:00
1046 kerneloops 9-00:40:05 00:00:05
1048 kerneloops 9-00:40:05 00:00:05
1100 boltd 9-00:40:05 00:00:00
1104 packagekitd 9-00:40:05 00:00:16
1105 gsd-xsettings 9-00:40:05 00:00:00
1108 gsd-a11y-settin 9-00:40:05 00:00:00
1111 gsd-clipboard 9-00:40:05 00:00:00
1115 gsd-color 9-00:40:05 00:04:32
1116 gsd-datetime 9-00:40:05 00:00:00
1117 gsd-housekeepin 9-00:40:05 00:00:00
1119 gsd-keyboard 9-00:40:05 00:00:00
1123 gsd-media-keys 9-00:40:05 00:00:00
1129 gsd-mouse 9-00:40:05 00:00:00
1131 gsd-power 9-00:40:05 00:00:00
1134 gsd-print-notif 9-00:40:05 00:00:00
1135 gsd-rfkill 9-00:40:05 00:00:00
1137 gsd-screensaver 9-00:40:05 00:00:00
1147 gsd-sharing 9-00:40:05 00:00:00
1150 gsd-smartcard 9-00:40:05 00:00:00
1152 gsd-sound 9-00:40:05 00:00:00
1162 gsd-wacom 9-00:40:05 00:00:00
1176 colord 9-00:40:05 00:00:00
1199 ibus-engine-sim 9-00:40:05 00:00:00
1221 gdm-session-wor 9-00:39:19 00:00:00
1225 systemd 9-00:39:16 00:00:00
1226 (sd-pam) 9-00:39:16 00:00:00
1239 gnome-keyring-d 9-00:39:16 00:00:00
1243 gdm-x-session 9-00:39:16 00:00:00
1245 Xorg 9-00:39:16 00:04:50
1257 dbus-daemon 9-00:39:16 00:00:00
1260 gnome-session-b 9-00:39:16 00:00:00
1359 VBoxClient 9-00:39:15 00:00:00
1360 VBoxClient 9-00:39:15 00:00:00
1374 VBoxClient 9-00:39:15 00:00:00
1375 VBoxClient 9-00:39:15 00:05:37
1382 VBoxClient 9-00:39:15 00:00:00
1383 VBoxClient 9-00:39:15 00:39:03
1387 VBoxClient 9-00:39:15 00:00:00
1388 VBoxClient 9-00:39:15 00:03:06
1398 ssh-agent 9-00:39:15 00:00:05
1408 at-spi-bus-laun 9-00:39:15 00:00:00
1413 dbus-daemon 9-00:39:15 00:00:00
1415 at-spi2-registr 9-00:39:15 00:00:23
1450 gnome-shell 9-00:39:15 00:09:09
1464 gvfsd 9-00:39:15 00:00:00
1469 gvfsd-fuse 9-00:39:14 00:00:00
1480 pulseaudio 9-00:39:14 00:00:04
1493 ibus-daemon 9-00:39:14 00:00:10
1497 ibus-dconf 9-00:39:14 00:00:00
1499 xdg-permission- 9-00:39:14 00:00:00
1501 ibus-x11 9-00:39:14 00:00:00
1507 ibus-portal 9-00:39:14 00:00:00
1516 gnome-shell-cal 9-00:39:14 00:00:00
1526 evolution-sourc 9-00:39:13 00:00:00
1528 gvfs-udisks2-vo 9-00:39:13 00:00:00
1535 gvfs-mtp-volume 9-00:39:13 00:00:00
1539 gvfs-afc-volume 9-00:39:13 00:00:00
1544 gvfs-gphoto2-vo 9-00:39:13 00:00:00
1547 goa-daemon 9-00:39:13 00:00:00
1551 gvfs-goa-volume 9-00:39:13 00:00:00
1562 goa-identity-se 9-00:39:13 00:00:00
1567 gsd-power 9-00:39:13 00:00:00
1568 gsd-print-notif 9-00:39:13 00:00:00
1571 gsd-rfkill 9-00:39:13 00:00:00
1573 gsd-screensaver 9-00:39:13 00:00:00
1578 gsd-sharing 9-00:39:13 00:00:00
1580 gsd-smartcard 9-00:39:13 00:00:00
1585 gsd-xsettings 9-00:39:13 00:00:00
1590 gsd-wacom 9-00:39:13 00:00:00
1593 gsd-sound 9-00:39:13 00:00:00
1602 gsd-a11y-settin 9-00:39:13 00:00:00
1605 gsd-color 9-00:39:13 00:04:27
1608 gsd-clipboard 9-00:39:13 00:00:00
1611 gsd-housekeepin 9-00:39:13 00:00:07
1613 gsd-datetime 9-00:39:13 00:00:00
1616 gsd-media-keys 9-00:39:13 00:00:00
1618 gsd-keyboard 9-00:39:13 00:00:00
1625 gsd-mouse 9-00:39:13 00:00:00
1647 gsd-printer 9-00:39:12 00:00:00
1665 nautilus-deskto 9-00:39:12 00:00:43
1667 gsd-disk-utilit 9-00:39:12 00:00:00
1712 evolution-calen 9-00:39:12 00:00:00
1715 gvfsd-trash 9-00:39:12 00:00:00
1731 ibus-engine-sim 9-00:39:12 00:00:03
1742 dconf-service 9-00:39:11 00:00:00
1753 evolution-calen 9-00:39:11 00:00:00
1772 evolution-addre 9-00:39:11 00:00:00
1785 evolution-addre 9-00:39:11 00:00:00
1804 gnome-terminal- 9-00:39:08 00:00:35
1813 bash 9-00:39:07 00:00:00
1831 update-notifier 9-00:38:12 00:00:19
1833 gnome-software 9-00:38:12 00:00:37
1900 deja-dup-monito 9-00:37:12 00:00:00
2001 gvfsd-metadata 9-00:23:26 00:00:00
3390 bash 9-00:21:58 00:00:00
4281 loop10 8-16:49:23 00:00:00
4305 snapd 8-16:49:21 00:02:15
4787 loop11 8-16:47:59 00:00:00
4939 loop12 8-16:47:54 00:00:00
5032 loop13 8-16:47:53 00:00:00
5244 loop14 8-16:47:52 00:00:00
5271 loop15 8-16:47:52 00:00:00
5415 loop16 8-16:47:50 00:00:00
5547 loop17 8-16:47:24 00:00:00
5679 loop18 8-16:47:00 00:00:00
5817 loop19 8-16:45:05 00:00:00
5943 kworker/2:7-eve 8-16:41:34 00:01:03
5948 loop20 8-16:41:34 00:00:00
6071 kworker/1:7-eve 8-16:41:27 00:01:03
6076 loop21 8-16:41:27 00:00:00
6351 kworker/3:5-eve 8-16:39:54 00:01:29
6357 loop22 8-16:39:54 00:00:00
12855 kworker/0:2-eve 6-12:15:22 00:02:43
25281 cupsd 22:14:22 00:00:00
25282 cups-browsed 22:14:22 00:00:00
26240 kworker/2:1-cgr 07:04:16 00:00:00
27079 kworker/u8:1-ev 03:14:09 00:00:00
27090 kworker/u8:0-ev 03:02:40 00:00:00
27128 kworker/0:1-cgr 02:18:18 00:00:00
27188 kworker/1:0-cgr 01:19:43 00:00:00
27232 kworker/u8:2-ev 23:41 00:00:00
27234 kworker/3:0-cgr 20:18 00:00:00
27254 ps 00:00 00:00:00
```
可以從上述知道大多數的行程從開始已經過了8-9天左右,也可以看到他們分別的邏輯CPU使用時間。一些程式很多時間處於休眠是因為一些程式是開啟才會用,都等待來自使用者的開啟等等。
若行程不斷使用CPU時間進行處理運算,而且沒有其他運作中的行程存在,使用時間就會接近「經過時間 × 並行執行數」。
## 優先權的變更
在System call中有個nice()可以進行優先權的變更,數值範圍在[-20,19]之間,預設為0,數值越小優先權越高。高優先權的行程會拿到比較多的CPU時間,低優先權的行程只能獲得較少的CPU時間。任何人都可以調低優先權,但是只有ROOT(sudo)可以調高優先權。
接著我們改變一下scheduler程式的要求
* 運作的行程數固定為2個
* 將引數1指定為total,引數2指定為resol
* 兩個行程中將其中一個設定為priority 0。另一個為priority 5。
以下為scheduler_nice.c程式
```c
#include <sys/types.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <err.h>
/* Number of loop iterations timed when estimating the per-millisecond rate. */
#define NLOOP_FOR_ESTIMATION 10000000000UL
/* Nanoseconds in one millisecond. */
#define NSECS_PER_MSEC 1000000UL
/* Nanoseconds in one second. */
#define NSECS_PER_SEC 1000000000UL

/*
 * Nanoseconds elapsed between two timestamps (`after` minus `before`).
 *
 * Both struct timespec values (the type filled in by clock_gettime(), often
 * used to measure how long code runs) are converted to a plain nanosecond
 * count before subtracting.
 *
 * `inline` is a hint the compiler may ignore; it is worth adding to short
 * functions that are called very frequently.
 */
static inline long diff_nsec(struct timespec before, struct timespec after)
{
    unsigned long begin_ns = before.tv_sec * NSECS_PER_SEC + before.tv_nsec;
    unsigned long end_ns = after.tv_sec * NSECS_PER_SEC + after.tv_nsec;

    return end_ns - begin_ns;
}
/*
 * Estimate how many iterations of an empty counting loop fit in one
 * millisecond on this machine.
 *
 * Times NLOOP_FOR_ESTIMATION iterations with CLOCK_MONOTONIC and scales the
 * iteration count by NSECS_PER_MSEC / elapsed nanoseconds. Exits via err()
 * if the clock cannot be read.
 *
 * NOTE(review): the timed loop has an empty body, so an optimizing compiler
 * may shorten or remove it; the measurements in this document presumably come
 * from an unoptimized build - confirm the build flags.
 */
static unsigned long loops_per_msec()
{
    struct timespec before, after;
    unsigned long i;

    if (clock_gettime(CLOCK_MONOTONIC, &before) < 0)
        err(EXIT_FAILURE, "clock_gettime() failed");
    for (i = 0; i < NLOOP_FOR_ESTIMATION; i++)
        ;
    if (clock_gettime(CLOCK_MONOTONIC, &after) < 0)
        err(EXIT_FAILURE, "clock_gettime() failed");

    long elapsed = diff_nsec(before, after);
    /* Never divide by zero (or a negative span) if the loop took no measurable time. */
    if (elapsed < 1)
        elapsed = 1;
    return NLOOP_FOR_ESTIMATION * NSECS_PER_MSEC / elapsed;
}
/* Keep the CPU busy: count from 0 up to nloop and do nothing else. */
static inline void load(unsigned long nloop)
{
    unsigned long count = 0;

    while (count < nloop)
        count++;
}
/*
 * Body of one worker process.
 *
 * Runs `nrecord` rounds of load(nloop_per_resol), storing a CLOCK_MONOTONIC
 * timestamp in buf[] after each round. Only after all rounds are done does it
 * print one line per round: "<id>\t<ms elapsed since start>\t<progress %>".
 * Never returns; the process exits with EXIT_SUCCESS.
 *
 * id              - label printed in the first column
 * buf             - caller-provided array with room for nrecord timestamps
 * nrecord         - number of rounds / timestamps
 * nloop_per_resol - loop count passed to load() for each round
 * start           - reference time that the elapsed column is measured from
 */
static void child_fn(int id, struct timespec *buf, int nrecord, unsigned long nloop_per_resol, struct timespec start)
{
    int i;

    for (i = 0; i < nrecord; i++) {
        load(nloop_per_resol);
        clock_gettime(CLOCK_MONOTONIC, &buf[i]);
    }
    for (i = 0; i < nrecord; i++) {
        /*
         * Divide as signed long: NSECS_PER_MSEC is unsigned long, which would
         * otherwise turn the quotient unsigned and mismatch the %ld below.
         */
        long elapsed_ms = diff_nsec(start, buf[i]) / (long)NSECS_PER_MSEC;

        printf("%d\t%ld\t%d\n", id, elapsed_ms, (i + 1) * 100 / nrecord);
    }
    exit(EXIT_SUCCESS);
}
/* Issue one wait() call per child (nproc in total); exit statuses are ignored. */
static void parent_fn(int nproc)
{
    int pending = nproc;

    while (pending > 0) {
        wait(NULL);
        pending--;
    }
}
static pid_t *pids;
/*
 * scheduler_nice <total[ms]> <resolution[ms]>
 *
 * Variant of the scheduler experiment with exactly two workers: worker 0
 * keeps the default priority, worker 1 lowers its own priority with nice(5)
 * before it starts running the load.
 *
 * Exit status: EXIT_SUCCESS when both workers were forked, EXIT_FAILURE on
 * bad arguments, allocation failure or fork failure. If a fork fails, the
 * workers created so far are sent SIGINT before being waited for.
 */
int main(int argc, char *argv[])
{
    int ret = EXIT_FAILURE;

    if (argc < 3) {
        /* This variant takes no <nproc>; the process count is fixed at 2. */
        fprintf(stderr, "usage: %s <total[ms]> <resolution[ms]>\n", argv[0]);
        exit(EXIT_FAILURE);
    }
    int nproc = 2;
    int total = atoi(argv[1]);
    int resol = atoi(argv[2]);
    if (total < 1) {
        fprintf(stderr, "<total>(%d) should be >= 1\n", total);
        exit(EXIT_FAILURE);
    }
    if (resol < 1) {
        fprintf(stderr, "<resol>(%d) should be >= 1\n", resol);
        exit(EXIT_FAILURE);
    }
    if (total % resol) {
        fprintf(stderr, "<total>(%d) should be multiple of <resolution>(%d)\n", total, resol);
        exit(EXIT_FAILURE);
    }

    /* One timestamp slot per sampling interval. */
    int nrecord = total / resol;
    struct timespec *logbuf = malloc(nrecord * sizeof *logbuf);
    if (!logbuf)
        err(EXIT_FAILURE, "malloc(logbuf) failed");

    unsigned long nloop_per_resol = loops_per_msec() * resol;

    pids = malloc(nproc * sizeof *pids);
    if (pids == NULL) {
        warn("malloc(pids) failed");
        goto free_logbuf;
    }

    struct timespec start;
    clock_gettime(CLOCK_MONOTONIC, &start);

    int i, ncreated;
    for (i = 0, ncreated = 0; i < nproc; i++, ncreated++) {
        pids[i] = fork();
        if (pids[i] < 0) {
            warn("fork() failed");
            goto wait_children;
        } else if (pids[i] == 0) {
            /*
             * Worker 1 runs at lower priority (nice +5); worker 0 stays at
             * the default. nice() can also return -1 as a valid new nice
             * value, so this warning may be spurious when the program itself
             * was started at nice -6.
             */
            if (i == 1 && nice(5) == -1)
                warn("nice(5) failed");
            /* child_fn() exits and does not return. */
            child_fn(i, logbuf, nrecord, nloop_per_resol, start);
        }
    }
    ret = EXIT_SUCCESS;

wait_children:
    /* A fork failed: interrupt the workers already started instead of letting them run on. */
    if (ret == EXIT_FAILURE)
        for (i = 0; i < ncreated; i++)
            if (kill(pids[i], SIGINT) < 0)
                warn("kill(%d) failed", pids[i]);

    for (i = 0; i < ncreated; i++)
        if (wait(NULL) < 0)
            warn("wait() failed.");

    free(pids);
free_logbuf:
    free(logbuf);
    exit(ret);
}
```
執行為了要凸顯效果所以會在邏輯CPU 0上執行
```c
$ taskset -c 0 ./sheduler_nice 100 1
1 1 1
1 2 2
1 4 3
1 6 4
1 8 5
1 9 6
1 11 7
1 25 8
1 27 9
1 29 10
1 30 11
1 32 12
1 33 13
1 35 14
1 51 15
1 53 16
1 55 17
1 56 18
1 58 19
1 60 20
1 61 21
1 63 22
1 77 23
1 79 24
1 80 25
1 82 26
1 84 27
1 85 28
1 100 29
1 102 30
1 103 31
1 105 32
1 107 33
1 108 34
1 110 35
1 112 36
1 125 37
1 126 38
1 128 39
1 129 40
1 131 41
1 132 42
1 133 43
1 148 44
1 149 45
1 151 46
1 152 47
1 154 48
1 156 49
1 157 50
1 159 51
1 173 52
1 174 53
1 176 54
1 178 55
1 179 56
1 181 57
1 182 58
1 199 59
1 200 60
1 202 61
1 203 62
1 205 63
1 206 64
1 208 65
1 210 66
1 211 67
1 213 68
1 215 69
1 216 70
1 218 71
1 231 72
1 232 73
1 234 74
1 236 75
1 237 76
1 239 77
1 251 78
1 252 79
1 254 80
1 256 81
1 257 82
1 259 83
1 272 84
1 274 85
1 275 86
1 277 87
1 279 88
1 280 89
1 282 90
1 283 91
1 304 92
1 305 93
1 307 94
1 309 95
1 310 96
1 312 97
1 314 98
1 316 99
1 317 100
0 13 1
0 15 2
0 17 3
0 18 4
0 20 5
0 22 6
0 23 7
0 37 8
0 39 9
0 41 10
0 42 11
0 44 12
0 45 13
0 47 14
0 49 15
0 50 16
0 64 17
0 65 18
0 67 19
0 69 20
0 71 21
0 72 22
0 74 23
0 87 24
0 88 25
0 90 26
0 91 27
0 93 28
0 94 29
0 96 30
0 98 31
0 112 32
0 113 33
0 115 34
0 116 35
0 118 36
0 120 37
0 122 38
0 123 39
0 135 40
0 137 41
0 138 42
0 140 43
0 142 44
0 143 45
0 144 46
0 146 47
0 160 48
0 162 49
0 163 50
0 165 51
0 167 52
0 168 53
0 170 54
0 184 55
0 185 56
0 187 57
0 188 58
0 190 59
0 191 60
0 193 61
0 194 62
0 196 63
0 218 64
0 220 65
0 222 66
0 223 67
0 225 68
0 226 69
0 228 70
0 240 71
0 242 72
0 243 73
0 245 74
0 247 75
0 248 76
0 260 77
0 262 78
0 264 79
0 265 80
0 267 81
0 269 82
0 270 83
0 286 84
0 287 85
0 289 86
0 290 87
0 292 88
0 294 89
0 295 90
0 297 91
0 298 92
0 300 93
0 302 94
0 303 95
0 319 96
0 321 97
0 322 98
0 323 99
0 325 100
```
可以簡單的發現優先權較高(nice值0)與優先權低的(nice值5)相較之下獲得了比較多的CPU時間。所以行程1會接著行程0之後才會執行。
優先權設定可以透過nice指令,並使用-n來指定優先權。
可以用sar的輸出結果中的%nice欄位來確認:%nice表示優先權從預設值變更過的行程在user mode下執行的時間比例,%user則是以預設優先權在user mode下執行的時間比例。