# CFS Infrastructures: rq, task_struct, cfs_rq, sched_entity etc. 筆記 (3) ## `cfs_rq` ```c=570 /* CFS-related fields in a runqueue */ struct cfs_rq { struct load_weight load; unsigned int nr_running; unsigned int h_nr_running; /* SCHED_{NORMAL,BATCH,IDLE} */ unsigned int idle_nr_running; /* SCHED_IDLE */ unsigned int idle_h_nr_running; /* SCHED_IDLE */ s64 avg_vruntime; u64 avg_load; u64 exec_clock; u64 min_vruntime; #ifdef CONFIG_SCHED_CORE unsigned int forceidle_seq; u64 min_vruntime_fi; #endif #ifndef CONFIG_64BIT u64 min_vruntime_copy; #endif struct rb_root_cached tasks_timeline; /* * 'curr' points to currently running entity on this cfs_rq. * It is set to NULL otherwise (i.e when none are currently running). */ struct sched_entity *curr; struct sched_entity *next; #ifdef CONFIG_SCHED_DEBUG unsigned int nr_spread_over; #endif #ifdef CONFIG_SMP /* * CFS load tracking */ struct sched_avg avg; #ifndef CONFIG_64BIT u64 last_update_time_copy; #endif struct { raw_spinlock_t lock ____cacheline_aligned; int nr; unsigned long load_avg; unsigned long util_avg; unsigned long runnable_avg; } removed; #ifdef CONFIG_FAIR_GROUP_SCHED u64 last_update_tg_load_avg; unsigned long tg_load_avg_contrib; long propagate; long prop_runnable_sum; /* * h_load = weight * f(tg) * * Where f(tg) is the recursive weight fraction assigned to * this group. */ unsigned long h_load; u64 last_h_load_update; struct sched_entity *h_load_next; #endif /* CONFIG_FAIR_GROUP_SCHED */ #endif /* CONFIG_SMP */ #ifdef CONFIG_FAIR_GROUP_SCHED struct rq *rq; /* CPU runqueue to which this cfs_rq is attached */ /* * leaf cfs_rqs are those that hold tasks (lowest schedulable entity in * a hierarchy). Non-leaf lrqs hold other higher schedulable entities * (like users, containers etc.) * * leaf_cfs_rq_list ties together list of leaf cfs_rq's in a CPU. * This list is used during load balance. */ int on_list; struct list_head leaf_cfs_rq_list; struct task_group *tg; /* group that "owns" this runqueue */ /* Locally cached copy of our task_group's idle value */ int idle; #ifdef CONFIG_CFS_BANDWIDTH int runtime_enabled; s64 runtime_remaining; u64 throttled_pelt_idle; #ifndef CONFIG_64BIT u64 throttled_pelt_idle_copy; #endif u64 throttled_clock; u64 throttled_clock_pelt; u64 throttled_clock_pelt_time; u64 throttled_clock_self; u64 throttled_clock_self_time; int throttled; int throttle_count; struct list_head throttled_list; struct list_head throttled_csd_list; #endif /* CONFIG_CFS_BANDWIDTH */ #endif /* CONFIG_FAIR_GROUP_SCHED */ }; ``` ### RBtree in `cfs_rq` ![image](https://hackmd.io/_uploads/ByNpn_gGA.png) ```c void init_cfs_rq(struct cfs_rq *cfs_rq) { cfs_rq->tasks_timeline = RB_ROOT_CACHED; u64_u32_store(cfs_rq->min_vruntime, (u64)(-(1LL << 20))); #ifdef CONFIG_SMP raw_spin_lock_init(&cfs_rq->removed.lock); #endif } ``` ```c struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) { struct rb_node *left = rb_first_cached(&cfs_rq->tasks_timeline); if (!left) return NULL; return __node_2_se(left); } ``` ## `sched_entity` ```c=536 struct sched_entity { /* For load-balancing: */ struct load_weight load; struct rb_node run_node; u64 deadline; u64 min_vruntime; struct list_head group_node; unsigned int on_rq; u64 exec_start; u64 sum_exec_runtime; u64 prev_sum_exec_runtime; u64 vruntime; s64 vlag; u64 slice; u64 nr_migrations; #ifdef CONFIG_FAIR_GROUP_SCHED int depth; struct sched_entity *parent; /* rq on which this entity is (to be) queued: */ struct cfs_rq *cfs_rq; /* rq "owned" by this entity/group: */ struct cfs_rq *my_q; /* cached value of my_q->h_nr_running */ unsigned long runnable_weight; #endif #ifdef CONFIG_SMP /* * Per entity load average tracking. * * Put into separate cache line so it does not * collide with read-mostly values above. */ struct sched_avg avg; #endif }; ``` ### `task_group` ![image](https://hackmd.io/_uploads/ByPDa_eMA.png) > The scheduling entities se(1) and se(3) represents some individual tasks, whereas se(2) represents a group of tasks with child scheduling entities se(A) and se(B). Since se(2) corresponds to a group, it owns a CFS runqueue that can be accessed through its my_q field. Entities that belongs to a group, like se(A) and se(B), have a non-NULL member called parent that enables them to access their parent task_group. Note that the entirety of Figure 4.1 only represents one per-CPU runqueue rq of struct rq, in which the cfs_rq for the root taskgroup is a reference of rq->cfs_rq.