协程实现原理-前置知识

背景

协程在1958年被Melvin Conway提出,想要为 COBOL 高级编程语言去实现一个 one-pass 的编译器。比当时进程, 线程的概念还要早一步诞生。不过当时并没有流行起来,因为不符合当时(一直持续到 1990 年代)以C语言为代表的命令式编程语言自顶向下设计思想。但是随着互联网发展,服务器对于高并发要求越来越高,出现C10K,C100K,C10M等问题。协程也就重新回归视野。

进程与线程

  • 基本结构
  • 上下文切换

基本结构

再了解协程的实现之前,先简单回顾下进程和线程,进程在操作系统中是作为资源分配的基本单位,而线程则是执行单位。不过在Linux中不管是线程还是进程都被称为任务(Task),task可以简单分为以下几个部分:

  1. thread_info
  2. mm_struct
  3. tty_struct
  4. fs_struct
  5. files_struct
  6. signal_struct
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
struct task_struct {
volatile long state; /* -1 unrunnable, 0 runnable, >0 stopped */
struct thread_info *thread_info;
atomic_t usage;
unsigned long flags; /* per process flags, defined below */
unsigned long ptrace;

int lock_depth; /* Lock depth */

int prio, static_prio;
struct list_head run_list;
prio_array_t *array;

unsigned long sleep_avg;
unsigned long last_run;

unsigned long policy;
unsigned long cpus_allowed;
unsigned int time_slice, first_time_slice;

struct list_head tasks;
struct list_head ptrace_children;
struct list_head ptrace_list;

struct mm_struct *mm, *active_mm;

/* task state */
struct linux_binfmt *binfmt;
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies */
/* ??? */
unsigned long personality;
int did_exec:1;
pid_t pid;
pid_t pgrp;
pid_t tty_old_pgrp;
pid_t session;
pid_t tgid;
/* boolean value for session group leader */
int leader;
/*
* pointers to (original) parent process, youngest child, younger sibling,
* older sibling, respectively. (p->father can be replaced with
* p->parent->pid)
*/
struct task_struct *real_parent; /* real parent process (when being debugged) */
struct task_struct *parent; /* parent process */
struct list_head children; /* list of my children */
struct list_head sibling; /* linkage in my parent's children list */
struct task_struct *group_leader;

/* PID/PID hash table linkage. */
struct pid_link pids[PIDTYPE_MAX];

wait_queue_head_t wait_chldexit; /* for wait4() */
struct completion *vfork_done; /* for vfork() */
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */

unsigned long rt_priority;
unsigned long it_real_value, it_prof_value, it_virt_value;
unsigned long it_real_incr, it_prof_incr, it_virt_incr;
struct timer_list real_timer;
struct list_head posix_timers; /* POSIX.1b Interval Timers */
unsigned long utime, stime, cutime, cstime;
u64 start_time;
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
unsigned long min_flt, maj_flt, nswap, cmin_flt, cmaj_flt, cnswap;
/* process credentials */
uid_t uid,euid,suid,fsuid;
gid_t gid,egid,sgid,fsgid;
int ngroups;
gid_t groups[NGROUPS];
kernel_cap_t cap_effective, cap_inheritable, cap_permitted;
int keep_capabilities:1;
struct user_struct *user;
/* limits */
struct rlimit rlim[RLIM_NLIMITS];
unsigned short used_math;
char comm[16];
/* file system info */
int link_count, total_link_count;
struct tty_struct *tty; /* NULL if no tty */
unsigned int locks; /* How many file locks are being held */
/* ipc stuff */
struct sysv_sem sysvsem;
/* CPU-specific state of this task */
struct thread_struct thread;
/* filesystem information */
struct fs_struct *fs;
/* open file information */
struct files_struct *files;
/* namespace */
struct namespace *namespace;
/* signal handlers */
struct signal_struct *signal;
struct sighand_struct *sighand;

sigset_t blocked, real_blocked;
struct sigpending pending;

unsigned long sas_ss_sp;
size_t sas_ss_size;
int (*notifier)(void *priv);
void *notifier_data;
sigset_t *notifier_mask;

void *security;

/* Thread group tracking */
u32 parent_exec_id;
u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty */
spinlock_t alloc_lock;
/* Protection of proc_dentry: nesting proc_lock, dcache_lock, write_lock_irq(&tasklist_lock); */
spinlock_t proc_lock;
/* context-switch lock */
spinlock_t switch_lock;

/* journalling filesystem info */
void *journal_info;

/* VM state */
struct reclaim_state *reclaim_state;

struct dentry *proc_dentry;
struct backing_dev_info *backing_dev_info;

unsigned long ptrace_message;
siginfo_t *last_siginfo; /* For ptrace use. */
}

为了直观了解进程和线程,这里分别实现两个demo:

  • 多进程
  • 多线程
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#include<stdio.h>
#include<stdlib.h>
#include<unistd.h>
#include <sys/types.h>

void print_numbers(char *prefix, int count) {
for (int i = 0; i < count; ++i) {
printf("%s: %d\n", prefix, i);
sleep(100000); // 等待 100 毫秒
}
}

int main(int argc, char const *argv[])
{
pid_t child_pid;
if((child_pid == fork()) == 0){
print_numbers("Process 1", 5);
exit(0);
}
if ((child_pid = fork()) == 0) {
// 子进程执行 print_numbers
print_numbers("Process 2", 5);
exit(0);
}
wait(NULL);
wait(NULL);
return 0;
}

1
2
3
4
5
6
7
8
9
10
11
12
./multiprocessing
Process 1: 0
Process 2: 0


➜ ~ ps -ef |grep multiprocessing
root 2214 652 0 11:47 pts/0 00:00:00 ./multiprocessing
root 2215 2214 0 11:47 pts/0 00:00:00 ./multiprocessing
root 2216 2215 0 11:47 pts/0 00:00:00 ./multiprocessing
root 2228 1876 0 11:47 pts/7 00:00:00 grep --color=auto --exclude-dir=.bzr --exclude-dir=CVS --exclude-dir=.git --exclude-dir=.hg --exclude-dir=.svn --exclude-dir=.idea --exclude-dir=.tox multiprocessing
➜ ~ pstree -p 2214
multiprocessing(2214)───multiprocessing(2215)───multiprocessing(2216)

创建了多个进程,通过ps aux可以看到它一个进程就是2120内存

创建多线程:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
#include<stdio.h>
#include<unistd.h>
#include<pthread.h>

void *print_numbers(void *arg) {
char *prefix = (char *)arg;
for (int i = 0; i < 5; ++i) {
printf("%s: %d\n", prefix, i);
sleep(1000000); // 等待 100 毫秒
}
return NULL;
}

int main() {
pthread_t thread1, thread2;

// 创建第一个线程
pthread_create(&thread1, NULL, print_numbers, "Thread 1");

// 创建第二个线程
pthread_create(&thread2, NULL, print_numbers, "Thread 2");

// 等待两个线程执行完毕
pthread_join(thread1, NULL);
pthread_join(thread2, NULL);

return 0;
}
1
2
3
4
5
6
7
8
9
10
11
 ./multi_thread_demo
Thread 1: 0
Thread 2: 0


ps aux |grep multi_thread_demo
root 3193 0.0 0.0 84704 944 pts/0 Sl+ 11:55 0:00 ./multi_thread_demo
root 3216 0.0 0.0 4032 2184 pts/7 S+ 11:55 0:00 grep --color=auto --exclude-dir=.bzr --exclude-dir=CVS --exclude-dir=.git --exclude-dir=.hg --exclude-dir=.svn --exclude-dir=.idea --exclude-dir=.tox multi_thread_demo
➜ ~ pstree -p 3193
multi_thread_de(3193)─┬─{multi_thread_de}(3194)
└─{multi_thread_de}(3195)

上下文切换(context switch)

上下文切换概念很简单,就是保存Running状态进程的寄存器,并且为可运行状态的进程恢复一些寄存器的值,这一块操作系统会执行一些底层汇编代码来保存通用寄存器、程序计数器,以及当前正在运行的进程的内核栈指针。然后恢复寄存器,程序计数器在切换到内核栈。

可以来看下进程上下文切换都有那些成本:

  1. 保存和恢复寄存器状态
  2. 保存和恢复内存页表
  3. 切换堆栈
  4. 清理和设置硬件上下文
  5. 切换任务状态
  6. TLB(Translation Lookaside Buffer)刷新

再来看下协程上下文切换成本:

  1. 保存和恢复寄存器状态
  2. 保护和切换堆栈
  3. 切换任务状态
  4. TLB刷新
  5. 清理和设置硬件上下文

可以看到相比较于进程省去了一个切换内存页表的成本,因为线程是共享进程中的资源,在之前基本结构中也提到过。所以使用多线程成本和性能方面会比多进程表现好。

进程内存段布局

这是一个关于32位X86架构上运行的Linux进程的内存段布局(从高地址向低地址):

img

参考连接


协程实现原理-前置知识
http://example.com/2024/02/22/协程实现原理/
Author
John Doe
Posted on
February 22, 2024
Licensed under