Nginx的轮询算法

1、概述

Nginx轮询算法应用于http模块和stream模块的upstream块,根据权重选择相应的服务器进行负载均衡。

1.1、相关配置

# Example configuration: an upstream group declared inside the http block.
http {

# Upstream group named "backend"; no load-balancing method is specified here.
upstream backend {
# Two backend servers, each configured with weight=10.
server 127.0.0.1:8080 weight=10;
server 192.168.1.2:8080 weight=10;
}

}

>若upstream块中没有指定负载均衡算法,则默认使用轮询算法。

2、算法实现

Nginx中http_upstream_module和stream_upstream_module模块都使用了轮询算法,这里以stream_upstream模块为例。

2.1、相关结构体
/*
 * Round-robin selection state: the peer group being consulted, the peer
 * currently chosen, and a record of which peers have already been tried.
 */
typedef struct {
ngx_stream_upstream_rr_peers_t *peers;
ngx_stream_upstream_rr_peer_t *current;
// Original note: when there are more than 32 backend servers, this field
// records whether each server has been used.
// NOTE(review): the http counterpart shown later in this file indexes
// tried[] in words of 8 * sizeof(uintptr_t) bits, so "32" presumably means
// the bit width of uintptr_t (32 only on 32-bit builds) - confirm.
uintptr_t *tried;
// Original note: when there are fewer than 32 backend servers, this field
// records whether each server has been used.
uintptr_t data;
} ngx_stream_upstream_rr_peer_data_t;


typedef struct ngx_stream_upstream_rr_peer_s ngx_stream_upstream_rr_peer_t;

/*
 * One backend address (peer) taking part in round-robin selection.
 * ngx_stream_upstream_init_round_robin() creates one of these per resolved
 * address of every configured server and chains them through `next`.
 */
struct ngx_stream_upstream_rr_peer_s {
struct sockaddr *sockaddr;
socklen_t socklen;
ngx_str_t name; // server name, usually the IP address
ngx_str_t server; // copied from the configured server's name during init

ngx_int_t current_weight; // initialised to 0 during init
ngx_int_t effective_weight; // round-robin working weight; starts equal to weight
ngx_int_t weight; // configured weight

ngx_uint_t conns; // original note: number of successful connections on this backend peer

ngx_uint_t fails; // number of failed attempts so far
time_t accessed;
time_t checked;

ngx_uint_t max_fails; // configured max_fails threshold
time_t fail_timeout; // copied from the server's fail_timeout setting during init

ngx_uint_t down; /* unsigned down:1; */

#if (NGX_STREAM_SSL)
void *ssl_session;
int ssl_session_len;
#endif

ngx_stream_upstream_rr_peer_t *next; // next peer in the same group's list

#if (NGX_STREAM_UPSTREAM_ZONE)
ngx_atomic_t lock;
#endif
};


typedef struct ngx_stream_upstream_rr_peers_s ngx_stream_upstream_rr_peers_t;

/*
 * A group of peers (either the primary servers or the backup servers) built
 * by ngx_stream_upstream_init_round_robin().
 */
struct ngx_stream_upstream_rr_peers_s {
ngx_uint_t number; // number of backend servers (addresses) configured in this group

#if (NGX_STREAM_UPSTREAM_ZONE)
ngx_slab_pool_t *shpool;
ngx_atomic_t rwlock;
#endif

ngx_uint_t total_weight; // sum of the weights of all servers in the group

unsigned single:1; // whether there is only one server
unsigned weighted:1; // set by init when the summed weight differs from the address count

ngx_str_t *name; // init points this at the upstream's host name

ngx_stream_upstream_rr_peers_t *next; // init links the backup group here, if any

// Server information: head of the list linking every peer of the group
// together through peer->next.
ngx_stream_upstream_rr_peer_t *peer;
};

上述3个结构体是整个轮询算法的核心:ngx_stream_upstream_rr_peer_data_t结构体保存已经选择过的服务器以及当前选择的服务器,ngx_stream_upstream_rr_peer_t结构体中的几个weight成员(weight、effective_weight、current_weight)用来选择服务器。

2.1 初始化ngx_stream_upstream_rr_peer_t结构体

Nginx在解析配置文件时,当解析到upstream块时,会调用ngx_stream_upstream_init_round_robin函数,将配置文件中的server保存在ngx_stream_upstream_rr_peer_t结构体中。接下来看看ngx_stream_upstream_init_round_robin函数的定义:

ngx_int_t
ngx_stream_upstream_init_round_robin(ngx_conf_t *cf,
ngx_stream_upstream_srv_conf_t *us)
{
ngx_url_t u;
ngx_uint_t i, j, n, w;
ngx_stream_upstream_server_t *server;
ngx_stream_upstream_rr_peer_t *peer, **peerp;
ngx_stream_upstream_rr_peers_t *peers, *backup;//一个是正常的servers,一个是backup servers

us->peer.init = ngx_stream_upstream_init_round_robin_peer;//upstream中服务器节点的初始化赋值

if (us->servers) {
server = us->servers->elts;

n = 0;
w = 0;

for (i = 0; i < us->servers->nelts; i++) {
if (server[i].backup) {
continue;
}

n += server[i].naddrs;
w += server[i].naddrs * server[i].weight;
}

if (n == 0) {
ngx_log_error(NGX_LOG_EMERG, cf->log, 0,
"no servers in upstream \"%V\" in %s:%ui",
&us->host, us->file_name, us->line);
return NGX_ERROR;
}

peers = ngx_pcalloc(cf->pool, sizeof(ngx_stream_upstream_rr_peers_t));
if (peers == NULL) {
return NGX_ERROR;
}

peer = ngx_pcalloc(cf->pool, sizeof(ngx_stream_upstream_rr_peer_t) * n);
if (peer == NULL) {
return NGX_ERROR;
}

peers->single = (n == 1);
peers->number = n;
peers->weighted = (w != n);
peers->total_weight = w;
peers->name = &us->host;

n = 0;
peerp = &peers->peer;
//初始化peer数据
for (i = 0; i < us->servers->nelts; i++) {
if (server[i].backup) {
continue;
}

for (j = 0; j < server[i].naddrs; j++) {
peer[n].sockaddr = server[i].addrs[j].sockaddr;
peer[n].socklen = server[i].addrs[j].socklen;
peer[n].name = server[i].addrs[j].name;
peer[n].weight = server[i].weight;
peer[n].effective_weight = server[i].weight;
peer[n].current_weight = 0;
peer[n].max_fails = server[i].max_fails;
peer[n].fail_timeout = server[i].fail_timeout;
peer[n].down = server[i].down;
peer[n].server = server[i].name;

*peerp = &peer[n];
peerp = &peer[n].next;
n++;
}
}

us->peer.data = peers;

/* backup servers */

n = 0;
w = 0;

for (i = 0; i < us->servers->nelts; i++) {
if (!server[i].backup) {
continue;
}

n += server[i].naddrs;
w += server[i].naddrs * server[i].weight;
}

if (n == 0) {
return NGX_OK;
}

backup = ngx_pcalloc(cf->pool, sizeof(ngx_stream_upstream_rr_peers_t));
if (backup == NULL) {
return NGX_ERROR;
}

peer = ngx_pcalloc(cf->pool, sizeof(ngx_stream_upstream_rr_peer_t) * n);
if (peer == NULL) {
return NGX_ERROR;
}

peers->single = 0;
backup->single = 0;
backup->number = n;
backup->weighted = (w != n);
backup->total_weight = w;
backup->name = &us->host;

n = 0;
peerp = &backup->peer;

for (i = 0; i < us->servers->nelts; i++) {
if (!server[i].backup) {
continue;
}

for (j = 0; j < server[i].naddrs; j++) {
peer[n].sockaddr = server[i].addrs[j].sockaddr;
peer[n].socklen = server[i].addrs[j].socklen;
peer[n].name = server[i].addrs[j].name;
peer[n].weight = server[i].weight;
peer[n].effective_weight = server[i].weight;
peer[n].current_weight = 0;
peer[n].max_fails = server[i].max_fails;
peer[n].fail_timeout = server[i].fail_timeout;
peer[n].down = server[i].down;
peer[n].server = server[i].name;

*peerp = &peer[n];
peerp = &peer[n].next;
n++;
}
}

peers->next = backup;

return NGX_OK;
}


/* an upstream implicitly defined by proxy_pass, etc. */

if (us->port == 0) {
ngx_log_error(NGX_LOG_EMERG, cf->log, 0,
"no port in upstream \"%V\" in %s:%ui",
&us->host, us->file_name, us->line);
return NGX_ERROR;
}

ngx_memzero(&u, sizeof(ngx_url_t));

u.host = us->host;
u.port = us->port;

if (ngx_inet_resolve_host(cf->pool, &u) != NGX_OK) {
if (u.err) {
ngx_log_error(NGX_LOG_EMERG, cf->log, 0,
"%s in upstream \"%V\" in %s:%ui",
u.err, &us->host, us->file_name, us->line);
}

return NGX_ERROR;
}

n = u.naddrs;

peers = ngx_pcalloc(cf->pool, sizeof(ngx_stream_upstream_rr_peers_t));
if (peers == NULL) {
return NGX_ERROR;
}

peer = ngx_pcalloc(cf->pool, sizeof(ngx_stream_upstream_rr_peer_t) * n);
if (peer == NULL) {
return NGX_ERROR;
}

peers->single = (n == 1);
peers->number = n;
peers->weighted = 0;
peers->total_weight = n;
peers->name = &us->host;

peerp = &peers->peer;

for (i = 0; i < u.naddrs; i++) {
peer[i].sockaddr = u.addrs[i].sockaddr;
peer[i].socklen = u.addrs[i].socklen;
peer[i].name = u.addrs[i].name;
peer[i].weight = 1;
peer[i].effective_weight = 1;
peer[i].current_weight = 0;
peer[i].max_fails = 1;
peer[i].fail_timeout = 10;
*peerp = &peer[i];
peerp = &peer[i].next;
}

us->peer.data = peers;

/* implicitly defined upstream has no backup servers */

return NGX_OK;
}

  1. 如果没有指定其他负载均衡算法,则此函数在配置解析阶段执行,由ngx_stream_upstream_init_main_conf函数调用执行。
  2. 将配置中的后端服务器使用peer连接起来,对backup服务器采取同样的操作。

2.2 获取一个选中的server

选择一个上游peer,主要由ngx_http_upstream_get_round_robin_peer函数实现(注意:下面的代码取自http模块,而非前文所说的stream模块):
/*
 * Pick the peer for the next connection attempt and publish its address
 * through `pc`.
 *
 * `data` is the ngx_http_upstream_rr_peer_data_t of the request.  When the
 * current group yields no usable peer and a next (backup) group exists, the
 * tried bitmap is cleared, rrp->peers is switched to that group and the
 * selection is retried on it.
 *
 * Returns NGX_OK when a peer was chosen (its conns counter is incremented),
 * NGX_BUSY when no peer is available, or whatever the retry on the backup
 * group returned if that is not NGX_BUSY.
 */
ngx_int_t
ngx_http_upstream_get_round_robin_peer(ngx_peer_connection_t *pc, void *data)
{
    ngx_http_upstream_rr_peer_data_t  *rrp = data;

    ngx_int_t                      status;
    ngx_uint_t                     word, nwords;
    ngx_http_upstream_rr_peer_t   *chosen;
    ngx_http_upstream_rr_peers_t  *group;

    ngx_log_debug1(NGX_LOG_DEBUG_HTTP, pc->log, 0,
                   "get rr peer, try: %ui", pc->tries);

    pc->cached = 0;
    pc->connection = NULL;

    group = rrp->peers;

    /* takes the group write lock (per the article: only when a zone is used) */
    ngx_http_upstream_rr_peers_wlock(group);

    if (!group->single) {

        /* there are several peers: run the weighted selection */

        chosen = ngx_http_upstream_get_peer(rrp);

        if (chosen == NULL) {
            goto failed;
        }

        ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,
                       "get rr peer, current: %p %i",
                       chosen, chosen->current_weight);

    } else {

        /* exactly one server configured: use it unless it is unusable */

        chosen = group->peer;

        if (chosen->down
            || (chosen->max_conns && chosen->conns >= chosen->max_conns))
        {
            goto failed;
        }

        rrp->current = chosen;
    }

    /* hand the peer's socket address and name to the connection */
    pc->sockaddr = chosen->sockaddr;
    pc->socklen = chosen->socklen;
    pc->name = &chosen->name;

    /* one more connection accounted to this peer */
    chosen->conns++;

    ngx_http_upstream_rr_peers_unlock(group);

    return NGX_OK;

failed:

    if (group->next != NULL) {

        ngx_log_debug0(NGX_LOG_DEBUG_HTTP, pc->log, 0, "backup servers");

        rrp->peers = group->next;

        /* number of uintptr_t words needed to hold one bit per backup peer */
        nwords = (rrp->peers->number + (8 * sizeof(uintptr_t) - 1))
                 / (8 * sizeof(uintptr_t));

        word = 0;
        while (word < nwords) {
            rrp->tried[word] = 0;
            word++;
        }

        ngx_http_upstream_rr_peers_unlock(group);

        status = ngx_http_upstream_get_round_robin_peer(pc, rrp);

        if (status != NGX_BUSY) {
            return status;
        }

        /* re-acquire so the common unlock below stays balanced */
        ngx_http_upstream_rr_peers_wlock(group);
    }

    ngx_http_upstream_rr_peers_unlock(group);

    pc->name = group->name;

    return NGX_BUSY;
}


/*
 * Core of the round-robin algorithm: choose one peer from rrp->peers.
 *
 * Every eligible peer has its current_weight raised by its effective_weight;
 * the peer with the highest current_weight wins, is marked in the tried
 * bitmap, and has the sum of all eligible effective weights subtracted from
 * its current_weight.  A peer is not eligible when it was already tried, is
 * marked down, has reached max_fails within fail_timeout, or has reached
 * max_conns.
 *
 * Returns the chosen peer (also stored in rrp->current), or NULL when no
 * peer is eligible.
 */
static ngx_http_upstream_rr_peer_t *
ngx_http_upstream_get_peer(ngx_http_upstream_rr_peer_data_t *rrp)
{
    time_t                        now;
    uintptr_t                     mask;
    ngx_int_t                     weight_sum;
    ngx_uint_t                    idx, word, best_idx;
    ngx_http_upstream_rr_peer_t  *cand, *best;

    now = ngx_time();

    weight_sum = 0;
    best = NULL;

#if (NGX_SUPPRESS_WARN)
    best_idx = 0;
#endif

    for (idx = 0, cand = rrp->peers->peer;
         cand != NULL;
         cand = cand->next, idx++)
    {
        /* locate this peer's bit in the tried bitmap */
        word = idx / (8 * sizeof(uintptr_t));
        mask = (uintptr_t) 1 << (idx % (8 * sizeof(uintptr_t)));

        /* skip peers already tried or marked down */
        if ((rrp->tried[word] & mask) || cand->down) {
            continue;
        }

        /* skip peers that hit the failure threshold within fail_timeout */
        if (cand->max_fails
            && cand->fails >= cand->max_fails
            && now - cand->checked <= cand->fail_timeout)
        {
            continue;
        }

        /* skip peers that reached their connection limit */
        if (cand->max_conns && cand->conns >= cand->max_conns) {
            continue;
        }

        /* raise the candidate's running weight */
        cand->current_weight += cand->effective_weight;
        weight_sum += cand->effective_weight;

        /* let a previously reduced effective weight recover step by step */
        if (cand->effective_weight < cand->weight) {
            cand->effective_weight++;
        }

        /* keep the current best unless this candidate is strictly heavier */
        if (best != NULL && cand->current_weight <= best->current_weight) {
            continue;
        }

        best = cand;
        best_idx = idx;
    }

    /* no suitable peer found */
    if (best == NULL) {
        return NULL;
    }

    rrp->current = best;

    /* mark the winner as tried */
    word = best_idx / (8 * sizeof(uintptr_t));
    mask = (uintptr_t) 1 << (best_idx % (8 * sizeof(uintptr_t)));

    rrp->tried[word] |= mask;

    /* lower the winner's running weight by the total of eligible weights */
    best->current_weight -= weight_sum;

    /* refresh the check timestamp once fail_timeout has elapsed */
    if (now - best->checked > best->fail_timeout) {
        best->checked = now;
    }

    return best;
}



2.3 释放server

释放上游服务器主要由ngx_http_upstream_free_round_robin_peer函数实现(同样取自http模块):
/*
 * Release the peer recorded in rrp->current after a connection attempt.
 *
 * `state` carries the outcome flags.  For a multi-peer group:
 *  - NGX_PEER_FAILED set: fails is incremented, accessed/checked are set to
 *    the current time, and (when max_fails is non-zero) effective_weight is
 *    lowered by weight / max_fails, clamped at 0;
 *  - otherwise fails is reset to 0 if accessed is older than checked.
 * In every case the peer's conns counter is decremented.  pc->tries is set
 * to 0 for a single-peer group and decremented (if non-zero) otherwise.
 */
void
ngx_http_upstream_free_round_robin_peer(ngx_peer_connection_t *pc, void *data,
    ngx_uint_t state)
{
    ngx_http_upstream_rr_peer_data_t  *rrp = data;

    time_t                        stamp;
    ngx_uint_t                    only_one;
    ngx_http_upstream_rr_peer_t  *peer;

    ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,
                   "free rr peer %ui %ui", pc->tries, state);

    /* TODO: NGX_PEER_KEEPALIVE */

    peer = rrp->current;

    ngx_http_upstream_rr_peers_rlock(rrp->peers);
    ngx_http_upstream_rr_peer_lock(rrp->peers, peer);

    /* read the flag while the locks are held; it is used again after unlock */
    only_one = rrp->peers->single;

    if (!only_one) {

        if (!(state & NGX_PEER_FAILED)) {

            /* mark peer live if check passed */

            if (peer->accessed < peer->checked) {
                peer->fails = 0;
            }

        } else {

            /* the attempt failed: record it and penalise the peer */

            stamp = ngx_time();

            peer->fails++;
            peer->accessed = stamp;
            peer->checked = stamp;

            if (peer->max_fails) {
                peer->effective_weight -= peer->weight / peer->max_fails;

                if (peer->fails >= peer->max_fails) {
                    ngx_log_error(NGX_LOG_WARN, pc->log, 0,
                                  "upstream server temporarily disabled");
                }
            }

            ngx_log_debug2(NGX_LOG_DEBUG_HTTP, pc->log, 0,
                           "free rr peer failed: %p %i",
                           peer, peer->effective_weight);

            /* never let the effective weight go negative */
            if (peer->effective_weight < 0) {
                peer->effective_weight = 0;
            }
        }
    }

    /* one connection less on this upstream peer */
    peer->conns--;

    ngx_http_upstream_rr_peer_unlock(rrp->peers, peer);
    ngx_http_upstream_rr_peers_unlock(rrp->peers);

    if (only_one) {
        /* a single peer leaves nothing else to try */
        pc->tries = 0;
        return;
    }

    /* one retry consumed */
    if (pc->tries) {
        pc->tries--;
    }
}

3、总结