Nginx的共享内存详解

1. 简述

1.1 应用介绍

nginx中使用共享内存的模块有ngx_http_file_cache_module、ngx_http_limit_conn_module、ngx_http_limit_req_module等。无一例外,这几个模块都使用nginx自己实现的红黑树来组织数据,并把它们所需要的数据保存在共享内存中;共享内存内部的空间则由下文介绍的slab分配器来管理。

2 源码详解

2.1 首先来看看相关结构体定义

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
/*
 * Descriptor of one page managed by the slab pool.  The same three words are
 * reused for several purposes depending on what the page is currently used
 * for (free run, whole-page allocation, or a page split into chunks).
 */
typedef struct ngx_slab_page_s  ngx_slab_page_t;

struct ngx_slab_page_s {
/*
 * Multi-purpose word.  In the code of this file it holds, depending on the
 * page state: the length of a free run in pages, the page count ORed with
 * NGX_SLAB_PAGE_START, the chunk shift, a chunk bitmap, or a bitmap
 * combined with the shift.
 */
uintptr_t slab;
/* next descriptor in the list this page is linked into (NULL when unlinked) */
ngx_slab_page_t *next;
/*
 * Previous descriptor, stored as an integer; the allocator also ORs a
 * page-type tag (NGX_SLAB_SMALL / EXACT / BIG / PAGE) into this word.
 */
uintptr_t prev;
};

/*
 * Per-slot allocation statistics; the pool keeps an array of these in
 * pool->stats, indexed by slot number.
 */
typedef struct {
/* usable chunks provided by the pages currently assigned to the slot */
ngx_uint_t total;
/* chunks currently handed out to callers */
ngx_uint_t used;

/* allocation requests routed to this slot */
ngx_uint_t reqs;
/* requests that failed because no page could be obtained */
ngx_uint_t fails;
} ngx_slab_stat_t;


/*
 * Header of a slab pool.  The slot list heads, the statistics array, the
 * page descriptor array and the data pages are carved out of the memory
 * that follows this header by ngx_slab_init().
 *
 * Field order and types are part of the in-memory layout and must not be
 * changed.
 */
typedef struct {
    ngx_shmtx_sh_t    lock;       /* lock state kept inside the pool;
                                     presumably the word "mutex" operates
                                     on - confirm against ngx_shmtx */

    size_t            min_size;   /* smallest chunk size, computed by
                                     ngx_slab_init() as 1 << min_shift */
    size_t            min_shift;  /* log2 of min_size; must be set before
                                     ngx_slab_init() is called (the article
                                     states ngx_init_zone_pool uses 3) */

    ngx_slab_page_t  *pages;      /* array with one descriptor per data
                                     page, stored contiguously */
    ngx_slab_page_t  *last;       /* one past the last page descriptor */
    ngx_slab_page_t   free;       /* head of the list of free page runs */

    ngx_slab_stat_t  *stats;      /* per-slot statistics array */
    ngx_uint_t        pfree;      /* number of pages currently free */

    u_char           *start;      /* page-aligned address of the first
                                     data page */
    u_char           *end;        /* end of the memory region (exclusive) */

    ngx_shmtx_t       mutex;      /* taken by ngx_slab_alloc(),
                                     ngx_slab_calloc() and ngx_slab_free() */

    u_char           *log_ctx;    /* description string used when a slab
                                     operation fails, so that the log tells
                                     which pool is affected; ngx_slab_init()
                                     points it at "zero" (empty string) */
    u_char            zero;       /* '\0': the default empty log_ctx */

    unsigned          log_nomem:1; /* log "no memory" allocation failures */

    void             *data;       /* free for the module owning the pool;
                                     not used by the allocator code shown */
    void             *addr;       /* per the article: start address of the
                                     owning shared memory segment - not
                                     written by the allocator code shown */
} ngx_slab_pool_t;

2.2 相关函数及其介绍

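主要涉及共享内存的初始化、加锁申请、不加锁申请、加锁释放、不加锁释放等函数。首先来看初始化相关的两个函数ngx_slab_sizes_init和ngx_slab_init:ngx_slab_sizes_init根据操作系统的内存页大小ngx_pagesize计算ngx_slab_max_size(半页大小)、ngx_slab_exact_size以及ngx_slab_exact_shift;ngx_slab_init则在共享内存上依次划分出slot链表头数组、统计数组、页描述数组和实际的数据页。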

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85

/*
 * Derive the global slab size thresholds from the system page size:
 *
 *   ngx_slab_max_size    - half a page; larger requests get whole pages;
 *   ngx_slab_exact_size  - chunk size at which one uintptr_t holds exactly
 *                          one bit per chunk of a page;
 *   ngx_slab_exact_shift - incremented once per halving of the exact size,
 *                          i.e. log2(ngx_slab_exact_size) provided the
 *                          variable is zero on entry.
 *
 * NOTE(review): ngx_slab_exact_shift is not reset here, so the function
 * appears to be meant to run once - confirm against the caller.
 */
void
ngx_slab_sizes_init(void)
{
    ngx_uint_t  rest;

    ngx_slab_max_size = ngx_pagesize / 2;
    ngx_slab_exact_size = ngx_pagesize / (8 * sizeof(uintptr_t));

    rest = ngx_slab_exact_size;

    while ((rest >>= 1) != 0) {
        ngx_slab_exact_shift++;
    }
}

/*
 * Lay out a slab pool inside the memory region [pool, pool->end).
 *
 * pool->min_shift and pool->end must be set by the caller.  Starting at the
 * address returned by ngx_slab_slots(pool), the region is divided into:
 *
 *   1. one list head (ngx_slab_page_t) per slot;
 *   2. one ngx_slab_stat_t per slot;
 *   3. one page descriptor per data page;
 *   4. the data pages themselves, aligned to ngx_pagesize.
 *
 * All pages start out as a single free run linked to pool->free.
 */
void
ngx_slab_init(ngx_slab_pool_t *pool)
{
    u_char           *cursor;
    size_t            avail;
    ngx_int_t         excess;
    ngx_uint_t        k, nslots, npages;
    ngx_slab_page_t  *heads, *first;

    /* the smallest chunk is 2^min_shift bytes */
    pool->min_size = (size_t) 1 << pool->min_shift;

    heads = ngx_slab_slots(pool);

    cursor = (u_char *) heads;
    avail = pool->end - cursor;

    ngx_slab_junk(cursor, avail);

    /* one slot per power of two from min_shift up to (not including) a page */
    nslots = ngx_pagesize_shift - pool->min_shift;

    k = 0;

    while (k < nslots) {
        /* only "next" is used in list head; an empty list points to itself */
        heads[k].slab = 0;
        heads[k].next = &heads[k];
        heads[k].prev = 0;

        k++;
    }

    /* step over the slot heads; the statistics array comes next */
    cursor += nslots * sizeof(ngx_slab_page_t);

    pool->stats = (ngx_slab_stat_t *) cursor;
    ngx_memzero(pool->stats, nslots * sizeof(ngx_slab_stat_t));

    cursor += nslots * sizeof(ngx_slab_stat_t);

    avail -= nslots * (sizeof(ngx_slab_page_t) + sizeof(ngx_slab_stat_t));

    /* every data page costs one page of memory plus one descriptor */
    npages = (ngx_uint_t) (avail / (ngx_pagesize + sizeof(ngx_slab_page_t)));

    pool->pages = (ngx_slab_page_t *) cursor;
    ngx_memzero(pool->pages, npages * sizeof(ngx_slab_page_t));

    first = pool->pages;

    /* only "next" is used in list head; page allocation starts from it */
    pool->free.slab = 0;
    pool->free.next = first;
    pool->free.prev = 0;

    /* the first descriptor represents one free run covering every page */
    first->slab = npages;
    first->next = &pool->free;
    first->prev = (uintptr_t) &pool->free;

    /*
     * Data pages begin at the first page boundary after the descriptor
     * array; the bytes skipped by the alignment are not usable.
     */
    pool->start = ngx_align_ptr(cursor + npages * sizeof(ngx_slab_page_t),
                                ngx_pagesize);

    /* drop the pages that no longer fit once the start has been aligned */
    excess = npages - (pool->end - pool->start) / ngx_pagesize;

    if (excess > 0) {
        npages -= excess;
        first->slab = npages;
    }

    pool->last = pool->pages + npages;
    pool->pfree = npages;

    pool->log_nomem = 1;
    pool->log_ctx = &pool->zero;
    pool->zero = '\0';
}


接下来看看申请内存的相关函数ngx_slab_alloc、ngx_slab_alloc_locked、ngx_slab_calloc、ngx_slab_calloc_locked,以及它们内部用来按页分配的ngx_slab_alloc_pages。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338

/*
 * Allocate "size" bytes from the slab pool.
 *
 * The pool is shared, so the allocation is serialized by holding
 * pool->mutex around ngx_slab_alloc_locked().
 *
 * Returns whatever ngx_slab_alloc_locked() returns: the allocated address,
 * or NULL on failure.
 */
void *
ngx_slab_alloc(ngx_slab_pool_t *pool, size_t size)
{
    void  *chunk;

    ngx_shmtx_lock(&pool->mutex);
    chunk = ngx_slab_alloc_locked(pool, size);
    ngx_shmtx_unlock(&pool->mutex);

    return chunk;
}

/*
 * Allocate "size" bytes from the slab pool without touching pool->mutex;
 * the caller is expected to hold the lock (ngx_slab_alloc() wraps this
 * function in lock/unlock).
 *
 * Requests larger than ngx_slab_max_size are served as a run of whole
 * pages.  Smaller requests are rounded up to a power of two (at least
 * pool->min_size) and taken from a page assigned to the matching slot;
 * when the slot has no page with a free chunk, a new page is obtained.
 *
 * Returns the address of the allocated memory, or NULL (0) when no page
 * could be obtained.
 */
void *
ngx_slab_alloc_locked(ngx_slab_pool_t *pool, size_t size)
{
size_t s;
uintptr_t p, m, mask, *bitmap;
ngx_uint_t i, n, slot, shift, map;
ngx_slab_page_t *page, *prev, *slots;

/* large request: hand out whole pages; no slot and no statistics involved */
if (size > ngx_slab_max_size) {

ngx_log_debug1(NGX_LOG_DEBUG_ALLOC, ngx_cycle->log, 0,
"slab alloc: %uz", size);

/* number of pages = size / pagesize, rounded up */
page = ngx_slab_alloc_pages(pool, (size >> ngx_pagesize_shift)
+ ((size % ngx_pagesize) ? 1 : 0));
if (page) {
p = ngx_slab_page_addr(pool, page);

} else {
p = 0;
}

goto done;
}

/*
 * Pick the slot: "shift" becomes the exponent of the smallest power of two
 * that is >= size, but never less than pool->min_shift.
 */
if (size > pool->min_size) {
shift = 1;
for (s = size - 1; s >>= 1; shift++) { /* void */ }
slot = shift - pool->min_shift;

} else {
shift = pool->min_shift;
slot = 0;
}

pool->stats[slot].reqs++;

ngx_log_debug2(NGX_LOG_DEBUG_ALLOC, ngx_cycle->log, 0,
"slab alloc: %uz slot: %ui", size, slot);

slots = ngx_slab_slots(pool);
page = slots[slot].next;

/*
 * An empty slot list has its head pointing to itself, in which case
 * "page" is the head and page->next == page.  Otherwise "page" is the
 * first page of the slot and is searched for a free chunk.
 */
if (page->next != page) {

if (shift < ngx_slab_exact_shift) {

/* small chunks: the bitmap is stored at the beginning of the page itself */
bitmap = (uintptr_t *) ngx_slab_page_addr(pool, page);

/* number of uintptr_t words needed for one bit per chunk */
map = (ngx_pagesize >> shift) / (8 * sizeof(uintptr_t));

for (n = 0; n < map; n++) {

if (bitmap[n] != NGX_SLAB_BUSY) {

/* find the first clear bit in this word */
for (m = 1, i = 0; m; m <<= 1, i++) {
if (bitmap[n] & m) {
continue;
}

bitmap[n] |= m;

/* chunk index within the page, converted to a byte offset */
i = (n * 8 * sizeof(uintptr_t) + i) << shift;

p = (uintptr_t) bitmap + i;

pool->stats[slot].used++;

/*
 * This word has just become full.  Words before it were already
 * found full by the scan above, so if every following word is full
 * too, the page has no free chunk left and is unlinked from the slot.
 */
if (bitmap[n] == NGX_SLAB_BUSY) {
for (n = n + 1; n < map; n++) {
if (bitmap[n] != NGX_SLAB_BUSY) {
goto done;
}
}

prev = ngx_slab_page_prev(page);
prev->next = page->next;
page->next->prev = page->prev;

/* unlinked full page: no next, prev keeps only the type tag */
page->next = NULL;
page->prev = NGX_SLAB_SMALL;
}

goto done;
}
}
}

} else if (shift == ngx_slab_exact_shift) {

/* exact chunks: page->slab itself is the bitmap, one bit per chunk */
for (m = 1, i = 0; m; m <<= 1, i++) {
if (page->slab & m) {
continue;
}

page->slab |= m;

/* page became full: unlink it from the slot list */
if (page->slab == NGX_SLAB_BUSY) {
prev = ngx_slab_page_prev(page);
prev->next = page->next;
page->next->prev = page->prev;

page->next = NULL;
page->prev = NGX_SLAB_EXACT;
}

p = ngx_slab_page_addr(pool, page) + (i << shift);

pool->stats[slot].used++;

goto done;
}

} else { /* shift > ngx_slab_exact_shift */

/*
 * big chunks: the bitmap occupies the bits of page->slab starting at
 * NGX_SLAB_MAP_SHIFT; "mask" covers one bit per chunk of the page
 */
mask = ((uintptr_t) 1 << (ngx_pagesize >> shift)) - 1;
mask <<= NGX_SLAB_MAP_SHIFT;

for (m = (uintptr_t) 1 << NGX_SLAB_MAP_SHIFT, i = 0;
m & mask;
m <<= 1, i++)
{
if (page->slab & m) {
continue;
}

page->slab |= m;

/* every chunk bit is set: unlink the full page from the slot list */
if ((page->slab & NGX_SLAB_MAP_MASK) == mask) {
prev = ngx_slab_page_prev(page);
prev->next = page->next;
page->next->prev = page->prev;

page->next = NULL;
page->prev = NGX_SLAB_BIG;
}

p = ngx_slab_page_addr(pool, page) + (i << shift);

pool->stats[slot].used++;

goto done;
}
}

/*
 * Reached only when a page linked into the slot list had no free chunk,
 * which the unlinking above is meant to prevent.  It is reported, and
 * execution then continues with the allocation of a fresh page below.
 */
ngx_slab_error(pool, NGX_LOG_ALERT, "ngx_slab_alloc(): page is busy");
ngx_debug_point();
}
/* take one free page and attach it to the slot */
page = ngx_slab_alloc_pages(pool, 1);

if (page) {
if (shift < ngx_slab_exact_shift) {
bitmap = (uintptr_t *) ngx_slab_page_addr(pool, page);

/* number of chunks occupied by the bitmap itself: chunks / bits per chunk */
n = (ngx_pagesize >> shift) / ((1 << shift) * 8);

if (n == 0) {
n = 1;
}

/* "n" elements for bitmap, plus one requested */

/* whole words fully covered by those n + 1 chunks are marked busy */
for (i = 0; i < (n + 1) / (8 * sizeof(uintptr_t)); i++) {
bitmap[i] = NGX_SLAB_BUSY;
}

/* remaining low bits of the next word */
m = ((uintptr_t) 1 << ((n + 1) % (8 * sizeof(uintptr_t)))) - 1;
bitmap[i] = m;

map = (ngx_pagesize >> shift) / (8 * sizeof(uintptr_t));

/* all other chunks are free */
for (i = i + 1; i < map; i++) {
bitmap[i] = 0;
}

/* for small pages page->slab stores the chunk shift */
page->slab = shift;
page->next = &slots[slot];
page->prev = (uintptr_t) &slots[slot] | NGX_SLAB_SMALL;

slots[slot].next = page;

/* chunks holding the bitmap are not counted as usable */
pool->stats[slot].total += (ngx_pagesize >> shift) - n;

/* the requested chunk is the first one after the bitmap chunks */
p = ngx_slab_page_addr(pool, page) + (n << shift);

pool->stats[slot].used++;

goto done;

} else if (shift == ngx_slab_exact_shift) {

/* bitmap with only the first chunk taken */
page->slab = 1;
page->next = &slots[slot];
page->prev = (uintptr_t) &slots[slot] | NGX_SLAB_EXACT;

slots[slot].next = page;

pool->stats[slot].total += 8 * sizeof(uintptr_t);

p = ngx_slab_page_addr(pool, page);

pool->stats[slot].used++;

goto done;

} else { /* shift > ngx_slab_exact_shift */

/* first chunk bit set in the bitmap part, chunk shift in the low bits */
page->slab = ((uintptr_t) 1 << NGX_SLAB_MAP_SHIFT) | shift;
page->next = &slots[slot];
page->prev = (uintptr_t) &slots[slot] | NGX_SLAB_BIG;

slots[slot].next = page;

pool->stats[slot].total += ngx_pagesize >> shift;

p = ngx_slab_page_addr(pool, page);

pool->stats[slot].used++;

goto done;
}
}

/* no page could be obtained for the slot */
p = 0;

pool->stats[slot].fails++;

done:

ngx_log_debug1(NGX_LOG_DEBUG_ALLOC, ngx_cycle->log, 0,
"slab alloc: %p", (void *) p);

return (void *) p;
}

/*
 * Allocate "size" zero-filled bytes from the slab pool.
 *
 * The pool is shared, so the call to ngx_slab_calloc_locked() is made with
 * pool->mutex held.
 *
 * Returns the allocated address, or NULL on failure.
 */
void *
ngx_slab_calloc(ngx_slab_pool_t *pool, size_t size)
{
    void  *chunk;

    ngx_shmtx_lock(&pool->mutex);
    chunk = ngx_slab_calloc_locked(pool, size);
    ngx_shmtx_unlock(&pool->mutex);

    return chunk;
}

/*
 * Same as ngx_slab_alloc_locked(), but the first "size" bytes of the
 * returned memory are set to zero.  pool->mutex is not touched here.
 *
 * Returns the allocated address, or NULL when the allocation failed.
 */
void *
ngx_slab_calloc_locked(ngx_slab_pool_t *pool, size_t size)
{
    void  *chunk;

    chunk = ngx_slab_alloc_locked(pool, size);

    if (chunk == NULL) {
        return NULL;
    }

    ngx_memzero(chunk, size);

    return chunk;
}


/*
 * Take "pages" consecutive page descriptors from the pool's free list,
 * using the first free run that is large enough.
 *
 * A larger run is split: its first "pages" descriptors are allocated and
 * the remainder replaces the run in the free list.  The first allocated
 * descriptor records the page count together with NGX_SLAB_PAGE_START;
 * the following ones are marked NGX_SLAB_PAGE_BUSY.
 *
 * Returns the first descriptor, or NULL when no free run is large enough
 * (reported at NGX_LOG_CRIT when pool->log_nomem is set).
 */
static ngx_slab_page_t *
ngx_slab_alloc_pages(ngx_slab_pool_t *pool, ngx_uint_t pages)
{
ngx_slab_page_t *page, *p;
/*
 * After ngx_slab_init() pool->free.next points to the first descriptor,
 * whose "slab" holds the total number of pages.  Walk the free runs,
 * starting at pool->free.next, until the list head is reached again.
 */
for (page = pool->free.next; page != &pool->free; page = page->next) {

/* for a free run, page->slab is its length in pages */
if (page->slab >= pages) {

if (page->slab > pages) {
/*
 * Split: the last descriptor of the run is pointed at the new head of
 * the remainder (presumably used when freed runs are merged - confirm
 * against ngx_slab_free_pages()).
 */
page[page->slab - 1].prev = (uintptr_t) &page[pages];

/* the remainder takes over this run's length and list links */
page[pages].slab = page->slab - pages;
page[pages].next = page->next;
page[pages].prev = page->prev;

/* neighbours now point to the remainder instead of the old head */
p = (ngx_slab_page_t *) page->prev;
p->next = &page[pages];
page->next->prev = (uintptr_t) &page[pages];

} else {
/* exact fit: unlink the whole run from the free list */
p = (ngx_slab_page_t *) page->prev;
p->next = page->next;
page->next->prev = page->prev;
}
/*
 * NGX_SLAB_PAGE_START marks the first page of the allocation; the same
 * word records how many consecutive pages were allocated together.
 */
page->slab = pages | NGX_SLAB_PAGE_START;
page->next = NULL;
page->prev = NGX_SLAB_PAGE;

pool->pfree -= pages;
/* single-page allocation: there are no follow-up descriptors to mark */
if (--pages == 0) {
return page;
}

for (p = page + 1; pages; pages--) {
/* mark every following page of a multi-page allocation as busy */
p->slab = NGX_SLAB_PAGE_BUSY;
p->next = NULL;
p->prev = NGX_SLAB_PAGE;
p++;
}

return page;
}
}

if (pool->log_nomem) {
ngx_slab_error(pool, NGX_LOG_CRIT,
"ngx_slab_alloc() failed: no memory");
}

return NULL;
}


最后来看看释放内存的相关函数ngx_slab_free、ngx_slab_free_locked(其中调用的ngx_slab_free_pages负责把整页归还到空闲页链表,本文没有列出它的源码)。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228

/*
 * Return the memory at "p" to the slab pool.
 *
 * The pool is shared, so ngx_slab_free_locked() is called with pool->mutex
 * held.  Invalid pointers are handled (and reported) by
 * ngx_slab_free_locked(); nothing is returned to the caller.
 */
void
ngx_slab_free(ngx_slab_pool_t *pool, void *p)
{
    ngx_shmtx_lock(&pool->mutex);
    ngx_slab_free_locked(pool, p);
    ngx_shmtx_unlock(&pool->mutex);
}

/*
 * Return the memory at "p" to the slab pool without touching pool->mutex;
 * the caller is expected to hold the lock (ngx_slab_free() wraps this
 * function in lock/unlock).
 *
 * The page descriptor covering "p" decides how the memory is released:
 * a chunk of a small/exact/big page has its bitmap bit cleared (and the
 * page is returned to the free list once its last chunk is released),
 * while a whole-page allocation is returned as a run of pages.
 *
 * Invalid pointers - outside of the data pages, misaligned for the chunk
 * size, not the first page of an allocation, or already free - are
 * reported at NGX_LOG_ALERT and otherwise ignored.
 *
 * pool->end is exclusive, so a pointer equal to it is rejected as being
 * outside of the pool; accepting it would index a page descriptor beyond
 * the managed range.
 */
void
ngx_slab_free_locked(ngx_slab_pool_t *pool, void *p)
{
    size_t            size;
    uintptr_t         slab, m, *bitmap;
    ngx_uint_t        i, n, type, slot, shift, map;
    ngx_slab_page_t  *slots, *page;

    ngx_log_debug1(NGX_LOG_DEBUG_ALLOC, ngx_cycle->log, 0, "slab free: %p", p);

    if ((u_char *) p < pool->start || (u_char *) p >= pool->end) {
        ngx_slab_error(pool, NGX_LOG_ALERT, "ngx_slab_free(): outside of pool");
        goto fail;
    }

    /* locate the descriptor of the page that contains "p" */
    n = ((u_char *) p - pool->start) >> ngx_pagesize_shift;
    page = &pool->pages[n];

    /*
     * For a multi-page allocation the "slab" of the first page holds the
     * number of pages allocated together.
     */
    slab = page->slab;
    type = ngx_slab_page_type(page);

    switch (type) {

    case NGX_SLAB_SMALL:

        /* small pages keep the chunk shift in "slab" */
        shift = slab & NGX_SLAB_SHIFT_MASK;
        size = (size_t) 1 << shift;

        /* a chunk pointer must be aligned to the chunk size */
        if ((uintptr_t) p & (size - 1)) {
            goto wrong_chunk;
        }

        /* chunk index in the page, split into a bitmap word and a bit */
        n = ((uintptr_t) p & (ngx_pagesize - 1)) >> shift;
        m = (uintptr_t) 1 << (n % (8 * sizeof(uintptr_t)));
        n /= 8 * sizeof(uintptr_t);

        /* the bitmap is stored at the beginning of the page */
        bitmap = (uintptr_t *)
                             ((uintptr_t) p & ~((uintptr_t) ngx_pagesize - 1));

        if (bitmap[n] & m) {
            slot = shift - pool->min_shift;

            /* a full page was unlinked from its slot: put it back in front */
            if (page->next == NULL) {
                slots = ngx_slab_slots(pool);

                page->next = slots[slot].next;
                slots[slot].next = page;

                page->prev = (uintptr_t) &slots[slot] | NGX_SLAB_SMALL;
                page->next->prev = (uintptr_t) page | NGX_SLAB_SMALL;
            }

            bitmap[n] &= ~m;

            /* number of chunks occupied by the bitmap itself */
            n = (ngx_pagesize >> shift) / ((1 << shift) * 8);

            if (n == 0) {
                n = 1;
            }

            /*
             * The page can be released only if no chunk other than the
             * bitmap's own chunks is still in use.
             */
            i = n / (8 * sizeof(uintptr_t));
            m = ((uintptr_t) 1 << (n % (8 * sizeof(uintptr_t)))) - 1;

            if (bitmap[i] & ~m) {
                goto done;
            }

            map = (ngx_pagesize >> shift) / (8 * sizeof(uintptr_t));

            for (i = i + 1; i < map; i++) {
                if (bitmap[i]) {
                    goto done;
                }
            }

            ngx_slab_free_pages(pool, page, 1);

            pool->stats[slot].total -= (ngx_pagesize >> shift) - n;

            goto done;
        }

        goto chunk_already_free;

    case NGX_SLAB_EXACT:

        /* exact pages use "slab" itself as the bitmap, one bit per chunk */
        m = (uintptr_t) 1 <<
                (((uintptr_t) p & (ngx_pagesize - 1)) >> ngx_slab_exact_shift);
        size = ngx_slab_exact_size;

        if ((uintptr_t) p & (size - 1)) {
            goto wrong_chunk;
        }

        /* the chunk's bit is set, i.e. the chunk is in use */
        if (slab & m) {
            slot = ngx_slab_exact_shift - pool->min_shift;

            /* a full page was unlinked from its slot: put it back in front */
            if (slab == NGX_SLAB_BUSY) {
                slots = ngx_slab_slots(pool);

                page->next = slots[slot].next;
                slots[slot].next = page;

                page->prev = (uintptr_t) &slots[slot] | NGX_SLAB_EXACT;
                page->next->prev = (uintptr_t) page | NGX_SLAB_EXACT;
            }

            page->slab &= ~m;

            if (page->slab) {
                goto done;
            }

            /* last chunk released: give the page back */
            ngx_slab_free_pages(pool, page, 1);

            pool->stats[slot].total -= 8 * sizeof(uintptr_t);

            goto done;
        }

        goto chunk_already_free;

    case NGX_SLAB_BIG:

        /*
         * big pages: the bits of "slab" from NGX_SLAB_MAP_SHIFT upwards are
         * the chunk bitmap, the bits under NGX_SLAB_SHIFT_MASK hold the
         * chunk shift
         */
        shift = slab & NGX_SLAB_SHIFT_MASK;
        size = (size_t) 1 << shift;

        if ((uintptr_t) p & (size - 1)) {
            goto wrong_chunk;
        }

        m = (uintptr_t) 1 << ((((uintptr_t) p & (ngx_pagesize - 1)) >> shift)
                              + NGX_SLAB_MAP_SHIFT);

        /* the chunk is indeed in use */
        if (slab & m) {
            slot = shift - pool->min_shift;

            /* a full page was unlinked from its slot: put it back in front */
            if (page->next == NULL) {
                slots = ngx_slab_slots(pool);

                page->next = slots[slot].next;
                slots[slot].next = page;

                page->prev = (uintptr_t) &slots[slot] | NGX_SLAB_BIG;
                page->next->prev = (uintptr_t) page | NGX_SLAB_BIG;
            }

            page->slab &= ~m;

            if (page->slab & NGX_SLAB_MAP_MASK) {
                goto done;
            }

            /* no chunk of the page is in use any more: give the page back */
            ngx_slab_free_pages(pool, page, 1);

            pool->stats[slot].total -= ngx_pagesize >> shift;

            goto done;
        }

        goto chunk_already_free;

    case NGX_SLAB_PAGE:

        /* whole-page allocation: the pointer must be page aligned */
        if ((uintptr_t) p & (ngx_pagesize - 1)) {
            goto wrong_chunk;
        }

        if (!(slab & NGX_SLAB_PAGE_START)) {
            ngx_slab_error(pool, NGX_LOG_ALERT,
                           "ngx_slab_free(): page is already free");
            goto fail;
        }

        if (slab == NGX_SLAB_PAGE_BUSY) {
            ngx_slab_error(pool, NGX_LOG_ALERT,
                           "ngx_slab_free(): pointer to wrong page");
            goto fail;
        }

        /* number of pages that were allocated together */
        size = slab & ~NGX_SLAB_PAGE_START;

        ngx_slab_free_pages(pool, page, size);

        ngx_slab_junk(p, size << ngx_pagesize_shift);

        return;
    }

    /* not reached */

    return;

done:

    /* a chunk was released: update the slot statistics */
    pool->stats[slot].used--;

    ngx_slab_junk(p, size);

    return;

wrong_chunk:

    ngx_slab_error(pool, NGX_LOG_ALERT,
                   "ngx_slab_free(): pointer to wrong chunk");

    goto fail;

chunk_already_free:

    ngx_slab_error(pool, NGX_LOG_ALERT,
                   "ngx_slab_free(): chunk is already free");

fail:

    return;
}

3 总结

以上就是slab相关函数的实现。nginx的slab共享内存管理借鉴了Linux内核内存管理(slab分配器)的实现思路。