ramfs_cause_oom:ramfs 占用内存过大导致 OOM 的分析
OOM的日志:
[ 3994.943403] active_anon:1916 inactive_anon:7318 isolated_anon:32
active_file:311 inactive_file:2079 isolated_file:0
unevictable:15361039 dirty:0 writeback:0 unstable:0
free:59725 slab_reclaimable:42227 slab_unreclaimable:24575
mapped:300389 shmem:465 pagetables:3476 bounce:0
free_cma:0
[ 3994.943407] Node 1 Normal free:45212kB min:45348kB low:56684kB high:68020kB active_anon:2380kB inactive_anon:8144kB active_file:64kB inactive_file:328kB unevictable:30862812kB isolated(anon):0kB isolated(file):0kB present:134217728kB managed:32501996kB mlocked:7376kB dirty:0kB writeback:0kB mapped:180756kB shmem:4kB slab_reclaimable:77140kB slab_unreclaimable:30920kB kernel_stack:2896kB pagetables:1432kB unstable:0kB bounce:0kB free_cma:0kB writeback_tmp:0kB pages_scanned:47491 all_unreclaimable? yes
[ 3994.943413] lowmem_reserve[]: 0 0 0 0
[ 3994.943415] Node 1 Normal: 471*4kB (UEM) 363*8kB (UEM) 222*16kB (UEM) 176*32kB (UEM) 135*64kB (EM) 65*128kB (UM) 30*256kB (UM) 5*512kB (UM) 0*1024kB 0*2048kB 1*4096kB (R) = 45268kB
[ 3994.943427] Node 0 hugepages_total=95 hugepages_free=2 hugepages_surp=0 hugepages_size=1048576kB
[ 3994.943429] Node 1 hugepages_total=95 hugepages_free=87 hugepages_surp=0 hugepages_size=1048576kB
[ 3994.943430] 15250101 total pagecache pages
[ 3994.943432] 4916 pages in swap cache
[ 3994.943433] Swap cache stats: add 528400, delete 523484, find 100279/115359
[ 3994.943434] Free swap = 3604956kB
[ 3994.943435] Total swap = 4194300kB
[ 3994.943437] 67073695 pages RAM
[ 3994.943438] 0 pages HighMem/MovableOnly
[ 3994.943438] 50927604 pages reserved
发现 unevictable 占用的内存太大了:15361039 页,按 4 KB 页计算约 58.6 GB。
crash> kmem -i
PAGES TOTAL PERCENTAGE
TOTAL MEM 65953451 251.6 GB ----
FREE 62432 243.9 MB 0% of TOTAL MEM
USED 65891019 251.4 GB 99% of TOTAL MEM
SHARED 314014 1.2 GB 0% of TOTAL MEM
BUFFERS 200 800 KB 0% of TOTAL MEM
CACHED 15243221 58.1 GB 23% of TOTAL MEM
SLAB 65466 255.7 MB 0% of TOTAL MEM
TOTAL SWAP 1048575 4 GB ----
SWAP USED 129902 507.4 MB 12% of TOTAL SWAP
SWAP FREE 918673 3.5 GB 87% of TOTAL SWAP
COMMIT LIMIT 9121620 34.8 GB ----
COMMITTED 641419 2.4 GB 7% of TOTAL LIMIT
crash>
CACHED(15243221 页)和 unevictable(15361039 页)的大小对得上。
这部分既计入 cached 又属于 unevictable 的内存,怀疑是 ramfs 占据的。
crash> mount
MOUNT SUPERBLK TYPE DEVNAME DIRNAME
......
ffff8807aedff700 ffff883fd0492800 hugetlbfs hugetlbfs /dev/xxxxx
ffff8807d0a66e00 ffff883fd0717800 ramfs ramfs /var/lib/libvirt/qemu/ram
hugetlbfs 占用190G:
crash> p hstates
hstates = $3 =
{{
next_nid_to_alloc = 0,
next_nid_to_free = 0,
order = 18,
mask = 18446744072635809792,
max_huge_pages = 190,
nr_huge_pages = 190,
free_huge_pages = 20,
resv_huge_pages = 20,
分析ramfs占用多少内存:
crash> super_block ffff883fd0717800
struct super_block {
s_list = {
next = 0xffffffff819d4790 <super_blocks>,
prev = 0xffff883fd0492800
},
s_dev = 39,
s_blocksize_bits = 12 '\f',
s_blocksize = 4096,
s_maxbytes = 9223372036854775807,
s_type = 0xffffffff819e4200 <ramfs_fs_type>,
s_op = 0xffffffff8168c980 <ramfs_ops>,
dq_op = 0x0,
s_qcop = 0x0,
s_export_op = 0x0,
s_flags = 1610612736,
s_magic = 2240043254,
s_root = 0xffff88076ed3afc0,
s_umount = {
count = 0,
wait_lock = {
raw_lock = {
{
head_tail = 0,
tickets = {
head = 0,
tail = 0
}
}
}
},
wait_list = {
next = 0xffff883fd0717878,
prev = 0xffff883fd0717878
}
},
s_count = 1,
s_active = {
counter = 2
},
s_security = 0xffff883fd071b600,
s_xattr = 0x0,
s_inodes = {
next = 0xffff8806d48ac1c8,
prev = 0xffff88076ee1d448
},
s_anon = {
first = 0x0
},
s_files_deprecated = 0x0,
s_mounts = {
next = 0xffff8807d0a66e60,
prev = 0xffff8807d0a66b60
},
s_inodes是ramfs所有inode的链表,查询每个inode的size:
crash> list -o 0x108 -H 0xffff8806d48ac1c8 -s inode.i_size
ffff8806d48ad7e0
i_size = 1073741824
ffff8806d48ae120
i_size = 1073741824
ffff8806d48a84a0
i_size = 1073741824
ffff8806d48a9bc0
i_size = 1073741824
ffff8806d48ab530
i_size = 1073741824
ffff8806d48af5f0
i_size = 1073741824
ffff8806d48ab780
i_size = 1073741824
ffff8806d48aea60
i_size = 1073741824
ffff8806d48ac7b0
i_size = 1073741824
ffff8806d48aded0
i_size = 1073741824
ffff8806d48ae810
i_size = 1073741824
ffff8806d48a8940
i_size = 1073741824
ffff8806d48af150
i_size = 1073741824
ffff8806d48adc80
i_size = 1073741824
ffff8806d48af840
i_size = 1073741824
ffff8806d48aa2b0
i_size = 1073741824
ffff8806d48acc50
i_size = 1073741824
ffff8806d48ab9d0
i_size = 1073741824
ffff8806d48ae370
i_size = 1073741824
ffff8806d48a9280
i_size = 1073741824
ffff8806d48aef00
i_size = 1073741824
ffff8806d48a9030
i_size = 1073741824
ffff8806d48abc20
i_size = 1073741824
ffff8806d48af3a0
i_size = 1073741824
ffff8806d48ab090
i_size = 1073741824
ffff8806d48afce0
i_size = 1073741824
ffff8806d48ad590
i_size = 1073741824
ffff8806d48ac560
i_size = 1073741824
ffff8806d48aa9a0
i_size = 1073741824
ffff8806d48a8250
i_size = 1073741824
ffff8806d48aae40
i_size = 1073741824
ffff8806d48ad0f0
i_size = 1073741824
ffff8806d48a9970
i_size = 1073741824
ffff8806d48abe70
i_size = 1073741824
ffff8806d48a8000
i_size = 1073741824
ffff88076ef95590
i_size = 1073741824
ffff88076ef950f0
i_size = 1073741824
ffff88076ef97ce0
i_size = 1073741824
ffff88076ef94ea0
i_size = 1073741824
ffff88076ef965c0
i_size = 1073741824
ffff88076ef95340
i_size = 1073741824
ffff88076ef96810
i_size = 1073741824
ffff88076ef96370
i_size = 1073741824
ffff88076ef95a30
i_size = 1073741824
ffff88076ef96a60
i_size = 1073741824
ffff88076ef973a0
i_size = 1073741824
ffff88076ef96cb0
i_size = 1073741824
ffff88076ef95ed0
i_size = 1073741824
ffff88076ef947b0
i_size = 1073741824
ffff88076ef975f0
i_size = 1073741824
ffff88076ef96120
i_size = 1073741824
ffff88076ef94c50
i_size = 1073741824
ffff88076ef95c80
i_size = 1073741824
ffff88076ef97a90
i_size = 1073741824
ffff88076ef97840
i_size = 1073741824
ffff88076ef96f00
i_size = 1073741824
ffff88076ef957e0
i_size = 1073741824
ffff88076ee1d340
i_size = 0
ffff883fd0717798
i_size = 0
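上面共列出 57 个 i_size 为 1 GiB 的文件。注意:-H 会把传入的地址(这里是 s_inodes.next 指向的第一个 inode)当作链表头而不打印,最后一项 ffff883fd0717798 则是 super_block 自身的 s_inodes 链表头,并不是真正的 inode;推测被跳过的第一个 inode 同样是 1 GiB。
由此发现 ramfs 占据约 58G,刚好也和 CACHED 的 58.1 GB 对应起来。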
所以问题在于 ramfs 占据的内存太大:ramfs 的数据放在 pagecache 中,但它没有后端存储,页面始终无法回写变为 clean,并且会被标记为 unevictable,因此系统既无法回收也无法换出这些内存。
ramfs默认不会限制大小,如果一直增加会存在耗尽内存的风险。
修复方法是替换为 tmpfs,并且限制大小,例如:mount -t tmpfs -o size=<上限> tmpfs /var/lib/libvirt/qemu/ram。