需要有效的 Subscription（訂閱）才能取得 debuginfo 套件
# subscription-manager clean
# subscription-manager register --force --auto-attach --username xxx --password xxx
# subscription-manager repos --enable=rhel-8-for-x86_64-baseos-debug-rpms
# yum install -y kernel-debuginfo-4.18.0-193.14.3.el8_2.x86_64
載入vmcore
# crash /usr/lib/debug/lib/modules/4.18.0-193.14.3.el8_2.x86_64/vmlinux vmcore_filename
KERNEL: /usr/lib/debug/lib/modules/4.18.0-193.14.3.el8_2.x86_64/vmlinux
DUMPFILE: vmcore [PARTIAL DUMP]
CPUS: 12
...:
PANIC: "BUG: unable to handle kernel NULL pointer dereference at 0000000000000115"
...:
COMMAND: "kworker/5:1"
TASK: ffff91396df00000 [THREAD_INFO: ffff91396df00000]
CPU: 5
STATE: TASK_RUNNING (PANIC)
backtrace
crash> bt
PID: 60228 TASK: ffff91396df00000 CPU: 5 COMMAND: "kworker/5:1"
#0 [ffffa481e3ee7b60] machine_kexec at ffffffff9e859a5e
#1 [ffffa481e3ee7bb8] __crash_kexec at ffffffff9e9591fd
#2 [ffffa481e3ee7c80] crash_kexec at ffffffff9e95a0dd
#3 [ffffa481e3ee7c98] oops_end at ffffffff9e821edd
#4 [ffffa481e3ee7cb8] no_context at ffffffff9e86872e
#5 [ffffa481e3ee7d10] do_page_fault at ffffffff9e869262
#6 [ffffa481e3ee7d40] page_fault at ffffffff9f20120e
[exception RIP: memcg_deactivate_kmem_caches+105]
RIP: ffffffff9ea44109 RSP: ffffa481e3ee7df8 RFLAGS: 00010206
RAX: ffff913975db8000 RBX: 0000000000000005 RCX: ffff913258c1aba8
RDX: ffff91396df00000 RSI: ffff913975fd8000 RDI: ffffffff9fad18c0
RBP: ffff912a87d77e00 R8: 0000000000000008 R9: 0000796f72747365
R10: 8080808080808080 R11: 0000000000000010 R12: ffff913975fd8000
R13: 0000000000000013 R14: ffff913223634000 R15: ffff913975db8098
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
#7 [ffffa481e3ee7e28] memcg_offline_kmem at ffffffff9eaab986
#8 [ffffa481e3ee7e48] mem_cgroup_css_offline at ffffffff9eaad792
#9 [ffffa481e3ee7e80] css_killed_work_fn at ffffffff9e95f59d
#10 [ffffa481e3ee7e98] process_one_work at ffffffff9e8ce7d7
#11 [ffffa481e3ee7ed8] worker_thread at ffffffff9e8ceef0
#12 [ffffa481e3ee7f10] kthread at ffffffff9e8d4802
#13 [ffffa481e3ee7f50] ret_from_fork at ffffffff9f200255
disassemble
crash> dis -l memcg_deactivate_kmem_caches+105
/usr/src/debug/kernel-4.18.0-193.14.3.el8_2/linux-4.18.0-193.14.3.el8_2.x86_64/mm/slab_common.c: 770
0xffffffff9ea44109 <memcg_deactivate_kmem_caches+105>: cmpq $0x0,0x110(%rbx)
/usr/src/debug/kernel-4.18.0-193.14.3.el8_2/linux-4.18.0-193.14.3.el8_2.x86_64/mm/slab_common.c +770
static void kmemcg_cache_deactivate(struct kmem_cache *s)
{
if (WARN_ON_ONCE(is_root_cache(s)))
return;
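is_root_cache()（注意：以下摘錄為舊版核心的寫法，將 memcg_params 當作指標；本核心的 struct kmem_cache 中 memcg_params 是位於 0x110 的內嵌結構，實際判斷應為 !s->memcg_params.root_cache，對應指令 cmpq $0x0,0x110(%rbx)，請以 debuginfo 原始碼確認）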
static inline bool is_root_cache(struct kmem_cache *s)
{
return !s->memcg_params || s->memcg_params->is_root_cache;
}
kmemcg_cache_deactivate()是由memcg_deactivate_kmem_caches()呼叫
void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg,
struct mem_cgroup *parent)
{
int idx;
struct memcg_cache_array *arr;
struct kmem_cache *s, *c;
unsigned int nr_reparented;
idx = memcg_cache_id(memcg);
get_online_cpus();
get_online_mems();
mutex_lock(&slab_mutex);
list_for_each_entry(s, &slab_root_caches, root_caches_node) {
arr = rcu_dereference_protected(s->memcg_params.memcg_caches,
lockdep_is_held(&slab_mutex));
c = arr->entries[idx];
if (!c)
continue;
kmemcg_cache_deactivate(c);
call stack
crash> bt -FFls
#6 [ffffa481e3ee7d40] page_fault+30 at ffffffff9f20120e
/usr/src/debug/kernel-4.18.0-193.14.3.el8_2/linux-4.18.0-193.14.3.el8_2.x86_64/arch/x86/entry/entry_64.S: 1164
[exception RIP: memcg_deactivate_kmem_caches+105]
RIP: ffffffff9ea44109 RSP: ffffa481e3ee7df8 RFLAGS: 00010206
RAX: ffff913975db8000 RBX: 0000000000000005 RCX: ffff913258c1aba8
RDX: ffff91396df00000 RSI: ffff913975fd8000 RDI: ffffffff9fad18c0
RBP: ffff912a87d77e00 R8: 0000000000000008 R9: 0000796f72747365
R10: 8080808080808080 R11: 0000000000000010 R12: ffff913975fd8000
R13: 0000000000000013 R14: ffff913223634000 R15: ffff913975db8098
ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018
/usr/src/debug/kernel-4.18.0-193.14.3.el8_2/linux-4.18.0-193.14.3.el8_2.x86_64/mm/slab_common.c: 770
ffffa481e3ee7d48: [ffff913975db8098:kmalloc-2k] [ffff913223634000:kmalloc-4k]
ffffa481e3ee7d58: 0000000000000013 [ffff913975fd8000:kmalloc-4k]
ffffa481e3ee7d68: [ffff912a87d77e00:kmem_cache] 0000000000000005
ffffa481e3ee7d78: 0000000000000010 8080808080808080
ffffa481e3ee7d88: 0000796f72747365 0000000000000008
ffffa481e3ee7d98: [ffff913975db8000:kmalloc-2k] [ffff913258c1aba8:kmalloc-4k]
ffffa481e3ee7da8: [ffff91396df00000:task_struct] [ffff913975fd8000:kmalloc-4k]
ffffa481e3ee7db8: slab_mutex ffffffffffffffff
ffffa481e3ee7dc8: memcg_deactivate_kmem_caches+105 0000000000000010
ffffa481e3ee7dd8: 0000000000010206 ffffa481e3ee7df8
ffffa481e3ee7de8: 0000000000000018 memcg_deactivate_kmem_caches+59
ffffa481e3ee7df8: [ffff913223634000:kmalloc-4k] [ffff913975fd8000:kmalloc-4k]
ffffa481e3ee7e08: [ffff913223634b18:kmalloc-4k] [ffff913223634000:kmalloc-4k]
ffffa481e3ee7e18: [ffff913223634b28:kmalloc-4k] [ffff913223634b18:kmalloc-4k]
ffffa481e3ee7e28: memcg_offline_kmem+54
bt -FFls | grep -Eo 'ffff.{12}:kmem_cache'
kmem_cache struct
crash> struct kmem_cache
struct kmem_cache {
struct kmem_cache_cpu *cpu_slab;
slab_flags_t flags;
unsigned long min_partial;
unsigned int size;
unsigned int object_size;
unsigned int offset;
unsigned int cpu_partial;
struct kmem_cache_order_objects oo;
struct kmem_cache_order_objects max;
struct kmem_cache_order_objects min;
gfp_t allocflags;
int refcount;
void (*ctor)(void *);
unsigned int inuse;
unsigned int align;
unsigned int red_left_pad;
const char *name;
struct list_head list;
struct kobject kobj;
struct work_struct kobj_remove_work;
struct memcg_cache_params memcg_params;
unsigned int max_attr_size;
struct kset *memcg_kset;
unsigned int remote_node_defrag_ratio;
unsigned int *random_seq;
unsigned int useroffset;
unsigned int usersize;
struct kmem_cache_node *node[1024];
}
SIZE: 8680
memcg_params offset (0x110)
crash> struct -ox kmem_cache
struct kmem_cache {
[0x0] struct kmem_cache_cpu *cpu_slab;
[0x8] slab_flags_t flags;
[0x10] unsigned long min_partial;
[0x18] unsigned int size;
[0x1c] unsigned int object_size;
[0x20] unsigned int offset;
[0x24] unsigned int cpu_partial;
[0x28] struct kmem_cache_order_objects oo;
[0x2c] struct kmem_cache_order_objects max;
[0x30] struct kmem_cache_order_objects min;
[0x34] gfp_t allocflags;
[0x38] int refcount;
[0x40] void (*ctor)(void *);
[0x48] unsigned int inuse;
[0x4c] unsigned int align;
[0x50] unsigned int red_left_pad;
[0x58] const char *name;
[0x60] struct list_head list;
[0x70] struct kobject kobj;
[0xd0] struct work_struct kobj_remove_work;
[0x110] struct memcg_cache_params memcg_params;
[0x1c0] unsigned int max_attr_size;
[0x1c8] struct kset *memcg_kset;
[0x1d0] unsigned int remote_node_defrag_ratio;
[0x1d8] unsigned int *random_seq;
[0x1e0] unsigned int useroffset;
[0x1e4] unsigned int usersize;
[0x1e8] struct kmem_cache_node *node[1024];
}
SIZE: 0x21e8
disassemble memcg_deactivate_kmem_caches
crash> dis -x memcg_deactivate_kmem_caches
0xffffffff9ea440a0 <memcg_deactivate_kmem_caches>: nopl 0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffff9ea440a5 <memcg_deactivate_kmem_caches+0x5>: push %r15
0xffffffff9ea440a7 <memcg_deactivate_kmem_caches+0x7>: push %r14
0xffffffff9ea440a9 <memcg_deactivate_kmem_caches+0x9>: mov %rdi,%r14
0xffffffff9ea440ac <memcg_deactivate_kmem_caches+0xc>: push %r13
0xffffffff9ea440ae <memcg_deactivate_kmem_caches+0xe>: push %r12
0xffffffff9ea440b0 <memcg_deactivate_kmem_caches+0x10>: mov %rsi,%r12
0xffffffff9ea440b3 <memcg_deactivate_kmem_caches+0x13>: push %rbp
0xffffffff9ea440b4 <memcg_deactivate_kmem_caches+0x14>: push %rbx
0xffffffff9ea440b5 <memcg_deactivate_kmem_caches+0x15>: test %rdi,%rdi
0xffffffff9ea440b8 <memcg_deactivate_kmem_caches+0x18>: je 0xffffffff9ea442a1 <memcg_deactivate_kmem_caches+0x201>
0xffffffff9ea440be <memcg_deactivate_kmem_caches+0x1e>: movslq 0x9c8(%rdi),%r13
0xffffffff9ea440c5 <memcg_deactivate_kmem_caches+0x25>: callq 0xffffffff9e8b2310 <cpus_read_lock>
0xffffffff9ea440ca <memcg_deactivate_kmem_caches+0x2a>: callq 0xffffffff9ea98fd0 <get_online_mems>
0xffffffff9ea440cf <memcg_deactivate_kmem_caches+0x2f>: mov $0xffffffff9fad18c0,%rdi
0xffffffff9ea440d6 <memcg_deactivate_kmem_caches+0x36>: callq 0xffffffff9f092380 <mutex_lock>
0xffffffff9ea440db <memcg_deactivate_kmem_caches+0x3b>: mov 0x108d75e(%rip),%rax # 0xffffffff9fad1840
0xffffffff9ea440e2 <memcg_deactivate_kmem_caches+0x42>: lea -0x120(%rax),%rbp
0xffffffff9ea440e9 <memcg_deactivate_kmem_caches+0x49>: cmp $0xffffffff9fad1840,%rax
0xffffffff9ea440ef <memcg_deactivate_kmem_caches+0x4f>: je 0xffffffff9ea44197 <memcg_deactivate_kmem_caches+0xf7>
0xffffffff9ea440f5 <memcg_deactivate_kmem_caches+0x55>: mov 0x118(%rbp),%rax
0xffffffff9ea440fc <memcg_deactivate_kmem_caches+0x5c>: lea (%rax,%r13,8),%r15
0xffffffff9ea44100 <memcg_deactivate_kmem_caches+0x60>: mov 0x10(%r15),%rbx
0xffffffff9ea44104 <memcg_deactivate_kmem_caches+0x64>: test %rbx,%rbx
0xffffffff9ea44107 <memcg_deactivate_kmem_caches+0x67>: je 0xffffffff9ea4417d <memcg_deactivate_kmem_caches+0xdd>
0xffffffff9ea44109 <memcg_deactivate_kmem_caches+0x69>: cmpq $0x0,0x110(%rbx)
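P.S. root cause（直接原因）: c = arr->entries[idx] 取得的 kmem_cache 指標（RBX）為 0x0000000000000005，並非有效的核心位址；kmemcg_cache_deactivate() 內的 is_root_cache() 讀取 memcg_params（位於 rbx+0x110）時，實際存取 0x5 + 0x110 = 0x115，與 PANIC 訊息中的位址 0000000000000115 相符。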