需要有效的 Red Hat 訂閱(Subscription)才能取得 kernel-debuginfo 套件
# subscription-manager clean
# subscription-manager register --force --auto-attach --username xxx --password xxx
# subscription-manager repos --enable=rhel-8-for-x86_64-baseos-debug-rpms
# yum install -y kernel-debuginfo-4.18.0-193.14.3.el8_2.x86_64
載入vmcore
# crash /usr/lib/debug/lib/modules/4.18.0-193.14.3.el8_2.x86_64/vmlinux vmcore_filename KERNEL: /usr/lib/debug/lib/modules/4.18.0-193.14.3.el8_2.x86_64/vmlinux DUMPFILE: vmcore [PARTIAL DUMP] CPUS: 12 ...: PANIC: "BUG: unable to handle kernel NULL pointer dereference at 0000000000000115" ...: COMMAND: "kworker/5:1" TASK: ffff91396df00000 [THREAD_INFO: ffff91396df00000] CPU: 5 STATE: TASK_RUNNING (PANIC)
backtrace
crash> bt PID: 60228 TASK: ffff91396df00000 CPU: 5 COMMAND: "kworker/5:1" #0 [ffffa481e3ee7b60] machine_kexec at ffffffff9e859a5e #1 [ffffa481e3ee7bb8] __crash_kexec at ffffffff9e9591fd #2 [ffffa481e3ee7c80] crash_kexec at ffffffff9e95a0dd #3 [ffffa481e3ee7c98] oops_end at ffffffff9e821edd #4 [ffffa481e3ee7cb8] no_context at ffffffff9e86872e #5 [ffffa481e3ee7d10] do_page_fault at ffffffff9e869262 #6 [ffffa481e3ee7d40] page_fault at ffffffff9f20120e [exception RIP: memcg_deactivate_kmem_caches+105] RIP: ffffffff9ea44109 RSP: ffffa481e3ee7df8 RFLAGS: 00010206 RAX: ffff913975db8000 RBX: 0000000000000005 RCX: ffff913258c1aba8 RDX: ffff91396df00000 RSI: ffff913975fd8000 RDI: ffffffff9fad18c0 RBP: ffff912a87d77e00 R8: 0000000000000008 R9: 0000796f72747365 R10: 8080808080808080 R11: 0000000000000010 R12: ffff913975fd8000 R13: 0000000000000013 R14: ffff913223634000 R15: ffff913975db8098 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 #7 [ffffa481e3ee7e28] memcg_offline_kmem at ffffffff9eaab986 #8 [ffffa481e3ee7e48] mem_cgroup_css_offline at ffffffff9eaad792 #9 [ffffa481e3ee7e80] css_killed_work_fn at ffffffff9e95f59d #10 [ffffa481e3ee7e98] process_one_work at ffffffff9e8ce7d7 #11 [ffffa481e3ee7ed8] worker_thread at ffffffff9e8ceef0 #12 [ffffa481e3ee7f10] kthread at ffffffff9e8d4802 #13 [ffffa481e3ee7f50] ret_from_fork at ffffffff9f200255
disassemble
crash> dis -l memcg_deactivate_kmem_caches+105 /usr/src/debug/kernel-4.18.0-193.14.3.el8_2/linux-4.18.0-193.14.3.el8_2.x86_64/mm/slab_common.c: 770 0xffffffff9ea44109 <memcg_deactivate_kmem_caches+105>: cmpq $0x0,0x110(%rbx)
/usr/src/debug/kernel-4.18.0-193.14.3.el8_2/linux-4.18.0-193.14.3.el8_2.x86_64/mm/slab_common.c +770
static void kmemcg_cache_deactivate(struct kmem_cache *s) { if (WARN_ON_ONCE(is_root_cache(s))) return;
is_root_cache()
static inline bool is_root_cache(struct kmem_cache *s) { return !s->memcg_params.root_cache; }
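kmemcg_cache_deactivate() 是由 memcg_deactivate_kmem_caches() 呼叫,且已被編譯器 inline 展開,因此 exception RIP 落在 memcg_deactivate_kmem_caches+105,但 dis -l 對應到的原始碼行號是 kmemcg_cache_deactivate() 內的 slab_common.c:770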
void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg, struct mem_cgroup *parent) { int idx; struct memcg_cache_array *arr; struct kmem_cache *s, *c; unsigned int nr_reparented; idx = memcg_cache_id(memcg); get_online_cpus(); get_online_mems(); mutex_lock(&slab_mutex); list_for_each_entry(s, &slab_root_caches, root_caches_node) { arr = rcu_dereference_protected(s->memcg_params.memcg_caches, lockdep_is_held(&slab_mutex)); c = arr->entries[idx]; if (!c) continue; kmemcg_cache_deactivate(c);
call stack
crash> bt -FFls #6 [ffffa481e3ee7d40] page_fault+30 at ffffffff9f20120e /usr/src/debug/kernel-4.18.0-193.14.3.el8_2/linux-4.18.0-193.14.3.el8_2.x86_64/arch/x86/entry/entry_64.S: 1164 [exception RIP: memcg_deactivate_kmem_caches+105] RIP: ffffffff9ea44109 RSP: ffffa481e3ee7df8 RFLAGS: 00010206 RAX: ffff913975db8000 RBX: 0000000000000005 RCX: ffff913258c1aba8 RDX: ffff91396df00000 RSI: ffff913975fd8000 RDI: ffffffff9fad18c0 RBP: ffff912a87d77e00 R8: 0000000000000008 R9: 0000796f72747365 R10: 8080808080808080 R11: 0000000000000010 R12: ffff913975fd8000 R13: 0000000000000013 R14: ffff913223634000 R15: ffff913975db8098 ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0018 /usr/src/debug/kernel-4.18.0-193.14.3.el8_2/linux-4.18.0-193.14.3.el8_2.x86_64/mm/slab_common.c: 770 ffffa481e3ee7d48: [ffff913975db8098:kmalloc-2k] [ffff913223634000:kmalloc-4k] ffffa481e3ee7d58: 0000000000000013 [ffff913975fd8000:kmalloc-4k] ffffa481e3ee7d68: [ffff912a87d77e00:kmem_cache] 0000000000000005 ffffa481e3ee7d78: 0000000000000010 8080808080808080 ffffa481e3ee7d88: 0000796f72747365 0000000000000008 ffffa481e3ee7d98: [ffff913975db8000:kmalloc-2k] [ffff913258c1aba8:kmalloc-4k] ffffa481e3ee7da8: [ffff91396df00000:task_struct] [ffff913975fd8000:kmalloc-4k] ffffa481e3ee7db8: slab_mutex ffffffffffffffff ffffa481e3ee7dc8: memcg_deactivate_kmem_caches+105 0000000000000010 ffffa481e3ee7dd8: 0000000000010206 ffffa481e3ee7df8 ffffa481e3ee7de8: 0000000000000018 memcg_deactivate_kmem_caches+59 ffffa481e3ee7df8: [ffff913223634000:kmalloc-4k] [ffff913975fd8000:kmalloc-4k] ffffa481e3ee7e08: [ffff913223634b18:kmalloc-4k] [ffff913223634000:kmalloc-4k] ffffa481e3ee7e18: [ffff913223634b28:kmalloc-4k] [ffff913223634b18:kmalloc-4k] ffffa481e3ee7e28: memcg_offline_kmem+54
bt -FFls | grep -Eo 'ffff.{12}:kmem_cache'
kmem_cache struct
crash> struct kmem_cache struct kmem_cache { struct kmem_cache_cpu *cpu_slab; slab_flags_t flags; unsigned long min_partial; unsigned int size; unsigned int object_size; unsigned int offset; unsigned int cpu_partial; struct kmem_cache_order_objects oo; struct kmem_cache_order_objects max; struct kmem_cache_order_objects min; gfp_t allocflags; int refcount; void (*ctor)(void *); unsigned int inuse; unsigned int align; unsigned int red_left_pad; const char *name; struct list_head list; struct kobject kobj; struct work_struct kobj_remove_work; struct memcg_cache_params memcg_params; unsigned int max_attr_size; struct kset *memcg_kset; unsigned int remote_node_defrag_ratio; unsigned int *random_seq; unsigned int useroffset; unsigned int usersize; struct kmem_cache_node *node[1024]; } SIZE: 8680
memcg_params offset (0x110)
crash> struct -ox kmem_cache struct kmem_cache { [0x0] struct kmem_cache_cpu *cpu_slab; [0x8] slab_flags_t flags; [0x10] unsigned long min_partial; [0x18] unsigned int size; [0x1c] unsigned int object_size; [0x20] unsigned int offset; [0x24] unsigned int cpu_partial; [0x28] struct kmem_cache_order_objects oo; [0x2c] struct kmem_cache_order_objects max; [0x30] struct kmem_cache_order_objects min; [0x34] gfp_t allocflags; [0x38] int refcount; [0x40] void (*ctor)(void *); [0x48] unsigned int inuse; [0x4c] unsigned int align; [0x50] unsigned int red_left_pad; [0x58] const char *name; [0x60] struct list_head list; [0x70] struct kobject kobj; [0xd0] struct work_struct kobj_remove_work; [0x110] struct memcg_cache_params memcg_params; [0x1c0] unsigned int max_attr_size; [0x1c8] struct kset *memcg_kset; [0x1d0] unsigned int remote_node_defrag_ratio; [0x1d8] unsigned int *random_seq; [0x1e0] unsigned int useroffset; [0x1e4] unsigned int usersize; [0x1e8] struct kmem_cache_node *node[1024]; } SIZE: 0x21e8
disassemble memcg_deactivate_kmem_caches
crash> dis -x memcg_deactivate_kmem_caches 0xffffffff9ea440a0 <memcg_deactivate_kmem_caches>: nopl 0x0(%rax,%rax,1) [FTRACE NOP] 0xffffffff9ea440a5 <memcg_deactivate_kmem_caches+0x5>: push %r15 0xffffffff9ea440a7 <memcg_deactivate_kmem_caches+0x7>: push %r14 0xffffffff9ea440a9 <memcg_deactivate_kmem_caches+0x9>: mov %rdi,%r14 0xffffffff9ea440ac <memcg_deactivate_kmem_caches+0xc>: push %r13 0xffffffff9ea440ae <memcg_deactivate_kmem_caches+0xe>: push %r12 0xffffffff9ea440b0 <memcg_deactivate_kmem_caches+0x10>: mov %rsi,%r12 0xffffffff9ea440b3 <memcg_deactivate_kmem_caches+0x13>: push %rbp 0xffffffff9ea440b4 <memcg_deactivate_kmem_caches+0x14>: push %rbx 0xffffffff9ea440b5 <memcg_deactivate_kmem_caches+0x15>: test %rdi,%rdi 0xffffffff9ea440b8 <memcg_deactivate_kmem_caches+0x18>: je 0xffffffff9ea442a1 <memcg_deactivate_kmem_caches+0x201> 0xffffffff9ea440be <memcg_deactivate_kmem_caches+0x1e>: movslq 0x9c8(%rdi),%r13 0xffffffff9ea440c5 <memcg_deactivate_kmem_caches+0x25>: callq 0xffffffff9e8b2310 <cpus_read_lock> 0xffffffff9ea440ca <memcg_deactivate_kmem_caches+0x2a>: callq 0xffffffff9ea98fd0 <get_online_mems> 0xffffffff9ea440cf <memcg_deactivate_kmem_caches+0x2f>: mov $0xffffffff9fad18c0,%rdi 0xffffffff9ea440d6 <memcg_deactivate_kmem_caches+0x36>: callq 0xffffffff9f092380 <mutex_lock> 0xffffffff9ea440db <memcg_deactivate_kmem_caches+0x3b>: mov 0x108d75e(%rip),%rax # 0xffffffff9fad1840 0xffffffff9ea440e2 <memcg_deactivate_kmem_caches+0x42>: lea -0x120(%rax),%rbp 0xffffffff9ea440e9 <memcg_deactivate_kmem_caches+0x49>: cmp $0xffffffff9fad1840,%rax 0xffffffff9ea440ef <memcg_deactivate_kmem_caches+0x4f>: je 0xffffffff9ea44197 <memcg_deactivate_kmem_caches+0xf7> 0xffffffff9ea440f5 <memcg_deactivate_kmem_caches+0x55>: mov 0x118(%rbp),%rax 0xffffffff9ea440fc <memcg_deactivate_kmem_caches+0x5c>: lea (%rax,%r13,8),%r15 0xffffffff9ea44100 <memcg_deactivate_kmem_caches+0x60>: mov 0x10(%r15),%rbx 0xffffffff9ea44104 <memcg_deactivate_kmem_caches+0x64>: test %rbx,%rbx 
0xffffffff9ea44107 <memcg_deactivate_kmem_caches+0x67>: je 0xffffffff9ea4417d <memcg_deactivate_kmem_caches+0xdd> 0xffffffff9ea44109 <memcg_deactivate_kmem_caches+0x69>: cmpq $0x0,0x110(%rbx)
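P.S. root cause: kmem_cache 指標 c = arr->entries[idx] 的值(rbx)為 0x0000000000000005,它不是 NULL,所以通過了 if (!c) 的檢查(test %rbx,%rbx),但並不是有效的核心位址。memcg_params 位於 struct kmem_cache 的偏移 0x110,因此 is_root_cache(c) 讀取 rbx+0x110 = 0x5+0x110 = 0x115,正好就是 panic 訊息中的位址 0000000000000115。暫存器對應:r13 = idx = 0x13,rbp = s(root cache)= ffff912a87d77e00,rax = arr = ffff913975db8000,r15 = arr + idx*8 = ffff913975db8098,rbx = 0x10(%r15) = arr->entries[idx] = 0x5。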