------- Comment From <email address hidden> 2018-01-23 09:23 EDT------- Hi Joseph,
I was able to reproduce the problem on the HWE kernel (XENIAL):
-- System
root@tuletapio2-lp3:~# uname -a Linux tuletapio2-lp3 4.13.13 #1 SMP Tue Jan 23 07:41:39 CST 2018 ppc64le ppc64le ppc64le GNU/Linux
root@tuletapio2-lp3:~# cat /proc/meminfo HugePages_Total: 2 HugePages_Free: 2 HugePages_Rsvd: 0 HugePages_Surp: 0 Hugepagesize: 16777216 kB
root@tuletapio2-lp3:~# cat /proc/cmdline BOOT_IMAGE=/boot/vmlinux-4.13.13 root=UUID=728bebfe-83ba-410d-917b-b552edbbb0a3 ro quiet splash default_hugepagesz=16G hugepagesz=16G hugepages=2
-- DUMP Unable to handle kernel paging request for data at address 0x5deadbeef0000108 Faulting instruction address: 0xc0000000002cf374 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=2048 NUMA pSeries Modules linked in: CPU: 7 PID: 3769 Comm: mem-on-off-test Not tainted 4.13.13 #1 task: c0000007d9400000 task.stack: c0000007d9480000 NIP: c0000000002cf374 LR: c0000000002cf298 CTR: c000000000134ef0 REGS: c0000007d94837d0 TRAP: 0380 Not tainted (4.13.13) MSR: 800000000280b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE> CR: 44422484 XER: 00000000 CFAR: c0000000002cf308 SOFTE: 1 GPR00: c0000000002cf28c c0000007d9483a50 c000000000e6ff00 c000000000fbc580 GPR04: f000000004000000 5deadbeef0000100 5deadbeef0000200 0000000000000002 GPR08: 5deadbeef0000000 0000000000000001 c000000000fbc580 c0000007fb000628 GPR12: 0000000000002200 c00000000fd02a00 0000000000001000 f000000004000000 GPR16: 0000000000000002 0000000000000000 c000000000ea3b00 0000000000001000 GPR20: 00000000ffffdb4f 0000000000000001 c000000fffd44600 c0000007d9483b60 GPR24: 0000000000100000 c000000000f0dad8 c000000000eb2dd0 0000000000000001 GPR28: 0000000000000000 c000000000fc4580 c000000000fd4580 f000000004000000 NIP [c0000000002cf374] dissolve_free_huge_page+0x124/0x230 LR [c0000000002cf298] dissolve_free_huge_page+0x48/0x230 Call Trace: [c0000007d9483a50] [c0000000002cf28c] dissolve_free_huge_page+0x3c/0x230 (unreliable) [c0000007d9483a90] [c0000000002cf548] dissolve_free_huge_pages+0xc8/0x150 [c0000007d9483ae0] [c0000000002ee1c8] __offline_pages.constprop.5+0x398/0xa90 [c0000007d9483c30] [c000000000645870] memory_subsys_offline+0x60/0xf0 [c0000007d9483c60] [c000000000623434] device_offline+0xf4/0x130 [c0000007d9483ca0] [c000000000645718] store_mem_state+0x178/0x190 [c0000007d9483ce0] [c00000000061ea34] dev_attr_store+0x34/0x60 [c0000007d9483d00] [c0000000003bdd10] sysfs_kf_write+0x60/0xa0 [c0000007d9483d20] [c0000000003bcaac] kernfs_fop_write+0x16c/0x240 [c0000007d9483d70] [c000000000314cf4] __vfs_write+0x34/0x70 
[c0000007d9483d90] [c00000000031673c] vfs_write+0xcc/0x230 [c0000007d9483de0] [c000000000318410] SyS_write+0x60/0x110 [c0000007d9483e30] [c00000000000b860] system_call+0x58/0x6c Instruction dump: e90a0030 88ff0007 7fa94000 419e0120 3d005dea e8df0028 e8bf0020 7fe4fb78 6108dbee 7d435378 790807c6 6508f000 <f8c50008> f8a60000 7d094378 61080100 ---[ end trace 23e3dda3fe0a58bd ]--
------- Comment From <email address hidden> 2018-01-23 09:23 EDT-------
Hi Joseph,
I was able to reproduce the problem on the HWE kernel (XENIAL):
-- System
root@tuletapio2-lp3:~# uname -a
Linux tuletapio2-lp3 4.13.13 #1 SMP Tue Jan 23 07:41:39 CST 2018 ppc64le ppc64le ppc64le GNU/Linux
root@tuletapio2-lp3:~# cat /proc/meminfo
HugePages_Total: 2
HugePages_Free: 2
HugePages_Rsvd: 0
HugePages_Surp: 0
Hugepagesize: 16777216 kB
root@tuletapio2-lp3:~# cat /proc/cmdline
BOOT_IMAGE=/boot/vmlinux-4.13.13 root=UUID=728bebfe-83ba-410d-917b-b552edbbb0a3 ro quiet splash default_hugepagesz=16G hugepagesz=16G hugepages=2
-- DUMP
Unable to handle kernel paging request for data at address 0x5deadbeef0000108
Faulting instruction address: 0xc0000000002cf374
Oops: Kernel access of bad area, sig: 11 [#1]
SMP NR_CPUS=2048
NUMA
pSeries
Modules linked in:
CPU: 7 PID: 3769 Comm: mem-on-off-test Not tainted 4.13.13 #1
task: c0000007d9400000 task.stack: c0000007d9480000
NIP: c0000000002cf374 LR: c0000000002cf298 CTR: c000000000134ef0
REGS: c0000007d94837d0 TRAP: 0380 Not tainted (4.13.13)
MSR: 800000000280b033 <SF,VEC,VSX,EE,FP,ME,IR,DR,RI,LE>
CR: 44422484 XER: 00000000
CFAR: c0000000002cf308 SOFTE: 1
GPR00: c0000000002cf28c c0000007d9483a50 c000000000e6ff00 c000000000fbc580
GPR04: f000000004000000 5deadbeef0000100 5deadbeef0000200 0000000000000002
GPR08: 5deadbeef0000000 0000000000000001 c000000000fbc580 c0000007fb000628
GPR12: 0000000000002200 c00000000fd02a00 0000000000001000 f000000004000000
GPR16: 0000000000000002 0000000000000000 c000000000ea3b00 0000000000001000
GPR20: 00000000ffffdb4f 0000000000000001 c000000fffd44600 c0000007d9483b60
GPR24: 0000000000100000 c000000000f0dad8 c000000000eb2dd0 0000000000000001
GPR28: 0000000000000000 c000000000fc4580 c000000000fd4580 f000000004000000
NIP [c0000000002cf374] dissolve_free_huge_page+0x124/0x230
LR [c0000000002cf298] dissolve_free_huge_page+0x48/0x230
Call Trace:
[c0000007d9483a50] [c0000000002cf28c] dissolve_free_huge_page+0x3c/0x230 (unreliable)
[c0000007d9483a90] [c0000000002cf548] dissolve_free_huge_pages+0xc8/0x150
[c0000007d9483ae0] [c0000000002ee1c8] __offline_pages.constprop.5+0x398/0xa90
[c0000007d9483c30] [c000000000645870] memory_subsys_offline+0x60/0xf0
[c0000007d9483c60] [c000000000623434] device_offline+0xf4/0x130
[c0000007d9483ca0] [c000000000645718] store_mem_state+0x178/0x190
[c0000007d9483ce0] [c00000000061ea34] dev_attr_store+0x34/0x60
[c0000007d9483d00] [c0000000003bdd10] sysfs_kf_write+0x60/0xa0
[c0000007d9483d20] [c0000000003bcaac] kernfs_fop_write+0x16c/0x240
[c0000007d9483d70] [c000000000314cf4] __vfs_write+0x34/0x70
[c0000007d9483d90] [c00000000031673c] vfs_write+0xcc/0x230
[c0000007d9483de0] [c000000000318410] SyS_write+0x60/0x110
[c0000007d9483e30] [c00000000000b860] system_call+0x58/0x6c
Instruction dump:
e90a0030 88ff0007 7fa94000 419e0120 3d005dea e8df0028 e8bf0020 7fe4fb78
6108dbee 7d435378 790807c6 6508f000 <f8c50008> f8a60000 7d094378 61080100
---[ end trace 23e3dda3fe0a58bd ]--