Older crashdump analysis confirmed the bnx2x driver/status being in traffic class setup / stop hardware in LLDP path.
PID: 3936 TASK: ffff883fdc9b1c00 CPU: 11 COMMAND: "kworker/11:0" #0 [ffff883fec593ce0] __schedule at ffffffff81850bae #1 [ffff883fec593d30] schedule at ffffffff818510f5 #2 [ffff883fec593d48] schedule_preempt_disabled at ffffffff8185139e #3 [ffff883fec593d58] __mutex_lock_slowpath at ffffffff81852fd9 #4 [ffff883fec593db0] mutex_lock at ffffffff8185306f #5 [ffff883fec593dc8] rtnl_lock at ffffffff81756e15 #6 [ffff883fec593dd8] bnx2x_sp_rtnl_task at ffffffffc025d8c4 [bnx2x] #7 [ffff883fec593e20] process_one_work at ffffffff8109e68b #8 [ffff883fec593e60] worker_thread at ffffffff8109e9fb #9 [ffff883fec593ec0] kthread at ffffffff810a4dc7 #10 [ffff883fec593f50] ret_from_fork at ffffffff81855735
Check this stack frame:
#6 [ffff883fec593dd8] bnx2x_sp_rtnl_task at ffffffffc025d8c4 [bnx2x]
Which is 9 x 8-byte/64-bit values long:
#7 [ffff883fec593e20]
ffff883fec593e20 - ffff883fec593dd8 = 0x48 bytes = 72 bytes = 9 x 8 bytes.
crash> rd ffff883fec593dd8 9 ffff883fec593dd8: ffffffffc025d8c4 ffff883feaa0a178 ..%.....x...?... ffff883fec593de8: 6199482b89f76272 ffff883fe9571080 rb..+H.a..W.?... ffff883fec593df8: ffff883ffdf56b40 ffff883ffdf5b400 @k..?.......?... ffff883fec593e08: 00000000000002c0 ffff881fe93f0dd8 ..........?..... ffff883fec593e18: ffff883fec593e58 X>Y.?...
The top of the stack has the RIP/next-instruction contents, which matches what's in the stack frame line.
ffffffffc025d8c4
Looking at the disassembly, it's right after the 'callq rtnl_lock', as expected.
static void bnx2x_sp_rtnl_task(struct work_struct *work) {
rdi = work
0xffffffffc025d890 <bnx2x_sp_rtnl_task>: nopl 0x0(%rax,%rax,1) [FTRACE NOP] 0xffffffffc025d895 <bnx2x_sp_rtnl_task+5>: push %rbp 0xffffffffc025d896 <bnx2x_sp_rtnl_task+6>: mov %rsp,%rbp 0xffffffffc025d899 <bnx2x_sp_rtnl_task+9>: push %r15 0xffffffffc025d89b <bnx2x_sp_rtnl_task+11>: push %r14 0xffffffffc025d89d <bnx2x_sp_rtnl_task+13>: push %r13 0xffffffffc025d89f <bnx2x_sp_rtnl_task+15>: push %r12
0xffffffffc025d8a1 <bnx2x_sp_rtnl_task+17>: lea -0x598(%rdi),%r12
^ struct bnx2x *bp = container_of(work, struct bnx2x, sp_rtnl_task.work);
r12 = bp
0xffffffffc025d8a8 <bnx2x_sp_rtnl_task+24>: push %rbx 0xffffffffc025d8a9 <bnx2x_sp_rtnl_task+25>: mov %rdi,%rbx
rbx = rdi = work
<from the future.. stackframe from mutex_lock().. >
work = rbx = 0xffff881fe93f0dd8
crash> struct work_struct ffff881fe93f0dd8 struct work_struct { data = { counter = 704 }, entry = { next = 0xffff881fe93f0de0, prev = 0xffff881fe93f0de0 }, func = 0xffffffffc025d890 <bnx2x_sp_rtnl_task> }
bp = 0xffff881fe93f0840 (offset in asm above)
crash> eval 0xffff881fe93f0dd8 - 0x598 hexadecimal: ffff881fe93f0840 decimal: 18446612269371426880 (-131804338124736) octal: 1777774201775117604100 binary: 1111111111111111100010000001111111101001001111110000100001000000
crash> struct bnx2x ffff881fe93f0840 struct bnx2x { fp = 0xffff881fe95c4000, sp_objs = 0xffff881fe9fb0000, fp_stats = 0xffff881fe935c000, bnx2x_txq = 0xffff881fe87ef000, regview = 0xffffc9001d000000, doorbells = 0xffffc90019878000, ... dev = 0xffff881fe93f0000, pdev = 0xffff881fef03b000, iro_arr = 0xffff881ff0756000, recovery_state = BNX2X_RECOVERY_DONE, ... cnic_support = 1 '\001', cnic_enabled = false, cnic_loaded = false, cnic_probe = 0xffffffffc0243280 <bnx2x_cnic_probe>, fcoe_init = false, ... sp_task = { ... func = 0xffffffffc0251960 <bnx2x_sp_task> ... sp_rtnl_task = { work = { data = { counter = 704 }, entry = { next = 0xffff881fe93f0de0, prev = 0xffff881fe93f0de0 }, func = 0xffffffffc025d890 <bnx2x_sp_rtnl_task> }, ... fw_ver = "FFV14.04.18 \000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000", ... dcb_state = 1, dcbx_enabled = 2, dcbx_mode_uset = false, dcbx_config_params = { overwrite_settings = 1, admin_dcbx_version = 0, admin_ets_enable = 1, admin_pfc_enable = 1, admin_tc_supported_tx_enable = 1, admin_ets_configuration_tx_enable = 1, admin_ets_recommendation_tx_enable = 0, admin_pfc_tx_enable = 1, admin_application_priority_tx_enable = 1, admin_ets_willing = 1, admin_ets_reco_valid = 1, admin_pfc_willing = 1, admin_app_priority_willing = 1, admin_configuration_bw_precentage = {100, 0, 0, 0, 0, 0, 0, 0}, admin_configuration_ets_pg = {0, 0, 0, 0, 0, 0, 0, 0}, admin_recommendation_bw_precentage = {100, 0, 0, 0, 0, 0, 0, 0}, admin_recommendation_ets_pg = {0, 1, 2, 3, 4, 5, 6, 7}, admin_pfc_bitmap = 0, admin_priority_app_table = {{ valid = 0, priority = 0, traffic_type = 0, app_id = 0 }, { valid = 0, priority = 0, traffic_type = 0, app_id = 0 }, { valid = 0, priority = 0, traffic_type = 0, app_id = 0 }, { valid = 0, priority = 0, traffic_type = 0, app_id = 0 }}, admin_default_priority = 0 }, dcbx_port_params = { pfc = { enabled = 0, priority_non_pauseable_mask = 0 }, ets = { enabled = 0, num_of_cos = 0 '\000', cos_params = {{ bw_tbl 
= 4294967295, pri_bitmask = 0, strict = 3 '\003', pauseable = 0 '\000' }, { bw_tbl = 4294967295, pri_bitmask = 0, strict = 3 '\003', pauseable = 0 '\000' }, { bw_tbl = 4294967295, pri_bitmask = 0, strict = 3 '\003', pauseable = 0 '\000' }} }, app = { enabled = 0, traffic_type_priority = {4294967295, 4294967295, 4294967295} } }, dcb_version = 22,
looks valid
and what it would do in this function.. bnx2x_sp_rtnl_task()
crash> struct -x bnx2x.sp_rtnl_state ffff881fe93f0840 sp_rtnl_state = 0x201
crash> eval 0x201 hexadecimal: 201 decimal: 513 octal: 1001 binary: 0000000000000000000000000000000000000000000000000000001000000001
binary: 1000000001
bit 0 is set. BNX2X_SP_RTNL_SETUP_TC = 0 bit 9 is set. BNX2X_SP_RTNL_TX_STOP = 9
crash> whatis sp_rtnl_flag enum sp_rtnl_flag { BNX2X_SP_RTNL_SETUP_TC = 0 BNX2X_SP_RTNL_TX_TIMEOUT = 1 BNX2X_SP_RTNL_FAN_FAILURE = 2 BNX2X_SP_RTNL_AFEX_F_UPDATE = 3 BNX2X_SP_RTNL_ENABLE_SRIOV = 4 BNX2X_SP_RTNL_VFPF_MCAST = 5 BNX2X_SP_RTNL_VFPF_CHANNEL_DOWN = 6 BNX2X_SP_RTNL_RX_MODE = 7 BNX2X_SP_RTNL_HYPERVISOR_VLAN = 8 BNX2X_SP_RTNL_TX_STOP = 9 BNX2X_SP_RTNL_GET_DRV_VERSION = 10 BNX2X_SP_RTNL_ADD_VXLAN_PORT = 11 BNX2X_SP_RTNL_DEL_VXLAN_PORT = 12 };
Older crashdump analysis confirmed the bnx2x driver/status
being in traffic class setup / stop hardware in LLDP path.
PID: 3936 TASK: ffff883fdc9b1c00 CPU: 11 COMMAND: "kworker/11:0"
#0 [ffff883fec593ce0] __schedule at ffffffff81850bae
#1 [ffff883fec593d30] schedule at ffffffff818510f5
#2 [ffff883fec593d48] schedule_preempt_disabled at ffffffff8185139e
#3 [ffff883fec593d58] __mutex_lock_slowpath at ffffffff81852fd9
#4 [ffff883fec593db0] mutex_lock at ffffffff8185306f
#5 [ffff883fec593dc8] rtnl_lock at ffffffff81756e15
#6 [ffff883fec593dd8] bnx2x_sp_rtnl_task at ffffffffc025d8c4 [bnx2x]
#7 [ffff883fec593e20] process_one_work at ffffffff8109e68b
#8 [ffff883fec593e60] worker_thread at ffffffff8109e9fb
#9 [ffff883fec593ec0] kthread at ffffffff810a4dc7
#10 [ffff883fec593f50] ret_from_fork at ffffffff81855735
Check this stack frame:
#6 [ffff883fec593dd8] bnx2x_sp_rtnl_task at ffffffffc025d8c4 [bnx2x]
Which is 9 x 8-byte/64-bit values long:
#7 [ffff883fec593e20]
ffff883fec593e20 - ffff883fec593dd8 = 0x48 bytes = 72 bytes = 9 x 8 bytes.
crash> rd ffff883fec593dd8 9
ffff883fec593dd8: ffffffffc025d8c4 ffff883feaa0a178 ..%.....x...?...
ffff883fec593de8: 6199482b89f76272 ffff883fe9571080 rb..+H.a..W.?...
ffff883fec593df8: ffff883ffdf56b40 ffff883ffdf5b400 @k..?.......?...
ffff883fec593e08: 00000000000002c0 ffff881fe93f0dd8 ..........?.....
ffff883fec593e18: ffff883fec593e58 X>Y.?...
The top of the stack has the RIP/next-instruction contents,
which matches what's in the stack frame line.
ffffffffc025d8c4
Looking at the disassembly, it's right after the 'callq rtnl_lock', as expected.
static void bnx2x_sp_rtnl_task(struct work_struct *work)
{
rdi = work
0xffffffffc025d890 <bnx2x_sp_rtnl_task>: nopl 0x0(%rax,%rax,1) [FTRACE NOP]
0xffffffffc025d895 <bnx2x_sp_rtnl_task+5>: push %rbp
0xffffffffc025d896 <bnx2x_sp_rtnl_task+6>: mov %rsp,%rbp
0xffffffffc025d899 <bnx2x_sp_rtnl_task+9>: push %r15
0xffffffffc025d89b <bnx2x_sp_rtnl_task+11>: push %r14
0xffffffffc025d89d <bnx2x_sp_rtnl_task+13>: push %r13
0xffffffffc025d89f <bnx2x_sp_rtnl_task+15>: push %r12
0xffffffffc025d8a1 <bnx2x_sp_rtnl_task+17>: lea -0x598(%rdi),%r12
^
struct bnx2x *bp = container_of(work, struct bnx2x, sp_rtnl_task.work);
r12 = bp
0xffffffffc025d8a8 <bnx2x_sp_rtnl_task+24>: push %rbx
0xffffffffc025d8a9 <bnx2x_sp_rtnl_task+25>: mov %rdi,%rbx
rbx = rdi = work
<from the future.. stackframe from mutex_lock().. >
work = rbx = 0xffff881fe93f0dd8
crash> struct work_struct ffff881fe93f0dd8
struct work_struct {
data = {
counter = 704
},
entry = {
next = 0xffff881fe93f0de0,
prev = 0xffff881fe93f0de0
},
func = 0xffffffffc025d890 <bnx2x_sp_rtnl_task>
}
bp = 0xffff881fe93f0840 (offset in asm above)
crash> eval 0xffff881fe93f0dd8 - 0x598
hexadecimal: ffff881fe93f0840
decimal: 18446612269371426880 (-131804338124736)
octal: 1777774201775117604100
binary: 1111111111111111100010000001111111101001001111110000100001000000
crash> struct bnx2x ffff881fe93f0840
struct bnx2x {
fp = 0xffff881fe95c4000,
sp_objs = 0xffff881fe9fb0000,
fp_stats = 0xffff881fe935c000,
bnx2x_txq = 0xffff881fe87ef000,
regview = 0xffffc9001d000000,
doorbells = 0xffffc90019878000,
...
dev = 0xffff881fe93f0000,
pdev = 0xffff881fef03b000,
iro_arr = 0xffff881ff0756000,
recovery_state = BNX2X_RECOVERY_DONE,
...
cnic_support = 1 '\001',
cnic_enabled = false,
cnic_loaded = false,
cnic_probe = 0xffffffffc0243280 <bnx2x_cnic_probe>,
fcoe_init = false,
...
sp_task = {
...
func = 0xffffffffc0251960 <bnx2x_sp_task>
...
sp_rtnl_task = {
work = {
data = {
counter = 704
},
entry = {
next = 0xffff881fe93f0de0,
prev = 0xffff881fe93f0de0
},
func = 0xffffffffc025d890 <bnx2x_sp_rtnl_task>
},
...
fw_ver = "FFV14.04.18 \000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000",
...
dcb_state = 1,
dcbx_enabled = 2,
dcbx_mode_uset = false,
dcbx_config_params = {
overwrite_settings = 1,
admin_dcbx_version = 0,
admin_ets_enable = 1,
admin_pfc_enable = 1,
admin_tc_supported_tx_enable = 1,
admin_ets_configuration_tx_enable = 1,
admin_ets_recommendation_tx_enable = 0,
admin_pfc_tx_enable = 1,
admin_application_priority_tx_enable = 1,
admin_ets_willing = 1,
admin_ets_reco_valid = 1,
admin_pfc_willing = 1,
admin_app_priority_willing = 1,
admin_configuration_bw_precentage = {100, 0, 0, 0, 0, 0, 0, 0},
admin_configuration_ets_pg = {0, 0, 0, 0, 0, 0, 0, 0},
admin_recommendation_bw_precentage = {100, 0, 0, 0, 0, 0, 0, 0},
admin_recommendation_ets_pg = {0, 1, 2, 3, 4, 5, 6, 7},
admin_pfc_bitmap = 0,
admin_priority_app_table = {{
valid = 0,
priority = 0,
traffic_type = 0,
app_id = 0
}, {
valid = 0,
priority = 0,
traffic_type = 0,
app_id = 0
}, {
valid = 0,
priority = 0,
traffic_type = 0,
app_id = 0
}, {
valid = 0,
priority = 0,
traffic_type = 0,
app_id = 0
}},
admin_default_priority = 0
},
dcbx_port_params = {
pfc = {
enabled = 0,
priority_non_pauseable_mask = 0
},
ets = {
enabled = 0,
num_of_cos = 0 '\000',
cos_params = {{
bw_tbl = 4294967295,
pri_bitmask = 0,
strict = 3 '\003',
pauseable = 0 '\000'
}, {
bw_tbl = 4294967295,
pri_bitmask = 0,
strict = 3 '\003',
pauseable = 0 '\000'
}, {
bw_tbl = 4294967295,
pri_bitmask = 0,
strict = 3 '\003',
pauseable = 0 '\000'
}}
},
app = {
enabled = 0,
traffic_type_priority = {4294967295, 4294967295, 4294967295}
}
},
dcb_version = 22,
looks valid
and what it would do in this function.. bnx2x_sp_rtnl_task()
crash> struct -x bnx2x.sp_rtnl_state ffff881fe93f0840
sp_rtnl_state = 0x201
crash> eval 0x201
hexadecimal: 201
decimal: 513
octal: 1001
binary: 0000000000000000000000000000000000000000000000000000001000000001
binary: 1000000001
bit 0 is set. BNX2X_SP_RTNL_SETUP_TC = 0
bit 9 is set. BNX2X_SP_RTNL_TX_STOP = 9
crash> whatis sp_rtnl_flag
enum sp_rtnl_flag {
BNX2X_SP_RTNL_SETUP_TC = 0
BNX2X_SP_RTNL_TX_TIMEOUT = 1
BNX2X_SP_RTNL_FAN_FAILURE = 2
BNX2X_SP_RTNL_AFEX_F_UPDATE = 3
BNX2X_SP_RTNL_ENABLE_SRIOV = 4
BNX2X_SP_RTNL_VFPF_MCAST = 5
BNX2X_SP_RTNL_VFPF_CHANNEL_DOWN = 6
BNX2X_SP_RTNL_RX_MODE = 7
BNX2X_SP_RTNL_HYPERVISOR_VLAN = 8
BNX2X_SP_RTNL_TX_STOP = 9
BNX2X_SP_RTNL_GET_DRV_VERSION = 10
BNX2X_SP_RTNL_ADD_VXLAN_PORT = 11
BNX2X_SP_RTNL_DEL_VXLAN_PORT = 12
};