CPU soft lockup in a spin lock using tproxy and nfqueue

Bug #2013282 reported by Dávid Major
12
This bug affects 2 people
Affects Status Importance Assigned to Milestone
linux (Ubuntu)
Confirmed
Undecided
Unassigned

Bug Description

I've been experimenting with netfilter's queue target and transparent redirection of IPv4/TCP connections on Ubuntu Jammy.

By initiating a large number of connection requests from a single source IP address to a single destination IP/port, I was able to trigger
a soft lockup on a server host running the latest linux-generic 5.15.0.69.67.

With "kernel.softlockup_panic=1":

[ 520.222992] watchdog: BUG: soft lockup - CPU#0 stuck for 26s! [sample-queue-ha:949]
[ 520.223719] Modules linked in: nfnetlink_queue nft_socket nf_socket_ipv4 nf_socket_ipv6
   nft_tproxy nf_tproxy_ipv6 nf_tproxy_ipv4 nft_queue nft_ct nf_conntrack nf_defrag_ipv6
   nf_defrag_ipv4 nf_tables nfnetlink binfmt_misc intel_rapl_msr intel_rapl_common rapl
   kvm_intel kvm nls_iso8859_1 input_leds serio_raw qemu_fw_cfg sch_fq_codel dm_multipath
   scsi_dh_rdac scsi_dh_emc scsi_dh_alua ramoops reed_solomon ipmi_devintf ipmi_msghandler
   pstore_blk msr pstore_zone efi_pstore ip_tables x_tables autofs4 raid10 raid456 libcrc32c
   async_raid6_recov async_memcpy async_pq async_xor xor async_tx raid6_pq raid1 raid0
   multipath linear crct10dif_pclmul crc32_pclmul ghash_clmulni_intel sha512_ssse3 aesni_intel
   crypto_simd cryptd psmouse ahci i2c_i801 i2c_smbus libahci lpc_ich virtio_blk xhci_pci
   xhci_pci_renesas virtio_net net_failover failover
[ 520.223772] CPU: 0 PID: 949 Comm: sample-queue-ha Kdump: loaded Not tainted 6.2.0-rc6 #1
[ 520.223774] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.15.0-1 04/01/2014
[ 520.223775] RIP: 0010:native_queued_spin_lock_slowpath+0x81/0x300
[ 520.223781] Code: 0f 92 c2 41 8b 04 24 0f b6 d2 c1 e2 08 30 e4 09 d0 a9 00 01 ff ff 0f 85
   ec 01 00 00 85 c0 74 12 41 8b 04 24 84 c0 74 0a f3 90 <41> 8b 04 24 84 c0 75 f6 b8 01 00 00
   00 66 41 89 04 24 5b 41 5c 41
[ 520.223782] RSP: 0018:ffffa3e600003938 EFLAGS: 00000202
[ 520.223784] RAX: 0000000000000101 RBX: ffff952b02af9c00 RCX: 0000000000003fff
[ 520.223785] RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffff952b02a91910
[ 520.223786] RBP: ffffa3e600003960 R08: 0000000000039c00 R09: 00000000948c28ba
[ 520.223787] R10: 00000000000124f8 R11: 0000000000000000 R12: ffff952b02a91910
[ 520.223788] R13: ffff952b057a6110 R14: ffff952b02a91910 R15: ffff952b0389c600
[ 520.223789] FS: 00007efc3ba2eb80(0000) GS:ffff952b7fc00000(0000) knlGS:0000000000000000
[ 520.223790] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 520.223791] CR2: 00007f0b96ffcef8 CR3: 000000000551e006 CR4: 0000000000370ef0
[ 520.223795] Call Trace:
[ 520.223796] <IRQ>
[ 520.223805] _raw_spin_lock+0x29/0x30
[ 520.223807] inet_twsk_hashdance+0xe4/0x310
[ 520.223810] tcp_time_wait+0x187/0x310
[ 520.223811] tcp_rcv_state_process+0x10a8/0x10b0
[ 520.223813] ? sk_filter_trim_cap+0x119/0x240
[ 520.223815] ? tcp_inbound_md5_hash+0x54/0x1c0
[ 520.223817] tcp_v4_do_rcv+0xd2/0x280
[ 520.223820] tcp_v4_rcv+0xe14/0xea0
[ 520.223821] ? raw_local_deliver+0x71/0x240
[ 520.223823] ip_protocol_deliver_rcu+0x2e/0x180
[ 520.223825] ip_local_deliver_finish+0x8a/0xb0
[ 520.223826] ip_local_deliver+0x73/0x120
[ 520.223827] ? ip_protocol_deliver_rcu+0x180/0x180
[ 520.223829] ip_sublist_rcv_finish+0x37/0x50
[ 520.223830] ip_sublist_rcv+0x17a/0x200
[ 520.223832] ? ip_rcv_finish_core.constprop.0+0x490/0x490
[ 520.223833] ip_list_rcv+0xfd/0x120
[ 520.223835] __netif_receive_skb_list_core+0x176/0x1e0
[ 520.223837] netif_receive_skb_list_internal+0x19b/0x2b0
[ 520.223839] napi_complete_done+0x7e/0x1c0
[ 520.223842] virtnet_poll+0x3f1/0x5bc [virtio_net]
[ 520.223851] __napi_poll+0x2d/0x180
[ 520.223852] net_rx_action+0x189/0x2c0
[ 520.223854] ? skb_recv_done+0x56/0x60 [virtio_net]
[ 520.223858] __do_softirq+0xdb/0x314
[ 520.223860] __irq_exit_rcu+0x86/0xb0
[ 520.223865] irq_exit_rcu+0x12/0x20
[ 520.223866] common_interrupt+0x8e/0xa0
[ 520.223870] </IRQ>
[ 520.223871] <TASK>
[ 520.223872] asm_common_interrupt+0x2b/0x40
[ 520.223873] RIP: 0010:inet_bind_bucket_destroy+0x0/0x40
[ 520.223875] Code: 74 04 48 89 4a 08 49 89 4e 08 49 83 c6 08 4c 89 70 38 5b 41 5c 41 5d 41
   5e 5d c3 66 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 <66> 0f 1f 00 0f 1f 44 00 00 48 8b 46
   40 48 85 c0 74 01 c3 48 8b 46
[ 520.223876] RSP: 0018:ffffa3e60096f378 EFLAGS: 00000286
[ 520.223877] RAX: ffff952b057a0ab8 RBX: ffff952b057a6aa0 RCX: 00000000a54739c0
[ 520.223878] RDX: ffff952b057a6898 RSI: ffff952b0a20c700 RDI: ffff952b011dfd00
[ 520.223879] RBP: ffffa3e60096f398 R08: ffff952b0544cec0 R09: 000000000000c6ce
[ 520.223880] R10: 0000000000009cad R11: 0000000000000000 R12: ffffffffbb72a5a0
[ 520.223881] R13: ffff952b05561ac0 R14: ffff952b02a91910 R15: ffffffffbb72a5a0
[ 520.223883] ? inet_twsk_bind_unhash+0x54/0xc0
[ 520.223885] inet_twsk_kill+0xe6/0x290
[ 520.223886] inet_twsk_deschedule_put+0x38/0x50
[ 520.223889] nf_tproxy_handle_time_wait4+0xf8/0xca0 [nf_tproxy_ipv4]
[ 520.223892] nft_tproxy_eval+0x4d3/0x4ea [nft_tproxy]
[ 520.223897] nft_do_chain+0xfc/0x640 [nf_tables]
[ 520.223920] ? nft_do_chain+0xfc/0x640 [nf_tables]
[ 520.223927] ? kmem_cache_free+0x353/0x3a0
[ 520.223930] ? skb_free_head+0x5a/0x80
[ 520.223932] ? kfree_skbmem+0x4e/0x90
[ 520.223934] ? consume_skb+0x50/0xd0
[ 520.223936] ? tcp_v4_do_rcv+0xd2/0x280
[ 520.223938] ? tcp_v4_rcv+0xe4a/0xea0
[ 520.223939] ? raw_local_deliver+0x71/0x240
[ 520.223941] ? ip_protocol_deliver_rcu+0x2e/0x180
[ 520.223942] ? ip_local_deliver_finish+0x8f/0xb0
[ 520.223944] ? __nf_conntrack_find_get+0x340/0x390 [nf_conntrack]
[ 520.223952] ? ip_local_deliver+0x73/0x120
[ 520.223953] nft_do_chain_inet+0x8e/0xf0 [nf_tables]
[ 520.223961] ? nf_conntrack_update+0x150/0x3c0 [nf_conntrack]
[ 520.223968] nf_reinject+0xc8/0x260
[ 520.223971] nfqnl_reinject+0x2b/0x60 [nfnetlink_queue]
[ 520.223974] nfqnl_recv_verdict+0x309/0x510 [nfnetlink_queue]
[ 520.223978] nfnetlink_rcv_msg+0x202/0x3b0 [nfnetlink]
[ 520.223983] ? save_fpregs_to_fpstate+0x43/0xa0
[ 520.223985] ? nfnetlink_unbind+0xc0/0xc0 [nfnetlink]
[ 520.223988] netlink_rcv_skb+0x5d/0x100
[ 520.223990] nfnetlink_rcv+0x6e/0x14d [nfnetlink]
[ 520.223993] ? __netlink_lookup+0xc4/0x100
[ 520.223995] netlink_unicast+0x234/0x350
[ 520.223996] netlink_sendmsg+0x261/0x4e0
[ 520.223998] sock_sendmsg+0x3e/0x50
[ 520.224000] ____sys_sendmsg+0x24c/0x2c0
[ 520.224002] ___sys_sendmsg+0x88/0xd0
[ 520.224004] ? __sys_recvfrom+0x149/0x170
[ 520.224007] ? __rseq_handle_notify_resume+0xaa/0x470
[ 520.224009] __sys_sendmsg+0x69/0xc0
[ 520.224011] ? do_syscall_64+0x67/0x80
[ 520.224013] __x64_sys_sendmsg+0x21/0x30
[ 520.224015] do_syscall_64+0x5a/0x80
[ 520.224016] ? do_syscall_64+0x67/0x80
[ 520.224018] ? exit_to_user_mode_prepare+0x35/0x170
[ 520.224020] ? syscall_exit_to_user_mode+0x2a/0x50
[ 520.224022] ? __x64_sys_recvfrom+0x28/0x30
[ 520.224024] ? do_syscall_64+0x67/0x80
[ 520.224026] ? syscall_exit_to_user_mode+0x2a/0x50
[ 520.224027] ? __x64_sys_recvfrom+0x28/0x30
[ 520.224029] ? do_syscall_64+0x67/0x80
[ 520.224030] ? do_syscall_64+0x67/0x80
[ 520.224032] entry_SYSCALL_64_after_hwframe+0x46/0xb0

A fix for this bug has already been committed to the upstream stable tree (linux-5.15.y):

https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?h=linux-5.15.y&id=2662c5b1f0ef01ee06d42a04b3c77bdf46004ce2

Dávid Major (dmajor)
description: updated
Revision history for this message
Launchpad Janitor (janitor) wrote :

Status changed to 'Confirmed' because the bug affects multiple users.

Changed in linux (Ubuntu):
status: New → Confirmed
To post a comment you must log in.
This report contains Public information  
Everyone can see this information.

Other bug subscribers

Remote bug watches

Bug watches keep track of this bug in other bug trackers.