$ eu-addr2line -ifae ./vmlinux-5.4.0-88-generic __tun_detach+0x421
0xffffffff8178b991
unregister_netdevice inlined at /build/linux-q2DMsi/linux-5.4.0/drivers/net/tun.c:731:5 in __tun_detach
/build/linux-q2DMsi/linux-5.4.0/include/linux/netdevice.h:2677:1
__tun_detach
/build/linux-q2DMsi/linux-5.4.0/drivers/net/tun.c:731:5
We get to notifier_call_chain() not from netdev_state_change() as mentioned in the bug report, but unregister_netdevice() from line 731. This means we haven't yet run sock_put(&tfile->sk) from line 736.
Puzzling isn't it? There are calls to sock_put(&tfile->sk) earlier in __tun_detach(), maybe it freed the socket buffer already, which would explain the behaviour.
But then when we run sock_put(&tfile->sk) again, wouldn't we then run into use-after-free territory, when we try free the socket buffer again?
1735 /* Ungrab socket and destroy it, if it was the last reference. */
1736 static inline void sock_put(struct sock *sk)
1737 {
1738 if (refcount_dec_and_test(&sk->sk_refcnt))
1739 sk_free(sk);
1740 }
I have a second call trace that I have been debugging along with the one in the description, I'll add it in the next comment.
I'll keep looking into the patch anyway. I have been running the syzkaller reproducer in a VM for the last few hours, but I haven't reproduced yet.
Hi David,
Thanks for the link, I think that is the most plausible explanation I have
seen so far.
The only problem is, if we look at the patch:
diff --git a/drivers/net/tun.c b/drivers/net/tun.c .24001112c323 100644 rxq_info_ unreg(& tfile-> xdp_rxq) ; ring_cleanup( &tfile- >tx_ring, tun_ptr_free); &tfile- >sk);
index 7a3ab3427369.
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -686,7 +686,6 @@ static void __tun_detach(struct tun_file *tfile, bool clean)
if (tun)
xdp_
ptr_
- sock_put(
}
}
@@ -702,6 +701,9 @@ static void tun_detach(struct tun_file *tfile, bool clean) state_change( dev); &tfile- >sk);
if (dev)
netdev_
rtnl_unlock();
+
+ if (clean)
+ sock_put(
}
static void tun_detach_ all(struct net_device *dev)
It moves the final sock_put( &tfile- >sk) from the end of __tun_detach() to tun_detach(), after the call to netdev_ state_change( dev).
685 static void __tun_detach(struct tun_file *tfile, bool clean) off(tun- >dev); netdevice( tun->dev) ; info_unreg( &tfile- >xdp_rxq) ; cleanup( &tfile- >tx_ring, tun_ptr_free); &tfile- >sk); e(tfile- >tun); state_change( dev);
686 {
...
725 if (clean) {
726 if (tun && tun->numqueues == 0 && tun->numdisabled == 0) {
727 netif_carrier_
728
729 if (!(tun->flags & IFF_PERSIST) &&
730 tun->dev->reg_state == NETREG_REGISTERED)
731 unregister_
732 }
733 if (tun)
734 xdp_rxq_
735 ptr_ring_
736 sock_put(
737 }
738 }
739
740 static void tun_detach(struct tun_file *tfile, bool clean)
741 {
742 struct tun_struct *tun;
743 struct net_device *dev;
744
745 rtnl_lock();
746 tun = rtnl_dereferenc
747 dev = tun ? tun->dev : NULL;
748 __tun_detach(tfile, clean);
749 if (dev)
750 netdev_
751 rtnl_unlock();
752 }
This more or less makes sense, but if you look at the call trace in the bug:
... call_chain+ 0x55/0x80 netdevice_ queue+0x94/ 0x120 0x421/0x430
[455151.894444] notifier_
...
[455151.895239] unregister_
[455151.895383] __tun_detach+
...
$ eu-addr2line -ifae ./vmlinux- 5.4.0-88- generic __tun_detach+0x421 netdevice inlined at /build/ linux-q2DMsi/ linux-5. 4.0/drivers/ net/tun. c:731:5 in __tun_detach linux-q2DMsi/ linux-5. 4.0/include/ linux/netdevice .h:2677: 1 linux-q2DMsi/ linux-5. 4.0/drivers/ net/tun. c:731:5
0xffffffff8178b991
unregister_
/build/
__tun_detach
/build/
We get to notifier_ call_chain( ) not from netdev_ state_change( ) as mentioned in the bug report, but unregister_ netdevice( ) from line 731. This means we haven't yet run sock_put( &tfile- >sk) from line 736.
Puzzling isn't it? There are calls to sock_put( &tfile- >sk) earlier in __tun_detach(), maybe it freed the socket buffer already, which would explain the behaviour.
But then when we run sock_put( &tfile- >sk) again, wouldn't we then run into use-after-free territory, when we try free the socket buffer again?
1735 /* Ungrab socket and destroy it, if it was the last reference. */ dec_and_ test(&sk- >sk_refcnt) )
1736 static inline void sock_put(struct sock *sk)
1737 {
1738 if (refcount_
1739 sk_free(sk);
1740 }
I have a second call trace that I have been debugging along with the one in the description, I'll add it in the next comment.
I'll keep looking into the patch anyway. I have been running the syzkaller reproducer in a VM for the last few hours, but I haven't reproduced yet.
https:/ /syzkaller. appspot. com/bug? id=96eb7f1ce75e f933697f24eeab9 28c4a716edefe /groups. google. com/g/syzkaller -bugs/c/ C0r0nwrvBME/ m/MxQ5Z7_ VBAAJ /syzkaller. appspot. com/x/repro. c?x=11bd3a10f00 000
https:/
https:/
Thanks,
Matthew