Comment 1 for bug 582804

Revision history for this message
Raphaël Pinson (raphink) wrote :

This bug has also been reproduced with a tar-pipe-tar process, using:

ssh remote tar cfP - /path/to/dir | tar xf -

After some investigation, it seems there is an issue with the ssh process. When the rsync process is stuck in state 'D', the ssh process is in state 'S+':

client:
=================
root 651 0.0 0.0 49252 1080 ? Ss 09:53 0:00 /usr/sbin/sshd
root 2681 0.0 0.0 70572 3308 ? Ss 09:53 0:00 \_ sshd: root@pts/0
root 2693 0.0 0.0 19340 3496 pts/0 Ss 09:53 0:00 | \_ -bash
root 8210 0.6 0.0 4328 800 pts/0 S+ 10:00 0:12 | \_ strace rsync -av --ignore-existing gforge01.vprod.infra.s1.p.fti.net:/var/lib/mailman/ /var/lib/mailman
root 8211 0.2 0.3 124076 14536 pts/0 D+ 10:00 0:04 | \_ rsync -av --ignore-existing gforge01.vprod.infra.s1.p.fti.net:/var/lib/mailman/ /var/lib/mailman
root 8212 5.0 0.1 44556 6756 pts/0 S+ 10:00 1:39 | \_ ssh gforge01.vprod.infra.s1.p.fti.net rsync --server --sender -vlogDtpre.iLsf . /var/lib/mailman/
root 8213 3.3 0.4 285744 19876 pts/0 D+ 10:00 1:05 | \_ rsync -av --ignore-existing gforge01.vprod.infra.s1.p.fti.net:/var/lib/mailman/ /var/lib/mailman
=================

server:
=================
root 26107 5.4 0.2 13452 6616 ? Ss 10:00 1:55 \_ sshd: root@notty
root 26154 2.5 1.3 93880 41428 ? Ss 10:00 0:54 | \_ rsync --server --sender -vlogDtpre.iLsf . /var/lib/mailman/
=================

The strace output for the ssh process on each side of the network link shows that the processes are trying to access file descriptors that are not listed in /proc:

client:
=================
root@mail01:/var/log# strace -p 8212
Process 8212 attached - interrupt to quit
select(7, [3 4], [5], NULL, NULL

root@mail01:/var/log# ls -l /proc/8212/fd
total 0
lrwx------ 1 root root 64 May 20 10:28 0 -> socket:[11967]
lrwx------ 1 root root 64 May 20 10:28 1 -> socket:[11970]
l-wx------ 1 root root 64 May 20 10:16 2 -> /var/log/rsync_strace
lrwx------ 1 root root 64 May 20 10:28 3 -> socket:[11980]
lrwx------ 1 root root 64 May 20 10:28 4 -> socket:[11967]
lrwx------ 1 root root 64 May 20 10:28 5 -> socket:[11970]
l-wx------ 1 root root 64 May 20 10:28 6 -> /var/log/rsync_strace
=================

server:
=================
[PROD/MASTER] gforge01:~# strace -p 26107
Process 26107 attached - interrupt to quit
select(14, [3 6], [], NULL, NULL

[PROD/MASTER] gforge01:~# ls -l /proc/26107/fd/
total 0
lrwx------ 1 root root 64 2010-05-20 10:36 0 -> /dev/null
lrwx------ 1 root root 64 2010-05-20 10:36 1 -> /dev/null
l-wx------ 1 root root 64 2010-05-20 10:36 10 -> pipe:[4066236232]
lr-x------ 1 root root 64 2010-05-20 10:36 11 -> pipe:[4066236233]
lr-x------ 1 root root 64 2010-05-20 10:36 13 -> pipe:[4066236234]
lrwx------ 1 root root 64 2010-05-20 10:36 2 -> /dev/null
lrwx------ 1 root root 64 2010-05-20 10:36 3 -> socket:[4066235605]
lrwx------ 1 root root 64 2010-05-20 10:36 4 -> socket:[4066235803]
lrwx------ 1 root root 64 2010-05-20 10:36 5 -> socket:[4066236227]
lr-x------ 1 root root 64 2010-05-20 10:36 6 -> pipe:[4066236229]
lr-x------ 1 root root 64 2010-05-20 10:36 7 -> /dev/urandom
l-wx------ 1 root root 64 2010-05-20 10:36 8 -> pipe:[4066236229]
=================