Comment 13 for bug 1946656

Revision history for this message
Maciej Borzecki (maciek-borzecki) wrote :

A more in-depth look.

root@ubuntu-server:/home/ubuntu-server# ss -x -p -a |grep snapd
u_str LISTEN 0 4096 /run/user/999/snapd-session-agent.socket 21532 * 0 users:(("systemd",pid=1979,fd=31))
u_str LISTEN 0 4096 /run/snapd.socket 16890 * 0 users:(("snapd",pid=1255,fd=5),("systemd",pid=1,fd=67))
u_str LISTEN 0 4096 /run/snapd-snap.socket 16892 * 0 users:(("snapd",pid=1255,fd=9),("systemd",pid=1,fd=69))
u_str ESTAB 0 0 * 17034 * 17038 users:(("snapd",pid=1255,fd=2),("snapd",pid=1255,fd=1))
u_str ESTAB 0 0 /run/snapd.socket 21249 * 21248 users:(("snapd",pid=1255,fd=33))
u_str ESTAB 0 0 /run/snapd.socket 21258 * 21257 users:(("snapd",pid=1255,fd=14))
u_str ESTAB 0 0 /run/snapd.socket 21252 * 21251 users:(("snapd",pid=1255,fd=34))
u_str ESTAB 0 0 /run/snapd.socket 21255 * 21254 users:(("snapd",pid=1255,fd=35))
u_str ESTAB 0 0 /run/snapd.socket 21099 * 21098 users:(("snapd",pid=1255,fd=17))
u_str ESTAB 0 0 /run/snapd.socket 21207 * 21206 users:(("snapd",pid=1255,fd=19))
u_str ESTAB 0 0 /run/snapd.socket 21210 * 21209 users:(("snapd",pid=1255,fd=20))
u_str ESTAB 0 0 /run/snapd.socket 21204 * 21203 users:(("snapd",pid=1255,fd=16))
u_str ESTAB 0 0 /run/snapd.socket 21237 * 21236 users:(("snapd",pid=1255,fd=29))
u_str ESTAB 0 0 /run/snapd.socket 21240 * 21239 users:(("snapd",pid=1255,fd=30))
u_str ESTAB 0 0 /run/snapd.socket 21264 * 21263 users:(("snapd",pid=1255,fd=3))
u_str ESTAB 0 0 /run/snapd.socket 21246 * 21245 users:(("snapd",pid=1255,fd=32))
u_str ESTAB 0 0 /run/snapd.socket 21261 * 21260 users:(("snapd",pid=1255,fd=36))
u_str ESTAB 0 0 /run/snapd.socket 21243 * 21242 users:(("snapd",pid=1255,fd=31))
u_str ESTAB 0 0 /run/snapd.socket 21296 * 21295 users:(("snapd",pid=1255,fd=12))
u_str ESTAB 0 0 /run/snapd.socket 21219 * 21218 users:(("snapd",pid=1255,fd=23))
u_str ESTAB 0 0 /run/snapd.socket 21222 * 21221 users:(("snapd",pid=1255,fd=24))
u_str ESTAB 0 0 /run/snapd.socket 21216 * 21215 users:(("snapd",pid=1255,fd=22))
u_str ESTAB 0 0 /run/snapd.socket 21213 * 21212 users:(("snapd",pid=1255,fd=21))
u_str ESTAB 0 0 /run/snapd.socket 20677 * 20675 users:(("snapd",pid=1255,fd=13))
u_str ESTAB 0 0 /run/snapd.socket 21225 * 21224 users:(("snapd",pid=1255,fd=25))
u_str ESTAB 0 0 /run/snapd.socket 20690 * 20689 users:(("snapd",pid=1255,fd=15))
u_str ESTAB 0 0 /run/snapd.socket 21231 * 21230 users:(("snapd",pid=1255,fd=27))
u_str ESTAB 0 0 /run/snapd.socket 21234 * 21233 users:(("snapd",pid=1255,fd=28))
u_str ESTAB 0 0 /run/snapd.socket 21228 * 21227 users:(("snapd",pid=1255,fd=26))

Sampling the peer connections:

root@ubuntu-server:/home/ubuntu-server# ss -x -p -a |grep 21098
u_str ESTAB 0 0 /run/snapd.socket 21099 * 21098 users:(("snapd",pid=1255,fd=17))
u_str ESTAB 0 0 * 21098 * 21099 users:(("python3.8",pid=1830,fd=20))
root@ubuntu-server:/home/ubuntu-server# ss -x -p -a |grep 21230
u_str ESTAB 0 0 * 21230 * 21231 users:(("python3.8",pid=1830,fd=33))
u_str ESTAB 0 0 /run/snapd.socket 21231 * 21230 users:(("snapd",pid=1255,fd=27))
root@ubuntu-server:/home/ubuntu-server# ss -x -p -a |grep 21242
u_str ESTAB 0 0 /run/snapd.socket 21243 * 21242 users:(("snapd",pid=1255,fd=31))
u_str ESTAB 0 0 * 21242 * 21243 users:(("python3.8",pid=1830,fd=37))
root@ubuntu-server:/home/ubuntu-server# ss -x -p -a |grep 21209
u_str ESTAB 0 0 * 21209 * 21210 users:(("python3.8",pid=1830,fd=26))
u_str ESTAB 0 0 /run/snapd.socket 21210 * 21209 users:(("snapd",pid=1255,fd=20))

Where pid 1830 is:

root@ubuntu-server:/home/ubuntu-server# ps -ef|grep 1830
root 1830 1809 0 08:35 ? 00:00:02 /snap/subiquity/2793/usr/bin/python3.8 -m subiquity.cmd.server

It is unclear to me why there need to be ~25 connections from subiquity to snapd. The connections do not go away when subiquity requests a restart of snapd.service:

root@ubuntu-server:/home/ubuntu-server# ps -ef|grep snapd
root 1255 1 0 08:35 ? 00:00:01 /usr/lib/snapd/snapd
root 2374 1830 0 08:52 ? 00:00:00 systemctl restart snapd.service
root 2415 2223 0 08:52 pts/0 00:00:00 grep --color=auto snapd

Since those connections are not closed (or simply never enter an idle state) and snapd performs a graceful shutdown, the shutdown takes longer; eventually snapd hits the SIGTERM timeout and systemd issues a SIGKILL.

I think someone needs to investigate in parallel why there are so many connections.