ubuntu ha vlan: neutron-down high load due to constant EPOLL in services with rabbit

Bug #1356044 reported by Andrew Woodward
6
This bug affects 1 person
Affects Status Importance Assigned to Milestone
Fuel for OpenStack
Confirmed
Critical
Unassigned

Bug Description

{"build_id": "2014-08-11_13-56-51", "ostf_sha": "acf52a59e04fa74d2ed2b68ea225f4d24403b264", "build_number": "423", "auth_required": true, "api": "1.0", "nailgun_sha": "2741cdc0f0615263db2f176899d406207ec4ac04", "production": "docker", "fuelmain_sha": "9d4463400b4924159c978af43855e48bcf2a84b2", "astute_sha": "b52910642d6de941444901b0f20e95ebbcb2b2e9", "feature_groups": ["mirantis"], "release": "5.1", "fuellib_sha": "d9b93edb53c44900bd5bc2c25e7c8af0a1310645"}

ubuntu ha, +ceph

3 controller + ceph-osd
2 compute + ceph-osd

Deployment succeeded, but most APIs are slow.
all nova-conductors are locked
all neutron-servers are locked
neutron-l3-agent is locked

strace shows the following (neutron-l3-agent in this case, but the other locked services look the same):

gettimeofday({1407874804, 747833}, NULL) = 0
epoll_wait(6, {{EPOLLOUT|EPOLLHUP, {u32=10, u64=21890426105495562}}}, 1023, 527) = 1
epoll_ctl(6, EPOLL_CTL_DEL, 10, {EPOLLRDBAND|EPOLLHUP|0x4d9020, {u32=0, u64=21890426105495552}}) = 0
sendto(10, "<167>neutron-l3-agent 2014-08-12 19:46:20.095 32284 DEBUG neutron.openstack.common.rpc.amqp [req-b279fe84-955a-4ec2-8c79-5200c8c98ff6 None] Making asynchronous cast on q-plugin... cast /usr/lib/python2.7/dist-packages/neutron/openstack/common/rpc/amqp.py:583\0", 259, 0, NULL, 0) = -1 ENOTCONN (Transport endpoint is not connected)
epoll_ctl(6, EPOLL_CTL_ADD, 10, {EPOLLOUT|EPOLLERR|EPOLLHUP, {u32=10, u64=21890426105495562}}) = 0
epoll_ctl(6, EPOLL_CTL_DEL, 10, {EPOLLRDBAND|EPOLLHUP|0x4d9020, {u32=0, u64=21890426105495552}}) = 0
sendto(10, "<167>neutron-l3-agent 2014-08-12 19:46:20.095 32284 DEBUG neutron.openstack.common.rpc.amqp [req-b279fe84-955a-4ec2-8c79-5200c8c98ff6 None] Making asynchronous cast on q-plugin... cast /usr/lib/python2.7/dist-packages/neutron/openstack/common/rpc/amqp.py:583\0", 259, 0, NULL, 0) = -1 ENOTCONN (Transport endpoint is not connected)
epoll_ctl(6, EPOLL_CTL_ADD, 10, {EPOLLOUT|EPOLLERR|EPOLLHUP, {u32=10, u64=21890426105495562}}) = 0
gettimeofday({1407874804, 749601}, NULL) = 0
gettimeofday({1407874804, 749805}, NULL) = 0
epoll_wait(6, {{EPOLLOUT|EPOLLHUP, {u32=10, u64=21890426105495562}}}, 1023, 525) = 1
epoll_ctl(6, EPOLL_CTL_DEL, 10, {EPOLLRDBAND|EPOLLHUP|0x4d9020, {u32=0, u64=21890426105495552}}) = 0
sendto(10, "<167>neutron-l3-agent 2014-08-12 19:46:20.095 32284 DEBUG neutron.openstack.common.rpc.amqp [req-b279fe84-955a-4ec2-8c79-5200c8c98ff6 None] Making asynchronous cast on q-plugin... cast /usr/lib/python2.7/dist-packages/neutron/openstack/common/rpc/amqp.py:583\0", 259, 0, NULL, 0) = -1 ENOTCONN (Transport endpoint is not connected)
epoll_ctl(6, EPOLL_CTL_ADD, 10, {EPOLLOUT|EPOLLERR|EPOLLHUP, {u32=10, u64=21890426105495562}}) = 0
epoll_ctl(6, EPOLL_CTL_DEL, 10, {EPOLLRDBAND|EPOLLHUP|0x4d9020, {u32=0, u64=21890426105495552}}) = 0
sendto(10, "<167>neutron-l3-agent 2014-08-12 19:46:20.095 32284 DEBUG neutron.openstack.common.rpc.amqp [req-b279fe84-955a-4ec2-8c79-5200c8c98ff6 None] Making asynchronous cast on q-plugin... cast /usr/lib/python2.7/dist-packages/neutron/openstack/common/rpc/amqp.py:583\0", 259, 0, NULL, 0) = -1 ENOTCONN (Transport endpoint is not connected)
epoll_ctl(6, EPOLL_CTL_ADD, 10, {EPOLLOUT|EPOLLERR|EPOLLHUP, {u32=10, u64=21890426105495562}}) = 0

strace of nova-conductor:
gettimeofday({1407875179, 65679}, NULL) = 0
gettimeofday({1407875179, 65741}, NULL) = 0
epoll_wait(6, {{EPOLLOUT, {u32=3, u64=21890426105495555}}}, 1023, 7891) = 1
epoll_ctl(6, EPOLL_CTL_DEL, 3, {EPOLLRDBAND|EPOLLHUP|0x4d9020, {u32=0, u64=21890426105495552}}) = 0
sendto(3, "<183>nova-conductor 2014-08-12 19:46:33.127 9507 DEBUG nova.openstack.common.loopingcall [-] Dynamic looping call sleeping for 60.00 seconds _inner /usr/lib/python2.7/dist-packages/nova/openstack/common/loopingcall.py:132\0", 222, 0, NULL, 0) = -1 ENOTCONN (Transport endpoint is not connected)
epoll_ctl(6, EPOLL_CTL_ADD, 3, {EPOLLOUT|EPOLLERR|EPOLLHUP, {u32=3, u64=21890426105495555}}) = 0
gettimeofday({1407875179, 66293}, NULL) = 0
gettimeofday({1407875179, 66348}, NULL) = 0
epoll_wait(6, {{EPOLLOUT, {u32=3, u64=21890426105495555}}}, 1023, 7890) = 1
epoll_ctl(6, EPOLL_CTL_DEL, 3, {EPOLLRDBAND|EPOLLHUP|0x4d9020, {u32=0, u64=21890426105495552}}) = 0
sendto(3, "<183>nova-conductor 2014-08-12 19:46:33.127 9507 DEBUG nova.openstack.common.loopingcall [-] Dynamic looping call sleeping for 60.00 seconds _inner /usr/lib/python2.7/dist-packages/nova/openstack/common/loopingcall.py:132\0", 222, 0, NULL, 0) = -1 ENOTCONN (Transport endpoint is not connected)
epoll_ctl(6, EPOLL_CTL_ADD, 3, {EPOLLOUT|EPOLLERR|EPOLLHUP, {u32=3, u64=21890426105495555}}) = 0

Restarting the services clears the error.

Tags: rabbit
Revision history for this message
Andrew Woodward (xarses) wrote :
Ryan Moe (rmoe)
Changed in fuel:
status: New → Confirmed
importance: High → Critical
milestone: none → 5.1
To post a comment you must log in.
This report contains Public information  
Everyone can see this information.

Other bug subscribers

Remote bug watches

Bug watches keep track of this bug in other bug trackers.