[integration tests] After banning the l3 agent many times, ping from an instance to 8.8.8.8 sometimes fails

Bug #1583910 reported by Georgy Dyuldin
6
This bug affects 1 person
Affects Status Importance Assigned to Milestone
Mirantis OpenStack
Confirmed
High
MOS QA Team

Bug Description

Test result:

https://mirantis.testrail.com/index.php?/tests/view/6109116

Trace:

self = <mos_tests.neutron.python_tests.test_l3_agent.TestL3Agent object at 0x7f1a8494ab50>

    @pytest.mark.testrail_id('542608')
    def test_ban_l3_agents_many_times(self):
        """Ban l3-agent many times and check health of l3-agent

            Scenario:
                1. Revert snapshot with neutron cluster
                2. Create network1, network2
                3. Create router1 and connect it with network1, network2 and
                   external net
                4. Boot vm1 in network1 and associate floating ip
                5. Boot vm2 in network2
                6. Add rules for ping
                7. ping 8.8.8.8, vm1 (both ip) and vm2 (fixed ip) from each other
                8. Ban l3-agent on what router1 is
                9. Wait for route rescheduling
                10. Repeat steps 7-8
                11. Ban l3-agent on what router1 is
                12. Wait for L3 agent dies
                13. Clear last banned L3 agent
                14. Wait for L3 agent alive
                15. Repeat steps 11-14 40 times
                16. Boot one more VM (VM3) in network1
                17. Boot vm3 in network1
                18. ping 8.8.8.8, vm1 (both ip), vm2 (fixed ip) and vm3 (fixed ip)
                    from each other vm

            Duration 30m

            """
        net_id = self.os_conn.neutron.list_networks(
            name="net01")['networks'][0]['id']
        devops_node = self.get_node_with_dhcp(net_id)
        ip = devops_node.data['ip']

        # ban 2 l3 agents
        for _ in range(2):
            self.ban_l3_agent(router_name="router01", _ip=ip)

        for _ in range(40):
            # ban l3 agent
            last_banned_node = self.ban_l3_agent(router_name="router01",
                                                 _ip=ip,
                                                 wait_for_migrate=False,
                                                 wait_for_die=True)
            # clear last banned l3 agent
            self.clear_l3_agent(_ip=ip,
                                router_name="router01",
                                node=last_banned_node,
                                wait_for_alive=True)

        # check pings
> network_checks.check_vm_connectivity(self.env, self.os_conn)

mos_tests/neutron/python_tests/test_l3_agent.py:449:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
mos_tests/functions/network_checks.py:113: in check_vm_connectivity
    timeout=timeout)
mos_tests/functions/network_checks.py:73: in check_ping_from_vm
    vm_password)
mos_tests/functions/network_checks.py:98: in check_ping_from_vm_helper
    vm_login=vm_login, vm_password=vm_password)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

env = <mos_tests.environment.fuel_client.Environment object at 0x7f1a8494af50>
os_conn = <mos_tests.environment.os_actions.OpenStackActions object at 0x7f1a8494a110>
vm = <Server: server02>, vm_keypair = None
command = 'ping -c1 8.8.8.8 && ping -c1 192.168.1.4 && ping -c1 10.109.16.133'
vm_login = 'cirros', timeout = 240, vm_password = 'cubswin:)'

    def run_on_vm(env, os_conn, vm, vm_keypair=None, command='uname',
                  vm_login="cirros", timeout=3 * 60, vm_password='cubswin:)'):
        """Execute command on vm and return dict with results

        :param vm: server to execute command on
        :param vm_keypair: keypair used during vm creating
        :param command: command to execute
        :param vm_login: username to login to vm via ssh
        :param vm_password: password to login to vm via ssh
        :param timeout: type - int or None
            - if None - execute command and return results
            - if int - wait `timeout` seconds until command exit_code will be 0
        :returns: Dictionary with `exit_code`, `stdout`, `stderr` keys.
            `Stdout` and `stderr` are list of strings
        """
        results = []

        def execute():
            with os_conn.ssh_to_instance(env, vm, vm_keypair,
                                         username=vm_login,
                                         password=vm_password) as remote:
                result = remote.execute(command)
                results.append(result)
                return result

        logger.info('Executing `{cmd}` on {vm_name}'.format(
            cmd=command,
            vm_name=vm.name))

        if timeout is None:
            execute()
        else:
            err_msg = "SSH command: `{command}` completed with 0 exit code"
            wait(lambda: execute()['exit_code'] == 0,
                 sleep_seconds=(1, 60, 5), timeout_seconds=timeout,
                 expected_exceptions=(Exception,),
> waiting_for=err_msg.format(command=command))
E TimeoutExpired: Timeout of 240 seconds expired waiting for SSH command: `ping -c1 8.8.8.8 && ping -c1 192.168.1.4 && ping -c1 10.109.16.133` completed with 0 exit code

mos_tests/functions/network_checks.py:62: TimeoutExpired

Tags: area-qa
Changed in mos:
status: New → Confirmed
Changed in mos:
milestone: none → 9.0
assignee: nobody → MOS QA Team (mos-qa)
importance: Undecided → High
To post a comment you must log in.
This report contains Public information  
Everyone can see this information.

Other bug subscribers

Remote bug watches

Bug watches keep track of this bug in other bug trackers.