Comment 8 for bug 1843259

Revision history for this message
Emilien Macchi (emilienm) wrote :

sysctl settings managed by Puppet are visible in this hieradata:

https://logs.rdoproject.org/openstack-periodic-24hr/opendev.org/openstack/tripleo-ci/master/periodic-tripleo-ci-centos-7-ovb-1ctlr_2comp-featureset020-rocky/e9d92b4/logs/overcloud-novacompute-1/etc/puppet/hieradata/service_configs.json.txt.gz

Pasting here:

    "sysctl_settings": {
        "fs.inotify.max_user_instances": {
            "value": 1024
        },
        "fs.suid_dumpable": {
            "value": 0
        },
        "kernel.dmesg_restrict": {
            "value": 1
        },
        "kernel.pid_max": {
            "value": 1048576
        },
        "net.core.netdev_max_backlog": {
            "value": 10000
        },
        "net.ipv4.conf.all.arp_accept": {
            "value": 1
        },
        "net.ipv4.conf.all.arp_notify": {
            "value": 1
        },
        "net.ipv4.conf.all.log_martians": {
            "value": 1
        },
        "net.ipv4.conf.all.secure_redirects": {
            "value": 0
        },
        "net.ipv4.conf.all.send_redirects": {
            "value": 0
        },
        "net.ipv4.conf.default.accept_redirects": {
            "value": 0
        },
        "net.ipv4.conf.default.log_martians": {
            "value": 1
        },
        "net.ipv4.conf.default.secure_redirects": {
            "value": 0
        },
        "net.ipv4.conf.default.send_redirects": {
            "value": 0
        },
        "net.ipv4.ip_forward": {
            "value": 1
        },
        "net.ipv4.ip_nonlocal_bind": {
            "value": 0
        },
        "net.ipv4.neigh.default.gc_thresh1": {
            "value": 1024
        },
        "net.ipv4.neigh.default.gc_thresh2": {
            "value": 2048
        },
        "net.ipv4.neigh.default.gc_thresh3": {
            "value": 4096
        },
        "net.ipv4.tcp_keepalive_intvl": {
            "value": 1
        },
        "net.ipv4.tcp_keepalive_probes": {
            "value": 5
        },
        "net.ipv4.tcp_keepalive_time": {
            "value": 5
        },
        "net.ipv6.conf.all.accept_ra": {
            "value": 0
        },
        "net.ipv6.conf.all.accept_redirects": {
            "value": 0
        },
        "net.ipv6.conf.all.autoconf": {
            "value": 0
        },
        "net.ipv6.conf.all.disable_ipv6": {
            "value": 0
        },
        "net.ipv6.conf.all.ndisc_notify": {
            "value": 1
        },
        "net.ipv6.conf.default.accept_ra": {
            "value": 0
        },
        "net.ipv6.conf.default.accept_redirects": {
            "value": 0
        },
        "net.ipv6.conf.default.autoconf": {
            "value": 0
        },
        "net.ipv6.conf.default.disable_ipv6": {
            "value": 0
        },
        "net.ipv6.conf.lo.disable_ipv6": {
            "value": 0
        },
        "net.ipv6.ip_nonlocal_bind": {
            "value": 0
        },
        "net.netfilter.nf_conntrack_max": {
            "value": 500000
        },
        "net.nf_conntrack_max": {
            "value": 500000
        }
    },

As you can see, there is nothing about the net.bridge.* settings here, so I suspect they are set outside of TripleO.
Maybe they are set in the RDO nodepool image, or they are just the default in CentOS 7, or maybe they are set by Zuul when deploying.

We should record which cloud providers we hit that failure on; if there is a pattern (e.g. always RDO cloud or always RAX), then we should probably fix it.