SM re-provision of an r3.0.1.0-18 Kilo cluster gets stuck. Please refer to https://review.opencontrail.org/#/c/18972/.
root@cmbu-auto-esx1-lnx02:~# server-manager status server
{
"server": [
{
"id": "cmbu-ceph-perf1",
"ip_address": "10.87.140.197",
"mac_address": "00:25:90:AB:9C:88",
"status": "collector_started"
},
{
"id": "cmbu-ceph-perf2",
"ip_address": "10.87.140.198",
"mac_address": "00:25:90:35:8A:1F",
"status": "provision_started"
},
{
"id": "cmbu-ceph-perf3",
"ip_address": "10.87.140.199",
"mac_address": "00:25:90:92:0E:6C",
"status": "provision_started"
},
{
"id": "cmbu-ceph-perf4",
"ip_address": "10.87.140.200",
"mac_address": "00:25:90:92:0D:F2",
"status": "provision_started"
}
]
}
root@cmbu-auto-esx1-lnx02:~# server-manager show cluster --detail
{
"cluster": [
{
"base_image_id": "",
"email": "",
"id": "test-cluster",
"package_image_id": "",
"parameters": {
"admin_key": "AQDIgtNTgPLWARAAK6gs/fj8m88LnY9DwxJdYA==",
"analytics_data_ttl": "168",
"database_dir": "/home/cassandra",
"database_minimum_diskGB": "32",
"database_token": "",
"domain": "englab.juniper.net",
"encapsulation_priority": "MPLSoUDP,MPLSoGRE,VXLAN",
"external_bgp": "",
"gateway": "10.87.159.254",
"haproxy": "disable",
"internal_vip": "",
"keystone_password": "contrail123",
"keystone_tenant": "admin",
"keystone_username": "admin",
"live_migration": "enable",
"live_migration_nfs_vm_host": "cmbu-ceph-perf3",
"live_migration_storage_scope": "global",
"multi_tenancy": "False",
"osd_bootstrap_key": "AQCq7NFTeJUoBhAAlTVpxwWQJtBej/JDNhT6+Q==",
"password": "c0ntrail123",
"router_asn": "64512",
"service_token": "contrail123",
"storage_fsid": "ab431323-7f44-4738-b7ba-53c7cfdcc2eb",
"storage_mon_secret": "AQBM78tTEMz+GhAA3WiOXQI7UVdIy0YFFuTGdw==",
"storage_virsh_uuid": "2ef02eb8-fe5c-4df2-a66c-efc89bbebf2d",
"subnet_mask": "255.255.224.0",
"use_certificates": "False",
"uuid": "18b06d01-e17a-415d-bca0-3caa5238dc89"
},
"provision_role_sequence": "{'completed': [('cmbu-ceph-perf1', 'haproxy', '2016_03_31__16_28_45'), ('cmbu-ceph-perf1', 'database', '2016_03_31__16_29_09'), ('cmbu-ceph-perf1', 'openstack', '2016_03_31__16_30_28'), ('cmbu-ceph-perf1', 'config', '2016_03_31__16_31_53'), ('cmbu-ceph-perf1', 'control', '2016_03_31__16_32_25')], 'steps': [[(u'cmbu-ceph-perf1', u'collector')], [(u'cmbu-ceph-perf1', u'webui')], [(u'cmbu-ceph-perf2', 'compute'), (u'cmbu-ceph-perf2', 'post_provision'), (u'cmbu-ceph-perf3', 'compute'), (u'cmbu-ceph-perf3', 'post_provision'), (u'cmbu-ceph-perf4', 'compute'), (u'cmbu-ceph-perf4', 'post_provision'), (u'cmbu-ceph-perf2', 'storage-compute'), (u'cmbu-ceph-perf3', 'storage-compute'), (u'cmbu-ceph-perf4', 'storage-compute'), (u'cmbu-ceph-perf1', 'storage-master'), (u'cmbu-ceph-perf1', 'post_provision')]]}",
"provisioned_id": null
}
]
}
root@cmbu-auto-esx1-lnx02:~#
root@cmbu-ceph-perf1:/var/log/contrail# tail contrail-collector.log
2016-03-31 Thu 20:32:36:836.315 PDT cmbu-ceph-perf1 [Thread 140628369340160, Pid 15721]: SANDESH: Queue Drop: [SYS_INFO]: SandeshMessageStat: 1459481556836305 [INVALID]: SandeshMessageStat: name = cmbu-ceph-perf3:Compute:Storage-Stats-mgr:0 file = controller/src/analytics/generator.cc line = 73
2016-03-31 Thu 20:32:36:836.344 PDT cmbu-ceph-perf1 [Thread 140628369340160, Pid 15721]: SANDESH: Queue Drop: [SYS_INFO]: SandeshMessageStat: 1459481556836333 [INVALID]: SandeshMessageStat: name = cmbu-ceph-perf3:Compute:contrail-vrouter-agent:0 file = controller/src/analytics/generator.cc line = 73
2016-03-31 Thu 20:32:36:836.371 PDT cmbu-ceph-perf1 [Thread 140628369340160, Pid 15721]: SANDESH: Queue Drop: [SYS_INFO]: SandeshMessageStat: 1459481556836361 [INVALID]: SandeshMessageStat: name = cmbu-ceph-perf3:Compute:contrail-vrouter-nodemgr:0 file = controller/src/analytics/generator.cc line = 73
2016-03-31 Thu 20:32:36:836.393 PDT cmbu-ceph-perf1 [Thread 140628369340160, Pid 15721]: SANDESH: Queue Drop: [SYS_INFO]: SandeshMessageStat: 1459481556836385 [INVALID]: SandeshMessageStat: name = cmbu-ceph-perf4:Compute:Storage-Stats-mgr:0 file = controller/src/analytics/generator.cc line = 73
2016-03-31 Thu 20:32:36:836.415 PDT cmbu-ceph-perf1 [Thread 140628369340160, Pid 15721]: SANDESH: Queue Drop: [SYS_INFO]: SandeshMessageStat: 1459481556836408 [INVALID]: SandeshMessageStat: name = cmbu-ceph-perf4:Compute:contrail-vrouter-agent:0 file = controller/src/analytics/generator.cc line = 73
2016-03-31 Thu 20:32:36:836.431 PDT cmbu-ceph-perf1 [Thread 140628369340160, Pid 15721]: SANDESH: Queue Drop: [SYS_INFO]: SandeshMessageStat: 1459481556836425 [INVALID]: SandeshMessageStat: name = cmbu-ceph-perf4:Compute:contrail-vrouter-nodemgr:0 file = controller/src/analytics/generator.cc line = 73
2016-03-31 Thu 20:32:55:508.983 PDT cmbu-ceph-perf1 [Thread 140628710946560, Pid 15721]: ParseNodeParsing Empty node
2016-03-31 Thu 20:32:55:514.169 PDT cmbu-ceph-perf1 [Thread 140628710946560, Pid 15721]: cmbu-ceph-perf1:Global: StatTableWrite: Bad Prefix Tag VncApiStatsLog, api_stats tag api_stats.user: jsonline {"Source|s":"cmbu-ceph-perf1","api_stats.domain_name|s":"default-domain","api_stats.object_type|s":"physical_router","api_stats.operation_type|s":"GET","api_stats.project_name|s":"default-project","api_stats.remote_ip|s":"5.0.0.1:9100","api_stats.resp_code|s":"200","api_stats.response_size|n":24,"api_stats.response_time_in_usec|d":1375,"api_stats.useragent|s":"cmbu-ceph-perf1:/usr/bin/contrail-snmp-collector","name|s":"cmbu-ceph-perf1"}
2016-03-31 Thu 20:33:12:110.644 PDT cmbu-ceph-perf1 [Thread 140628356744960, Pid 15721]: ParseNodeParsing Empty node
2016-03-31 Thu 20:33:12:123.007 PDT cmbu-ceph-perf1 [Thread 140628356744960, Pid 15721]: cmbu-ceph-perf1:Global: StatTableWrite: Bad Prefix Tag VncApiStatsLog, api_stats tag api_stats.user: jsonline {"Source|s":"cmbu-ceph-perf1","api_stats.domain_name|s":"default-domain","api_stats.object_type|s":"virtual_router","api_stats.operation_type|s":"GET","api_stats.project_name|s":"default-project","api_stats.remote_ip|s":"5.0.0.1:9100","api_stats.resp_code|s":"200","api_stats.response_size|n":3250,"api_stats.response_time_in_usec|d":4175,"api_stats.useragent|s":"Restler for node.js","name|s":"cmbu-ceph-perf1"}
root@cmbu-ceph-perf1:/var/log/contrail#
Mar 31 20:23:04 cmbu-ceph-perf1 puppet-agent[14703]: (/Stage[main]/Contrail::Contrail_all/Contrail::Profile::Collector/Contrail::Collector/Contrail::Collector::Config/File[/etc/snmp/snmp.conf]) Dependency Exec[Temporarily delete contrail-analytics to upgrade python-kafka] has failures: true
Mar 31 20:23:04 cmbu-ceph-perf1 puppet-agent[14703]: (/Stage[main]/Contrail::Contrail_all/Contrail::Profile::Collector/Contrail::Collector/Contrail::Collector::Config/File[/etc/snmp/snmp.conf]) Skipping because of failed dependencies
Mar 31 20:23:04 cmbu-ceph-perf1 puppet-agent[14703]: (/Stage[main]/Contrail::Contrail_all/Contrail::Profile::Collector/Contrail::Collector/Contrail::Collector::Service/Exec[redis-del-db-dir]) Dependency Exec[Temporarily delete contrail-analytics to upgrade python-kafka] has failures: true
Mar 31 20:23:04 cmbu-ceph-perf1 puppet-agent[14703]: (/Stage[main]/Contrail::Contrail_all/Contrail::Profile::Collector/Contrail::Collector/Contrail::Collector::Service/Exec[redis-del-db-dir]) Skipping because of failed dependencies
Mar 31 20:23:04 cmbu-ceph-perf1 puppet-agent[14703]: (/Stage[main]/Contrail::Contrail_all/Contrail::Profile::Collector/Contrail::Collector/Contrail::Collector::Service/Service[redis-server]) Dependency Exec[Temporarily delete contrail-analytics to upgrade python-kafka] has failures: true
Mar 31 20:23:04 cmbu-ceph-perf1 puppet-agent[14703]: (/Stage[main]/Contrail::Contrail_all/Contrail::Profile::Collector/Contrail::Collector/Contrail::Collector::Service/Service[redis-server]) Skipping because of failed dependencies
Mar 31 20:23:04 cmbu-ceph-perf1 puppet-agent[14703]: (/Stage[main]/Contrail::Contrail_all/Contrail::Profile::Collector/Contrail::Collector/Contrail::Collector::Service/Service[supervisor-analytics]) Dependency Exec[Temporarily delete contrail-analytics to upgrade python-kafka] has failures: true
Mar 31 20:23:04 cmbu-ceph-perf1 puppet-agent[14703]: (/Stage[main]/Contrail::Contrail_all/Contrail::Profile::Collector/Contrail::Collector/Contrail::Collector::Service/Service[supervisor-analytics]) Skipping because of failed dependencies
Mar 31 20:23:04 cmbu-ceph-perf1 puppet-agent[14703]: (/Stage[main]/Contrail::Contrail_all/Contrail::Profile::Collector/Contrail::Collector/Contrail::Lib::Report_status[collector_completed]/Exec[contrail-status-collector_completed]) Dependency Exec[Temporarily delete contrail-analytics to upgrade python-kafka] has failures: true
Mar 31 20:23:04 cmbu-ceph-perf1 puppet-agent[14703]: (/Stage[main]/Contrail::Contrail_all/Contrail::Profile::Collector/Contrail::Collector/Contrail::Lib::Report_status[collector_completed]/Exec[contrail-status-collector_completed]) Skipping because of failed dependencies
Mar 31 20:23:04 cmbu-ceph-perf1 puppet-agent[14703]: Finished catalog run in 4.69 seconds
Mar 31 20:23:05 cmbu-ceph-perf1 puppet-agent[17485]: Local environment: "production" doesn't match server specified node environment "contrail_test_pkg", switching agent to "contrail_test_pkg".
Mar 31 20:23:07 cmbu-ceph-perf1 kernel: [39755.793978] Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-vhost instead.
Mar 31 20:23:08 cmbu-ceph-perf1 kernel: [39756.887974] Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-vhost instead.
Update:
In build 24, re-provisioning fails due to a bug in contrail_version.
Workaround:
On all collector nodes,
before re-provisioning or upgrading, run:
dpkg -P contrail-analytics contrail-openstack-analytics python-kafka-python
If you have already started provisioning and see the setup in the "stuck" state shown above,
run:
dpkg -P contrail-openstack-analytics