Build 2714 : Alarms: PartialSysinfoCompute,ProcessStatus and ProcessConnectivity alarms are raised for one of the vrouters

Bug #1547366 reported by Ankit Jain
6
This bug affects 1 person
Affects Status Importance Assigned to Milestone
Juniper Openstack
Status tracked in Trunk
Trunk
Fix Committed
High
Anish Mehta

Bug Description

PartialSysinfoCompute, ProcessStatus and ProcessConnectivity alarms are being raised for one of the vrouters (nodeh3), even though everything seems fine with the vrouter and all of its processes are running.

Also, "json_operand1_value" and operand2's "json_value" are both shown as "null" in these alarms; please check.

root@nodeh3:~# contrail-status
== Contrail vRouter ==
supervisor-vrouter: active
contrail-vrouter-agent active
contrail-vrouter-nodemgr active

Pasting http://nodeg13:8081/analytics/uves/vrouter/nodeh3?flat below:

{

    "ComputeCpuState":

{

    "cpu_info":

[

        {
            "mem_res": 256356,
            "cpu_share": 1.60417,
            "used_sys_mem": 1564416,
            "mem_virt": 961360,
            "one_min_cpuload": 0.0025
        }
    ]

},
"NodeStatus":
{

    "deleted": false,
    "disk_usage_info":

[

{

    "partition_space_available_1k": ​389662480,
    "partition_space_used_1k": ​3543076,
    "partition_name": "/dev/mapper/nodeh3--vg-root",
    "partition_type": "ext4"

},

    {
        "partition_space_available_1k": ​160935,
        "partition_space_used_1k": ​67596,
        "partition_name": "/dev/sda1",
        "partition_type": "ext2"
    }

],
"process_info":
[

{

    "process_name": "contrail-vrouter-agent",
    "start_count": ​2,
    "process_state": "PROCESS_STATE_RUNNING",
    "last_stop_time": null,
    "core_file_list": [ ],
    "last_start_time": "1455827102994974",
    "stop_count": ​0,
    "last_exit_time": null,
    "exit_count": ​0

},

    {
        "process_name": "contrail-vrouter-nodemgr",
        "start_count": ​2,
        "process_state": "PROCESS_STATE_RUNNING",
        "last_stop_time": null,
        "core_file_list": [ ],
        "last_start_time": "1455827102991484",
        "stop_count": ​0,
        "last_exit_time": null,
        "exit_count": ​0
    }

],
"process_status":
[

{

    "instance_id": "0",
    "module_id": "contrail-vrouter-nodemgr",
    "state": "Functional",
    "description": null

},
{

    "instance_id": "0",
    "module_id": "contrail-vrouter-agent",
    "state": "Functional",
    "description": null,
    "connection_infos":

[

{

    "server_addrs":

    [
        "10.204.217.53:5269"
    ],
    "status": "Up",
    "type": "XMPP",
    "name": "control-node:10.204.217.53",
    "description": "OpenSent"

},
{

    "server_addrs":

    [
        "10.204.217.60:5269"
    ],
    "status": "Up",
    "type": "XMPP",
    "name": "control-node:10.204.217.60",
    "description": "OpenSent"

},
{

    "server_addrs":

    [
        "10.204.217.53:53"
    ],
    "status": "Up",
    "type": "XMPP",
    "name": "dns-server:10.204.217.53",
    "description": "OpenSent"

},
{

    "server_addrs":

    [
        "10.204.217.60:53"
    ],
    "status": "Up",
    "type": "XMPP",
    "name": "dns-server:10.204.217.60",
    "description": "OpenSent"

},
{

    "server_addrs":

    [
        "10.204.217.53:8086"
    ],
    "status": "Up",
    "type": "Collector",
    "name": null,
    "description": "Established"

},
{

    "server_addrs":

    [
        "10.204.217.53:5998"
    ],
    "status": "Up",
    "type": "Discovery",
    "name": "Collector",
    "description": "SubscribeResponse"

},
{

    "server_addrs":

    [
        "10.204.217.53:5998"
    ],
    "status": "Up",
    "type": "Discovery",
    "name": "dns-server",
    "description": "SubscribeResponse"

},
{

    "server_addrs":

                    [
                        "10.204.217.53:5998"
                    ],
                    "status": "Up",
                    "type": "Discovery",
                    "name": "xmpp-server",
                    "description": "SubscribeResponse"
                }
            ]
        }
    ]

},
"ContrailConfig":
{

    "elements":

    {
        "fq_name": "[\"default-global-system-config\", \"nodeh3\"]",
        "uuid": "\"2d767cca-0ab2-4980-b376-f6f0dd11d9b2\"",
        "virtual_router_dpdk_enabled": "false",
        "parent_type": "\"global-system-config\"",
        "perms2": "{\"owner\": \"f4a835993658491f87e5ff77062a31e2\", \"owner_access\": 7, \"global_access\": 0, \"share\": []}",
        "virtual_router_type": "[]",
        "display_name": "\"nodeh3\"",
        "id_perms": "{\"enable\": true, \"uuid\": {\"uuid_mslong\": 3275942986163308928, \"uuid_lslong\": 12931794894402476466}, \"created\": \"2016-02-18T20:22:02.493483\", \"description\": null, \"creator\": null, \"user_visible\": true, \"last_modified\": \"2016-02-18T20:22:02.493483\", \"permissions\": {\"owner\": \"admin\", \"owner_access\": 7, \"other_access\": 7, \"group\": \"KeystoneServiceAdmin\", \"group_access\": 7}}",
        "virtual_router_ip_address": "\"10.204.217.107\""
    }

},
"UVEAlarms":
{

    "alarms":

[

{

    "any_of":

[

{

    "all_of":

[

{

    "json_operand1_value": "null",
    "rule":

{

    "oper": "==",
    "operand1":

{

    "keys":

    [
        "NodeStatus",
        "process_info"
    ]

},
"operand2":

                        {
                            "json_value": "null"
                        }
                    }
                }
            ]
        }
    ],
    "severity": 3,
    "ack": false,
    "timestamp": 1455827033009114,
    "token": "eyJ0aW1lc3RhbXAiOiAxNDU1ODI3MDMzMDA5MTE0LCAiaHR0cF9wb3J0IjogNTk5NSwgImhvc3RfaXAiOiAiMTAuMjA0LjIxNy42MCJ9",
    "type": "ProcessStatus"

},
{

    "any_of":

[

{

    "all_of":

[

{

    "json_operand1_value": "null",
    "rule":

{

    "oper": "==",
    "operand1":

{

    "keys":

    [
        "VrouterAgent",
        "build_info"
    ]

},
"operand2":

                        {
                            "json_value": "null"
                        }
                    }
                }
            ]
        }
    ],
    "severity": ​4,
    "ack": false,
    "timestamp": ​1455827033009278,
    "token": "eyJ0aW1lc3RhbXAiOiAxNDU1ODI3MDMzMDA5Mjc4LCAiaHR0cF9wb3J0IjogNTk5NSwgImhvc3RfaXAiOiAiMTAuMjA0LjIxNy42MCJ9",
    "type": "PartialSysinfoCompute"

},
{

    "any_of":

[

{

    "all_of":

[

{

    "json_operand1_value": "null",
    "rule":

{

    "oper": "==",
    "operand1":

{

    "keys":

    [
        "NodeStatus",
        "process_status"
    ]

},
"operand2":

                                {
                                    "json_value": "null"
                                }
                            }
                        }
                    ]
                }
            ],
            "severity": ​3,
            "ack": false,
            "timestamp": ​1455827033009427,
            "token": "eyJ0aW1lc3RhbXAiOiAxNDU1ODI3MDMzMDA5NDI3LCAiaHR0cF9wb3J0IjogNTk5NSwgImhvc3RfaXAiOiAiMTAuMjA0LjIxNy42MCJ9",
            "type": "ProcessConnectivity"
        }
    ]

},
"VrouterStatsAgent":
{

    "exception_packets": ​410362,
    "total_in_bandwidth_utilization": ​0,
    "uptime": ​1455827105634123,
    "total_flows": ​0,
    "in_bytes": ​0,
    "out_tpkts": ​0,
    "phy_if_stats_list":

[

    {
        "out_bytes": ​54659800,
        "name": "p2p1",
        "duplexity": ​1,
        "out_pkts": ​200733,
        "in_bytes": ​135149151,
        "in_pkts": ​1003603,
        "speed": ​1000
    }

],
"vhost_stats":
{

    "out_bytes": ​135149151,
    "name": "vhost0",
    "duplexity": ​-1,
    "out_pkts": ​1003603,
    "in_bytes": ​54575632,
    "in_pkts": ​198729,
    "speed": ​-1

},
"exception_packets_dropped": ​8,
"in_tpkts": ​0,
"cpu_info":
{

    "sys_mem_info":

{

    "total": ​65683312,
    "used": ​1564416,
    "free": ​64118896,
    "buffers": ​102468

},
"num_cpu": ​4,
"cpu_share": ​1.60417,
"meminfo":
{

    "virt": ​961360,
    "peakvirt": ​1026632,
    "res": ​256356

},
"cpuload":

    {
        "fifteen_min_avg": ​0.02,
        "five_min_avg": ​0.0075,
        "one_min_avg": ​0.0025
    }

},
"phy_if_5min_usage":
[

    {
        "in_bandwidth_usage": ​0,
        "out_bandwidth_usage": ​0,
        "name": "p2p1"
    }

],
"xmpp_stats_list":
[

{

    "ip": "10.204.217.60",
    "reconnects": ​1,
    "in_msgs": ​0,
    "out_msgs": ​2

},

    {
        "ip": "10.204.217.53",
        "reconnects": ​1,
        "in_msgs": ​0,
        "out_msgs": ​1
    }

],
"flow_rate":
{

    "active_flows": ​0,
    "max_flow_deletes_per_second": ​0,
    "added_flows": ​0,
    "deleted_flows": ​0,
    "min_flow_adds_per_second": ​0,
    "min_flow_deletes_per_second": ​0,
    "max_flow_adds_per_second": ​0

},
"drop_stats":
{

    "ds_mcast_df_bit": ​0,
    "ds_flow_no_memory": ​0,
    "ds_composite_invalid_interface": ​0,
    "ds_push": ​0,
    "ds_invalid_if": ​0,
    "ds_pull": ​0,
    "ds_no_fmd": ​0,
    "ds_invalid_arp": ​0,
    "ds_trap_no_if": ​0,
    "ds_arp_reply_no_route": ​0,
    "ds_invalid_source": ​0,
    "ds_flow_action_invalid": ​0,
    "ds_invalid_packet": ​0,
    "ds_flow_invalid_protocol": ​0,
    "ds_discard": ​0,
    "ds_invalid_vnid": ​0,
    "ds_flow_table_full": ​0,
    "ds_invalid_label": ​0,
    "ds_garp_from_vm": ​0,
    "ds_frag_err": ​0,
    "ds_duplicated": ​3,
    "ds_invalid_nh": ​0,
    "ds_arp_no_route": ​0,
    "ds_misc": ​0,
    "ds_flood": ​0,
    "ds_interface_rx_discard": ​0,
    "ds_head_alloc_fail": ​0,
    "ds_flow_unusable": ​0,
    "ds_mcast_clone_fail": ​0,
    "ds_invalid_protocol": ​0,
    "ds_head_space_reserve_fail": ​0,
    "ds_interface_tx_discard": ​0,
    "ds_nowhere_to_go": ​0,
    "ds_cloned_original": ​0,
    "ds_arp_no_where_to_go": ​0,
    "ds_l2_no_route": ​0,
    "ds_cksum_err": ​0,
    "ds_rewrite_fail": ​0,
    "ds_flow_queue_limit_exceeded": ​0,
    "ds_ttl_exceeded": ​0,
    "ds_flow_nat_no_rflow": ​0,
    "ds_clone_fail": ​0,
    "ds_invalid_mcast_source": ​0,
    "ds_interface_drop": ​0,
    "ds_pcow_fail": ​0,
    "ds_flow_action_drop": ​0

},
"total_out_bandwidth_utilization": ​0,
"phy_if_10min_usage":
[

    {
        "in_bandwidth_usage": ​0,
        "out_bandwidth_usage": ​0,
        "name": "p2p1"
    }

],
"phy_if_band":
[

        {
            "in_bandwidth_usage": ​0,
            "out_bandwidth_usage": ​0,
            "name": "p2p1"
        }
    ],
    "aged_flows": ​0,
    "exception_packets_allowed": ​410354,
    "out_bytes": ​0

},
"VrouterAgent":
{

    "dns_servers":

[

    "10.204.217.53",
    "10.204.217.60"

],
"log_category": "*",
"control_ip": "10.204.217.107",
"control_node_list_cfg":
[

    "0.0.0.0",
    "0.0.0.0"

],
"config_file": "/etc/contrail/contrail-vrouter-agent.conf",
"ds_xs_instances": ​2,
"log_level": "SYS_NOTICE",
"tunnel_type": "MPLSoGRE",
"phy_if":
[

    {
        "name": "p2p1",
        "mac_address": "00:25:90:c8:f3:b6"
    }

],
"log_local": true,
"vhost_if":
{

    "name": "vhost0",
    "mac_address": "00:25:90:c8:f3:b6"

},
"platform": "HOST",
"sandesh_http_port": ​8085,
"eth_name": "p2p1",
"log_flow": false,
"headless_mode_cfg": false,
"self_ip_list":
[

    "10.204.217.107"

],
"hostname_cfg": "nodeh3",
"vhost_cfg":
{

    "ip": "10.204.217.107",
    "ip_prefix_len": ​24,
    "name": "vhost0",
    "gateway": "10.204.217.254"

},
"ll_max_vm_flows_cfg": ​2048,
"ll_max_system_flows_cfg": ​2048,
"flow_cache_timeout_cfg": ​0,
"max_vm_flows_cfg": ​100,
"dns_server_list_cfg":
[

    "0.0.0.0",
    "0.0.0.0"

],
"build_info": "{\"build-info\":[{\"build-time\":\"2016-02-18 09:33:08.482493\",\"build-hostname\":\"contrail-ec-build17\",\"build-user\":\"contrail-builder\",\"build-version\":\"3.0\",\"build-id\":\"3.0-2714\",\"build-number\":\"2714\"}]}",
"hypervisor": "kvm",
"ds_addr": "10.204.217.53",
"mode": "VROUTER",
"xmpp_peer_list":
[

{

    "status": true,
    "ip": "10.204.217.60",
    "primary": true,
    "setup_time": ​1455827106838446

},

            {
                "status": true,
                "ip": "10.204.217.53",
                "primary": false,
                "setup_time": ​1455827106841463
            }
        ],
        "log_file": "/var/log/contrail/contrail-vrouter-agent.log"
    }

}

You can also look at the setup if required: nodeg13, nodeg20, nodea21, nodeh3

Raj Reddy (rajreddy)
Changed in juniperopenstack:
importance: Undecided → High
Revision history for this message
OpenContrail Admin (ci-admin-f) wrote : [Review update] master

Review in progress for https://review.opencontrail.org/17476
Submitter: Anish Mehta (<email address hidden>)

Revision history for this message
Anish Mehta (amehta00) wrote :

The alarms were coming from alarmgen on nodeg20, even though the partition (#1) was currently owned by nodea21.

It seems that there is a corner case where alarmgen can end up with "stranded" alarms if a UVE happens to get deleted just before partition ownership is lost.

Revision history for this message
OpenContrail Admin (ci-admin-f) wrote : A change has been merged

Reviewed: https://review.opencontrail.org/17476
Committed: http://github.org/Juniper/contrail-controller/commit/8149b72e31cc67b7352a20de9f2b9deb659338b2
Submitter: Zuul
Branch: master

commit 8149b72e31cc67b7352a20de9f2b9deb659338b2
Author: Anish Mehta <email address hidden>
Date: Fri Feb 19 18:17:43 2016 -0800

When alarmgen is processing UVE updates, it can get empty contents
for an existing UVE. It may have just raised an alarm against this UVE.

In this case, we used to wait to delete the alarm until the alarm struct comes back from vizd.
But if we lost ownership of the partition in the meantime, the alarm will stay set indefinitely.

Change-Id: Ia10ce402785b19ade2e762a86b1b8377ef7e0fdb
Closes-Bug: 1547366

To post a comment you must log in.
This report contains Public information  
Everyone can see this information.

Other bug subscribers

Remote bug watches

Bug watches keep track of this bug in other bug trackers.