From 20ddc6a77a684cd84b881c6d377b2e63b9b6bb72 Mon Sep 17 00:00:00 2001
From: Dmitry Ilyin
Date: Fri, 28 Nov 2014 16:56:53 +0300
Subject: [PATCH] Reload pcmk cluster if node is not online

Change-Id: I27e2fd39c7701fd08bbb2f7e050e0d98e5af081b
---
 .../lib/puppet/provider/pcmk_reload/pcmk.rb        | 147 +++++++++++++++++++++
 .../puppet/corosync/lib/puppet/type/pcmk_reload.rb |  24 ++++
 deployment/puppet/corosync/manifests/keys.pp       |  26 ++++
 deployment/puppet/corosync/manifests/online.pp     |  11 ++
 4 files changed, 208 insertions(+)
 create mode 100644 deployment/puppet/corosync/lib/puppet/provider/pcmk_reload/pcmk.rb
 create mode 100644 deployment/puppet/corosync/lib/puppet/type/pcmk_reload.rb
 create mode 100644 deployment/puppet/corosync/manifests/keys.pp
 create mode 100644 deployment/puppet/corosync/manifests/online.pp

diff --git a/deployment/puppet/corosync/lib/puppet/provider/pcmk_reload/pcmk.rb b/deployment/puppet/corosync/lib/puppet/provider/pcmk_reload/pcmk.rb
new file mode 100644
--- /dev/null
+++ b/deployment/puppet/corosync/lib/puppet/provider/pcmk_reload/pcmk.rb
@@ -0,0 +1,147 @@
+# Provider for pcmk_reload: restarts corosync over ssh (on this node, then on
+# the DC or on every known node) until crm_mon reports this node online.
+Puppet::Type.type(:pcmk_reload).provide :pcmk do
+  commands :crm_node => 'crm_node'
+  commands :crm_mon => 'crm_mon'
+  commands :ssh => 'ssh'
+
+  # Default retry budget: RETRY_COUNT attempts, RETRY_STEP seconds apart.
+  # NOTE(review): a constant assigned inside a block is not scoped to the
+  # provider class - confirm no other provider defines different values.
+  RETRY_COUNT = 300
+  RETRY_STEP = 6
+
+  # @return [Boolean] true when a single 'crm_mon -1' run succeeds
+  def pacemaker_is_running?
+    crm_mon '-1'
+    true
+  rescue Puppet::ExecutionFailure
+    false
+  end
+
+  # @return [String] output of 'crm_mon -1', retried until the command succeeds
+  def crm_mon_data
+    retry_command { crm_mon '-1' }.to_s
+  end
+
+  # @return [String] name of this node as printed by 'crm_node -n' (memoized)
+  def hostname
+    @hostname ||= retry_command { crm_node '-n' }.strip
+  end
+
+  # Kill and restart corosync on a node over ssh, trying up to 3 times.
+  # @param node [String, nil] node name; nothing is done when it is nil
+  def reload_node(node)
+    return unless node
+    debug "Try to restart corosync on '#{node}'"
+    retry_command(3) do
+      ssh node, 'killall -9 corosync; /etc/init.d/corosync restart'
+    end
+  end
+
+  # Restart corosync on the local node.
+  def reload_self
+    reload_node hostname
+  end
+
+  # Parse the output of 'crm_mon -1'.
+  # @return [Hash] :dc => name of the current DC or nil,
+  #   :nodes => { node name => :online or :offline }
+  def nodes_status
+    data = { :nodes => {}, :dc => nil }
+    states = { 'Online:' => :online, 'OFFLINE:' => :offline }
+    crm_mon_data.split("\n").each do |line|
+      fields = line.split(/\s+/)
+      if line.start_with?('Current DC:')
+        data[:dc] = fields[2] if fields[2] && fields[2] != 'NONE'
+      end
+      states.each do |prefix, state|
+        next unless line.start_with?(prefix)
+        fields.each do |node|
+          # skip the line label and the brackets around the node list
+          next if [prefix, '[', ']'].include?(node)
+          data[:nodes].store node, state
+        end
+      end
+    end
+    debug "Status: #{data.inspect}"
+    data
+  end
+
+  # Run the block until it stops raising Puppet::ExecutionFailure.
+  # @param count [Integer] maximum number of attempts
+  # @param step [Integer] seconds to sleep after each failed attempt
+  # @return the value of the block from the first successful attempt;
+  #   calls fail when every attempt raised
+  def retry_command(count = RETRY_COUNT, step = RETRY_STEP)
+    count.times do
+      begin
+        return yield
+      rescue Puppet::ExecutionFailure => e
+        Puppet.debug "Command failed: #{e.message}"
+        sleep step
+      end
+    end
+    fail "Execution timeout after #{count * step} seconds!"
+  end
+
+  # Run the block until it returns a true value.
+  # @param count [Integer] maximum number of attempts
+  # @param step [Integer] seconds to sleep after each false result
+  # @return the first true value returned by the block;
+  #   calls fail when no attempt returned one
+  def retry_block_until_true(count = RETRY_COUNT, step = RETRY_STEP)
+    count.times do
+      out = yield
+      return out if out
+      sleep step
+    end
+    fail "Execution timeout after #{count * step} seconds!"
+  end
+
+  # @return [Symbol] :online or :offline; :offline when crm_mon does not
+  #   list this node at all
+  def my_status
+    nodes_status[:nodes].fetch(hostname, :offline)
+  end
+
+  # Wait until crm_mon reports a DC, then restart corosync on that node.
+  def reload_dc
+    # Reuse the name the wait returned: a second crm_mon query could find no
+    # DC, and reload_node would then silently do nothing.
+    dc = retry_block_until_true { nodes_status[:dc] }
+    reload_node dc
+  end
+
+  # Restart corosync on every node listed by crm_mon, online or offline.
+  def reload_all_nodes
+    nodes_status[:nodes].each_key { |node| reload_node node }
+  end
+
+  # Property getter.
+  # @return [Symbol] :offline when 'crm_mon -1' fails, else my_status
+  def status
+    debug "Call: status on #{@resource}"
+    return :offline unless pacemaker_is_running?
+    my_status
+  end
+
+  # Property setter: bring this node online by restarting corosync here and
+  # then on all nodes or on the DC, and wait until this node is online.
+  # @param value [Symbol] requested status; anything but :online is ignored
+  def status=(value)
+    debug "Call: status='#{value}' on #{@resource}"
+    return unless value == :online
+    reload_self
+    if @resource[:reload] == :all
+      debug 'Reload corosync on all nodes'
+      reload_all_nodes
+    else
+      debug 'Reload corosync on DC'
+      reload_dc
+    end
+    retry_block_until_true do
+      my_status == :online
+    end
+  end
+end
diff --git a/deployment/puppet/corosync/lib/puppet/type/pcmk_reload.rb b/deployment/puppet/corosync/lib/puppet/type/pcmk_reload.rb
new file mode 100644
--- /dev/null
+++ b/deployment/puppet/corosync/lib/puppet/type/pcmk_reload.rb
@@ -0,0 +1,24 @@
+Puppet::Type.newtype(:pcmk_reload) do
+  desc 'Kill and restart corosync on the DC or on all nodes if this node is not online'
+
+  newparam(:name) do
+    desc 'Arbitrary resource title'
+    isnamevar
+  end
+
+  newproperty(:status) do
+    desc 'Desired status of this node in the cluster'
+    newvalues :online, :offline
+    defaultto :online
+  end
+
+  newparam(:reload) do
+    desc 'Where to restart corosync besides this node: the DC only or all nodes'
+    newvalues :dc, :all
+    defaultto :all
+  end
+
+  autorequire(:service) do
+    ['corosync']
+  end
+end
diff --git a/deployment/puppet/corosync/manifests/keys.pp b/deployment/puppet/corosync/manifests/keys.pp
new file mode 100644
--- /dev/null
+++ b/deployment/puppet/corosync/manifests/keys.pp
@@ -0,0 +1,26 @@
+# Sets up ssh keys for root (via install_ssh_keys) and root's ssh client
+# config; corosync::online orders both before any pcmk_reload resource.
+class corosync::keys {
+  $ssh_private_key = '/var/lib/astute/nova/nova'
+  $ssh_public_key  = '/var/lib/astute/nova/nova.pub'
+
+  install_ssh_keys { 'ssh_key_for_corosync' :
+    ensure           => 'present',
+    user             => 'root',
+    private_key_path => $ssh_private_key,
+    public_key_path  => $ssh_public_key,
+    private_key_name => 'id_rsa',
+    public_key_name  => 'id_rsa.pub',
+    authorized_keys  => 'authorized_keys',
+  }
+
+  # NOTE(review): 'Host *' disables host key checking for every host root
+  # connects to, not only the cluster nodes - confirm this is intended.
+  file { '/root/.ssh/config' :
+    ensure  => 'present',
+    owner   => 'root',
+    group   => 'root',
+    mode    => '0600',
+    content => "Host *\n StrictHostKeyChecking no\n UserKnownHostsFile=/dev/null\n",
+  }
+}
diff --git a/deployment/puppet/corosync/manifests/online.pp b/deployment/puppet/corosync/manifests/online.pp
new file mode 100644
--- /dev/null
+++ b/deployment/puppet/corosync/manifests/online.pp
@@ -0,0 +1,11 @@
+# Declares a pcmk_reload resource and orders it after the ssh key/config
+# setup and before every service that uses the 'pacemaker' provider.
+class corosync::online {
+  include corosync::keys
+
+  pcmk_reload { 'online' : }
+
+  Install_ssh_keys['ssh_key_for_corosync'] -> Pcmk_reload <||>
+  File['/root/.ssh/config'] -> Pcmk_reload <||>
+  Pcmk_reload <||> -> Service <| provider == 'pacemaker' |>
+}
-- 
1.9.1