ProxMox Stonith driver

Here is a STONITH driver for ProxMox 2, 3 and 4 that works great for containers and VMs in Corosync/Pacemaker.

#!/bin/sh
#
# External STONITH module for ProxMox-managed hypervisors and containers.
# Uses vzctl, pct and qm in ProxMox versions 2, 3 and 4 as a STONITH device to control guests.
#
# Copyright (c) 2012 Charles Williams <chuck@itadmins.net>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of version 2 of the GNU General Public License as
# published by the Free Software Foundation.
#
# This program is distributed in the hope that it would be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
#
# Further, this software is distributed without any warranty that it is
# free of the rightful claim of any third person regarding infringement
# or the like.  Any license provided herein, whether implied or
# otherwise, applies only to this software file.  Patent licenses, if
# any, provided herein do not apply to combinations of this program with
# other software, or any other product whatsoever.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
#
# primitive p_fence_ct1 stonith:external/proxmox \
#  params hostlist="101" \
#         hosttype="vz" \
#         nodelist="host1 host2 host3" \
#  op monitor interval="60"
# primitive p_fence_vm1 stonith:external/proxmox \
#  params hostlist="102" \
#         hosttype="kvm" \
#         nodelist="host1 host2 host3 host4 host5" \
#  op monitor interval="60"
#location l_fence_ct1 p_fence_ct1 -inf: ct1
#location l_fence_vm1 p_fence_vm1 -inf: vm1
#property stonith-enabled=true

# Find the node in $nodelist that currently hosts guest $hostid by asking
# each node, over ssh, for the guest's status.
#
# Globals read:    nodelist, hostid, hosttype, CTCTL, SSH, STONITH_USER
# Globals written: HOSTNODE       - hosting node, empty when none was found
#                  STATUSRESPONCE - output of the last status query
# Returns:         0 when a hosting node was found, 1 otherwise
proxmox_get_node() {
        # Forget the node found for a previous guest, so that a failed
        # lookup can never leave a stale node behind for the caller.
        HOSTNODE=""

        # CTCTL is only defined for container types; KVM guests are
        # queried with qm, like everywhere else in this script.
        if [ "$hosttype" = "kvm" ]
        then
                statusctl="/usr/sbin/qm"
        else
                statusctl="$CTCTL"
        fi

        for node in $nodelist
        do
                STATUSRESPONCE=$($SSH "$STONITH_USER@$node" "sudo ${statusctl} status ${hostid}" 2>&1)
                # A successful query that reports the container as deleted
                # does not count: the guest is not on this node.
                if [ $? -eq 0 ] && [ "$STATUSRESPONCE" != "CTID $hostid deleted unmounted down" ]
                then
                        HOSTNODE=$node
                        return 0
                fi
        done
        return 1
}

# Start a container or VM on $HOSTNODE.
#
# Globals read:    hosttype, hostid, HOSTNODE, CTCTL, SSH, STONITH_USER
# Globals written: sshout (output of the remote command), rtc (its exit code)
# Returns:         0 when the guest was started or is already running,
#                  1 otherwise (including an unsupported hosttype)
proxmox_start() {
    if [ "$hosttype" = "kvm" ]
    then
        sshout=$($SSH "$STONITH_USER@$HOSTNODE" "sudo /usr/sbin/qm start ${hostid} 2>&1")
        rtc=$?
        if [ "$rtc" -eq 0 ]
        then
                ha_log.sh notice "VM $hostid was started"
                return 0
        fi
        # A guest that is already running is in the requested state.
        case "$sshout" in
                *"already running"*)
                        ha_log.sh notice "VM $hostid is running"
                        return 0
                        ;;
        esac
        ha_log.sh err "Failed to start VM $hostid (error code: $rtc)"
        ha_log.sh err "$sshout"
        return 1
    elif [ "$hosttype" = "vz" ] || [ "$hosttype" = "lxc" ]
    then
        sshout=$($SSH "$STONITH_USER@$HOSTNODE" "sudo ${CTCTL} start ${hostid} 2>&1")
        rtc=$?
        if [ "$rtc" -eq 0 ]
        then
                ha_log.sh notice "Container $hostid is running"
                return 0
        fi
        # Same rule as for VMs; this also covers the exact answer
        # "Container is already running".
        case "$sshout" in
                *"already running"*)
                        ha_log.sh notice "Container $hostid is running"
                        return 0
                        ;;
        esac
    fi
    ha_log.sh err "Failed to start Container $hostid (error code: $rtc)"
    ha_log.sh err "$sshout"
    return 1
}

# Reboot a container or VM on $HOSTNODE.
#
# VMs are shut down with "qm shutdown"; if that fails they are stopped
# with "qm stop". Only when one of the two succeeded is the VM started
# again. Containers are restarted with a single "$CTCTL restart".
#
# Globals read:    hosttype, hostid, HOSTNODE, CTCTL, SSH, STONITH_USER
# Globals written: sshout (output of the last remote command),
#                  rtc (its exit code)
# Returns:         0 when the guest was restarted, 1 otherwise
#                  (including an unsupported hosttype)
proxmox_restart() {
    if [ "$hosttype" = "kvm" ]
    then
        sshout=$($SSH "$STONITH_USER@$HOSTNODE" "sudo /usr/sbin/qm shutdown ${hostid} 2>&1")
        rtc=$?
        if [ "$rtc" -ne 0 ]
        then
                # The shutdown request failed: fall back to "qm stop".
                sshout=$($SSH "$STONITH_USER@$HOSTNODE" "sudo /usr/sbin/qm stop ${hostid} 2>&1")
                rtc=$?
        fi

        if [ "$rtc" -eq 0 ]
        then
                # The VM is down, bring it back up.
                sshout=$($SSH "$STONITH_USER@$HOSTNODE" "sudo /usr/sbin/qm start ${hostid} 2>&1")
                rtc=$?
                if [ "$rtc" -eq 0 ]
                then
                        ha_log.sh notice "VM $hostid was restarted"
                        return 0
                fi
        fi

        # Either the VM could not be brought down or it did not start again.
        ha_log.sh err "Failed to restart VM $hostid (error code: $rtc)"
        ha_log.sh err "$sshout"
        return 1
    elif [ "$hosttype" = "vz" ] || [ "$hosttype" = "lxc" ]
    then
        sshout=$($SSH "$STONITH_USER@$HOSTNODE" "sudo ${CTCTL} restart ${hostid} 2>&1")
        rtc=$?
        if [ "$rtc" -eq 0 ]
        then
                ha_log.sh notice "Container $hostid was restarted"
                return 0
        fi
    fi
    ha_log.sh err "Failed to restart Container $hostid (error code: $rtc)"
    ha_log.sh err "$sshout"
    return 1
}

# Stop a container or VM on $HOSTNODE.
#
# VMs are shut down with "qm shutdown" and, if that fails, stopped with
# "qm stop". Containers are stopped with "$CTCTL stop"; if that fails the
# container's status is queried to find out whether it is down anyway.
#
# Globals read:    hosttype, hostid, HOSTNODE, CTCTL, SSH, STONITH_USER
# Globals written: sshout, rtc (output / exit code of the stop command),
#                  statusout, statusrtc (same for the container status query)
# Returns:         0 when the guest was stopped by this call,
#                  2 when a container could not be stopped but reports
#                    itself as not running,
#                  1 otherwise (including an unsupported hosttype)
proxmox_stop() {
    if [ "$hosttype" = "kvm" ]
    then
        sshout=$($SSH "$STONITH_USER@$HOSTNODE" "sudo /usr/sbin/qm shutdown ${hostid} 2>&1")
        rtc=$?
        if [ "$rtc" -eq 0 ]
        then
                ha_log.sh notice "VM $hostid was shutdown"
                return 0
        fi

        # The shutdown request failed: fall back to "qm stop".
        sshout=$($SSH "$STONITH_USER@$HOSTNODE" "sudo /usr/sbin/qm stop ${hostid} 2>&1")
        rtc=$?
        if [ "$rtc" -eq 0 ]
        then
                ha_log.sh notice "VM $hostid was killed"
                return 0
        fi
        ha_log.sh err "Failed to shutdown VM $hostid (error code: $rtc)"
        ha_log.sh err "$sshout"
        return 1
    elif [ "$hosttype" = "vz" ] || [ "$hosttype" = "lxc" ]
    then
        sshout=$($SSH "$STONITH_USER@$HOSTNODE" "sudo ${CTCTL} stop ${hostid} 2>&1")
        rtc=$?
        if [ "$rtc" -eq 0 ]
        then
                ha_log.sh notice "Container $hostid was stopped"
                return 0
        fi

        # The stop failed. Only claim "already stopped" when the status
        # query itself succeeded AND does not report a running container;
        # anything else must be treated as a failed fencing attempt.
        # The status result is kept apart so that the error log below still
        # shows why the stop failed.
        statusout=$($SSH "$STONITH_USER@$HOSTNODE" "sudo ${CTCTL} status ${hostid} 2>&1")
        statusrtc=$?
        if [ "$statusrtc" -eq 0 ]
        then
                # NOTE(review): expects the status text to contain "down"
                # (vzctl) or "stopped" (pct) for a halted container -
                # confirm against the installed tool versions.
                case "$statusout" in
                        *running*)
                                ;;
                        *down*|*stopped*)
                                ha_log.sh notice "Container $hostid is already stopped"
                                return 2
                                ;;
                esac
        fi
    fi
    ha_log.sh err "Failed to stop Container $hostid (error code: $rtc)"
    ha_log.sh err "$sshout"
    return 1
}

# Check that the STONITH device works by querying the status of guest
# $hostid on $HOSTNODE; being able to get an answer means the device is OK.
#
# Globals read:    hosttype, hostid, HOSTNODE, CTCTL, SSH, STONITH_USER
# Globals written: sshout (output of the status command), rtc (its exit code)
# Returns:         0 when the status command succeeded, 1 otherwise
#                  (including an unsupported hosttype)
proxmox_status() {
    case "$hosttype" in
        kvm)
            sshout=$($SSH $STONITH_USER@$HOSTNODE "sudo /usr/sbin/qm status ${hostid} 2>&1")
            rtc=$?
            if [ $rtc -ne 0 ]
            then
                ha_log.sh err "Failed to get status for VM $hostid (error code: $rtc)"
                ha_log.sh err "$sshout"
                return 1
            fi
            ha_log.sh notice "$sshout"
            return 0
            ;;
        vz|lxc)
            sshout=$($SSH $STONITH_USER@$HOSTNODE "sudo ${CTCTL} status ${hostid} 2>&1")
            rtc=$?
            if [ $rtc -eq 0 ]
            then
                ha_log.sh notice "$sshout"
                return 0
            fi
            ;;
    esac

    # Reached for a failed container query and for any other hosttype.
    ha_log.sh err "Failed to get status for container $hostid (error code: $rtc)"
    ha_log.sh err "$sshout"
    return 1
}

# Validate the plugin configuration.
#
# Logs the problem and terminates the script with status 1 when hostlist
# or nodelist is empty, or when hosttype is not one of vz, lxc or kvm.
# Returns normally (status 0) when the configuration is acceptable.
#
# Globals read: hostlist, nodelist, hosttype
proxmox_check_config() {
    if [ -z "$hostlist" ] || [ -z "$nodelist" ]
    then
        ha_log.sh err "hostlist or nodelist missing; check configuration"
        exit 1
    fi

    case "$hosttype" in
        vz|lxc|kvm)
            ;;
        *)
            ha_log.sh err "hosttype missing or incorrect; check configuration"
            exit 1
            ;;
    esac
}

# Print the XML description of the plugin parameters (hostlist, hosttype,
# nodelist, stonithuser) on stdout, then terminate the script with
# status 0. This function does not return to its caller.
proxmox_info() {
    # The delimiter is quoted: the text below is emitted literally.
    cat <<'END_OF_PARAMETERS'
<parameters>
<parameter name="hostlist" unique="1" required="1">
<content type="string" />
<shortdesc lang="en">
List of hosts to control
</shortdesc>
<longdesc lang="en">
List of Host container/VM IDs to control
</longdesc>
</parameter>

<parameter name="hosttype" required="1">
<content type="string" />
<shortdesc lang="en">
Host Type [vz, lxc, kvm]
</shortdesc>
<longdesc lang="en">
Type of Host:
        vz = OpenVZ
        lxc = LXC
        kvm = Qemu/KVM
</longdesc>
</parameter>

<parameter name="nodelist" required="1">
<content type="string" />
<shortdesc lang="en">
Host nodes list
</shortdesc>
<longdesc lang="en">
List of Host nodes that can be used to control
the configured hosts
</longdesc>
</parameter>

<parameter name="stonithuser" required="0">
<content type="string" default="stonith"/>
<shortdesc lang="en">
SSH user
</shortdesc>
<longdesc lang="en">
SSH user that can log into the nodes.

Defaults to user "stonith"
</longdesc>
</parameter>
</parameters>
END_OF_PARAMETERS
    exit 0
}

#############
# Main code #
#############

# Do not let an inherited ssh agent socket fool you when testing with
# stonith(8) and the ssh transport.
unset SSH_AUTH_SOCK

# ssh command line and login used for every remote call.
SSH="/usr/bin/ssh -q -x -n"
STONITH_USER="${stonithuser:-stonith}"

# Both lists may also be comma separated; turn commas into blanks.
hostlist=$(echo $hostlist | sed 's/,/ /g')
nodelist=$(echo $nodelist | sed 's/,/ /g')

# Container control tool for the configured container type.
# It stays unset for any other hosttype.
case "$hosttype" in
        vz)
                CTCTL="/usr/sbin/vzctl"
                ;;
        lxc)
                CTCTL="/usr/sbin/pct"
                ;;
esac

# Dispatch on the action given as first argument.
#
# on/off/reset/status validate the configuration, then apply the action to
# every guest in hostlist; the script exits with 1 as soon as the action
# failed for ANY guest (not only the last one), and with 0 otherwise.
# Every arm terminates the script.
case "$1" in
    gethosts)
        # Print one host per line; anything from a ':' up to the next
        # blank is dropped from each entry.
        hostlist=$(echo $hostlist | sed -e 's/:[^ ]*//g')
        for hostid in $hostlist
        do
                echo $hostid
        done
        exit 0
        ;;

    on)
        proxmox_check_config
        rc=0
        for hostid in $hostlist
        do
                proxmox_get_node
                proxmox_start || rc=1
        done
        exit $rc
        ;;

    off)
        proxmox_check_config
        rc=0
        for hostid in $hostlist
        do
                proxmox_get_node
                proxmox_stop
                # Only status 1 is a failure: proxmox_stop returns 2 for a
                # guest that was already stopped, which is reported as
                # success here.
                if [ $? -eq 1 ]
                then
                        rc=1
                fi
        done
        exit $rc
        ;;

    reset)
        proxmox_check_config
        rc=0
        for hostid in $hostlist
        do
                proxmox_get_node
                proxmox_restart || rc=1
        done
        exit $rc
        ;;

    status)
        proxmox_check_config
        rc=0
        for hostid in $hostlist
        do
                proxmox_get_node
                proxmox_status || rc=1
        done
        exit $rc
        ;;

    getconfignames)
        # NOTE(review): "hostname" is listed here but is neither described
        # by proxmox_info nor read anywhere in this script - confirm
        # whether it can be dropped.
        echo "hostname hostlist hosttype nodelist stonithuser"
        exit 0
        ;;

    getinfo-devid)
        echo "ProxMox STONITH device"
        exit 0
        ;;

    getinfo-devname)
        echo "ProxMox STONITH external device"
        exit 0
        ;;

    getinfo-devdescr)
        echo "ProxMox OpenVZ/LXC Container and QEMU/KVM VM control"
        exit 0
        ;;

    getinfo-devurl)
        echo "http://proxmox.org/ http://linux-ha.org/wiki"
        exit 0
        ;;

    getinfo-xml)
        # proxmox_info ends the script itself; the explicit exit makes
        # sure nothing else is ever written after the XML.
        proxmox_info
        exit 0
        ;;

    *)
        # Unknown or missing action.
        exit 1
        ;;
esac