"dog vdi check" fails when inode object is broken

Bug #1317721 reported by Yuichi Bando
6
This bug affects 1 person
Affects Status Importance Assigned to Milestone
sheepdog
New
Undecided
Unassigned

Bug Description

When "dog vdi check" is run on a node whose copy of the inode object is broken,
it prints a "No VDI found" message and the recovery fails.
In this case the broken inode has lost the area that contains the VDI name.
(In addition, losing the area that contains nr_copies may also have a negative effect.)

To recover successfully, we need to run "dog vdi check" on the node that has healthy replicas.

[How to reproduce]
The three scripts below reproduce the problem.
Please execute "105_broken-inode-vdi-check.sh".

################ 105_broken-inode-vdi-check.sh ##################
#!/bin/bash
#
# Reproduces the "dog vdi check" failure on a broken inode object:
#   (0) create VDI v001 on a freshly formatted 3-sheep cluster,
#   (1) zero the first 4 bytes of the inode object stored under
#       /tmp/sheepdog0,
#   (2) run "dog vdi check v001" through port 7000.
# The exit status is that of "dog vdi check".
# common_functions.sh and assert_functions.sh are loaded from the directory
# that contains this script.

# Quoted so that a script path containing blanks still resolves; abort when
# the directory cannot be entered instead of sourcing from the wrong place.
SHDIR=$(cd "$(dirname "$0")" && pwd) || exit 1
. "${SHDIR}/common_functions.sh"
. "${SHDIR}/assert_functions.sh"

echo "### corosync & sheep restart."
_env_format

echo "### sheep cluster startup"
_start_sheep 3

echo "### wait startup and format sheep"
_wait_sheep_running_with_format || exit 1

echo "### test start"
echo " --- (0) prepare VDI --------------------------"
${DOG} vdi create v001 10M -P
${DOG} vdi list
echo " -------------------------------------------"
echo ""

echo " --- (1) overwrite inode object head ---------"
# bs=1 count=4 seek=0 with conv=notrunc: only the first 4 bytes are replaced
# by zeros, the rest of the object file is left untouched.
dd if=/dev/zero of=/tmp/sheepdog0/obj/8024022000000000 bs=1 count=4 seek=0 conv=notrunc
od -xa /tmp/sheepdog0/obj/8024022000000000
echo " -------------------------------------------"
echo ""

echo " --- (2) assert dog vdi check result ---------"
${DOG} vdi list -p 7000
${DOG} vdi check v001 -p 7000
# Capture the status right away; the assertion below would overwrite $?.
result=$?
_assert_zero "${result}" "dog vdi check error"
echo " -------------------------------------------"
echo ""

exit "${result}"

################ common_functions.sh ##################
#!/bin/bash

## External programs driven by the helper functions
DOG="/usr/sbin/dog"
SHEEP="/usr/sbin/sheep"
TGTD="/sbin/tgtd"

## Polling limit shared by the _wait_* helpers (one poll per second)
TIMEOUT=60

## COMMON LIBRARY
# Reset the test host: unmount everything under /mnt, log out of the iSCSI
# nodes, kill sheep/tgtd/iscsid, delete /tmp/sheepdog* and /var/lib/iscsi,
# then restart corosync. The return status is that of the corosync restart.
_env_format() {
  local proc

  # Output of these two commands is discarded.
  {
    umount /mnt/*
    iscsiadm -m node --logout
  } >/dev/null 2>&1

  for proc in sheep tgtd iscsid; do
    pkill -9 "${proc}"
  done

  rm -rf /tmp/sheepdog*
  rm -rf /var/lib/iscsi
  service corosync restart
}

# Launch several sheep daemons on this host.
#   $1: number of sheep processes to start (non-negative integer)
# Daemon number i (counted from 0) is started with "-z i", port 700<i> and
# the store directory /tmp/sheepdog<i>, which is created when missing.
# Globals: SHEEP (read)
# Returns: 0 after the launch loop, 1 (with a usage line on stdout) when the
#          argument is missing, superfluous or not a number.
_start_sheep() {
  local idx count version log_opt

  if [ $# -ne 1 ] || ! [[ "$1" =~ ^[0-9]+$ ]]; then
    echo "specific param[1]:exec sheep proc num"
    return 1
  fi
  # Force base 10 so that an argument such as "08" is not read as octal.
  count=$((10#$1))

  # The 4th blank-separated field of "sheep -v" is taken as the version.
  # Only a version that really starts with 0.7 gets the numeric log level;
  # the prefix match keeps strings such as 0.8.0_7 or 1.0.7 from being
  # mistaken for 0.7. ${SHEEP} is left unquoted so that its value keeps
  # being word-split exactly as before.
  version=$(${SHEEP} -v | cut -d' ' -f4)
  case "${version}" in
    0.7 | 0.7[._-]*) log_opt="7" ;;
    *) log_opt="level=debug" ;;
  esac

  for ((idx = 0; idx < count; idx++)); do
    mkdir -p "/tmp/sheepdog${idx}"
    ${SHEEP} -z "${idx}" -p "700${idx}" -l "${log_opt}" "/tmp/sheepdog${idx}"
  done
  return 0
}

# Poll "dog cluster info" until the cluster status reads "running".
# Globals: DOG (read), TIMEOUT (read; maximum number of polls, one second
#          apart)
# Returns: 0 as soon as the status is "running"; 1, after printing a timeout
#          message, when TIMEOUT polls went by without it.
_wait_sheep_running() {
  local attempt status

  for ((attempt = 1; attempt <= TIMEOUT; attempt++)); do
    # From the "Cluster status" line keep what stands before the first ','
    # and after the first ':', with all blanks removed.
    status=$(${DOG} cluster info | grep "Cluster status" | cut -d, -f1 | cut -d: -f2 | sed "s/ //g")
    # Quoted: an empty or multi-word result must not break the comparison.
    if [ "${status}" = "running" ]; then
      return 0
    fi
    sleep 1
  done

  echo "[ERROR] sheep start timeout (${TIMEOUT}s)."
  return 1
}

# Like _wait_sheep_running, but issues "dog cluster format" (answering "yes"
# to every prompt) before each status poll.
# Globals: DOG (read), TIMEOUT (read; maximum number of polls, one second
#          apart)
# Returns: 0 as soon as the status is "running"; 1, after printing a timeout
#          message, when TIMEOUT polls went by without it.
_wait_sheep_running_with_format() {
  local attempt status

  for ((attempt = 1; attempt <= TIMEOUT; attempt++)); do
    # Error output of the format attempt is discarded on purpose: the call
    # is simply repeated on the next round.
    yes yes | ${DOG} cluster format 2>/dev/null
    # From the "Cluster status" line keep what stands before the first ','
    # and after the first ':', with all blanks removed.
    status=$(${DOG} cluster info | grep "Cluster status" | cut -d, -f1 | cut -d: -f2 | sed "s/ //g")
    # Quoted: an empty or multi-word result must not break the comparison.
    if [ "${status}" = "running" ]; then
      return 0
    fi
    sleep 1
  done

  echo "[ERROR] sheep start timeout (${TIMEOUT}s)."
  return 1
}

# Poll "tgtadm -m system -o show" until its State line reads "ready".
# Globals: TIMEOUT (read; maximum number of polls, one second apart)
# Returns: 0 as soon as the state is "ready"; 1, after printing a timeout
#          message, when TIMEOUT polls went by without it.
_wait_tgtd_running() {
  local attempt state

  for ((attempt = 1; attempt <= TIMEOUT; attempt++)); do
    # Keep what follows the first ':' on the "State" line, blanks removed.
    state=$(tgtadm -m system -o show | grep State | cut -d: -f2 | sed "s/ //g")
    # Quoted: an empty or multi-word result must not break the comparison.
    if [ "${state}" = "ready" ]; then
      return 0
    fi
    sleep 1
  done

  echo "[ERROR] tgtd start timeout (${TIMEOUT}s)."
  return 1
}

# Wait until a symbolic link shows up at the given path.
#   $1: path of the symbolic link to wait for
# Globals: TIMEOUT (read; maximum number of polls, one second apart)
# Returns: 0 as soon as the path is a symbolic link; 1 when called with a
#          wrong number of arguments or when TIMEOUT polls went by.
_wait_exist_symboliclink() {
  local attempt

  if [ $# -ne 1 ]; then
    echo "specific param[1]:check symboliclink path"
    return 1
  fi

  for ((attempt = 1; attempt <= TIMEOUT; attempt++)); do
    # The path has to be quoted: unquoted, an empty path turns the check
    # into "test -h" (always true) and a path with blanks never matches.
    if [ -h "$1" ]; then
      return 0
    fi
    sleep 1
  done

  echo "[ERROR] symboliclink exist check timeout (${TIMEOUT}s)."
  return 1
}

################ assert_functions.sh ##################
#!/bin/bash

##
## ASSERT FUNCTIONS
##

#-----------------------------------
# func : _assert_equal
# desc : Passes when the inspected string is identical to the expected one.
# param: $1: inspection
#        $2: expect
#        $3: output error message
# ret  : 0 on match; 1 after printing an [ERROR] line otherwise
#-----------------------------------
_assert_equal() {
  [[ "$1" == "$2" ]] && return 0

  echo "[ERROR] $3, \"$1\" expect equals \"$2\"."
  return 1
}

#-----------------------------------
# func : _assert_not_equal
# desc : Passes when the inspected string differs from the given one.
# param: $1: inspection
#        $2: value that is not expected
#        $3: output error message
# ret  : 0 when they differ; 1 after printing an [ERROR] line otherwise
#-----------------------------------
_assert_not_equal() {
  if [[ "$1" == "$2" ]]; then
    echo "[ERROR] $3, \"$1\" expect not equals \"$2\"."
    return 1
  fi
  return 0
}

#-----------------------------------
# func : _assert_null
# desc : Passes when the inspected string is empty.
# param: $1: inspection
#        $2: output error message
# ret  : 0 when $1 is empty; 1 after printing an [ERROR] line otherwise
#-----------------------------------
_assert_null() {
  # Test the string itself. Feeding it to printf as a format string would
  # make values such as "%s" or "\c" look empty.
  if [ -z "$1" ]; then
    return 0
  fi

  echo "[ERROR] $2, \"$1\" expect equals null."
  return 1
}

#-----------------------------------
# func : _assert_not_null
# desc : Passes when the inspected string is not empty.
# param: $1: inspection
#        $2: output error message
# ret  : 0 when $1 is non-empty; 1 after printing an [ERROR] line otherwise
#-----------------------------------
_assert_not_null() {
  # Test the string itself. Feeding it to printf as a format string would
  # make values such as "%s" or "\c" look empty.
  if [ -n "$1" ]; then
    return 0
  fi

  echo "[ERROR] $2, expect equals not null."
  return 1
}

#-----------------------------------
# func : _assert_zero
# desc : Passes when the inspected value is exactly the string "0".
# param: $1: inspection
#        $2: output error message
# ret  : 0 when $1 is "0"; 1 after printing an [ERROR] line otherwise
#-----------------------------------
_assert_zero() {
  case "$1" in
    0) return 0 ;;
  esac

  echo "[ERROR] $2, \"$1\" expect equals zero."
  return 1
}

#-----------------------------------
# func : _assert_not_zero
# desc : Passes when the inspected value is anything but the string "0".
# param: $1: inspection
#        $2: output error message
# ret  : 0 when $1 is not "0"; 1 after printing an [ERROR] line otherwise
#-----------------------------------
_assert_not_zero() {
  if [[ "$1" == "0" ]]; then
    echo "[ERROR] $2, \"$1\" expect not equals zero."
    return 1
  fi
  return 0
}

#-----------------------------------
# func : _assert_contain
# desc : Passes when at least one line of the inspected text matches the
#        keyword. The keyword is handed to grep as a pattern.
# param: $1: inspection
#        $2: keyword to expect
#        $3: output error message
#        ($4): if "1", the inspected text is left out of the error message
# ret  : 0 on a match; 1 after printing an [ERROR] line otherwise
#-----------------------------------
_assert_contain() {
  # printf emits the text verbatim (echo would swallow "-n" or "-e"), and
  # "--" keeps a keyword that starts with "-" from being read as an option.
  if printf '%s\n' "$1" | grep -q -- "$2"; then
    return 0
  fi

  if [ "$4" = "1" ]; then
    echo "[ERROR] $3, expect contain \"$2\"."
  else
    echo "[ERROR] $3, \"$1\" expect contain \"$2\"."
  fi
  return 1
}

#-----------------------------------
# func : _assert_not_contain
# desc : Passes when no line of the inspected text matches the keyword.
#        The keyword is handed to grep as a pattern.
# param: $1: inspection
#        $2: keyword not to expect
#        $3: output error message
#        ($4): if "1", the inspected text is left out of the error message
# ret  : 0 when nothing matches; 1 after printing an [ERROR] line otherwise
#-----------------------------------
_assert_not_contain() {
  # printf emits the text verbatim (echo would swallow "-n" or "-e"), and
  # "--" keeps a keyword that starts with "-" from being read as an option,
  # which used to make grep fail and the assertion pass by mistake.
  if ! printf '%s\n' "$1" | grep -q -- "$2"; then
    return 0
  fi

  if [ "$4" = "1" ]; then
    echo "[ERROR] $3, expect not contain \"$2\"."
  else
    echo "[ERROR] $3, \"$1\" expect not contain \"$2\"."
  fi
  return 1
}

#-----------------------------------
# func : _assert_file_exist
# desc : Passes when the path names a regular file (checked with -f, so a
#        directory does not count).
# param: $1: inspection filepath
#        $2: output error message
# ret  : 0 when the file is there; 1 after printing an [ERROR] line otherwise
#-----------------------------------
_assert_file_exist() {
  test -f "$1" && return 0

  echo "[ERROR] $2, \"$1\" expect file exist."
  return 1
}

#-----------------------------------
# func : _assert_file_not_exist
# desc : Passes when the path does not name a regular file (checked with
#        -f, so a directory at that path still passes).
# param: $1: inspection filepath
#        $2: output error message
# ret  : 0 when no such file is there; 1 after printing an [ERROR] line
#        otherwise
#-----------------------------------
_assert_file_not_exist() {
  if [ -f "$1" ]; then
    # Message wording aligned with _assert_file_exist ("expect").
    echo "[ERROR] $2, \"$1\" expect file not exist."
    return 1
  fi
  return 0
}

Thanks
Yuichi Bando

description: updated
description: updated
To post a comment you must log in.
This report contains Public information  
Everyone can see this information.

Other bug subscribers

Remote bug watches

Bug watches keep track of this bug in other bug trackers.