From 3a350eed06fea2b010922ce2800abce8a7b0b622 Mon Sep 17 00:00:00 2001 From: Igor Fedotov Date: Wed, 18 Aug 2021 13:39:02 +0300 Subject: [PATCH 2/2] os/bluestore: accept undecodable multi-block bluefs transactions on log replay. We should proceed with OSD startup when an undecodable bluefs transaction spanning multiple disk blocks is detected during log replay. The rationale is that such a transaction can result from an unexpected power-down, where only some of its disk blocks were written to disk. Hence we can treat this as a normal log replay stop condition. https://tracker.ceph.com/issues/52079 Signed-off-by: Igor Fedotov --- src/os/bluestore/BlueFS.cc | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/os/bluestore/BlueFS.cc b/src/os/bluestore/BlueFS.cc index 7b866b5eccc..a9696b32680 100644 --- a/src/os/bluestore/BlueFS.cc +++ b/src/os/bluestore/BlueFS.cc @@ -738,18 +738,27 @@ int BlueFS::_replay(bool noop) bl.claim_append(t); read_pos += r; } - seen_recs = true; bluefs_transaction_t t; try { bufferlist::iterator p = bl.begin(); ::decode(t, p); + seen_recs = true; } catch (buffer::error& e) { - derr << __func__ << " 0x" << std::hex << pos << std::dec - << ": stop: failed to decode: " << e.what() - << dendl; - delete log_reader; - return -EIO; + // Multi-block transactions might be incomplete due to unexpected + // power off. Hence let's treat that as a regular stop condition. + if (seen_recs && more) { + dout(10) << __func__ << " 0x" << std::hex << pos << std::dec + << ": stop: failed to decode: " << e.what() + << dendl; + } else { + derr << __func__ << " 0x" << std::hex << pos << std::dec + << ": stop: failed to decode: " << e.what() + << dendl; + delete log_reader; + return -EIO; + } + break; } assert(seq == t.seq); dout(10) << __func__ << " 0x" << std::hex << pos << std::dec -- 2.17.1