diff -Nru ceph-15.2.17/debian/changelog ceph-15.2.17/debian/changelog --- ceph-15.2.17/debian/changelog 2022-10-12 13:30:58.000000000 +0000 +++ ceph-15.2.17/debian/changelog 2022-10-31 05:45:04.000000000 +0000 @@ -1,3 +1,10 @@ +ceph (15.2.17-0ubuntu0.20.04.2) focal; urgency=medium + + * d/p/bug1978913.patch: + Cherry-pick upstream fix for on-line trim of dups + + -- Nikhil Kshirsagar Mon, 31 Oct 2022 05:45:04 +0000 + ceph (15.2.17-0ubuntu0.20.04.1) focal; urgency=medium * New upstream release (LP: #1990862). diff -Nru ceph-15.2.17/debian/patches/bug1978913.patch ceph-15.2.17/debian/patches/bug1978913.patch --- ceph-15.2.17/debian/patches/bug1978913.patch 1970-01-01 00:00:00.000000000 +0000 +++ ceph-15.2.17/debian/patches/bug1978913.patch 2022-10-31 05:43:10.000000000 +0000 @@ -0,0 +1,840 @@ +Index: ceph-15.2.17/qa/standalone/osd/repro_long_log.sh +=================================================================== +--- ceph-15.2.17.orig/qa/standalone/osd/repro_long_log.sh ++++ ceph-15.2.17/qa/standalone/osd/repro_long_log.sh +@@ -148,6 +148,44 @@ function TEST_trim_max_entries() + test_log_size $PGID 3 || return 1 + } + ++function TEST_trim_max_entries_with_dups() ++{ ++ local dir=$1 ++ ++ setup_log_test $dir || return 1 ++ ++ ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1 ++ ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2 ++ ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2 ++ ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4 ++ ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 20 || return 1 ++ ++ # adding log entries, should only trim 4 and add one each time ++ # dups should be trimmed to 1 ++ rados -p test rm foo ++ test_log_size $PGID 18 2 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 15 6 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 12 10 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 9 14 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 6 18 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 3 20 || return 1 ++ ++ # below trim_min ++ rados -p test rm foo ++ test_log_size $PGID 4 20 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 3 20 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 4 20 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 3 20 || return 1 ++} ++ + main repro-long-log "$@" + + # Local Variables: +Index: ceph-15.2.17/src/kv/RocksDBStore.cc +=================================================================== +--- ceph-15.2.17.orig/src/kv/RocksDBStore.cc ++++ ceph-15.2.17/src/kv/RocksDBStore.cc +@@ -1008,6 +1008,8 @@ void RocksDBStore::RocksDBTransactionImp + const string &start, + const string &end) + { ++ ldout(db->cct, 10) << __func__ << " enter start=" << start ++ << " end=" << end << dendl; + auto cf = db->get_cf_handle(prefix); + + uint64_t cnt = db->delete_range_threshold; +@@ -1021,8 +1023,12 @@ void RocksDBStore::RocksDBTransactionImp + if (!cnt) { + bat.RollbackToSavePoint(); + if (cf) { ++ ldout(db->cct, 10) << __func__ << " p_iter == end(), resorting to DeleteRange" ++ << dendl; + bat.DeleteRange(cf, rocksdb::Slice(start), rocksdb::Slice(end)); + } else { ++ ldout(db->cct, 10) << __func__ << " p_iter == end(), resorting to DeleteRange" ++ << dendl; + bat.DeleteRange(db->default_cf, + rocksdb::Slice(combine_strings(prefix, start)), + rocksdb::Slice(combine_strings(prefix, end))); +@@ -1038,6 +1044,7 @@ void RocksDBStore::RocksDBTransactionImp + --cnt; + } + bat.PopSavePoint(); ++ ldout(db->cct, 10) << __func__ << " 
end" << dendl; + } + + void RocksDBStore::RocksDBTransactionImpl::merge( +Index: ceph-15.2.17/src/osd/PGLog.cc +=================================================================== +--- ceph-15.2.17.orig/src/osd/PGLog.cc ++++ ceph-15.2.17/src/osd/PGLog.cc +@@ -50,6 +50,7 @@ void PGLog::IndexedLog::trim( + set* trimmed_dups, + eversion_t *write_from_dups) + { ++ lgeneric_subdout(cct, osd, 10) << "IndexedLog::trim s=" << s << dendl; + ceph_assert(s <= can_rollback_to); + if (complete_to != log.end()) + lgeneric_subdout(cct, osd, 20) << " complete_to " << complete_to->version << dendl; +@@ -121,10 +122,18 @@ void PGLog::IndexedLog::trim( + } + } + +- while (!dups.empty()) { ++ // we can hit an inflated `dups` b/c of https://tracker.ceph.com/issues/53729 ++ // the idea is to slowly trim them over a prolonged period of time and mix ++ // omap deletes with writes (if we're here, a new log entry got added) to ++ // neither: 1) blow size of single Transaction nor 2) generate-n-accumulate ++ // large amount of tombstones in BlueStore's RocksDB. ++ // if trimming immediately is a must, then the ceph-objectstore-tool is ++ // the way to go. ++ const size_t max_dups = cct->_conf->osd_pg_log_dups_tracked; ++ for (size_t max_dups_to_trim = cct->_conf->osd_pg_log_trim_max; ++ max_dups_to_trim > 0 && dups.size() > max_dups; ++ max_dups_to_trim--) { + const auto& e = *dups.begin(); +- if (e.version.version >= earliest_dup_version) +- break; + lgeneric_subdout(cct, osd, 20) << "trim dup " << e << dendl; + if (trimmed_dups) + trimmed_dups->insert(e.get_key_name()); +@@ -135,6 +144,10 @@ void PGLog::IndexedLog::trim( + // raise tail? + if (tail < s) + tail = s; ++ lgeneric_subdout(cct, osd, 20) << "IndexedLog::trim after trim" ++ << " dups.size()=" << dups.size() ++ << " tail=" << tail ++ << " s=" << s << dendl; + } + + ostream& PGLog::IndexedLog::print(ostream& out) const +@@ -506,6 +519,9 @@ void PGLog::merge_log(pg_info_t &oinfo, + + // returns true if any changes were made to log.dups + bool PGLog::merge_log_dups(const pg_log_t& olog) { ++ dout(5) << __func__ ++ << " log.dups.size()=" << log.dups.size() ++ << "olog.dups.size()=" << olog.dups.size() << dendl; + bool changed = false; + + if (!olog.dups.empty()) { +@@ -584,6 +600,9 @@ bool PGLog::merge_log_dups(const pg_log_ + } + } + ++ dout(5) << "end of " << __func__ << " changed=" << changed ++ << " log.dups.size()=" << log.dups.size() ++ << " olog.dups.size()=" << olog.dups.size() << dendl; + return changed; + } + +@@ -645,7 +664,8 @@ void PGLog::write_log_and_missing( + dirty_from_dups, + write_from_dups, + &may_include_deletes_in_missing_dirty, +- (pg_log_debug ? &log_keys_debug : nullptr)); ++ (pg_log_debug ? 
&log_keys_debug : nullptr), ++ this); + undirty(); + } else { + dout(10) << "log is not dirty" << dendl; +@@ -659,14 +679,15 @@ void PGLog::write_log_and_missing_wo_mis + pg_log_t &log, + const coll_t& coll, const ghobject_t &log_oid, + map &divergent_priors, +- bool require_rollback ++ bool require_rollback, ++ const DoutPrefixProvider *dpp + ) + { + _write_log_and_missing_wo_missing( + t, km, log, coll, log_oid, + divergent_priors, eversion_t::max(), eversion_t(), eversion_t(), + true, true, require_rollback, +- eversion_t::max(), eversion_t(), eversion_t(), nullptr); ++ eversion_t::max(), eversion_t(), eversion_t(), nullptr, dpp); + } + + // static +@@ -678,7 +699,8 @@ void PGLog::write_log_and_missing( + const ghobject_t &log_oid, + const pg_missing_tracker_t &missing, + bool require_rollback, +- bool *may_include_deletes_in_missing_dirty) ++ bool *may_include_deletes_in_missing_dirty, ++ const DoutPrefixProvider *dpp) + { + _write_log_and_missing( + t, km, log, coll, log_oid, +@@ -692,7 +714,7 @@ void PGLog::write_log_and_missing( + eversion_t::max(), + eversion_t(), + eversion_t(), +- may_include_deletes_in_missing_dirty, nullptr); ++ may_include_deletes_in_missing_dirty, nullptr, dpp); + } + + // static +@@ -711,10 +733,14 @@ void PGLog::_write_log_and_missing_wo_mi + eversion_t dirty_to_dups, + eversion_t dirty_from_dups, + eversion_t write_from_dups, +- set *log_keys_debug ++ set *log_keys_debug, ++ const DoutPrefixProvider *dpp + ) + { +- // dout(10) << "write_log_and_missing, clearing up to " << dirty_to << dendl; ++ ldpp_dout(dpp, 10) << "_write_log_and_missing_wo_missing, clearing up to " << dirty_to ++ << " dirty_to_dups=" << dirty_to_dups ++ << " dirty_from_dups=" << dirty_from_dups ++ << " write_from_dups=" << write_from_dups << dendl; + if (touch_log) + t.touch(coll, log_oid); + if (dirty_to != eversion_t()) { +@@ -765,6 +791,8 @@ void PGLog::_write_log_and_missing_wo_mi + if (dirty_to_dups != eversion_t()) { + pg_log_dup_t min, dirty_to_dup; + dirty_to_dup.version = dirty_to_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups min=" << min.get_key_name() ++ << " to dirty_to_dup=" << dirty_to_dup.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + min.get_key_name(), dirty_to_dup.get_key_name()); +@@ -773,11 +801,16 @@ void PGLog::_write_log_and_missing_wo_mi + pg_log_dup_t max, dirty_from_dup; + max.version = eversion_t::max(); + dirty_from_dup.version = dirty_from_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups dirty_from_dup=" ++ << dirty_from_dup.get_key_name() ++ << " to max=" << max.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + dirty_from_dup.get_key_name(), max.get_key_name()); + } + ++ ldpp_dout(dpp, 10) << __func__ << " going to encode log.dups.size()=" ++ << log.dups.size() << dendl; + for (const auto& entry : log.dups) { + if (entry.version > dirty_to_dups) + break; +@@ -786,6 +819,8 @@ void PGLog::_write_log_and_missing_wo_mi + (*km)[entry.get_key_name()].claim(bl); + } + ++ ldpp_dout(dpp, 10) << __func__ << " 1st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + for (list::reverse_iterator p = log.dups.rbegin(); + p != log.dups.rend() && + (p->version >= dirty_from_dups || p->version >= write_from_dups) && +@@ -796,8 +831,11 @@ void PGLog::_write_log_and_missing_wo_mi + (*km)[p->get_key_name()].claim(bl); + } + ++ ldpp_dout(dpp, 10) << __func__ << " 2st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + if (dirty_divergent_priors) { +- //dout(10) << "write_log_and_missing: writing 
divergent_priors" << dendl; ++ ldpp_dout(dpp, 10) << "write_log_and_missing: writing divergent_priors" ++ << dendl; + encode(divergent_priors, (*km)["divergent_priors"]); + } + if (require_rollback) { +@@ -808,6 +846,7 @@ void PGLog::_write_log_and_missing_wo_mi + log.get_rollback_info_trimmed_to(), + (*km)["rollback_info_trimmed_to"]); + } ++ ldpp_dout(dpp, 10) << "end of " << __func__ << dendl; + } + + // static +@@ -829,8 +868,14 @@ void PGLog::_write_log_and_missing( + eversion_t dirty_from_dups, + eversion_t write_from_dups, + bool *may_include_deletes_in_missing_dirty, // in/out param +- set *log_keys_debug ++ set *log_keys_debug, ++ const DoutPrefixProvider *dpp + ) { ++ ldpp_dout(dpp, 10) << __func__ << " clearing up to " << dirty_to ++ << " dirty_to_dups=" << dirty_to_dups ++ << " dirty_from_dups=" << dirty_from_dups ++ << " write_from_dups=" << write_from_dups ++ << " trimmed_dups.size()=" << trimmed_dups.size() << dendl; + set to_remove; + to_remove.swap(trimmed_dups); + for (auto& t : trimmed) { +@@ -853,7 +898,8 @@ void PGLog::_write_log_and_missing( + clear_up_to(log_keys_debug, dirty_to.get_key_name()); + } + if (dirty_to != eversion_t::max() && dirty_from != eversion_t::max()) { +- // dout(10) << "write_log_and_missing, clearing from " << dirty_from << dendl; ++ ldpp_dout(dpp, 10) << "write_log_and_missing, clearing from " ++ << dirty_from << dendl; + t.omap_rmkeyrange( + coll, log_oid, + dirty_from.get_key_name(), eversion_t::max().get_key_name()); +@@ -894,6 +940,8 @@ void PGLog::_write_log_and_missing( + if (dirty_to_dups != eversion_t()) { + pg_log_dup_t min, dirty_to_dup; + dirty_to_dup.version = dirty_to_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups min=" << min.get_key_name() ++ << " to dirty_to_dup=" << dirty_to_dup.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + min.get_key_name(), dirty_to_dup.get_key_name()); +@@ -902,11 +950,16 @@ void PGLog::_write_log_and_missing( + pg_log_dup_t max, dirty_from_dup; + max.version = eversion_t::max(); + dirty_from_dup.version = dirty_from_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups dirty_from_dup=" ++ << dirty_from_dup.get_key_name() ++ << " to max=" << max.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + dirty_from_dup.get_key_name(), max.get_key_name()); + } + ++ ldpp_dout(dpp, 10) << __func__ << " going to encode log.dups.size()=" ++ << log.dups.size() << dendl; + for (const auto& entry : log.dups) { + if (entry.version > dirty_to_dups) + break; +@@ -914,6 +967,8 @@ void PGLog::_write_log_and_missing( + encode(entry, bl); + (*km)[entry.get_key_name()].claim(bl); + } ++ ldpp_dout(dpp, 10) << __func__ << " 1st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + + for (list::reverse_iterator p = log.dups.rbegin(); + p != log.dups.rend() && +@@ -925,8 +980,11 @@ void PGLog::_write_log_and_missing( + (*km)[p->get_key_name()].claim(bl); + } + ++ ldpp_dout(dpp, 10) << __func__ << " 2st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + if (clear_divergent_priors) { +- //dout(10) << "write_log_and_missing: writing divergent_priors" << dendl; ++ ldpp_dout(dpp, 10) << "write_log_and_missing: writing divergent_priors" ++ << dendl; + to_remove.insert("divergent_priors"); + } + // since we encode individual missing items instead of a whole +@@ -956,6 +1014,7 @@ void PGLog::_write_log_and_missing( + + if (!to_remove.empty()) + t.omap_rmkeys(coll, log_oid, to_remove); ++ ldpp_dout(dpp, 10) << "end of " << __func__ << dendl; + } + + void 
PGLog::rebuild_missing_set_with_deletes( +Index: ceph-15.2.17/src/osd/PGLog.h +=================================================================== +--- ceph-15.2.17.orig/src/osd/PGLog.h ++++ ceph-15.2.17/src/osd/PGLog.h +@@ -1274,8 +1274,9 @@ public: + map* km, + pg_log_t &log, + const coll_t& coll, +- const ghobject_t &log_oid, map &divergent_priors, +- bool require_rollback); ++ const ghobject_t &log_oid, std::map &divergent_priors, ++ bool require_rollback, ++ const DoutPrefixProvider *dpp = nullptr); + + static void write_log_and_missing( + ObjectStore::Transaction& t, +@@ -1285,7 +1286,8 @@ public: + const ghobject_t &log_oid, + const pg_missing_tracker_t &missing, + bool require_rollback, +- bool *rebuilt_missing_set_with_deletes); ++ bool *rebuilt_missing_set_with_deletes, ++ const DoutPrefixProvider *dpp = nullptr); + + static void _write_log_and_missing_wo_missing( + ObjectStore::Transaction& t, +@@ -1302,7 +1304,8 @@ public: + eversion_t dirty_to_dups, + eversion_t dirty_from_dups, + eversion_t write_from_dups, +- set *log_keys_debug ++ std::set *log_keys_debug, ++ const DoutPrefixProvider *dpp = nullptr + ); + + static void _write_log_and_missing( +@@ -1323,7 +1326,8 @@ public: + eversion_t dirty_from_dups, + eversion_t write_from_dups, + bool *may_include_deletes_in_missing_dirty, +- set *log_keys_debug ++ std::set *log_keys_debug, ++ const DoutPrefixProvider *dpp = nullptr + ); + + void read_log_and_missing( +@@ -1336,7 +1340,7 @@ public: + bool debug_verify_stored_missing = false + ) { + return read_log_and_missing( +- store, ch, pgmeta_oid, info, ++ cct, store, ch, pgmeta_oid, info, + log, missing, oss, + tolerate_divergent_missing_log, + &clear_divergent_priors, +@@ -1347,6 +1351,7 @@ public: + + template + static void read_log_and_missing( ++ CephContext *cct, + ObjectStore *store, + ObjectStore::CollectionHandle &ch, + ghobject_t pgmeta_oid, +@@ -1360,9 +1365,9 @@ public: + set *log_keys_debug = nullptr, + bool debug_verify_stored_missing = false + ) { +- ldpp_dout(dpp, 20) << "read_log_and_missing coll " << ch->cid ++ ldpp_dout(dpp, 10) << "read_log_and_missing coll " << ch->cid + << " " << pgmeta_oid << dendl; +- ++ size_t total_dups = 0; + // legacy? + struct stat st; + int r = store->stat(ch, pgmeta_oid, &st); +@@ -1377,8 +1382,9 @@ public: + map divergent_priors; + bool must_rebuild = false; + missing.may_include_deletes = false; +- list entries; +- list dups; ++ std::list entries; ++ std::list dups; ++ const auto NUM_DUPS_WARN_THRESHOLD = 2*cct->_conf->osd_pg_log_dups_tracked; + if (p) { + for (p->seek_to_first(); p->valid() ; p->next()) { + // non-log pgmeta_oid keys are prefixed with _; skip those +@@ -1409,11 +1415,20 @@ public: + } + missing.add(oid, std::move(item)); + } else if (p->key().substr(0, 4) == string("dup_")) { ++ ++total_dups; + pg_log_dup_t dup; + decode(dup, bp); + if (!dups.empty()) { + ceph_assert(dups.back().version < dup.version); + } ++ if (dups.size() == NUM_DUPS_WARN_THRESHOLD) { ++ ldpp_dout(dpp, 0) << "read_log_and_missing WARN num of dups exceeded " ++ << NUM_DUPS_WARN_THRESHOLD << "." ++ << " You can be hit by THE DUPS BUG" ++ << " https://tracker.ceph.com/issues/53729." 
++ << " Consider ceph-objectstore-tool --op trim-pg-log-dups" ++ << dendl; ++ } + dups.push_back(dup); + } else { + pg_log_entry_t e; +@@ -1591,7 +1606,9 @@ public: + (*clear_divergent_priors) = false; + missing.flush(); + } +- ldpp_dout(dpp, 10) << "read_log_and_missing done" << dendl; ++ ldpp_dout(dpp, 10) << "read_log_and_missing done coll " << ch->cid ++ << " total_dups=" << total_dups ++ << " log.dups.size()=" << log.dups.size() << dendl; + } // static read_log_and_missing + + #ifdef WITH_SEASTAR +Index: ceph-15.2.17/src/osd/osd_types.cc +=================================================================== +--- ceph-15.2.17.orig/src/osd/osd_types.cc ++++ ceph-15.2.17/src/osd/osd_types.cc +@@ -5042,8 +5042,8 @@ static void _handle_dups(CephContext* cc + { + auto earliest_dup_version = + target.head.version < maxdups ? 0u : target.head.version - maxdups + 1; +- lgeneric_subdout(cct, osd, 20) << "copy_up_to/copy_after earliest_dup_version " << earliest_dup_version << dendl; +- ++ lgeneric_subdout(cct, osd, 20) << __func__ << " earliest_dup_version " ++ << earliest_dup_version << dendl; + for (auto d = other.dups.cbegin(); d != other.dups.cend(); ++d) { + if (d->version.version >= earliest_dup_version) { + lgeneric_subdout(cct, osd, 20) +@@ -5072,7 +5072,9 @@ void pg_log_t::copy_after(CephContext* c + can_rollback_to = other.can_rollback_to; + head = other.head; + tail = other.tail; +- lgeneric_subdout(cct, osd, 20) << __func__ << " v " << v << dendl; ++ lgeneric_subdout(cct, osd, 20) << __func__ << " v " << v ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + for (auto i = other.log.crbegin(); i != other.log.crend(); ++i) { + ceph_assert(i->version > other.tail); + if (i->version <= v) { +@@ -5084,6 +5086,9 @@ void pg_log_t::copy_after(CephContext* c + log.push_front(*i); + } + _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked); ++ lgeneric_subdout(cct, osd, 20) << __func__ << " END v " << v ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + } + + void pg_log_t::copy_up_to(CephContext* cct, const pg_log_t &other, int max) +@@ -5093,6 +5098,9 @@ void pg_log_t::copy_up_to(CephContext* c + head = other.head; + tail = other.tail; + lgeneric_subdout(cct, osd, 20) << __func__ << " max " << max << dendl; ++ lgeneric_subdout(cct, osd, 20) << __func__ << " max " << max ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + for (auto i = other.log.crbegin(); i != other.log.crend(); ++i) { + ceph_assert(i->version > other.tail); + if (n++ >= max) { +@@ -5103,6 +5111,9 @@ void pg_log_t::copy_up_to(CephContext* c + log.push_front(*i); + } + _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked); ++ lgeneric_subdout(cct, osd, 20) << __func__ << " END max " << max ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + } + + ostream& pg_log_t::print(ostream& out) const +Index: ceph-15.2.17/src/test/osd/TestPGLog.cc +=================================================================== +--- ceph-15.2.17.orig/src/test/osd/TestPGLog.cc ++++ ceph-15.2.17/src/test/osd/TestPGLog.cc +@@ -2739,8 +2739,8 @@ TEST_F(PGLogTrimTest, TestPartialTrim) + EXPECT_EQ(eversion_t(19, 160), write_from_dups2); + EXPECT_EQ(2u, log.log.size()); + EXPECT_EQ(1u, trimmed2.size()); +- EXPECT_EQ(2u, log.dups.size()); +- EXPECT_EQ(1u, trimmed_dups2.size()); ++ EXPECT_EQ(3u, log.dups.size()); ++ EXPECT_EQ(0u, 
trimmed_dups2.size()); + } + + +@@ -3023,7 +3023,7 @@ TEST_F(PGLogTrimTest, TestTrimDups) { + + EXPECT_EQ(eversion_t(20, 103), write_from_dups) << log; + EXPECT_EQ(2u, log.log.size()) << log; +- EXPECT_EQ(3u, log.dups.size()) << log; ++ EXPECT_EQ(4u, log.dups.size()) << log; + } + + // This tests trim() to make copies of +@@ -3067,7 +3067,7 @@ TEST_F(PGLogTrimTest, TestTrimDups2) { + + EXPECT_EQ(eversion_t(10, 100), write_from_dups) << log; + EXPECT_EQ(4u, log.log.size()) << log; +- EXPECT_EQ(5u, log.dups.size()) << log; ++ EXPECT_EQ(6u, log.dups.size()) << log; + } + + // This tests copy_up_to() to make copies of +Index: ceph-15.2.17/src/tools/ceph_objectstore_tool.cc +=================================================================== +--- ceph-15.2.17.orig/src/tools/ceph_objectstore_tool.cc ++++ ceph-15.2.17/src/tools/ceph_objectstore_tool.cc +@@ -14,8 +14,10 @@ + + #include + #include ++#include + #include + #include ++#include + + #include + +@@ -430,7 +432,7 @@ static int get_fd_data(int fd, bufferlis + return 0; + } + +-int get_log(ObjectStore *fs, __u8 struct_ver, ++int get_log(CephContext *cct, ObjectStore *fs, __u8 struct_ver, + spg_t pgid, const pg_info_t &info, + PGLog::IndexedLog &log, pg_missing_t &missing) + { +@@ -442,7 +444,7 @@ int get_log(ObjectStore *fs, __u8 struct + ostringstream oss; + ceph_assert(struct_ver > 0); + PGLog::read_log_and_missing( +- fs, ch, ++ cct, fs, ch, + pgid.make_pgmeta_oid(), + info, log, missing, + oss, +@@ -1068,7 +1070,8 @@ int add_osdmap(ObjectStore *store, metad + return get_osdmap(store, ms.map_epoch, ms.osdmap, ms.osdmap_bl); + } + +-int ObjectStoreTool::do_export(ObjectStore *fs, coll_t coll, spg_t pgid, ++int ObjectStoreTool::do_export( ++ CephContext *cct, ObjectStore *fs, coll_t coll, spg_t pgid, + pg_info_t &info, epoch_t map_epoch, __u8 struct_ver, + const OSDSuperblock& superblock, + PastIntervals &past_intervals) +@@ -1078,7 +1081,7 @@ int ObjectStoreTool::do_export(ObjectSto + + cerr << "Exporting " << pgid << " info " << info << std::endl; + +- int ret = get_log(fs, struct_ver, pgid, info, log, missing); ++ int ret = get_log(cct, fs, struct_ver, pgid, info, log, missing); + if (ret > 0) + return ret; + +@@ -3147,6 +3150,136 @@ int dup(string srcpath, ObjectStore *src + return r; + } + ++ ++const int ceph_entity_name_type(const string name) ++{ ++ if (name == "mds") return CEPH_ENTITY_TYPE_MDS; ++ if (name == "osd") return CEPH_ENTITY_TYPE_OSD; ++ if (name == "mon") return CEPH_ENTITY_TYPE_MON; ++ if (name == "client") return CEPH_ENTITY_TYPE_CLIENT; ++ if (name == "mgr") return CEPH_ENTITY_TYPE_MGR; ++ if (name == "auth") return CEPH_ENTITY_TYPE_AUTH; ++ return -1; ++} ++ ++eversion_t get_eversion_from_str(const string& s) { ++ eversion_t e; ++ vector result; ++ boost::split(result, s, boost::is_any_of("'")); ++ if (result.size() != 2) { ++ cerr << "eversion_t: invalid format: '" << s << "'" << std::endl; ++ return e; ++ } ++ e.epoch = atoi(result[0].c_str()); ++ e.version = atoi(result[1].c_str()); ++ return e; ++} ++ ++osd_reqid_t get_reqid_from_str(const string& s) { ++ osd_reqid_t reqid; ++ ++ vector result; ++ boost::split(result, s, boost::is_any_of(".:")); ++ if (result.size() != 4) { ++ cerr << "reqid: invalid format " << s << std::endl; ++ return osd_reqid_t(); ++ } ++ reqid.name._type = ceph_entity_name_type(result[0]); ++ reqid.name._num = atoi(result[1].c_str()); ++ ++ reqid.inc = atoi(result[2].c_str()); ++ reqid.tid = atoi(result[3].c_str()); ++ return reqid; ++} ++ ++void do_dups_inject_transction(ObjectStore 
*store, spg_t r_pgid, map *new_dups) ++{ ++ ObjectStore::Transaction t; ++ coll_t coll(r_pgid); ++ cerr << "injecting dups into pgid:" << r_pgid << " num of dups:" << new_dups->size() << std::endl; ++ t.omap_setkeys(coll, r_pgid.make_pgmeta_oid(), (*new_dups)); ++ auto ch = store->open_collection(coll); ++ store->queue_transaction(ch, std::move(t)); ++ new_dups->clear(); ++} ++ ++int do_dups_inject_object(ObjectStore *store, spg_t r_pgid, json_spirit::mObject &in_json_obj, ++ map *new_dups, bool debug) { ++ std::map::const_iterator it = in_json_obj.find("generate"); ++ int32_t generate = 0; ++ if (it != in_json_obj.end()) { ++ generate = atoi(it->second.get_str().c_str()); ++ } ++ ++ it = in_json_obj.find("reqid"); ++ if (it == in_json_obj.end()) { ++ return 1; ++ } ++ osd_reqid_t reqid(get_reqid_from_str(it->second.get_str())); ++ it = in_json_obj.find("version"); ++ if (it == in_json_obj.end()) { ++ return 1; ++ } ++ eversion_t version(get_eversion_from_str(it->second.get_str())); ++ it = in_json_obj.find("user_version"); ++ if (it == in_json_obj.end()) { ++ return 1; ++ } ++ version_t user_version = atoi(it->second.get_str().c_str()); ++ it = in_json_obj.find("return_code"); ++ if (it == in_json_obj.end()) { ++ return 1; ++ } ++ int32_t return_code = atoi(it->second.get_str().c_str()); ++ if (generate) { ++ for(auto i = 0; i < generate; ++i) { ++ version.version++; ++ if (debug) { ++ cout << "generate dups reqid " << reqid << " v=" << version << std::endl; ++ } ++ pg_log_dup_t tmp(version, user_version, reqid, return_code); ++ bufferlist bl; ++ encode(tmp, bl); ++ (*new_dups)[tmp.get_key_name()] = std::move(bl); ++ if ( new_dups->size() > 50000 ) { ++ do_dups_inject_transction(store, r_pgid, new_dups); ++ cout << "inject of " << i << " dups into pgid:" << r_pgid << " done..." 
<< std::endl; ++ } ++ } ++ return 0; ++ } else { ++ pg_log_dup_t tmp(version, user_version, reqid, return_code); ++ if (debug) { ++ cout << "adding dup: " << tmp << "into key:" << tmp.get_key_name() << std::endl; ++ } ++ bufferlist bl; ++ encode(tmp, bl); ++ (*new_dups)[tmp.get_key_name()] = std::move(bl); ++ } ++ return 0; ++} ++ ++void do_dups_inject_from_json(ObjectStore *store, spg_t r_pgid, json_spirit::mValue &inJson, bool debug) ++{ ++ map new_dups; ++ const vector& o = inJson.get_array(); ++ for (const auto& obj : o) { ++ if (obj.type() == json_spirit::obj_type) { ++ json_spirit::mObject Mobj = obj.get_obj(); ++ do_dups_inject_object(store, r_pgid, Mobj, &new_dups, debug); ++ } else { ++ throw std::runtime_error("JSON array/object not allowed type:" + std::to_string(obj.type())); ++ return; ++ } ++ } ++ if (new_dups.size() > 0) { ++ do_dups_inject_transction(store, r_pgid, &new_dups); ++ } ++ ++ ++ return ; ++} ++ + void usage(po::options_description &desc) + { + cerr << std::endl; +@@ -3480,7 +3613,7 @@ int main(int argc, char **argv) + } else { + file_fd = open(file.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666); + } +- } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap") { ++ } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap" || op == "pg-log-inject-dups") { + if (!vm.count("file") || file == "-") { + if (isatty(STDIN_FILENO)) { + cerr << "stdin is a tty and no --file filename specified" << std::endl; +@@ -3859,7 +3992,7 @@ int main(int argc, char **argv) + || op == "export-remove" || op == "mark-complete" + || op == "reset-last-complete" + || op == "trim-pg-log" +- || op == "trim-pg-log-dups") && ++ || op == "pg-log-inject-dups") && + pgidstr.length() == 0) { + cerr << "Must provide pgid" << std::endl; + usage(desc); +@@ -4086,7 +4219,7 @@ int main(int argc, char **argv) + + // If not an object command nor any of the ops handled below, then output this usage + // before complaining about a bad pgid +- if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log" && op != "trim-pg-log-dups") { ++ if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log" && op != "trim-pg-log-dups" && op != "pg-log-inject-dups") { + cerr << "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, " + "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, trim-pg-log-dups statfs)" + << std::endl; +@@ -4378,7 +4511,7 @@ int main(int argc, char **argv) + + if (op == "export" || op == "export-remove") { + ceph_assert(superblock != nullptr); +- ret = tool.do_export(fs, coll, pgid, info, map_epoch, struct_ver, *superblock, past_intervals); ++ ret = tool.do_export(cct.get(), fs, coll, pgid, info, map_epoch, struct_ver, *superblock, past_intervals); + if (ret == 0) { + cerr << "Export successful" << std::endl; + if (op == "export-remove") { +@@ -4397,7 +4530,7 @@ int main(int argc, char **argv) + } else if (op == "log") { + PGLog::IndexedLog log; + pg_missing_t missing; +- ret = get_log(fs, struct_ver, pgid, info, log, missing); ++ ret = get_log(cct.get(), fs, struct_ver, pgid, info, log, missing); + if (ret < 0) + goto out; + +@@ -4482,6 +4615,34 @@ int main(int argc, char **argv) + } + cout << 
"Reseting last_complete succeeded" << std::endl; + ++ } else if (op == "pg-log-inject-dups") { ++ if (!vm.count("file") || file == "-") { ++ cerr << "Must provide file containing JSON dups entries" << std::endl; ++ ret = 1; ++ goto out; ++ } ++ if (debug) ++ cerr << "opening file " << file << std::endl; ++ ++ ifstream json_file_stream(file , std::ifstream::in); ++ if (!json_file_stream.is_open()) { ++ cerr << "unable to open file " << file << std::endl; ++ ret = -1; ++ goto out; ++ } ++ json_spirit::mValue result; ++ try { ++ if (!json_spirit::read(json_file_stream, result)) ++ throw std::runtime_error("unparseable JSON " + file); ++ if (result.type() != json_spirit::array_type) { ++ cerr << "result is not an array_type - type=" << result.type() << std::endl; ++ throw std::runtime_error("not JSON array_type " + file); ++ } ++ do_dups_inject_from_json(fs, pgid, result, debug); ++ } catch (const std::runtime_error &e) { ++ cerr << e.what() << std::endl;; ++ return -1; ++ } + } else { + ceph_assert(!"Should have already checked for valid --op"); + } +Index: ceph-15.2.17/src/tools/ceph_objectstore_tool.h +=================================================================== +--- ceph-15.2.17.orig/src/tools/ceph_objectstore_tool.h ++++ ceph-15.2.17/src/tools/ceph_objectstore_tool.h +@@ -27,7 +27,7 @@ class ObjectStoreTool : public RadosDump + int dump_export(Formatter *formatter); + int do_import(ObjectStore *store, OSDSuperblock& sb, bool force, + std::string pgidstr); +- int do_export(ObjectStore *fs, coll_t coll, spg_t pgid, ++ int do_export(CephContext *cct, ObjectStore *fs, coll_t coll, spg_t pgid, + pg_info_t &info, epoch_t map_epoch, __u8 struct_ver, + const OSDSuperblock& superblock, + PastIntervals &past_intervals); diff -Nru ceph-15.2.17/debian/patches/series ceph-15.2.17/debian/patches/series --- ceph-15.2.17/debian/patches/series 2022-10-12 13:26:35.000000000 +0000 +++ ceph-15.2.17/debian/patches/series 2022-10-31 05:22:09.000000000 +0000 @@ -11,3 +11,4 @@ riscv64-link-pthread.patch # AARCH64 EC regression bug1917414.patch +bug1978913.patch diff -Nru ceph-15.2.17/src/test/debian-jessie/debian/changelog ceph-15.2.17/src/test/debian-jessie/debian/changelog --- ceph-15.2.17/src/test/debian-jessie/debian/changelog 2022-10-12 13:30:58.000000000 +0000 +++ ceph-15.2.17/src/test/debian-jessie/debian/changelog 2022-10-31 05:45:04.000000000 +0000 @@ -1,3 +1,10 @@ +ceph (15.2.17-0ubuntu0.20.04.2) focal; urgency=medium + + * d/p/bug1978913.patch: + Cherry-pick upstream fix for on-line trim of dups + + -- Nikhil Kshirsagar Mon, 31 Oct 2022 05:45:04 +0000 + ceph (15.2.17-0ubuntu0.20.04.1) focal; urgency=medium * New upstream release (LP: #1990862). 
diff -Nru ceph-15.2.17/src/test/debian-jessie/debian/patches/bug1978913.patch ceph-15.2.17/src/test/debian-jessie/debian/patches/bug1978913.patch --- ceph-15.2.17/src/test/debian-jessie/debian/patches/bug1978913.patch 1970-01-01 00:00:00.000000000 +0000 +++ ceph-15.2.17/src/test/debian-jessie/debian/patches/bug1978913.patch 2022-10-31 05:43:10.000000000 +0000 @@ -0,0 +1,840 @@ +Index: ceph-15.2.17/qa/standalone/osd/repro_long_log.sh +=================================================================== +--- ceph-15.2.17.orig/qa/standalone/osd/repro_long_log.sh ++++ ceph-15.2.17/qa/standalone/osd/repro_long_log.sh +@@ -148,6 +148,44 @@ function TEST_trim_max_entries() + test_log_size $PGID 3 || return 1 + } + ++function TEST_trim_max_entries_with_dups() ++{ ++ local dir=$1 ++ ++ setup_log_test $dir || return 1 ++ ++ ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1 ++ ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2 ++ ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2 ++ ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4 ++ ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 20 || return 1 ++ ++ # adding log entries, should only trim 4 and add one each time ++ # dups should be trimmed to 1 ++ rados -p test rm foo ++ test_log_size $PGID 18 2 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 15 6 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 12 10 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 9 14 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 6 18 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 3 20 || return 1 ++ ++ # below trim_min ++ rados -p test rm foo ++ test_log_size $PGID 4 20 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 3 20 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 4 20 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 3 20 || return 1 ++} ++ + main repro-long-log "$@" + + # Local Variables: +Index: ceph-15.2.17/src/kv/RocksDBStore.cc +=================================================================== +--- ceph-15.2.17.orig/src/kv/RocksDBStore.cc ++++ ceph-15.2.17/src/kv/RocksDBStore.cc +@@ -1008,6 +1008,8 @@ void RocksDBStore::RocksDBTransactionImp + const string &start, + const string &end) + { ++ ldout(db->cct, 10) << __func__ << " enter start=" << start ++ << " end=" << end << dendl; + auto cf = db->get_cf_handle(prefix); + + uint64_t cnt = db->delete_range_threshold; +@@ -1021,8 +1023,12 @@ void RocksDBStore::RocksDBTransactionImp + if (!cnt) { + bat.RollbackToSavePoint(); + if (cf) { ++ ldout(db->cct, 10) << __func__ << " p_iter == end(), resorting to DeleteRange" ++ << dendl; + bat.DeleteRange(cf, rocksdb::Slice(start), rocksdb::Slice(end)); + } else { ++ ldout(db->cct, 10) << __func__ << " p_iter == end(), resorting to DeleteRange" ++ << dendl; + bat.DeleteRange(db->default_cf, + rocksdb::Slice(combine_strings(prefix, start)), + rocksdb::Slice(combine_strings(prefix, end))); +@@ -1038,6 +1044,7 @@ void RocksDBStore::RocksDBTransactionImp + --cnt; + } + bat.PopSavePoint(); ++ ldout(db->cct, 10) << __func__ << " end" << dendl; + } + + void RocksDBStore::RocksDBTransactionImpl::merge( +Index: ceph-15.2.17/src/osd/PGLog.cc +=================================================================== +--- ceph-15.2.17.orig/src/osd/PGLog.cc ++++ ceph-15.2.17/src/osd/PGLog.cc +@@ -50,6 +50,7 @@ void PGLog::IndexedLog::trim( + set* trimmed_dups, + eversion_t *write_from_dups) + { ++ lgeneric_subdout(cct, osd, 10) << "IndexedLog::trim s=" << s 
<< dendl; + ceph_assert(s <= can_rollback_to); + if (complete_to != log.end()) + lgeneric_subdout(cct, osd, 20) << " complete_to " << complete_to->version << dendl; +@@ -121,10 +122,18 @@ void PGLog::IndexedLog::trim( + } + } + +- while (!dups.empty()) { ++ // we can hit an inflated `dups` b/c of https://tracker.ceph.com/issues/53729 ++ // the idea is to slowly trim them over a prolonged period of time and mix ++ // omap deletes with writes (if we're here, a new log entry got added) to ++ // neither: 1) blow size of single Transaction nor 2) generate-n-accumulate ++ // large amount of tombstones in BlueStore's RocksDB. ++ // if trimming immediately is a must, then the ceph-objectstore-tool is ++ // the way to go. ++ const size_t max_dups = cct->_conf->osd_pg_log_dups_tracked; ++ for (size_t max_dups_to_trim = cct->_conf->osd_pg_log_trim_max; ++ max_dups_to_trim > 0 && dups.size() > max_dups; ++ max_dups_to_trim--) { + const auto& e = *dups.begin(); +- if (e.version.version >= earliest_dup_version) +- break; + lgeneric_subdout(cct, osd, 20) << "trim dup " << e << dendl; + if (trimmed_dups) + trimmed_dups->insert(e.get_key_name()); +@@ -135,6 +144,10 @@ void PGLog::IndexedLog::trim( + // raise tail? + if (tail < s) + tail = s; ++ lgeneric_subdout(cct, osd, 20) << "IndexedLog::trim after trim" ++ << " dups.size()=" << dups.size() ++ << " tail=" << tail ++ << " s=" << s << dendl; + } + + ostream& PGLog::IndexedLog::print(ostream& out) const +@@ -506,6 +519,9 @@ void PGLog::merge_log(pg_info_t &oinfo, + + // returns true if any changes were made to log.dups + bool PGLog::merge_log_dups(const pg_log_t& olog) { ++ dout(5) << __func__ ++ << " log.dups.size()=" << log.dups.size() ++ << "olog.dups.size()=" << olog.dups.size() << dendl; + bool changed = false; + + if (!olog.dups.empty()) { +@@ -584,6 +600,9 @@ bool PGLog::merge_log_dups(const pg_log_ + } + } + ++ dout(5) << "end of " << __func__ << " changed=" << changed ++ << " log.dups.size()=" << log.dups.size() ++ << " olog.dups.size()=" << olog.dups.size() << dendl; + return changed; + } + +@@ -645,7 +664,8 @@ void PGLog::write_log_and_missing( + dirty_from_dups, + write_from_dups, + &may_include_deletes_in_missing_dirty, +- (pg_log_debug ? &log_keys_debug : nullptr)); ++ (pg_log_debug ? 
&log_keys_debug : nullptr), ++ this); + undirty(); + } else { + dout(10) << "log is not dirty" << dendl; +@@ -659,14 +679,15 @@ void PGLog::write_log_and_missing_wo_mis + pg_log_t &log, + const coll_t& coll, const ghobject_t &log_oid, + map &divergent_priors, +- bool require_rollback ++ bool require_rollback, ++ const DoutPrefixProvider *dpp + ) + { + _write_log_and_missing_wo_missing( + t, km, log, coll, log_oid, + divergent_priors, eversion_t::max(), eversion_t(), eversion_t(), + true, true, require_rollback, +- eversion_t::max(), eversion_t(), eversion_t(), nullptr); ++ eversion_t::max(), eversion_t(), eversion_t(), nullptr, dpp); + } + + // static +@@ -678,7 +699,8 @@ void PGLog::write_log_and_missing( + const ghobject_t &log_oid, + const pg_missing_tracker_t &missing, + bool require_rollback, +- bool *may_include_deletes_in_missing_dirty) ++ bool *may_include_deletes_in_missing_dirty, ++ const DoutPrefixProvider *dpp) + { + _write_log_and_missing( + t, km, log, coll, log_oid, +@@ -692,7 +714,7 @@ void PGLog::write_log_and_missing( + eversion_t::max(), + eversion_t(), + eversion_t(), +- may_include_deletes_in_missing_dirty, nullptr); ++ may_include_deletes_in_missing_dirty, nullptr, dpp); + } + + // static +@@ -711,10 +733,14 @@ void PGLog::_write_log_and_missing_wo_mi + eversion_t dirty_to_dups, + eversion_t dirty_from_dups, + eversion_t write_from_dups, +- set *log_keys_debug ++ set *log_keys_debug, ++ const DoutPrefixProvider *dpp + ) + { +- // dout(10) << "write_log_and_missing, clearing up to " << dirty_to << dendl; ++ ldpp_dout(dpp, 10) << "_write_log_and_missing_wo_missing, clearing up to " << dirty_to ++ << " dirty_to_dups=" << dirty_to_dups ++ << " dirty_from_dups=" << dirty_from_dups ++ << " write_from_dups=" << write_from_dups << dendl; + if (touch_log) + t.touch(coll, log_oid); + if (dirty_to != eversion_t()) { +@@ -765,6 +791,8 @@ void PGLog::_write_log_and_missing_wo_mi + if (dirty_to_dups != eversion_t()) { + pg_log_dup_t min, dirty_to_dup; + dirty_to_dup.version = dirty_to_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups min=" << min.get_key_name() ++ << " to dirty_to_dup=" << dirty_to_dup.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + min.get_key_name(), dirty_to_dup.get_key_name()); +@@ -773,11 +801,16 @@ void PGLog::_write_log_and_missing_wo_mi + pg_log_dup_t max, dirty_from_dup; + max.version = eversion_t::max(); + dirty_from_dup.version = dirty_from_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups dirty_from_dup=" ++ << dirty_from_dup.get_key_name() ++ << " to max=" << max.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + dirty_from_dup.get_key_name(), max.get_key_name()); + } + ++ ldpp_dout(dpp, 10) << __func__ << " going to encode log.dups.size()=" ++ << log.dups.size() << dendl; + for (const auto& entry : log.dups) { + if (entry.version > dirty_to_dups) + break; +@@ -786,6 +819,8 @@ void PGLog::_write_log_and_missing_wo_mi + (*km)[entry.get_key_name()].claim(bl); + } + ++ ldpp_dout(dpp, 10) << __func__ << " 1st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + for (list::reverse_iterator p = log.dups.rbegin(); + p != log.dups.rend() && + (p->version >= dirty_from_dups || p->version >= write_from_dups) && +@@ -796,8 +831,11 @@ void PGLog::_write_log_and_missing_wo_mi + (*km)[p->get_key_name()].claim(bl); + } + ++ ldpp_dout(dpp, 10) << __func__ << " 2st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + if (dirty_divergent_priors) { +- //dout(10) << "write_log_and_missing: writing 
divergent_priors" << dendl; ++ ldpp_dout(dpp, 10) << "write_log_and_missing: writing divergent_priors" ++ << dendl; + encode(divergent_priors, (*km)["divergent_priors"]); + } + if (require_rollback) { +@@ -808,6 +846,7 @@ void PGLog::_write_log_and_missing_wo_mi + log.get_rollback_info_trimmed_to(), + (*km)["rollback_info_trimmed_to"]); + } ++ ldpp_dout(dpp, 10) << "end of " << __func__ << dendl; + } + + // static +@@ -829,8 +868,14 @@ void PGLog::_write_log_and_missing( + eversion_t dirty_from_dups, + eversion_t write_from_dups, + bool *may_include_deletes_in_missing_dirty, // in/out param +- set *log_keys_debug ++ set *log_keys_debug, ++ const DoutPrefixProvider *dpp + ) { ++ ldpp_dout(dpp, 10) << __func__ << " clearing up to " << dirty_to ++ << " dirty_to_dups=" << dirty_to_dups ++ << " dirty_from_dups=" << dirty_from_dups ++ << " write_from_dups=" << write_from_dups ++ << " trimmed_dups.size()=" << trimmed_dups.size() << dendl; + set to_remove; + to_remove.swap(trimmed_dups); + for (auto& t : trimmed) { +@@ -853,7 +898,8 @@ void PGLog::_write_log_and_missing( + clear_up_to(log_keys_debug, dirty_to.get_key_name()); + } + if (dirty_to != eversion_t::max() && dirty_from != eversion_t::max()) { +- // dout(10) << "write_log_and_missing, clearing from " << dirty_from << dendl; ++ ldpp_dout(dpp, 10) << "write_log_and_missing, clearing from " ++ << dirty_from << dendl; + t.omap_rmkeyrange( + coll, log_oid, + dirty_from.get_key_name(), eversion_t::max().get_key_name()); +@@ -894,6 +940,8 @@ void PGLog::_write_log_and_missing( + if (dirty_to_dups != eversion_t()) { + pg_log_dup_t min, dirty_to_dup; + dirty_to_dup.version = dirty_to_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups min=" << min.get_key_name() ++ << " to dirty_to_dup=" << dirty_to_dup.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + min.get_key_name(), dirty_to_dup.get_key_name()); +@@ -902,11 +950,16 @@ void PGLog::_write_log_and_missing( + pg_log_dup_t max, dirty_from_dup; + max.version = eversion_t::max(); + dirty_from_dup.version = dirty_from_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups dirty_from_dup=" ++ << dirty_from_dup.get_key_name() ++ << " to max=" << max.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + dirty_from_dup.get_key_name(), max.get_key_name()); + } + ++ ldpp_dout(dpp, 10) << __func__ << " going to encode log.dups.size()=" ++ << log.dups.size() << dendl; + for (const auto& entry : log.dups) { + if (entry.version > dirty_to_dups) + break; +@@ -914,6 +967,8 @@ void PGLog::_write_log_and_missing( + encode(entry, bl); + (*km)[entry.get_key_name()].claim(bl); + } ++ ldpp_dout(dpp, 10) << __func__ << " 1st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + + for (list::reverse_iterator p = log.dups.rbegin(); + p != log.dups.rend() && +@@ -925,8 +980,11 @@ void PGLog::_write_log_and_missing( + (*km)[p->get_key_name()].claim(bl); + } + ++ ldpp_dout(dpp, 10) << __func__ << " 2st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + if (clear_divergent_priors) { +- //dout(10) << "write_log_and_missing: writing divergent_priors" << dendl; ++ ldpp_dout(dpp, 10) << "write_log_and_missing: writing divergent_priors" ++ << dendl; + to_remove.insert("divergent_priors"); + } + // since we encode individual missing items instead of a whole +@@ -956,6 +1014,7 @@ void PGLog::_write_log_and_missing( + + if (!to_remove.empty()) + t.omap_rmkeys(coll, log_oid, to_remove); ++ ldpp_dout(dpp, 10) << "end of " << __func__ << dendl; + } + + void 
PGLog::rebuild_missing_set_with_deletes( +Index: ceph-15.2.17/src/osd/PGLog.h +=================================================================== +--- ceph-15.2.17.orig/src/osd/PGLog.h ++++ ceph-15.2.17/src/osd/PGLog.h +@@ -1274,8 +1274,9 @@ public: + map* km, + pg_log_t &log, + const coll_t& coll, +- const ghobject_t &log_oid, map &divergent_priors, +- bool require_rollback); ++ const ghobject_t &log_oid, std::map &divergent_priors, ++ bool require_rollback, ++ const DoutPrefixProvider *dpp = nullptr); + + static void write_log_and_missing( + ObjectStore::Transaction& t, +@@ -1285,7 +1286,8 @@ public: + const ghobject_t &log_oid, + const pg_missing_tracker_t &missing, + bool require_rollback, +- bool *rebuilt_missing_set_with_deletes); ++ bool *rebuilt_missing_set_with_deletes, ++ const DoutPrefixProvider *dpp = nullptr); + + static void _write_log_and_missing_wo_missing( + ObjectStore::Transaction& t, +@@ -1302,7 +1304,8 @@ public: + eversion_t dirty_to_dups, + eversion_t dirty_from_dups, + eversion_t write_from_dups, +- set *log_keys_debug ++ std::set *log_keys_debug, ++ const DoutPrefixProvider *dpp = nullptr + ); + + static void _write_log_and_missing( +@@ -1323,7 +1326,8 @@ public: + eversion_t dirty_from_dups, + eversion_t write_from_dups, + bool *may_include_deletes_in_missing_dirty, +- set *log_keys_debug ++ std::set *log_keys_debug, ++ const DoutPrefixProvider *dpp = nullptr + ); + + void read_log_and_missing( +@@ -1336,7 +1340,7 @@ public: + bool debug_verify_stored_missing = false + ) { + return read_log_and_missing( +- store, ch, pgmeta_oid, info, ++ cct, store, ch, pgmeta_oid, info, + log, missing, oss, + tolerate_divergent_missing_log, + &clear_divergent_priors, +@@ -1347,6 +1351,7 @@ public: + + template + static void read_log_and_missing( ++ CephContext *cct, + ObjectStore *store, + ObjectStore::CollectionHandle &ch, + ghobject_t pgmeta_oid, +@@ -1360,9 +1365,9 @@ public: + set *log_keys_debug = nullptr, + bool debug_verify_stored_missing = false + ) { +- ldpp_dout(dpp, 20) << "read_log_and_missing coll " << ch->cid ++ ldpp_dout(dpp, 10) << "read_log_and_missing coll " << ch->cid + << " " << pgmeta_oid << dendl; +- ++ size_t total_dups = 0; + // legacy? + struct stat st; + int r = store->stat(ch, pgmeta_oid, &st); +@@ -1377,8 +1382,9 @@ public: + map divergent_priors; + bool must_rebuild = false; + missing.may_include_deletes = false; +- list entries; +- list dups; ++ std::list entries; ++ std::list dups; ++ const auto NUM_DUPS_WARN_THRESHOLD = 2*cct->_conf->osd_pg_log_dups_tracked; + if (p) { + for (p->seek_to_first(); p->valid() ; p->next()) { + // non-log pgmeta_oid keys are prefixed with _; skip those +@@ -1409,11 +1415,20 @@ public: + } + missing.add(oid, std::move(item)); + } else if (p->key().substr(0, 4) == string("dup_")) { ++ ++total_dups; + pg_log_dup_t dup; + decode(dup, bp); + if (!dups.empty()) { + ceph_assert(dups.back().version < dup.version); + } ++ if (dups.size() == NUM_DUPS_WARN_THRESHOLD) { ++ ldpp_dout(dpp, 0) << "read_log_and_missing WARN num of dups exceeded " ++ << NUM_DUPS_WARN_THRESHOLD << "." ++ << " You can be hit by THE DUPS BUG" ++ << " https://tracker.ceph.com/issues/53729." 
++ << " Consider ceph-objectstore-tool --op trim-pg-log-dups" ++ << dendl; ++ } + dups.push_back(dup); + } else { + pg_log_entry_t e; +@@ -1591,7 +1606,9 @@ public: + (*clear_divergent_priors) = false; + missing.flush(); + } +- ldpp_dout(dpp, 10) << "read_log_and_missing done" << dendl; ++ ldpp_dout(dpp, 10) << "read_log_and_missing done coll " << ch->cid ++ << " total_dups=" << total_dups ++ << " log.dups.size()=" << log.dups.size() << dendl; + } // static read_log_and_missing + + #ifdef WITH_SEASTAR +Index: ceph-15.2.17/src/osd/osd_types.cc +=================================================================== +--- ceph-15.2.17.orig/src/osd/osd_types.cc ++++ ceph-15.2.17/src/osd/osd_types.cc +@@ -5042,8 +5042,8 @@ static void _handle_dups(CephContext* cc + { + auto earliest_dup_version = + target.head.version < maxdups ? 0u : target.head.version - maxdups + 1; +- lgeneric_subdout(cct, osd, 20) << "copy_up_to/copy_after earliest_dup_version " << earliest_dup_version << dendl; +- ++ lgeneric_subdout(cct, osd, 20) << __func__ << " earliest_dup_version " ++ << earliest_dup_version << dendl; + for (auto d = other.dups.cbegin(); d != other.dups.cend(); ++d) { + if (d->version.version >= earliest_dup_version) { + lgeneric_subdout(cct, osd, 20) +@@ -5072,7 +5072,9 @@ void pg_log_t::copy_after(CephContext* c + can_rollback_to = other.can_rollback_to; + head = other.head; + tail = other.tail; +- lgeneric_subdout(cct, osd, 20) << __func__ << " v " << v << dendl; ++ lgeneric_subdout(cct, osd, 20) << __func__ << " v " << v ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + for (auto i = other.log.crbegin(); i != other.log.crend(); ++i) { + ceph_assert(i->version > other.tail); + if (i->version <= v) { +@@ -5084,6 +5086,9 @@ void pg_log_t::copy_after(CephContext* c + log.push_front(*i); + } + _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked); ++ lgeneric_subdout(cct, osd, 20) << __func__ << " END v " << v ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + } + + void pg_log_t::copy_up_to(CephContext* cct, const pg_log_t &other, int max) +@@ -5093,6 +5098,9 @@ void pg_log_t::copy_up_to(CephContext* c + head = other.head; + tail = other.tail; + lgeneric_subdout(cct, osd, 20) << __func__ << " max " << max << dendl; ++ lgeneric_subdout(cct, osd, 20) << __func__ << " max " << max ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + for (auto i = other.log.crbegin(); i != other.log.crend(); ++i) { + ceph_assert(i->version > other.tail); + if (n++ >= max) { +@@ -5103,6 +5111,9 @@ void pg_log_t::copy_up_to(CephContext* c + log.push_front(*i); + } + _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked); ++ lgeneric_subdout(cct, osd, 20) << __func__ << " END max " << max ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + } + + ostream& pg_log_t::print(ostream& out) const +Index: ceph-15.2.17/src/test/osd/TestPGLog.cc +=================================================================== +--- ceph-15.2.17.orig/src/test/osd/TestPGLog.cc ++++ ceph-15.2.17/src/test/osd/TestPGLog.cc +@@ -2739,8 +2739,8 @@ TEST_F(PGLogTrimTest, TestPartialTrim) + EXPECT_EQ(eversion_t(19, 160), write_from_dups2); + EXPECT_EQ(2u, log.log.size()); + EXPECT_EQ(1u, trimmed2.size()); +- EXPECT_EQ(2u, log.dups.size()); +- EXPECT_EQ(1u, trimmed_dups2.size()); ++ EXPECT_EQ(3u, log.dups.size()); ++ EXPECT_EQ(0u, 
trimmed_dups2.size()); + } + + +@@ -3023,7 +3023,7 @@ TEST_F(PGLogTrimTest, TestTrimDups) { + + EXPECT_EQ(eversion_t(20, 103), write_from_dups) << log; + EXPECT_EQ(2u, log.log.size()) << log; +- EXPECT_EQ(3u, log.dups.size()) << log; ++ EXPECT_EQ(4u, log.dups.size()) << log; + } + + // This tests trim() to make copies of +@@ -3067,7 +3067,7 @@ TEST_F(PGLogTrimTest, TestTrimDups2) { + + EXPECT_EQ(eversion_t(10, 100), write_from_dups) << log; + EXPECT_EQ(4u, log.log.size()) << log; +- EXPECT_EQ(5u, log.dups.size()) << log; ++ EXPECT_EQ(6u, log.dups.size()) << log; + } + + // This tests copy_up_to() to make copies of +Index: ceph-15.2.17/src/tools/ceph_objectstore_tool.cc +=================================================================== +--- ceph-15.2.17.orig/src/tools/ceph_objectstore_tool.cc ++++ ceph-15.2.17/src/tools/ceph_objectstore_tool.cc +@@ -14,8 +14,10 @@ + + #include + #include ++#include + #include + #include ++#include + + #include + +@@ -430,7 +432,7 @@ static int get_fd_data(int fd, bufferlis + return 0; + } + +-int get_log(ObjectStore *fs, __u8 struct_ver, ++int get_log(CephContext *cct, ObjectStore *fs, __u8 struct_ver, + spg_t pgid, const pg_info_t &info, + PGLog::IndexedLog &log, pg_missing_t &missing) + { +@@ -442,7 +444,7 @@ int get_log(ObjectStore *fs, __u8 struct + ostringstream oss; + ceph_assert(struct_ver > 0); + PGLog::read_log_and_missing( +- fs, ch, ++ cct, fs, ch, + pgid.make_pgmeta_oid(), + info, log, missing, + oss, +@@ -1068,7 +1070,8 @@ int add_osdmap(ObjectStore *store, metad + return get_osdmap(store, ms.map_epoch, ms.osdmap, ms.osdmap_bl); + } + +-int ObjectStoreTool::do_export(ObjectStore *fs, coll_t coll, spg_t pgid, ++int ObjectStoreTool::do_export( ++ CephContext *cct, ObjectStore *fs, coll_t coll, spg_t pgid, + pg_info_t &info, epoch_t map_epoch, __u8 struct_ver, + const OSDSuperblock& superblock, + PastIntervals &past_intervals) +@@ -1078,7 +1081,7 @@ int ObjectStoreTool::do_export(ObjectSto + + cerr << "Exporting " << pgid << " info " << info << std::endl; + +- int ret = get_log(fs, struct_ver, pgid, info, log, missing); ++ int ret = get_log(cct, fs, struct_ver, pgid, info, log, missing); + if (ret > 0) + return ret; + +@@ -3147,6 +3150,136 @@ int dup(string srcpath, ObjectStore *src + return r; + } + ++ ++const int ceph_entity_name_type(const string name) ++{ ++ if (name == "mds") return CEPH_ENTITY_TYPE_MDS; ++ if (name == "osd") return CEPH_ENTITY_TYPE_OSD; ++ if (name == "mon") return CEPH_ENTITY_TYPE_MON; ++ if (name == "client") return CEPH_ENTITY_TYPE_CLIENT; ++ if (name == "mgr") return CEPH_ENTITY_TYPE_MGR; ++ if (name == "auth") return CEPH_ENTITY_TYPE_AUTH; ++ return -1; ++} ++ ++eversion_t get_eversion_from_str(const string& s) { ++ eversion_t e; ++ vector result; ++ boost::split(result, s, boost::is_any_of("'")); ++ if (result.size() != 2) { ++ cerr << "eversion_t: invalid format: '" << s << "'" << std::endl; ++ return e; ++ } ++ e.epoch = atoi(result[0].c_str()); ++ e.version = atoi(result[1].c_str()); ++ return e; ++} ++ ++osd_reqid_t get_reqid_from_str(const string& s) { ++ osd_reqid_t reqid; ++ ++ vector result; ++ boost::split(result, s, boost::is_any_of(".:")); ++ if (result.size() != 4) { ++ cerr << "reqid: invalid format " << s << std::endl; ++ return osd_reqid_t(); ++ } ++ reqid.name._type = ceph_entity_name_type(result[0]); ++ reqid.name._num = atoi(result[1].c_str()); ++ ++ reqid.inc = atoi(result[2].c_str()); ++ reqid.tid = atoi(result[3].c_str()); ++ return reqid; ++} ++ ++void do_dups_inject_transction(ObjectStore 
*store, spg_t r_pgid, map *new_dups) ++{ ++ ObjectStore::Transaction t; ++ coll_t coll(r_pgid); ++ cerr << "injecting dups into pgid:" << r_pgid << " num of dups:" << new_dups->size() << std::endl; ++ t.omap_setkeys(coll, r_pgid.make_pgmeta_oid(), (*new_dups)); ++ auto ch = store->open_collection(coll); ++ store->queue_transaction(ch, std::move(t)); ++ new_dups->clear(); ++} ++ ++int do_dups_inject_object(ObjectStore *store, spg_t r_pgid, json_spirit::mObject &in_json_obj, ++ map *new_dups, bool debug) { ++ std::map::const_iterator it = in_json_obj.find("generate"); ++ int32_t generate = 0; ++ if (it != in_json_obj.end()) { ++ generate = atoi(it->second.get_str().c_str()); ++ } ++ ++ it = in_json_obj.find("reqid"); ++ if (it == in_json_obj.end()) { ++ return 1; ++ } ++ osd_reqid_t reqid(get_reqid_from_str(it->second.get_str())); ++ it = in_json_obj.find("version"); ++ if (it == in_json_obj.end()) { ++ return 1; ++ } ++ eversion_t version(get_eversion_from_str(it->second.get_str())); ++ it = in_json_obj.find("user_version"); ++ if (it == in_json_obj.end()) { ++ return 1; ++ } ++ version_t user_version = atoi(it->second.get_str().c_str()); ++ it = in_json_obj.find("return_code"); ++ if (it == in_json_obj.end()) { ++ return 1; ++ } ++ int32_t return_code = atoi(it->second.get_str().c_str()); ++ if (generate) { ++ for(auto i = 0; i < generate; ++i) { ++ version.version++; ++ if (debug) { ++ cout << "generate dups reqid " << reqid << " v=" << version << std::endl; ++ } ++ pg_log_dup_t tmp(version, user_version, reqid, return_code); ++ bufferlist bl; ++ encode(tmp, bl); ++ (*new_dups)[tmp.get_key_name()] = std::move(bl); ++ if ( new_dups->size() > 50000 ) { ++ do_dups_inject_transction(store, r_pgid, new_dups); ++ cout << "inject of " << i << " dups into pgid:" << r_pgid << " done..." 
<< std::endl; ++ } ++ } ++ return 0; ++ } else { ++ pg_log_dup_t tmp(version, user_version, reqid, return_code); ++ if (debug) { ++ cout << "adding dup: " << tmp << "into key:" << tmp.get_key_name() << std::endl; ++ } ++ bufferlist bl; ++ encode(tmp, bl); ++ (*new_dups)[tmp.get_key_name()] = std::move(bl); ++ } ++ return 0; ++} ++ ++void do_dups_inject_from_json(ObjectStore *store, spg_t r_pgid, json_spirit::mValue &inJson, bool debug) ++{ ++ map new_dups; ++ const vector& o = inJson.get_array(); ++ for (const auto& obj : o) { ++ if (obj.type() == json_spirit::obj_type) { ++ json_spirit::mObject Mobj = obj.get_obj(); ++ do_dups_inject_object(store, r_pgid, Mobj, &new_dups, debug); ++ } else { ++ throw std::runtime_error("JSON array/object not allowed type:" + std::to_string(obj.type())); ++ return; ++ } ++ } ++ if (new_dups.size() > 0) { ++ do_dups_inject_transction(store, r_pgid, &new_dups); ++ } ++ ++ ++ return ; ++} ++ + void usage(po::options_description &desc) + { + cerr << std::endl; +@@ -3480,7 +3613,7 @@ int main(int argc, char **argv) + } else { + file_fd = open(file.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666); + } +- } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap") { ++ } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap" || op == "pg-log-inject-dups") { + if (!vm.count("file") || file == "-") { + if (isatty(STDIN_FILENO)) { + cerr << "stdin is a tty and no --file filename specified" << std::endl; +@@ -3859,7 +3992,7 @@ int main(int argc, char **argv) + || op == "export-remove" || op == "mark-complete" + || op == "reset-last-complete" + || op == "trim-pg-log" +- || op == "trim-pg-log-dups") && ++ || op == "pg-log-inject-dups") && + pgidstr.length() == 0) { + cerr << "Must provide pgid" << std::endl; + usage(desc); +@@ -4086,7 +4219,7 @@ int main(int argc, char **argv) + + // If not an object command nor any of the ops handled below, then output this usage + // before complaining about a bad pgid +- if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log" && op != "trim-pg-log-dups") { ++ if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log" && op != "trim-pg-log-dups" && op != "pg-log-inject-dups") { + cerr << "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, " + "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, trim-pg-log-dups statfs)" + << std::endl; +@@ -4378,7 +4511,7 @@ int main(int argc, char **argv) + + if (op == "export" || op == "export-remove") { + ceph_assert(superblock != nullptr); +- ret = tool.do_export(fs, coll, pgid, info, map_epoch, struct_ver, *superblock, past_intervals); ++ ret = tool.do_export(cct.get(), fs, coll, pgid, info, map_epoch, struct_ver, *superblock, past_intervals); + if (ret == 0) { + cerr << "Export successful" << std::endl; + if (op == "export-remove") { +@@ -4397,7 +4530,7 @@ int main(int argc, char **argv) + } else if (op == "log") { + PGLog::IndexedLog log; + pg_missing_t missing; +- ret = get_log(fs, struct_ver, pgid, info, log, missing); ++ ret = get_log(cct.get(), fs, struct_ver, pgid, info, log, missing); + if (ret < 0) + goto out; + +@@ -4482,6 +4615,34 @@ int main(int argc, char **argv) + } + cout << 
"Reseting last_complete succeeded" << std::endl; + ++ } else if (op == "pg-log-inject-dups") { ++ if (!vm.count("file") || file == "-") { ++ cerr << "Must provide file containing JSON dups entries" << std::endl; ++ ret = 1; ++ goto out; ++ } ++ if (debug) ++ cerr << "opening file " << file << std::endl; ++ ++ ifstream json_file_stream(file , std::ifstream::in); ++ if (!json_file_stream.is_open()) { ++ cerr << "unable to open file " << file << std::endl; ++ ret = -1; ++ goto out; ++ } ++ json_spirit::mValue result; ++ try { ++ if (!json_spirit::read(json_file_stream, result)) ++ throw std::runtime_error("unparseable JSON " + file); ++ if (result.type() != json_spirit::array_type) { ++ cerr << "result is not an array_type - type=" << result.type() << std::endl; ++ throw std::runtime_error("not JSON array_type " + file); ++ } ++ do_dups_inject_from_json(fs, pgid, result, debug); ++ } catch (const std::runtime_error &e) { ++ cerr << e.what() << std::endl;; ++ return -1; ++ } + } else { + ceph_assert(!"Should have already checked for valid --op"); + } +Index: ceph-15.2.17/src/tools/ceph_objectstore_tool.h +=================================================================== +--- ceph-15.2.17.orig/src/tools/ceph_objectstore_tool.h ++++ ceph-15.2.17/src/tools/ceph_objectstore_tool.h +@@ -27,7 +27,7 @@ class ObjectStoreTool : public RadosDump + int dump_export(Formatter *formatter); + int do_import(ObjectStore *store, OSDSuperblock& sb, bool force, + std::string pgidstr); +- int do_export(ObjectStore *fs, coll_t coll, spg_t pgid, ++ int do_export(CephContext *cct, ObjectStore *fs, coll_t coll, spg_t pgid, + pg_info_t &info, epoch_t map_epoch, __u8 struct_ver, + const OSDSuperblock& superblock, + PastIntervals &past_intervals); diff -Nru ceph-15.2.17/src/test/debian-jessie/debian/patches/series ceph-15.2.17/src/test/debian-jessie/debian/patches/series --- ceph-15.2.17/src/test/debian-jessie/debian/patches/series 2022-10-12 13:26:35.000000000 +0000 +++ ceph-15.2.17/src/test/debian-jessie/debian/patches/series 2022-10-31 05:22:09.000000000 +0000 @@ -11,3 +11,4 @@ riscv64-link-pthread.patch # AARCH64 EC regression bug1917414.patch +bug1978913.patch diff -Nru ceph-15.2.17/src/test/ubuntu-16.04/debian/changelog ceph-15.2.17/src/test/ubuntu-16.04/debian/changelog --- ceph-15.2.17/src/test/ubuntu-16.04/debian/changelog 2022-10-12 13:30:58.000000000 +0000 +++ ceph-15.2.17/src/test/ubuntu-16.04/debian/changelog 2022-10-31 05:45:04.000000000 +0000 @@ -1,3 +1,10 @@ +ceph (15.2.17-0ubuntu0.20.04.2) focal; urgency=medium + + * d/p/bug1978913.patch: + Cherry-pick upstream fix for on-line trim of dups + + -- Nikhil Kshirsagar Mon, 31 Oct 2022 05:45:04 +0000 + ceph (15.2.17-0ubuntu0.20.04.1) focal; urgency=medium * New upstream release (LP: #1990862). 
diff -Nru ceph-15.2.17/src/test/ubuntu-16.04/debian/patches/bug1978913.patch ceph-15.2.17/src/test/ubuntu-16.04/debian/patches/bug1978913.patch --- ceph-15.2.17/src/test/ubuntu-16.04/debian/patches/bug1978913.patch 1970-01-01 00:00:00.000000000 +0000 +++ ceph-15.2.17/src/test/ubuntu-16.04/debian/patches/bug1978913.patch 2022-10-31 05:43:10.000000000 +0000 @@ -0,0 +1,840 @@ +Index: ceph-15.2.17/qa/standalone/osd/repro_long_log.sh +=================================================================== +--- ceph-15.2.17.orig/qa/standalone/osd/repro_long_log.sh ++++ ceph-15.2.17/qa/standalone/osd/repro_long_log.sh +@@ -148,6 +148,44 @@ function TEST_trim_max_entries() + test_log_size $PGID 3 || return 1 + } + ++function TEST_trim_max_entries_with_dups() ++{ ++ local dir=$1 ++ ++ setup_log_test $dir || return 1 ++ ++ ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1 ++ ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2 ++ ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2 ++ ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4 ++ ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 20 || return 1 ++ ++ # adding log entries, should only trim 4 and add one each time ++ # dups should be trimmed to 1 ++ rados -p test rm foo ++ test_log_size $PGID 18 2 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 15 6 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 12 10 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 9 14 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 6 18 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 3 20 || return 1 ++ ++ # below trim_min ++ rados -p test rm foo ++ test_log_size $PGID 4 20 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 3 20 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 4 20 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 3 20 || return 1 ++} ++ + main repro-long-log "$@" + + # Local Variables: +Index: ceph-15.2.17/src/kv/RocksDBStore.cc +=================================================================== +--- ceph-15.2.17.orig/src/kv/RocksDBStore.cc ++++ ceph-15.2.17/src/kv/RocksDBStore.cc +@@ -1008,6 +1008,8 @@ void RocksDBStore::RocksDBTransactionImp + const string &start, + const string &end) + { ++ ldout(db->cct, 10) << __func__ << " enter start=" << start ++ << " end=" << end << dendl; + auto cf = db->get_cf_handle(prefix); + + uint64_t cnt = db->delete_range_threshold; +@@ -1021,8 +1023,12 @@ void RocksDBStore::RocksDBTransactionImp + if (!cnt) { + bat.RollbackToSavePoint(); + if (cf) { ++ ldout(db->cct, 10) << __func__ << " p_iter == end(), resorting to DeleteRange" ++ << dendl; + bat.DeleteRange(cf, rocksdb::Slice(start), rocksdb::Slice(end)); + } else { ++ ldout(db->cct, 10) << __func__ << " p_iter == end(), resorting to DeleteRange" ++ << dendl; + bat.DeleteRange(db->default_cf, + rocksdb::Slice(combine_strings(prefix, start)), + rocksdb::Slice(combine_strings(prefix, end))); +@@ -1038,6 +1044,7 @@ void RocksDBStore::RocksDBTransactionImp + --cnt; + } + bat.PopSavePoint(); ++ ldout(db->cct, 10) << __func__ << " end" << dendl; + } + + void RocksDBStore::RocksDBTransactionImpl::merge( +Index: ceph-15.2.17/src/osd/PGLog.cc +=================================================================== +--- ceph-15.2.17.orig/src/osd/PGLog.cc ++++ ceph-15.2.17/src/osd/PGLog.cc +@@ -50,6 +50,7 @@ void PGLog::IndexedLog::trim( + set<string>* trimmed_dups, + eversion_t *write_from_dups) + { ++ lgeneric_subdout(cct, osd, 10) << "IndexedLog::trim s=" << s << 
dendl; + ceph_assert(s <= can_rollback_to); + if (complete_to != log.end()) + lgeneric_subdout(cct, osd, 20) << " complete_to " << complete_to->version << dendl; +@@ -121,10 +122,18 @@ void PGLog::IndexedLog::trim( + } + } + +- while (!dups.empty()) { ++ // we can hit an inflated `dups` b/c of https://tracker.ceph.com/issues/53729 ++ // the idea is to slowly trim them over a prolonged period of time and mix ++ // omap deletes with writes (if we're here, a new log entry got added) to ++ // neither: 1) blow size of single Transaction nor 2) generate-n-accumulate ++ // large amount of tombstones in BlueStore's RocksDB. ++ // if trimming immediately is a must, then the ceph-objectstore-tool is ++ // the way to go. ++ const size_t max_dups = cct->_conf->osd_pg_log_dups_tracked; ++ for (size_t max_dups_to_trim = cct->_conf->osd_pg_log_trim_max; ++ max_dups_to_trim > 0 && dups.size() > max_dups; ++ max_dups_to_trim--) { + const auto& e = *dups.begin(); +- if (e.version.version >= earliest_dup_version) +- break; + lgeneric_subdout(cct, osd, 20) << "trim dup " << e << dendl; + if (trimmed_dups) + trimmed_dups->insert(e.get_key_name()); +@@ -135,6 +144,10 @@ void PGLog::IndexedLog::trim( + // raise tail? + if (tail < s) + tail = s; ++ lgeneric_subdout(cct, osd, 20) << "IndexedLog::trim after trim" ++ << " dups.size()=" << dups.size() ++ << " tail=" << tail ++ << " s=" << s << dendl; + } + + ostream& PGLog::IndexedLog::print(ostream& out) const +@@ -506,6 +519,9 @@ void PGLog::merge_log(pg_info_t &oinfo, + + // returns true if any changes were made to log.dups + bool PGLog::merge_log_dups(const pg_log_t& olog) { ++ dout(5) << __func__ ++ << " log.dups.size()=" << log.dups.size() ++ << "olog.dups.size()=" << olog.dups.size() << dendl; + bool changed = false; + + if (!olog.dups.empty()) { +@@ -584,6 +600,9 @@ bool PGLog::merge_log_dups(const pg_log_ + } + } + ++ dout(5) << "end of " << __func__ << " changed=" << changed ++ << " log.dups.size()=" << log.dups.size() ++ << " olog.dups.size()=" << olog.dups.size() << dendl; + return changed; + } + +@@ -645,7 +664,8 @@ void PGLog::write_log_and_missing( + dirty_from_dups, + write_from_dups, + &may_include_deletes_in_missing_dirty, +- (pg_log_debug ? &log_keys_debug : nullptr)); ++ (pg_log_debug ? 
&log_keys_debug : nullptr), ++ this); + undirty(); + } else { + dout(10) << "log is not dirty" << dendl; +@@ -659,14 +679,15 @@ void PGLog::write_log_and_missing_wo_mis + pg_log_t &log, + const coll_t& coll, const ghobject_t &log_oid, + map<eversion_t, hobject_t> &divergent_priors, +- bool require_rollback ++ bool require_rollback, ++ const DoutPrefixProvider *dpp + ) + { + _write_log_and_missing_wo_missing( + t, km, log, coll, log_oid, + divergent_priors, eversion_t::max(), eversion_t(), eversion_t(), + true, true, require_rollback, +- eversion_t::max(), eversion_t(), eversion_t(), nullptr); ++ eversion_t::max(), eversion_t(), eversion_t(), nullptr, dpp); + } + + // static +@@ -678,7 +699,8 @@ void PGLog::write_log_and_missing( + const ghobject_t &log_oid, + const pg_missing_tracker_t &missing, + bool require_rollback, +- bool *may_include_deletes_in_missing_dirty) ++ bool *may_include_deletes_in_missing_dirty, ++ const DoutPrefixProvider *dpp) + { + _write_log_and_missing( + t, km, log, coll, log_oid, +@@ -692,7 +714,7 @@ void PGLog::write_log_and_missing( + eversion_t::max(), + eversion_t(), + eversion_t(), +- may_include_deletes_in_missing_dirty, nullptr); ++ may_include_deletes_in_missing_dirty, nullptr, dpp); + } + + // static +@@ -711,10 +733,14 @@ void PGLog::_write_log_and_missing_wo_mi + eversion_t dirty_to_dups, + eversion_t dirty_from_dups, + eversion_t write_from_dups, +- set<string> *log_keys_debug ++ set<string> *log_keys_debug, ++ const DoutPrefixProvider *dpp + ) + { +- // dout(10) << "write_log_and_missing, clearing up to " << dirty_to << dendl; ++ ldpp_dout(dpp, 10) << "_write_log_and_missing_wo_missing, clearing up to " << dirty_to ++ << " dirty_to_dups=" << dirty_to_dups ++ << " dirty_from_dups=" << dirty_from_dups ++ << " write_from_dups=" << write_from_dups << dendl; + if (touch_log) + t.touch(coll, log_oid); + if (dirty_to != eversion_t()) { +@@ -765,6 +791,8 @@ void PGLog::_write_log_and_missing_wo_mi + if (dirty_to_dups != eversion_t()) { + pg_log_dup_t min, dirty_to_dup; + dirty_to_dup.version = dirty_to_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups min=" << min.get_key_name() ++ << " to dirty_to_dup=" << dirty_to_dup.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + min.get_key_name(), dirty_to_dup.get_key_name()); +@@ -773,11 +801,16 @@ void PGLog::_write_log_and_missing_wo_mi + pg_log_dup_t max, dirty_from_dup; + max.version = eversion_t::max(); + dirty_from_dup.version = dirty_from_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups dirty_from_dup=" ++ << dirty_from_dup.get_key_name() ++ << " to max=" << max.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + dirty_from_dup.get_key_name(), max.get_key_name()); + } + ++ ldpp_dout(dpp, 10) << __func__ << " going to encode log.dups.size()=" ++ << log.dups.size() << dendl; + for (const auto& entry : log.dups) { + if (entry.version > dirty_to_dups) + break; +@@ -786,6 +819,8 @@ void PGLog::_write_log_and_missing_wo_mi + (*km)[entry.get_key_name()].claim(bl); + } + ++ ldpp_dout(dpp, 10) << __func__ << " 1st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + for (list<pg_log_dup_t>::reverse_iterator p = log.dups.rbegin(); + p != log.dups.rend() && + (p->version >= dirty_from_dups || p->version >= write_from_dups) && +@@ -796,8 +831,11 @@ void PGLog::_write_log_and_missing_wo_mi + (*km)[p->get_key_name()].claim(bl); + } + ++ ldpp_dout(dpp, 10) << __func__ << " 2st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + if (dirty_divergent_priors) { +- //dout(10) << "write_log_and_missing: writing 
divergent_priors" << dendl; ++ ldpp_dout(dpp, 10) << "write_log_and_missing: writing divergent_priors" ++ << dendl; + encode(divergent_priors, (*km)["divergent_priors"]); + } + if (require_rollback) { +@@ -808,6 +846,7 @@ void PGLog::_write_log_and_missing_wo_mi + log.get_rollback_info_trimmed_to(), + (*km)["rollback_info_trimmed_to"]); + } ++ ldpp_dout(dpp, 10) << "end of " << __func__ << dendl; + } + + // static +@@ -829,8 +868,14 @@ void PGLog::_write_log_and_missing( + eversion_t dirty_from_dups, + eversion_t write_from_dups, + bool *may_include_deletes_in_missing_dirty, // in/out param +- set *log_keys_debug ++ set *log_keys_debug, ++ const DoutPrefixProvider *dpp + ) { ++ ldpp_dout(dpp, 10) << __func__ << " clearing up to " << dirty_to ++ << " dirty_to_dups=" << dirty_to_dups ++ << " dirty_from_dups=" << dirty_from_dups ++ << " write_from_dups=" << write_from_dups ++ << " trimmed_dups.size()=" << trimmed_dups.size() << dendl; + set to_remove; + to_remove.swap(trimmed_dups); + for (auto& t : trimmed) { +@@ -853,7 +898,8 @@ void PGLog::_write_log_and_missing( + clear_up_to(log_keys_debug, dirty_to.get_key_name()); + } + if (dirty_to != eversion_t::max() && dirty_from != eversion_t::max()) { +- // dout(10) << "write_log_and_missing, clearing from " << dirty_from << dendl; ++ ldpp_dout(dpp, 10) << "write_log_and_missing, clearing from " ++ << dirty_from << dendl; + t.omap_rmkeyrange( + coll, log_oid, + dirty_from.get_key_name(), eversion_t::max().get_key_name()); +@@ -894,6 +940,8 @@ void PGLog::_write_log_and_missing( + if (dirty_to_dups != eversion_t()) { + pg_log_dup_t min, dirty_to_dup; + dirty_to_dup.version = dirty_to_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups min=" << min.get_key_name() ++ << " to dirty_to_dup=" << dirty_to_dup.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + min.get_key_name(), dirty_to_dup.get_key_name()); +@@ -902,11 +950,16 @@ void PGLog::_write_log_and_missing( + pg_log_dup_t max, dirty_from_dup; + max.version = eversion_t::max(); + dirty_from_dup.version = dirty_from_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups dirty_from_dup=" ++ << dirty_from_dup.get_key_name() ++ << " to max=" << max.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + dirty_from_dup.get_key_name(), max.get_key_name()); + } + ++ ldpp_dout(dpp, 10) << __func__ << " going to encode log.dups.size()=" ++ << log.dups.size() << dendl; + for (const auto& entry : log.dups) { + if (entry.version > dirty_to_dups) + break; +@@ -914,6 +967,8 @@ void PGLog::_write_log_and_missing( + encode(entry, bl); + (*km)[entry.get_key_name()].claim(bl); + } ++ ldpp_dout(dpp, 10) << __func__ << " 1st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + + for (list::reverse_iterator p = log.dups.rbegin(); + p != log.dups.rend() && +@@ -925,8 +980,11 @@ void PGLog::_write_log_and_missing( + (*km)[p->get_key_name()].claim(bl); + } + ++ ldpp_dout(dpp, 10) << __func__ << " 2st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + if (clear_divergent_priors) { +- //dout(10) << "write_log_and_missing: writing divergent_priors" << dendl; ++ ldpp_dout(dpp, 10) << "write_log_and_missing: writing divergent_priors" ++ << dendl; + to_remove.insert("divergent_priors"); + } + // since we encode individual missing items instead of a whole +@@ -956,6 +1014,7 @@ void PGLog::_write_log_and_missing( + + if (!to_remove.empty()) + t.omap_rmkeys(coll, log_oid, to_remove); ++ ldpp_dout(dpp, 10) << "end of " << __func__ << dendl; + } + + void 
PGLog::rebuild_missing_set_with_deletes( +Index: ceph-15.2.17/src/osd/PGLog.h +=================================================================== +--- ceph-15.2.17.orig/src/osd/PGLog.h ++++ ceph-15.2.17/src/osd/PGLog.h +@@ -1274,8 +1274,9 @@ public: + map<string, bufferlist>* km, + pg_log_t &log, + const coll_t& coll, +- const ghobject_t &log_oid, map<eversion_t, hobject_t> &divergent_priors, +- bool require_rollback); ++ const ghobject_t &log_oid, std::map<eversion_t, hobject_t> &divergent_priors, ++ bool require_rollback, ++ const DoutPrefixProvider *dpp = nullptr); + + static void write_log_and_missing( + ObjectStore::Transaction& t, +@@ -1285,7 +1286,8 @@ public: + const ghobject_t &log_oid, + const pg_missing_tracker_t &missing, + bool require_rollback, +- bool *rebuilt_missing_set_with_deletes); ++ bool *rebuilt_missing_set_with_deletes, ++ const DoutPrefixProvider *dpp = nullptr); + + static void _write_log_and_missing_wo_missing( + ObjectStore::Transaction& t, +@@ -1302,7 +1304,8 @@ public: + eversion_t dirty_to_dups, + eversion_t dirty_from_dups, + eversion_t write_from_dups, +- set<string> *log_keys_debug ++ std::set<std::string> *log_keys_debug, ++ const DoutPrefixProvider *dpp = nullptr + ); + + static void _write_log_and_missing( +@@ -1323,7 +1326,8 @@ public: + eversion_t dirty_from_dups, + eversion_t write_from_dups, + bool *may_include_deletes_in_missing_dirty, +- set<string> *log_keys_debug ++ std::set<std::string> *log_keys_debug, ++ const DoutPrefixProvider *dpp = nullptr + ); + + void read_log_and_missing( +@@ -1336,7 +1340,7 @@ public: + bool debug_verify_stored_missing = false + ) { + return read_log_and_missing( +- store, ch, pgmeta_oid, info, ++ cct, store, ch, pgmeta_oid, info, + log, missing, oss, + tolerate_divergent_missing_log, + &clear_divergent_priors, +@@ -1347,6 +1351,7 @@ public: + + template <typename missing_type> + static void read_log_and_missing( ++ CephContext *cct, + ObjectStore *store, + ObjectStore::CollectionHandle &ch, + ghobject_t pgmeta_oid, +@@ -1360,9 +1365,9 @@ public: + set<string> *log_keys_debug = nullptr, + bool debug_verify_stored_missing = false + ) { +- ldpp_dout(dpp, 20) << "read_log_and_missing coll " << ch->cid ++ ldpp_dout(dpp, 10) << "read_log_and_missing coll " << ch->cid + << " " << pgmeta_oid << dendl; +- ++ size_t total_dups = 0; + // legacy? + struct stat st; + int r = store->stat(ch, pgmeta_oid, &st); +@@ -1377,8 +1382,9 @@ public: + map<eversion_t, hobject_t> divergent_priors; + bool must_rebuild = false; + missing.may_include_deletes = false; +- list<pg_log_entry_t> entries; +- list<pg_log_dup_t> dups; ++ std::list<pg_log_entry_t> entries; ++ std::list<pg_log_dup_t> dups; ++ const auto NUM_DUPS_WARN_THRESHOLD = 2*cct->_conf->osd_pg_log_dups_tracked; + if (p) { + for (p->seek_to_first(); p->valid() ; p->next()) { + // non-log pgmeta_oid keys are prefixed with _; skip those +@@ -1409,11 +1415,20 @@ public: + } + missing.add(oid, std::move(item)); + } else if (p->key().substr(0, 4) == string("dup_")) { ++ ++total_dups; + pg_log_dup_t dup; + decode(dup, bp); + if (!dups.empty()) { + ceph_assert(dups.back().version < dup.version); + } ++ if (dups.size() == NUM_DUPS_WARN_THRESHOLD) { ++ ldpp_dout(dpp, 0) << "read_log_and_missing WARN num of dups exceeded " ++ << NUM_DUPS_WARN_THRESHOLD << "." ++ << " You can be hit by THE DUPS BUG" ++ << " https://tracker.ceph.com/issues/53729." 
++ << " Consider ceph-objectstore-tool --op trim-pg-log-dups" ++ << dendl; ++ } + dups.push_back(dup); + } else { + pg_log_entry_t e; +@@ -1591,7 +1606,9 @@ public: + (*clear_divergent_priors) = false; + missing.flush(); + } +- ldpp_dout(dpp, 10) << "read_log_and_missing done" << dendl; ++ ldpp_dout(dpp, 10) << "read_log_and_missing done coll " << ch->cid ++ << " total_dups=" << total_dups ++ << " log.dups.size()=" << log.dups.size() << dendl; + } // static read_log_and_missing + + #ifdef WITH_SEASTAR +Index: ceph-15.2.17/src/osd/osd_types.cc +=================================================================== +--- ceph-15.2.17.orig/src/osd/osd_types.cc ++++ ceph-15.2.17/src/osd/osd_types.cc +@@ -5042,8 +5042,8 @@ static void _handle_dups(CephContext* cc + { + auto earliest_dup_version = + target.head.version < maxdups ? 0u : target.head.version - maxdups + 1; +- lgeneric_subdout(cct, osd, 20) << "copy_up_to/copy_after earliest_dup_version " << earliest_dup_version << dendl; +- ++ lgeneric_subdout(cct, osd, 20) << __func__ << " earliest_dup_version " ++ << earliest_dup_version << dendl; + for (auto d = other.dups.cbegin(); d != other.dups.cend(); ++d) { + if (d->version.version >= earliest_dup_version) { + lgeneric_subdout(cct, osd, 20) +@@ -5072,7 +5072,9 @@ void pg_log_t::copy_after(CephContext* c + can_rollback_to = other.can_rollback_to; + head = other.head; + tail = other.tail; +- lgeneric_subdout(cct, osd, 20) << __func__ << " v " << v << dendl; ++ lgeneric_subdout(cct, osd, 20) << __func__ << " v " << v ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + for (auto i = other.log.crbegin(); i != other.log.crend(); ++i) { + ceph_assert(i->version > other.tail); + if (i->version <= v) { +@@ -5084,6 +5086,9 @@ void pg_log_t::copy_after(CephContext* c + log.push_front(*i); + } + _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked); ++ lgeneric_subdout(cct, osd, 20) << __func__ << " END v " << v ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + } + + void pg_log_t::copy_up_to(CephContext* cct, const pg_log_t &other, int max) +@@ -5093,6 +5098,9 @@ void pg_log_t::copy_up_to(CephContext* c + head = other.head; + tail = other.tail; + lgeneric_subdout(cct, osd, 20) << __func__ << " max " << max << dendl; ++ lgeneric_subdout(cct, osd, 20) << __func__ << " max " << max ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + for (auto i = other.log.crbegin(); i != other.log.crend(); ++i) { + ceph_assert(i->version > other.tail); + if (n++ >= max) { +@@ -5103,6 +5111,9 @@ void pg_log_t::copy_up_to(CephContext* c + log.push_front(*i); + } + _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked); ++ lgeneric_subdout(cct, osd, 20) << __func__ << " END max " << max ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + } + + ostream& pg_log_t::print(ostream& out) const +Index: ceph-15.2.17/src/test/osd/TestPGLog.cc +=================================================================== +--- ceph-15.2.17.orig/src/test/osd/TestPGLog.cc ++++ ceph-15.2.17/src/test/osd/TestPGLog.cc +@@ -2739,8 +2739,8 @@ TEST_F(PGLogTrimTest, TestPartialTrim) + EXPECT_EQ(eversion_t(19, 160), write_from_dups2); + EXPECT_EQ(2u, log.log.size()); + EXPECT_EQ(1u, trimmed2.size()); +- EXPECT_EQ(2u, log.dups.size()); +- EXPECT_EQ(1u, trimmed_dups2.size()); ++ EXPECT_EQ(3u, log.dups.size()); ++ EXPECT_EQ(0u, 
trimmed_dups2.size()); + } + + +@@ -3023,7 +3023,7 @@ TEST_F(PGLogTrimTest, TestTrimDups) { + + EXPECT_EQ(eversion_t(20, 103), write_from_dups) << log; + EXPECT_EQ(2u, log.log.size()) << log; +- EXPECT_EQ(3u, log.dups.size()) << log; ++ EXPECT_EQ(4u, log.dups.size()) << log; + } + + // This tests trim() to make copies of +@@ -3067,7 +3067,7 @@ TEST_F(PGLogTrimTest, TestTrimDups2) { + + EXPECT_EQ(eversion_t(10, 100), write_from_dups) << log; + EXPECT_EQ(4u, log.log.size()) << log; +- EXPECT_EQ(5u, log.dups.size()) << log; ++ EXPECT_EQ(6u, log.dups.size()) << log; + } + + // This tests copy_up_to() to make copies of +Index: ceph-15.2.17/src/tools/ceph_objectstore_tool.cc +=================================================================== +--- ceph-15.2.17.orig/src/tools/ceph_objectstore_tool.cc ++++ ceph-15.2.17/src/tools/ceph_objectstore_tool.cc +@@ -14,8 +14,10 @@ + + #include <boost/program_options/variables_map.hpp> + #include <boost/program_options/parsers.hpp> ++#include <boost/algorithm/string.hpp> + #include <boost/scoped_ptr.hpp> + #include <boost/optional.hpp> ++#include <fstream> + + #include <stdlib.h> + +@@ -430,7 +432,7 @@ static int get_fd_data(int fd, bufferlis + return 0; + } + +-int get_log(ObjectStore *fs, __u8 struct_ver, ++int get_log(CephContext *cct, ObjectStore *fs, __u8 struct_ver, + spg_t pgid, const pg_info_t &info, + PGLog::IndexedLog &log, pg_missing_t &missing) + { +@@ -442,7 +444,7 @@ int get_log(ObjectStore *fs, __u8 struct + ostringstream oss; + ceph_assert(struct_ver > 0); + PGLog::read_log_and_missing( +- fs, ch, ++ cct, fs, ch, + pgid.make_pgmeta_oid(), + info, log, missing, + oss, +@@ -1068,7 +1070,8 @@ int add_osdmap(ObjectStore *store, metad + return get_osdmap(store, ms.map_epoch, ms.osdmap, ms.osdmap_bl); + } + +-int ObjectStoreTool::do_export(ObjectStore *fs, coll_t coll, spg_t pgid, ++int ObjectStoreTool::do_export( ++ CephContext *cct, ObjectStore *fs, coll_t coll, spg_t pgid, + pg_info_t &info, epoch_t map_epoch, __u8 struct_ver, + const OSDSuperblock& superblock, + PastIntervals &past_intervals) +@@ -1078,7 +1081,7 @@ int ObjectStoreTool::do_export(ObjectSto + + cerr << "Exporting " << pgid << " info " << info << std::endl; + +- int ret = get_log(fs, struct_ver, pgid, info, log, missing); ++ int ret = get_log(cct, fs, struct_ver, pgid, info, log, missing); + if (ret > 0) + return ret; + +@@ -3147,6 +3150,136 @@ int dup(string srcpath, ObjectStore *src + return r; + } + ++ ++const int ceph_entity_name_type(const string name) ++{ ++ if (name == "mds") return CEPH_ENTITY_TYPE_MDS; ++ if (name == "osd") return CEPH_ENTITY_TYPE_OSD; ++ if (name == "mon") return CEPH_ENTITY_TYPE_MON; ++ if (name == "client") return CEPH_ENTITY_TYPE_CLIENT; ++ if (name == "mgr") return CEPH_ENTITY_TYPE_MGR; ++ if (name == "auth") return CEPH_ENTITY_TYPE_AUTH; ++ return -1; ++} ++ ++eversion_t get_eversion_from_str(const string& s) { ++ eversion_t e; ++ vector<string> result; ++ boost::split(result, s, boost::is_any_of("'")); ++ if (result.size() != 2) { ++ cerr << "eversion_t: invalid format: '" << s << "'" << std::endl; ++ return e; ++ } ++ e.epoch = atoi(result[0].c_str()); ++ e.version = atoi(result[1].c_str()); ++ return e; ++} ++ ++osd_reqid_t get_reqid_from_str(const string& s) { ++ osd_reqid_t reqid; ++ ++ vector<string> result; ++ boost::split(result, s, boost::is_any_of(".:")); ++ if (result.size() != 4) { ++ cerr << "reqid: invalid format " << s << std::endl; ++ return osd_reqid_t(); ++ } ++ reqid.name._type = ceph_entity_name_type(result[0]); ++ reqid.name._num = atoi(result[1].c_str()); ++ ++ reqid.inc = atoi(result[2].c_str()); ++ reqid.tid = atoi(result[3].c_str()); ++ return reqid; ++} ++ ++void do_dups_inject_transction(ObjectStore 
*store, spg_t r_pgid, map<string, bufferlist> *new_dups) ++{ ++ ObjectStore::Transaction t; ++ coll_t coll(r_pgid); ++ cerr << "injecting dups into pgid:" << r_pgid << " num of dups:" << new_dups->size() << std::endl; ++ t.omap_setkeys(coll, r_pgid.make_pgmeta_oid(), (*new_dups)); ++ auto ch = store->open_collection(coll); ++ store->queue_transaction(ch, std::move(t)); ++ new_dups->clear(); ++} ++ ++int do_dups_inject_object(ObjectStore *store, spg_t r_pgid, json_spirit::mObject &in_json_obj, ++ map<string, bufferlist> *new_dups, bool debug) { ++ std::map<std::string, json_spirit::mValue>::const_iterator it = in_json_obj.find("generate"); ++ int32_t generate = 0; ++ if (it != in_json_obj.end()) { ++ generate = atoi(it->second.get_str().c_str()); ++ } ++ ++ it = in_json_obj.find("reqid"); ++ if (it == in_json_obj.end()) { ++ return 1; ++ } ++ osd_reqid_t reqid(get_reqid_from_str(it->second.get_str())); ++ it = in_json_obj.find("version"); ++ if (it == in_json_obj.end()) { ++ return 1; ++ } ++ eversion_t version(get_eversion_from_str(it->second.get_str())); ++ it = in_json_obj.find("user_version"); ++ if (it == in_json_obj.end()) { ++ return 1; ++ } ++ version_t user_version = atoi(it->second.get_str().c_str()); ++ it = in_json_obj.find("return_code"); ++ if (it == in_json_obj.end()) { ++ return 1; ++ } ++ int32_t return_code = atoi(it->second.get_str().c_str()); ++ if (generate) { ++ for(auto i = 0; i < generate; ++i) { ++ version.version++; ++ if (debug) { ++ cout << "generate dups reqid " << reqid << " v=" << version << std::endl; ++ } ++ pg_log_dup_t tmp(version, user_version, reqid, return_code); ++ bufferlist bl; ++ encode(tmp, bl); ++ (*new_dups)[tmp.get_key_name()] = std::move(bl); ++ if ( new_dups->size() > 50000 ) { ++ do_dups_inject_transction(store, r_pgid, new_dups); ++ cout << "inject of " << i << " dups into pgid:" << r_pgid << " done..." 
<< std::endl; ++ } ++ } ++ return 0; ++ } else { ++ pg_log_dup_t tmp(version, user_version, reqid, return_code); ++ if (debug) { ++ cout << "adding dup: " << tmp << "into key:" << tmp.get_key_name() << std::endl; ++ } ++ bufferlist bl; ++ encode(tmp, bl); ++ (*new_dups)[tmp.get_key_name()] = std::move(bl); ++ } ++ return 0; ++} ++ ++void do_dups_inject_from_json(ObjectStore *store, spg_t r_pgid, json_spirit::mValue &inJson, bool debug) ++{ ++ map<string, bufferlist> new_dups; ++ const vector<json_spirit::mValue>& o = inJson.get_array(); ++ for (const auto& obj : o) { ++ if (obj.type() == json_spirit::obj_type) { ++ json_spirit::mObject Mobj = obj.get_obj(); ++ do_dups_inject_object(store, r_pgid, Mobj, &new_dups, debug); ++ } else { ++ throw std::runtime_error("JSON array/object not allowed type:" + std::to_string(obj.type())); ++ return; ++ } ++ } ++ if (new_dups.size() > 0) { ++ do_dups_inject_transction(store, r_pgid, &new_dups); ++ } ++ ++ ++ return ; ++} ++ + void usage(po::options_description &desc) + { + cerr << std::endl; +@@ -3480,7 +3613,7 @@ int main(int argc, char **argv) + } else { + file_fd = open(file.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666); + } +- } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap") { ++ } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap" || op == "pg-log-inject-dups") { + if (!vm.count("file") || file == "-") { + if (isatty(STDIN_FILENO)) { + cerr << "stdin is a tty and no --file filename specified" << std::endl; +@@ -3859,7 +3992,7 @@ int main(int argc, char **argv) + || op == "export-remove" || op == "mark-complete" + || op == "reset-last-complete" + || op == "trim-pg-log" +- || op == "trim-pg-log-dups") && ++ || op == "pg-log-inject-dups") && + pgidstr.length() == 0) { + cerr << "Must provide pgid" << std::endl; + usage(desc); +@@ -4086,7 +4219,7 @@ int main(int argc, char **argv) + + // If not an object command nor any of the ops handled below, then output this usage + // before complaining about a bad pgid +- if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log" && op != "trim-pg-log-dups") { ++ if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log" && op != "trim-pg-log-dups" && op != "pg-log-inject-dups") { + cerr << "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, " + "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, trim-pg-log-dups statfs)" + << std::endl; +@@ -4378,7 +4511,7 @@ int main(int argc, char **argv) + + if (op == "export" || op == "export-remove") { + ceph_assert(superblock != nullptr); +- ret = tool.do_export(fs, coll, pgid, info, map_epoch, struct_ver, *superblock, past_intervals); ++ ret = tool.do_export(cct.get(), fs, coll, pgid, info, map_epoch, struct_ver, *superblock, past_intervals); + if (ret == 0) { + cerr << "Export successful" << std::endl; + if (op == "export-remove") { +@@ -4397,7 +4530,7 @@ int main(int argc, char **argv) + } else if (op == "log") { + PGLog::IndexedLog log; + pg_missing_t missing; +- ret = get_log(fs, struct_ver, pgid, info, log, missing); ++ ret = get_log(cct.get(), fs, struct_ver, pgid, info, log, missing); + if (ret < 0) + goto out; + +@@ -4482,6 +4615,34 @@ int main(int argc, char **argv) + } + cout << 
"Reseting last_complete succeeded" << std::endl; + ++ } else if (op == "pg-log-inject-dups") { ++ if (!vm.count("file") || file == "-") { ++ cerr << "Must provide file containing JSON dups entries" << std::endl; ++ ret = 1; ++ goto out; ++ } ++ if (debug) ++ cerr << "opening file " << file << std::endl; ++ ++ ifstream json_file_stream(file , std::ifstream::in); ++ if (!json_file_stream.is_open()) { ++ cerr << "unable to open file " << file << std::endl; ++ ret = -1; ++ goto out; ++ } ++ json_spirit::mValue result; ++ try { ++ if (!json_spirit::read(json_file_stream, result)) ++ throw std::runtime_error("unparseable JSON " + file); ++ if (result.type() != json_spirit::array_type) { ++ cerr << "result is not an array_type - type=" << result.type() << std::endl; ++ throw std::runtime_error("not JSON array_type " + file); ++ } ++ do_dups_inject_from_json(fs, pgid, result, debug); ++ } catch (const std::runtime_error &e) { ++ cerr << e.what() << std::endl;; ++ return -1; ++ } + } else { + ceph_assert(!"Should have already checked for valid --op"); + } +Index: ceph-15.2.17/src/tools/ceph_objectstore_tool.h +=================================================================== +--- ceph-15.2.17.orig/src/tools/ceph_objectstore_tool.h ++++ ceph-15.2.17/src/tools/ceph_objectstore_tool.h +@@ -27,7 +27,7 @@ class ObjectStoreTool : public RadosDump + int dump_export(Formatter *formatter); + int do_import(ObjectStore *store, OSDSuperblock& sb, bool force, + std::string pgidstr); +- int do_export(ObjectStore *fs, coll_t coll, spg_t pgid, ++ int do_export(CephContext *cct, ObjectStore *fs, coll_t coll, spg_t pgid, + pg_info_t &info, epoch_t map_epoch, __u8 struct_ver, + const OSDSuperblock& superblock, + PastIntervals &past_intervals); diff -Nru ceph-15.2.17/src/test/ubuntu-16.04/debian/patches/series ceph-15.2.17/src/test/ubuntu-16.04/debian/patches/series --- ceph-15.2.17/src/test/ubuntu-16.04/debian/patches/series 2022-10-12 13:26:35.000000000 +0000 +++ ceph-15.2.17/src/test/ubuntu-16.04/debian/patches/series 2022-10-31 05:22:09.000000000 +0000 @@ -11,3 +11,4 @@ riscv64-link-pthread.patch # AARCH64 EC regression bug1917414.patch +bug1978913.patch diff -Nru ceph-15.2.17/src/test/ubuntu-18.04/debian/changelog ceph-15.2.17/src/test/ubuntu-18.04/debian/changelog --- ceph-15.2.17/src/test/ubuntu-18.04/debian/changelog 2022-10-12 13:30:58.000000000 +0000 +++ ceph-15.2.17/src/test/ubuntu-18.04/debian/changelog 2022-10-31 05:45:04.000000000 +0000 @@ -1,3 +1,10 @@ +ceph (15.2.17-0ubuntu0.20.04.2) focal; urgency=medium + + * d/p/bug1978913.patch: + Cherry-pick upstream fix for on-line trim of dups + + -- Nikhil Kshirsagar Mon, 31 Oct 2022 05:45:04 +0000 + ceph (15.2.17-0ubuntu0.20.04.1) focal; urgency=medium * New upstream release (LP: #1990862). 
diff -Nru ceph-15.2.17/src/test/ubuntu-18.04/debian/patches/bug1978913.patch ceph-15.2.17/src/test/ubuntu-18.04/debian/patches/bug1978913.patch --- ceph-15.2.17/src/test/ubuntu-18.04/debian/patches/bug1978913.patch 1970-01-01 00:00:00.000000000 +0000 +++ ceph-15.2.17/src/test/ubuntu-18.04/debian/patches/bug1978913.patch 2022-10-31 05:43:10.000000000 +0000 @@ -0,0 +1,840 @@ +Index: ceph-15.2.17/qa/standalone/osd/repro_long_log.sh +=================================================================== +--- ceph-15.2.17.orig/qa/standalone/osd/repro_long_log.sh ++++ ceph-15.2.17/qa/standalone/osd/repro_long_log.sh +@@ -148,6 +148,44 @@ function TEST_trim_max_entries() + test_log_size $PGID 3 || return 1 + } + ++function TEST_trim_max_entries_with_dups() ++{ ++ local dir=$1 ++ ++ setup_log_test $dir || return 1 ++ ++ ceph tell osd.\* injectargs -- --osd_target_pg_log_entries_per_osd 2 || return 1 ++ ceph tell osd.\* injectargs -- --osd-min-pg-log-entries 2 ++ ceph tell osd.\* injectargs -- --osd-pg-log-trim-min 2 ++ ceph tell osd.\* injectargs -- --osd-pg-log-trim-max 4 ++ ceph tell osd.\* injectargs -- --osd_pg_log_dups_tracked 20 || return 1 ++ ++ # adding log entries, should only trim 4 and add one each time ++ # dups should be trimmed to 1 ++ rados -p test rm foo ++ test_log_size $PGID 18 2 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 15 6 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 12 10 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 9 14 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 6 18 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 3 20 || return 1 ++ ++ # below trim_min ++ rados -p test rm foo ++ test_log_size $PGID 4 20 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 3 20 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 4 20 || return 1 ++ rados -p test rm foo ++ test_log_size $PGID 3 20 || return 1 ++} ++ + main repro-long-log "$@" + + # Local Variables: +Index: ceph-15.2.17/src/kv/RocksDBStore.cc +=================================================================== +--- ceph-15.2.17.orig/src/kv/RocksDBStore.cc ++++ ceph-15.2.17/src/kv/RocksDBStore.cc +@@ -1008,6 +1008,8 @@ void RocksDBStore::RocksDBTransactionImp + const string &start, + const string &end) + { ++ ldout(db->cct, 10) << __func__ << " enter start=" << start ++ << " end=" << end << dendl; + auto cf = db->get_cf_handle(prefix); + + uint64_t cnt = db->delete_range_threshold; +@@ -1021,8 +1023,12 @@ void RocksDBStore::RocksDBTransactionImp + if (!cnt) { + bat.RollbackToSavePoint(); + if (cf) { ++ ldout(db->cct, 10) << __func__ << " p_iter == end(), resorting to DeleteRange" ++ << dendl; + bat.DeleteRange(cf, rocksdb::Slice(start), rocksdb::Slice(end)); + } else { ++ ldout(db->cct, 10) << __func__ << " p_iter == end(), resorting to DeleteRange" ++ << dendl; + bat.DeleteRange(db->default_cf, + rocksdb::Slice(combine_strings(prefix, start)), + rocksdb::Slice(combine_strings(prefix, end))); +@@ -1038,6 +1044,7 @@ void RocksDBStore::RocksDBTransactionImp + --cnt; + } + bat.PopSavePoint(); ++ ldout(db->cct, 10) << __func__ << " end" << dendl; + } + + void RocksDBStore::RocksDBTransactionImpl::merge( +Index: ceph-15.2.17/src/osd/PGLog.cc +=================================================================== +--- ceph-15.2.17.orig/src/osd/PGLog.cc ++++ ceph-15.2.17/src/osd/PGLog.cc +@@ -50,6 +50,7 @@ void PGLog::IndexedLog::trim( + set<string>* trimmed_dups, + eversion_t *write_from_dups) + { ++ lgeneric_subdout(cct, osd, 10) << "IndexedLog::trim s=" << s << 
dendl; + ceph_assert(s <= can_rollback_to); + if (complete_to != log.end()) + lgeneric_subdout(cct, osd, 20) << " complete_to " << complete_to->version << dendl; +@@ -121,10 +122,18 @@ void PGLog::IndexedLog::trim( + } + } + +- while (!dups.empty()) { ++ // we can hit an inflated `dups` b/c of https://tracker.ceph.com/issues/53729 ++ // the idea is to slowly trim them over a prolonged period of time and mix ++ // omap deletes with writes (if we're here, a new log entry got added) to ++ // neither: 1) blow size of single Transaction nor 2) generate-n-accumulate ++ // large amount of tombstones in BlueStore's RocksDB. ++ // if trimming immediately is a must, then the ceph-objectstore-tool is ++ // the way to go. ++ const size_t max_dups = cct->_conf->osd_pg_log_dups_tracked; ++ for (size_t max_dups_to_trim = cct->_conf->osd_pg_log_trim_max; ++ max_dups_to_trim > 0 && dups.size() > max_dups; ++ max_dups_to_trim--) { + const auto& e = *dups.begin(); +- if (e.version.version >= earliest_dup_version) +- break; + lgeneric_subdout(cct, osd, 20) << "trim dup " << e << dendl; + if (trimmed_dups) + trimmed_dups->insert(e.get_key_name()); +@@ -135,6 +144,10 @@ void PGLog::IndexedLog::trim( + // raise tail? + if (tail < s) + tail = s; ++ lgeneric_subdout(cct, osd, 20) << "IndexedLog::trim after trim" ++ << " dups.size()=" << dups.size() ++ << " tail=" << tail ++ << " s=" << s << dendl; + } + + ostream& PGLog::IndexedLog::print(ostream& out) const +@@ -506,6 +519,9 @@ void PGLog::merge_log(pg_info_t &oinfo, + + // returns true if any changes were made to log.dups + bool PGLog::merge_log_dups(const pg_log_t& olog) { ++ dout(5) << __func__ ++ << " log.dups.size()=" << log.dups.size() ++ << "olog.dups.size()=" << olog.dups.size() << dendl; + bool changed = false; + + if (!olog.dups.empty()) { +@@ -584,6 +600,9 @@ bool PGLog::merge_log_dups(const pg_log_ + } + } + ++ dout(5) << "end of " << __func__ << " changed=" << changed ++ << " log.dups.size()=" << log.dups.size() ++ << " olog.dups.size()=" << olog.dups.size() << dendl; + return changed; + } + +@@ -645,7 +664,8 @@ void PGLog::write_log_and_missing( + dirty_from_dups, + write_from_dups, + &may_include_deletes_in_missing_dirty, +- (pg_log_debug ? &log_keys_debug : nullptr)); ++ (pg_log_debug ? 
&log_keys_debug : nullptr), ++ this); + undirty(); + } else { + dout(10) << "log is not dirty" << dendl; +@@ -659,14 +679,15 @@ void PGLog::write_log_and_missing_wo_mis + pg_log_t &log, + const coll_t& coll, const ghobject_t &log_oid, + map<eversion_t, hobject_t> &divergent_priors, +- bool require_rollback ++ bool require_rollback, ++ const DoutPrefixProvider *dpp + ) + { + _write_log_and_missing_wo_missing( + t, km, log, coll, log_oid, + divergent_priors, eversion_t::max(), eversion_t(), eversion_t(), + true, true, require_rollback, +- eversion_t::max(), eversion_t(), eversion_t(), nullptr); ++ eversion_t::max(), eversion_t(), eversion_t(), nullptr, dpp); + } + + // static +@@ -678,7 +699,8 @@ void PGLog::write_log_and_missing( + const ghobject_t &log_oid, + const pg_missing_tracker_t &missing, + bool require_rollback, +- bool *may_include_deletes_in_missing_dirty) ++ bool *may_include_deletes_in_missing_dirty, ++ const DoutPrefixProvider *dpp) + { + _write_log_and_missing( + t, km, log, coll, log_oid, +@@ -692,7 +714,7 @@ void PGLog::write_log_and_missing( + eversion_t::max(), + eversion_t(), + eversion_t(), +- may_include_deletes_in_missing_dirty, nullptr); ++ may_include_deletes_in_missing_dirty, nullptr, dpp); + } + + // static +@@ -711,10 +733,14 @@ void PGLog::_write_log_and_missing_wo_mi + eversion_t dirty_to_dups, + eversion_t dirty_from_dups, + eversion_t write_from_dups, +- set<string> *log_keys_debug ++ set<string> *log_keys_debug, ++ const DoutPrefixProvider *dpp + ) + { +- // dout(10) << "write_log_and_missing, clearing up to " << dirty_to << dendl; ++ ldpp_dout(dpp, 10) << "_write_log_and_missing_wo_missing, clearing up to " << dirty_to ++ << " dirty_to_dups=" << dirty_to_dups ++ << " dirty_from_dups=" << dirty_from_dups ++ << " write_from_dups=" << write_from_dups << dendl; + if (touch_log) + t.touch(coll, log_oid); + if (dirty_to != eversion_t()) { +@@ -765,6 +791,8 @@ void PGLog::_write_log_and_missing_wo_mi + if (dirty_to_dups != eversion_t()) { + pg_log_dup_t min, dirty_to_dup; + dirty_to_dup.version = dirty_to_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups min=" << min.get_key_name() ++ << " to dirty_to_dup=" << dirty_to_dup.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + min.get_key_name(), dirty_to_dup.get_key_name()); +@@ -773,11 +801,16 @@ void PGLog::_write_log_and_missing_wo_mi + pg_log_dup_t max, dirty_from_dup; + max.version = eversion_t::max(); + dirty_from_dup.version = dirty_from_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups dirty_from_dup=" ++ << dirty_from_dup.get_key_name() ++ << " to max=" << max.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + dirty_from_dup.get_key_name(), max.get_key_name()); + } + ++ ldpp_dout(dpp, 10) << __func__ << " going to encode log.dups.size()=" ++ << log.dups.size() << dendl; + for (const auto& entry : log.dups) { + if (entry.version > dirty_to_dups) + break; +@@ -786,6 +819,8 @@ void PGLog::_write_log_and_missing_wo_mi + (*km)[entry.get_key_name()].claim(bl); + } + ++ ldpp_dout(dpp, 10) << __func__ << " 1st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + for (list<pg_log_dup_t>::reverse_iterator p = log.dups.rbegin(); + p != log.dups.rend() && + (p->version >= dirty_from_dups || p->version >= write_from_dups) && +@@ -796,8 +831,11 @@ void PGLog::_write_log_and_missing_wo_mi + (*km)[p->get_key_name()].claim(bl); + } + ++ ldpp_dout(dpp, 10) << __func__ << " 2st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + if (dirty_divergent_priors) { +- //dout(10) << "write_log_and_missing: writing 
divergent_priors" << dendl; ++ ldpp_dout(dpp, 10) << "write_log_and_missing: writing divergent_priors" ++ << dendl; + encode(divergent_priors, (*km)["divergent_priors"]); + } + if (require_rollback) { +@@ -808,6 +846,7 @@ void PGLog::_write_log_and_missing_wo_mi + log.get_rollback_info_trimmed_to(), + (*km)["rollback_info_trimmed_to"]); + } ++ ldpp_dout(dpp, 10) << "end of " << __func__ << dendl; + } + + // static +@@ -829,8 +868,14 @@ void PGLog::_write_log_and_missing( + eversion_t dirty_from_dups, + eversion_t write_from_dups, + bool *may_include_deletes_in_missing_dirty, // in/out param +- set *log_keys_debug ++ set *log_keys_debug, ++ const DoutPrefixProvider *dpp + ) { ++ ldpp_dout(dpp, 10) << __func__ << " clearing up to " << dirty_to ++ << " dirty_to_dups=" << dirty_to_dups ++ << " dirty_from_dups=" << dirty_from_dups ++ << " write_from_dups=" << write_from_dups ++ << " trimmed_dups.size()=" << trimmed_dups.size() << dendl; + set to_remove; + to_remove.swap(trimmed_dups); + for (auto& t : trimmed) { +@@ -853,7 +898,8 @@ void PGLog::_write_log_and_missing( + clear_up_to(log_keys_debug, dirty_to.get_key_name()); + } + if (dirty_to != eversion_t::max() && dirty_from != eversion_t::max()) { +- // dout(10) << "write_log_and_missing, clearing from " << dirty_from << dendl; ++ ldpp_dout(dpp, 10) << "write_log_and_missing, clearing from " ++ << dirty_from << dendl; + t.omap_rmkeyrange( + coll, log_oid, + dirty_from.get_key_name(), eversion_t::max().get_key_name()); +@@ -894,6 +940,8 @@ void PGLog::_write_log_and_missing( + if (dirty_to_dups != eversion_t()) { + pg_log_dup_t min, dirty_to_dup; + dirty_to_dup.version = dirty_to_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups min=" << min.get_key_name() ++ << " to dirty_to_dup=" << dirty_to_dup.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + min.get_key_name(), dirty_to_dup.get_key_name()); +@@ -902,11 +950,16 @@ void PGLog::_write_log_and_missing( + pg_log_dup_t max, dirty_from_dup; + max.version = eversion_t::max(); + dirty_from_dup.version = dirty_from_dups; ++ ldpp_dout(dpp, 10) << __func__ << " remove dups dirty_from_dup=" ++ << dirty_from_dup.get_key_name() ++ << " to max=" << max.get_key_name() << dendl; + t.omap_rmkeyrange( + coll, log_oid, + dirty_from_dup.get_key_name(), max.get_key_name()); + } + ++ ldpp_dout(dpp, 10) << __func__ << " going to encode log.dups.size()=" ++ << log.dups.size() << dendl; + for (const auto& entry : log.dups) { + if (entry.version > dirty_to_dups) + break; +@@ -914,6 +967,8 @@ void PGLog::_write_log_and_missing( + encode(entry, bl); + (*km)[entry.get_key_name()].claim(bl); + } ++ ldpp_dout(dpp, 10) << __func__ << " 1st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + + for (list::reverse_iterator p = log.dups.rbegin(); + p != log.dups.rend() && +@@ -925,8 +980,11 @@ void PGLog::_write_log_and_missing( + (*km)[p->get_key_name()].claim(bl); + } + ++ ldpp_dout(dpp, 10) << __func__ << " 2st round encoded log.dups.size()=" ++ << log.dups.size() << dendl; + if (clear_divergent_priors) { +- //dout(10) << "write_log_and_missing: writing divergent_priors" << dendl; ++ ldpp_dout(dpp, 10) << "write_log_and_missing: writing divergent_priors" ++ << dendl; + to_remove.insert("divergent_priors"); + } + // since we encode individual missing items instead of a whole +@@ -956,6 +1014,7 @@ void PGLog::_write_log_and_missing( + + if (!to_remove.empty()) + t.omap_rmkeys(coll, log_oid, to_remove); ++ ldpp_dout(dpp, 10) << "end of " << __func__ << dendl; + } + + void 
PGLog::rebuild_missing_set_with_deletes( +Index: ceph-15.2.17/src/osd/PGLog.h +=================================================================== +--- ceph-15.2.17.orig/src/osd/PGLog.h ++++ ceph-15.2.17/src/osd/PGLog.h +@@ -1274,8 +1274,9 @@ public: + map<string, bufferlist>* km, + pg_log_t &log, + const coll_t& coll, +- const ghobject_t &log_oid, map<eversion_t, hobject_t> &divergent_priors, +- bool require_rollback); ++ const ghobject_t &log_oid, std::map<eversion_t, hobject_t> &divergent_priors, ++ bool require_rollback, ++ const DoutPrefixProvider *dpp = nullptr); + + static void write_log_and_missing( + ObjectStore::Transaction& t, +@@ -1285,7 +1286,8 @@ public: + const ghobject_t &log_oid, + const pg_missing_tracker_t &missing, + bool require_rollback, +- bool *rebuilt_missing_set_with_deletes); ++ bool *rebuilt_missing_set_with_deletes, ++ const DoutPrefixProvider *dpp = nullptr); + + static void _write_log_and_missing_wo_missing( + ObjectStore::Transaction& t, +@@ -1302,7 +1304,8 @@ public: + eversion_t dirty_to_dups, + eversion_t dirty_from_dups, + eversion_t write_from_dups, +- set<string> *log_keys_debug ++ std::set<std::string> *log_keys_debug, ++ const DoutPrefixProvider *dpp = nullptr + ); + + static void _write_log_and_missing( +@@ -1323,7 +1326,8 @@ public: + eversion_t dirty_from_dups, + eversion_t write_from_dups, + bool *may_include_deletes_in_missing_dirty, +- set<string> *log_keys_debug ++ std::set<std::string> *log_keys_debug, ++ const DoutPrefixProvider *dpp = nullptr + ); + + void read_log_and_missing( +@@ -1336,7 +1340,7 @@ public: + bool debug_verify_stored_missing = false + ) { + return read_log_and_missing( +- store, ch, pgmeta_oid, info, ++ cct, store, ch, pgmeta_oid, info, + log, missing, oss, + tolerate_divergent_missing_log, + &clear_divergent_priors, +@@ -1347,6 +1351,7 @@ public: + + template <typename missing_type> + static void read_log_and_missing( ++ CephContext *cct, + ObjectStore *store, + ObjectStore::CollectionHandle &ch, + ghobject_t pgmeta_oid, +@@ -1360,9 +1365,9 @@ public: + set<string> *log_keys_debug = nullptr, + bool debug_verify_stored_missing = false + ) { +- ldpp_dout(dpp, 20) << "read_log_and_missing coll " << ch->cid ++ ldpp_dout(dpp, 10) << "read_log_and_missing coll " << ch->cid + << " " << pgmeta_oid << dendl; +- ++ size_t total_dups = 0; + // legacy? + struct stat st; + int r = store->stat(ch, pgmeta_oid, &st); +@@ -1377,8 +1382,9 @@ public: + map<eversion_t, hobject_t> divergent_priors; + bool must_rebuild = false; + missing.may_include_deletes = false; +- list<pg_log_entry_t> entries; +- list<pg_log_dup_t> dups; ++ std::list<pg_log_entry_t> entries; ++ std::list<pg_log_dup_t> dups; ++ const auto NUM_DUPS_WARN_THRESHOLD = 2*cct->_conf->osd_pg_log_dups_tracked; + if (p) { + for (p->seek_to_first(); p->valid() ; p->next()) { + // non-log pgmeta_oid keys are prefixed with _; skip those +@@ -1409,11 +1415,20 @@ public: + } + missing.add(oid, std::move(item)); + } else if (p->key().substr(0, 4) == string("dup_")) { ++ ++total_dups; + pg_log_dup_t dup; + decode(dup, bp); + if (!dups.empty()) { + ceph_assert(dups.back().version < dup.version); + } ++ if (dups.size() == NUM_DUPS_WARN_THRESHOLD) { ++ ldpp_dout(dpp, 0) << "read_log_and_missing WARN num of dups exceeded " ++ << NUM_DUPS_WARN_THRESHOLD << "." ++ << " You can be hit by THE DUPS BUG" ++ << " https://tracker.ceph.com/issues/53729." 
++ << " Consider ceph-objectstore-tool --op trim-pg-log-dups" ++ << dendl; ++ } + dups.push_back(dup); + } else { + pg_log_entry_t e; +@@ -1591,7 +1606,9 @@ public: + (*clear_divergent_priors) = false; + missing.flush(); + } +- ldpp_dout(dpp, 10) << "read_log_and_missing done" << dendl; ++ ldpp_dout(dpp, 10) << "read_log_and_missing done coll " << ch->cid ++ << " total_dups=" << total_dups ++ << " log.dups.size()=" << log.dups.size() << dendl; + } // static read_log_and_missing + + #ifdef WITH_SEASTAR +Index: ceph-15.2.17/src/osd/osd_types.cc +=================================================================== +--- ceph-15.2.17.orig/src/osd/osd_types.cc ++++ ceph-15.2.17/src/osd/osd_types.cc +@@ -5042,8 +5042,8 @@ static void _handle_dups(CephContext* cc + { + auto earliest_dup_version = + target.head.version < maxdups ? 0u : target.head.version - maxdups + 1; +- lgeneric_subdout(cct, osd, 20) << "copy_up_to/copy_after earliest_dup_version " << earliest_dup_version << dendl; +- ++ lgeneric_subdout(cct, osd, 20) << __func__ << " earliest_dup_version " ++ << earliest_dup_version << dendl; + for (auto d = other.dups.cbegin(); d != other.dups.cend(); ++d) { + if (d->version.version >= earliest_dup_version) { + lgeneric_subdout(cct, osd, 20) +@@ -5072,7 +5072,9 @@ void pg_log_t::copy_after(CephContext* c + can_rollback_to = other.can_rollback_to; + head = other.head; + tail = other.tail; +- lgeneric_subdout(cct, osd, 20) << __func__ << " v " << v << dendl; ++ lgeneric_subdout(cct, osd, 20) << __func__ << " v " << v ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + for (auto i = other.log.crbegin(); i != other.log.crend(); ++i) { + ceph_assert(i->version > other.tail); + if (i->version <= v) { +@@ -5084,6 +5086,9 @@ void pg_log_t::copy_after(CephContext* c + log.push_front(*i); + } + _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked); ++ lgeneric_subdout(cct, osd, 20) << __func__ << " END v " << v ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + } + + void pg_log_t::copy_up_to(CephContext* cct, const pg_log_t &other, int max) +@@ -5093,6 +5098,9 @@ void pg_log_t::copy_up_to(CephContext* c + head = other.head; + tail = other.tail; + lgeneric_subdout(cct, osd, 20) << __func__ << " max " << max << dendl; ++ lgeneric_subdout(cct, osd, 20) << __func__ << " max " << max ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + for (auto i = other.log.crbegin(); i != other.log.crend(); ++i) { + ceph_assert(i->version > other.tail); + if (n++ >= max) { +@@ -5103,6 +5111,9 @@ void pg_log_t::copy_up_to(CephContext* c + log.push_front(*i); + } + _handle_dups(cct, *this, other, cct->_conf->osd_pg_log_dups_tracked); ++ lgeneric_subdout(cct, osd, 20) << __func__ << " END max " << max ++ << " dups.size()=" << dups.size() ++ << " other.dups.size()=" << other.dups.size() << dendl; + } + + ostream& pg_log_t::print(ostream& out) const +Index: ceph-15.2.17/src/test/osd/TestPGLog.cc +=================================================================== +--- ceph-15.2.17.orig/src/test/osd/TestPGLog.cc ++++ ceph-15.2.17/src/test/osd/TestPGLog.cc +@@ -2739,8 +2739,8 @@ TEST_F(PGLogTrimTest, TestPartialTrim) + EXPECT_EQ(eversion_t(19, 160), write_from_dups2); + EXPECT_EQ(2u, log.log.size()); + EXPECT_EQ(1u, trimmed2.size()); +- EXPECT_EQ(2u, log.dups.size()); +- EXPECT_EQ(1u, trimmed_dups2.size()); ++ EXPECT_EQ(3u, log.dups.size()); ++ EXPECT_EQ(0u, 
trimmed_dups2.size()); + } + + +@@ -3023,7 +3023,7 @@ TEST_F(PGLogTrimTest, TestTrimDups) { + + EXPECT_EQ(eversion_t(20, 103), write_from_dups) << log; + EXPECT_EQ(2u, log.log.size()) << log; +- EXPECT_EQ(3u, log.dups.size()) << log; ++ EXPECT_EQ(4u, log.dups.size()) << log; + } + + // This tests trim() to make copies of +@@ -3067,7 +3067,7 @@ TEST_F(PGLogTrimTest, TestTrimDups2) { + + EXPECT_EQ(eversion_t(10, 100), write_from_dups) << log; + EXPECT_EQ(4u, log.log.size()) << log; +- EXPECT_EQ(5u, log.dups.size()) << log; ++ EXPECT_EQ(6u, log.dups.size()) << log; + } + + // This tests copy_up_to() to make copies of +Index: ceph-15.2.17/src/tools/ceph_objectstore_tool.cc +=================================================================== +--- ceph-15.2.17.orig/src/tools/ceph_objectstore_tool.cc ++++ ceph-15.2.17/src/tools/ceph_objectstore_tool.cc +@@ -14,8 +14,10 @@ + + #include <boost/program_options/variables_map.hpp> + #include <boost/program_options/parsers.hpp> ++#include <boost/algorithm/string.hpp> + #include <boost/scoped_ptr.hpp> + #include <boost/optional.hpp> ++#include <fstream> + + #include <stdlib.h> + +@@ -430,7 +432,7 @@ static int get_fd_data(int fd, bufferlis + return 0; + } + +-int get_log(ObjectStore *fs, __u8 struct_ver, ++int get_log(CephContext *cct, ObjectStore *fs, __u8 struct_ver, + spg_t pgid, const pg_info_t &info, + PGLog::IndexedLog &log, pg_missing_t &missing) + { +@@ -442,7 +444,7 @@ int get_log(ObjectStore *fs, __u8 struct + ostringstream oss; + ceph_assert(struct_ver > 0); + PGLog::read_log_and_missing( +- fs, ch, ++ cct, fs, ch, + pgid.make_pgmeta_oid(), + info, log, missing, + oss, +@@ -1068,7 +1070,8 @@ int add_osdmap(ObjectStore *store, metad + return get_osdmap(store, ms.map_epoch, ms.osdmap, ms.osdmap_bl); + } + +-int ObjectStoreTool::do_export(ObjectStore *fs, coll_t coll, spg_t pgid, ++int ObjectStoreTool::do_export( ++ CephContext *cct, ObjectStore *fs, coll_t coll, spg_t pgid, + pg_info_t &info, epoch_t map_epoch, __u8 struct_ver, + const OSDSuperblock& superblock, + PastIntervals &past_intervals) +@@ -1078,7 +1081,7 @@ int ObjectStoreTool::do_export(ObjectSto + + cerr << "Exporting " << pgid << " info " << info << std::endl; + +- int ret = get_log(fs, struct_ver, pgid, info, log, missing); ++ int ret = get_log(cct, fs, struct_ver, pgid, info, log, missing); + if (ret > 0) + return ret; + +@@ -3147,6 +3150,136 @@ int dup(string srcpath, ObjectStore *src + return r; + } + ++ ++const int ceph_entity_name_type(const string name) ++{ ++ if (name == "mds") return CEPH_ENTITY_TYPE_MDS; ++ if (name == "osd") return CEPH_ENTITY_TYPE_OSD; ++ if (name == "mon") return CEPH_ENTITY_TYPE_MON; ++ if (name == "client") return CEPH_ENTITY_TYPE_CLIENT; ++ if (name == "mgr") return CEPH_ENTITY_TYPE_MGR; ++ if (name == "auth") return CEPH_ENTITY_TYPE_AUTH; ++ return -1; ++} ++ ++eversion_t get_eversion_from_str(const string& s) { ++ eversion_t e; ++ vector<string> result; ++ boost::split(result, s, boost::is_any_of("'")); ++ if (result.size() != 2) { ++ cerr << "eversion_t: invalid format: '" << s << "'" << std::endl; ++ return e; ++ } ++ e.epoch = atoi(result[0].c_str()); ++ e.version = atoi(result[1].c_str()); ++ return e; ++} ++ ++osd_reqid_t get_reqid_from_str(const string& s) { ++ osd_reqid_t reqid; ++ ++ vector<string> result; ++ boost::split(result, s, boost::is_any_of(".:")); ++ if (result.size() != 4) { ++ cerr << "reqid: invalid format " << s << std::endl; ++ return osd_reqid_t(); ++ } ++ reqid.name._type = ceph_entity_name_type(result[0]); ++ reqid.name._num = atoi(result[1].c_str()); ++ ++ reqid.inc = atoi(result[2].c_str()); ++ reqid.tid = atoi(result[3].c_str()); ++ return reqid; ++} ++ ++void do_dups_inject_transction(ObjectStore 
++{
++  ObjectStore::Transaction t;
++  coll_t coll(r_pgid);
++  cerr << "injecting dups into pgid:" << r_pgid << " num of dups:" << new_dups->size() << std::endl;
++  t.omap_setkeys(coll, r_pgid.make_pgmeta_oid(), (*new_dups));
++  auto ch = store->open_collection(coll);
++  store->queue_transaction(ch, std::move(t));
++  new_dups->clear();
++}
++
++int do_dups_inject_object(ObjectStore *store, spg_t r_pgid, json_spirit::mObject &in_json_obj,
++                          map<string, bufferlist> *new_dups, bool debug) {
++  std::map<std::string, json_spirit::mValue>::const_iterator it = in_json_obj.find("generate");
++  int32_t generate = 0;
++  if (it != in_json_obj.end()) {
++    generate = atoi(it->second.get_str().c_str());
++  }
++
++  it = in_json_obj.find("reqid");
++  if (it == in_json_obj.end()) {
++    return 1;
++  }
++  osd_reqid_t reqid(get_reqid_from_str(it->second.get_str()));
++  it = in_json_obj.find("version");
++  if (it == in_json_obj.end()) {
++    return 1;
++  }
++  eversion_t version(get_eversion_from_str(it->second.get_str()));
++  it = in_json_obj.find("user_version");
++  if (it == in_json_obj.end()) {
++    return 1;
++  }
++  version_t user_version = atoi(it->second.get_str().c_str());
++  it = in_json_obj.find("return_code");
++  if (it == in_json_obj.end()) {
++    return 1;
++  }
++  int32_t return_code = atoi(it->second.get_str().c_str());
++  if (generate) {
++    for (auto i = 0; i < generate; ++i) {
++      version.version++;
++      if (debug) {
++        cout << "generate dups reqid " << reqid << " v=" << version << std::endl;
++      }
++      pg_log_dup_t tmp(version, user_version, reqid, return_code);
++      bufferlist bl;
++      encode(tmp, bl);
++      (*new_dups)[tmp.get_key_name()] = std::move(bl);
++      if (new_dups->size() > 50000) {
++        do_dups_inject_transction(store, r_pgid, new_dups);
++        cout << "inject of " << i << " dups into pgid:" << r_pgid << " done..." << std::endl;
++      }
++    }
++    return 0;
++  } else {
++    pg_log_dup_t tmp(version, user_version, reqid, return_code);
++    if (debug) {
++      cout << "adding dup: " << tmp << " into key:" << tmp.get_key_name() << std::endl;
++    }
++    bufferlist bl;
++    encode(tmp, bl);
++    (*new_dups)[tmp.get_key_name()] = std::move(bl);
++  }
++  return 0;
++}
++
++void do_dups_inject_from_json(ObjectStore *store, spg_t r_pgid, json_spirit::mValue &inJson, bool debug)
++{
++  map<string, bufferlist> new_dups;
++  const vector<json_spirit::mValue>& o = inJson.get_array();
++  for (const auto& obj : o) {
++    if (obj.type() == json_spirit::obj_type) {
++      json_spirit::mObject Mobj = obj.get_obj();
++      do_dups_inject_object(store, r_pgid, Mobj, &new_dups, debug);
++    } else {
++      throw std::runtime_error("JSON array/object not allowed type:" + std::to_string(obj.type()));
++      return;
++    }
++  }
++  if (new_dups.size() > 0) {
++    do_dups_inject_transction(store, r_pgid, &new_dups);
++  }
++
++
++  return;
++}
++
+ void usage(po::options_description &desc)
+ {
+   cerr << std::endl;
+@@ -3480,7 +3613,7 @@ int main(int argc, char **argv)
+     } else {
+       file_fd = open(file.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666);
+     }
+-  } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap") {
++  } else if (op == "import" || op == "dump-export" || op == "set-osdmap" || op == "set-inc-osdmap" || op == "pg-log-inject-dups") {
+     if (!vm.count("file") || file == "-") {
+       if (isatty(STDIN_FILENO)) {
+         cerr << "stdin is a tty and no --file filename specified" << std::endl;
+@@ -3859,7 +3992,7 @@ int main(int argc, char **argv)
+       || op == "export-remove" || op == "mark-complete"
+       || op == "reset-last-complete"
+       || op == "trim-pg-log"
+-      || op == "trim-pg-log-dups") &&
++      || op == "pg-log-inject-dups") &&
+       pgidstr.length() == 0) {
+     cerr << "Must provide pgid" << std::endl;
+     usage(desc);
+@@ -4086,7 +4219,7 @@ int main(int argc, char **argv)
+ 
+   // If not an object command nor any of the ops handled below, then output this usage
+   // before complaining about a bad pgid
+-  if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log" && op != "trim-pg-log-dups") {
++  if (!vm.count("objcmd") && op != "export" && op != "export-remove" && op != "info" && op != "log" && op != "mark-complete" && op != "trim-pg-log" && op != "trim-pg-log-dups" && op != "pg-log-inject-dups") {
+     cerr << "Must provide --op (info, log, remove, mkfs, fsck, repair, export, export-remove, import, list, fix-lost, list-pgs, dump-journal, dump-super, meta-list, "
+       "get-osdmap, set-osdmap, get-inc-osdmap, set-inc-osdmap, mark-complete, reset-last-complete, dump-export, trim-pg-log, trim-pg-log-dups statfs)"
+       << std::endl;
+@@ -4378,7 +4511,7 @@ int main(int argc, char **argv)
+ 
+   if (op == "export" || op == "export-remove") {
+     ceph_assert(superblock != nullptr);
+-    ret = tool.do_export(fs, coll, pgid, info, map_epoch, struct_ver, *superblock, past_intervals);
++    ret = tool.do_export(cct.get(), fs, coll, pgid, info, map_epoch, struct_ver, *superblock, past_intervals);
+     if (ret == 0) {
+       cerr << "Export successful" << std::endl;
+       if (op == "export-remove") {
+@@ -4397,7 +4530,7 @@ int main(int argc, char **argv)
+   } else if (op == "log") {
+     PGLog::IndexedLog log;
+     pg_missing_t missing;
+-    ret = get_log(fs, struct_ver, pgid, info, log, missing);
++    ret = get_log(cct.get(), fs, struct_ver, pgid, info, log, missing);
+     if (ret < 0)
+       goto out;
+ 
+@@ -4482,6 +4615,34 @@ int main(int argc, char **argv)
+     }
+     cout << "Reseting last_complete succeeded" << std::endl;
"Reseting last_complete succeeded" << std::endl; + ++ } else if (op == "pg-log-inject-dups") { ++ if (!vm.count("file") || file == "-") { ++ cerr << "Must provide file containing JSON dups entries" << std::endl; ++ ret = 1; ++ goto out; ++ } ++ if (debug) ++ cerr << "opening file " << file << std::endl; ++ ++ ifstream json_file_stream(file , std::ifstream::in); ++ if (!json_file_stream.is_open()) { ++ cerr << "unable to open file " << file << std::endl; ++ ret = -1; ++ goto out; ++ } ++ json_spirit::mValue result; ++ try { ++ if (!json_spirit::read(json_file_stream, result)) ++ throw std::runtime_error("unparseable JSON " + file); ++ if (result.type() != json_spirit::array_type) { ++ cerr << "result is not an array_type - type=" << result.type() << std::endl; ++ throw std::runtime_error("not JSON array_type " + file); ++ } ++ do_dups_inject_from_json(fs, pgid, result, debug); ++ } catch (const std::runtime_error &e) { ++ cerr << e.what() << std::endl;; ++ return -1; ++ } + } else { + ceph_assert(!"Should have already checked for valid --op"); + } +Index: ceph-15.2.17/src/tools/ceph_objectstore_tool.h +=================================================================== +--- ceph-15.2.17.orig/src/tools/ceph_objectstore_tool.h ++++ ceph-15.2.17/src/tools/ceph_objectstore_tool.h +@@ -27,7 +27,7 @@ class ObjectStoreTool : public RadosDump + int dump_export(Formatter *formatter); + int do_import(ObjectStore *store, OSDSuperblock& sb, bool force, + std::string pgidstr); +- int do_export(ObjectStore *fs, coll_t coll, spg_t pgid, ++ int do_export(CephContext *cct, ObjectStore *fs, coll_t coll, spg_t pgid, + pg_info_t &info, epoch_t map_epoch, __u8 struct_ver, + const OSDSuperblock& superblock, + PastIntervals &past_intervals); diff -Nru ceph-15.2.17/src/test/ubuntu-18.04/debian/patches/series ceph-15.2.17/src/test/ubuntu-18.04/debian/patches/series --- ceph-15.2.17/src/test/ubuntu-18.04/debian/patches/series 2022-10-12 13:26:35.000000000 +0000 +++ ceph-15.2.17/src/test/ubuntu-18.04/debian/patches/series 2022-10-31 05:22:09.000000000 +0000 @@ -11,3 +11,4 @@ riscv64-link-pthread.patch # AARCH64 EC regression bug1917414.patch +bug1978913.patch