From 3fd079262cdc79f302d3b16df30969516174f85b Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Mon, 26 Oct 2020 12:59:12 +0300 Subject: [PATCH] mdbx: fix extra-rare `MDBX_KEYEXIST` during `mdbx_txn_commit()`. The MDBX_KEYEXIST error could occur inside mdbx_update_gc() in an extremely rare case: - no GC records were reclaimed before mdbx_txn_commit() was called; - there were a few loose pages during the transaction; - some reader prohibits reclaiming, therefore mdbx_page_alloc(MDBX_ALLOC_GC), which is called to obtain the ID of an existing GC record, returns MDBX_NOTFOUND; - immediately afterwards the reader completes its transaction and unblocks reclaiming; - mdbx_update_gc() decides that there are no reclaimable GC entries, i.e. no GC entries with ID < mdbx_find_oldest(), and that it is safe to use mdbx_find_oldest() - 1 to store the loose page list; - but mdbx_find_oldest() now returns a newer/larger ID than expected, so MDBX_KEYEXIST is returned when this ID is used. Change-Id: I9726217d6b5983f1e31a211c0eeb3edc8ff94282 --- src/core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index 8478f0b7..b6dee5c0 100644 --- a/src/core.c +++ b/src/core.c @@ -7503,9 +7503,12 @@ retry_noaccount: env->me_maxgc_ov1page) { /* LY: need just a txn-id for save page list. 
*/ - couple.outer.mc_flags &= ~C_RECLAIMING; bool need_cleanup = false; + txnid_t snap_oldest; + retry_rid: + couple.outer.mc_flags &= ~C_RECLAIMING; do { + snap_oldest = mdbx_find_oldest(txn); rc = mdbx_page_alloc(&couple.outer, 0, NULL, MDBX_ALLOC_GC); if (likely(rc == MDBX_SUCCESS)) { mdbx_trace("%s: took @%" PRIaTXN " from GC", dbg_prefix_mode, @@ -7533,9 +7536,13 @@ retry_noaccount: gc_rid = MDBX_PNL_LAST(txn->tw.lifo_reclaimed); } else { mdbx_tassert(txn, txn->tw.last_reclaimed == 0); + if (unlikely(mdbx_find_oldest(txn) != snap_oldest)) + /* should retry mdbx_page_alloc(MDBX_ALLOC_GC) + * if the oldest reader changes since the last attempt */ + goto retry_rid; /* no reclaimable GC entries, * therefore no entries with ID < mdbx_find_oldest(txn) */ - txn->tw.last_reclaimed = gc_rid = mdbx_find_oldest(txn) - 1; + txn->tw.last_reclaimed = gc_rid = snap_oldest - 1; mdbx_trace("%s: none recycled yet, set rid to @%" PRIaTXN, dbg_prefix_mode, gc_rid); }