From ae5df65af0efaa051d754af4eaf82254965db3b7 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Thu, 30 Dec 2021 17:36:33 +0300 Subject: [PATCH] mdbx: refine `page_alloc()` and `update_gc()` to reduce looping. Resolves https://github.com/erthink/libmdbx/issues/254. --- src/core.c | 75 ++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 50 insertions(+), 25 deletions(-) diff --git a/src/core.c b/src/core.c index 42b5cb87..7edb8755 100644 --- a/src/core.c +++ b/src/core.c @@ -6335,7 +6335,6 @@ no_loose: pgno_t pgno, *re_list = txn->tw.reclaimed_pglist; unsigned range_begin = 0, re_len = MDBX_PNL_SIZE(re_list); txnid_t oldest = 0, last = 0; - const unsigned wanna_range = num - 1; while (true) { /* hsr-kick retry loop */ MDBX_cursor_couple recur; @@ -6348,13 +6347,15 @@ no_loose: mdbx_tassert(txn, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist, txn->mt_next_pgno)); if ((flags & (MDBX_COALESCE | MDBX_ALLOC_CACHE)) == MDBX_ALLOC_CACHE && - re_len > wanna_range) { + re_len >= num) { mdbx_tassert(txn, MDBX_PNL_LAST(re_list) < txn->mt_next_pgno && MDBX_PNL_FIRST(re_list) < txn->mt_next_pgno); range_begin = MDBX_PNL_ASCENDING ? 1 : re_len; pgno = MDBX_PNL_LEAST(re_list); - if (likely(wanna_range == 0)) + if (likely(num == 1)) goto done; + + const unsigned wanna_range = num - 1; #if MDBX_PNL_ASCENDING mdbx_tassert(txn, pgno == re_list[1] && range_begin == 1); while (true) { @@ -6558,7 +6559,8 @@ no_loose: } /* Done for a kick-reclaim mode, actually no page needed */ - if (unlikely(flags & MDBX_ALLOC_SLOT)) { + if (unlikely(num == 0)) { + mdbx_assert(env, flags & MDBX_ALLOC_SLOT); ret.err = MDBX_SUCCESS; ret.page = NULL; return ret; @@ -8722,10 +8724,12 @@ static int mdbx_prep_backlog(MDBX_txn *txn, MDBX_cursor *gc_cursor, err); } } - err = mdbx_page_alloc(gc_cursor, linear4list, - MDBX_ALLOC_GC | MDBX_ALLOC_CACHE | MDBX_ALLOC_SLOT) - .err; + err = + mdbx_page_alloc(gc_cursor, linear4list, MDBX_ALLOC_GC | MDBX_ALLOC_SLOT) + .err; mdbx_trace("== after-4linear, backlog %u, err %d", backlog_size(txn), err); + mdbx_cassert(gc_cursor, + backlog_size(txn) >= linear4list || err != MDBX_SUCCESS); } while (backlog_size(txn) < backlog4cow + linear4list && err == MDBX_SUCCESS) @@ -8768,10 +8772,10 @@ static int mdbx_update_gc(MDBX_txn *txn) { couple.outer.mc_flags |= C_RECLAIMING; couple.outer.mc_next = txn->tw.cursors[FREE_DBI]; txn->tw.cursors[FREE_DBI] = &couple.outer; + bool dense_gc = false; retry: ++loop; -retry_noaccount: mdbx_trace("%s", " >> restart"); mdbx_tassert(txn, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist, @@ -8783,7 +8787,7 @@ retry_noaccount: goto bailout; } - if (retired_stored != MDBX_PNL_SIZE(txn->tw.retired_pages)) { + if (unlikely(dense_gc) && retired_stored) { rc = mdbx_prep_backlog(txn, &couple.outer, MDBX_PNL_SIZEOF(txn->tw.retired_pages), &retired_stored); @@ -8799,6 +8803,15 @@ retry_noaccount: MDBX_val key, data; mdbx_trace("%s", " >> continue"); + if (retired_stored != MDBX_PNL_SIZE(txn->tw.retired_pages) && + MDBX_PNL_SIZE(txn->tw.retired_pages) > env->me_maxgc_ov1page) { + rc = mdbx_prep_backlog(txn, &couple.outer, + MDBX_PNL_SIZEOF(txn->tw.retired_pages), + &retired_stored); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } + mdbx_tassert(txn, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist, txn->mt_next_pgno - MDBX_ENABLE_REFUND)); @@ -8823,9 +8836,11 @@ retry_noaccount: continue; if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - rc = mdbx_prep_backlog(txn, &couple.outer, 0, nullptr); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; + if (likely(!dense_gc)) { + rc = mdbx_prep_backlog(txn, &couple.outer, 0, nullptr); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + } mdbx_tassert(txn, cleaned_gc_id < env->me_lck->mti_oldest_reader.weak); mdbx_trace("%s: cleanup-reclaimed-id [%u]%" PRIaTXN, dbg_prefix_mode, @@ -8863,7 +8878,7 @@ retry_noaccount: } if (cleaned_gc_id > txn->tw.last_reclaimed) break; - if (cleaned_gc_id < txn->tw.last_reclaimed) { + if (likely(!dense_gc) && cleaned_gc_id < txn->tw.last_reclaimed) { rc = mdbx_prep_backlog(txn, &couple.outer, 0, nullptr); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; @@ -9014,11 +9029,12 @@ retry_noaccount: mdbx_debug_extra_print(" %" PRIaPGNO, txn->tw.retired_pages[i]); mdbx_debug_extra_print("%s\n", "."); } - if (unlikely(amount != MDBX_PNL_SIZE(txn->tw.reclaimed_pglist))) { + if (unlikely(amount != MDBX_PNL_SIZE(txn->tw.reclaimed_pglist) && + settled)) { mdbx_trace("%s: reclaimed-list changed %u -> %u, retry", dbg_prefix_mode, amount, (unsigned)MDBX_PNL_SIZE(txn->tw.reclaimed_pglist)); - goto retry_noaccount /* rare case, but avoids GC fragmentation + goto retry /* rare case, but avoids GC fragmentation and one cycle. */ ; } @@ -9062,7 +9078,8 @@ retry_noaccount: prefer_max_scatter && left > ((unsigned)MDBX_PNL_SIZE(txn->tw.lifo_reclaimed) - reused_gc_slot) * - env->me_maxgc_ov1page) { + env->me_maxgc_ov1page && + !dense_gc) { /* LY: need just a txn-id for save page list. */ bool need_cleanup = false; @@ -9118,7 +9135,7 @@ retry_noaccount: left > ((unsigned)MDBX_PNL_SIZE(txn->tw.lifo_reclaimed) - reused_gc_slot) * env->me_maxgc_ov1page) { - if (unlikely(gc_rid < 2)) { + if (unlikely(gc_rid <= MIN_TXNID)) { if (unlikely(MDBX_PNL_SIZE(txn->tw.lifo_reclaimed) <= reused_gc_slot)) { mdbx_notice("** restart: reserve depleted (reused_gc_slot %u >= " @@ -9143,6 +9160,7 @@ retry_noaccount: rc = mdbx_cursor_get(&couple.outer, &key, &data, MDBX_FIRST); if (rc == MDBX_NOTFOUND) { mdbx_debug("%s: GC is empty", dbg_prefix_mode); + dense_gc = true; break; } if (unlikely(rc != MDBX_SUCCESS || @@ -9156,14 +9174,16 @@ retry_noaccount: rc = MDBX_CORRUPTED; goto bailout; } - if (gc_first < 2) { + if (gc_first <= MIN_TXNID) { mdbx_debug("%s: no free GC's id(s) less than %" PRIaTXN, dbg_prefix_mode, gc_rid); + dense_gc = true; break; } gc_rid = gc_first - 1; } + mdbx_assert(env, !dense_gc); rc = mdbx_txl_append(&txn->tw.lifo_reclaimed, gc_rid); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; @@ -9181,10 +9201,12 @@ retry_noaccount: dbg_prefix_mode, gc_rid, cleaned_gc_slot); } - if (need_cleanup) { + if (need_cleanup || dense_gc) { + if (cleaned_gc_slot) + mdbx_trace( + "%s: restart inner-loop to clear and re-create GC entries", + dbg_prefix_mode); cleaned_gc_slot = 0; - mdbx_trace("%s: restart inner-loop to clear and re-create GC entries", - dbg_prefix_mode); continue; } } @@ -9216,6 +9238,7 @@ retry_noaccount: gc_rid = gc_first - 1; if (unlikely(gc_rid == 0)) { mdbx_error("%s", "** no GC tail-space to store"); + dense_gc = true; goto retry; } } else if (rc != MDBX_NOTFOUND) @@ -9251,7 +9274,7 @@ retry_noaccount: const unsigned tail = left - threshold + env->me_maxgc_ov1page + 1; unsigned span = 1; unsigned avail = (unsigned)((pgno2bytes(env, span) - PAGEHDRSZ) / - sizeof(pgno_t)) /*- 1 + span */; + sizeof(pgno_t)) /* - 1 + span */; if (tail > avail) { for (unsigned i = amount - span; i > 0; --i) { if (MDBX_PNL_ASCENDING @@ -9288,7 +9311,7 @@ retry_noaccount: mdbx_tassert(txn, reservation_gc_id < env->me_lck->mti_oldest_reader.weak); if (unlikely( - reservation_gc_id < 1 || + reservation_gc_id < MIN_TXNID || reservation_gc_id >= atomic_load64(&env->me_lck->mti_oldest_reader, mo_Relaxed))) { mdbx_error("** internal error (reservation_gc_id %" PRIaTXN ")", @@ -9317,10 +9340,12 @@ retry_noaccount: chunk); if (txn->tw.lifo_reclaimed && - unlikely(amount < MDBX_PNL_SIZE(txn->tw.reclaimed_pglist))) { + unlikely(amount < MDBX_PNL_SIZE(txn->tw.reclaimed_pglist)) && + (loop < 5 || MDBX_PNL_SIZE(txn->tw.reclaimed_pglist) - amount > + env->me_maxgc_ov1page)) { mdbx_notice("** restart: reclaimed-list growth %u -> %u", amount, (unsigned)MDBX_PNL_SIZE(txn->tw.reclaimed_pglist)); - goto retry_noaccount; + goto retry; } continue;