mdbx: refine page_alloc() and update_gc() to reduce looping.

Resolves https://github.com/erthink/libmdbx/issues/254.
This commit is contained in:
Leonid Yuriev 2021-12-30 17:36:33 +03:00
parent 2ba90e63b1
commit ae5df65af0

View File

@ -6335,7 +6335,6 @@ no_loose:
pgno_t pgno, *re_list = txn->tw.reclaimed_pglist;
unsigned range_begin = 0, re_len = MDBX_PNL_SIZE(re_list);
txnid_t oldest = 0, last = 0;
const unsigned wanna_range = num - 1;
while (true) { /* hsr-kick retry loop */
MDBX_cursor_couple recur;
@ -6348,13 +6347,15 @@ no_loose:
mdbx_tassert(txn, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist,
txn->mt_next_pgno));
if ((flags & (MDBX_COALESCE | MDBX_ALLOC_CACHE)) == MDBX_ALLOC_CACHE &&
re_len > wanna_range) {
re_len >= num) {
mdbx_tassert(txn, MDBX_PNL_LAST(re_list) < txn->mt_next_pgno &&
MDBX_PNL_FIRST(re_list) < txn->mt_next_pgno);
range_begin = MDBX_PNL_ASCENDING ? 1 : re_len;
pgno = MDBX_PNL_LEAST(re_list);
if (likely(wanna_range == 0))
if (likely(num == 1))
goto done;
const unsigned wanna_range = num - 1;
#if MDBX_PNL_ASCENDING
mdbx_tassert(txn, pgno == re_list[1] && range_begin == 1);
while (true) {
@ -6558,7 +6559,8 @@ no_loose:
}
/* Done for a kick-reclaim mode, actually no page needed */
if (unlikely(flags & MDBX_ALLOC_SLOT)) {
if (unlikely(num == 0)) {
mdbx_assert(env, flags & MDBX_ALLOC_SLOT);
ret.err = MDBX_SUCCESS;
ret.page = NULL;
return ret;
@ -8722,10 +8724,12 @@ static int mdbx_prep_backlog(MDBX_txn *txn, MDBX_cursor *gc_cursor,
err);
}
}
err = mdbx_page_alloc(gc_cursor, linear4list,
MDBX_ALLOC_GC | MDBX_ALLOC_CACHE | MDBX_ALLOC_SLOT)
.err;
err =
mdbx_page_alloc(gc_cursor, linear4list, MDBX_ALLOC_GC | MDBX_ALLOC_SLOT)
.err;
mdbx_trace("== after-4linear, backlog %u, err %d", backlog_size(txn), err);
mdbx_cassert(gc_cursor,
backlog_size(txn) >= linear4list || err != MDBX_SUCCESS);
}
while (backlog_size(txn) < backlog4cow + linear4list && err == MDBX_SUCCESS)
@ -8768,10 +8772,10 @@ static int mdbx_update_gc(MDBX_txn *txn) {
couple.outer.mc_flags |= C_RECLAIMING;
couple.outer.mc_next = txn->tw.cursors[FREE_DBI];
txn->tw.cursors[FREE_DBI] = &couple.outer;
bool dense_gc = false;
retry:
++loop;
retry_noaccount:
mdbx_trace("%s", " >> restart");
mdbx_tassert(txn,
mdbx_pnl_check4assert(txn->tw.reclaimed_pglist,
@ -8783,7 +8787,7 @@ retry_noaccount:
goto bailout;
}
if (retired_stored != MDBX_PNL_SIZE(txn->tw.retired_pages)) {
if (unlikely(dense_gc) && retired_stored) {
rc = mdbx_prep_backlog(txn, &couple.outer,
MDBX_PNL_SIZEOF(txn->tw.retired_pages),
&retired_stored);
@ -8799,6 +8803,15 @@ retry_noaccount:
MDBX_val key, data;
mdbx_trace("%s", " >> continue");
if (retired_stored != MDBX_PNL_SIZE(txn->tw.retired_pages) &&
MDBX_PNL_SIZE(txn->tw.retired_pages) > env->me_maxgc_ov1page) {
rc = mdbx_prep_backlog(txn, &couple.outer,
MDBX_PNL_SIZEOF(txn->tw.retired_pages),
&retired_stored);
if (unlikely(rc != MDBX_SUCCESS))
goto bailout;
}
mdbx_tassert(txn,
mdbx_pnl_check4assert(txn->tw.reclaimed_pglist,
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
@ -8823,9 +8836,11 @@ retry_noaccount:
continue;
if (unlikely(rc != MDBX_SUCCESS))
goto bailout;
rc = mdbx_prep_backlog(txn, &couple.outer, 0, nullptr);
if (unlikely(rc != MDBX_SUCCESS))
goto bailout;
if (likely(!dense_gc)) {
rc = mdbx_prep_backlog(txn, &couple.outer, 0, nullptr);
if (unlikely(rc != MDBX_SUCCESS))
goto bailout;
}
mdbx_tassert(txn,
cleaned_gc_id < env->me_lck->mti_oldest_reader.weak);
mdbx_trace("%s: cleanup-reclaimed-id [%u]%" PRIaTXN, dbg_prefix_mode,
@ -8863,7 +8878,7 @@ retry_noaccount:
}
if (cleaned_gc_id > txn->tw.last_reclaimed)
break;
if (cleaned_gc_id < txn->tw.last_reclaimed) {
if (likely(!dense_gc) && cleaned_gc_id < txn->tw.last_reclaimed) {
rc = mdbx_prep_backlog(txn, &couple.outer, 0, nullptr);
if (unlikely(rc != MDBX_SUCCESS))
goto bailout;
@ -9014,11 +9029,12 @@ retry_noaccount:
mdbx_debug_extra_print(" %" PRIaPGNO, txn->tw.retired_pages[i]);
mdbx_debug_extra_print("%s\n", ".");
}
if (unlikely(amount != MDBX_PNL_SIZE(txn->tw.reclaimed_pglist))) {
if (unlikely(amount != MDBX_PNL_SIZE(txn->tw.reclaimed_pglist) &&
settled)) {
mdbx_trace("%s: reclaimed-list changed %u -> %u, retry",
dbg_prefix_mode, amount,
(unsigned)MDBX_PNL_SIZE(txn->tw.reclaimed_pglist));
goto retry_noaccount /* rare case, but avoids GC fragmentation
goto retry /* rare case, but avoids GC fragmentation
and one cycle. */
;
}
@ -9062,7 +9078,8 @@ retry_noaccount:
prefer_max_scatter &&
left > ((unsigned)MDBX_PNL_SIZE(txn->tw.lifo_reclaimed) -
reused_gc_slot) *
env->me_maxgc_ov1page) {
env->me_maxgc_ov1page &&
!dense_gc) {
/* LY: need just a txn-id for save page list. */
bool need_cleanup = false;
@ -9118,7 +9135,7 @@ retry_noaccount:
left > ((unsigned)MDBX_PNL_SIZE(txn->tw.lifo_reclaimed) -
reused_gc_slot) *
env->me_maxgc_ov1page) {
if (unlikely(gc_rid < 2)) {
if (unlikely(gc_rid <= MIN_TXNID)) {
if (unlikely(MDBX_PNL_SIZE(txn->tw.lifo_reclaimed) <=
reused_gc_slot)) {
mdbx_notice("** restart: reserve depleted (reused_gc_slot %u >= "
@ -9143,6 +9160,7 @@ retry_noaccount:
rc = mdbx_cursor_get(&couple.outer, &key, &data, MDBX_FIRST);
if (rc == MDBX_NOTFOUND) {
mdbx_debug("%s: GC is empty", dbg_prefix_mode);
dense_gc = true;
break;
}
if (unlikely(rc != MDBX_SUCCESS ||
@ -9156,14 +9174,16 @@ retry_noaccount:
rc = MDBX_CORRUPTED;
goto bailout;
}
if (gc_first < 2) {
if (gc_first <= MIN_TXNID) {
mdbx_debug("%s: no free GC's id(s) less than %" PRIaTXN,
dbg_prefix_mode, gc_rid);
dense_gc = true;
break;
}
gc_rid = gc_first - 1;
}
mdbx_assert(env, !dense_gc);
rc = mdbx_txl_append(&txn->tw.lifo_reclaimed, gc_rid);
if (unlikely(rc != MDBX_SUCCESS))
goto bailout;
@ -9181,10 +9201,12 @@ retry_noaccount:
dbg_prefix_mode, gc_rid, cleaned_gc_slot);
}
if (need_cleanup) {
if (need_cleanup || dense_gc) {
if (cleaned_gc_slot)
mdbx_trace(
"%s: restart inner-loop to clear and re-create GC entries",
dbg_prefix_mode);
cleaned_gc_slot = 0;
mdbx_trace("%s: restart inner-loop to clear and re-create GC entries",
dbg_prefix_mode);
continue;
}
}
@ -9216,6 +9238,7 @@ retry_noaccount:
gc_rid = gc_first - 1;
if (unlikely(gc_rid == 0)) {
mdbx_error("%s", "** no GC tail-space to store");
dense_gc = true;
goto retry;
}
} else if (rc != MDBX_NOTFOUND)
@ -9251,7 +9274,7 @@ retry_noaccount:
const unsigned tail = left - threshold + env->me_maxgc_ov1page + 1;
unsigned span = 1;
unsigned avail = (unsigned)((pgno2bytes(env, span) - PAGEHDRSZ) /
sizeof(pgno_t)) /*- 1 + span */;
sizeof(pgno_t)) /* - 1 + span */;
if (tail > avail) {
for (unsigned i = amount - span; i > 0; --i) {
if (MDBX_PNL_ASCENDING
@ -9288,7 +9311,7 @@ retry_noaccount:
mdbx_tassert(txn, reservation_gc_id < env->me_lck->mti_oldest_reader.weak);
if (unlikely(
reservation_gc_id < 1 ||
reservation_gc_id < MIN_TXNID ||
reservation_gc_id >=
atomic_load64(&env->me_lck->mti_oldest_reader, mo_Relaxed))) {
mdbx_error("** internal error (reservation_gc_id %" PRIaTXN ")",
@ -9317,10 +9340,12 @@ retry_noaccount:
chunk);
if (txn->tw.lifo_reclaimed &&
unlikely(amount < MDBX_PNL_SIZE(txn->tw.reclaimed_pglist))) {
unlikely(amount < MDBX_PNL_SIZE(txn->tw.reclaimed_pglist)) &&
(loop < 5 || MDBX_PNL_SIZE(txn->tw.reclaimed_pglist) - amount >
env->me_maxgc_ov1page)) {
mdbx_notice("** restart: reclaimed-list growth %u -> %u", amount,
(unsigned)MDBX_PNL_SIZE(txn->tw.reclaimed_pglist));
goto retry_noaccount;
goto retry;
}
continue;