mdbx: simplify page_alloc_slowpath().

Simplification by removing the check of the `MDBX_ALLOC_GC` flag,
which is always set when page_alloc_slowpath() is called.
Леонид Юрьев (Leonid Yuriev) 2022-11-16 18:45:37 +03:00
parent 3e05d1a427
commit f73cd7a491
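
The gist of the change: every caller of page_alloc_slowpath() already passes MDBX_ALLOC_GC, so the runtime check of that flag inside the function is redundant and the `if (likely(flags & MDBX_ALLOC_GC)) { ... }` wrapper can be dropped, with the precondition captured by an assertion instead. A minimal self-contained sketch of this pattern, using illustrative names (`ALLOC_GC`, `alloc_slowpath`) rather than the real libmdbx identifiers:

#include <assert.h>

#define ALLOC_GC 1u /* illustrative flag: "allowed to reclaim pages from GC" */

/* Before: the slow path guarded the GC lookup with `if (flags & ALLOC_GC)`.
 * After: every caller sets ALLOC_GC, so the guard is gone and the invariant
 * is documented by the assert below. */
static int alloc_slowpath(unsigned flags) {
  assert(flags & ALLOC_GC); /* always true for slow-path callers */
  /* ... proceed with the GC lookup unconditionally ... */
  return 0;
}

int main(void) { return alloc_slowpath(ALLOC_GC); }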


@@ -5394,6 +5394,7 @@ static txnid_t find_oldest_reader(MDBX_env *const env, const txnid_t steady) {
   MDBX_lockinfo *const lck = env->me_lck_mmap.lck;
   if (unlikely(lck == NULL /* exclusive without-lck mode */)) {
     eASSERT(env, env->me_lck == (void *)&env->x_lckless_stub);
+    env->me_lck->mti_readers_refresh_flag.weak = nothing_changed;
     return env->me_lck->mti_oldest_reader.weak = steady;
   }
@@ -6623,6 +6624,14 @@ static int gc_cursor_init(MDBX_cursor *mc, MDBX_txn *txn) {
   return cursor_init(mc, txn, FREE_DBI);
 }
 
+__hot static bool is_already_reclaimed(const MDBX_txn *txn, txnid_t id) {
+  const size_t len = MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed);
+  for (size_t i = 1; i <= len; ++i)
+    if (txn->tw.lifo_reclaimed[i] == id)
+      return true;
+  return false;
+}
+
 static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
                                  char flags) {
 #if MDBX_ENABLE_PROFGC
@@ -6642,11 +6651,9 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
   prof->spe_counter += 1;
 #endif /* MDBX_ENABLE_PROFGC */
+  eASSERT(env, flags & MDBX_ALLOC_GC);
   eASSERT(env, num == 0 || !(flags & MDBX_ALLOC_SLOT));
   eASSERT(env, num > 0 || !(flags & MDBX_ALLOC_NEW));
-  eASSERT(env, (flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE |
-                         MDBX_ALLOC_BACKLOG)) == 0 ||
-                   (flags & MDBX_ALLOC_GC));
   eASSERT(env, (flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE |
                          MDBX_ALLOC_BACKLOG)) == 0 ||
                    (flags & MDBX_ALLOC_NEW) == 0);
@@ -6654,9 +6661,12 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
                        txn->mt_next_pgno - MDBX_ENABLE_REFUND));
   pgno_t pgno = 0, *range = nullptr;
-  size_t re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
+  size_t newnext, re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
   if (num > 1) {
     eASSERT(env, !(flags & MDBX_ALLOC_SLOT));
+    eASSERT(env, (txn->mt_flags & MDBX_TXN_FROZEN_RE) == 0);
+    if (unlikely(txn->mt_flags & MDBX_TXN_FROZEN_RE))
+      goto no_gc;
 #if MDBX_ENABLE_PROFGC
     prof->xpages += 1;
 #endif /* MDBX_ENABLE_PROFGC */
@@ -6672,13 +6682,12 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
       }
     }
   } else {
-    eASSERT(env, (flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE)) ||
-                     MDBX_PNL_GETSIZE(txn->tw.relist) == 0);
+    eASSERT(env,
+            (flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE)) || re_len == 0);
   }
 
   //---------------------------------------------------------------------------
 
-  if (likely(flags & MDBX_ALLOC_GC)) {
   if (unlikely(!is_gc_usable(txn)))
     goto no_gc;
@@ -6697,9 +6706,6 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
 retry_gc_refresh_oldest:;
   txnid_t oldest = txn_oldest_reader(txn);
-  if (unlikely(!oldest))
-    goto no_gc;
 retry_gc_have_oldest:
   if (unlikely(oldest >= txn->mt_txnid)) {
     ERROR("unexpected/invalid oldest-readed txnid %" PRIaTXN
@@ -6710,7 +6716,7 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
   }
   const txnid_t detent = oldest + 1;
-  txnid_t last = 0;
+  txnid_t id = 0;
   bool should_scan = false;
   MDBX_cursor_op op = MDBX_FIRST;
   if (flags & MDBX_ALLOC_LIFO) {
@@ -6722,20 +6728,20 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
       }
     }
     /* Begin lookup backward from oldest reader */
-    last = detent - 1;
+    id = detent - 1;
     op = MDBX_SET_RANGE;
   } else if (txn->tw.last_reclaimed) {
     /* Continue lookup forward from last-reclaimed */
-    last = txn->tw.last_reclaimed + 1;
-    if (last >= detent)
-      goto no_gc;
+    id = txn->tw.last_reclaimed + 1;
+    if (id >= detent)
+      goto depleted_gc;
     op = MDBX_SET_RANGE;
   }
 
 next_gc:;
   MDBX_val key;
-  key.iov_base = &last;
-  key.iov_len = sizeof(last);
+  key.iov_base = &id;
+  key.iov_len = sizeof(id);
 #if MDBX_ENABLE_PROFGC
   prof->rsteps += 1;
@@ -6756,26 +6762,22 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
     ret.err = MDBX_CORRUPTED;
     goto fail;
   }
-  last = unaligned_peek_u64(4, key.iov_base);
+  id = unaligned_peek_u64(4, key.iov_base);
   if (flags & MDBX_ALLOC_LIFO) {
     op = MDBX_PREV;
-    if (last >= detent)
-      goto next_gc;
-    /* skip IDs of records that already reclaimed */
-    for (size_t i = MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed); i > 0; --i)
-      if (txn->tw.lifo_reclaimed[i] == last)
-        goto next_gc;
+    if (id >= detent || is_already_reclaimed(txn, id))
+      goto next_gc;
   } else {
     op = MDBX_NEXT;
-    if (unlikely(last >= detent))
+    if (unlikely(id >= detent))
       goto depleted_gc;
   }
 
   /* Reading next GC record */
   MDBX_val data;
   MDBX_page *const mp = recur.mc_pg[recur.mc_top];
-  if (unlikely((ret.err = node_read(&recur,
-                                    page_node(mp, recur.mc_ki[recur.mc_top]),
+  if (unlikely(
+          (ret.err = node_read(&recur, page_node(mp, recur.mc_ki[recur.mc_top]),
                                &data, mp)) != MDBX_SUCCESS))
     goto fail;
@@ -6798,7 +6800,8 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
       /* Stop reclaiming to avoid large/overflow the page list.
        * This is a rare case while search for a continuously multi-page region
        * in a large database.
-       * https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/123 */
+       * https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/123
+       */
       NOTICE("stop reclaiming to avoid PNL overflow: %zu (current) + %zu "
              "(chunk) -> %zu",
              MDBX_PNL_GETSIZE(txn->tw.relist), gc_len,
@@ -6807,9 +6810,9 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
   }
   /* Remember ID of readed GC record */
-  txn->tw.last_reclaimed = last;
+  txn->tw.last_reclaimed = id;
   if (flags & MDBX_ALLOC_LIFO) {
-    ret.err = txl_append(&txn->tw.lifo_reclaimed, last);
+    ret.err = txl_append(&txn->tw.lifo_reclaimed, id);
     if (unlikely(ret.err != MDBX_SUCCESS))
       goto fail;
   }
@@ -6818,12 +6821,11 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
   ret.err = pnl_need(&txn->tw.relist, gc_len);
   if (unlikely(ret.err != MDBX_SUCCESS))
     goto fail;
-  txn->tw.relist = txn->tw.relist;
   if (LOG_ENABLED(MDBX_LOG_EXTRA)) {
     DEBUG_EXTRA("readed GC-pnl txn %" PRIaTXN " root %" PRIaPGNO
                 " len %zu, PNL",
-                last, txn->mt_dbs[FREE_DBI].md_root, gc_len);
+                id, txn->mt_dbs[FREE_DBI].md_root, gc_len);
     for (size_t i = gc_len; i; i--)
       DEBUG_EXTRA_PRINT(" %" PRIaPGNO, gc_pnl[i]);
     DEBUG_EXTRA_PRINT(", next_pgno %u\n", txn->mt_next_pgno);
@@ -6847,7 +6849,7 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
   if (MDBX_ENABLE_REFUND && re_len &&
       unlikely(MDBX_PNL_MOST(txn->tw.relist) == txn->mt_next_pgno - 1)) {
     /* Refund suitable pages into "unallocated" space */
-    if (txn_refund(txn))
+    txn_refund(txn);
     re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
   }
   eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
@@ -6910,12 +6912,11 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
    *  - extend the database file. */
 
   /* Will use new pages from the map if nothing is suitable in the GC. */
-  pgno = txn->mt_next_pgno;
-  const size_t newnext = num + pgno;
+  newnext = (pgno = txn->mt_next_pgno) + num;
 
-  /* Does reclaiming stopped at the last steady point? */
   const meta_ptr_t recent = meta_recent(env, &txn->tw.troika);
   const meta_ptr_t prefer_steady = meta_prefer_steady(env, &txn->tw.troika);
+  /* does reclaiming stopped at the last steady point? */
   if (recent.ptr_c != prefer_steady.ptr_c && prefer_steady.is_steady &&
       detent == prefer_steady.txnid + 1) {
     DEBUG("gc-kick-steady: recent %" PRIaTXN "-%s, steady %" PRIaTXN
@@ -6979,16 +6980,14 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
     }
   }
 
-  if (env->me_lck_mmap.lck &&
-      unlikely(true ==
-               atomic_load32(&env->me_lck_mmap.lck->mti_readers_refresh_flag,
+  if (unlikely(true == atomic_load32(&env->me_lck->mti_readers_refresh_flag,
                                      mo_AcquireRelease))) {
     oldest = txn_oldest_reader(txn);
     if (oldest >= detent)
       goto retry_gc_have_oldest;
   }
 
-  /* avoid kick lagging reader(s) if is enough unallocated space
+  /* Avoid kick lagging reader(s) if is enough unallocated space
    * at the end of database file. */
   if ((flags & MDBX_ALLOC_NEW) && newnext <= txn->mt_end_pgno) {
     eASSERT(env, range == nullptr);
@@ -7000,7 +6999,6 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
     if (oldest >= detent)
       goto retry_gc_have_oldest;
   }
-  }
 
   //---------------------------------------------------------------------------
@@ -7011,8 +7009,7 @@ no_gc:
   }
 
   /* Will use new pages from the map if nothing is suitable in the GC. */
-  pgno = txn->mt_next_pgno;
-  const size_t newnext = num + pgno;
+  newnext = (pgno = txn->mt_next_pgno) + num;
   if (newnext <= txn->mt_end_pgno)
     goto done;
@@ -7055,6 +7052,7 @@ done:
     eASSERT(env, (txn->mt_flags & MDBX_TXN_FROZEN_RE) == 0);
     eASSERT(env, pgno == *range);
     eASSERT(env, pgno + num <= txn->mt_next_pgno && pgno >= NUM_METAS);
+    eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
     /* Cutoff allocated pages from tw.relist */
 #if MDBX_PNL_ASCENDING
     for (const pgno_t *const end = re_list + re_len - num; range <= end;