mdbx: avoid extra looping inside mdbx_page_alloc() when it is called from mdbx_prep_backlog().

This commit is contained in:
Леонид Юрьев (Leonid Yuriev) 2022-02-14 20:16:48 +03:00
parent 925a673d57
commit c5f1f73fca
2 changed files with 25 additions and 18 deletions

View File

@ -4,7 +4,7 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NOD
1 |0000 0002|ALLOC_GC |TXN_ERROR |REVERSEKEY|F_SUBDATA |DBI_STALE |F_SUBDATA|P_LEAF | | 1 |0000 0002|ALLOC_GC |TXN_ERROR |REVERSEKEY|F_SUBDATA |DBI_STALE |F_SUBDATA|P_LEAF | |
2 |0000 0004|ALLOC_NEW |TXN_DIRTY |DUPSORT | |DBI_FRESH |F_DUPDATA|P_OVERFLOW| | 2 |0000 0004|ALLOC_NEW |TXN_DIRTY |DUPSORT | |DBI_FRESH |F_DUPDATA|P_OVERFLOW| |
3 |0000 0008|ALLOC_SLOT |TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META | | 3 |0000 0008|ALLOC_SLOT |TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META | |
4 |0000 0010| |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_BAD | | 4 |0000 0010|ALLOC_FAKE |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_BAD | |
5 |0000 0020| | |INTEGERDUP|NODUPDATA |DBI_USRVALID| |P_LEAF2 | | 5 |0000 0020| | |INTEGERDUP|NODUPDATA |DBI_USRVALID| |P_LEAF2 | |
6 |0000 0040| | |REVERSEDUP|CURRENT |DBI_DUPDATA | |P_SUBP | | 6 |0000 0040| | |REVERSEDUP|CURRENT |DBI_DUPDATA | |P_SUBP | |
7 |0000 0080| | | |ALLDUPS |DBI_AUDITED | | | | 7 |0000 0080| | | |ALLDUPS |DBI_AUDITED | | | |

View File

@ -6343,6 +6343,7 @@ __cold static int mdbx_wipe_steady(MDBX_env *env, const txnid_t last_steady) {
#define MDBX_ALLOC_GC 2 #define MDBX_ALLOC_GC 2
#define MDBX_ALLOC_NEW 4 #define MDBX_ALLOC_NEW 4
#define MDBX_ALLOC_SLOT 8 #define MDBX_ALLOC_SLOT 8
#define MDBX_ALLOC_FAKE 16
#define MDBX_ALLOC_ALL (MDBX_ALLOC_CACHE | MDBX_ALLOC_GC | MDBX_ALLOC_NEW) #define MDBX_ALLOC_ALL (MDBX_ALLOC_CACHE | MDBX_ALLOC_GC | MDBX_ALLOC_NEW)
__hot static struct page_result mdbx_page_alloc(MDBX_cursor *mc, __hot static struct page_result mdbx_page_alloc(MDBX_cursor *mc,
@ -6350,6 +6351,8 @@ __hot static struct page_result mdbx_page_alloc(MDBX_cursor *mc,
struct page_result ret; struct page_result ret;
MDBX_txn *const txn = mc->mc_txn; MDBX_txn *const txn = mc->mc_txn;
MDBX_env *const env = txn->mt_env; MDBX_env *const env = txn->mt_env;
mdbx_assert(env, num == 0 || !(flags & MDBX_ALLOC_SLOT));
mdbx_assert(env, num > 0 || !(flags & MDBX_ALLOC_NEW));
const unsigned coalesce_threshold = const unsigned coalesce_threshold =
env->me_maxgc_ov1page - env->me_maxgc_ov1page / 4; env->me_maxgc_ov1page - env->me_maxgc_ov1page / 4;
@ -6418,8 +6421,7 @@ no_loose:
* Prefer pages with lower pgno. */ * Prefer pages with lower pgno. */
mdbx_tassert(txn, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist, mdbx_tassert(txn, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist,
txn->mt_next_pgno)); txn->mt_next_pgno));
if ((flags & (MDBX_COALESCE | MDBX_ALLOC_CACHE)) == MDBX_ALLOC_CACHE && if (!(flags & (MDBX_COALESCE | MDBX_ALLOC_SLOT)) && re_len >= num) {
re_len >= num) {
mdbx_tassert(txn, MDBX_PNL_LAST(re_list) < txn->mt_next_pgno && mdbx_tassert(txn, MDBX_PNL_LAST(re_list) < txn->mt_next_pgno &&
MDBX_PNL_FIRST(re_list) < txn->mt_next_pgno); MDBX_PNL_FIRST(re_list) < txn->mt_next_pgno);
range_begin = MDBX_PNL_ASCENDING ? 1 : re_len; range_begin = MDBX_PNL_ASCENDING ? 1 : re_len;
@ -6628,9 +6630,8 @@ no_loose:
} }
/* Done for a kick-reclaim mode, actually no page needed */ /* Done for a kick-reclaim mode, actually no page needed */
if (unlikely(num == 0)) { if (unlikely(flags & MDBX_ALLOC_SLOT)) {
mdbx_debug("early-return NULL-page for %s mode", "MDBX_ALLOC_SLOT"); mdbx_debug("early-return NULL-page for %s mode", "MDBX_ALLOC_SLOT");
mdbx_assert(env, flags & MDBX_ALLOC_SLOT);
ret.err = MDBX_SUCCESS; ret.err = MDBX_SUCCESS;
ret.page = NULL; ret.page = NULL;
return ret; return ret;
@ -6648,7 +6649,7 @@ no_loose:
} }
} }
if (F_ISSET(flags, MDBX_COALESCE | MDBX_ALLOC_CACHE)) { if (F_ISSET(flags, MDBX_COALESCE | MDBX_ALLOC_GC)) {
mdbx_debug_extra("clear %s and continue", "MDBX_COALESCE"); mdbx_debug_extra("clear %s and continue", "MDBX_COALESCE");
flags &= ~MDBX_COALESCE; flags &= ~MDBX_COALESCE;
continue; continue;
@ -6768,10 +6769,10 @@ no_loose:
} }
fail: fail:
mdbx_tassert(txn, mdbx_assert(env,
mdbx_pnl_check4assert(txn->tw.reclaimed_pglist, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist,
txn->mt_next_pgno - MDBX_ENABLE_REFUND)); txn->mt_next_pgno - MDBX_ENABLE_REFUND));
if (likely(!(flags & MDBX_ALLOC_SLOT))) if (likely(!(flags & MDBX_ALLOC_FAKE)))
txn->mt_flags |= MDBX_TXN_ERROR; txn->mt_flags |= MDBX_TXN_ERROR;
if (num != 1 || ret.err != MDBX_NOTFOUND) if (num != 1 || ret.err != MDBX_NOTFOUND)
mdbx_notice("alloc %u pages failed, flags 0x%x, errcode %d", num, flags, mdbx_notice("alloc %u pages failed, flags 0x%x, errcode %d", num, flags,
@ -6785,21 +6786,24 @@ no_loose:
} }
done: done:
ret.page = NULL; mdbx_assert(env, !(flags & MDBX_ALLOC_SLOT));
if (unlikely(flags & MDBX_ALLOC_SLOT)) { mdbx_ensure(env, pgno >= NUM_METAS);
mdbx_debug("return NULL-page for %s mode", "MDBX_ALLOC_SLOT"); if (unlikely(flags & MDBX_ALLOC_FAKE)) {
mdbx_debug("return NULL-page for %u pages of %s mode", num,
"MDBX_ALLOC_FAKE");
ret.page = NULL;
ret.err = MDBX_SUCCESS; ret.err = MDBX_SUCCESS;
return ret; return ret;
} }
mdbx_ensure(env, pgno >= NUM_METAS);
if (env->me_flags & MDBX_WRITEMAP) { if (env->me_flags & MDBX_WRITEMAP) {
ret.page = pgno2page(env, pgno); ret.page = pgno2page(env, pgno);
/* LY: reset no-access flag from mdbx_page_loose() */ /* LY: reset no-access flag from mdbx_page_loose() */
VALGRIND_MAKE_MEM_UNDEFINED(ret.page, pgno2bytes(env, num)); VALGRIND_MAKE_MEM_UNDEFINED(ret.page, pgno2bytes(env, num));
MDBX_ASAN_UNPOISON_MEMORY_REGION(ret.page, pgno2bytes(env, num)); MDBX_ASAN_UNPOISON_MEMORY_REGION(ret.page, pgno2bytes(env, num));
} else { } else {
if (unlikely(!(ret.page = mdbx_page_malloc(txn, num)))) { ret.page = mdbx_page_malloc(txn, num);
if (unlikely(!ret.page)) {
ret.err = MDBX_ENOMEM; ret.err = MDBX_ENOMEM;
goto fail; goto fail;
} }
@ -8810,7 +8814,7 @@ static int mdbx_prep_backlog(MDBX_txn *txn, MDBX_cursor *gc_cursor,
} }
} }
err = err =
mdbx_page_alloc(gc_cursor, linear4list, MDBX_ALLOC_GC | MDBX_ALLOC_SLOT) mdbx_page_alloc(gc_cursor, linear4list, MDBX_ALLOC_GC | MDBX_ALLOC_FAKE)
.err; .err;
mdbx_trace("== after-4linear, backlog %u, err %d", backlog_size(txn), err); mdbx_trace("== after-4linear, backlog %u, err %d", backlog_size(txn), err);
mdbx_cassert(gc_cursor, mdbx_cassert(gc_cursor,
@ -8818,7 +8822,9 @@ static int mdbx_prep_backlog(MDBX_txn *txn, MDBX_cursor *gc_cursor,
} }
while (backlog_size(txn) < backlog4cow + linear4list && err == MDBX_SUCCESS) while (backlog_size(txn) < backlog4cow + linear4list && err == MDBX_SUCCESS)
err = mdbx_page_alloc(gc_cursor, 1, MDBX_ALLOC_GC | MDBX_ALLOC_SLOT).err; err = mdbx_page_alloc(gc_cursor, 0,
MDBX_ALLOC_GC | MDBX_ALLOC_SLOT | MDBX_ALLOC_FAKE)
.err;
gc_cursor->mc_flags |= C_RECLAIMING; gc_cursor->mc_flags |= C_RECLAIMING;
mdbx_trace("<< backlog %u, err %d", backlog_size(txn), err); mdbx_trace("<< backlog %u, err %d", backlog_size(txn), err);
@ -9179,7 +9185,8 @@ retry:
do { do {
snap_oldest = mdbx_find_oldest(txn); snap_oldest = mdbx_find_oldest(txn);
rc = rc =
mdbx_page_alloc(&couple.outer, 0, MDBX_ALLOC_GC | MDBX_ALLOC_SLOT) mdbx_page_alloc(&couple.outer, 0,
MDBX_ALLOC_GC | MDBX_ALLOC_SLOT | MDBX_ALLOC_FAKE)
.err; .err;
if (likely(rc == MDBX_SUCCESS)) { if (likely(rc == MDBX_SUCCESS)) {
mdbx_trace("%s: took @%" PRIaTXN " from GC", dbg_prefix_mode, mdbx_trace("%s: took @%" PRIaTXN " from GC", dbg_prefix_mode,