mdbx: speedup GC-related pnl-merge and sequence-search.

This commit is contained in:
Леонид Юрьев (Leonid Yuriev) 2022-06-24 22:05:41 +03:00
parent acce7d4b16
commit 065e5849da

View File

@ -3203,18 +3203,22 @@ static __always_inline bool mdbx_pnl_check4assert(const MDBX_PNL pl,
static void __hot mdbx_pnl_xmerge(MDBX_PNL dst, const MDBX_PNL src) { static void __hot mdbx_pnl_xmerge(MDBX_PNL dst, const MDBX_PNL src) {
assert(mdbx_pnl_check4assert(dst, MAX_PAGENO + 1)); assert(mdbx_pnl_check4assert(dst, MAX_PAGENO + 1));
assert(mdbx_pnl_check(src, MAX_PAGENO + 1)); assert(mdbx_pnl_check(src, MAX_PAGENO + 1));
const size_t total = MDBX_PNL_SIZE(dst) + MDBX_PNL_SIZE(src); if (likely(MDBX_PNL_SIZE(src) > 0)) {
assert(MDBX_PNL_ALLOCLEN(dst) >= total); const size_t total = MDBX_PNL_SIZE(dst) + MDBX_PNL_SIZE(src);
pgno_t *w = dst + total; assert(MDBX_PNL_ALLOCLEN(dst) >= total);
pgno_t *d = dst + MDBX_PNL_SIZE(dst); pgno_t *w = dst + total;
const pgno_t *s = src + MDBX_PNL_SIZE(src); pgno_t *d = dst + MDBX_PNL_SIZE(dst);
dst[0] = /* detent for scan below */ (MDBX_PNL_ASCENDING ? 0 : ~(pgno_t)0); const pgno_t *s = src + MDBX_PNL_SIZE(src);
while (s > src) { dst[0] = /* detent for scan below */ (MDBX_PNL_ASCENDING ? 0 : ~(pgno_t)0);
while (MDBX_PNL_ORDERED(*s, *d)) do {
*w-- = *d--; const bool cmp = MDBX_PNL_ORDERED(*s, *d);
*w-- = *s--; *w = cmp ? *d : *s;
d -= cmp ? 1 : 0;
s -= cmp ? 0 : 1;
--w;
} while (s > src);
MDBX_PNL_SIZE(dst) = (pgno_t)total;
} }
MDBX_PNL_SIZE(dst) = (pgno_t)total;
assert(mdbx_pnl_check4assert(dst, MAX_PAGENO + 1)); assert(mdbx_pnl_check4assert(dst, MAX_PAGENO + 1));
} }
@ -6483,6 +6487,63 @@ __cold static int mdbx_wipe_steady(MDBX_env *env, const txnid_t last_steady) {
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
/* Searches a sorted page-number list for a run of `num` consecutive page
 * numbers and returns a pointer to the element holding the LOWEST page
 * number of the first such run found, or nullptr when there is none.
 *
 *  pnl - the list to scan; entries 1..len are examined, element 0 is never
 *        treated as a page number (presumably it holds the list length,
 *        see MDBX_PNL_SIZE - confirm against the macro definition).
 *  len - number of entries in the list, must be equal to MDBX_PNL_SIZE(pnl).
 *  num - required run length in pages, 1 <= num <= len.
 *
 * Since the list is sorted, `num` entries are consecutive exactly when the
 * entries lying `num - 1` positions apart differ by `num - 1`. Using `num`
 * as the distance (as was done before) demands a run of `num + 1` pages:
 * it misses runs of exactly `num` pages (including any single page when
 * num == 1) and, for the descending order, may compare against pnl[0].
 *
 * The scan goes from the lowest page numbers towards the higher ones, so
 * lower page numbers are preferred: forward from pnl[1] for the ascending
 * order, backward from pnl[len] for the descending one. In both cases the
 * returned pointer addresses the lowest page of the run, i.e. the first
 * element of the run for the ascending order and the last one for the
 * descending order. */
__hot static pgno_t *scan4range(const MDBX_PNL pnl, const unsigned len,
                                const int num) {
  assert(num > 0 && len >= (unsigned)num && len == MDBX_PNL_SIZE(pnl));
  /* Distance (both in index and in page number) between the first
   * and the last page of a run of `num` pages. */
  const int span = num - 1;
#if MDBX_PNL_ASCENDING
  /* The last position at which a run of `num` entries still fits. */
  const pgno_t *const detent = pnl + len - span;
  pgno_t *scan = pnl + 1;
  /* Unrolled by 8; the bound is tested via a pointer difference
   * to avoid forming a pointer beyond the end of the list. */
  while (likely(detent - scan >= 7)) {
    if (unlikely(scan[span] == *scan + span))
      return scan;
    if (unlikely(scan[span + 1] == scan[1] + span))
      return scan + 1;
    if (unlikely(scan[span + 2] == scan[2] + span))
      return scan + 2;
    if (unlikely(scan[span + 3] == scan[3] + span))
      return scan + 3;
    if (unlikely(scan[span + 4] == scan[4] + span))
      return scan + 4;
    if (unlikely(scan[span + 5] == scan[5] + span))
      return scan + 5;
    if (unlikely(scan[span + 6] == scan[6] + span))
      return scan + 6;
    if (unlikely(scan[span + 7] == scan[7] + span))
      return scan + 7;
    scan += 8;
  }
  /* Tail: the remaining (less than 8) candidate positions. */
  for (; scan <= detent; ++scan)
    if (scan[span] == *scan + span)
      return scan;
#else
  /* The lowest index at which a run may end, so that its first
   * entry `scan[-span]` is still pnl[1] or above. */
  const pgno_t *const detent = pnl + num;
  pgno_t *scan = pnl + len;
  /* Unrolled by 8; the bound is tested via a pointer difference
   * to avoid forming a pointer before the beginning of the list. */
  while (likely(scan - detent >= 7)) {
    if (unlikely(scan[-span] == *scan + span))
      return scan;
    if (unlikely(scan[-span - 1] == scan[-1] + span))
      return scan - 1;
    if (unlikely(scan[-span - 2] == scan[-2] + span))
      return scan - 2;
    if (unlikely(scan[-span - 3] == scan[-3] + span))
      return scan - 3;
    if (unlikely(scan[-span - 4] == scan[-4] + span))
      return scan - 4;
    if (unlikely(scan[-span - 5] == scan[-5] + span))
      return scan - 5;
    if (unlikely(scan[-span - 6] == scan[-6] + span))
      return scan - 6;
    if (unlikely(scan[-span - 7] == scan[-7] + span))
      return scan - 7;
    scan -= 8;
  }
  /* Tail: the remaining (less than 8) candidate positions. */
  for (; scan >= detent; --scan)
    if (scan[-span] == *scan + span)
      return scan;
#endif /* MDBX_PNL sort-order */
  return nullptr;
}
/* Allocate page numbers and memory for writing. Maintain mt_last_reclaimed, /* Allocate page numbers and memory for writing. Maintain mt_last_reclaimed,
* mt_reclaimed_pglist and mt_next_pgno. Set MDBX_TXN_ERROR on failure. * mt_reclaimed_pglist and mt_next_pgno. Set MDBX_TXN_ERROR on failure.
* *
@ -6534,11 +6595,12 @@ page_alloc_slowpath(MDBX_cursor *mc, const pgno_t num, int flags) {
flags &= ~(MDBX_ALLOC_GC | MDBX_ALLOC_COALESCE); flags &= ~(MDBX_ALLOC_GC | MDBX_ALLOC_COALESCE);
} }
mdbx_tassert(txn, mdbx_assert(env,
mdbx_pnl_check4assert(txn->tw.reclaimed_pglist, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist,
txn->mt_next_pgno - MDBX_ENABLE_REFUND)); txn->mt_next_pgno - MDBX_ENABLE_REFUND));
pgno_t pgno, *re_list = txn->tw.reclaimed_pglist; pgno_t pgno, *re_list = txn->tw.reclaimed_pglist;
unsigned range_begin = 0, re_len = MDBX_PNL_SIZE(re_list); unsigned re_len = MDBX_PNL_SIZE(re_list);
pgno_t *range = nullptr;
txnid_t oldest = 0, last = 0; txnid_t oldest = 0, last = 0;
while (true) { /* hsr-kick retry loop */ while (true) { /* hsr-kick retry loop */
@ -6549,37 +6611,16 @@ page_alloc_slowpath(MDBX_cursor *mc, const pgno_t num, int flags) {
/* Seek a big enough contiguous page range. /* Seek a big enough contiguous page range.
* Prefer pages with lower pgno. */ * Prefer pages with lower pgno. */
mdbx_tassert(txn, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist, mdbx_assert(env, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist,
txn->mt_next_pgno)); txn->mt_next_pgno));
if (!(flags & (MDBX_ALLOC_COALESCE | MDBX_ALLOC_SLOT)) && re_len >= num) { if (!(flags & (MDBX_ALLOC_COALESCE | MDBX_ALLOC_SLOT)) && re_len >= num) {
mdbx_tassert(txn, MDBX_PNL_LAST(re_list) < txn->mt_next_pgno && mdbx_assert(env, MDBX_PNL_LAST(re_list) < txn->mt_next_pgno &&
MDBX_PNL_FIRST(re_list) < txn->mt_next_pgno); MDBX_PNL_FIRST(re_list) < txn->mt_next_pgno);
range_begin = MDBX_PNL_ASCENDING ? 1 : re_len; range = scan4range(re_list, re_len, num);
pgno = MDBX_PNL_LEAST(re_list); if (likely(range)) {
if (likely(num == 1)) pgno = *range;
goto done; goto done;
const unsigned wanna_range = num - 1;
#if MDBX_PNL_ASCENDING
mdbx_tassert(txn, pgno == re_list[1] && range_begin == 1);
while (true) {
unsigned range_end = range_begin + wanna_range;
if (re_list[range_end] - pgno == wanna_range)
goto done;
if (range_end == re_len)
break;
pgno = re_list[++range_begin];
} }
#else
mdbx_tassert(txn, pgno == re_list[re_len] && range_begin == re_len);
while (true) {
if (re_list[range_begin - wanna_range] - pgno == wanna_range)
goto done;
if (range_begin == wanna_range)
break;
pgno = re_list[--range_begin];
}
#endif /* MDBX_PNL sort-order */
} }
if (op == MDBX_FIRST) { /* 1st iteration, setup cursor, etc */ if (op == MDBX_FIRST) { /* 1st iteration, setup cursor, etc */
@ -6795,7 +6836,7 @@ page_alloc_slowpath(MDBX_cursor *mc, const pgno_t num, int flags) {
* - extend the database file. */ * - extend the database file. */
/* Will use new pages from the map if nothing is suitable in the GC. */ /* Will use new pages from the map if nothing is suitable in the GC. */
range_begin = 0; range = nullptr;
pgno = txn->mt_next_pgno; pgno = txn->mt_next_pgno;
const size_t next = (size_t)pgno + num; const size_t next = (size_t)pgno + num;
@ -6947,20 +6988,20 @@ done:
} }
} }
if (range_begin) { if (range) {
mdbx_cassert(mc, (mc->mc_flags & C_GCFREEZE) == 0); mdbx_cassert(mc, (mc->mc_flags & C_GCFREEZE) == 0);
mdbx_tassert(txn, pgno < txn->mt_next_pgno); mdbx_tassert(txn, pgno < txn->mt_next_pgno);
mdbx_tassert(txn, pgno == re_list[range_begin]); mdbx_tassert(txn, pgno == *range);
/* Cutoff allocated pages from tw.reclaimed_pglist */ /* Cutoff allocated pages from tw.reclaimed_pglist */
#if MDBX_PNL_ASCENDING #if MDBX_PNL_ASCENDING
for (unsigned i = range_begin + num; i <= re_len;) for (const pgno_t *const end = re_list + re_len - num; range <= end;
re_list[range_begin++] = re_list[i++]; ++range)
MDBX_PNL_SIZE(re_list) = re_len = range_begin - 1; *range = range[num];
#else #else
MDBX_PNL_SIZE(re_list) = re_len -= num; for (const pgno_t *const end = re_list + re_len; ++range <= end;)
for (unsigned i = range_begin - num; i < re_len;) range[-(ptrdiff_t)num] = *range;
re_list[++i] = re_list[++range_begin];
#endif #endif
MDBX_PNL_SIZE(re_list) = re_len -= num;
mdbx_tassert(txn, mdbx_tassert(txn,
mdbx_pnl_check4assert(txn->tw.reclaimed_pglist, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist,
txn->mt_next_pgno - MDBX_ENABLE_REFUND)); txn->mt_next_pgno - MDBX_ENABLE_REFUND));