mdbx: add pre- and post-gaps to the dirty page list to avoid some comparisons and conditional branches.

More for https://github.com/erthink/libmdbx/issues/132

Change-Id: I6562c5ff6c559341bb7bb64222b126f06cc13427
This commit is contained in:
Leonid Yuriev 2021-01-30 21:02:04 +03:00
parent b57a338546
commit 077989bfed
3 changed files with 80 additions and 47 deletions

View File

@ -1424,6 +1424,7 @@ sendfile
sepkey sepkey
SETALL SETALL
SETFD SETFD
setlen
setlevel setlevel
setlk setlk
setlkw setlkw

View File

@ -3175,8 +3175,24 @@ static __always_inline unsigned bytes2dpl(const ptrdiff_t bytes) {
return (unsigned)size - MDBX_DPL_RESERVE_GAP; return (unsigned)size - MDBX_DPL_RESERVE_GAP;
} }
/* Sets the length of the dirty page list and writes the trailing gap item.
 *
 * The slot just past the last live item, items[len + 1], gets the page
 * number P_INVALID and a pointer to a function-local static stub page
 * whose flags are P_BAD.
 *
 * dl  - dirty page list to update.
 * len - new number of live items.
 *
 * Returns len, so the call can be chained into another assignment.
 *
 * NOTE(review): assumes the items array has room for index len + 1;
 * confirm against the allocation code. */
static __always_inline unsigned mdbx_dpl_setlen(MDBX_dpl *dl, unsigned len) {
  /* Read-only stub referenced by the trailing gap item. */
  static const MDBX_page dpl_stub_pageE = {
      {0}, 0, P_BAD, {0}, /* pgno */ ~(pgno_t)0};
  assert(dpl_stub_pageE.mp_flags == P_BAD &&
         dpl_stub_pageE.mp_pgno == P_INVALID);

  const unsigned gap = len + 1;
  dl->items[gap].ptr = (MDBX_page *)&dpl_stub_pageE;
  dl->items[gap].pgno = P_INVALID;
  dl->length = len;
  return len;
}
static __always_inline void mdbx_dpl_clear(MDBX_dpl *dl) { static __always_inline void mdbx_dpl_clear(MDBX_dpl *dl) {
dl->sorted = dl->length = 0; static const MDBX_page dpl_stub_pageB = {{0}, 0, P_BAD, {0}, /* pgno */ 0};
assert(dpl_stub_pageB.mp_flags == P_BAD && dpl_stub_pageB.mp_pgno == 0);
dl->sorted = mdbx_dpl_setlen(dl, 0);
dl->items[0].pgno = 0;
dl->items[0].ptr = (MDBX_page *)&dpl_stub_pageB;
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
} }
static void mdbx_dpl_free(MDBX_txn *txn) { static void mdbx_dpl_free(MDBX_txn *txn) {
@ -3196,8 +3212,6 @@ static MDBX_dpl *mdbx_dpl_reserve(MDBX_txn *txn, size_t size) {
bytes = malloc_usable_size(dl); bytes = malloc_usable_size(dl);
#endif /* malloc_usable_size */ #endif /* malloc_usable_size */
dl->detent = bytes2dpl(bytes); dl->detent = bytes2dpl(bytes);
dl->items[0].pgno = 0;
dl->items[0].ptr = nullptr;
mdbx_tassert(txn, txn->tw.dirtylist == NULL || dl->length <= dl->detent); mdbx_tassert(txn, txn->tw.dirtylist == NULL || dl->length <= dl->detent);
txn->tw.dirtylist = dl; txn->tw.dirtylist = dl;
} }
@ -3220,6 +3234,7 @@ SORT_IMPL(dp_sort, false, MDBX_dp, DP_SORT_CMP)
static __always_inline MDBX_dpl *mdbx_dpl_sort(MDBX_dpl *dl) { static __always_inline MDBX_dpl *mdbx_dpl_sort(MDBX_dpl *dl) {
assert(dl->length <= MDBX_PGL_LIMIT); assert(dl->length <= MDBX_PGL_LIMIT);
assert(dl->sorted <= dl->length); assert(dl->sorted <= dl->length);
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
if (dl->sorted != dl->length) { if (dl->sorted != dl->length) {
dl->sorted = dl->length; dl->sorted = dl->length;
dp_sort(dl->items + 1, dl->items + dl->length + 1); dp_sort(dl->items + 1, dl->items + dl->length + 1);
@ -3233,6 +3248,7 @@ static __always_inline MDBX_dpl *mdbx_dpl_sort(MDBX_dpl *dl) {
SEARCH_IMPL(dp_bsearch, MDBX_dp, pgno_t, DP_SEARCH_CMP) SEARCH_IMPL(dp_bsearch, MDBX_dp, pgno_t, DP_SEARCH_CMP)
static unsigned __hot mdbx_dpl_search(MDBX_dpl *dl, pgno_t pgno) { static unsigned __hot mdbx_dpl_search(MDBX_dpl *dl, pgno_t pgno) {
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
if (mdbx_audit_enabled()) { if (mdbx_audit_enabled()) {
for (const MDBX_dp *ptr = dl->items + dl->sorted; --ptr > dl->items;) { for (const MDBX_dp *ptr = dl->items + dl->sorted; --ptr > dl->items;) {
assert(ptr[0].pgno < ptr[1].pgno); assert(ptr[0].pgno < ptr[1].pgno);
@ -3284,15 +3300,15 @@ static unsigned __hot mdbx_dpl_search(MDBX_dpl *dl, pgno_t pgno) {
static __inline bool mdbx_dpl_intersect(MDBX_dpl *dl, pgno_t pgno, static __inline bool mdbx_dpl_intersect(MDBX_dpl *dl, pgno_t pgno,
unsigned npages) { unsigned npages) {
assert(dl->sorted == dl->length); assert(dl->sorted == dl->length);
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
unsigned const n = mdbx_dpl_search(dl, pgno); unsigned const n = mdbx_dpl_search(dl, pgno);
assert(n >= 1 && n <= dl->length + 1); assert(n >= 1 && n <= dl->length + 1);
assert(n > dl->length || pgno <= dl->items[n].pgno); assert(pgno <= dl->items[n].pgno);
assert(n == 1 || pgno > dl->items[n - 1].pgno); assert(pgno > dl->items[n - 1].pgno);
const MDBX_page *const prev = dl->items[n - 1].ptr; const MDBX_page *const prev = dl->items[n - 1].ptr;
const bool rc = const bool rc =
(/* intersection with founded */ n <= dl->length && /* intersection with founded */ pgno + npages > dl->items[n].pgno ||
pgno + npages > dl->items[n].pgno) || (/* intersection with prev */ unlikely(IS_OVERFLOW(prev)) &&
(/* intersection with prev */ n > 1 && unlikely(IS_OVERFLOW(prev)) &&
prev->mp_pgno + prev->mp_pages > pgno); prev->mp_pgno + prev->mp_pages > pgno);
if (mdbx_assert_enabled()) { if (mdbx_assert_enabled()) {
bool check = false; bool check = false;
@ -3311,18 +3327,18 @@ static __inline bool mdbx_dpl_intersect(MDBX_dpl *dl, pgno_t pgno,
static __always_inline unsigned mdbx_dpl_exist(MDBX_dpl *dl, pgno_t pgno) { static __always_inline unsigned mdbx_dpl_exist(MDBX_dpl *dl, pgno_t pgno) {
unsigned i = mdbx_dpl_search(dl, pgno); unsigned i = mdbx_dpl_search(dl, pgno);
assert((int)i > 0); assert((int)i > 0);
return (i <= dl->length && dl->items[i].pgno == pgno) ? i : 0; return (dl->items[i].pgno == pgno) ? i : 0;
} }
static __always_inline MDBX_page *mdbx_dpl_find(MDBX_dpl *dl, pgno_t pgno) { static __always_inline MDBX_page *mdbx_dpl_find(MDBX_dpl *dl, pgno_t pgno) {
const unsigned i = mdbx_dpl_search(dl, pgno); const unsigned i = mdbx_dpl_search(dl, pgno);
assert((int)i > 0); assert((int)i > 0);
return (i <= dl->length && dl->items[i].pgno == pgno) ? dl->items[i].ptr return (dl->items[i].pgno == pgno) ? dl->items[i].ptr : nullptr;
: nullptr;
} }
static __maybe_unused const MDBX_page *debug_dpl_find(const MDBX_dpl *dl, static __maybe_unused const MDBX_page *debug_dpl_find(const MDBX_dpl *dl,
const pgno_t pgno) { const pgno_t pgno) {
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
for (unsigned i = dl->length; i > dl->sorted; --i) for (unsigned i = dl->length; i > dl->sorted; --i)
if (dl->items[i].pgno == pgno) if (dl->items[i].pgno == pgno)
return dl->items[i].ptr; return dl->items[i].ptr;
@ -3330,7 +3346,7 @@ static __maybe_unused const MDBX_page *debug_dpl_find(const MDBX_dpl *dl,
if (dl->sorted) { if (dl->sorted) {
const unsigned i = const unsigned i =
(unsigned)(dp_bsearch(dl->items + 1, dl->sorted, pgno) - dl->items); (unsigned)(dp_bsearch(dl->items + 1, dl->sorted, pgno) - dl->items);
if (i <= dl->sorted && dl->items[i].pgno == pgno) if (dl->items[i].pgno == pgno)
return dl->items[i].ptr; return dl->items[i].ptr;
} }
return nullptr; return nullptr;
@ -3338,17 +3354,19 @@ static __maybe_unused const MDBX_page *debug_dpl_find(const MDBX_dpl *dl,
static void mdbx_dpl_remove(MDBX_dpl *dl, unsigned i) { static void mdbx_dpl_remove(MDBX_dpl *dl, unsigned i) {
assert((int)i > 0 && i <= dl->length); assert((int)i > 0 && i <= dl->length);
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
dl->sorted -= dl->sorted >= i; dl->sorted -= dl->sorted >= i;
dl->length -= 1; dl->length -= 1;
if (dl->length >= i)
memmove(dl->items + i, dl->items + i + 1, memmove(dl->items + i, dl->items + i + 1,
(dl->length - i + 1) * sizeof(dl->items[0])); (dl->length - i + 2) * sizeof(dl->items[0]));
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
} }
static __always_inline int __must_check_result static __always_inline int __must_check_result
mdbx_dpl_append(MDBX_txn *txn, pgno_t pgno, MDBX_page *page) { mdbx_dpl_append(MDBX_txn *txn, pgno_t pgno, MDBX_page *page) {
MDBX_dpl *dl = txn->tw.dirtylist; MDBX_dpl *dl = txn->tw.dirtylist;
assert(dl->length <= MDBX_PGL_LIMIT + MDBX_PNL_GRANULATE); assert(dl->length <= MDBX_PGL_LIMIT + MDBX_PNL_GRANULATE);
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
if (mdbx_audit_enabled()) { if (mdbx_audit_enabled()) {
for (unsigned i = dl->length; i > 0; --i) { for (unsigned i = dl->length; i > 0; --i) {
assert(dl->items[i].pgno != pgno); assert(dl->items[i].pgno != pgno);
@ -3357,6 +3375,12 @@ mdbx_dpl_append(MDBX_txn *txn, pgno_t pgno, MDBX_page *page) {
} }
} }
const unsigned length = dl->length + 1;
const unsigned sorted =
(dl->sorted == dl->length && dl->items[dl->length].pgno < pgno)
? length
: dl->sorted;
if (unlikely(dl->length == dl->detent)) { if (unlikely(dl->length == dl->detent)) {
if (unlikely(dl->detent >= MDBX_PGL_LIMIT)) { if (unlikely(dl->detent >= MDBX_PGL_LIMIT)) {
mdbx_error("DPL is full (MDBX_PGL_LIMIT %u)", MDBX_PGL_LIMIT); mdbx_error("DPL is full (MDBX_PGL_LIMIT %u)", MDBX_PGL_LIMIT);
@ -3371,13 +3395,14 @@ mdbx_dpl_append(MDBX_txn *txn, pgno_t pgno, MDBX_page *page) {
mdbx_tassert(txn, dl->length < dl->detent); mdbx_tassert(txn, dl->length < dl->detent);
} }
/* copy the stub beyond the end */
dl->items[length + 1] = dl->items[length];
/* append page */ /* append page */
const unsigned n = dl->length + 1; dl->items[length].pgno = pgno;
if (n == 1 || (dl->sorted >= dl->length && dl->items[n - 1].pgno < pgno)) dl->items[length].ptr = page;
dl->sorted = n; dl->length = length;
dl->length = n; dl->sorted = sorted;
dl->items[n].pgno = pgno; assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
dl->items[n].ptr = page;
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
@ -4008,10 +4033,11 @@ static __always_inline MDBX_db *mdbx_outer_db(MDBX_cursor *mc) {
} }
static __cold __maybe_unused bool mdbx_dirtylist_check(MDBX_txn *txn) { static __cold __maybe_unused bool mdbx_dirtylist_check(MDBX_txn *txn) {
const MDBX_dpl *const dl = txn->tw.dirtylist;
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
if (!mdbx_audit_enabled()) if (!mdbx_audit_enabled())
return true; return true;
const MDBX_dpl *const dl = txn->tw.dirtylist;
unsigned loose = 0; unsigned loose = 0;
for (unsigned i = dl->length; i > 0; --i) { for (unsigned i = dl->length; i > 0; --i) {
const MDBX_page *const dp = dl->items[i].ptr; const MDBX_page *const dp = dl->items[i].ptr;
@ -4178,7 +4204,7 @@ static void mdbx_refund_loose(MDBX_txn *txn) {
dl->items[w] = dl->items[r]; dl->items[w] = dl->items[r];
} }
} }
dl->length = w; mdbx_dpl_setlen(dl, w);
mdbx_tassert(txn, txn->mt_parent || txn->tw.dirtyroom + dl->length == mdbx_tassert(txn, txn->mt_parent || txn->tw.dirtyroom + dl->length ==
txn->mt_env->me_options.dp_limit); txn->mt_env->me_options.dp_limit);
goto unlink_loose; goto unlink_loose;
@ -4191,14 +4217,17 @@ static void mdbx_refund_loose(MDBX_txn *txn) {
mdbx_tassert(txn, dl->sorted == dl->length); mdbx_tassert(txn, dl->sorted == dl->length);
/* Scan dirtylist tail-forward and cutoff suitable pages. */ /* Scan dirtylist tail-forward and cutoff suitable pages. */
while (dl->length && dl->items[dl->length].pgno == txn->mt_next_pgno - 1 && unsigned n;
dl->items[dl->length].ptr->mp_flags == (P_LOOSE | P_DIRTY)) { for (n = dl->length; dl->items[n].pgno == txn->mt_next_pgno - 1 &&
MDBX_page *dp = dl->items[dl->length].ptr; dl->items[n].ptr->mp_flags == (P_LOOSE | P_DIRTY);
--n) {
mdbx_tassert(txn, n > 0);
MDBX_page *dp = dl->items[n].ptr;
mdbx_debug("refund-sorted page %" PRIaPGNO, dp->mp_pgno); mdbx_debug("refund-sorted page %" PRIaPGNO, dp->mp_pgno);
mdbx_tassert(txn, dp->mp_pgno == dl->items[dl->length].pgno); mdbx_tassert(txn, dp->mp_pgno == dl->items[n].pgno);
txn->mt_next_pgno -= 1; txn->mt_next_pgno -= 1;
dl->length -= 1;
} }
mdbx_dpl_setlen(dl, n);
if (dl->sorted != dl->length) { if (dl->sorted != dl->length) {
const unsigned refunded = dl->sorted - dl->length; const unsigned refunded = dl->sorted - dl->length;
@ -7539,7 +7568,7 @@ static void mdbx_dpl_sift(MDBX_txn *const txn, MDBX_PNL pl,
goto remove_dl; goto remove_dl;
} }
} }
dl->sorted = dl->length = w - 1; dl->sorted = mdbx_dpl_setlen(dl, w - 1);
txn->tw.dirtyroom += r - w; txn->tw.dirtyroom += r - w;
assert(txn->tw.dirtyroom <= txn->mt_env->me_options.dp_limit); assert(txn->tw.dirtyroom <= txn->mt_env->me_options.dp_limit);
return; return;
@ -8133,7 +8162,7 @@ retry_noaccount:
mdbx_trace("%s: filtered-out loose-pages from %u -> %u dirty-pages", mdbx_trace("%s: filtered-out loose-pages from %u -> %u dirty-pages",
dbg_prefix_mode, dl->length, w); dbg_prefix_mode, dl->length, w);
mdbx_tassert(txn, txn->tw.loose_count == dl->length - w); mdbx_tassert(txn, txn->tw.loose_count == dl->length - w);
dl->length = w; mdbx_dpl_setlen(dl, w);
dl->sorted = 0; dl->sorted = 0;
txn->tw.dirtyroom += txn->tw.loose_count; txn->tw.dirtyroom += txn->tw.loose_count;
assert(txn->tw.dirtyroom <= txn->mt_env->me_options.dp_limit); assert(txn->tw.dirtyroom <= txn->mt_env->me_options.dp_limit);
@ -8761,7 +8790,7 @@ __hot static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
mdbx_tassert(txn, dl->sorted == dl->length && r == dl->length + 1); mdbx_tassert(txn, dl->sorted == dl->length && r == dl->length + 1);
txn->tw.dirtyroom += dl->length - w; txn->tw.dirtyroom += dl->length - w;
assert(txn->tw.dirtyroom <= txn->mt_env->me_options.dp_limit); assert(txn->tw.dirtyroom <= txn->mt_env->me_options.dp_limit);
dl->sorted = dl->length = w; dl->sorted = mdbx_dpl_setlen(dl, w);
mdbx_tassert(txn, txn->mt_parent || mdbx_tassert(txn, txn->mt_parent ||
txn->tw.dirtyroom + txn->tw.dirtylist->length == txn->tw.dirtyroom + txn->tw.dirtylist->length ==
txn->mt_env->me_options.dp_limit); txn->mt_env->me_options.dp_limit);
@ -8809,21 +8838,23 @@ static __inline void mdbx_txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
/* Remove refunded pages from parent's dirty list */ /* Remove refunded pages from parent's dirty list */
MDBX_dpl *const dst = mdbx_dpl_sort(parent->tw.dirtylist); MDBX_dpl *const dst = mdbx_dpl_sort(parent->tw.dirtylist);
while (MDBX_ENABLE_REFUND && dst->length && if (MDBX_ENABLE_REFUND) {
dst->items[dst->length].pgno >= parent->mt_next_pgno) { unsigned n = dst->length;
while (n && dst->items[n].pgno >= parent->mt_next_pgno) {
if (!(txn->mt_env->me_flags & MDBX_WRITEMAP)) { if (!(txn->mt_env->me_flags & MDBX_WRITEMAP)) {
MDBX_page *dp = dst->items[dst->length].ptr; MDBX_page *dp = dst->items[n].ptr;
mdbx_dpage_free(txn->mt_env, dp, IS_OVERFLOW(dp) ? dp->mp_pages : 1); mdbx_dpage_free(txn->mt_env, dp, IS_OVERFLOW(dp) ? dp->mp_pages : 1);
} }
dst->length -= 1; --n;
} }
parent->tw.dirtyroom += dst->sorted - dst->length; parent->tw.dirtyroom += dst->sorted - n;
assert(parent->tw.dirtyroom <= parent->mt_env->me_options.dp_limit); assert(parent->tw.dirtyroom <= parent->mt_env->me_options.dp_limit);
dst->sorted = dst->length; dst->sorted = mdbx_dpl_setlen(dst, n);
mdbx_tassert(parent, mdbx_tassert(parent,
parent->mt_parent || parent->mt_parent ||
parent->tw.dirtyroom + parent->tw.dirtylist->length == parent->tw.dirtyroom + parent->tw.dirtylist->length ==
parent->mt_env->me_options.dp_limit); parent->mt_env->me_options.dp_limit);
}
/* Remove reclaimed pages from parent's dirty list */ /* Remove reclaimed pages from parent's dirty list */
const MDBX_PNL reclaimed_list = parent->tw.reclaimed_pglist; const MDBX_PNL reclaimed_list = parent->tw.reclaimed_pglist;
@ -9111,7 +9142,7 @@ static __inline void mdbx_txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
} }
parent->tw.dirtyroom -= dst->sorted - dst->length; parent->tw.dirtyroom -= dst->sorted - dst->length;
assert(parent->tw.dirtyroom <= parent->mt_env->me_options.dp_limit); assert(parent->tw.dirtyroom <= parent->mt_env->me_options.dp_limit);
dst->length = dst->sorted; mdbx_dpl_setlen(dst, dst->sorted);
mdbx_tassert(parent, mdbx_tassert(parent,
parent->mt_parent || parent->mt_parent ||
parent->tw.dirtyroom + parent->tw.dirtylist->length == parent->tw.dirtyroom + parent->tw.dirtylist->length ==

View File

@ -380,8 +380,8 @@ typedef struct MDBX_meta {
* in the snapshot: Either used by a database or listed in a GC record. */ * in the snapshot: Either used by a database or listed in a GC record. */
typedef struct MDBX_page { typedef struct MDBX_page {
union { union {
struct MDBX_page *mp_next; /* for in-memory list of freed pages */
uint64_t mp_txnid; /* txnid that committed this page */ uint64_t mp_txnid; /* txnid that committed this page */
struct MDBX_page *mp_next; /* for in-memory list of freed pages */
}; };
uint16_t mp_leaf2_ksize; /* key size if this is a LEAF2 page */ uint16_t mp_leaf2_ksize; /* key size if this is a LEAF2 page */
#define P_BRANCH 0x01 /* branch page */ #define P_BRANCH 0x01 /* branch page */
@ -391,15 +391,16 @@ typedef struct MDBX_page {
#define P_DIRTY 0x10 /* dirty page, also set for P_SUBP pages */ #define P_DIRTY 0x10 /* dirty page, also set for P_SUBP pages */
#define P_LEAF2 0x20 /* for MDBX_DUPFIXED records */ #define P_LEAF2 0x20 /* for MDBX_DUPFIXED records */
#define P_SUBP 0x40 /* for MDBX_DUPSORT sub-pages */ #define P_SUBP 0x40 /* for MDBX_DUPSORT sub-pages */
#define P_BAD 0x80 /* explicit flag for invalid/bad page */
#define P_LOOSE 0x4000 /* page was dirtied then freed, can be reused */ #define P_LOOSE 0x4000 /* page was dirtied then freed, can be reused */
#define P_KEEP 0x8000 /* leave this page alone during spill */ #define P_KEEP 0x8000 /* leave this page alone during spill */
uint16_t mp_flags; uint16_t mp_flags;
union { union {
uint32_t mp_pages; /* number of overflow pages */
__anonymous_struct_extension__ struct { __anonymous_struct_extension__ struct {
indx_t mp_lower; /* lower bound of free space */ indx_t mp_lower; /* lower bound of free space */
indx_t mp_upper; /* upper bound of free space */ indx_t mp_upper; /* upper bound of free space */
}; };
uint32_t mp_pages; /* number of overflow pages */
}; };
pgno_t mp_pgno; /* page number */ pgno_t mp_pgno; /* page number */