mdbx: refactor dirty-page-list (preparation to rework).

Change-Id: Ib52bb52f73ef1d31f55838d879de081fc0a140c2
This commit is contained in:
Leonid Yuriev 2020-12-02 14:17:57 +03:00
parent 7210f994fb
commit d8d89cca7d
2 changed files with 143 additions and 128 deletions

View File

@ -2978,11 +2978,21 @@ static int __must_check_result mdbx_txl_append(MDBX_TXL *ptl, txnid_t id) {
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
static __always_inline size_t dpl2bytes(const ptrdiff_t size) { static __always_inline size_t dpl2bytes(const ptrdiff_t size) {
assert(size > 0 && size <= MDBX_PGL_LIMIT); assert(size > 2 && size <= MDBX_PGL_LIMIT);
return (size + 1) * sizeof(MDBX_DP); size_t bytes = ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(MDBX_dpl) +
(size + 2) * sizeof(MDBX_dp),
MDBX_PNL_GRANULATE * sizeof(void *) * 2) -
MDBX_ASSUME_MALLOC_OVERHEAD;
return bytes;
} }
static __always_inline void mdbx_dpl_clear(MDBX_DPL dl) { static __always_inline unsigned bytes2dpl(const ptrdiff_t bytes) {
size_t size = (bytes - sizeof(MDBX_dpl)) / sizeof(MDBX_dp);
assert(size > 4 && size <= MDBX_PGL_LIMIT + MDBX_PNL_GRANULATE);
return (unsigned)size - 2;
}
static __always_inline void mdbx_dpl_clear(MDBX_dpl *dl) {
dl->sorted = dl->length = 0; dl->sorted = dl->length = 0;
} }
@ -2998,22 +3008,26 @@ static int mdbx_dpl_alloc(MDBX_txn *txn) {
(txn->mt_flags & MDBX_TXN_RDONLY) == 0 && !txn->tw.dirtylist); (txn->mt_flags & MDBX_TXN_RDONLY) == 0 && !txn->tw.dirtylist);
unsigned limit = /* TODO */ MDBX_DPL_TXNFULL; unsigned limit = /* TODO */ MDBX_DPL_TXNFULL;
size_t bytes = dpl2bytes(limit); size_t bytes = dpl2bytes(limit);
MDBX_DPL dl = mdbx_malloc(bytes); MDBX_dpl *dl = mdbx_malloc(bytes);
if (unlikely(!dl)) if (unlikely(!dl))
return MDBX_ENOMEM; return MDBX_ENOMEM;
#if __GLIBC_PREREQ(2, 12) || defined(__FreeBSD__) || defined(malloc_usable_size)
bytes = malloc_usable_size(dl);
#endif /* malloc_usable_size */
dl->allocated = dl->limit = bytes2dpl(bytes);
mdbx_dpl_clear(dl); mdbx_dpl_clear(dl);
txn->tw.dirtylist = dl; txn->tw.dirtylist = dl;
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
#define DP_SORT_CMP(first, last) ((first).pgno < (last).pgno) #define DP_SORT_CMP(first, last) ((first).pgno < (last).pgno)
SORT_IMPL(dp_sort, false, MDBX_DP, DP_SORT_CMP) SORT_IMPL(dp_sort, false, MDBX_dp, DP_SORT_CMP)
static __always_inline MDBX_DPL mdbx_dpl_sort(MDBX_DPL dl) { static __always_inline MDBX_dpl *mdbx_dpl_sort(MDBX_dpl *dl) {
assert(dl->length <= MDBX_PGL_LIMIT); assert(dl->length <= MDBX_PGL_LIMIT);
assert(dl->sorted <= dl->length); assert(dl->sorted <= dl->length);
if (dl->sorted != dl->length) { if (dl->sorted != dl->length) {
dl->sorted = dl->length; dl->sorted = dl->length;
dp_sort(dl + 1, dl + dl->length + 1); dp_sort(dl->items + 1, dl->items + dl->length + 1);
} }
return dl; return dl;
} }
@ -3021,11 +3035,11 @@ static __always_inline MDBX_DPL mdbx_dpl_sort(MDBX_DPL dl) {
/* Returns the index of the first dirty-page whose pgno /* Returns the index of the first dirty-page whose pgno
* member is greater than or equal to id. */ * member is greater than or equal to id. */
#define DP_SEARCH_CMP(dp, id) ((dp).pgno < (id)) #define DP_SEARCH_CMP(dp, id) ((dp).pgno < (id))
SEARCH_IMPL(dp_bsearch, MDBX_DP, pgno_t, DP_SEARCH_CMP) SEARCH_IMPL(dp_bsearch, MDBX_dp, pgno_t, DP_SEARCH_CMP)
static unsigned __hot mdbx_dpl_search(MDBX_DPL dl, pgno_t pgno) { static unsigned __hot mdbx_dpl_search(MDBX_dpl *dl, pgno_t pgno) {
if (mdbx_audit_enabled()) { if (mdbx_audit_enabled()) {
for (const MDBX_DP *ptr = dl + dl->sorted; --ptr > dl;) { for (const MDBX_dp *ptr = dl->items + dl->sorted; --ptr > dl->items;) {
assert(ptr[0].pgno < ptr[1].pgno); assert(ptr[0].pgno < ptr[1].pgno);
assert(ptr[0].pgno >= NUM_METAS); assert(ptr[0].pgno >= NUM_METAS);
} }
@ -3035,21 +3049,21 @@ static unsigned __hot mdbx_dpl_search(MDBX_DPL dl, pgno_t pgno) {
default: default:
/* sort a whole */ /* sort a whole */
dl->sorted = dl->length; dl->sorted = dl->length;
dp_sort(dl + 1, dl + dl->length + 1); dp_sort(dl->items + 1, dl->items + dl->length + 1);
__fallthrough; /* fall through */ __fallthrough; /* fall through */
case 0: case 0:
/* whole sorted cases */ /* whole sorted cases */
if (mdbx_audit_enabled()) { if (mdbx_audit_enabled()) {
for (const MDBX_DP *ptr = dl + dl->length; --ptr > dl;) { for (const MDBX_dp *ptr = dl->items + dl->length; --ptr > dl->items;) {
assert(ptr[0].pgno < ptr[1].pgno); assert(ptr[0].pgno < ptr[1].pgno);
assert(ptr[0].pgno >= NUM_METAS); assert(ptr[0].pgno >= NUM_METAS);
} }
} }
return (unsigned)(dp_bsearch(dl + 1, dl->length, pgno) - dl); return (unsigned)(dp_bsearch(dl->items + 1, dl->length, pgno) - dl->items);
#define LINEAR_SEARCH_CASE(N) \ #define LINEAR_SEARCH_CASE(N) \
case N: \ case N: \
if (dl[dl->length - N + 1].pgno == pgno) \ if (dl->items[dl->length - N + 1].pgno == pgno) \
return dl->length - N + 1; \ return dl->length - N + 1; \
__fallthrough __fallthrough
@ -3070,28 +3084,29 @@ static unsigned __hot mdbx_dpl_search(MDBX_DPL dl, pgno_t pgno) {
LINEAR_SEARCH_CASE(3); /* fall through */ LINEAR_SEARCH_CASE(3); /* fall through */
LINEAR_SEARCH_CASE(2); /* fall through */ LINEAR_SEARCH_CASE(2); /* fall through */
case 1: case 1:
if (dl[dl->length].pgno == pgno) if (dl->items[dl->length].pgno == pgno)
return dl->length; return dl->length;
/* continue bsearch on the sorted part */ /* continue bsearch on the sorted part */
return (unsigned)(dp_bsearch(dl + 1, dl->sorted, pgno) - dl); return (unsigned)(dp_bsearch(dl->items + 1, dl->sorted, pgno) - dl->items);
} }
} }
static __always_inline MDBX_page *mdbx_dpl_find(MDBX_DPL dl, pgno_t pgno) { static __always_inline MDBX_page *mdbx_dpl_find(MDBX_dpl *dl, pgno_t pgno) {
const unsigned i = mdbx_dpl_search(dl, pgno); const unsigned i = mdbx_dpl_search(dl, pgno);
assert((int)i > 0); assert((int)i > 0);
return (i <= dl->length && dl[i].pgno == pgno) ? dl[i].ptr : nullptr; return (i <= dl->length && dl->items[i].pgno == pgno) ? dl->items[i].ptr
: nullptr;
} }
static __hot MDBX_page *mdbx_dpl_remove(MDBX_DPL dl, pgno_t prno) { static __hot MDBX_page *mdbx_dpl_remove(MDBX_dpl *dl, pgno_t prno) {
unsigned i = mdbx_dpl_search(dl, prno); unsigned i = mdbx_dpl_search(dl, prno);
assert((int)i > 0); assert((int)i > 0);
MDBX_page *mp = nullptr; MDBX_page *mp = nullptr;
if (i <= dl->length && dl[i].pgno == prno) { if (i <= dl->length && dl->items[i].pgno == prno) {
dl->sorted -= dl->sorted >= i; dl->sorted -= dl->sorted >= i;
mp = dl[i].ptr; mp = dl->items[i].ptr;
while (i < dl->length) { while (i < dl->length) {
dl[i] = dl[i + 1]; dl->items[i] = dl->items[i + 1];
++i; ++i;
} }
dl->length -= 1; dl->length -= 1;
@ -3101,26 +3116,26 @@ static __hot MDBX_page *mdbx_dpl_remove(MDBX_DPL dl, pgno_t prno) {
static __always_inline int __must_check_result static __always_inline int __must_check_result
mdbx_dpl_append(MDBX_txn *txn, pgno_t pgno, MDBX_page *page) { mdbx_dpl_append(MDBX_txn *txn, pgno_t pgno, MDBX_page *page) {
MDBX_DPL dl = txn->tw.dirtylist; MDBX_dpl *const dl = txn->tw.dirtylist;
assert(dl->length <= MDBX_PGL_LIMIT); assert(dl->length <= MDBX_PGL_LIMIT);
if (mdbx_audit_enabled()) { if (mdbx_audit_enabled()) {
for (unsigned i = dl->length; i > 0; --i) { for (unsigned i = dl->length; i > 0; --i) {
assert(dl[i].pgno != pgno); assert(dl->items[i].pgno != pgno);
if (unlikely(dl[i].pgno == pgno)) if (unlikely(dl->items[i].pgno == pgno))
return MDBX_PROBLEM; return MDBX_PROBLEM;
} }
} }
if (unlikely(dl->length == MDBX_DPL_TXNFULL)) if (unlikely(dl->length == dl->limit))
return MDBX_TXN_FULL; return MDBX_TXN_FULL;
/* append page */ /* append page */
const unsigned n = dl->length + 1; const unsigned n = dl->length + 1;
if (n == 1 || (dl->sorted >= dl->length && dl[n - 1].pgno < pgno)) if (n == 1 || (dl->sorted >= dl->length && dl->items[n - 1].pgno < pgno))
dl->sorted = n; dl->sorted = n;
dl->length = n; dl->length = n;
dl[n].pgno = pgno; dl->items[n].pgno = pgno;
dl[n].ptr = page; dl->items[n].ptr = page;
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
@ -3731,11 +3746,11 @@ static void mdbx_dpage_free(MDBX_env *env, MDBX_page *dp, unsigned pages) {
/* Return all dirty pages to dpage list */ /* Return all dirty pages to dpage list */
static void mdbx_dlist_free(MDBX_txn *txn) { static void mdbx_dlist_free(MDBX_txn *txn) {
MDBX_env *env = txn->mt_env; MDBX_env *env = txn->mt_env;
const MDBX_DPL dl = txn->tw.dirtylist; MDBX_dpl *const dl = txn->tw.dirtylist;
const size_t n = dl->length; const size_t n = dl->length;
for (size_t i = 1; i <= n; i++) { for (size_t i = 1; i <= n; i++) {
MDBX_page *dp = dl[i].ptr; MDBX_page *dp = dl->items[i].ptr;
mdbx_dpage_free(env, dp, IS_OVERFLOW(dp) ? dp->mp_pages : 1); mdbx_dpage_free(env, dp, IS_OVERFLOW(dp) ? dp->mp_pages : 1);
} }
@ -3755,13 +3770,15 @@ static __cold __maybe_unused bool mdbx_dirtylist_check(MDBX_txn *txn) {
if (!mdbx_audit_enabled()) if (!mdbx_audit_enabled())
return true; return true;
MDBX_dpl *const dl = txn->tw.dirtylist;
unsigned loose = 0; unsigned loose = 0;
for (unsigned i = txn->tw.dirtylist->length; i > 0; --i) { for (unsigned i = dl->length; i > 0; --i) {
const MDBX_page *const dp = txn->tw.dirtylist[i].ptr; const MDBX_page *const dp = dl->items[i].ptr;
if (!dp) if (!dp)
continue; continue;
mdbx_tassert(txn, dp->mp_pgno == txn->tw.dirtylist[i].pgno);
if (unlikely(dp->mp_pgno != txn->tw.dirtylist[i].pgno)) mdbx_tassert(txn, dp->mp_pgno == dl->items[i].pgno);
if (unlikely(dp->mp_pgno != dl->items[i].pgno))
return false; return false;
mdbx_tassert(txn, dp->mp_flags & P_DIRTY); mdbx_tassert(txn, dp->mp_flags & P_DIRTY);
@ -3779,9 +3796,9 @@ static __cold __maybe_unused bool mdbx_dirtylist_check(MDBX_txn *txn) {
if (unlikely(txn->mt_next_pgno < dp->mp_pgno + num)) if (unlikely(txn->mt_next_pgno < dp->mp_pgno + num))
return false; return false;
if (i < txn->tw.dirtylist->sorted) { if (i < dl->sorted) {
mdbx_tassert(txn, txn->tw.dirtylist[i + 1].pgno >= dp->mp_pgno + num); mdbx_tassert(txn, dl->items[i + 1].pgno >= dp->mp_pgno + num);
if (unlikely(txn->tw.dirtylist[i + 1].pgno < dp->mp_pgno + num)) if (unlikely(dl->items[i + 1].pgno < dp->mp_pgno + num))
return false; return false;
} }
@ -3804,10 +3821,9 @@ static __cold __maybe_unused bool mdbx_dirtylist_check(MDBX_txn *txn) {
if (unlikely(loose != txn->tw.loose_count)) if (unlikely(loose != txn->tw.loose_count))
return false; return false;
if (txn->tw.dirtylist->length - txn->tw.dirtylist->sorted < 16) { if (dl->length - dl->sorted < 16) {
for (unsigned i = 1; i <= MDBX_PNL_SIZE(txn->tw.retired_pages); ++i) { for (unsigned i = 1; i <= MDBX_PNL_SIZE(txn->tw.retired_pages); ++i) {
const MDBX_page *const dp = const MDBX_page *const dp = mdbx_dpl_find(dl, txn->tw.retired_pages[i]);
mdbx_dpl_find(txn->tw.dirtylist, txn->tw.retired_pages[i]);
mdbx_tassert(txn, !dp); mdbx_tassert(txn, !dp);
if (unlikely(dp)) if (unlikely(dp))
return false; return false;
@ -3850,7 +3866,7 @@ static void mdbx_refund_loose(MDBX_txn *txn) {
mdbx_tassert(txn, txn->tw.loose_pages != nullptr); mdbx_tassert(txn, txn->tw.loose_pages != nullptr);
mdbx_tassert(txn, txn->tw.loose_count > 0); mdbx_tassert(txn, txn->tw.loose_count > 0);
const MDBX_DPL dl = txn->tw.dirtylist; MDBX_dpl *const dl = txn->tw.dirtylist;
mdbx_tassert(txn, dl->length >= txn->tw.loose_count); mdbx_tassert(txn, dl->length >= txn->tw.loose_count);
mdbx_tassert(txn, txn->tw.spill_pages == nullptr || mdbx_tassert(txn, txn->tw.spill_pages == nullptr ||
dl->length >= MDBX_PNL_SIZE(txn->tw.spill_pages)); dl->length >= MDBX_PNL_SIZE(txn->tw.spill_pages));
@ -3911,37 +3927,37 @@ static void mdbx_refund_loose(MDBX_txn *txn) {
w = 0; w = 0;
if (dl->sorted) { if (dl->sorted) {
do { do {
if (dl[++r].pgno < most) { if (dl->items[++r].pgno < most) {
if (++w != r) if (++w != r)
dl[w] = dl[r]; dl->items[w] = dl->items[r];
} }
} while (r < dl->sorted); } while (r < dl->sorted);
dl->sorted = w; dl->sorted = w;
} }
while (r < dl->length) { while (r < dl->length) {
if (dl[++r].pgno < most) { if (dl->items[++r].pgno < most) {
if (++w != r) if (++w != r)
dl[w] = dl[r]; dl->items[w] = dl->items[r];
} }
} }
dl->length = w; dl->length = w;
mdbx_tassert(txn, txn->mt_parent || mdbx_tassert(txn, txn->mt_parent ||
txn->tw.dirtyroom + txn->tw.dirtylist->length == txn->tw.dirtyroom + dl->length == dl->limit);
MDBX_DPL_TXNFULL);
goto unlink_loose; goto unlink_loose;
} }
} else { } else {
/* Dirtylist is mostly sorted, just refund loose pages at the end. */ /* Dirtylist is mostly sorted, just refund loose pages at the end. */
mdbx_dpl_sort(dl); mdbx_dpl_sort(dl);
mdbx_tassert(txn, dl->length < 2 || dl[1].pgno < dl[dl->length].pgno); mdbx_tassert(txn, dl->length < 2 ||
dl->items[1].pgno < dl->items[dl->length].pgno);
mdbx_tassert(txn, dl->sorted == dl->length); mdbx_tassert(txn, dl->sorted == dl->length);
/* Scan dirtylist tail-forward and cutoff suitable pages. */ /* Scan dirtylist tail-forward and cutoff suitable pages. */
while (dl->length && dl[dl->length].pgno == txn->mt_next_pgno - 1 && while (dl->length && dl->items[dl->length].pgno == txn->mt_next_pgno - 1 &&
dl[dl->length].ptr->mp_flags == (P_LOOSE | P_DIRTY)) { dl->items[dl->length].ptr->mp_flags == (P_LOOSE | P_DIRTY)) {
MDBX_page *dp = dl[dl->length].ptr; MDBX_page *dp = dl->items[dl->length].ptr;
mdbx_debug("refund-sorted page %" PRIaPGNO, dp->mp_pgno); mdbx_debug("refund-sorted page %" PRIaPGNO, dp->mp_pgno);
mdbx_tassert(txn, dp->mp_pgno == dl[dl->length].pgno); mdbx_tassert(txn, dp->mp_pgno == dl->items[dl->length].pgno);
dl->length -= 1; dl->length -= 1;
} }
@ -3952,8 +3968,7 @@ static void mdbx_refund_loose(MDBX_txn *txn) {
txn->tw.dirtyroom += refunded; txn->tw.dirtyroom += refunded;
txn->mt_next_pgno -= refunded; txn->mt_next_pgno -= refunded;
mdbx_tassert(txn, txn->mt_parent || mdbx_tassert(txn, txn->mt_parent ||
txn->tw.dirtyroom + txn->tw.dirtylist->length == txn->tw.dirtyroom + dl->length == dl->limit);
MDBX_DPL_TXNFULL);
/* Filter-out loose chain & dispose refunded pages. */ /* Filter-out loose chain & dispose refunded pages. */
unlink_loose: unlink_loose:
@ -3972,9 +3987,8 @@ static void mdbx_refund_loose(MDBX_txn *txn) {
} }
mdbx_tassert(txn, mdbx_dirtylist_check(txn)); mdbx_tassert(txn, mdbx_dirtylist_check(txn));
mdbx_tassert(txn, txn->mt_parent || mdbx_tassert(txn,
txn->tw.dirtyroom + txn->tw.dirtylist->length == txn->mt_parent || txn->tw.dirtyroom + dl->length == dl->limit);
MDBX_DPL_TXNFULL);
if (suitable != onstack) if (suitable != onstack)
mdbx_pnl_free(suitable); mdbx_pnl_free(suitable);
txn->tw.loose_refund_wl = txn->mt_next_pgno; txn->tw.loose_refund_wl = txn->mt_next_pgno;
@ -4096,7 +4110,7 @@ static int mdbx_page_loose(MDBX_txn *txn, MDBX_page *mp) {
txn->tw.dirtyroom++; txn->tw.dirtyroom++;
mdbx_tassert(txn, txn->mt_parent || mdbx_tassert(txn, txn->mt_parent ||
txn->tw.dirtyroom + txn->tw.dirtylist->length == txn->tw.dirtyroom + txn->tw.dirtylist->length ==
MDBX_DPL_TXNFULL); txn->tw.dirtylist->limit);
if ((txn->mt_flags & MDBX_WRITEMAP) == 0) if ((txn->mt_flags & MDBX_WRITEMAP) == 0)
mdbx_dpage_free(txn->mt_env, mp, npages); mdbx_dpage_free(txn->mt_env, mp, npages);
} }
@ -4296,7 +4310,7 @@ static int mdbx_page_spill(MDBX_cursor *mc, const MDBX_val *key,
return MDBX_SUCCESS; return MDBX_SUCCESS;
MDBX_txn *txn = mc->mc_txn; MDBX_txn *txn = mc->mc_txn;
MDBX_DPL dl = txn->tw.dirtylist; MDBX_dpl *const dl = txn->tw.dirtylist;
/* Estimate how much space this op will take */ /* Estimate how much space this op will take */
pgno_t i = mc->mc_db->md_depth; pgno_t i = mc->mc_db->md_depth;
@ -4318,8 +4332,8 @@ static int mdbx_page_spill(MDBX_cursor *mc, const MDBX_val *key,
* of those pages will need to be used again. So now we spill only 1/8th * of those pages will need to be used again. So now we spill only 1/8th
* of the dirty pages. Testing revealed this to be a good tradeoff, * of the dirty pages. Testing revealed this to be a good tradeoff,
* better than 1/2, 1/4, or 1/10. */ * better than 1/2, 1/4, or 1/10. */
if (need < MDBX_DPL_TXNFULL / 8) if (need < dl->limit / 8)
need = MDBX_DPL_TXNFULL / 8; need = dl->limit / 8;
if (!txn->tw.spill_pages) { if (!txn->tw.spill_pages) {
txn->tw.spill_pages = mdbx_pnl_alloc(need); txn->tw.spill_pages = mdbx_pnl_alloc(need);
@ -4344,8 +4358,8 @@ static int mdbx_page_spill(MDBX_cursor *mc, const MDBX_val *key,
/* Save the page IDs of all the pages we're flushing */ /* Save the page IDs of all the pages we're flushing */
/* flush from the tail forward, this saves a lot of shifting later on. */ /* flush from the tail forward, this saves a lot of shifting later on. */
for (i = dl->length; i && need; i--) { for (i = dl->length; i && need; i--) {
pgno_t pn = dl[i].pgno << 1; pgno_t pn = dl->items[i].pgno << 1;
MDBX_page *dp = dl[i].ptr; MDBX_page *dp = dl->items[i].ptr;
if (dp->mp_flags & (P_LOOSE | P_KEEP)) if (dp->mp_flags & (P_LOOSE | P_KEEP))
continue; continue;
/* Can't spill twice, /* Can't spill twice,
@ -4672,7 +4686,7 @@ static int __must_check_result mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp) {
txn->tw.dirtyroom--; txn->tw.dirtyroom--;
mdbx_tassert(txn, txn->mt_parent || mdbx_tassert(txn, txn->mt_parent ||
txn->tw.dirtyroom + txn->tw.dirtylist->length == txn->tw.dirtyroom + txn->tw.dirtylist->length ==
MDBX_DPL_TXNFULL); txn->tw.dirtylist->limit);
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
@ -5367,8 +5381,6 @@ skip_cache:
} }
/* Don't try to coalesce too much. */ /* Don't try to coalesce too much. */
if (unlikely(re_len > MDBX_DPL_TXNFULL / 42))
break;
if (re_len /* current size */ >= env->me_maxgc_ov1page || if (re_len /* current size */ >= env->me_maxgc_ov1page ||
(re_len > prev_re_len && re_len - prev_re_len /* delta from prev */ >= (re_len > prev_re_len && re_len - prev_re_len /* delta from prev */ >=
env->me_maxgc_ov1page / 2)) env->me_maxgc_ov1page / 2))
@ -5710,7 +5722,7 @@ __hot static int mdbx_page_touch(MDBX_cursor *mc) {
} }
mdbx_debug("clone db %d page %" PRIaPGNO, DDBI(mc), mp->mp_pgno); mdbx_debug("clone db %d page %" PRIaPGNO, DDBI(mc), mp->mp_pgno);
mdbx_cassert(mc, txn->tw.dirtylist->length <= MDBX_DPL_TXNFULL); mdbx_cassert(mc, txn->tw.dirtylist->length <= txn->tw.dirtylist->limit);
/* No - copy it */ /* No - copy it */
np = mdbx_page_malloc(txn, 1); np = mdbx_page_malloc(txn, 1);
if (unlikely(!np)) { if (unlikely(!np)) {
@ -6399,7 +6411,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, const unsigned flags) {
txn->mt_child = NULL; txn->mt_child = NULL;
txn->tw.loose_pages = NULL; txn->tw.loose_pages = NULL;
txn->tw.loose_count = 0; txn->tw.loose_count = 0;
txn->tw.dirtyroom = MDBX_DPL_TXNFULL; txn->tw.dirtyroom = txn->tw.dirtylist->limit;
mdbx_dpl_clear(txn->tw.dirtylist); mdbx_dpl_clear(txn->tw.dirtylist);
MDBX_PNL_SIZE(txn->tw.retired_pages) = 0; MDBX_PNL_SIZE(txn->tw.retired_pages) = 0;
txn->tw.spill_pages = NULL; txn->tw.spill_pages = NULL;
@ -6683,7 +6695,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags,
mdbx_tassert(parent, mdbx_tassert(parent,
parent->mt_parent || parent->mt_parent ||
parent->tw.dirtyroom + parent->tw.dirtylist->length == parent->tw.dirtyroom + parent->tw.dirtylist->length ==
MDBX_DPL_TXNFULL); parent->tw.dirtylist->limit);
env->me_txn = txn; env->me_txn = txn;
rc = mdbx_cursor_shadow(parent, txn); rc = mdbx_cursor_shadow(parent, txn);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
@ -6807,7 +6819,7 @@ int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) {
: MDBX_PNL_SIZE(txn->tw.retired_pages)); : MDBX_PNL_SIZE(txn->tw.retired_pages));
info->txn_space_leftover = pgno2bytes(env, txn->tw.dirtyroom); info->txn_space_leftover = pgno2bytes(env, txn->tw.dirtyroom);
info->txn_space_dirty = info->txn_space_dirty =
pgno2bytes(env, MDBX_DPL_TXNFULL - txn->tw.dirtyroom); pgno2bytes(env, txn->tw.dirtylist->limit - txn->tw.dirtyroom);
info->txn_reader_lag = INT64_MAX; info->txn_reader_lag = INT64_MAX;
MDBX_lockinfo *const lck = env->me_lck; MDBX_lockinfo *const lck = env->me_lck;
if (scan_rlt && lck) { if (scan_rlt && lck) {
@ -7298,7 +7310,7 @@ retry_noaccount:
txn, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist, txn->mt_next_pgno)); txn, mdbx_pnl_check4assert(txn->tw.reclaimed_pglist, txn->mt_next_pgno));
mdbx_tassert(txn, mdbx_dirtylist_check(txn)); mdbx_tassert(txn, mdbx_dirtylist_check(txn));
mdbx_tassert(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == mdbx_tassert(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length ==
MDBX_DPL_TXNFULL); txn->tw.dirtylist->limit);
if (unlikely(/* paranoia */ loop > ((MDBX_DEBUG > 0) ? 9 : 99))) { if (unlikely(/* paranoia */ loop > ((MDBX_DEBUG > 0) ? 9 : 99))) {
mdbx_error("too more loops %u, bailout", loop); mdbx_error("too more loops %u, bailout", loop);
rc = MDBX_PROBLEM; rc = MDBX_PROBLEM;
@ -7394,7 +7406,7 @@ retry_noaccount:
txn->mt_next_pgno)); txn->mt_next_pgno));
mdbx_tassert(txn, mdbx_dirtylist_check(txn)); mdbx_tassert(txn, mdbx_dirtylist_check(txn));
mdbx_tassert(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == mdbx_tassert(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length ==
MDBX_DPL_TXNFULL); txn->tw.dirtylist->limit);
if (mdbx_audit_enabled()) { if (mdbx_audit_enabled()) {
rc = mdbx_audit_ex(txn, retired_stored, false); rc = mdbx_audit_ex(txn, retired_stored, false);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
@ -7452,16 +7464,17 @@ retry_noaccount:
} }
/* filter-out list of dirty-pages from loose-pages */ /* filter-out list of dirty-pages from loose-pages */
const MDBX_DPL dl = txn->tw.dirtylist; MDBX_dpl *const dl = txn->tw.dirtylist;
unsigned w = 0; unsigned w = 0;
for (unsigned r = w; ++r <= dl->length;) { for (unsigned r = w; ++r <= dl->length;) {
MDBX_page *dp = dl[r].ptr; MDBX_page *dp = dl->items[r].ptr;
mdbx_tassert(txn, (dp->mp_flags & P_DIRTY)); mdbx_tassert(txn, (dp->mp_flags & P_DIRTY));
mdbx_tassert(txn, dl[r].pgno + (IS_OVERFLOW(dp) ? dp->mp_pages : 1) <= mdbx_tassert(txn,
dl->items[r].pgno + (IS_OVERFLOW(dp) ? dp->mp_pages : 1) <=
txn->mt_next_pgno); txn->mt_next_pgno);
if ((dp->mp_flags & P_LOOSE) == 0) { if ((dp->mp_flags & P_LOOSE) == 0) {
if (++w != r) if (++w != r)
dl[w] = dl[r]; dl->items[w] = dl->items[r];
} else { } else {
mdbx_tassert(txn, dp->mp_flags == (P_LOOSE | P_DIRTY)); mdbx_tassert(txn, dp->mp_flags == (P_LOOSE | P_DIRTY));
if ((env->me_flags & MDBX_WRITEMAP) == 0) if ((env->me_flags & MDBX_WRITEMAP) == 0)
@ -7978,7 +7991,7 @@ static int mdbx_flush_iov(MDBX_txn *const txn, struct iovec *iov,
* Returns 0 on success, non-zero on failure. */ * Returns 0 on success, non-zero on failure. */
__hot static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) { __hot static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
struct iovec iov[MDBX_COMMIT_PAGES]; struct iovec iov[MDBX_COMMIT_PAGES];
const MDBX_DPL dl = (keep || txn->tw.loose_count > 1) MDBX_dpl *const dl = (keep || txn->tw.loose_count > 1)
? mdbx_dpl_sort(txn->tw.dirtylist) ? mdbx_dpl_sort(txn->tw.dirtylist)
: txn->tw.dirtylist; : txn->tw.dirtylist;
MDBX_env *const env = txn->mt_env; MDBX_env *const env = txn->mt_env;
@ -7989,7 +8002,7 @@ __hot static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
size_t iov_off = 0; size_t iov_off = 0;
unsigned r, w; unsigned r, w;
for (r = w = keep; ++r <= dl->length;) { for (r = w = keep; ++r <= dl->length;) {
MDBX_page *dp = dl[r].ptr; MDBX_page *dp = dl->items[r].ptr;
mdbx_tassert(txn, mdbx_tassert(txn,
dp->mp_pgno >= MIN_PAGENO && dp->mp_pgno < txn->mt_next_pgno); dp->mp_pgno >= MIN_PAGENO && dp->mp_pgno < txn->mt_next_pgno);
mdbx_tassert(txn, dp->mp_flags & P_DIRTY); mdbx_tassert(txn, dp->mp_flags & P_DIRTY);
@ -7997,11 +8010,11 @@ __hot static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
/* Don't flush this page yet */ /* Don't flush this page yet */
if (dp->mp_flags & P_KEEP) { if (dp->mp_flags & P_KEEP) {
dp->mp_flags &= ~P_KEEP; dp->mp_flags &= ~P_KEEP;
dl[++w] = dl[r]; dl->items[++w] = dl->items[r];
continue; continue;
} }
if (dp->mp_flags & P_LOOSE) { if (dp->mp_flags & P_LOOSE) {
dl[++w] = dl[r]; dl->items[++w] = dl->items[r];
continue; continue;
} }
@ -8066,7 +8079,7 @@ __hot static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
dl->length = w; dl->length = w;
mdbx_tassert(txn, txn->mt_parent || mdbx_tassert(txn, txn->mt_parent ||
txn->tw.dirtyroom + txn->tw.dirtylist->length == txn->tw.dirtyroom + txn->tw.dirtylist->length ==
MDBX_DPL_TXNFULL); txn->tw.dirtylist->limit);
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
@ -8225,9 +8238,10 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
ts_1 = latency ? mdbx_osal_monotime() : 0; ts_1 = latency ? mdbx_osal_monotime() : 0;
/* Remove refunded pages from parent's dirty & spill lists */ /* Remove refunded pages from parent's dirty & spill lists */
MDBX_DPL dst = mdbx_dpl_sort(parent->tw.dirtylist); MDBX_dpl *const dst = mdbx_dpl_sort(parent->tw.dirtylist);
while (dst->length && dst[dst->length].pgno >= parent->mt_next_pgno) { while (dst->length &&
MDBX_page *mp = dst[dst->length].ptr; dst->items[dst->length].pgno >= parent->mt_next_pgno) {
MDBX_page *mp = dst->items[dst->length].ptr;
if (mp && (txn->mt_env->me_flags & MDBX_WRITEMAP) == 0) if (mp && (txn->mt_env->me_flags & MDBX_WRITEMAP) == 0)
mdbx_dpage_free(txn->mt_env, mp, IS_OVERFLOW(mp) ? mp->mp_pages : 1); mdbx_dpage_free(txn->mt_env, mp, IS_OVERFLOW(mp) ? mp->mp_pages : 1);
dst->length -= 1; dst->length -= 1;
@ -8237,7 +8251,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
mdbx_tassert(parent, mdbx_tassert(parent,
parent->mt_parent || parent->mt_parent ||
parent->tw.dirtyroom + parent->tw.dirtylist->length == parent->tw.dirtyroom + parent->tw.dirtylist->length ==
MDBX_DPL_TXNFULL); parent->tw.dirtylist->limit);
if (parent->tw.spill_pages && MDBX_PNL_SIZE(parent->tw.spill_pages) > 0 && if (parent->tw.spill_pages && MDBX_PNL_SIZE(parent->tw.spill_pages) > 0 &&
MDBX_PNL_MOST(parent->tw.spill_pages) >= parent->mt_next_pgno << 1) { MDBX_PNL_MOST(parent->tw.spill_pages) >= parent->mt_next_pgno << 1) {
@ -8261,7 +8275,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
} }
/* Remove anything in our dirty list from parent's spill list */ /* Remove anything in our dirty list from parent's spill list */
MDBX_DPL src = mdbx_dpl_sort(txn->tw.dirtylist); MDBX_dpl *const src = mdbx_dpl_sort(txn->tw.dirtylist);
if (likely(src->length > 0) && parent->tw.spill_pages && if (likely(src->length > 0) && parent->tw.spill_pages &&
MDBX_PNL_SIZE(parent->tw.spill_pages) > 0) { MDBX_PNL_SIZE(parent->tw.spill_pages) > 0) {
MDBX_PNL sp = parent->tw.spill_pages; MDBX_PNL sp = parent->tw.spill_pages;
@ -8274,7 +8288,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
unsigned r, w, i = 1; unsigned r, w, i = 1;
w = r = len; w = r = len;
do { do {
pgno_t pn = src[i].pgno << 1; pgno_t pn = src->items[i].pgno << 1;
while (pn > sp[r]) while (pn > sp[r])
r--; r--;
if (pn == sp[r]) { if (pn == sp[r]) {
@ -8306,15 +8320,15 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
for (int i = begin; r <= dst->length;) { for (int i = begin; r <= dst->length;) {
mdbx_tassert(txn, (sp[i] & 1) == 0); mdbx_tassert(txn, (sp[i] & 1) == 0);
const pgno_t pgno = sp[i] >> 1; const pgno_t pgno = sp[i] >> 1;
if (dst[r].pgno < pgno) { if (dst->items[r].pgno < pgno) {
dst[w++] = dst[r++]; dst->items[w++] = dst->items[r++];
} else if (dst[r].pgno > pgno) { } else if (dst->items[r].pgno > pgno) {
i += step; i += step;
if (i == end) if (i == end)
while (r <= dst->length) while (r <= dst->length)
dst[w++] = dst[r++]; dst->items[w++] = dst->items[r++];
} else { } else {
MDBX_page *dp = dst[r++].ptr; MDBX_page *dp = dst->items[r++].ptr;
if ((env->me_flags & MDBX_WRITEMAP) == 0) if ((env->me_flags & MDBX_WRITEMAP) == 0)
mdbx_dpage_free(env, dp, IS_OVERFLOW(dp) ? dp->mp_pages : 1); mdbx_dpage_free(env, dp, IS_OVERFLOW(dp) ? dp->mp_pages : 1);
} }
@ -8327,13 +8341,13 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
mdbx_tassert(parent, mdbx_tassert(parent,
parent->mt_parent || parent->mt_parent ||
parent->tw.dirtyroom + parent->tw.dirtylist->length == parent->tw.dirtyroom + parent->tw.dirtylist->length ==
MDBX_DPL_TXNFULL); parent->tw.dirtylist->limit);
unsigned d, s, l; unsigned d, s, l;
/* Find length of merging our dirty list with parent's */ /* Find length of merging our dirty list with parent's */
for (l = 0, d = dst->length, s = src->length; d > 0 && s > 0; ++l) { for (l = 0, d = dst->length, s = src->length; d > 0 && s > 0; ++l) {
const pgno_t s_pgno = src[s].pgno; const pgno_t s_pgno = src->items[s].pgno;
const pgno_t d_pgno = dst[d].pgno; const pgno_t d_pgno = dst->items[d].pgno;
d -= d_pgno >= s_pgno; d -= d_pgno >= s_pgno;
s -= d_pgno <= s_pgno; s -= d_pgno <= s_pgno;
} }
@ -8344,24 +8358,24 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
/* Merge our dirty list into parent's */ /* Merge our dirty list into parent's */
for (d = dst->length, s = src->length; d > 0 && s > 0; --l) { for (d = dst->length, s = src->length; d > 0 && s > 0; --l) {
if (dst[d].pgno > src[s].pgno) if (dst->items[d].pgno > src->items[s].pgno)
dst[l] = dst[d--]; dst->items[l] = dst->items[d--];
else if (dst[d].pgno < src[s].pgno) else if (dst->items[d].pgno < src->items[s].pgno)
dst[l] = src[s--]; dst->items[l] = src->items[s--];
else { else {
MDBX_page *dp = dst[d--].ptr; MDBX_page *dp = dst->items[d--].ptr;
if (dp && (env->me_flags & MDBX_WRITEMAP) == 0) if (dp && (env->me_flags & MDBX_WRITEMAP) == 0)
mdbx_dpage_free(env, dp, IS_OVERFLOW(dp) ? dp->mp_pgno : 1); mdbx_dpage_free(env, dp, IS_OVERFLOW(dp) ? dp->mp_pgno : 1);
dst[l] = src[s--]; dst->items[l] = src->items[s--];
} }
} }
if (s) { if (s) {
do do
dst[l--] = src[s--]; dst->items[l--] = src->items[s--];
while (s > 0); while (s > 0);
} else if (d) { } else if (d) {
do do
dst[l--] = dst[d--]; dst->items[l--] = dst->items[d--];
while (d > 0); while (d > 0);
} }
assert(l == 0); assert(l == 0);
@ -8370,7 +8384,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
mdbx_tassert(parent, mdbx_tassert(parent,
parent->mt_parent || parent->mt_parent ||
parent->tw.dirtyroom + parent->tw.dirtylist->length == parent->tw.dirtyroom + parent->tw.dirtylist->length ==
MDBX_DPL_TXNFULL); parent->tw.dirtylist->limit);
if (txn->tw.spill_pages) { if (txn->tw.spill_pages) {
if (parent->tw.spill_pages) { if (parent->tw.spill_pages) {
@ -8434,7 +8448,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
} }
mdbx_tassert(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == mdbx_tassert(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length ==
MDBX_DPL_TXNFULL); txn->tw.dirtylist->limit);
mdbx_cursors_eot(txn, false); mdbx_cursors_eot(txn, false);
end_mode |= MDBX_END_EOTDONE; end_mode |= MDBX_END_EOTDONE;
@ -9227,10 +9241,10 @@ static void __cold mdbx_setup_pagesize(MDBX_env *env, const size_t pagesize) {
env->me_psize = (unsigned)pagesize; env->me_psize = (unsigned)pagesize;
STATIC_ASSERT(MAX_GC1OVPAGE(MIN_PAGESIZE) > 4); STATIC_ASSERT(MAX_GC1OVPAGE(MIN_PAGESIZE) > 4);
STATIC_ASSERT(MAX_GC1OVPAGE(MAX_PAGESIZE) < MDBX_DPL_TXNFULL); STATIC_ASSERT(MAX_GC1OVPAGE(MAX_PAGESIZE) < MDBX_DPL_TXNFULL / 4);
const intptr_t maxgc_ov1page = (pagesize - PAGEHDRSZ) / sizeof(pgno_t) - 1; const intptr_t maxgc_ov1page = (pagesize - PAGEHDRSZ) / sizeof(pgno_t) - 1;
mdbx_ensure(env, mdbx_ensure(env, maxgc_ov1page > 42 &&
maxgc_ov1page > 42 && maxgc_ov1page < (intptr_t)MDBX_DPL_TXNFULL); maxgc_ov1page < (intptr_t)MDBX_DPL_TXNFULL / 4);
env->me_maxgc_ov1page = (unsigned)maxgc_ov1page; env->me_maxgc_ov1page = (unsigned)maxgc_ov1page;
STATIC_ASSERT(LEAF_NODEMAX(MIN_PAGESIZE) > sizeof(MDBX_db) + NODESIZE + 42); STATIC_ASSERT(LEAF_NODEMAX(MIN_PAGESIZE) > sizeof(MDBX_db) + NODESIZE + 42);
@ -15566,7 +15580,7 @@ static __cold int mdbx_cursor_check(MDBX_cursor *mc, unsigned options) {
mdbx_tassert(mc->mc_txn, mc->mc_txn->mt_parent || mdbx_tassert(mc->mc_txn, mc->mc_txn->mt_parent ||
mc->mc_txn->tw.dirtyroom + mc->mc_txn->tw.dirtyroom +
mc->mc_txn->tw.dirtylist->length == mc->mc_txn->tw.dirtylist->length ==
MDBX_DPL_TXNFULL); mc->mc_txn->tw.dirtylist->limit);
mdbx_cassert(mc, mc->mc_top == mc->mc_snum - 1); mdbx_cassert(mc, mc->mc_top == mc->mc_snum - 1);
if (unlikely(mc->mc_top != mc->mc_snum - 1)) if (unlikely(mc->mc_top != mc->mc_snum - 1))
return MDBX_CURSOR_FULL; return MDBX_CURSOR_FULL;

View File

@ -657,21 +657,22 @@ typedef pgno_t *MDBX_PNL;
typedef txnid_t *MDBX_TXL; typedef txnid_t *MDBX_TXL;
/* An Dirty-Page list item is an pgno/pointer pair. */ /* An Dirty-Page list item is an pgno/pointer pair. */
typedef union MDBX_DP { typedef struct MDBX_dp {
__anonymous_struct_extension__ struct {
pgno_t pgno; pgno_t pgno;
MDBX_page *ptr; MDBX_page *ptr;
}; } MDBX_dp;
__anonymous_struct_extension__ struct {
/* An DPL (dirty-page list) is a sorted array of MDBX_DPs. */
typedef struct MDBX_dpl {
unsigned sorted; unsigned sorted;
unsigned length; unsigned length;
}; unsigned allocated;
} MDBX_DP; unsigned limit;
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
/* An DPL (dirty-page list) is a sorted array of MDBX_DPs. (!defined(__cplusplus) && defined(_MSC_VER))
* The first element's length member is a count of how many actual MDBX_dp items[] /* dynamic size with holes at zero and after the last */;
* elements are in the array. */ #endif
typedef MDBX_DP *MDBX_DPL; } MDBX_dpl;
/* PNL sizes */ /* PNL sizes */
#define MDBX_PNL_GRANULATE 1024 #define MDBX_PNL_GRANULATE 1024
@ -814,7 +815,7 @@ struct MDBX_txn {
* dirtylist into mt_parent after freeing hidden mt_parent pages. */ * dirtylist into mt_parent after freeing hidden mt_parent pages. */
unsigned dirtyroom; unsigned dirtyroom;
/* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */ /* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
MDBX_DPL dirtylist; MDBX_dpl *dirtylist;
/* The list of reclaimed txns from GC */ /* The list of reclaimed txns from GC */
MDBX_TXL lifo_reclaimed; MDBX_TXL lifo_reclaimed;
/* The list of pages that became unused during this transaction. */ /* The list of pages that became unused during this transaction. */