mdbx: preparation for LRU-spilling.

Initial work for https://github.com/erthink/libmdbx/issues/186

Change-Id: I9a6d60cc142ff7490e2a23defe9654d7e3337edc
This commit is contained in:
Leonid Yuriev 2021-04-23 19:03:07 +03:00
parent b21ad733ea
commit d1fea74d0e
3 changed files with 132 additions and 104 deletions

View File

@ -306,6 +306,7 @@ dirname
DIRS DIRS
dirs dirs
dirtylist dirtylist
dirtylru
dirtyroom dirtyroom
diskutil diskutil
distclean distclean
@ -411,6 +412,7 @@ endif
endl endl
endmacro endmacro
endmntent endmntent
endpgno
endutxent endutxent
enewcommand enewcommand
ENODATA ENODATA
@ -846,6 +848,7 @@ LPVOID
LPWSTR LPWSTR
lrint lrint
lrt lrt
lru
lsb lsb
lseek lseek
lsize lsize

View File

@ -3210,23 +3210,25 @@ static __always_inline unsigned bytes2dpl(const ptrdiff_t bytes) {
return (unsigned)size; return (unsigned)size;
} }
static __always_inline unsigned mdbx_dpl_setlen(MDBX_dpl *dl, unsigned len) { static __always_inline unsigned dpl_setlen(MDBX_dpl *dl, unsigned len) {
static const MDBX_page dpl_stub_pageE = { static const MDBX_page dpl_stub_pageE = {
{0}, 0, P_BAD, {0}, /* pgno */ ~(pgno_t)0}; {0}, 0, P_BAD, {0}, /* pgno */ ~(pgno_t)0};
assert(dpl_stub_pageE.mp_flags == P_BAD && assert(dpl_stub_pageE.mp_flags == P_BAD &&
dpl_stub_pageE.mp_pgno == P_INVALID); dpl_stub_pageE.mp_pgno == P_INVALID);
dl->length = len; dl->length = len;
dl->items[len + 1].pgno = P_INVALID;
dl->items[len + 1].ptr = (MDBX_page *)&dpl_stub_pageE; dl->items[len + 1].ptr = (MDBX_page *)&dpl_stub_pageE;
dl->items[len + 1].pgno = P_INVALID;
dl->items[len + 1].extra = 0;
return len; return len;
} }
static __always_inline void mdbx_dpl_clear(MDBX_dpl *dl) { static __always_inline void dpl_clear(MDBX_dpl *dl) {
static const MDBX_page dpl_stub_pageB = {{0}, 0, P_BAD, {0}, /* pgno */ 0}; static const MDBX_page dpl_stub_pageB = {{0}, 0, P_BAD, {0}, /* pgno */ 0};
assert(dpl_stub_pageB.mp_flags == P_BAD && dpl_stub_pageB.mp_pgno == 0); assert(dpl_stub_pageB.mp_flags == P_BAD && dpl_stub_pageB.mp_pgno == 0);
dl->sorted = mdbx_dpl_setlen(dl, 0); dl->sorted = dpl_setlen(dl, 0);
dl->items[0].pgno = 0;
dl->items[0].ptr = (MDBX_page *)&dpl_stub_pageB; dl->items[0].ptr = (MDBX_page *)&dpl_stub_pageB;
dl->items[0].pgno = 0;
dl->items[0].extra = 0;
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
} }
@ -3257,7 +3259,7 @@ static int mdbx_dpl_alloc(MDBX_txn *txn) {
? txn->mt_env->me_options.dp_initial ? txn->mt_env->me_options.dp_initial
: txn->mt_geo.upper; : txn->mt_geo.upper;
if (txn->tw.dirtylist) { if (txn->tw.dirtylist) {
mdbx_dpl_clear(txn->tw.dirtylist); dpl_clear(txn->tw.dirtylist);
const int realloc_threshold = 64; const int realloc_threshold = 64;
if (likely( if (likely(
!((int)(txn->tw.dirtylist->detent - wanna) > realloc_threshold || !((int)(txn->tw.dirtylist->detent - wanna) > realloc_threshold ||
@ -3266,7 +3268,7 @@ static int mdbx_dpl_alloc(MDBX_txn *txn) {
} }
if (unlikely(!mdbx_dpl_reserve(txn, wanna))) if (unlikely(!mdbx_dpl_reserve(txn, wanna)))
return MDBX_ENOMEM; return MDBX_ENOMEM;
mdbx_dpl_clear(txn->tw.dirtylist); dpl_clear(txn->tw.dirtylist);
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
@ -3277,7 +3279,8 @@ RADIXSORT_IMPL(dpl, MDBX_dp, MDBX_DPL_EXTRACT_KEY,
#define DP_SORT_CMP(first, last) ((first).pgno < (last).pgno) #define DP_SORT_CMP(first, last) ((first).pgno < (last).pgno)
SORT_IMPL(dp_sort, false, MDBX_dp, DP_SORT_CMP) SORT_IMPL(dp_sort, false, MDBX_dp, DP_SORT_CMP)
__hot static MDBX_dpl *mdbx_dpl_sort_slowpath(MDBX_dpl *dl) { __hot __noinline static MDBX_dpl *mdbx_dpl_sort_slowpath(MDBX_txn *txn) {
MDBX_dpl *dl = txn->tw.dirtylist;
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
const unsigned unsorted = dl->length - dl->sorted; const unsigned unsorted = dl->length - dl->sorted;
if (likely(unsorted < MDBX_RADIXSORT_THRESHOLD) || if (likely(unsorted < MDBX_RADIXSORT_THRESHOLD) ||
@ -3325,11 +3328,12 @@ __hot static MDBX_dpl *mdbx_dpl_sort_slowpath(MDBX_dpl *dl) {
return dl; return dl;
} }
static __always_inline MDBX_dpl *mdbx_dpl_sort(MDBX_dpl *dl) { static __always_inline MDBX_dpl *mdbx_dpl_sort(MDBX_txn *txn) {
MDBX_dpl *dl = txn->tw.dirtylist;
assert(dl->length <= MDBX_PGL_LIMIT); assert(dl->length <= MDBX_PGL_LIMIT);
assert(dl->sorted <= dl->length); assert(dl->sorted <= dl->length);
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
return likely(dl->sorted == dl->length) ? dl : mdbx_dpl_sort_slowpath(dl); return likely(dl->sorted == dl->length) ? dl : mdbx_dpl_sort_slowpath(txn);
} }
/* Returns the index of the first dirty-page whose pgno /* Returns the index of the first dirty-page whose pgno
@ -3337,7 +3341,8 @@ static __always_inline MDBX_dpl *mdbx_dpl_sort(MDBX_dpl *dl) {
#define DP_SEARCH_CMP(dp, id) ((dp).pgno < (id)) #define DP_SEARCH_CMP(dp, id) ((dp).pgno < (id))
SEARCH_IMPL(dp_bsearch, MDBX_dp, pgno_t, DP_SEARCH_CMP) SEARCH_IMPL(dp_bsearch, MDBX_dp, pgno_t, DP_SEARCH_CMP)
static unsigned __hot mdbx_dpl_search(MDBX_dpl *dl, pgno_t pgno) { static unsigned __hot mdbx_dpl_search(MDBX_txn *txn, pgno_t pgno) {
MDBX_dpl *dl = txn->tw.dirtylist;
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
if (mdbx_audit_enabled()) { if (mdbx_audit_enabled()) {
for (const MDBX_dp *ptr = dl->items + dl->sorted; --ptr > dl->items;) { for (const MDBX_dp *ptr = dl->items + dl->sorted; --ptr > dl->items;) {
@ -3349,7 +3354,7 @@ static unsigned __hot mdbx_dpl_search(MDBX_dpl *dl, pgno_t pgno) {
switch (dl->length - dl->sorted) { switch (dl->length - dl->sorted) {
default: default:
/* sort a whole */ /* sort a whole */
mdbx_dpl_sort_slowpath(dl); mdbx_dpl_sort_slowpath(txn);
break; break;
case 0: case 0:
/* whole sorted cases */ /* whole sorted cases */
@ -3386,26 +3391,37 @@ static unsigned __hot mdbx_dpl_search(MDBX_dpl *dl, pgno_t pgno) {
return (unsigned)(dp_bsearch(dl->items + 1, dl->sorted, pgno) - dl->items); return (unsigned)(dp_bsearch(dl->items + 1, dl->sorted, pgno) - dl->items);
} }
static __inline bool mdbx_dpl_intersect(MDBX_dpl *dl, pgno_t pgno, MDBX_NOTHROW_PURE_FUNCTION static __inline unsigned
dpl_npages(const MDBX_dpl *dl, unsigned i) {
assert(0 <= (int)i && i <= dl->length);
unsigned n = likely(!dl->items[i].multi) ? 1 : dl->items[i].ptr->mp_pages;
assert(n == (IS_OVERFLOW(dl->items[i].ptr) ? dl->items[i].ptr->mp_pages : 1));
return n;
}
MDBX_NOTHROW_PURE_FUNCTION static __inline unsigned
dpl_endpgno(const MDBX_dpl *dl, unsigned i) {
return dpl_npages(dl, i) + dl->items[i].pgno;
}
static __inline bool mdbx_dpl_intersect(MDBX_txn *txn, pgno_t pgno,
unsigned npages) { unsigned npages) {
MDBX_dpl *dl = txn->tw.dirtylist;
assert(dl->sorted == dl->length); assert(dl->sorted == dl->length);
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
unsigned const n = mdbx_dpl_search(dl, pgno); unsigned const n = mdbx_dpl_search(txn, pgno);
assert(n >= 1 && n <= dl->length + 1); assert(n >= 1 && n <= dl->length + 1);
assert(pgno <= dl->items[n].pgno); assert(pgno <= dl->items[n].pgno);
assert(pgno > dl->items[n - 1].pgno); assert(pgno > dl->items[n - 1].pgno);
const MDBX_page *const prev = dl->items[n - 1].ptr;
const bool rc = const bool rc =
/* intersection with founded */ pgno + npages > dl->items[n].pgno || /* intersection with founded */ pgno + npages > dl->items[n].pgno ||
(/* intersection with prev */ unlikely(IS_OVERFLOW(prev)) && /* intersection with prev */ dpl_endpgno(dl, n - 1) > pgno;
prev->mp_pgno + prev->mp_pages > pgno);
if (mdbx_assert_enabled()) { if (mdbx_assert_enabled()) {
bool check = false; bool check = false;
for (unsigned i = 1; i <= dl->length; ++i) { for (unsigned i = 1; i <= dl->length; ++i) {
const MDBX_page *const dp = dl->items[i].ptr; const MDBX_page *const dp = dl->items[i].ptr;
if (!(dp->mp_pgno /* begin */ >= /* end */ pgno + npages || if (!(dp->mp_pgno /* begin */ >= /* end */ pgno + npages ||
dp->mp_pgno + (IS_OVERFLOW(dp) ? dp->mp_pages : 1) /* end */ <= dpl_endpgno(dl, i) /* end */ <= /* begin */ pgno))
/* begin */ pgno))
check |= true; check |= true;
} }
assert(check == rc); assert(check == rc);
@ -3413,20 +3429,24 @@ static __inline bool mdbx_dpl_intersect(MDBX_dpl *dl, pgno_t pgno,
return rc; return rc;
} }
static __always_inline unsigned mdbx_dpl_exist(MDBX_dpl *dl, pgno_t pgno) { static __always_inline unsigned mdbx_dpl_exist(MDBX_txn *txn, pgno_t pgno) {
unsigned i = mdbx_dpl_search(dl, pgno); MDBX_dpl *dl = txn->tw.dirtylist;
unsigned i = mdbx_dpl_search(txn, pgno);
assert((int)i > 0); assert((int)i > 0);
return (dl->items[i].pgno == pgno) ? i : 0; return (dl->items[i].pgno == pgno) ? i : 0;
} }
static __always_inline MDBX_page *mdbx_dpl_find(MDBX_dpl *dl, pgno_t pgno) { static __always_inline MDBX_page *mdbx_dpl_find(MDBX_txn *txn, pgno_t pgno) {
const unsigned i = mdbx_dpl_search(dl, pgno); MDBX_dpl *dl = txn->tw.dirtylist;
const unsigned i = mdbx_dpl_search(txn, pgno);
assert((int)i > 0); assert((int)i > 0);
return (dl->items[i].pgno == pgno) ? dl->items[i].ptr : nullptr; return (dl->items[i].pgno == pgno) ? dl->items[i].lru = txn->tw.dirtylru++,
dl->items[i].ptr : nullptr;
} }
static __maybe_unused const MDBX_page *debug_dpl_find(const MDBX_dpl *dl, static __maybe_unused const MDBX_page *debug_dpl_find(const MDBX_txn *txn,
const pgno_t pgno) { const pgno_t pgno) {
const MDBX_dpl *dl = txn->tw.dirtylist;
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
for (unsigned i = dl->length; i > dl->sorted; --i) for (unsigned i = dl->length; i > dl->sorted; --i)
if (dl->items[i].pgno == pgno) if (dl->items[i].pgno == pgno)
@ -3441,7 +3461,8 @@ static __maybe_unused const MDBX_page *debug_dpl_find(const MDBX_dpl *dl,
return nullptr; return nullptr;
} }
static void mdbx_dpl_remove(MDBX_dpl *dl, unsigned i) { static void mdbx_dpl_remove(MDBX_txn *txn, unsigned i) {
MDBX_dpl *dl = txn->tw.dirtylist;
assert((int)i > 0 && i <= dl->length); assert((int)i > 0 && i <= dl->length);
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
dl->sorted -= dl->sorted >= i; dl->sorted -= dl->sorted >= i;
@ -3452,7 +3473,7 @@ static void mdbx_dpl_remove(MDBX_dpl *dl, unsigned i) {
} }
static __always_inline int __must_check_result static __always_inline int __must_check_result
mdbx_dpl_append(MDBX_txn *txn, pgno_t pgno, MDBX_page *page) { mdbx_dpl_append(MDBX_txn *txn, pgno_t pgno, MDBX_page *page, unsigned npages) {
MDBX_dpl *dl = txn->tw.dirtylist; MDBX_dpl *dl = txn->tw.dirtylist;
assert(dl->length <= MDBX_PGL_LIMIT + MDBX_PNL_GRANULATE); assert(dl->length <= MDBX_PGL_LIMIT + MDBX_PNL_GRANULATE);
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
@ -3487,8 +3508,10 @@ mdbx_dpl_append(MDBX_txn *txn, pgno_t pgno, MDBX_page *page) {
/* copy the stub beyond the end */ /* copy the stub beyond the end */
dl->items[length + 1] = dl->items[length]; dl->items[length + 1] = dl->items[length];
/* append page */ /* append page */
dl->items[length].pgno = pgno;
dl->items[length].ptr = page; dl->items[length].ptr = page;
dl->items[length].pgno = pgno;
dl->items[length].multi = npages > 1;
dl->items[length].lru = txn->tw.dirtylru++;
dl->length = length; dl->length = length;
dl->sorted = sorted; dl->sorted = sorted;
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID); assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
@ -4133,10 +4156,10 @@ static void mdbx_dlist_free(MDBX_txn *txn) {
for (size_t i = 1; i <= len; i++) { for (size_t i = 1; i <= len; i++) {
MDBX_page *dp = dl->items[i].ptr; MDBX_page *dp = dl->items[i].ptr;
mdbx_dpage_free(env, dp, IS_OVERFLOW(dp) ? dp->mp_pages : 1); mdbx_dpage_free(env, dp, dpl_npages(dl, i));
} }
mdbx_dpl_clear(dl); dpl_clear(dl);
} }
static __always_inline MDBX_db *mdbx_outer_db(MDBX_cursor *mc) { static __always_inline MDBX_db *mdbx_outer_db(MDBX_cursor *mc) {
@ -4171,7 +4194,7 @@ static __cold __maybe_unused bool mdbx_dirtylist_check(MDBX_txn *txn) {
} else if (unlikely(!IS_MODIFIABLE(txn, dp))) } else if (unlikely(!IS_MODIFIABLE(txn, dp)))
return false; return false;
const unsigned num = IS_OVERFLOW(dp) ? dp->mp_pages : 1; const unsigned num = dpl_npages(dl, i);
mdbx_tassert(txn, txn->mt_next_pgno >= dp->mp_pgno + num); mdbx_tassert(txn, txn->mt_next_pgno >= dp->mp_pgno + num);
if (unlikely(txn->mt_next_pgno < dp->mp_pgno + num)) if (unlikely(txn->mt_next_pgno < dp->mp_pgno + num))
return false; return false;
@ -4202,7 +4225,7 @@ static __cold __maybe_unused bool mdbx_dirtylist_check(MDBX_txn *txn) {
return false; return false;
for (unsigned i = 1; i <= MDBX_PNL_SIZE(txn->tw.retired_pages); ++i) { for (unsigned i = 1; i <= MDBX_PNL_SIZE(txn->tw.retired_pages); ++i) {
const MDBX_page *const dp = debug_dpl_find(dl, txn->tw.retired_pages[i]); const MDBX_page *const dp = debug_dpl_find(txn, txn->tw.retired_pages[i]);
mdbx_tassert(txn, !dp); mdbx_tassert(txn, !dp);
if (unlikely(dp)) if (unlikely(dp))
return false; return false;
@ -4317,14 +4340,14 @@ static void mdbx_refund_loose(MDBX_txn *txn) {
dl->items[w] = dl->items[r]; dl->items[w] = dl->items[r];
} }
} }
mdbx_dpl_setlen(dl, w); dpl_setlen(dl, w);
mdbx_tassert(txn, txn->mt_parent || txn->tw.dirtyroom + dl->length == mdbx_tassert(txn, txn->mt_parent || txn->tw.dirtyroom + dl->length ==
txn->mt_env->me_options.dp_limit); txn->mt_env->me_options.dp_limit);
goto unlink_loose; goto unlink_loose;
} }
} else { } else {
/* Dirtylist is mostly sorted, just refund loose pages at the end. */ /* Dirtylist is mostly sorted, just refund loose pages at the end. */
mdbx_dpl_sort(dl); mdbx_dpl_sort(txn);
mdbx_tassert(txn, dl->length < 2 || mdbx_tassert(txn, dl->length < 2 ||
dl->items[1].pgno < dl->items[dl->length].pgno); dl->items[1].pgno < dl->items[dl->length].pgno);
mdbx_tassert(txn, dl->sorted == dl->length); mdbx_tassert(txn, dl->sorted == dl->length);
@ -4340,7 +4363,7 @@ static void mdbx_refund_loose(MDBX_txn *txn) {
mdbx_tassert(txn, dp->mp_pgno == dl->items[n].pgno); mdbx_tassert(txn, dp->mp_pgno == dl->items[n].pgno);
txn->mt_next_pgno -= 1; txn->mt_next_pgno -= 1;
} }
mdbx_dpl_setlen(dl, n); dpl_setlen(dl, n);
if (dl->sorted != dl->length) { if (dl->sorted != dl->length) {
const unsigned refunded = dl->sorted - dl->length; const unsigned refunded = dl->sorted - dl->length;
@ -4449,7 +4472,7 @@ static __inline void mdbx_page_wash(MDBX_txn *txn, const unsigned di,
const unsigned npages) { const unsigned npages) {
mdbx_tassert(txn, di && di <= txn->tw.dirtylist->length && mdbx_tassert(txn, di && di <= txn->tw.dirtylist->length &&
txn->tw.dirtylist->items[di].ptr == mp); txn->tw.dirtylist->items[di].ptr == mp);
mdbx_dpl_remove(txn->tw.dirtylist, di); mdbx_dpl_remove(txn, di);
txn->tw.dirtyroom++; txn->tw.dirtyroom++;
assert(txn->tw.dirtyroom <= txn->mt_env->me_options.dp_limit); assert(txn->tw.dirtyroom <= txn->mt_env->me_options.dp_limit);
mdbx_tassert(txn, txn->mt_parent || mdbx_tassert(txn, txn->mt_parent ||
@ -4521,13 +4544,12 @@ static int mdbx_page_retire_ex(MDBX_cursor *mc, const pgno_t pgno,
mdbx_tassert(txn, mdbx_tassert(txn,
!scan->tw.spill_pages || !scan->tw.spill_pages ||
!mdbx_pnl_exist(scan->tw.spill_pages, pgno << 1)); !mdbx_pnl_exist(scan->tw.spill_pages, pgno << 1));
mdbx_tassert(txn, !scan->tw.dirtylist || mdbx_tassert(txn, !scan->tw.dirtylist || !debug_dpl_find(scan, pgno));
!debug_dpl_find(scan->tw.dirtylist, pgno));
} }
} }
goto status_done; goto status_done;
} else if (pagetype && txn->tw.dirtylist) { } else if (pagetype && txn->tw.dirtylist) {
if ((di = mdbx_dpl_exist(txn->tw.dirtylist, pgno)) != 0) { if ((di = mdbx_dpl_exist(txn, pgno)) != 0) {
mp = txn->tw.dirtylist->items[di].ptr; mp = txn->tw.dirtylist->items[di].ptr;
mdbx_tassert(txn, IS_MODIFIABLE(txn, mp)); mdbx_tassert(txn, IS_MODIFIABLE(txn, mp));
goto status_done; goto status_done;
@ -4539,7 +4561,7 @@ static int mdbx_page_retire_ex(MDBX_cursor *mc, const pgno_t pgno,
} }
for (MDBX_txn *parent = txn->mt_parent; parent; for (MDBX_txn *parent = txn->mt_parent; parent;
parent = parent->mt_parent) { parent = parent->mt_parent) {
if (mdbx_dpl_exist(parent->tw.dirtylist, pgno)) { if (mdbx_dpl_exist(parent, pgno)) {
is_shadowed = true; is_shadowed = true;
goto status_done; goto status_done;
} }
@ -4569,13 +4591,12 @@ static int mdbx_page_retire_ex(MDBX_cursor *mc, const pgno_t pgno,
mdbx_tassert(txn, !is_spilled); mdbx_tassert(txn, !is_spilled);
mdbx_tassert(txn, !txn->tw.spill_pages || mdbx_tassert(txn, !txn->tw.spill_pages ||
!mdbx_pnl_exist(txn->tw.spill_pages, pgno << 1)); !mdbx_pnl_exist(txn->tw.spill_pages, pgno << 1));
mdbx_tassert(txn, debug_dpl_find(txn->tw.dirtylist, pgno) == mp || mdbx_tassert(txn, debug_dpl_find(txn, pgno) == mp || txn->mt_parent);
txn->mt_parent);
} else { } else {
mdbx_tassert(txn, !debug_dpl_find(txn->tw.dirtylist, pgno)); mdbx_tassert(txn, !debug_dpl_find(txn, pgno));
} }
di = is_dirty ? mdbx_dpl_exist(txn->tw.dirtylist, pgno) : 0; di = is_dirty ? mdbx_dpl_exist(txn, pgno) : 0;
si = (is_spilled && txn->tw.spill_pages) si = (is_spilled && txn->tw.spill_pages)
? mdbx_pnl_exist(txn->tw.spill_pages, pgno << 1) ? mdbx_pnl_exist(txn->tw.spill_pages, pgno << 1)
: 0; : 0;
@ -4648,7 +4669,7 @@ status_done:
mdbx_tassert(txn, is_spilled); mdbx_tassert(txn, is_spilled);
break; break;
} }
if (mp == debug_dpl_find(parent->tw.dirtylist, pgno)) { if (mp == debug_dpl_find(parent, pgno)) {
kind = "parent-dirty"; kind = "parent-dirty";
mdbx_tassert(txn, !is_spilled); mdbx_tassert(txn, !is_spilled);
break; break;
@ -4708,7 +4729,7 @@ status_done:
if (parent->tw.spill_pages && if (parent->tw.spill_pages &&
mdbx_pnl_intersect(parent->tw.spill_pages, pgno << 1, npages << 1)) mdbx_pnl_intersect(parent->tw.spill_pages, pgno << 1, npages << 1))
goto skip_invalidate; goto skip_invalidate;
if (mdbx_dpl_intersect(parent->tw.dirtylist, pgno, npages)) if (mdbx_dpl_intersect(parent, pgno, npages))
goto skip_invalidate; goto skip_invalidate;
} }
@ -4747,7 +4768,7 @@ status_done:
* retired-список для последующей фильтрации при коммите. */ * retired-список для последующей фильтрации при коммите. */
for (MDBX_txn *parent = txn->mt_parent; parent; for (MDBX_txn *parent = txn->mt_parent; parent;
parent = parent->mt_parent) { parent = parent->mt_parent) {
if (mdbx_dpl_exist(parent->tw.dirtylist, pgno)) if (mdbx_dpl_exist(parent, pgno))
goto retire; goto retire;
} }
/* Страница точно была выделена в этой транзакции /* Страница точно была выделена в этой транзакции
@ -4765,7 +4786,7 @@ status_done:
mdbx_tassert(txn, mdbx_tassert(txn,
!parent->tw.spill_pages || !parent->tw.spill_pages ||
!mdbx_pnl_exist(parent->tw.spill_pages, pgno << 1)); !mdbx_pnl_exist(parent->tw.spill_pages, pgno << 1));
parent_dp = debug_dpl_find(parent->tw.dirtylist, pgno); parent_dp = debug_dpl_find(parent, pgno);
} }
mdbx_tassert(txn, parent_dp && (!mp || parent_dp == mp)); mdbx_tassert(txn, parent_dp && (!mp || parent_dp == mp));
} }
@ -4845,12 +4866,15 @@ static void mdbx_txn_xkeep(MDBX_txn *txn, MDBX_cursor *m0,
pgno_t pgno = txn->mt_dbs[i].md_root; pgno_t pgno = txn->mt_dbs[i].md_root;
if (pgno == P_INVALID) if (pgno == P_INVALID)
continue; continue;
MDBX_page *dp = mdbx_dpl_find(txn->tw.dirtylist, pgno); unsigned di = mdbx_dpl_exist(txn, pgno);
if (dp && (dp->mp_flags & mask) == pflags) if (di) {
MDBX_page *dp = txn->tw.dirtylist->items[di].ptr;
if ((dp->mp_flags & mask) == pflags)
dp->mp_flags ^= P_KEEP; dp->mp_flags ^= P_KEEP;
} }
} }
} }
}
} }
/* Spill pages from the dirty list back to disk. /* Spill pages from the dirty list back to disk.
@ -4929,7 +4953,7 @@ static int mdbx_txn_spill(MDBX_txn *txn, MDBX_cursor *m0, unsigned need) {
/* Preserve pages which may soon be dirtied again */ /* Preserve pages which may soon be dirtied again */
mdbx_txn_xkeep(txn, m0, 0, true); mdbx_txn_xkeep(txn, m0, 0, true);
MDBX_dpl *const dl = mdbx_dpl_sort(txn->tw.dirtylist); MDBX_dpl *const dl = mdbx_dpl_sort(txn);
/* Save the page IDs of all the pages we're flushing */ /* Save the page IDs of all the pages we're flushing */
/* flush from the tail forward, this saves a lot of shifting later on. */ /* flush from the tail forward, this saves a lot of shifting later on. */
const unsigned dl_len_before = dl->length; const unsigned dl_len_before = dl->length;
@ -4938,7 +4962,7 @@ static int mdbx_txn_spill(MDBX_txn *txn, MDBX_cursor *m0, unsigned need) {
for (; keep && wanna_spill; keep--) { for (; keep && wanna_spill; keep--) {
const pgno_t pgno = dl->items[keep].pgno; const pgno_t pgno = dl->items[keep].pgno;
MDBX_page *dp = dl->items[keep].ptr; MDBX_page *dp = dl->items[keep].ptr;
const unsigned npages = IS_OVERFLOW(dp) ? dp->mp_pages : 1; const unsigned npages = dpl_npages(dl, keep);
if (dp->mp_flags & (P_LOOSE | P_KEEP | P_SPILLED)) { if (dp->mp_flags & (P_LOOSE | P_KEEP | P_SPILLED)) {
mdbx_debug("skip %s %u page %" PRIaPGNO, mdbx_debug("skip %s %u page %" PRIaPGNO,
(dp->mp_flags & P_LOOSE) (dp->mp_flags & P_LOOSE)
@ -5299,14 +5323,17 @@ static __cold pgno_t mdbx_find_largest(MDBX_env *env, pgno_t largest) {
} }
/* Add a page to the txn's dirty list */ /* Add a page to the txn's dirty list */
static int __must_check_result mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp) { static int __must_check_result mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp,
unsigned npages) {
mp->mp_txnid = txn->mt_front; mp->mp_txnid = txn->mt_front;
if (unlikely(txn->tw.dirtyroom == 0)) { if (unlikely(txn->tw.dirtyroom == 0)) {
mdbx_error("Dirtyroom is depleted, DPL length %u", mdbx_error("Dirtyroom is depleted, DPL length %u",
txn->tw.dirtylist->length); txn->tw.dirtylist->length);
if (!(txn->mt_flags & MDBX_WRITEMAP))
mdbx_dpage_free(txn->mt_env, mp, npages);
return MDBX_TXN_FULL; return MDBX_TXN_FULL;
} }
const int rc = mdbx_dpl_append(txn, mp->mp_pgno, mp); const int rc = mdbx_dpl_append(txn, mp->mp_pgno, mp, npages);
if (unlikely(rc != MDBX_SUCCESS)) { if (unlikely(rc != MDBX_SUCCESS)) {
txn->mt_flags |= MDBX_TXN_ERROR; txn->mt_flags |= MDBX_TXN_ERROR;
return rc; return rc;
@ -6213,12 +6240,6 @@ done:
return ret; return ret;
} }
if (unlikely(txn->tw.dirtyroom < 1)) {
mdbx_error("Dirtyroom is depleted, DPL length %u",
txn->tw.dirtylist->length);
ret.err = MDBX_TXN_FULL;
return ret;
}
mdbx_ensure(env, pgno >= NUM_METAS); mdbx_ensure(env, pgno >= NUM_METAS);
if (env->me_flags & MDBX_WRITEMAP) { if (env->me_flags & MDBX_WRITEMAP) {
ret.page = pgno2page(env, pgno); ret.page = pgno2page(env, pgno);
@ -6265,7 +6286,7 @@ done:
ret.page->mp_pages = num; ret.page->mp_pages = num;
ret.page->mp_flags = P_OVERFLOW; ret.page->mp_flags = P_OVERFLOW;
} }
ret.err = mdbx_page_dirty(txn, ret.page); ret.err = mdbx_page_dirty(txn, ret.page, num);
if (unlikely(ret.err != MDBX_SUCCESS)) if (unlikely(ret.err != MDBX_SUCCESS))
goto fail; goto fail;
@ -6330,11 +6351,9 @@ mdbx_page_unspill(MDBX_txn *const txn, MDBX_page *mp) {
} /* otherwise, if belonging to a parent txn, the } /* otherwise, if belonging to a parent txn, the
* page remains spilled until child commits */ * page remains spilled until child commits */
ret.err = mdbx_page_dirty(txn, ret.page); ret.err = mdbx_page_dirty(txn, ret.page, npages);
if (unlikely(ret.err != MDBX_SUCCESS)) { if (unlikely(ret.err != MDBX_SUCCESS))
mdbx_dpage_free(txn->mt_env, ret.page, npages);
return ret; return ret;
}
ret.page->mp_flags |= (scan == txn) ? 0 : P_SPILLED; ret.page->mp_flags |= (scan == txn) ? 0 : P_SPILLED;
ret.err = MDBX_SUCCESS; ret.err = MDBX_SUCCESS;
return ret; return ret;
@ -6419,7 +6438,7 @@ __hot static int mdbx_page_touch(MDBX_cursor *mc) {
np->mp_pgno = pgno; np->mp_pgno = pgno;
np->mp_txnid = txn->mt_front; np->mp_txnid = txn->mt_front;
} }
rc = mdbx_dpl_append(txn, pgno, np); rc = mdbx_dpl_append(txn, pgno, np, 1);
if (unlikely(rc != MDBX_SUCCESS)) { if (unlikely(rc != MDBX_SUCCESS)) {
mdbx_dpage_free(txn->mt_env, np, 1); mdbx_dpage_free(txn->mt_env, np, 1);
goto fail; goto fail;
@ -7153,6 +7172,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, const unsigned flags) {
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
goto bailout; goto bailout;
txn->tw.dirtyroom = txn->mt_env->me_options.dp_limit; txn->tw.dirtyroom = txn->mt_env->me_options.dp_limit;
txn->tw.dirtylru = 0;
} }
/* Setup db info */ /* Setup db info */
@ -7397,7 +7417,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags,
if (parent->tw.loose_count) { if (parent->tw.loose_count) {
do { do {
MDBX_page *lp = parent->tw.loose_pages; MDBX_page *lp = parent->tw.loose_pages;
const unsigned di = mdbx_dpl_exist(parent->tw.dirtylist, lp->mp_pgno); const unsigned di = mdbx_dpl_exist(parent, lp->mp_pgno);
mdbx_tassert(parent, di && parent->tw.dirtylist->items[di].ptr == lp); mdbx_tassert(parent, di && parent->tw.dirtylist->items[di].ptr == lp);
mdbx_tassert(parent, lp->mp_flags == P_LOOSE); mdbx_tassert(parent, lp->mp_flags == P_LOOSE);
rc = rc =
@ -7415,8 +7435,9 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags,
mdbx_tassert(parent, mdbx_dirtylist_check(parent)); mdbx_tassert(parent, mdbx_dirtylist_check(parent));
} }
txn->tw.dirtyroom = parent->tw.dirtyroom; txn->tw.dirtyroom = parent->tw.dirtyroom;
txn->tw.dirtylru = parent->tw.dirtylru;
mdbx_dpl_sort(parent->tw.dirtylist); mdbx_dpl_sort(parent);
if (parent->tw.spill_pages) if (parent->tw.spill_pages)
mdbx_spill_purge(parent); mdbx_spill_purge(parent);
@ -7743,7 +7764,7 @@ static void mdbx_dpl_sift(MDBX_txn *const txn, MDBX_PNL pl,
const bool spilled) { const bool spilled) {
if (MDBX_PNL_SIZE(pl) && txn->tw.dirtylist->length) { if (MDBX_PNL_SIZE(pl) && txn->tw.dirtylist->length) {
mdbx_tassert(txn, mdbx_pnl_check4assert(pl, txn->mt_next_pgno << spilled)); mdbx_tassert(txn, mdbx_pnl_check4assert(pl, txn->mt_next_pgno << spilled));
MDBX_dpl *dl = mdbx_dpl_sort(txn->tw.dirtylist); MDBX_dpl *dl = mdbx_dpl_sort(txn);
/* Scanning in ascend order */ /* Scanning in ascend order */
const int step = MDBX_PNL_ASCENDING ? 1 : -1; const int step = MDBX_PNL_ASCENDING ? 1 : -1;
@ -7751,7 +7772,7 @@ static void mdbx_dpl_sift(MDBX_txn *const txn, MDBX_PNL pl,
const int end = MDBX_PNL_ASCENDING ? MDBX_PNL_SIZE(pl) + 1 : 0; const int end = MDBX_PNL_ASCENDING ? MDBX_PNL_SIZE(pl) + 1 : 0;
mdbx_tassert(txn, pl[begin] <= pl[end - step]); mdbx_tassert(txn, pl[begin] <= pl[end - step]);
unsigned r = mdbx_dpl_search(dl, pl[begin] >> spilled); unsigned r = mdbx_dpl_search(txn, pl[begin] >> spilled);
mdbx_tassert(txn, dl->sorted == dl->length); mdbx_tassert(txn, dl->sorted == dl->length);
for (int i = begin; r <= dl->length;) { /* scan loop */ for (int i = begin; r <= dl->length;) { /* scan loop */
assert(i != end); assert(i != end);
@ -7772,7 +7793,7 @@ static void mdbx_dpl_sift(MDBX_txn *const txn, MDBX_PNL pl,
remove_dl: remove_dl:
if ((txn->mt_env->me_flags & MDBX_WRITEMAP) == 0) { if ((txn->mt_env->me_flags & MDBX_WRITEMAP) == 0) {
MDBX_page *dp = dl->items[r].ptr; MDBX_page *dp = dl->items[r].ptr;
mdbx_dpage_free(txn->mt_env, dp, IS_OVERFLOW(dp) ? dp->mp_pages : 1); mdbx_dpage_free(txn->mt_env, dp, dpl_npages(dl, r));
} }
++r; ++r;
next_i: next_i:
@ -7794,7 +7815,7 @@ static void mdbx_dpl_sift(MDBX_txn *const txn, MDBX_PNL pl,
goto remove_dl; goto remove_dl;
} }
} }
dl->sorted = mdbx_dpl_setlen(dl, w - 1); dl->sorted = dpl_setlen(dl, w - 1);
txn->tw.dirtyroom += r - w; txn->tw.dirtyroom += r - w;
assert(txn->tw.dirtyroom <= txn->mt_env->me_options.dp_limit); assert(txn->tw.dirtyroom <= txn->mt_env->me_options.dp_limit);
return; return;
@ -8378,9 +8399,7 @@ retry_noaccount:
for (unsigned r = w; ++r <= dl->length;) { for (unsigned r = w; ++r <= dl->length;) {
MDBX_page *dp = dl->items[r].ptr; MDBX_page *dp = dl->items[r].ptr;
mdbx_tassert(txn, dp->mp_flags == P_LOOSE || IS_MODIFIABLE(txn, dp)); mdbx_tassert(txn, dp->mp_flags == P_LOOSE || IS_MODIFIABLE(txn, dp));
mdbx_tassert(txn, mdbx_tassert(txn, dpl_endpgno(dl, r) <= txn->mt_next_pgno);
dl->items[r].pgno + (IS_OVERFLOW(dp) ? dp->mp_pages : 1) <=
txn->mt_next_pgno);
if ((dp->mp_flags & P_LOOSE) == 0) { if ((dp->mp_flags & P_LOOSE) == 0) {
if (++w != r) if (++w != r)
dl->items[w] = dl->items[r]; dl->items[w] = dl->items[r];
@ -8393,7 +8412,7 @@ retry_noaccount:
mdbx_trace("%s: filtered-out loose-pages from %u -> %u dirty-pages", mdbx_trace("%s: filtered-out loose-pages from %u -> %u dirty-pages",
dbg_prefix_mode, dl->length, w); dbg_prefix_mode, dl->length, w);
mdbx_tassert(txn, txn->tw.loose_count == dl->length - w); mdbx_tassert(txn, txn->tw.loose_count == dl->length - w);
mdbx_dpl_setlen(dl, w); dpl_setlen(dl, w);
dl->sorted = 0; dl->sorted = 0;
txn->tw.dirtyroom += txn->tw.loose_count; txn->tw.dirtyroom += txn->tw.loose_count;
assert(txn->tw.dirtyroom <= txn->mt_env->me_options.dp_limit); assert(txn->tw.dirtyroom <= txn->mt_env->me_options.dp_limit);
@ -8956,7 +8975,7 @@ static int mdbx_flush_iov(MDBX_txn *const txn, struct iovec *iov,
* Returns 0 on success, non-zero on failure. */ * Returns 0 on success, non-zero on failure. */
__hot static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) { __hot static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
struct iovec iov[MDBX_COMMIT_PAGES]; struct iovec iov[MDBX_COMMIT_PAGES];
MDBX_dpl *const dl = mdbx_dpl_sort(txn->tw.dirtylist); MDBX_dpl *const dl = mdbx_dpl_sort(txn);
MDBX_env *const env = txn->mt_env; MDBX_env *const env = txn->mt_env;
pgno_t flush_begin = MAX_PAGENO; pgno_t flush_begin = MAX_PAGENO;
pgno_t flush_end = MIN_PAGENO; pgno_t flush_end = MIN_PAGENO;
@ -8983,7 +9002,7 @@ __hot static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
continue; continue;
} }
const unsigned npages = IS_OVERFLOW(dp) ? dp->mp_pages : 1; const unsigned npages = dpl_npages(dl, r);
flush_begin = (flush_begin < dp->mp_pgno) ? flush_begin : dp->mp_pgno; flush_begin = (flush_begin < dp->mp_pgno) ? flush_begin : dp->mp_pgno;
flush_end = flush_end =
(flush_end > dp->mp_pgno + npages) ? flush_end : dp->mp_pgno + npages; (flush_end > dp->mp_pgno + npages) ? flush_end : dp->mp_pgno + npages;
@ -9026,7 +9045,7 @@ __hot static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
mdbx_tassert(txn, dl->sorted == dl->length && r == dl->length + 1); mdbx_tassert(txn, dl->sorted == dl->length && r == dl->length + 1);
txn->tw.dirtyroom += dl->length - w; txn->tw.dirtyroom += dl->length - w;
assert(txn->tw.dirtyroom <= txn->mt_env->me_options.dp_limit); assert(txn->tw.dirtyroom <= txn->mt_env->me_options.dp_limit);
dl->sorted = mdbx_dpl_setlen(dl, w); dl->sorted = dpl_setlen(dl, w);
mdbx_tassert(txn, txn->mt_parent || mdbx_tassert(txn, txn->mt_parent ||
txn->tw.dirtyroom + txn->tw.dirtylist->length == txn->tw.dirtyroom + txn->tw.dirtylist->length ==
txn->mt_env->me_options.dp_limit); txn->mt_env->me_options.dp_limit);
@ -9074,25 +9093,24 @@ int mdbx_txn_commit(MDBX_txn *txn) { return __inline_mdbx_txn_commit(txn); }
/* Merge child txn into parent */ /* Merge child txn into parent */
static __inline void mdbx_txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, static __inline void mdbx_txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
const unsigned parent_retired_len) { const unsigned parent_retired_len) {
MDBX_dpl *const src = mdbx_dpl_sort(txn->tw.dirtylist); MDBX_dpl *const src = mdbx_dpl_sort(txn);
/* Remove refunded pages from parent's dirty list */ /* Remove refunded pages from parent's dirty list */
MDBX_dpl *const dst = mdbx_dpl_sort(parent->tw.dirtylist); MDBX_dpl *const dst = mdbx_dpl_sort(parent);
if (MDBX_ENABLE_REFUND) { if (MDBX_ENABLE_REFUND) {
unsigned n = dst->length; unsigned n = dst->length;
while (n && dst->items[n].pgno >= parent->mt_next_pgno) { while (n && dst->items[n].pgno >= parent->mt_next_pgno) {
if (!(txn->mt_env->me_flags & MDBX_WRITEMAP)) { if (!(txn->mt_env->me_flags & MDBX_WRITEMAP)) {
MDBX_page *dp = dst->items[n].ptr; MDBX_page *dp = dst->items[n].ptr;
mdbx_dpage_free(txn->mt_env, dp, IS_OVERFLOW(dp) ? dp->mp_pages : 1); mdbx_dpage_free(txn->mt_env, dp, dpl_npages(dst, n));
} }
--n; --n;
} }
parent->tw.dirtyroom += dst->sorted - n; parent->tw.dirtyroom += dst->sorted - n;
assert(parent->tw.dirtyroom <= parent->mt_env->me_options.dp_limit); assert(parent->tw.dirtyroom <= parent->mt_env->me_options.dp_limit);
dst->sorted = mdbx_dpl_setlen(dst, n); dst->sorted = dpl_setlen(dst, n);
mdbx_tassert(parent, mdbx_tassert(parent,
parent->mt_parent || parent->mt_parent || parent->tw.dirtyroom + dst->length ==
parent->tw.dirtyroom + parent->tw.dirtylist->length ==
parent->mt_env->me_options.dp_limit); parent->mt_env->me_options.dp_limit);
} }
@ -9105,17 +9123,17 @@ static __inline void mdbx_txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
for (r = w = parent_retired_len; for (r = w = parent_retired_len;
++r <= MDBX_PNL_SIZE(parent->tw.retired_pages);) { ++r <= MDBX_PNL_SIZE(parent->tw.retired_pages);) {
const pgno_t pgno = parent->tw.retired_pages[r]; const pgno_t pgno = parent->tw.retired_pages[r];
const unsigned di = mdbx_dpl_exist(parent->tw.dirtylist, pgno); const unsigned di = mdbx_dpl_exist(parent, pgno);
const unsigned si = (!di && unlikely(parent->tw.spill_pages)) const unsigned si = (!di && unlikely(parent->tw.spill_pages))
? mdbx_pnl_exist(parent->tw.spill_pages, pgno << 1) ? mdbx_pnl_exist(parent->tw.spill_pages, pgno << 1)
: 0; : 0;
unsigned npages; unsigned npages;
const char *kind; const char *kind;
if (di) { if (di) {
MDBX_page *dp = parent->tw.dirtylist->items[di].ptr; MDBX_page *dp = dst->items[di].ptr;
mdbx_tassert(parent, (dp->mp_flags & ~(P_LEAF | P_LEAF2 | P_BRANCH | mdbx_tassert(parent, (dp->mp_flags & ~(P_LEAF | P_LEAF2 | P_BRANCH |
P_OVERFLOW | P_SPILLED)) == 0); P_OVERFLOW | P_SPILLED)) == 0);
npages = IS_OVERFLOW(dp) ? dp->mp_pages : 1; npages = dpl_npages(dst, di);
mdbx_page_wash(parent, di, dp, npages); mdbx_page_wash(parent, di, dp, npages);
kind = "dirty"; kind = "dirty";
l = 1; l = 1;
@ -9228,8 +9246,7 @@ static __inline void mdbx_txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
} }
const pgno_t spilled_pgno = sl[s] >> 1; const pgno_t spilled_pgno = sl[s] >> 1;
const pgno_t dirty_pgno_form = src->items[d].pgno; const pgno_t dirty_pgno_form = src->items[d].pgno;
MDBX_page *dp = src->items[d].ptr; const unsigned npages = dpl_npages(src, d);
const unsigned npages = IS_OVERFLOW(dp) ? dp->mp_pages : 1;
const pgno_t dirty_pgno_to = dirty_pgno_form + npages; const pgno_t dirty_pgno_to = dirty_pgno_form + npages;
if (dirty_pgno_form > spilled_pgno) { if (dirty_pgno_form > spilled_pgno) {
--d; --d;
@ -9257,8 +9274,7 @@ static __inline void mdbx_txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
parent->mt_next_pgno << 1)); parent->mt_next_pgno << 1));
mdbx_dpl_sift(parent, txn->tw.spill_pages, true); mdbx_dpl_sift(parent, txn->tw.spill_pages, true);
mdbx_tassert(parent, mdbx_tassert(parent,
parent->mt_parent || parent->mt_parent || parent->tw.dirtyroom + dst->length ==
parent->tw.dirtyroom + parent->tw.dirtylist->length ==
parent->mt_env->me_options.dp_limit); parent->mt_env->me_options.dp_limit);
} }
@ -9269,13 +9285,13 @@ static __inline void mdbx_txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
mdbx_tassert(parent, mdbx_tassert(parent,
(sp->mp_flags & ~(P_LEAF | P_LEAF2 | P_BRANCH | P_OVERFLOW | (sp->mp_flags & ~(P_LEAF | P_LEAF2 | P_BRANCH | P_OVERFLOW |
P_LOOSE | P_SPILLED)) == 0); P_LOOSE | P_SPILLED)) == 0);
const unsigned s_npages = IS_OVERFLOW(sp) ? sp->mp_pages : 1; const unsigned s_npages = dpl_npages(src, s);
const pgno_t s_pgno = src->items[s].pgno; const pgno_t s_pgno = src->items[s].pgno;
MDBX_page *dp = dst->items[d].ptr; MDBX_page *dp = dst->items[d].ptr;
mdbx_tassert(parent, (dp->mp_flags & ~(P_LEAF | P_LEAF2 | P_BRANCH | mdbx_tassert(parent, (dp->mp_flags & ~(P_LEAF | P_LEAF2 | P_BRANCH |
P_OVERFLOW | P_SPILLED)) == 0); P_OVERFLOW | P_SPILLED)) == 0);
const unsigned d_npages = IS_OVERFLOW(dp) ? dp->mp_pages : 1; const unsigned d_npages = dpl_npages(dst, d);
const pgno_t d_pgno = dst->items[d].pgno; const pgno_t d_pgno = dst->items[d].pgno;
if (d_pgno >= s_pgno + s_npages) { if (d_pgno >= s_pgno + s_npages) {
@ -9398,11 +9414,11 @@ static __inline void mdbx_txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
} }
parent->tw.dirtyroom -= dst->sorted - dst->length; parent->tw.dirtyroom -= dst->sorted - dst->length;
assert(parent->tw.dirtyroom <= parent->mt_env->me_options.dp_limit); assert(parent->tw.dirtyroom <= parent->mt_env->me_options.dp_limit);
mdbx_dpl_setlen(dst, dst->sorted); dpl_setlen(dst, dst->sorted);
mdbx_tassert(parent, mdbx_tassert(parent,
parent->mt_parent || parent->mt_parent || parent->tw.dirtyroom + dst->length ==
parent->tw.dirtyroom + parent->tw.dirtylist->length ==
parent->mt_env->me_options.dp_limit); parent->mt_env->me_options.dp_limit);
parent->tw.dirtylru = txn->tw.dirtylru;
mdbx_tassert(parent, mdbx_dirtylist_check(parent)); mdbx_tassert(parent, mdbx_dirtylist_check(parent));
mdbx_dpl_free(txn); mdbx_dpl_free(txn);
@ -12616,7 +12632,7 @@ mdbx_page_get_ex(MDBX_cursor *const mc, const pgno_t pgno,
mdbx_pnl_exist(spiller->tw.spill_pages, pgno << 1)) { mdbx_pnl_exist(spiller->tw.spill_pages, pgno << 1)) {
goto spilled; goto spilled;
} }
ret.page = mdbx_dpl_find(spiller->tw.dirtylist, pgno); ret.page = mdbx_dpl_find(spiller, pgno);
if (ret.page) if (ret.page)
goto dirty; goto dirty;
spiller = spiller->mt_parent; spiller = spiller->mt_parent;
@ -14324,7 +14340,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
return MDBX_ENOMEM; return MDBX_ENOMEM;
/* Note - this page is already counted in parent's dirtyroom */ /* Note - this page is already counted in parent's dirtyroom */
rc2 = mdbx_dpl_append(mc->mc_txn, pgno, np); rc2 = mdbx_dpl_append(mc->mc_txn, pgno, np, ovpages);
if (unlikely(rc2 != MDBX_SUCCESS)) { if (unlikely(rc2 != MDBX_SUCCESS)) {
rc = rc2; rc = rc2;
mdbx_dpage_free(env, np, ovpages); mdbx_dpage_free(env, np, ovpages);

View File

@ -824,8 +824,15 @@ typedef txnid_t *MDBX_TXL;
/* A Dirty-Page list item is a pgno/pointer pair. */ /* A Dirty-Page list item is a pgno/pointer pair. */
typedef struct MDBX_dp { typedef struct MDBX_dp {
pgno_t pgno;
MDBX_page *ptr; MDBX_page *ptr;
pgno_t pgno;
union {
unsigned extra;
__anonymous_struct_extension__ struct {
unsigned multi : 1;
unsigned lru : 31;
};
};
} MDBX_dp; } MDBX_dp;
/* A DPL (dirty-page list) is a sorted array of MDBX_DPs. */
@ -976,6 +983,8 @@ struct MDBX_txn {
* dirty/spilled pages. Thus commit(nested txn) has room to merge * dirty/spilled pages. Thus commit(nested txn) has room to merge
* dirtylist into mt_parent after freeing hidden mt_parent pages. */ * dirtylist into mt_parent after freeing hidden mt_parent pages. */
unsigned dirtyroom; unsigned dirtyroom;
/* A sequence for spilling dirty pages with an LRU policy */
unsigned dirtylru;
/* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */ /* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */
MDBX_dpl *dirtylist; MDBX_dpl *dirtylist;
/* The list of reclaimed txns from GC */ /* The list of reclaimed txns from GC */