mdbx: auto-sorting for dirty-pages list.

Change-Id: I12f3de68c020aeacba2ac80cedf4cae6cb590c6b
This commit is contained in:
Leonid Yuriev 2019-09-21 19:57:05 +03:00
parent f5bd9b863c
commit 5a94d734cc
2 changed files with 137 additions and 120 deletions

View File

@ -1239,9 +1239,31 @@ static int __must_check_result mdbx_txl_append_list(MDBX_TXL *ptl,
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
/* qsort() comparator for dirty-page list entries: compares the pgno fields
 * of the two MDBX_DP items and returns the result of mdbx_cmp2int(). */
static __hot int mdbx_dpl_cmp(const void *pa, const void *pb) {
  const MDBX_DP *const lhs = (const MDBX_DP *)pa;
  const MDBX_DP *const rhs = (const MDBX_DP *)pb;
  return mdbx_cmp2int(lhs->pgno, rhs->pgno);
}
/* Brings the whole dirty-page list into pgno order, unless the list header
 * already reports every item as sorted (sorted == length).
 *
 * Items live in dl[1..length]; dl[0] is the header and is not passed to
 * qsort(). Returns the same dl pointer, so the call can be chained. */
static __inline MDBX_DPL mdbx_dpl_sort(MDBX_DPL dl) {
  assert(dl->length <= MDBX_DPL_TXNFULL);

  /* Nothing to do when the sorted prefix already spans the list. */
  if (dl->sorted == dl->length)
    return dl;

  /* LY: qsort() is a temporary solution. */
  const unsigned count = dl->length;
  dl->sorted = count;
  qsort(dl + 1, count, sizeof(*dl), mdbx_dpl_cmp);
  return dl;
}
/* Returns the index of the first dirty-page whose pgno /* Returns the index of the first dirty-page whose pgno
* member is greater than or equal to id. */ * member is greater than or equal to id. */
static unsigned __hot mdbx_dpl_search(MDBX_DPL dl, pgno_t id) { static unsigned __hot mdbx_dpl_search(MDBX_DPL dl, pgno_t id) {
#if MDBX_DEBUG
for (const MDBX_DP *ptr = dl + dl->sorted; --ptr > dl;)
assert(ptr[0].pgno < ptr[1].pgno);
#endif
dl = mdbx_dpl_sort(dl);
/* binary search of id in array /* binary search of id in array
* if found, returns position of id * if found, returns position of id
* if not found, returns first position greater than id */ * if not found, returns first position greater than id */
@ -1278,41 +1300,75 @@ static unsigned __hot mdbx_dpl_search(MDBX_DPL dl, pgno_t id) {
return cursor; return cursor;
} }
static int mdbx_dpl_cmp(const void *pa, const void *pb) { static __inline MDBX_page *mdbx_dpl_find(MDBX_DPL dl, pgno_t id) {
const MDBX_DP a = *(MDBX_DPL)pa; const unsigned i = mdbx_dpl_search(dl, id);
const MDBX_DP b = *(MDBX_DPL)pb; assert((int)i > 0);
return mdbx_cmp2int(a.pgno, b.pgno); return (i <= dl->length && dl[i].pgno == id) ? dl[i].ptr : nullptr;
} }
static void mdbx_dpl_sort(MDBX_DPL dl) { static __hot MDBX_page *mdbx_dpl_remove(MDBX_DPL dl, pgno_t id) {
assert(dl->length <= MDBX_DPL_TXNFULL); unsigned i = mdbx_dpl_search(dl, id);
/* LY: temporary */ assert((int)i > 0);
qsort(dl + 1, dl->length, sizeof(*dl), mdbx_dpl_cmp); MDBX_page *mp = nullptr;
if (i <= dl->length && dl[i].pgno == id) {
dl->sorted -= dl->sorted >= i;
mp = dl[i].ptr;
while (i < dl->length) {
dl[i] = dl[i + 1];
++i;
}
dl->length -= 1;
}
return mp;
} }
static int __must_check_result mdbx_dpl_insert(MDBX_DPL dl, pgno_t pgno, static __inline bool mdbx_dpl_mark4removal(MDBX_DPL dl, MDBX_page *mp) {
MDBX_page *page) { assert(dl != (void *)mp);
assert(dl->length <= MDBX_DPL_TXNFULL); const unsigned i = mdbx_dpl_search(dl, mp->mp_pgno);
unsigned x = mdbx_dpl_search(dl, pgno); if (i <= dl->length && dl[i].pgno == mp->mp_pgno) {
assert((int)x > 0); assert((int)i > 0 && dl[i].ptr == mp);
if (unlikely(dl[x].pgno == pgno && x <= dl->length)) dl[i].ptr = dl /* mark for deletion */;
return /* duplicate */ MDBX_PROBLEM; return true;
}
if (unlikely(dl->length == MDBX_DPL_TXNFULL)) return false;
return MDBX_TXN_FULL;
/* insert page */
for (unsigned i = dl->length += 1; i > x; --i)
dl[i] = dl[i - 1];
dl[x].pgno = pgno;
dl[x].ptr = page;
return MDBX_SUCCESS;
} }
static int __must_check_result mdbx_dpl_append(MDBX_DPL dl, pgno_t pgno, static __hot void mdbx_dpl_remove_marked(MDBX_DPL dl) {
MDBX_page *page) { assert(dl->sorted <= dl->length);
assert(dl->length <= MDBX_DPL_TXNFULL); assert(dl->length <= MDBX_DPL_TXNFULL);
MDBX_DPL r, w;
const MDBX_DPL end_sorted = dl + dl->sorted;
for (r = w = dl + 1; r <= end_sorted; r++) {
if (r->ptr != dl) {
if (r != w)
*w = *r;
++w;
}
}
dl->sorted = (unsigned)(w - dl - 1);
const MDBX_DPL end = dl + dl->length;
for (; r <= end; r++) {
if (r->ptr != dl) {
if (r != w)
*w = *r;
++w;
}
}
dl->length = (unsigned)(w - dl - 1);
#if MDBX_DEBUG
for (const MDBX_DP *ptr = dl + dl->sorted; --ptr > dl;)
assert(ptr[0].pgno < ptr[1].pgno);
#endif
}
static __inline int __must_check_result mdbx_dpl_append(MDBX_DPL dl,
pgno_t pgno,
MDBX_page *page) {
assert(dl->length <= MDBX_DPL_TXNFULL);
assert(page->mp_pgno == pgno);
#if MDBX_DEBUG #if MDBX_DEBUG
for (unsigned i = dl->length; i > 0; --i) { for (unsigned i = dl->length; i > 0; --i) {
assert(dl[i].pgno != pgno); assert(dl[i].pgno != pgno);
@ -1325,12 +1381,19 @@ static int __must_check_result mdbx_dpl_append(MDBX_DPL dl, pgno_t pgno,
return MDBX_TXN_FULL; return MDBX_TXN_FULL;
/* append page */ /* append page */
const unsigned i = dl->length += 1; const unsigned n = dl->length + 1;
dl[i].pgno = pgno; if (n == 1 || (dl->sorted >= dl->length && dl[n - 1].pgno < pgno))
dl[i].ptr = page; dl->sorted = n;
dl->length = n;
dl[n].pgno = pgno;
dl[n].ptr = page;
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
/* Empties the dirty-page list by zeroing both header counters:
 * the number of items and the length of the sorted prefix. */
static __inline void mdbx_dpl_clear(MDBX_DPL dl) {
  dl->length = 0;
  dl->sorted = 0;
}
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
#ifndef MDBX_ALLOY #ifndef MDBX_ALLOY
@ -1867,7 +1930,7 @@ static void mdbx_dlist_free(MDBX_txn *txn) {
mdbx_dpage_free(env, dp, IS_OVERFLOW(dp) ? dp->mp_pages : 1); mdbx_dpage_free(env, dp, IS_OVERFLOW(dp) ? dp->mp_pages : 1);
} }
dl->length = 0; mdbx_dpl_clear(dl);
} }
static size_t bytes_align2os_bytes(const MDBX_env *env, size_t bytes) { static size_t bytes_align2os_bytes(const MDBX_env *env, size_t bytes) {
@ -1929,18 +1992,8 @@ static __must_check_result int mdbx_refund_loose(MDBX_txn *txn) {
txn->mt_next_pgno, mp->mp_pgno); txn->mt_next_pgno, mp->mp_pgno);
txn->mt_next_pgno = mp->mp_pgno; txn->mt_next_pgno = mp->mp_pgno;
if (txn->mt_rw_dirtylist) { if (txn->mt_rw_dirtylist) {
MDBX_DPL dl = txn->mt_rw_dirtylist; MDBX_page *dp = mdbx_dpl_remove(txn->mt_rw_dirtylist, mp->mp_pgno);
for (unsigned i = 1; i <= dl->length; ++i) if (unlikely(dp != mp)) {
if (dl[i].pgno == mp->mp_pgno) {
while (i < dl->length) {
dl[i] = dl[i + 1];
++i;
}
dl->length -= 1;
dl = nullptr;
break;
}
if (unlikely(dl)) {
mdbx_error("not found page 0x%p #%" PRIaPGNO " in the dirtylist", mp, mdbx_error("not found page 0x%p #%" PRIaPGNO " in the dirtylist", mp,
mp->mp_pgno); mp->mp_pgno);
txn->mt_flags |= MDBX_TXN_ERROR; txn->mt_flags |= MDBX_TXN_ERROR;
@ -1997,12 +2050,12 @@ static int mdbx_page_loose(MDBX_cursor *mc, MDBX_page *mp) {
/* If txn has a parent, /* If txn has a parent,
* make sure the page is in our dirty list. */ * make sure the page is in our dirty list. */
if (dl->length) { if (dl->length) {
unsigned x = mdbx_dpl_search(dl, pgno); MDBX_page *dp = mdbx_dpl_find(dl, pgno);
if (x <= dl->length && dl[x].pgno == pgno) { if (dp) {
if (unlikely(mp != dl[x].ptr)) { /* bad cursor? */ if (unlikely(mp != dp)) { /* bad cursor? */
mdbx_error("wrong page 0x%p #%" PRIaPGNO mdbx_error("wrong page 0x%p #%" PRIaPGNO
" in the dirtylist[%d], expecting %p", " in the dirtylist, expecting %p",
dl[x].ptr, pgno, x, mp); dp, pgno, mp);
mc->mc_flags &= ~(C_INITIALIZED | C_EOF); mc->mc_flags &= ~(C_INITIALIZED | C_EOF);
txn->mt_flags |= MDBX_TXN_ERROR; txn->mt_flags |= MDBX_TXN_ERROR;
return MDBX_PROBLEM; return MDBX_PROBLEM;
@ -2492,9 +2545,7 @@ static __cold pgno_t mdbx_find_largest(MDBX_env *env, pgno_t largest) {
/* Add a page to the txn's dirty list */ /* Add a page to the txn's dirty list */
static int __must_check_result mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp) { static int __must_check_result mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp) {
int (*const adder)(MDBX_DPL, pgno_t pgno, MDBX_page * page) = const int rc = mdbx_dpl_append(txn->mt_rw_dirtylist, mp->mp_pgno, mp);
(txn->mt_flags & MDBX_WRITEMAP) ? mdbx_dpl_append : mdbx_dpl_insert;
const int rc = adder(txn->mt_rw_dirtylist, mp->mp_pgno, mp);
if (unlikely(rc != MDBX_SUCCESS)) { if (unlikely(rc != MDBX_SUCCESS)) {
txn->mt_flags |= MDBX_TXN_ERROR; txn->mt_flags |= MDBX_TXN_ERROR;
return rc; return rc;
@ -3187,12 +3238,12 @@ __hot static int mdbx_page_touch(MDBX_cursor *mc) {
pgno = mp->mp_pgno; pgno = mp->mp_pgno;
/* If txn has a parent, make sure the page is in our dirty list. */ /* If txn has a parent, make sure the page is in our dirty list. */
if (dl->length) { if (dl->length) {
unsigned x = mdbx_dpl_search(dl, pgno); MDBX_page *dp = mdbx_dpl_find(dl, pgno);
if (x <= dl->length && dl[x].pgno == pgno) { if (dp) {
if (unlikely(mp != dl[x].ptr)) { /* bad cursor? */ if (unlikely(mp != dp)) { /* bad cursor? */
mdbx_error("wrong page 0x%p #%" PRIaPGNO mdbx_error("wrong page 0x%p #%" PRIaPGNO
" in the dirtylist[%d], expecting %p", " in the dirtylist, expecting %p",
dl[x].ptr, pgno, x, mp); dp, pgno, mp);
mc->mc_flags &= ~(C_INITIALIZED | C_EOF); mc->mc_flags &= ~(C_INITIALIZED | C_EOF);
rc = MDBX_PROBLEM; rc = MDBX_PROBLEM;
goto fail; goto fail;
@ -3209,7 +3260,7 @@ __hot static int mdbx_page_touch(MDBX_cursor *mc) {
rc = MDBX_ENOMEM; rc = MDBX_ENOMEM;
goto fail; goto fail;
} }
rc = mdbx_dpl_insert(dl, pgno, np); rc = mdbx_dpl_append(dl, pgno, np);
if (unlikely(rc)) { if (unlikely(rc)) {
mdbx_dpage_free(txn->mt_env, np, 1); mdbx_dpage_free(txn->mt_env, np, 1);
goto fail; goto fail;
@ -3628,7 +3679,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
txn->mt_loose_count = 0; txn->mt_loose_count = 0;
txn->mt_dirtyroom = MDBX_DPL_TXNFULL; txn->mt_dirtyroom = MDBX_DPL_TXNFULL;
txn->mt_rw_dirtylist = env->me_dirtylist; txn->mt_rw_dirtylist = env->me_dirtylist;
txn->mt_rw_dirtylist->length = 0; mdbx_dpl_clear(txn->mt_rw_dirtylist);
txn->mt_retired_pages = env->me_retired_pages; txn->mt_retired_pages = env->me_retired_pages;
MDBX_PNL_SIZE(txn->mt_retired_pages) = 0; MDBX_PNL_SIZE(txn->mt_retired_pages) = 0;
txn->mt_spill_pages = NULL; txn->mt_spill_pages = NULL;
@ -3849,7 +3900,7 @@ int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, unsigned flags,
} }
txn->mt_txnid = parent->mt_txnid; txn->mt_txnid = parent->mt_txnid;
txn->mt_dirtyroom = parent->mt_dirtyroom; txn->mt_dirtyroom = parent->mt_dirtyroom;
txn->mt_rw_dirtylist->length = 0; mdbx_dpl_clear(txn->mt_rw_dirtylist);
txn->mt_spill_pages = NULL; txn->mt_spill_pages = NULL;
txn->mt_geo = parent->mt_geo; txn->mt_geo = parent->mt_geo;
txn->mt_canary = parent->mt_canary; txn->mt_canary = parent->mt_canary;
@ -4123,9 +4174,8 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) {
if (!(mode & MDBX_END_EOTDONE)) /* !(already closed cursors) */ if (!(mode & MDBX_END_EOTDONE)) /* !(already closed cursors) */
mdbx_cursors_eot(txn, 0); mdbx_cursors_eot(txn, 0);
if (!(env->me_flags & MDBX_WRITEMAP)) { if (!(env->me_flags & MDBX_WRITEMAP))
mdbx_dlist_free(txn); mdbx_dlist_free(txn);
}
if (txn->mt_lifo_reclaimed) { if (txn->mt_lifo_reclaimed) {
MDBX_PNL_SIZE(txn->mt_lifo_reclaimed) = 0; MDBX_PNL_SIZE(txn->mt_lifo_reclaimed) = 0;
@ -4516,20 +4566,13 @@ retry:
// filter-out list of dirty-pages from loose-pages // filter-out list of dirty-pages from loose-pages
MDBX_DPL dl = txn->mt_rw_dirtylist; MDBX_DPL dl = txn->mt_rw_dirtylist;
mdbx_dpl_sort(dl);
unsigned left = dl->length; unsigned left = dl->length;
for (MDBX_page *mp = txn->mt_loose_pages; mp;) { for (MDBX_page *mp = txn->mt_loose_pages; mp;) {
mdbx_tassert(txn, mp->mp_pgno < txn->mt_next_pgno); mdbx_tassert(txn, mp->mp_pgno < txn->mt_next_pgno);
mdbx_ensure(env, mp->mp_pgno >= NUM_METAS); mdbx_ensure(env, mp->mp_pgno >= NUM_METAS);
if (left > 0) { if (left > 0)
const unsigned i = mdbx_dpl_search(dl, mp->mp_pgno); left -= mdbx_dpl_mark4removal(dl, mp);
if (i <= dl->length && dl[i].pgno == mp->mp_pgno) {
mdbx_tassert(txn, i > 0 && dl[i].ptr != dl);
dl[i].ptr = dl /* mark for deletion */;
}
left -= 1;
}
MDBX_page *dp = mp; MDBX_page *dp = mp;
mp = NEXT_LOOSE_PAGE(mp); mp = NEXT_LOOSE_PAGE(mp);
@ -4537,22 +4580,15 @@ retry:
mdbx_dpage_free(env, dp, 1); mdbx_dpage_free(env, dp, 1);
} }
if (left > 0) { if (left != dl->length) {
MDBX_DPL r, w, end = dl + dl->length;
for (r = w = dl + 1; r <= end; r++) {
if (r->ptr != dl) {
if (r != w)
*w = *r;
++w;
}
}
mdbx_tassert(txn, w - dl == (int)left + 1);
}
if (left != dl->length)
mdbx_trace("%s: filtered-out loose-pages from %u -> %u dirty-pages", mdbx_trace("%s: filtered-out loose-pages from %u -> %u dirty-pages",
dbg_prefix_mode, dl->length, left); dbg_prefix_mode, dl->length, left);
dl->length = left; if (!MDBX_DEBUG && unlikely(left == 0))
mdbx_dpl_clear(dl);
else
mdbx_dpl_remove_marked(dl);
mdbx_tassert(txn, dl->length == left);
}
txn->mt_loose_pages = NULL; txn->mt_loose_pages = NULL;
txn->mt_loose_count = 0; txn->mt_loose_count = 0;
@ -5247,8 +5283,8 @@ int mdbx_txn_commit(MDBX_txn *txn) {
txn->mt_dbflags[i] | (parent->mt_dbflags[i] & (DB_CREAT | DB_FRESH)); txn->mt_dbflags[i] | (parent->mt_dbflags[i] & (DB_CREAT | DB_FRESH));
} }
dst = parent->mt_rw_dirtylist; dst = mdbx_dpl_sort(parent->mt_rw_dirtylist);
src = txn->mt_rw_dirtylist; src = mdbx_dpl_sort(txn->mt_rw_dirtylist);
/* Remove anything in our dirty list from parent's spill list */ /* Remove anything in our dirty list from parent's spill list */
if ((pspill = parent->mt_spill_pages) && (ps_len = MDBX_PNL_SIZE(pspill))) { if ((pspill = parent->mt_spill_pages) && (ps_len = MDBX_PNL_SIZE(pspill))) {
x = y = ps_len; x = y = ps_len;
@ -5276,16 +5312,9 @@ int mdbx_txn_commit(MDBX_txn *txn) {
pgno_t pn = txn->mt_spill_pages[i]; pgno_t pn = txn->mt_spill_pages[i];
if (pn & 1) if (pn & 1)
continue; /* deleted spillpg */ continue; /* deleted spillpg */
pn >>= 1; MDBX_page *mp = mdbx_dpl_remove(dst, pn >> 1);
y = mdbx_dpl_search(dst, pn); if (mp)
if (y <= dst->length && dst[y].pgno == pn) { mdbx_free(mp);
mdbx_free(dst[y].ptr);
while (y < dst->length) {
dst[y] = dst[y + 1];
y++;
}
dst->length--;
}
} }
} }
@ -7782,11 +7811,9 @@ __hot static int mdbx_page_get(MDBX_cursor *mc, pgno_t pgno, MDBX_page **ret,
goto mapped; goto mapped;
} }
if (dl->length) { if (dl->length) {
unsigned y = mdbx_dpl_search(dl, pgno); p = mdbx_dpl_find(dl, pgno);
if (y <= dl->length && dl[y].pgno == pgno) { if (p)
p = dl[y].ptr;
goto done; goto done;
}
} }
level++; level++;
} while ((tx2 = tx2->mt_parent) != NULL); } while ((tx2 = tx2->mt_parent) != NULL);
@ -8059,7 +8086,6 @@ static int mdbx_ovpage_free(MDBX_cursor *mc, MDBX_page *mp) {
sl[x] == pn))) { sl[x] == pn))) {
unsigned i, j; unsigned i, j;
pgno_t *mop; pgno_t *mop;
MDBX_DP *dl, ix, iy;
rc = mdbx_pnl_need(&env->me_reclaimed_pglist, ovpages); rc = mdbx_pnl_need(&env->me_reclaimed_pglist, ovpages);
if (unlikely(rc)) if (unlikely(rc))
return rc; return rc;
@ -8072,23 +8098,14 @@ static int mdbx_ovpage_free(MDBX_cursor *mc, MDBX_page *mp) {
sl[x] |= 1; sl[x] |= 1;
goto release; goto release;
} }
/* Remove from dirty list */ /* Remove from dirty list */
dl = txn->mt_rw_dirtylist; MDBX_page *dp = mdbx_dpl_remove(txn->mt_rw_dirtylist, mp->mp_pgno);
x = dl->length--; if (unlikely(dp != mp)) {
for (ix = dl[x]; ix.ptr != mp; ix = iy) { mdbx_error("not found page 0x%p #%" PRIaPGNO " in the dirtylist", mp,
if (likely(x > 1)) { mp->mp_pgno);
x--; txn->mt_flags |= MDBX_TXN_ERROR;
iy = dl[x]; return MDBX_PROBLEM;
dl[x] = ix;
} else {
mdbx_cassert(mc, x > 1);
mdbx_error("not found page 0x%p #%" PRIaPGNO " in the dirtylist", mp,
mp->mp_pgno);
j = dl->length += 1;
dl[j] = ix; /* Unsorted. OK when MDBX_TXN_ERROR. */
txn->mt_flags |= MDBX_TXN_ERROR;
return MDBX_PROBLEM;
}
} }
txn->mt_dirtyroom++; txn->mt_dirtyroom++;
if (!(env->me_flags & MDBX_WRITEMAP)) if (!(env->me_flags & MDBX_WRITEMAP))
@ -9399,7 +9416,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
if (unlikely(!np)) if (unlikely(!np))
return MDBX_ENOMEM; return MDBX_ENOMEM;
/* Note - this page is already counted in parent's dirtyroom */ /* Note - this page is already counted in parent's dirtyroom */
rc2 = mdbx_dpl_insert(mc->mc_txn->mt_rw_dirtylist, pg, np); rc2 = mdbx_dpl_append(mc->mc_txn->mt_rw_dirtylist, pg, np);
if (unlikely(rc2 != MDBX_SUCCESS)) { if (unlikely(rc2 != MDBX_SUCCESS)) {
rc = rc2; rc = rc2;
mdbx_dpage_free(env, np, ovpages); mdbx_dpage_free(env, np, ovpages);

View File

@ -628,7 +628,7 @@ typedef union MDBX_DP {
void *ptr; void *ptr;
}; };
struct { struct {
pgno_t unused; unsigned sorted;
unsigned length; unsigned length;
}; };
} MDBX_DP; } MDBX_DP;