mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-01 23:14:14 +08:00
mdbx: fix lru-counter overflow by module 2^31.
2 of 2 fixes for https://github.com/erthink/libmdbx/issues/195
This commit is contained in:
parent
bc6a690733
commit
6bedb02ac0
91
src/core.c
91
src/core.c
@ -3535,6 +3535,13 @@ mdbx_dpl_append(MDBX_txn *txn, pgno_t pgno, MDBX_page *page, unsigned npages) {
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
static __inline uint32_t mdbx_dpl_age(const MDBX_txn *txn, unsigned i) {
|
||||
const MDBX_dpl *dl = txn->tw.dirtylist;
|
||||
assert((int)i > 0 && i <= dl->length);
|
||||
/* overflow could be here */
|
||||
return (txn->tw.dirtylru - dl->items[i].lru) & UINT32_C(0x7fffFFFF);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
uint8_t mdbx_runtime_flags = MDBX_RUNTIME_FLAGS_INIT;
|
||||
@ -4207,8 +4214,9 @@ MDBX_MAYBE_UNUSED __cold static bool mdbx_dirtylist_check(MDBX_txn *txn) {
|
||||
if (unlikely(dp->mp_pgno != dl->items[i].pgno))
|
||||
return false;
|
||||
|
||||
mdbx_tassert(txn, txn->tw.dirtylru >= dl->items[i].lru);
|
||||
if (unlikely(txn->tw.dirtylru < dl->items[i].lru))
|
||||
const uint32_t age = mdbx_dpl_age(txn, i);
|
||||
mdbx_tassert(txn, age < UINT32_MAX / 3);
|
||||
if (unlikely(age > UINT32_MAX / 3))
|
||||
return false;
|
||||
|
||||
mdbx_tassert(txn, dp->mp_flags == P_LOOSE || IS_MODIFIABLE(txn, dp));
|
||||
@ -4977,14 +4985,14 @@ static int spill_page(MDBX_txn *txn, struct mdbx_iov_ctx *ctx, MDBX_page *dp,
|
||||
static unsigned mdbx_cursor_keep(MDBX_txn *txn, MDBX_cursor *mc) {
|
||||
unsigned keep = 0;
|
||||
while (mc->mc_flags & C_INITIALIZED) {
|
||||
for (unsigned i = 0; i < mc->mc_snum; i++) {
|
||||
for (unsigned i = 0; i < mc->mc_snum; ++i) {
|
||||
const MDBX_page *mp = mc->mc_pg[i];
|
||||
if (IS_MODIFIABLE(txn, mp) && !IS_SUBP(mp)) {
|
||||
unsigned const n = mdbx_dpl_search(txn, mp->mp_pgno);
|
||||
if (txn->tw.dirtylist->items[n].pgno == mp->mp_pgno &&
|
||||
txn->tw.dirtylist->items[n].lru != txn->tw.dirtylru) {
|
||||
mdbx_dpl_age(txn, n)) {
|
||||
txn->tw.dirtylist->items[n].lru = txn->tw.dirtylru;
|
||||
keep++;
|
||||
++keep;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -5011,12 +5019,12 @@ static unsigned mdbx_txn_keep(MDBX_txn *txn, MDBX_cursor *m0) {
|
||||
* ...
|
||||
* > 255 = must not be spilled. */
|
||||
static unsigned spill_prio(const MDBX_txn *txn, const unsigned i,
|
||||
const unsigned lru_min, const unsigned reciprocal) {
|
||||
const uint32_t reciprocal) {
|
||||
MDBX_dpl *const dl = txn->tw.dirtylist;
|
||||
const unsigned lru = dl->items[i].lru;
|
||||
const uint32_t age = mdbx_dpl_age(txn, i);
|
||||
const unsigned npages = dpl_npages(dl, i);
|
||||
const pgno_t pgno = dl->items[i].pgno;
|
||||
if (lru == txn->tw.dirtylru) {
|
||||
if (age == 0) {
|
||||
mdbx_debug("skip %s %u page %" PRIaPGNO, "keep", npages, pgno);
|
||||
return 256;
|
||||
}
|
||||
@ -5045,21 +5053,22 @@ static unsigned spill_prio(const MDBX_txn *txn, const unsigned i,
|
||||
while ((parent = parent->mt_parent) != nullptr);
|
||||
}
|
||||
|
||||
unsigned prio = 1 + ((lru - lru_min) * reciprocal >> 8);
|
||||
mdbx_tassert(txn, prio > 0 && prio < 256);
|
||||
mdbx_tassert(txn, age * (uint64_t)reciprocal < UINT32_MAX);
|
||||
unsigned prio = age * reciprocal >> 24;
|
||||
mdbx_tassert(txn, prio < 256);
|
||||
if (likely(npages == 1))
|
||||
return prio;
|
||||
return prio = 256 - prio;
|
||||
|
||||
/* make a large/overflow pages be likely to spill */
|
||||
uint32_t x = npages | npages >> 1;
|
||||
x |= x >> 2;
|
||||
x |= x >> 4;
|
||||
x |= x >> 8;
|
||||
x |= x >> 16;
|
||||
x = (255 - prio) * log2n_powerof2(x + 1) + /* golden ratio factor */ 157;
|
||||
x = (x < 256) ? 255 - x : 0;
|
||||
mdbx_tassert(txn, x < 256 && x < prio);
|
||||
return prio = x;
|
||||
uint32_t factor = npages | npages >> 1;
|
||||
factor |= factor >> 2;
|
||||
factor |= factor >> 4;
|
||||
factor |= factor >> 8;
|
||||
factor |= factor >> 16;
|
||||
factor = prio * log2n_powerof2(factor + 1) + /* golden ratio */ 157;
|
||||
factor = (factor < 256) ? 255 - factor : 0;
|
||||
mdbx_tassert(txn, factor < 256 && factor < (256 - prio));
|
||||
return prio = factor;
|
||||
}
|
||||
|
||||
/* Spill pages from the dirty list back to disk.
|
||||
@ -5216,21 +5225,20 @@ static int mdbx_txn_spill(MDBX_txn *const txn, MDBX_cursor *const m0,
|
||||
* тем самым повышая их шансы на выталкивание. */
|
||||
|
||||
/* get min/max of LRU-labels */
|
||||
unsigned lru_min = dl->items[1].lru, lru_max = lru_min;
|
||||
for (unsigned i = 2; i <= dl->length; ++i) {
|
||||
lru_min = (lru_min < dl->items[i].lru) ? lru_min : dl->items[i].lru;
|
||||
lru_max = (lru_max > dl->items[i].lru) ? lru_max : dl->items[i].lru;
|
||||
uint32_t age_max = 0;
|
||||
for (unsigned i = 1; i <= dl->length; ++i) {
|
||||
const uint32_t age = mdbx_dpl_age(txn, i);
|
||||
age_max = (age_max >= age) ? age_max : age;
|
||||
}
|
||||
|
||||
mdbx_verbose("lru-head %u, lru-min %u, lru-max %u", txn->tw.dirtylru, lru_min,
|
||||
lru_max);
|
||||
mdbx_verbose("lru-head %u, age-max %u", txn->tw.dirtylru, age_max);
|
||||
|
||||
/* half of 8-bit radix-sort */
|
||||
unsigned radix_counters[256], spillable = 0, spilled = 0;
|
||||
memset(&radix_counters, 0, sizeof(radix_counters));
|
||||
unsigned const reciprocal = 255 * 256 / (lru_max - lru_min + 1);
|
||||
const uint32_t reciprocal = (UINT32_C(255) << 24) / (age_max + 1);
|
||||
for (unsigned i = 1; i <= dl->length; ++i) {
|
||||
unsigned prio = spill_prio(txn, i, lru_min, reciprocal);
|
||||
unsigned prio = spill_prio(txn, i, reciprocal);
|
||||
if (prio < 256) {
|
||||
radix_counters[prio] += 1;
|
||||
spillable += 1;
|
||||
@ -5261,7 +5269,7 @@ static int mdbx_txn_spill(MDBX_txn *const txn, MDBX_cursor *const m0,
|
||||
unsigned r, w, prio;
|
||||
for (w = 0, r = 1; r <= dl->length && spilled < wanna_spill;
|
||||
prev_prio = prio, ++r) {
|
||||
prio = spill_prio(txn, r, lru_min, reciprocal);
|
||||
prio = spill_prio(txn, r, reciprocal);
|
||||
MDBX_page *const dp = dl->items[r].ptr;
|
||||
if (prio < prio2adjacent) {
|
||||
const pgno_t pgno = dl->items[r].pgno;
|
||||
@ -5270,9 +5278,9 @@ static int mdbx_txn_spill(MDBX_txn *const txn, MDBX_cursor *const m0,
|
||||
if (prev_prio < prio2adjacent && prev_prio > prio2spill &&
|
||||
dpl_endpgno(dl, r - 1) == pgno) {
|
||||
mdbx_debug("co-spill %u prev-adjacent page %" PRIaPGNO
|
||||
" (lru-dist %d, prio %u)",
|
||||
" (age %d, prio %u)",
|
||||
dpl_npages(dl, w), dl->items[r - 1].pgno,
|
||||
txn->tw.dirtylru - dl->items[r - 1].lru, prev_prio);
|
||||
mdbx_dpl_age(txn, r - 1), prev_prio);
|
||||
--w;
|
||||
rc = spill_page(txn, &ctx, dl->items[r - 1].ptr,
|
||||
dpl_npages(dl, r - 1));
|
||||
@ -5281,9 +5289,8 @@ static int mdbx_txn_spill(MDBX_txn *const txn, MDBX_cursor *const m0,
|
||||
++spilled;
|
||||
}
|
||||
|
||||
mdbx_debug("spill %u page %" PRIaPGNO " (lru-dist %d, prio %u)",
|
||||
npages, dp->mp_pgno, txn->tw.dirtylru - dl->items[r].lru,
|
||||
prio);
|
||||
mdbx_debug("spill %u page %" PRIaPGNO " (age %d, prio %u)", npages,
|
||||
dp->mp_pgno, mdbx_dpl_age(txn, r), prio);
|
||||
rc = spill_page(txn, &ctx, dp, npages);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
break;
|
||||
@ -5293,9 +5300,8 @@ static int mdbx_txn_spill(MDBX_txn *const txn, MDBX_cursor *const m0,
|
||||
|
||||
if (prev_prio <= prio2spill && dpl_endpgno(dl, r - 1) == pgno) {
|
||||
mdbx_debug("co-spill %u next-adjacent page %" PRIaPGNO
|
||||
" (lru-dist %d, prio %u)",
|
||||
npages, dp->mp_pgno, txn->tw.dirtylru - dl->items[r].lru,
|
||||
prio);
|
||||
" (age %d, prio %u)",
|
||||
npages, dp->mp_pgno, mdbx_dpl_age(txn, r), prio);
|
||||
rc = spill_page(txn, &ctx, dp, npages);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
break;
|
||||
@ -5333,10 +5339,9 @@ static int mdbx_txn_spill(MDBX_txn *const txn, MDBX_cursor *const m0,
|
||||
for (unsigned i = 1; i <= dl->length; ++i) {
|
||||
MDBX_page *dp = dl->items[i].ptr;
|
||||
mdbx_notice(
|
||||
"dirtylist[%u]: pgno %u, npages %u, flags 0x%04X, lru %u, prio %u", i,
|
||||
dp->mp_pgno, dpl_npages(dl, i), dp->mp_flags,
|
||||
txn->tw.dirtylru - dl->items[i].lru,
|
||||
spill_prio(txn, i, lru_min, reciprocal));
|
||||
"dirtylist[%u]: pgno %u, npages %u, flags 0x%04X, age %u, prio %u", i,
|
||||
dp->mp_pgno, dpl_npages(dl, i), dp->mp_flags, mdbx_dpl_age(txn, i),
|
||||
spill_prio(txn, i, reciprocal));
|
||||
}
|
||||
}
|
||||
|
||||
@ -7586,7 +7591,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, const unsigned flags) {
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
txn->tw.dirtyroom = txn->mt_env->me_options.dp_limit;
|
||||
txn->tw.dirtylru = 0;
|
||||
txn->tw.dirtylru = MDBX_DEBUG ? ~42u : 0;
|
||||
}
|
||||
|
||||
/* Setup db info */
|
||||
@ -8352,7 +8357,6 @@ static int mdbx_txn_end(MDBX_txn *txn, const unsigned mode) {
|
||||
|
||||
parent->mt_child = nullptr;
|
||||
parent->mt_flags &= ~MDBX_TXN_HAS_CHILD;
|
||||
mdbx_tassert(parent, parent->tw.dirtylru <= txn->tw.dirtylru);
|
||||
parent->tw.dirtylru = txn->tw.dirtylru;
|
||||
mdbx_tassert(parent, mdbx_dirtylist_check(parent));
|
||||
mdbx_tassert(parent, mdbx_audit_ex(parent, 0, false) == 0);
|
||||
@ -9748,7 +9752,6 @@ static __inline void mdbx_txn_merge(MDBX_txn *const parent, MDBX_txn *const txn,
|
||||
parent->tw.dirtyroom -= dst->sorted - dst->length;
|
||||
assert(parent->tw.dirtyroom <= parent->mt_env->me_options.dp_limit);
|
||||
dpl_setlen(dst, dst->sorted);
|
||||
mdbx_tassert(parent, parent->tw.dirtylru <= txn->tw.dirtylru);
|
||||
parent->tw.dirtylru = txn->tw.dirtylru;
|
||||
mdbx_tassert(parent, mdbx_dirtylist_check(parent));
|
||||
mdbx_dpl_free(txn);
|
||||
|
Loading…
x
Reference in New Issue
Block a user