mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-04 18:14:12 +08:00
mdbx: доработка внутренних LRU-отметок для аккуратного спиллинга огромных транзакций.
This commit is contained in:
parent
48eeb93628
commit
a9163f6307
75
src/core.c
75
src/core.c
@ -2764,7 +2764,7 @@ static __always_inline size_t dpl_setlen(MDBX_dpl *dl, size_t len) {
|
|||||||
dl->length = len;
|
dl->length = len;
|
||||||
dl->items[len + 1].ptr = (MDBX_page *)&dpl_stub_pageE;
|
dl->items[len + 1].ptr = (MDBX_page *)&dpl_stub_pageE;
|
||||||
dl->items[len + 1].pgno = P_INVALID;
|
dl->items[len + 1].pgno = P_INVALID;
|
||||||
dl->items[len + 1].extra = 0;
|
dl->items[len + 1].mlru = 0;
|
||||||
return len;
|
return len;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2779,7 +2779,7 @@ static __always_inline void dpl_clear(MDBX_dpl *dl) {
|
|||||||
dl->pages_including_loose = 0;
|
dl->pages_including_loose = 0;
|
||||||
dl->items[0].ptr = (MDBX_page *)&dpl_stub_pageB;
|
dl->items[0].ptr = (MDBX_page *)&dpl_stub_pageB;
|
||||||
dl->items[0].pgno = 0;
|
dl->items[0].pgno = 0;
|
||||||
dl->items[0].extra = 0;
|
dl->items[0].mlru = 0;
|
||||||
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
|
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2955,7 +2955,9 @@ __hot __noinline static size_t dpl_search(const MDBX_txn *txn, pgno_t pgno) {
|
|||||||
MDBX_NOTHROW_PURE_FUNCTION static __inline unsigned
|
MDBX_NOTHROW_PURE_FUNCTION static __inline unsigned
|
||||||
dpl_npages(const MDBX_dpl *dl, size_t i) {
|
dpl_npages(const MDBX_dpl *dl, size_t i) {
|
||||||
assert(0 <= (intptr_t)i && i <= dl->length);
|
assert(0 <= (intptr_t)i && i <= dl->length);
|
||||||
unsigned n = likely(!dl->items[i].multi) ? 1 : dl->items[i].ptr->mp_pages;
|
unsigned n = 1;
|
||||||
|
if (unlikely(dl->items[i].mlru & MDBX_dp_multi_mask))
|
||||||
|
n = dl->items[i].ptr->mp_pages;
|
||||||
assert(n == (IS_OVERFLOW(dl->items[i].ptr) ? dl->items[i].ptr->mp_pages : 1));
|
assert(n == (IS_OVERFLOW(dl->items[i].ptr) ? dl->items[i].ptr->mp_pages : 1));
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
@ -3043,20 +3045,50 @@ static void dpl_remove(const MDBX_txn *txn, size_t i) {
|
|||||||
dpl_remove_ex(txn, i, dpl_npages(txn->tw.dirtylist, i));
|
dpl_remove_ex(txn, i, dpl_npages(txn->tw.dirtylist, i));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static __noinline void txn_lru_reduce(MDBX_txn *txn) {
|
||||||
|
NOTICE("lru-reduce %u -> %u", txn->tw.dirtylru, txn->tw.dirtylru >> 1);
|
||||||
|
do {
|
||||||
|
txn->tw.dirtylru >>= 1;
|
||||||
|
MDBX_dpl *dl = txn->tw.dirtylist;
|
||||||
|
for (size_t i = 1; i <= dl->length; ++i) {
|
||||||
|
uint32_t mlru = dl->items[i].mlru;
|
||||||
|
mlru = (mlru & MDBX_dp_multi_mask) + ((mlru >> 1) & MDBX_dp_lru_mask);
|
||||||
|
dl->items[i].mlru = mlru;
|
||||||
|
}
|
||||||
|
txn = txn->mt_parent;
|
||||||
|
} while (txn);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline uint32_t dpl_age(const MDBX_txn *txn, size_t i) {
|
||||||
|
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
||||||
|
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
||||||
|
const MDBX_dpl *dl = txn->tw.dirtylist;
|
||||||
|
assert((intptr_t)i > 0 && i <= dl->length);
|
||||||
|
return (txn->tw.dirtylru + 1 - dl->items[i].mlru) >> 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline uint32_t txn_lru_inc(MDBX_txn *txn) {
|
||||||
|
if (unlikely(++txn->tw.dirtylru > UINT32_MAX / 3))
|
||||||
|
txn_lru_reduce(txn);
|
||||||
|
return txn->tw.dirtylru & MDBX_dp_lru_mask;
|
||||||
|
}
|
||||||
|
|
||||||
static __always_inline int __must_check_result dpl_append(MDBX_txn *txn,
|
static __always_inline int __must_check_result dpl_append(MDBX_txn *txn,
|
||||||
pgno_t pgno,
|
pgno_t pgno,
|
||||||
MDBX_page *page,
|
MDBX_page *page,
|
||||||
size_t npages) {
|
size_t npages) {
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
||||||
|
const MDBX_dp dp = {page, pgno, txn_lru_inc(txn) + (npages > 1)};
|
||||||
MDBX_dpl *dl = txn->tw.dirtylist;
|
MDBX_dpl *dl = txn->tw.dirtylist;
|
||||||
assert(dl->length <= MDBX_PGL_LIMIT + MDBX_PNL_GRANULATE);
|
tASSERT(txn, dl->length <= MDBX_PGL_LIMIT + MDBX_PNL_GRANULATE);
|
||||||
assert(dl->items[0].pgno == 0 && dl->items[dl->length + 1].pgno == P_INVALID);
|
tASSERT(txn, dl->items[0].pgno == 0 &&
|
||||||
|
dl->items[dl->length + 1].pgno == P_INVALID);
|
||||||
if (AUDIT_ENABLED()) {
|
if (AUDIT_ENABLED()) {
|
||||||
for (size_t i = dl->length; i > 0; --i) {
|
for (size_t i = dl->length; i > 0; --i) {
|
||||||
assert(dl->items[i].pgno != pgno);
|
assert(dl->items[i].pgno != dp.pgno);
|
||||||
if (unlikely(dl->items[i].pgno == pgno)) {
|
if (unlikely(dl->items[i].pgno == dp.pgno)) {
|
||||||
ERROR("Page %u already exist in the DPL at %zu", pgno, i);
|
ERROR("Page %u already exist in the DPL at %zu", dp.pgno, i);
|
||||||
return MDBX_PROBLEM;
|
return MDBX_PROBLEM;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -3085,10 +3117,7 @@ static __always_inline int __must_check_result dpl_append(MDBX_txn *txn,
|
|||||||
/* copy the stub beyond the end */
|
/* copy the stub beyond the end */
|
||||||
dl->items[length + 1] = dl->items[length];
|
dl->items[length + 1] = dl->items[length];
|
||||||
/* append page */
|
/* append page */
|
||||||
dl->items[length].ptr = page;
|
dl->items[length] = dp;
|
||||||
dl->items[length].pgno = pgno;
|
|
||||||
dl->items[length].multi = npages > 1;
|
|
||||||
dl->items[length].lru = txn->tw.dirtylru++;
|
|
||||||
dl->length = length;
|
dl->length = length;
|
||||||
dl->sorted = sorted;
|
dl->sorted = sorted;
|
||||||
dl->pages_including_loose += npages;
|
dl->pages_including_loose += npages;
|
||||||
@ -3096,15 +3125,6 @@ static __always_inline int __must_check_result dpl_append(MDBX_txn *txn,
|
|||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline uint32_t dpl_age(const MDBX_txn *txn, size_t i) {
|
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0);
|
|
||||||
tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
|
||||||
const MDBX_dpl *dl = txn->tw.dirtylist;
|
|
||||||
assert((intptr_t)i > 0 && i <= dl->length);
|
|
||||||
/* overflow could be here */
|
|
||||||
return (txn->tw.dirtylru - dl->items[i].lru) & UINT32_C(0x7fffFFFF);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------*/
|
/*----------------------------------------------------------------------------*/
|
||||||
|
|
||||||
uint8_t runtime_flags = MDBX_RUNTIME_FLAGS_INIT;
|
uint8_t runtime_flags = MDBX_RUNTIME_FLAGS_INIT;
|
||||||
@ -4627,7 +4647,9 @@ static size_t cursor_keep(MDBX_txn *txn, MDBX_cursor *mc) {
|
|||||||
size_t const n = dpl_search(txn, mp->mp_pgno);
|
size_t const n = dpl_search(txn, mp->mp_pgno);
|
||||||
if (txn->tw.dirtylist->items[n].pgno == mp->mp_pgno &&
|
if (txn->tw.dirtylist->items[n].pgno == mp->mp_pgno &&
|
||||||
dpl_age(txn, n)) {
|
dpl_age(txn, n)) {
|
||||||
txn->tw.dirtylist->items[n].lru = txn->tw.dirtylru;
|
txn->tw.dirtylist->items[n].mlru =
|
||||||
|
(txn->tw.dirtylist->items[n].mlru & MDBX_dp_multi_mask) +
|
||||||
|
(txn->tw.dirtylru & MDBX_dp_lru_mask);
|
||||||
++keep;
|
++keep;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -4911,6 +4933,11 @@ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0,
|
|||||||
const uint32_t reciprocal = (UINT32_C(255) << 24) / (age_max + 1);
|
const uint32_t reciprocal = (UINT32_C(255) << 24) / (age_max + 1);
|
||||||
for (size_t i = 1; i <= dl->length; ++i) {
|
for (size_t i = 1; i <= dl->length; ++i) {
|
||||||
const unsigned prio = spill_prio(txn, i, reciprocal);
|
const unsigned prio = spill_prio(txn, i, reciprocal);
|
||||||
|
TRACE("page %" PRIaPGNO
|
||||||
|
", lru %u, is_multi %c, npages %u, age %u of %u, prio %u",
|
||||||
|
dl->items[i].pgno, dl->items[i].mlru & MDBX_dp_lru_mask,
|
||||||
|
(dl->items[i].mlru & MDBX_dp_multi_mask) ? 'Y' : 'N',
|
||||||
|
dpl_npages(dl, i), dpl_age(txn, i), age_max, prio);
|
||||||
if (prio < 256) {
|
if (prio < 256) {
|
||||||
radix_entries[prio] += 1;
|
radix_entries[prio] += 1;
|
||||||
spillable_entries += 1;
|
spillable_entries += 1;
|
||||||
@ -15160,7 +15187,9 @@ __hot static __always_inline pgr_t page_get_inline(const uint16_t ILL,
|
|||||||
const size_t i = dpl_search(spiller, pgno);
|
const size_t i = dpl_search(spiller, pgno);
|
||||||
tASSERT(txn, (intptr_t)i > 0);
|
tASSERT(txn, (intptr_t)i > 0);
|
||||||
if (spiller->tw.dirtylist->items[i].pgno == pgno) {
|
if (spiller->tw.dirtylist->items[i].pgno == pgno) {
|
||||||
spiller->tw.dirtylist->items[i].lru = txn->tw.dirtylru++;
|
const uint32_t is_multi =
|
||||||
|
spiller->tw.dirtylist->items[i].mlru & MDBX_dp_multi_mask;
|
||||||
|
spiller->tw.dirtylist->items[i].mlru = is_multi + txn_lru_inc(txn);
|
||||||
r.page = spiller->tw.dirtylist->items[i].ptr;
|
r.page = spiller->tw.dirtylist->items[i].ptr;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -918,13 +918,9 @@ typedef txnid_t *MDBX_TXL;
|
|||||||
typedef struct MDBX_dp {
|
typedef struct MDBX_dp {
|
||||||
MDBX_page *ptr;
|
MDBX_page *ptr;
|
||||||
pgno_t pgno;
|
pgno_t pgno;
|
||||||
union {
|
uint32_t mlru;
|
||||||
uint32_t extra;
|
#define MDBX_dp_multi_mask 1
|
||||||
__anonymous_struct_extension__ struct {
|
#define MDBX_dp_lru_mask UINT32_C(0xffffFFFe)
|
||||||
unsigned multi : 1;
|
|
||||||
unsigned lru : 31;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
} MDBX_dp;
|
} MDBX_dp;
|
||||||
|
|
||||||
/* An DPL (dirty-page list) is a sorted array of MDBX_DPs. */
|
/* An DPL (dirty-page list) is a sorted array of MDBX_DPs. */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user