mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-06 19:24:13 +08:00
mdbx: доработка поддержки авто-слияния записей GC внутри page_alloc_slowpath()
.
This commit is contained in:
parent
c46c03e7c8
commit
543e52730d
110
src/core.c
110
src/core.c
@ -6595,7 +6595,8 @@ static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len,
|
|||||||
#define MDBX_ALLOC_RESERVE 16
|
#define MDBX_ALLOC_RESERVE 16
|
||||||
#define MDBX_ALLOC_BACKLOG 32
|
#define MDBX_ALLOC_BACKLOG 32
|
||||||
#define MDBX_ALLOC_ALL (MDBX_ALLOC_GC | MDBX_ALLOC_NEW)
|
#define MDBX_ALLOC_ALL (MDBX_ALLOC_GC | MDBX_ALLOC_NEW)
|
||||||
#define MDBX_ALLOC_LIFO 128
|
#define MDBX_ALLOC_SHOULD_SCAN 64 /* internal state */
|
||||||
|
#define MDBX_ALLOC_LIFO 128 /* internal state */
|
||||||
|
|
||||||
static __inline bool is_gc_usable(const MDBX_txn *txn) {
|
static __inline bool is_gc_usable(const MDBX_txn *txn) {
|
||||||
/* If txn is updating the GC, then the retired-list cannot play catch-up with
|
/* If txn is updating the GC, then the retired-list cannot play catch-up with
|
||||||
@ -6692,13 +6693,18 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
|
|||||||
if (unlikely(!is_gc_usable(txn)))
|
if (unlikely(!is_gc_usable(txn)))
|
||||||
goto no_gc;
|
goto no_gc;
|
||||||
|
|
||||||
eASSERT(env, (flags & (MDBX_ALLOC_COALESCE | MDBX_ALLOC_LIFO)) == 0);
|
eASSERT(env, (flags & (MDBX_ALLOC_COALESCE | MDBX_ALLOC_LIFO |
|
||||||
|
MDBX_ALLOC_SHOULD_SCAN)) == 0);
|
||||||
flags += (env->me_flags & MDBX_LIFORECLAIM) ? MDBX_ALLOC_LIFO : 0;
|
flags += (env->me_flags & MDBX_LIFORECLAIM) ? MDBX_ALLOC_LIFO : 0;
|
||||||
|
|
||||||
const unsigned coalesce_threshold = env->me_maxgc_ov1page >> 2;
|
if (/* Не коагулируем записи при подготовке резерва для обновления GC.
|
||||||
if (txn->mt_dbs[FREE_DBI].md_branch_pages &&
|
* Иначе попытка увеличить резерв может приводить к необходимости ещё
|
||||||
MDBX_PNL_GETSIZE(txn->tw.relist) < coalesce_threshold && num)
|
* большего резерва из-за увеличения списка переработанных страниц. */
|
||||||
flags += MDBX_ALLOC_COALESCE;
|
flags < MDBX_ALLOC_COALESCE) {
|
||||||
|
if (txn->mt_dbs[FREE_DBI].md_branch_pages &&
|
||||||
|
re_len < env->me_maxgc_ov1page / 2)
|
||||||
|
flags += MDBX_ALLOC_COALESCE;
|
||||||
|
}
|
||||||
|
|
||||||
MDBX_cursor recur;
|
MDBX_cursor recur;
|
||||||
ret.err = gc_cursor_init(&recur, txn);
|
ret.err = gc_cursor_init(&recur, txn);
|
||||||
@ -6718,7 +6724,6 @@ retry_gc_have_oldest:
|
|||||||
const txnid_t detent = oldest + 1;
|
const txnid_t detent = oldest + 1;
|
||||||
|
|
||||||
txnid_t id = 0;
|
txnid_t id = 0;
|
||||||
bool should_scan = false;
|
|
||||||
MDBX_cursor_op op = MDBX_FIRST;
|
MDBX_cursor_op op = MDBX_FIRST;
|
||||||
if (flags & MDBX_ALLOC_LIFO) {
|
if (flags & MDBX_ALLOC_LIFO) {
|
||||||
if (!txn->tw.lifo_reclaimed) {
|
if (!txn->tw.lifo_reclaimed) {
|
||||||
@ -6790,24 +6795,54 @@ next_gc:;
|
|||||||
ret.err = MDBX_CORRUPTED;
|
ret.err = MDBX_CORRUPTED;
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
|
|
||||||
const size_t gc_len = MDBX_PNL_GETSIZE(gc_pnl);
|
const size_t gc_len = MDBX_PNL_GETSIZE(gc_pnl);
|
||||||
if (unlikely(/* list is too long already */ MDBX_PNL_GETSIZE(
|
TRACE("gc-read: id #%" PRIaTXN " len %zu, re-list will %zu ", id, gc_len,
|
||||||
txn->tw.relist) >= env->me_options.rp_augment_limit) &&
|
gc_len + re_len);
|
||||||
((/* not a slot-request from gc-update */
|
|
||||||
(flags & MDBX_ALLOC_SLOT) == 0 &&
|
eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
|
||||||
/* have enough unallocated space */ txn->mt_geo.upper >=
|
if (unlikely(gc_len + re_len >= env->me_maxgc_ov1page)) {
|
||||||
txn->mt_next_pgno + num) ||
|
/* Don't try to coalesce too much. */
|
||||||
gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) >= MDBX_PGL_LIMIT)) {
|
if (flags & MDBX_ALLOC_SHOULD_SCAN) {
|
||||||
/* Stop reclaiming to avoid large/overflow the page list.
|
eASSERT(env, flags & MDBX_ALLOC_COALESCE);
|
||||||
* This is a rare case while search for a continuously multi-page region
|
eASSERT(env, num > 0);
|
||||||
* in a large database.
|
#if MDBX_ENABLE_PROFGC
|
||||||
* https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/123
|
env->me_lck->mti_pgop_stat.gc_prof.coalescences += 1;
|
||||||
*/
|
#endif /* MDBX_ENABLE_PROFGC */
|
||||||
NOTICE("stop reclaiming to avoid PNL overflow: %zu (current) + %zu "
|
TRACE("clear %s %s", "MDBX_ALLOC_COALESCE", "since got threshold");
|
||||||
"(chunk) -> %zu",
|
if (re_len >= num) {
|
||||||
MDBX_PNL_GETSIZE(txn->tw.relist), gc_len,
|
eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno &&
|
||||||
gc_len + MDBX_PNL_GETSIZE(txn->tw.relist));
|
MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno);
|
||||||
goto depleted_gc;
|
range = txn->tw.relist + (MDBX_PNL_ASCENDING ? 1 : re_len);
|
||||||
|
pgno = *range;
|
||||||
|
if (num == 1)
|
||||||
|
goto done;
|
||||||
|
range = scan4seq(range, re_len, num - 1);
|
||||||
|
eASSERT(env, range == scan4range_checker(txn->tw.relist, num - 1));
|
||||||
|
if (likely(range)) {
|
||||||
|
pgno = *range;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
flags -= MDBX_ALLOC_COALESCE | MDBX_ALLOC_SHOULD_SCAN;
|
||||||
|
}
|
||||||
|
if (unlikely(/* list is too long already */ re_len >=
|
||||||
|
env->me_options.rp_augment_limit) &&
|
||||||
|
((/* not a slot-request from gc-update */
|
||||||
|
(flags & MDBX_ALLOC_SLOT) == 0 &&
|
||||||
|
/* have enough unallocated space */ txn->mt_geo.upper >=
|
||||||
|
txn->mt_next_pgno + num) ||
|
||||||
|
gc_len + re_len >= MDBX_PGL_LIMIT)) {
|
||||||
|
/* Stop reclaiming to avoid large/overflow the page list.
|
||||||
|
* This is a rare case while search for a continuously multi-page region
|
||||||
|
* in a large database.
|
||||||
|
* https://web.archive.org/web/https://github.com/erthink/libmdbx/issues/123
|
||||||
|
*/
|
||||||
|
NOTICE("stop reclaiming to avoid PNL overflow: %zu (current) + %zu "
|
||||||
|
"(chunk) -> %zu",
|
||||||
|
re_len, gc_len, gc_len + re_len);
|
||||||
|
goto depleted_gc;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Remember the ID of the GC record just read */
|
/* Remember the ID of the GC record just read */
|
||||||
@ -6834,7 +6869,7 @@ next_gc:;
|
|||||||
|
|
||||||
/* Merge in descending sorted order */
|
/* Merge in descending sorted order */
|
||||||
re_len = pnl_merge(txn->tw.relist, gc_pnl);
|
re_len = pnl_merge(txn->tw.relist, gc_pnl);
|
||||||
should_scan = true;
|
flags |= MDBX_ALLOC_SHOULD_SCAN;
|
||||||
if (AUDIT_ENABLED()) {
|
if (AUDIT_ENABLED()) {
|
||||||
if (unlikely(!pnl_check(txn->tw.relist, txn->mt_next_pgno))) {
|
if (unlikely(!pnl_check(txn->tw.relist, txn->mt_next_pgno))) {
|
||||||
ret.err = MDBX_CORRUPTED;
|
ret.err = MDBX_CORRUPTED;
|
||||||
@ -6860,26 +6895,22 @@ next_gc:;
|
|||||||
/* Done for a kick-reclaim mode, actually no page needed */
|
/* Done for a kick-reclaim mode, actually no page needed */
|
||||||
if (unlikely(flags & MDBX_ALLOC_SLOT)) {
|
if (unlikely(flags & MDBX_ALLOC_SLOT)) {
|
||||||
eASSERT(env, ret.err == MDBX_SUCCESS);
|
eASSERT(env, ret.err == MDBX_SUCCESS);
|
||||||
|
TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "early-exit for slot", id,
|
||||||
|
re_len);
|
||||||
goto early_exit;
|
goto early_exit;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* TODO: delete reclaimed records */
|
/* TODO: delete reclaimed records */
|
||||||
|
|
||||||
/* Don't try to coalesce too much. */
|
|
||||||
eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT);
|
eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT);
|
||||||
if (flags & MDBX_ALLOC_COALESCE) {
|
if (flags & MDBX_ALLOC_COALESCE) {
|
||||||
if (re_len /* current size */ < coalesce_threshold) {
|
TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "coalesce-continue", id,
|
||||||
#if MDBX_ENABLE_PROFGC
|
re_len);
|
||||||
env->me_lck->mti_pgop_stat.gc_prof.coalescences += 1;
|
goto next_gc;
|
||||||
#endif /* MDBX_ENABLE_PROFGC */
|
|
||||||
goto next_gc;
|
|
||||||
}
|
|
||||||
TRACE("clear %s %s", "MDBX_ALLOC_COALESCE", "since got threshold");
|
|
||||||
flags &= ~MDBX_ALLOC_COALESCE;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
scan:
|
scan:
|
||||||
eASSERT(env, should_scan);
|
eASSERT(env, flags & MDBX_ALLOC_SHOULD_SCAN);
|
||||||
if (re_len >= num) {
|
if (re_len >= num) {
|
||||||
eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno &&
|
eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno &&
|
||||||
MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno);
|
MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno);
|
||||||
@ -6894,13 +6925,16 @@ scan:
|
|||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
should_scan = false;
|
flags -= MDBX_ALLOC_SHOULD_SCAN;
|
||||||
if (ret.err == MDBX_SUCCESS)
|
if (ret.err == MDBX_SUCCESS) {
|
||||||
|
TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "continue-search", id,
|
||||||
|
re_len);
|
||||||
goto next_gc;
|
goto next_gc;
|
||||||
|
}
|
||||||
|
|
||||||
depleted_gc:
|
depleted_gc:
|
||||||
ret.err = MDBX_NOTFOUND;
|
ret.err = MDBX_NOTFOUND;
|
||||||
if (should_scan)
|
if (flags & MDBX_ALLOC_SHOULD_SCAN)
|
||||||
goto scan;
|
goto scan;
|
||||||
|
|
||||||
//-------------------------------------------------------------------------
|
//-------------------------------------------------------------------------
|
||||||
|
Loading…
x
Reference in New Issue
Block a user