Mirror of https://github.com/isar/libmdbx.git, synced 2025-06-23 01:42:36 +08:00.

mdbx: early/non-deferred cleanup of GC records (beginning).

This commit is contained in:
parent 8c8aaf7dd1
commit 86890b4756
@@ -520,7 +520,7 @@ int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) {
     txn_gc_detent(txn);
     oldest_reading = txn->env->gc.detent;
     if (oldest_reading == txn->wr.troika.txnid[txn->wr.troika.recent]) {
-      /* If the oldest snapshot in use is the previous one, i. e. the one immediately preceding the current
+      /* If the oldest snapshot in use is the previous one, i.e. the one immediately preceding the current
        * transaction, then scan the reader table to find out whether the snapshot is really in use
        * by readers. */
       oldest_reading = txn->txnid;
src/gc-get.c: 44 lines changed
@@ -890,12 +890,6 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags)
   if (num > 0 && txn->dbs[FREE_DBI].branch_pages && MDBX_PNL_GETSIZE(txn->wr.repnl) < env->maxgc_large1page / 2)
     flags += ALLOC_COALESCE;

-  MDBX_cursor *const gc = txn_gc_cursor(txn);
-  eASSERT(env, mc != gc && gc->next == gc);
-  gc->txn = txn;
-  gc->dbi_state = txn->dbi_state;
-  gc->top_and_flags = z_fresh_mark;
-
   txn->wr.prefault_write_activated = !env->incore && env->options.prefault_write;
   if (txn->wr.prefault_write_activated) {
     /* Checking via minicore() reduces the cost significantly, but in
@@ -913,6 +907,12 @@ pgr_t gc_alloc_ex(const MDBX_cursor *const mc, const size_t num, uint8_t flags)
     txn->wr.prefault_write_activated = false;
   }

+  MDBX_cursor *const gc = gc_cursor(env);
+  gc->txn = txn;
+  gc->tree = txn->dbs;
+  gc->dbi_state = txn->dbi_state;
+  gc->top_and_flags = z_fresh_mark;
+
 retry_gc_refresh_detent:
   txn_gc_detent(txn);
 retry_gc_have_detent:
@@ -1046,20 +1046,6 @@ next_gc:
     }
   }

-  /* Remember the ID of the GC record that was read */
-  ret.err = rkl_push(
-      &txn->wr.gc.reclaimed, id
-      /* Instead of known_continuous=false one could pass/use (flags & ALLOC_LIFO) == 0 here; then the holes/gaps
-       * in the GC ids would form contiguous intervals in wr.gc.reclaimed, which would yield more free ids/slots
-       * for returning pages. However, this would also lead to futile attempts to delete the missing records in
-       * gc_clear_reclaimed(), and then to shuffling those contiguous intervals element by element into
-       * ready4reuse. So there is decidedly no point in it. Contiguous intervals should instead either be formed
-       * while gc_clear_reclaimed() is running, especially in FIFO mode, or be searched for only in gc_provide_ids() */
-  );
-  TRACE("%" PRIaTXN " len %zu pushed to txn-rkl, err %d", id, gc_len, ret.err);
-  if (unlikely(ret.err != MDBX_SUCCESS))
-    goto fail;
-
   /* Append PNL from GC record to wr.repnl */
   ret.err = pnl_need(&txn->wr.repnl, gc_len);
   if (unlikely(ret.err != MDBX_SUCCESS))
@@ -1102,7 +1088,23 @@ next_gc:
   }
   eASSERT(env, pnl_check_allocated(txn->wr.repnl, txn->geo.first_unallocated - MDBX_ENABLE_REFUND));

+  /* TODO: deletion of the records loaded from GC */
+  rkl_t *rkl = &txn->wr.gc.reclaimed;
+  const char *rkl_name = "reclaimed";
+  if (mc->dbi_state != txn->dbi_state &&
+      (MDBX_DEBUG || MDBX_PNL_GETSIZE(txn->wr.repnl) > (size_t)gc->tree->height + gc->tree->height + 3)) {
+    gc->next = txn->cursors[FREE_DBI];
+    txn->cursors[FREE_DBI] = gc;
+    ret.err = cursor_del(gc, 0);
+    txn->cursors[FREE_DBI] = gc->next;
+    if (unlikely(ret.err != MDBX_SUCCESS))
+      goto fail;
+    rkl = &txn->wr.gc.ready4reuse;
+    rkl_name = "ready4reuse";
+  }
+  ret.err = rkl_push(rkl, id);
+  TRACE("%" PRIaTXN " len %zu pushed to rkl-%s, err %d", id, gc_len, rkl_name, ret.err);
   if (unlikely(ret.err != MDBX_SUCCESS))
     goto fail;

   eASSERT(env, op == MDBX_PREV || op == MDBX_NEXT);
   if (flags & ALLOC_COALESCE) {
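This hunk is the core of the early (non-deferred) cleanup: once a GC record's page list has been merged into wr.repnl, the record can be deleted on the spot under a temporarily linked FREE_DBI cursor, and its id then goes into wr.gc.ready4reuse instead of lingering in wr.gc.reclaimed until commit. A self-contained toy model of this two-list bookkeeping (toy types and names, not the mdbx rkl API):

    #include <stdbool.h>
    #include <stdio.h>

    #define CAP 16
    typedef unsigned txnid_t;
    typedef struct { txnid_t ids[CAP]; unsigned len; } idlist_t; /* stand-in for rkl_t */

    static bool idlist_push(idlist_t *l, txnid_t id) {
      if (l->len == CAP)
        return false;
      l->ids[l->len++] = id;
      return true;
    }

    /* Consume one GC record: its page list has already been merged into repnl.
     * Either delete the record right now and remember its id as a reusable slot,
     * or defer the deletion and park the id in `reclaimed`. */
    static bool consume_gc_record(txnid_t id, bool can_delete_now, idlist_t *reclaimed, idlist_t *ready4reuse) {
      if (can_delete_now) {
        /* ... the real code deletes the record here via cursor_del() ... */
        return idlist_push(ready4reuse, id); /* slot becomes immediately reusable */
      }
      return idlist_push(reclaimed, id); /* deletion stays deferred, as before this commit */
    }

    int main(void) {
      idlist_t reclaimed = {{0}, 0}, ready4reuse = {{0}, 0};
      consume_gc_record(7, true, &reclaimed, &ready4reuse);
      consume_gc_record(9, false, &reclaimed, &ready4reuse);
      printf("reclaimed: %u, ready4reuse: %u\n", reclaimed.len, ready4reuse.len); /* prints 1, 1 */
      return 0;
    }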
src/gc-put.c: 52 lines changed
@@ -4,10 +4,9 @@
 #include "internals.h"

 int gc_put_init(MDBX_txn *txn, gcu_t *ctx) {
-  memset(ctx, 0, offsetof(gcu_t, ready4reuse));
+  memset(ctx, 0, offsetof(gcu_t, sequel));
   /* The chunk size that fits into a single separate "overflow" page, with a small reserve of free space. */
   ctx->goodchunk = txn->env->maxgc_large1page - (txn->env->maxgc_large1page >> 4);
-  rkl_init(&ctx->ready4reuse);
   rkl_init(&ctx->sequel);
 #if MDBX_ENABLE_BIGFOOT
   ctx->bigfoot = txn->txnid;
@@ -15,10 +14,7 @@ int gc_put_init(MDBX_txn *txn, gcu_t *ctx) {
   return cursor_init(&ctx->cursor, txn, FREE_DBI);
 }

-void gc_put_destroy(gcu_t *ctx) {
-  rkl_destroy(&ctx->ready4reuse);
-  rkl_destroy(&ctx->sequel);
-}
+void gc_put_destroy(gcu_t *ctx) { rkl_destroy(&ctx->sequel); }

 static size_t gc_chunk_pages(const MDBX_txn *txn, const size_t chunk) {
   return largechunk_npages(txn->env, gc_chunk_bytes(chunk));
@@ -100,10 +96,10 @@ MDBX_MAYBE_UNUSED static void dbg_dump_ids(gcu_t *ctx) {
     DEBUG_EXTRA_PRINT("%s\n", " empty");

   DEBUG_EXTRA("%s", "ready4reuse:");
-  if (rkl_empty(&ctx->ready4reuse))
+  if (rkl_empty(&txn->wr.gc.ready4reuse))
     DEBUG_EXTRA_PRINT("%s\n", " empty");
   else {
-    rkl_iter_t i = rkl_iterator(&ctx->ready4reuse, false);
+    rkl_iter_t i = rkl_iterator(&txn->wr.gc.ready4reuse, false);
     txnid_t id = rkl_turn(&i, false);
     while (id) {
       dbg_id(ctx, id);
@@ -485,7 +481,7 @@ static int gc_remove_rkl(MDBX_txn *txn, gcu_t *ctx, rkl_t *rkl) {
     int err = cursor_seek(&ctx->cursor, &key, nullptr, MDBX_SET).err;
     tASSERT(txn, id == rkl_edge(rkl, is_lifo(txn)));
     if (err == MDBX_NOTFOUND) {
-      err = rkl_push(&ctx->ready4reuse, rkl_pop(rkl, is_lifo(txn)));
+      err = rkl_push(&txn->wr.gc.ready4reuse, rkl_pop(rkl, is_lifo(txn)));
       WARNING("unexpected %s for gc-id %" PRIaTXN ", ignore and continue, push-err %d", "MDBX_NOTFOUND", id, err);
       if (unlikely(MDBX_IS_ERROR(err)))
         return err;
@@ -506,7 +502,7 @@ static int gc_remove_rkl(MDBX_txn *txn, gcu_t *ctx, rkl_t *rkl) {
       return err;
     ENSURE(txn->env, id == rkl_pop(rkl, is_lifo(txn)));
     tASSERT(txn, id <= txn->env->lck->cached_oldest.weak);
-    err = rkl_push(&ctx->ready4reuse, id);
+    err = rkl_push(&txn->wr.gc.ready4reuse, id);
     if (unlikely(err != MDBX_SUCCESS))
       return err;
     TRACE("id %" PRIaTXN " cleared and moved to ready4reuse", id);
@@ -526,7 +522,7 @@ static inline int gc_clear_returned(MDBX_txn *txn, gcu_t *ctx) {

 static int gc_push_sequel(MDBX_txn *txn, gcu_t *ctx, txnid_t id) {
   tASSERT(txn, id > 0 && id < txn->env->gc.detent);
-  tASSERT(txn, !rkl_contain(&txn->wr.gc.comeback, id) && !rkl_contain(&ctx->ready4reuse, id));
+  tASSERT(txn, !rkl_contain(&txn->wr.gc.comeback, id) && !rkl_contain(&txn->wr.gc.ready4reuse, id));
   TRACE("id %" PRIaTXN ", return-left %zi", id, ctx->return_left);
   int err = rkl_push(&ctx->sequel, id);
   if (unlikely(err != MDBX_SUCCESS)) {
@@ -705,7 +701,7 @@ static bool solve_recursive(const sr_context_t *const ct, sr_state_t *const st,

 static int gc_dense_solve(MDBX_txn *txn, gcu_t *ctx, gc_dense_histogram_t *const solution) {
   sr_state_t st = {
-      .left_slots = rkl_len(&ctx->ready4reuse), .left_volume = ctx->return_left, .hist = ctx->dense_histogram};
+      .left_slots = rkl_len(&txn->wr.gc.ready4reuse), .left_volume = ctx->return_left, .hist = ctx->dense_histogram};
   assert(st.left_slots > 0 && st.left_volume > 0 && MDBX_PNL_GETSIZE(txn->wr.repnl) > 0);
   if (unlikely(!st.left_slots || !st.left_volume)) {
     ERROR("%s/%d: %s", "MDBX_PROBLEM", MDBX_PROBLEM, "recursive-solving preconditions violated");
@@ -734,7 +730,7 @@ static int gc_dense_solve(MDBX_txn *txn, gcu_t *ctx, gc_dense_histogram_t *const
       chunk += ct.other_pages;
   }

-  if (unlikely(volume < (size_t)ctx->return_left || items > rkl_len(&ctx->ready4reuse))) {
+  if (unlikely(volume < (size_t)ctx->return_left || items > rkl_len(&txn->wr.gc.ready4reuse))) {
     assert(!"recursive-solving failure");
     ERROR("%s/%d: %s", "MDBX_PROBLEM", MDBX_PROBLEM, "recursive-solving failure");
     return MDBX_PROBLEM;
@@ -782,7 +778,7 @@ static int gc_dense_solve(MDBX_txn *txn, gcu_t *ctx, gc_dense_histogram_t *const
 //       chunk += ct.other_pages;
 //   }
 //
-//   if (unlikely(volume < (size_t)ctx->return_left || items > rkl_len(&ctx->ready4reuse))) {
+//   if (unlikely(volume < (size_t)ctx->return_left || items > rkl_len(&txn->wr.gc.ready4reuse))) {
 //     assert(!"recursive-solving failure");
 //     ERROR("%s/%d: %s", "MDBX_PROBLEM", MDBX_PROBLEM, "recursive-solving failure");
 //     return MDBX_PROBLEM;
@@ -848,7 +844,7 @@ static int gc_search_holes(MDBX_txn *txn, gcu_t *ctx) {
       ((ctx->gc_first > UINT16_MAX) ? UINT16_MAX : (unsigned)ctx->gc_first - 1) * ctx->goodchunk;
   const txnid_t reasonable_deep =
       txn->env->maxgc_per_branch +
-      2 * (txn->env->gc.detent - txnid_min(rkl_lowest(&ctx->ready4reuse), rkl_lowest(&txn->wr.gc.comeback)));
+      2 * (txn->env->gc.detent - txnid_min(rkl_lowest(&txn->wr.gc.ready4reuse), rkl_lowest(&txn->wr.gc.comeback)));
   const txnid_t scan_threshold = (txn->env->gc.detent > reasonable_deep) ? txn->env->gc.detent - reasonable_deep : 0;

   txnid_t scan_hi = txn->env->gc.detent, scan_lo = INVALID_TXNID;
@@ -859,7 +855,7 @@ static int gc_search_holes(MDBX_txn *txn, gcu_t *ctx) {
   }

   rkl_iter_t iter_ready4reuse, iter_comeback;
-  rkl_find(&ctx->ready4reuse, scan_hi, &iter_ready4reuse);
+  rkl_find(&txn->wr.gc.ready4reuse, scan_hi, &iter_ready4reuse);
   rkl_find(&txn->wr.gc.comeback, scan_hi, &iter_comeback);
   rkl_hole_t hole_ready4reuse = rkl_hole(&iter_ready4reuse, true);
   rkl_hole_t hole_comeback = rkl_hole(&iter_comeback, true);
@@ -948,8 +944,8 @@ static int gc_search_holes(MDBX_txn *txn, gcu_t *ctx) {
 }

 static inline int gc_reserve4return(MDBX_txn *txn, gcu_t *ctx, const size_t chunk_lo, const size_t chunk_hi) {
-  txnid_t reservation_id = rkl_pop(&ctx->ready4reuse, true);
-  TRACE("%s: slots-ready4reuse-left %zu, reservation-id %" PRIaTXN, dbg_prefix(ctx), rkl_len(&ctx->ready4reuse),
+  txnid_t reservation_id = rkl_pop(&txn->wr.gc.ready4reuse, true);
+  TRACE("%s: slots-ready4reuse-left %zu, reservation-id %" PRIaTXN, dbg_prefix(ctx), rkl_len(&txn->wr.gc.ready4reuse),
         reservation_id);
   tASSERT(txn, reservation_id >= MIN_TXNID && reservation_id < txn->txnid);
   tASSERT(txn, reservation_id <= txn->env->lck->cached_oldest.weak);
@@ -1046,7 +1042,7 @@ static int gc_handle_dense(MDBX_txn *txn, gcu_t *ctx, size_t left_min, size_t le
    * to place all the returned pages. */

   int err = MDBX_RESULT_FALSE;
-  if (!rkl_empty(&ctx->ready4reuse)) {
+  if (!rkl_empty(&txn->wr.gc.ready4reuse)) {
     gc_dense_hist(txn, ctx);
     gc_dense_histogram_t solution;
     if (ctx->loop == 1 || ctx->loop % 3 == 0)
@@ -1106,13 +1102,13 @@ static int gc_handle_dense(MDBX_txn *txn, gcu_t *ctx, size_t left_min, size_t le
   const size_t per_page = txn->env->ps / sizeof(pgno_t);
   size_t amount = MDBX_PNL_GETSIZE(txn->wr.repnl);
   do {
-    if (rkl_empty(&ctx->ready4reuse)) {
+    if (rkl_empty(&txn->wr.gc.ready4reuse)) {
       NOTICE("%s: restart since no slot(s) available (reserved %zu...%zu of %zu)", dbg_prefix(ctx),
              ctx->return_reserved_lo, ctx->return_reserved_hi, amount);
       return MDBX_RESULT_TRUE;
     }
     const size_t left = dense_adjust_amount(txn, amount) - ctx->return_reserved_hi;
-    const size_t slots = rkl_len(&ctx->ready4reuse);
+    const size_t slots = rkl_len(&txn->wr.gc.ready4reuse);
     const size_t base = (left + slots - 1) / slots;
     const size_t adjusted = dense_adjust_chunk(txn, base);
     TRACE("dense-reservation: reserved %zu...%zu of %zu, left %zu slot(s) and %zu pnl, step: %zu base,"
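In the reservation loop above, base = (left + slots - 1) / slots is plain integer ceiling division: the pages still to be returned are spread as evenly as possible across the remaining ready4reuse slots, and dense_adjust_chunk() then clamps the step to what a single GC record can hold. A tiny standalone illustration (the numbers are made up):

    #include <stddef.h>
    #include <stdio.h>

    int main(void) {
      const size_t left = 1000; /* pages still to return */
      const size_t slots = 3;   /* ids left in ready4reuse */
      const size_t base = (left + slots - 1) / slots; /* ceil(1000 / 3) = 334 */
      printf("%zu pages over %zu slots -> chunks of up to %zu\n", left, slots, base);
      /* three chunks of 334, 334 and 332 pages cover all 1000 */
      return 0;
    }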
@@ -1139,7 +1135,7 @@ static int gc_handle_dense(MDBX_txn *txn, gcu_t *ctx, size_t left_min, size_t le

   if (unlikely(err != MDBX_SUCCESS))
     ERROR("unable provide IDs and/or to fit returned PNL (%zd+%zd pages, %zd+%zd slots), err %d", ctx->retired_stored,
-          MDBX_PNL_GETSIZE(txn->wr.repnl), rkl_len(&txn->wr.gc.comeback), rkl_len(&ctx->ready4reuse), err);
+          MDBX_PNL_GETSIZE(txn->wr.repnl), rkl_len(&txn->wr.gc.comeback), rkl_len(&txn->wr.gc.ready4reuse), err);
   return err;
 }

@@ -1178,11 +1174,11 @@ static int gc_rerere(MDBX_txn *txn, gcu_t *ctx) {

   const size_t left_min = amount - ctx->return_reserved_hi;
   const size_t left_max = amount - ctx->return_reserved_lo;
-  if (likely(left_min < txn->env->maxgc_large1page && !rkl_empty(&ctx->ready4reuse))) {
+  if (likely(left_min < txn->env->maxgc_large1page && !rkl_empty(&txn->wr.gc.ready4reuse))) {
     /* There is at least one slot, and the whole remainder of the page-number list fits into a single chunk.
      * This is the most frequent situation; just continue. */
   } else {
-    if (likely(rkl_len(&ctx->ready4reuse) * ctx->goodchunk >= left_max)) {
+    if (likely(rkl_len(&txn->wr.gc.ready4reuse) * ctx->goodchunk >= left_max)) {
      /* There are enough slots; the main task is to split into chunks so that a change (decrease) of the number of
       * returned pages during the reservation of GC records does not require altering the reservation, i.e. deleting
       * it and repeating everything again. */
@@ -1195,7 +1191,7 @@ static int gc_rerere(MDBX_txn *txn, gcu_t *ctx) {
       return err;

     if (!rkl_empty(&ctx->sequel)) {
-      err = rkl_merge(&ctx->sequel, &ctx->ready4reuse, false);
+      err = rkl_merge(&ctx->sequel, &txn->wr.gc.ready4reuse, false);
       if (unlikely(err != MDBX_SUCCESS)) {
         if (err == MDBX_RESULT_TRUE) {
           ERROR("%s/%d: %s", "MDBX_PROBLEM", MDBX_PROBLEM, "unexpected duplicate(s) during rkl-merge");
@@ -1208,14 +1204,14 @@ static int gc_rerere(MDBX_txn *txn, gcu_t *ctx) {

   if (unlikely(ctx->return_left > 0)) {
     /* Re-estimate the balance for chunks of the maximum size (maxgc_large1page instead of goodchunk). */
-    const intptr_t dense_unfit = left_min - rkl_len(&ctx->ready4reuse) * txn->env->maxgc_large1page;
+    const intptr_t dense_unfit = left_min - rkl_len(&txn->wr.gc.ready4reuse) * txn->env->maxgc_large1page;
     if (dense_unfit > 0) {
       /* The available identifiers will NOT be enough,
        * even if they are used for chunks of size maxgc_large1page instead of goodchunk. */
       if (!ctx->dense) {
         NOTICE("%s: enter to dense-mode (amount %zu, reserved %zu..%zu, slots/ids %zu, left %zu..%zu, unfit %zu)",
                dbg_prefix(ctx), amount, ctx->return_reserved_lo, ctx->return_reserved_hi,
-               rkl_len(&ctx->ready4reuse), left_min, left_max, dense_unfit);
+               rkl_len(&txn->wr.gc.ready4reuse), left_min, left_max, dense_unfit);
         ctx->dense = true;
       }
       return gc_handle_dense(txn, ctx, left_min, left_max);
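Together, the balance checks in gc_rerere() form a three-way decision. With slots reusable ids in ready4reuse, chunks of goodchunk items are preferred (the capacity of one overflow page minus a 1/16 margin), and maxgc_large1page is the hard per-record ceiling; only when even slots * maxgc_large1page cannot cover the remainder does the code fall into dense mode. A self-contained sketch of that decision (the constant 1018 is an assumed page capacity, not a value from the source):

    #include <stddef.h>
    #include <stdio.h>

    int main(void) {
      const size_t maxgc_large1page = 1018; /* assumed capacity of one overflow page, in ids */
      const size_t goodchunk = maxgc_large1page - (maxgc_large1page >> 4); /* preferred chunk, ~15/16 */
      const size_t slots = 3;       /* ids available in ready4reuse */
      const size_t left_min = 4000; /* lower bound of pages still to return */

      if (slots * goodchunk >= left_min)
        puts("enough slots: split into preferred-size chunks");
      else if (slots * maxgc_large1page >= left_min)
        puts("tight: fall back to maximum-size chunks");
      else
        puts("dense mode: more ids must be found first"); /* this example: 3054 < 4000 */
      return 0;
    }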
@@ -1373,7 +1369,7 @@ int gc_update(MDBX_txn *txn, gcu_t *ctx) {
   }

   /* The txn->wr.repnl[] can grow and shrink during this call.
-   * The txn->wr.gc.reclaimed[] can grow, then migrate into ctx->ready4reuse and later to txn->wr.gc.comeback[].
+   * The txn->wr.gc.reclaimed[] can grow, then migrate into txn->wr.gc.ready4reuse and later to txn->wr.gc.comeback[].
    * But page numbers cannot disappear from txn->wr.retired_pages[]. */
 retry:
   ctx->loop += !(ctx->prev_first_unallocated > txn->geo.first_unallocated);
src/gc.h: 4 lines changed
@@ -35,7 +35,7 @@ typedef struct gc_update_context {
     unsigned n;
   } dbg;
 #endif /* MDBX_DEBUG_GCU */
-  rkl_t ready4reuse, sequel;
+  rkl_t sequel;
 #if MDBX_ENABLE_BIGFOOT
   txnid_t bigfoot;
 #endif /* MDBX_ENABLE_BIGFOOT */
@@ -78,3 +78,5 @@ static inline bool gc_is_reclaimed(const MDBX_txn *txn, const txnid_t id) {
 static inline txnid_t txnid_min(txnid_t a, txnid_t b) { return (a < b) ? a : b; }

 static inline txnid_t txnid_max(txnid_t a, txnid_t b) { return (a > b) ? a : b; }
+
+static inline MDBX_cursor *gc_cursor(MDBX_env *env) { return ptr_disp(env->basal_txn, sizeof(MDBX_txn)); }
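The new gc_cursor() helper (replacing txn_gc_cursor(), removed further below) finds the dedicated GC cursor purely from the env: it lives in the same allocation as the basal transaction, immediately behind the MDBX_txn object, and ptr_disp() is just byte-offset pointer arithmetic. A self-contained sketch of this trailing-object layout (toy types, hypothetical names):

    #include <stdio.h>
    #include <stdlib.h>

    typedef struct { int id; } txn_t;       /* stand-in for MDBX_txn */
    typedef struct { int state; } cursor_t; /* stand-in for MDBX_cursor */

    /* byte-offset pointer displacement, as ptr_disp() does */
    static void *ptr_disp(void *ptr, size_t disp) { return (char *)ptr + disp; }

    int main(void) {
      /* one allocation: the basal txn object with the GC cursor right behind it */
      txn_t *basal_txn = malloc(sizeof(txn_t) + sizeof(cursor_t));
      if (!basal_txn)
        return 1;
      cursor_t *gc = ptr_disp(basal_txn, sizeof(txn_t));
      gc->state = 42;
      printf("gc cursor at basal_txn + %zu bytes, state %d\n", sizeof(txn_t), gc->state);
      free(basal_txn);
      return 0;
    }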
@@ -214,9 +214,10 @@ struct MDBX_txn {
     troika_t troika;
     pnl_t __restrict repnl; /* Reclaimed GC pages */
     struct {
-      rkl_t reclaimed;   /* The list of reclaimed txn-ids from GC */
+      rkl_t reclaimed;   /* The list of reclaimed txn-ids from GC, but not cleared/deleted */
+      rkl_t ready4reuse; /* The list of reclaimed txn-ids from GC, and cleared/deleted */
       uint64_t spent;    /* Time spent reading and searching GC */
       rkl_t comeback;    /* The list of ids of records returned into GC during commit, etc */
     } gc;
     bool prefault_write_activated;
 #if MDBX_ENABLE_REFUND
@@ -17,7 +17,6 @@ MDBX_NOTHROW_PURE_FUNCTION static inline pgno_t node_pgno(const node_t *const __
 /* Set the page number in a branch node */
 static inline void node_set_pgno(node_t *const __restrict node, pgno_t pgno) {
   assert(pgno >= MIN_PAGENO && pgno <= MAX_PAGENO);
-
   UNALIGNED_POKE_32(node, node_t, child_pgno, (uint32_t)pgno);
 }

@@ -179,19 +179,19 @@ __hot int page_touch_unmodifable(MDBX_txn *txn, MDBX_cursor *mc, const page_t *c
   page_t *np;
   if (is_frozen(txn, mp)) {
     /* CoW the page */
+    rc = pnl_need(&txn->wr.retired_pages, 1);
+    if (unlikely(rc != MDBX_SUCCESS))
+      goto fail;
     const pgr_t par = gc_alloc_single(mc);
     rc = par.err;
     np = par.page;
     if (unlikely(rc != MDBX_SUCCESS))
       goto fail;

-    rc = pnl_append(&txn->wr.retired_pages, mp->pgno);
-    if (unlikely(rc != MDBX_SUCCESS))
-      goto fail;
-
     const pgno_t pgno = np->pgno;
     DEBUG("touched db %d page %" PRIaPGNO " -> %" PRIaPGNO, cursor_dbi_dbg(mc), mp->pgno, pgno);
     tASSERT(txn, mp->pgno != pgno);
+    pnl_append_prereserved(txn->wr.retired_pages, mp->pgno);
     /* Update the parent page, if any, to point to the new page */
     if (likely(mc->top)) {
       page_t *parent = mc->pg[mc->top - 1];
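The reordering above moves the only fallible bookkeeping step, growing txn->wr.retired_pages, in front of gc_alloc_single(), so once the replacement page is allocated the old page number is recorded by the infallible pnl_append_prereserved() and no failure point remains between the two steps. A standalone sketch of this reserve-then-append pattern (toy list, not the mdbx pnl API):

    #include <stdbool.h>
    #include <stdlib.h>

    typedef struct { unsigned *items; size_t len, cap; } list_t;

    /* the only step that can fail: grow capacity up front */
    static bool list_reserve(list_t *l, size_t extra) {
      if (l->len + extra <= l->cap)
        return true;
      size_t cap = l->cap ? l->cap * 2 : 4;
      while (cap < l->len + extra)
        cap *= 2;
      unsigned *p = realloc(l->items, cap * sizeof(*p));
      if (!p)
        return false;
      l->items = p;
      l->cap = cap;
      return true;
    }

    /* infallible by contract: a slot was pre-reserved */
    static void list_append_prereserved(list_t *l, unsigned v) { l->items[l->len++] = v; }

    static bool cow_page(list_t *retired, unsigned old_pgno) {
      if (!list_reserve(retired, 1)) /* fail BEFORE anything irreversible happens */
        return false;
      /* ... allocate the replacement page here; from now on nothing can fail ... */
      list_append_prereserved(retired, old_pgno);
      return true;
    }

    int main(void) {
      list_t retired = {0};
      bool ok = cow_page(&retired, 5);
      free(retired.items);
      return ok ? 0 : 1;
    }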
@@ -52,7 +52,7 @@ void pnl_shrink(pnl_t __restrict *__restrict ppnl) {
 int pnl_reserve(pnl_t __restrict *__restrict ppnl, const size_t wanna) {
   const size_t allocated = MDBX_PNL_ALLOCLEN(*ppnl);
   assert(MDBX_PNL_GETSIZE(*ppnl) <= PAGELIST_LIMIT && MDBX_PNL_ALLOCLEN(*ppnl) >= MDBX_PNL_GETSIZE(*ppnl));
-  if (likely(allocated >= wanna))
+  if (unlikely(allocated >= wanna))
     return MDBX_SUCCESS;

   if (unlikely(wanna > /* paranoia */ PAGELIST_LIMIT)) {
src/pnl.h: 38 lines changed
@@ -56,21 +56,6 @@ typedef const pgno_t *const_pnl_t;
 #define MDBX_PNL_SIZEOF(pl) ((MDBX_PNL_GETSIZE(pl) + 1) * sizeof(pgno_t))
 #define MDBX_PNL_IS_EMPTY(pl) (MDBX_PNL_GETSIZE(pl) == 0)

-MDBX_NOTHROW_PURE_FUNCTION MDBX_MAYBE_UNUSED static inline size_t pnl_size2bytes(size_t size) {
-  assert(size > 0 && size <= PAGELIST_LIMIT);
-#if MDBX_PNL_PREALLOC_FOR_RADIXSORT
-
-  size += size;
-#endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */
-  STATIC_ASSERT(MDBX_ASSUME_MALLOC_OVERHEAD +
-                    (PAGELIST_LIMIT * (MDBX_PNL_PREALLOC_FOR_RADIXSORT + 1) + MDBX_PNL_GRANULATE + 3) * sizeof(pgno_t) <
-                SIZE_MAX / 4 * 3);
-  size_t bytes =
-      ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(pgno_t) * (size + 3), MDBX_PNL_GRANULATE * sizeof(pgno_t)) -
-      MDBX_ASSUME_MALLOC_OVERHEAD;
-  return bytes;
-}
-
 MDBX_NOTHROW_PURE_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t pnl_bytes2size(const size_t bytes) {
   size_t size = bytes / sizeof(pgno_t);
   assert(size > 3 && size <= PAGELIST_LIMIT + /* alignment gap */ 65536);
@@ -81,6 +66,22 @@ MDBX_NOTHROW_PURE_FUNCTION MDBX_MAYBE_UNUSED static inline pgno_t pnl_bytes2size
   return (pgno_t)size;
 }

+MDBX_NOTHROW_PURE_FUNCTION MDBX_MAYBE_UNUSED static inline size_t pnl_size2bytes(size_t wanna_size) {
+  size_t size = wanna_size;
+  assert(size > 0 && size <= PAGELIST_LIMIT);
+#if MDBX_PNL_PREALLOC_FOR_RADIXSORT
+  size += size;
+#endif /* MDBX_PNL_PREALLOC_FOR_RADIXSORT */
+  STATIC_ASSERT(MDBX_ASSUME_MALLOC_OVERHEAD +
+                    (PAGELIST_LIMIT * (MDBX_PNL_PREALLOC_FOR_RADIXSORT + 1) + MDBX_PNL_GRANULATE + 3) * sizeof(pgno_t) <
+                SIZE_MAX / 4 * 3);
+  size_t bytes =
+      ceil_powerof2(MDBX_ASSUME_MALLOC_OVERHEAD + sizeof(pgno_t) * (size + 3), MDBX_PNL_GRANULATE * sizeof(pgno_t)) -
+      MDBX_ASSUME_MALLOC_OVERHEAD;
+  assert(pnl_bytes2size(bytes) >= wanna_size);
+  return bytes;
+}
+
 MDBX_INTERNAL pnl_t pnl_alloc(size_t size);

 MDBX_INTERNAL void pnl_free(pnl_t pnl);
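pnl_size2bytes() was moved below pnl_bytes2size() so its new postcondition can call it: the byte count produced by the granulated power-of-two rounding must decode back to at least the requested item count. A reduced, standalone version of that round-trip invariant (simplified arithmetic, not the mdbx macros):

    #include <assert.h>
    #include <stddef.h>

    /* round x up to a multiple of `granule`, which must be a power of two */
    static size_t ceil_granule(size_t x, size_t granule) { return (x + granule - 1) & ~(granule - 1); }

    static size_t size2bytes(size_t items) { return ceil_granule(items * sizeof(unsigned), 64); }
    static size_t bytes2size(size_t bytes) { return bytes / sizeof(unsigned); }

    int main(void) {
      /* the rounding may only over-provision, never lose capacity */
      for (size_t items = 1; items < 100000; ++items)
        assert(bytes2size(size2bytes(items)) >= items);
      return 0;
    }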
@@ -106,6 +107,13 @@ MDBX_MAYBE_UNUSED static inline void pnl_append_prereserved(__restrict pnl_t pnl
   MDBX_PNL_LAST(pnl) = pgno;
 }

+MDBX_MAYBE_UNUSED static inline int __must_check_result pnl_append(__restrict pnl_t *ppnl, pgno_t pgno) {
+  int rc = pnl_need(ppnl, 1);
+  if (likely(rc == MDBX_SUCCESS))
+    pnl_append_prereserved(*ppnl, pgno);
+  return rc;
+}
+
 MDBX_INTERNAL void pnl_shrink(pnl_t __restrict *__restrict ppnl);

 MDBX_INTERNAL int __must_check_result spill_append_span(__restrict pnl_t *ppnl, pgno_t pgno, size_t n);
@@ -65,7 +65,6 @@ MDBX_INTERNAL bool txn_gc_detent(const MDBX_txn *const txn);
 MDBX_INTERNAL int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits);
 MDBX_INTERNAL void txn_done_cursors(MDBX_txn *txn);
 MDBX_INTERNAL int txn_shadow_cursors(const MDBX_txn *parent, const size_t dbi);
-MDBX_INTERNAL MDBX_cursor *txn_gc_cursor(MDBX_txn *txn);

 MDBX_INTERNAL MDBX_txn *txn_alloc(const MDBX_txn_flags_t flags, MDBX_env *env);
 MDBX_INTERNAL int txn_abort(MDBX_txn *txn);
@@ -63,6 +63,7 @@ __cold MDBX_txn *txn_basal_create(const size_t max_dbi) {
     return txn;

   rkl_init(&txn->wr.gc.reclaimed);
+  rkl_init(&txn->wr.gc.ready4reuse);
   rkl_init(&txn->wr.gc.comeback);
   txn->dbs = ptr_disp(txn, base);
   txn->cursors = ptr_disp(txn->dbs, max_dbi * sizeof(txn->dbs[0]));
@@ -85,6 +86,7 @@ __cold MDBX_txn *txn_basal_create(const size_t max_dbi) {
 __cold void txn_basal_destroy(MDBX_txn *txn) {
   dpl_free(txn);
   rkl_destroy(&txn->wr.gc.reclaimed);
+  rkl_destroy(&txn->wr.gc.ready4reuse);
   rkl_destroy(&txn->wr.gc.comeback);
   pnl_free(txn->wr.retired_pages);
   pnl_free(txn->wr.spilled.list);
@@ -127,6 +129,8 @@ int txn_basal_start(MDBX_txn *txn, unsigned flags) {
   txn->wr.spilled.least_removed = 0;
   txn->wr.gc.spent = 0;
   tASSERT(txn, rkl_empty(&txn->wr.gc.reclaimed));
+  tASSERT(txn, rkl_empty(&txn->wr.gc.ready4reuse));
+  tASSERT(txn, rkl_empty(&txn->wr.gc.comeback));
   txn->env->gc.detent = 0;
   env->txn = txn;

@@ -144,6 +148,7 @@ int txn_basal_end(MDBX_txn *txn, unsigned mode) {
   pnl_free(txn->wr.spilled.list);
   txn->wr.spilled.list = nullptr;
   rkl_clear_and_shrink(&txn->wr.gc.reclaimed);
+  rkl_clear_and_shrink(&txn->wr.gc.ready4reuse);
   rkl_clear_and_shrink(&txn->wr.gc.comeback);

   eASSERT(env, txn->parent == nullptr);
@@ -357,7 +357,6 @@ int txn_nested_create(MDBX_txn *parent, const MDBX_txn_flags_t flags) {
   txn->env->txn = txn;
   txn->owner = parent->owner;
   txn->wr.troika = parent->wr.troika;
-  rkl_init(&txn->wr.gc.reclaimed);

 #if MDBX_ENABLE_DBI_SPARSE
   txn->dbi_sparse = parent->dbi_sparse;
@@ -415,6 +414,9 @@ int txn_nested_create(MDBX_txn *parent, const MDBX_txn_flags_t flags) {
   txn->wr.gc.spent = parent->wr.gc.spent;
   rkl_init(&txn->wr.gc.comeback);
   err = rkl_copy(&parent->wr.gc.reclaimed, &txn->wr.gc.reclaimed);
   if (unlikely(err != MDBX_SUCCESS))
     return err;
+  err = rkl_copy(&parent->wr.gc.ready4reuse, &txn->wr.gc.ready4reuse);
+  if (unlikely(err != MDBX_SUCCESS))
+    return err;

@@ -432,8 +434,6 @@ int txn_nested_create(MDBX_txn *parent, const MDBX_txn_flags_t flags) {
                    (parent->parent ? parent->parent->wr.dirtyroom : parent->env->options.dp_limit));
   tASSERT(txn, txn->wr.dirtyroom + txn->wr.dirtylist->length ==
                    (txn->parent ? txn->parent->wr.dirtyroom : txn->env->options.dp_limit));
-  tASSERT(parent, parent->cursors[FREE_DBI] == nullptr);
-  // TODO: shadow GC' cursor
   return txn_shadow_cursors(parent, MAIN_DBI);
 }

@@ -443,7 +443,9 @@ void txn_nested_abort(MDBX_txn *nested) {
   nested->signature = 0;
   nested->owner = 0;

+  tASSERT(nested, rkl_empty(&nested->wr.gc.comeback));
   rkl_destroy(&nested->wr.gc.reclaimed);
+  rkl_destroy(&nested->wr.gc.ready4reuse);

   if (nested->wr.retired_pages) {
     tASSERT(parent, MDBX_PNL_GETSIZE(nested->wr.retired_pages) >= (uintptr_t)parent->wr.retired_pages);
@@ -527,6 +529,8 @@ int txn_nested_join(MDBX_txn *txn, struct commit_timestamp *ts) {
   txn->wr.repnl = nullptr;
   parent->wr.gc.spent = txn->wr.gc.spent;
   rkl_destructive_move(&txn->wr.gc.reclaimed, &parent->wr.gc.reclaimed);
+  rkl_destructive_move(&txn->wr.gc.ready4reuse, &parent->wr.gc.ready4reuse);
+  tASSERT(txn, rkl_empty(&txn->wr.gc.comeback));

   parent->geo = txn->geo;
   parent->canary = txn->canary;
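For nested transactions the two lists follow a copy-down/move-up ownership discipline: rkl_copy() on begin gives the child its own mutable view of the parent's reclaimed and ready4reuse lists, and on a successful join rkl_destructive_move() hands the child's final state back to the parent without copying. A toy model of that flow (simplified list type and hypothetical helpers that only mirror the rkl calls):

    #include <stdbool.h>
    #include <stdlib.h>
    #include <string.h>

    typedef struct { unsigned *ids; size_t len; } list_t;

    /* begin of a nested txn: the child works on a private copy */
    static bool list_copy(const list_t *src, list_t *dst) {
      dst->ids = NULL;
      dst->len = src->len;
      if (src->len) {
        dst->ids = malloc(src->len * sizeof(unsigned));
        if (!dst->ids)
          return false;
        memcpy(dst->ids, src->ids, src->len * sizeof(unsigned));
      }
      return true;
    }

    /* join (commit) of a nested txn: the parent adopts the child's state */
    static void list_destructive_move(list_t *src, list_t *dst) {
      free(dst->ids);
      *dst = *src;
      src->ids = NULL;
      src->len = 0;
    }

    int main(void) {
      list_t parent = {malloc(2 * sizeof(unsigned)), 2}, child;
      if (!parent.ids)
        return 1;
      parent.ids[0] = 7;
      parent.ids[1] = 9;
      if (!list_copy(&parent, &child))        /* nested begin */
        return 1;
      child.ids[0] = 8;                       /* child mutates its private copy */
      list_destructive_move(&child, &parent); /* nested join: parent now owns {8, 9} */
      free(parent.ids);
      return 0;
    }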
@@ -3,11 +3,6 @@

 #include "internals.h"

-MDBX_cursor *txn_gc_cursor(MDBX_txn *txn) {
-  tASSERT(txn, (txn->flags & (MDBX_TXN_BLOCKED | MDBX_TXN_RDONLY)) == 0);
-  return ptr_disp(txn->env->basal_txn, sizeof(MDBX_txn));
-}
-
 __hot bool txn_gc_detent(const MDBX_txn *const txn) {
   const txnid_t detent = mvcc_shapshot_oldest(txn->env, txn->wr.troika.txnid[txn->wr.troika.prefer_steady]);
   if (likely(detent == txn->env->gc.detent))
@@ -33,7 +28,7 @@ void txn_done_cursors(MDBX_txn *txn) {
 }

 int txn_shadow_cursors(const MDBX_txn *parent, const size_t dbi) {
-  tASSERT(parent, dbi > FREE_DBI && dbi < parent->n_dbi);
+  tASSERT(parent, dbi < parent->n_dbi);
   MDBX_cursor *cursor = parent->cursors[dbi];
   if (!cursor)
     return MDBX_SUCCESS;
@@ -322,6 +317,7 @@ int txn_renew(MDBX_txn *txn, unsigned flags) {
     rc = cursor_init(gc, txn, FREE_DBI);
     if (rc != MDBX_SUCCESS)
       goto bailout;
+    tASSERT(txn, txn->cursors[FREE_DBI] == nullptr);
   }
   dxb_sanitize_tail(env, txn);
   return MDBX_SUCCESS;