mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-04 18:24:12 +08:00
mdbx: переработка внутренних флагов связанных с выделением страниц из GC.
This commit is contained in:
parent
141cce0c0f
commit
da023657f5
@ -5,8 +5,8 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NOD
|
|||||||
2 |0000 0004|ALLOC_NEW |TXN_DIRTY |DUPSORT | |DBI_FRESH |F_DUPDATA|P_OVERFLOW| |
|
2 |0000 0004|ALLOC_NEW |TXN_DIRTY |DUPSORT | |DBI_FRESH |F_DUPDATA|P_OVERFLOW| |
|
||||||
3 |0000 0008|ALLOC_SLOT |TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META | |
|
3 |0000 0008|ALLOC_SLOT |TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META | |
|
||||||
4 |0000 0010|ALLOC_FAKE |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_BAD | |
|
4 |0000 0010|ALLOC_FAKE |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_BAD | |
|
||||||
5 |0000 0020| |TXN_UPDATE_GC |INTEGERDUP|NODUPDATA |DBI_USRVALID| |P_LEAF2 | |
|
5 |0000 0020| | |INTEGERDUP|NODUPDATA |DBI_USRVALID| |P_LEAF2 | |
|
||||||
6 |0000 0040| |TXN_FROZEN_RE |REVERSEDUP|CURRENT |DBI_DUPDATA | |P_SUBP | |
|
6 |0000 0040| | |REVERSEDUP|CURRENT |DBI_DUPDATA | |P_SUBP | |
|
||||||
7 |0000 0080| | | |ALLDUPS |DBI_AUDITED | | | |
|
7 |0000 0080| | | |ALLDUPS |DBI_AUDITED | | | |
|
||||||
8 |0000 0100| _MAY_MOVE | | | | | | | <= |
|
8 |0000 0100| _MAY_MOVE | | | | | | | <= |
|
||||||
9 |0000 0200| _MAY_UNMAP| | | | | | | <= |
|
9 |0000 0200| _MAY_UNMAP| | | | | | | <= |
|
||||||
|
247
src/core.c
247
src/core.c
@ -6588,20 +6588,19 @@ static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len,
|
|||||||
*
|
*
|
||||||
* Returns 0 on success, non-zero on failure.*/
|
* Returns 0 on success, non-zero on failure.*/
|
||||||
|
|
||||||
#define MDBX_ALLOC_GC 1
|
#define MDBX_ALLOC_DEFAULT 0
|
||||||
#define MDBX_ALLOC_NEW 2
|
#define MDBX_ALLOC_RESERVE 1
|
||||||
#define MDBX_ALLOC_COALESCE 4
|
#define MDBX_ALLOC_UNIMPORTANT 2
|
||||||
#define MDBX_ALLOC_SLOT 8
|
#define MDBX_ALLOC_COALESCE 4 /* внутреннее состояние */
|
||||||
#define MDBX_ALLOC_RESERVE 16
|
#define MDBX_ALLOC_SHOULD_SCAN 8 /* внутреннее состояние */
|
||||||
#define MDBX_ALLOC_BACKLOG 32
|
#define MDBX_ALLOC_LIFO 16 /* внутреннее состояние */
|
||||||
#define MDBX_ALLOC_ALL (MDBX_ALLOC_GC | MDBX_ALLOC_NEW)
|
|
||||||
#define MDBX_ALLOC_SHOULD_SCAN 64 /* internal state */
|
|
||||||
#define MDBX_ALLOC_LIFO 128 /* internal state */
|
|
||||||
|
|
||||||
static __inline bool is_gc_usable(const MDBX_txn *txn) {
|
static __inline bool is_gc_usable(MDBX_txn *txn, const MDBX_cursor *mc,
|
||||||
|
const uint8_t flags) {
|
||||||
/* If txn is updating the GC, then the retired-list cannot play catch-up with
|
/* If txn is updating the GC, then the retired-list cannot play catch-up with
|
||||||
* itself by growing while trying to save it. */
|
* itself by growing while trying to save it. */
|
||||||
if (txn->mt_flags & (MDBX_TXN_UPDATE_GC | MDBX_TXN_FROZEN_RE))
|
if (mc->mc_dbi == FREE_DBI && !(flags & MDBX_ALLOC_RESERVE) &&
|
||||||
|
!(mc->mc_flags & C_GCU))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* avoid (recursive) search inside empty tree and while tree is
|
/* avoid (recursive) search inside empty tree and while tree is
|
||||||
@ -6609,11 +6608,6 @@ static __inline bool is_gc_usable(const MDBX_txn *txn) {
|
|||||||
if (txn->mt_dbs[FREE_DBI].md_entries == 0)
|
if (txn->mt_dbs[FREE_DBI].md_entries == 0)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* If our dirty list is already full, we can't touch GC */
|
|
||||||
if (unlikely(txn->tw.dirtyroom < txn->mt_dbs[FREE_DBI].md_depth) &&
|
|
||||||
!(txn->mt_dbistate[FREE_DBI] & DBI_DIRTY))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -6644,22 +6638,13 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
|
|||||||
prof->spe_counter += 1;
|
prof->spe_counter += 1;
|
||||||
#endif /* MDBX_ENABLE_PROFGC */
|
#endif /* MDBX_ENABLE_PROFGC */
|
||||||
|
|
||||||
eASSERT(env, flags & MDBX_ALLOC_GC);
|
eASSERT(env, num > 0 || (flags & MDBX_ALLOC_RESERVE));
|
||||||
eASSERT(env, num == 0 || !(flags & MDBX_ALLOC_SLOT));
|
|
||||||
eASSERT(env, num > 0 || !(flags & MDBX_ALLOC_NEW));
|
|
||||||
eASSERT(env, (flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE |
|
|
||||||
MDBX_ALLOC_BACKLOG)) == 0 ||
|
|
||||||
(flags & MDBX_ALLOC_NEW) == 0);
|
|
||||||
eASSERT(env, pnl_check_allocated(txn->tw.relist,
|
eASSERT(env, pnl_check_allocated(txn->tw.relist,
|
||||||
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
||||||
|
|
||||||
pgno_t pgno = 0, *range = nullptr;
|
pgno_t pgno = 0, *range = nullptr;
|
||||||
size_t newnext, re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
|
size_t newnext, re_len = MDBX_PNL_GETSIZE(txn->tw.relist);
|
||||||
if (num > 1) {
|
if (num > 1) {
|
||||||
eASSERT(env, !(flags & MDBX_ALLOC_SLOT));
|
|
||||||
eASSERT(env, (txn->mt_flags & MDBX_TXN_FROZEN_RE) == 0);
|
|
||||||
if (unlikely(txn->mt_flags & MDBX_TXN_FROZEN_RE))
|
|
||||||
goto no_gc;
|
|
||||||
#if MDBX_ENABLE_PROFGC
|
#if MDBX_ENABLE_PROFGC
|
||||||
prof->xpages += 1;
|
prof->xpages += 1;
|
||||||
#endif /* MDBX_ENABLE_PROFGC */
|
#endif /* MDBX_ENABLE_PROFGC */
|
||||||
@ -6675,13 +6660,12 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
eASSERT(env,
|
eASSERT(env, num == 0 || re_len == 0);
|
||||||
(flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE)) || re_len == 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
//---------------------------------------------------------------------------
|
//---------------------------------------------------------------------------
|
||||||
|
|
||||||
if (unlikely(!is_gc_usable(txn)))
|
if (unlikely(!is_gc_usable(txn, mc, flags)))
|
||||||
goto no_gc;
|
goto no_gc;
|
||||||
|
|
||||||
eASSERT(env, (flags & (MDBX_ALLOC_COALESCE | MDBX_ALLOC_LIFO |
|
eASSERT(env, (flags & (MDBX_ALLOC_COALESCE | MDBX_ALLOC_LIFO |
|
||||||
@ -6691,7 +6675,7 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *mc, const size_t num,
|
|||||||
if (/* Не коагулируем записи при подготовке резерва для обновления GC.
|
if (/* Не коагулируем записи при подготовке резерва для обновления GC.
|
||||||
* Иначе попытка увеличить резерв может приводить к необходимости ещё
|
* Иначе попытка увеличить резерв может приводить к необходимости ещё
|
||||||
* большего резерва из-за увеличения списка переработанных страниц. */
|
* большего резерва из-за увеличения списка переработанных страниц. */
|
||||||
flags < MDBX_ALLOC_COALESCE) {
|
(flags & MDBX_ALLOC_RESERVE) == 0) {
|
||||||
if (txn->mt_dbs[FREE_DBI].md_branch_pages &&
|
if (txn->mt_dbs[FREE_DBI].md_branch_pages &&
|
||||||
re_len < env->me_maxgc_ov1page / 2)
|
re_len < env->me_maxgc_ov1page / 2)
|
||||||
flags += MDBX_ALLOC_COALESCE;
|
flags += MDBX_ALLOC_COALESCE;
|
||||||
@ -6777,7 +6761,6 @@ next_gc:;
|
|||||||
&data, mp)) != MDBX_SUCCESS))
|
&data, mp)) != MDBX_SUCCESS))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
|
||||||
eASSERT(env, (txn->mt_flags & MDBX_TXN_FROZEN_RE) == 0);
|
|
||||||
pgno_t *gc_pnl = (pgno_t *)data.iov_base;
|
pgno_t *gc_pnl = (pgno_t *)data.iov_base;
|
||||||
if (unlikely(data.iov_len % sizeof(pgno_t) ||
|
if (unlikely(data.iov_len % sizeof(pgno_t) ||
|
||||||
data.iov_len < MDBX_PNL_SIZEOF(gc_pnl) ||
|
data.iov_len < MDBX_PNL_SIZEOF(gc_pnl) ||
|
||||||
@ -6818,8 +6801,7 @@ next_gc:;
|
|||||||
}
|
}
|
||||||
if (unlikely(/* list is too long already */ re_len >=
|
if (unlikely(/* list is too long already */ re_len >=
|
||||||
env->me_options.rp_augment_limit) &&
|
env->me_options.rp_augment_limit) &&
|
||||||
((/* not a slot-request from gc-update */
|
((/* not a slot-request from gc-update */ num &&
|
||||||
(flags & MDBX_ALLOC_SLOT) == 0 &&
|
|
||||||
/* have enough unallocated space */ txn->mt_geo.upper >=
|
/* have enough unallocated space */ txn->mt_geo.upper >=
|
||||||
txn->mt_next_pgno + num) ||
|
txn->mt_next_pgno + num) ||
|
||||||
gc_len + re_len >= MDBX_PGL_LIMIT)) {
|
gc_len + re_len >= MDBX_PGL_LIMIT)) {
|
||||||
@ -6883,7 +6865,7 @@ next_gc:;
|
|||||||
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
||||||
|
|
||||||
/* Done for a kick-reclaim mode, actually no page needed */
|
/* Done for a kick-reclaim mode, actually no page needed */
|
||||||
if (unlikely(flags & MDBX_ALLOC_SLOT)) {
|
if (unlikely(num == 0)) {
|
||||||
eASSERT(env, ret.err == MDBX_SUCCESS);
|
eASSERT(env, ret.err == MDBX_SUCCESS);
|
||||||
TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "early-exit for slot", id,
|
TRACE("%s: last id #%" PRIaTXN ", re-len %zu", "early-exit for slot", id,
|
||||||
re_len);
|
re_len);
|
||||||
@ -6901,6 +6883,7 @@ next_gc:;
|
|||||||
|
|
||||||
scan:
|
scan:
|
||||||
eASSERT(env, flags & MDBX_ALLOC_SHOULD_SCAN);
|
eASSERT(env, flags & MDBX_ALLOC_SHOULD_SCAN);
|
||||||
|
eASSERT(env, num > 0);
|
||||||
if (re_len >= num) {
|
if (re_len >= num) {
|
||||||
eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno &&
|
eASSERT(env, MDBX_PNL_LAST(txn->tw.relist) < txn->mt_next_pgno &&
|
||||||
MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno);
|
MDBX_PNL_FIRST(txn->tw.relist) < txn->mt_next_pgno);
|
||||||
@ -6977,8 +6960,7 @@ depleted_gc:
|
|||||||
meta_prefer_steady(env, &txn->tw.troika).ptr_c);
|
meta_prefer_steady(env, &txn->tw.troika).ptr_c);
|
||||||
goto retry_gc_refresh_oldest;
|
goto retry_gc_refresh_oldest;
|
||||||
}
|
}
|
||||||
if ((flags & (MDBX_ALLOC_BACKLOG | MDBX_ALLOC_NEW)) == 0 ||
|
if ((autosync_threshold &&
|
||||||
(autosync_threshold &&
|
|
||||||
atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed) >=
|
atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed) >=
|
||||||
autosync_threshold) ||
|
autosync_threshold) ||
|
||||||
(autosync_period &&
|
(autosync_period &&
|
||||||
@ -6986,7 +6968,7 @@ depleted_gc:
|
|||||||
atomic_load64(&env->me_lck->mti_eoos_timestamp, mo_Relaxed)) &&
|
atomic_load64(&env->me_lck->mti_eoos_timestamp, mo_Relaxed)) &&
|
||||||
osal_monotime() - eoos_timestamp >= autosync_period) ||
|
osal_monotime() - eoos_timestamp >= autosync_period) ||
|
||||||
newnext >= txn->mt_geo.upper ||
|
newnext >= txn->mt_geo.upper ||
|
||||||
(newnext >= txn->mt_end_pgno &&
|
((num == 0 || newnext >= txn->mt_end_pgno) &&
|
||||||
(autosync_threshold | autosync_period) == 0)) {
|
(autosync_threshold | autosync_period) == 0)) {
|
||||||
/* make steady checkpoint. */
|
/* make steady checkpoint. */
|
||||||
#if MDBX_ENABLE_PROFGC
|
#if MDBX_ENABLE_PROFGC
|
||||||
@ -7014,7 +6996,7 @@ depleted_gc:
|
|||||||
|
|
||||||
/* Avoid kick lagging reader(s) if is enough unallocated space
|
/* Avoid kick lagging reader(s) if is enough unallocated space
|
||||||
* at the end of database file. */
|
* at the end of database file. */
|
||||||
if ((flags & MDBX_ALLOC_NEW) && newnext <= txn->mt_end_pgno) {
|
if (!(flags & MDBX_ALLOC_RESERVE) && newnext <= txn->mt_end_pgno) {
|
||||||
eASSERT(env, range == nullptr);
|
eASSERT(env, range == nullptr);
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
@ -7028,7 +7010,7 @@ depleted_gc:
|
|||||||
//---------------------------------------------------------------------------
|
//---------------------------------------------------------------------------
|
||||||
|
|
||||||
no_gc:
|
no_gc:
|
||||||
if ((flags & MDBX_ALLOC_NEW) == 0) {
|
if (flags & MDBX_ALLOC_RESERVE) {
|
||||||
ret.err = MDBX_NOTFOUND;
|
ret.err = MDBX_NOTFOUND;
|
||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
@ -7071,10 +7053,9 @@ no_gc:
|
|||||||
|
|
||||||
done:
|
done:
|
||||||
ret.err = MDBX_SUCCESS;
|
ret.err = MDBX_SUCCESS;
|
||||||
if (likely((flags & (MDBX_ALLOC_SLOT | MDBX_ALLOC_RESERVE)) == 0)) {
|
if (likely((flags & MDBX_ALLOC_RESERVE) == 0)) {
|
||||||
ENSURE(env, pgno >= NUM_METAS);
|
ENSURE(env, pgno >= NUM_METAS);
|
||||||
if (range) {
|
if (range) {
|
||||||
eASSERT(env, (txn->mt_flags & MDBX_TXN_FROZEN_RE) == 0);
|
|
||||||
eASSERT(env, pgno == *range);
|
eASSERT(env, pgno == *range);
|
||||||
eASSERT(env, pgno + num <= txn->mt_next_pgno && pgno >= NUM_METAS);
|
eASSERT(env, pgno + num <= txn->mt_next_pgno && pgno >= NUM_METAS);
|
||||||
eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
|
eASSERT(env, re_len == MDBX_PNL_GETSIZE(txn->tw.relist));
|
||||||
@ -7091,7 +7072,6 @@ done:
|
|||||||
eASSERT(env, pnl_check_allocated(txn->tw.relist,
|
eASSERT(env, pnl_check_allocated(txn->tw.relist,
|
||||||
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
||||||
} else {
|
} else {
|
||||||
eASSERT(env, flags & MDBX_ALLOC_NEW);
|
|
||||||
pgno = txn->mt_next_pgno;
|
pgno = txn->mt_next_pgno;
|
||||||
txn->mt_next_pgno += (pgno_t)num;
|
txn->mt_next_pgno += (pgno_t)num;
|
||||||
eASSERT(env, txn->mt_next_pgno <= txn->mt_end_pgno);
|
eASSERT(env, txn->mt_next_pgno <= txn->mt_end_pgno);
|
||||||
@ -7135,8 +7115,9 @@ done:
|
|||||||
int level;
|
int level;
|
||||||
const char *what;
|
const char *what;
|
||||||
if (flags & MDBX_ALLOC_RESERVE) {
|
if (flags & MDBX_ALLOC_RESERVE) {
|
||||||
level = (flags & MDBX_ALLOC_BACKLOG) ? MDBX_LOG_DEBUG : MDBX_LOG_NOTICE;
|
level =
|
||||||
what = (flags & MDBX_ALLOC_SLOT) ? "gc-slot/backlog" : "backlog-pages";
|
(flags & MDBX_ALLOC_UNIMPORTANT) ? MDBX_LOG_DEBUG : MDBX_LOG_NOTICE;
|
||||||
|
what = num ? "reserve-pages" : "fetch-slot";
|
||||||
} else {
|
} else {
|
||||||
txn->mt_flags |= MDBX_TXN_ERROR;
|
txn->mt_flags |= MDBX_TXN_ERROR;
|
||||||
level = MDBX_LOG_ERROR;
|
level = MDBX_LOG_ERROR;
|
||||||
@ -7151,7 +7132,7 @@ done:
|
|||||||
} else {
|
} else {
|
||||||
early_exit:
|
early_exit:
|
||||||
DEBUG("return NULL for %zu pages for ALLOC_%s, rc %d", num,
|
DEBUG("return NULL for %zu pages for ALLOC_%s, rc %d", num,
|
||||||
(flags & MDBX_ALLOC_SLOT) ? "SLOT" : "RESERVE", ret.err);
|
num ? "RESERVE" : "SLOT", ret.err);
|
||||||
ret.page = NULL;
|
ret.page = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -7197,62 +7178,60 @@ __hot static pgr_t page_alloc(const MDBX_cursor *mc) {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (likely(!(txn->mt_flags & MDBX_TXN_FROZEN_RE))) {
|
MDBX_PNL pnl = txn->tw.relist;
|
||||||
MDBX_PNL pnl = txn->tw.relist;
|
const size_t len = MDBX_PNL_GETSIZE(pnl);
|
||||||
const size_t len = MDBX_PNL_GETSIZE(pnl);
|
if (likely(len > 0)) {
|
||||||
if (likely(len > 0)) {
|
MDBX_env *const env = txn->mt_env;
|
||||||
MDBX_env *const env = txn->mt_env;
|
|
||||||
|
|
||||||
MDBX_PNL_SETSIZE(pnl, len - 1);
|
MDBX_PNL_SETSIZE(pnl, len - 1);
|
||||||
#if MDBX_PNL_ASCENDING
|
#if MDBX_PNL_ASCENDING
|
||||||
const pgno_t pgno = pnl[1];
|
const pgno_t pgno = pnl[1];
|
||||||
for (size_t i = 1; i < len; ++i)
|
for (size_t i = 1; i < len; ++i)
|
||||||
pnl[i] = pnl[i + 1];
|
pnl[i] = pnl[i + 1];
|
||||||
#else
|
#else
|
||||||
const pgno_t pgno = pnl[len];
|
const pgno_t pgno = pnl[len];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if MDBX_ENABLE_PROFGC
|
#if MDBX_ENABLE_PROFGC
|
||||||
const uint64_t monotime_before = osal_monotime();
|
const uint64_t monotime_before = osal_monotime();
|
||||||
size_t majflt_before;
|
size_t majflt_before;
|
||||||
const uint64_t cputime_before = osal_cputime(&majflt_before);
|
const uint64_t cputime_before = osal_cputime(&majflt_before);
|
||||||
profgc_stat_t *const prof =
|
profgc_stat_t *const prof = (mc->mc_dbi == FREE_DBI)
|
||||||
(mc->mc_dbi == FREE_DBI) ? &env->me_lck->mti_pgop_stat.gc_prof.self
|
? &env->me_lck->mti_pgop_stat.gc_prof.self
|
||||||
: &env->me_lck->mti_pgop_stat.gc_prof.work;
|
: &env->me_lck->mti_pgop_stat.gc_prof.work;
|
||||||
#endif /* MDBX_ENABLE_PROFGC */
|
#endif /* MDBX_ENABLE_PROFGC */
|
||||||
pgr_t ret;
|
pgr_t ret;
|
||||||
if (env->me_flags & MDBX_WRITEMAP) {
|
if (env->me_flags & MDBX_WRITEMAP) {
|
||||||
ret.page = pgno2page(env, pgno);
|
ret.page = pgno2page(env, pgno);
|
||||||
MDBX_ASAN_UNPOISON_MEMORY_REGION(ret.page, env->me_psize);
|
MDBX_ASAN_UNPOISON_MEMORY_REGION(ret.page, env->me_psize);
|
||||||
} else {
|
} else {
|
||||||
ret.page = page_malloc(txn, 1);
|
ret.page = page_malloc(txn, 1);
|
||||||
if (unlikely(!ret.page)) {
|
if (unlikely(!ret.page)) {
|
||||||
ret.err = MDBX_ENOMEM;
|
ret.err = MDBX_ENOMEM;
|
||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
VALGRIND_MAKE_MEM_UNDEFINED(ret.page, env->me_psize);
|
|
||||||
ret.page->mp_pgno = pgno;
|
|
||||||
ret.page->mp_leaf2_ksize = 0;
|
|
||||||
ret.page->mp_flags = 0;
|
|
||||||
tASSERT(txn, ret.page->mp_pgno >= NUM_METAS);
|
|
||||||
|
|
||||||
ret.err = page_dirty(txn, ret.page, 1);
|
|
||||||
bailout:
|
|
||||||
tASSERT(txn, pnl_check_allocated(txn->tw.relist,
|
|
||||||
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
|
||||||
#if MDBX_ENABLE_PROFGC
|
|
||||||
size_t majflt_after;
|
|
||||||
prof->rtime_cpu += osal_cputime(&majflt_after) - cputime_before;
|
|
||||||
prof->majflt += majflt_after - majflt_before;
|
|
||||||
prof->xtime_monotonic += osal_monotime() - monotime_before;
|
|
||||||
#endif /* MDBX_ENABLE_PROFGC */
|
|
||||||
return ret;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
VALGRIND_MAKE_MEM_UNDEFINED(ret.page, env->me_psize);
|
||||||
|
ret.page->mp_pgno = pgno;
|
||||||
|
ret.page->mp_leaf2_ksize = 0;
|
||||||
|
ret.page->mp_flags = 0;
|
||||||
|
tASSERT(txn, ret.page->mp_pgno >= NUM_METAS);
|
||||||
|
|
||||||
|
ret.err = page_dirty(txn, ret.page, 1);
|
||||||
|
bailout:
|
||||||
|
tASSERT(txn, pnl_check_allocated(txn->tw.relist,
|
||||||
|
txn->mt_next_pgno - MDBX_ENABLE_REFUND));
|
||||||
|
#if MDBX_ENABLE_PROFGC
|
||||||
|
size_t majflt_after;
|
||||||
|
prof->rtime_cpu += osal_cputime(&majflt_after) - cputime_before;
|
||||||
|
prof->majflt += majflt_after - majflt_before;
|
||||||
|
prof->xtime_monotonic += osal_monotime() - monotime_before;
|
||||||
|
#endif /* MDBX_ENABLE_PROFGC */
|
||||||
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
return page_alloc_slowpath(mc, 1, MDBX_ALLOC_ALL);
|
return page_alloc_slowpath(mc, 1, MDBX_ALLOC_DEFAULT);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Copy the used portions of a page. */
|
/* Copy the used portions of a page. */
|
||||||
@ -9503,6 +9482,13 @@ static int gcu_clean_stored_retired(MDBX_txn *txn, gcu_context_t *ctx) {
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Touch the GC-update cursor while the C_GCU marker is raised on it.
 *
 * Sets C_GCU in ctx->cursor.mc_flags, runs cursor_touch() on that cursor,
 * then drops the marker again and hands back cursor_touch()'s result.
 *
 * Why the marker matters: is_gc_usable() reports GC as unusable for a
 * FREE_DBI cursor unless the request carries MDBX_ALLOC_RESERVE or the
 * cursor carries C_GCU; page_alloc_slowpath() then jumps to its no_gc path.
 * Raising C_GCU here therefore keeps GC available for requests made on this
 * cursor during the touch.
 * NOTE(review): presumably cursor_touch() ends up in page_alloc_slowpath()
 * when it has to copy pages, and ctx->cursor is the FREE_DBI cursor -- neither
 * is visible in this function, confirm against the callers.
 *
 * Returns the error code of cursor_touch() unchanged. */
static int gcu_touch(gcu_context_t *ctx) {
|
||||||
|
ctx->cursor.mc_flags |= C_GCU; /* raise the marker for the duration of the touch */
|
||||||
|
int err = cursor_touch(&ctx->cursor);
|
||||||
|
ctx->cursor.mc_flags -= C_GCU; /* subtraction clears the bits only because they were set just above; the marker ends up cleared even if it was already set on entry */
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
/* Prepare a backlog of pages to modify GC itself, while reclaiming is
|
/* Prepare a backlog of pages to modify GC itself, while reclaiming is
|
||||||
* prohibited. It should be enough to prevent search in page_alloc_slowpath()
|
* prohibited. It should be enough to prevent search in page_alloc_slowpath()
|
||||||
* during a deleting, when GC tree is unbalanced. */
|
* during a deleting, when GC tree is unbalanced. */
|
||||||
@ -9537,9 +9523,7 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx,
|
|||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
tASSERT(txn, txn->mt_flags & MDBX_TXN_UPDATE_GC);
|
err = gcu_touch(ctx);
|
||||||
txn->mt_flags -= MDBX_TXN_UPDATE_GC;
|
|
||||||
err = cursor_touch(&ctx->cursor);
|
|
||||||
TRACE("== after-touch, backlog %zu, err %d", gcu_backlog_size(txn), err);
|
TRACE("== after-touch, backlog %zu, err %d", gcu_backlog_size(txn), err);
|
||||||
|
|
||||||
if (unlikely(pages4retiredlist > 1) &&
|
if (unlikely(pages4retiredlist > 1) &&
|
||||||
@ -9549,9 +9533,9 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx,
|
|||||||
err = gcu_clean_stored_retired(txn, ctx);
|
err = gcu_clean_stored_retired(txn, ctx);
|
||||||
if (unlikely(err != MDBX_SUCCESS))
|
if (unlikely(err != MDBX_SUCCESS))
|
||||||
return err;
|
return err;
|
||||||
err = page_alloc_slowpath(&ctx->cursor, pages4retiredlist,
|
err =
|
||||||
MDBX_ALLOC_GC | MDBX_ALLOC_RESERVE)
|
page_alloc_slowpath(&ctx->cursor, pages4retiredlist, MDBX_ALLOC_RESERVE)
|
||||||
.err;
|
.err;
|
||||||
TRACE("== after-4linear, backlog %zu, err %d", gcu_backlog_size(txn), err);
|
TRACE("== after-4linear, backlog %zu, err %d", gcu_backlog_size(txn), err);
|
||||||
cASSERT(&ctx->cursor,
|
cASSERT(&ctx->cursor,
|
||||||
gcu_backlog_size(txn) >= pages4retiredlist || err != MDBX_SUCCESS);
|
gcu_backlog_size(txn) >= pages4retiredlist || err != MDBX_SUCCESS);
|
||||||
@ -9560,11 +9544,9 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx,
|
|||||||
while (gcu_backlog_size(txn) < backlog4cow + pages4retiredlist &&
|
while (gcu_backlog_size(txn) < backlog4cow + pages4retiredlist &&
|
||||||
err == MDBX_SUCCESS)
|
err == MDBX_SUCCESS)
|
||||||
err = page_alloc_slowpath(&ctx->cursor, 0,
|
err = page_alloc_slowpath(&ctx->cursor, 0,
|
||||||
MDBX_ALLOC_GC | MDBX_ALLOC_SLOT |
|
MDBX_ALLOC_RESERVE | MDBX_ALLOC_UNIMPORTANT)
|
||||||
MDBX_ALLOC_RESERVE | MDBX_ALLOC_BACKLOG)
|
|
||||||
.err;
|
.err;
|
||||||
|
|
||||||
txn->mt_flags += MDBX_TXN_UPDATE_GC;
|
|
||||||
TRACE("<< backlog %zu, err %d", gcu_backlog_size(txn), err);
|
TRACE("<< backlog %zu, err %d", gcu_backlog_size(txn), err);
|
||||||
return (err != MDBX_NOTFOUND) ? err : MDBX_SUCCESS;
|
return (err != MDBX_NOTFOUND) ? err : MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -9593,7 +9575,6 @@ static int update_gc(MDBX_txn *txn, gcu_context_t *ctx) {
|
|||||||
MDBX_env *const env = txn->mt_env;
|
MDBX_env *const env = txn->mt_env;
|
||||||
const char *const dbg_prefix_mode = ctx->lifo ? " lifo" : " fifo";
|
const char *const dbg_prefix_mode = ctx->lifo ? " lifo" : " fifo";
|
||||||
(void)dbg_prefix_mode;
|
(void)dbg_prefix_mode;
|
||||||
txn->mt_flags += MDBX_TXN_UPDATE_GC;
|
|
||||||
ctx->cursor.mc_next = txn->mt_cursors[FREE_DBI];
|
ctx->cursor.mc_next = txn->mt_cursors[FREE_DBI];
|
||||||
txn->mt_cursors[FREE_DBI] = &ctx->cursor;
|
txn->mt_cursors[FREE_DBI] = &ctx->cursor;
|
||||||
|
|
||||||
@ -9741,10 +9722,7 @@ retry:
|
|||||||
if (txn->tw.loose_count > 0) {
|
if (txn->tw.loose_count > 0) {
|
||||||
TRACE("%s: try allocate gc-slot for %zu loose-pages", dbg_prefix_mode,
|
TRACE("%s: try allocate gc-slot for %zu loose-pages", dbg_prefix_mode,
|
||||||
txn->tw.loose_count);
|
txn->tw.loose_count);
|
||||||
rc = page_alloc_slowpath(&ctx->cursor, 0,
|
rc = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err;
|
||||||
MDBX_ALLOC_GC | MDBX_ALLOC_SLOT |
|
|
||||||
MDBX_ALLOC_RESERVE)
|
|
||||||
.err;
|
|
||||||
if (rc == MDBX_SUCCESS) {
|
if (rc == MDBX_SUCCESS) {
|
||||||
TRACE("%s: retry since gc-slot for %zu loose-pages available",
|
TRACE("%s: retry since gc-slot for %zu loose-pages available",
|
||||||
dbg_prefix_mode, txn->tw.loose_count);
|
dbg_prefix_mode, txn->tw.loose_count);
|
||||||
@ -9826,9 +9804,9 @@ retry:
|
|||||||
if (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)) {
|
if (ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)) {
|
||||||
if (unlikely(!ctx->retired_stored)) {
|
if (unlikely(!ctx->retired_stored)) {
|
||||||
/* Make sure last page of GC is touched and on retired-list */
|
/* Make sure last page of GC is touched and on retired-list */
|
||||||
txn->mt_flags -= MDBX_TXN_UPDATE_GC;
|
rc = cursor_last(&ctx->cursor, nullptr, nullptr);
|
||||||
rc = page_search(&ctx->cursor, NULL, MDBX_PS_LAST | MDBX_PS_MODIFY);
|
if (likely(rc != MDBX_SUCCESS))
|
||||||
txn->mt_flags += MDBX_TXN_UPDATE_GC;
|
rc = gcu_touch(ctx);
|
||||||
if (unlikely(rc != MDBX_SUCCESS) && rc != MDBX_NOTFOUND)
|
if (unlikely(rc != MDBX_SUCCESS) && rc != MDBX_NOTFOUND)
|
||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
@ -9966,16 +9944,12 @@ retry:
|
|||||||
left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) *
|
left > (MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) *
|
||||||
env->me_maxgc_ov1page &&
|
env->me_maxgc_ov1page &&
|
||||||
!ctx->dense) {
|
!ctx->dense) {
|
||||||
/* Hужен свобожный для для сохранения списка страниц. */
|
/* Нужен свободный слот для сохранения списка страниц. */
|
||||||
bool need_cleanup = false;
|
bool need_cleanup = false;
|
||||||
txnid_t snap_oldest = 0;
|
txnid_t snap_oldest = 0;
|
||||||
retry_rid:
|
retry_rid:
|
||||||
txn->mt_flags -= MDBX_TXN_UPDATE_GC;
|
|
||||||
do {
|
do {
|
||||||
rc = page_alloc_slowpath(&ctx->cursor, 0,
|
rc = page_alloc_slowpath(&ctx->cursor, 0, MDBX_ALLOC_RESERVE).err;
|
||||||
MDBX_ALLOC_GC | MDBX_ALLOC_SLOT |
|
|
||||||
MDBX_ALLOC_RESERVE)
|
|
||||||
.err;
|
|
||||||
snap_oldest = env->me_lck->mti_oldest_reader.weak;
|
snap_oldest = env->me_lck->mti_oldest_reader.weak;
|
||||||
if (likely(rc == MDBX_SUCCESS)) {
|
if (likely(rc == MDBX_SUCCESS)) {
|
||||||
TRACE("%s: took @%" PRIaTXN " from GC", dbg_prefix_mode,
|
TRACE("%s: took @%" PRIaTXN " from GC", dbg_prefix_mode,
|
||||||
@ -9988,7 +9962,6 @@ retry:
|
|||||||
left >
|
left >
|
||||||
(MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) *
|
(MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot) *
|
||||||
env->me_maxgc_ov1page);
|
env->me_maxgc_ov1page);
|
||||||
txn->mt_flags += MDBX_TXN_UPDATE_GC;
|
|
||||||
|
|
||||||
if (likely(rc == MDBX_SUCCESS)) {
|
if (likely(rc == MDBX_SUCCESS)) {
|
||||||
TRACE("%s: got enough from GC.", dbg_prefix_mode);
|
TRACE("%s: got enough from GC.", dbg_prefix_mode);
|
||||||
@ -10006,7 +9979,7 @@ retry:
|
|||||||
} else {
|
} else {
|
||||||
tASSERT(txn, txn->tw.last_reclaimed == 0);
|
tASSERT(txn, txn->tw.last_reclaimed == 0);
|
||||||
if (unlikely(txn_oldest_reader(txn) != snap_oldest))
|
if (unlikely(txn_oldest_reader(txn) != snap_oldest))
|
||||||
/* should retry page_alloc_slowpath(MDBX_ALLOC_GC)
|
/* should retry page_alloc_slowpath()
|
||||||
* if the oldest reader changes since the last attempt */
|
* if the oldest reader changes since the last attempt */
|
||||||
goto retry_rid;
|
goto retry_rid;
|
||||||
/* no reclaimable GC entries,
|
/* no reclaimable GC entries,
|
||||||
@ -10289,7 +10262,6 @@ retry:
|
|||||||
key.iov_len = sizeof(fill_gc_id);
|
key.iov_len = sizeof(fill_gc_id);
|
||||||
|
|
||||||
tASSERT(txn, data.iov_len >= sizeof(pgno_t) * 2);
|
tASSERT(txn, data.iov_len >= sizeof(pgno_t) * 2);
|
||||||
txn->mt_flags += MDBX_TXN_FROZEN_RE;
|
|
||||||
size_t chunk = data.iov_len / sizeof(pgno_t) - 1;
|
size_t chunk = data.iov_len / sizeof(pgno_t) - 1;
|
||||||
if (unlikely(chunk > left)) {
|
if (unlikely(chunk > left)) {
|
||||||
TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix_mode, chunk,
|
TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix_mode, chunk,
|
||||||
@ -10297,14 +10269,11 @@ retry:
|
|||||||
if ((ctx->loop < 5 && chunk - left > ctx->loop / 2) ||
|
if ((ctx->loop < 5 && chunk - left > ctx->loop / 2) ||
|
||||||
chunk - left > env->me_maxgc_ov1page) {
|
chunk - left > env->me_maxgc_ov1page) {
|
||||||
data.iov_len = (left + 1) * sizeof(pgno_t);
|
data.iov_len = (left + 1) * sizeof(pgno_t);
|
||||||
if (ctx->loop < 7)
|
|
||||||
txn->mt_flags &= ~MDBX_TXN_FROZEN_RE;
|
|
||||||
}
|
}
|
||||||
chunk = left;
|
chunk = left;
|
||||||
}
|
}
|
||||||
rc = mdbx_cursor_put(&ctx->cursor, &key, &data,
|
rc = mdbx_cursor_put(&ctx->cursor, &key, &data,
|
||||||
MDBX_CURRENT | MDBX_RESERVE);
|
MDBX_CURRENT | MDBX_RESERVE);
|
||||||
txn->mt_flags &= ~MDBX_TXN_FROZEN_RE;
|
|
||||||
if (unlikely(rc != MDBX_SUCCESS))
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
goto bailout;
|
goto bailout;
|
||||||
gcu_clean_reserved(env, data);
|
gcu_clean_reserved(env, data);
|
||||||
@ -15079,7 +15048,8 @@ static __noinline int node_read_bigdata(MDBX_cursor *mc, const MDBX_node *node,
|
|||||||
if (!MDBX_DISABLE_VALIDATION) {
|
if (!MDBX_DISABLE_VALIDATION) {
|
||||||
const MDBX_env *env = mc->mc_txn->mt_env;
|
const MDBX_env *env = mc->mc_txn->mt_env;
|
||||||
const size_t dsize = data->iov_len;
|
const size_t dsize = data->iov_len;
|
||||||
if (unlikely(node_size_len(node_ks(node), dsize) <= env->me_leaf_nodemax))
|
if (unlikely(node_size_len(node_ks(node), dsize) <= env->me_leaf_nodemax) &&
|
||||||
|
mc->mc_dbi != FREE_DBI)
|
||||||
poor_page(mp, "too small data (%zu bytes) for bigdata-node", dsize);
|
poor_page(mp, "too small data (%zu bytes) for bigdata-node", dsize);
|
||||||
const unsigned npages = number_of_ovpages(env, dsize);
|
const unsigned npages = number_of_ovpages(env, dsize);
|
||||||
if (unlikely(lp.page->mp_pages != npages)) {
|
if (unlikely(lp.page->mp_pages != npages)) {
|
||||||
@ -15087,7 +15057,7 @@ static __noinline int node_read_bigdata(MDBX_cursor *mc, const MDBX_node *node,
|
|||||||
return bad_page(lp.page,
|
return bad_page(lp.page,
|
||||||
"too less n-pages %u for bigdata-node (%zu bytes)",
|
"too less n-pages %u for bigdata-node (%zu bytes)",
|
||||||
lp.page->mp_pages, dsize);
|
lp.page->mp_pages, dsize);
|
||||||
else
|
else if (mc->mc_dbi != FREE_DBI)
|
||||||
poor_page(lp.page, "extra n-pages %u for bigdata-node (%zu bytes)",
|
poor_page(lp.page, "extra n-pages %u for bigdata-node (%zu bytes)",
|
||||||
lp.page->mp_pages, dsize);
|
lp.page->mp_pages, dsize);
|
||||||
}
|
}
|
||||||
@ -16183,7 +16153,6 @@ static int touch_dbi(MDBX_cursor *mc) {
|
|||||||
*mc->mc_dbistate |= DBI_DIRTY;
|
*mc->mc_dbistate |= DBI_DIRTY;
|
||||||
mc->mc_txn->mt_flags |= MDBX_TXN_DIRTY;
|
mc->mc_txn->mt_flags |= MDBX_TXN_DIRTY;
|
||||||
if (mc->mc_dbi >= CORE_DBS) {
|
if (mc->mc_dbi >= CORE_DBS) {
|
||||||
cASSERT(mc, (mc->mc_txn->mt_flags & MDBX_TXN_UPDATE_GC) == 0);
|
|
||||||
/* Touch DB record of named DB */
|
/* Touch DB record of named DB */
|
||||||
MDBX_cursor_couple cx;
|
MDBX_cursor_couple cx;
|
||||||
int rc = cursor_init(&cx.outer, mc->mc_txn, MAIN_DBI);
|
int rc = cursor_init(&cx.outer, mc->mc_txn, MAIN_DBI);
|
||||||
@ -16596,9 +16565,9 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
|
|||||||
|
|
||||||
/* Large/Overflow page overwrites need special handling */
|
/* Large/Overflow page overwrites need special handling */
|
||||||
if (unlikely(node_flags(node) & F_BIGDATA)) {
|
if (unlikely(node_flags(node) & F_BIGDATA)) {
|
||||||
int dpages = (node_size(key, data) > env->me_leaf_nodemax)
|
const size_t dpages = (node_size(key, data) > env->me_leaf_nodemax)
|
||||||
? number_of_ovpages(env, data->iov_len)
|
? number_of_ovpages(env, data->iov_len)
|
||||||
: 0;
|
: 0;
|
||||||
|
|
||||||
const pgno_t pgno = node_largedata_pgno(node);
|
const pgno_t pgno = node_largedata_pgno(node);
|
||||||
pgr_t lp = page_get_large(mc, pgno, mc->mc_pg[mc->mc_top]->mp_txnid);
|
pgr_t lp = page_get_large(mc, pgno, mc->mc_pg[mc->mc_top]->mp_txnid);
|
||||||
@ -16607,13 +16576,13 @@ __hot int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
|
|||||||
cASSERT(mc, PAGETYPE_WHOLE(lp.page) == P_OVERFLOW);
|
cASSERT(mc, PAGETYPE_WHOLE(lp.page) == P_OVERFLOW);
|
||||||
|
|
||||||
/* Is the ov page from this txn (or a parent) and big enough? */
|
/* Is the ov page from this txn (or a parent) and big enough? */
|
||||||
int ovpages = lp.page->mp_pages;
|
const size_t ovpages = lp.page->mp_pages;
|
||||||
if (!IS_FROZEN(mc->mc_txn, lp.page) &&
|
const size_t extra_threshold =
|
||||||
(unlikely(mc->mc_txn->mt_flags & MDBX_TXN_FROZEN_RE)
|
(mc->mc_dbi == FREE_DBI)
|
||||||
? (ovpages >= dpages)
|
? 1
|
||||||
: (ovpages ==
|
: /* LY: add configurable threshold to keep reserve space */ 0;
|
||||||
/* LY: add configurable threshold to keep reserve space */
|
if (!IS_FROZEN(mc->mc_txn, lp.page) && ovpages >= dpages &&
|
||||||
dpages))) {
|
ovpages <= dpages + extra_threshold) {
|
||||||
/* yes, overwrite it. */
|
/* yes, overwrite it. */
|
||||||
if (!IS_MODIFIABLE(mc->mc_txn, lp.page)) {
|
if (!IS_MODIFIABLE(mc->mc_txn, lp.page)) {
|
||||||
if (IS_SPILLED(mc->mc_txn, lp.page)) {
|
if (IS_SPILLED(mc->mc_txn, lp.page)) {
|
||||||
@ -17168,7 +17137,7 @@ static pgr_t page_new(MDBX_cursor *mc, const unsigned flags) {
|
|||||||
static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages) {
|
static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages) {
|
||||||
pgr_t ret = likely(npages == 1)
|
pgr_t ret = likely(npages == 1)
|
||||||
? page_alloc(mc)
|
? page_alloc(mc)
|
||||||
: page_alloc_slowpath(mc, npages, MDBX_ALLOC_ALL);
|
: page_alloc_slowpath(mc, npages, MDBX_ALLOC_DEFAULT);
|
||||||
if (unlikely(ret.err != MDBX_SUCCESS))
|
if (unlikely(ret.err != MDBX_SUCCESS))
|
||||||
return ret;
|
return ret;
|
||||||
|
|
||||||
@ -17279,7 +17248,6 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
|
|||||||
key ? key->iov_len : 0, DKEY_DEBUG(key));
|
key ? key->iov_len : 0, DKEY_DEBUG(key));
|
||||||
cASSERT(mc, key != NULL && data != NULL);
|
cASSERT(mc, key != NULL && data != NULL);
|
||||||
cASSERT(mc, PAGETYPE_COMPAT(mp) == P_LEAF);
|
cASSERT(mc, PAGETYPE_COMPAT(mp) == P_LEAF);
|
||||||
cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data));
|
|
||||||
MDBX_page *largepage = NULL;
|
MDBX_page *largepage = NULL;
|
||||||
|
|
||||||
size_t node_bytes;
|
size_t node_bytes;
|
||||||
@ -17288,6 +17256,7 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
|
|||||||
STATIC_ASSERT(sizeof(pgno_t) % 2 == 0);
|
STATIC_ASSERT(sizeof(pgno_t) % 2 == 0);
|
||||||
node_bytes =
|
node_bytes =
|
||||||
node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t);
|
node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t);
|
||||||
|
cASSERT(mc, page_room(mp) >= node_bytes);
|
||||||
} else if (unlikely(node_size(key, data) >
|
} else if (unlikely(node_size(key, data) >
|
||||||
mc->mc_txn->mt_env->me_leaf_nodemax)) {
|
mc->mc_txn->mt_env->me_leaf_nodemax)) {
|
||||||
/* Put data on large/overflow page. */
|
/* Put data on large/overflow page. */
|
||||||
@ -17301,6 +17270,7 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
|
|||||||
flags);
|
flags);
|
||||||
return MDBX_PROBLEM;
|
return MDBX_PROBLEM;
|
||||||
}
|
}
|
||||||
|
cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data));
|
||||||
const pgno_t ovpages = number_of_ovpages(mc->mc_txn->mt_env, data->iov_len);
|
const pgno_t ovpages = number_of_ovpages(mc->mc_txn->mt_env, data->iov_len);
|
||||||
const pgr_t npr = page_new_large(mc, ovpages);
|
const pgr_t npr = page_new_large(mc, ovpages);
|
||||||
if (unlikely(npr.err != MDBX_SUCCESS))
|
if (unlikely(npr.err != MDBX_SUCCESS))
|
||||||
@ -17312,10 +17282,12 @@ __hot static int __must_check_result node_add_leaf(MDBX_cursor *mc, size_t indx,
|
|||||||
flags |= F_BIGDATA;
|
flags |= F_BIGDATA;
|
||||||
node_bytes =
|
node_bytes =
|
||||||
node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t);
|
node_size_len(key->iov_len, 0) + sizeof(pgno_t) + sizeof(indx_t);
|
||||||
|
cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data));
|
||||||
} else {
|
} else {
|
||||||
|
cASSERT(mc, page_room(mp) >= leaf_size(mc->mc_txn->mt_env, key, data));
|
||||||
node_bytes = node_size(key, data) + sizeof(indx_t);
|
node_bytes = node_size(key, data) + sizeof(indx_t);
|
||||||
|
cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data));
|
||||||
}
|
}
|
||||||
cASSERT(mc, node_bytes == leaf_size(mc->mc_txn->mt_env, key, data));
|
|
||||||
|
|
||||||
/* Move higher pointers up one slot. */
|
/* Move higher pointers up one slot. */
|
||||||
const size_t nkeys = page_numkeys(mp);
|
const size_t nkeys = page_numkeys(mp);
|
||||||
@ -19056,7 +19028,8 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
|
|||||||
"big-node data size (%zu) <> min/max value-length (%zu/%zu)\n",
|
"big-node data size (%zu) <> min/max value-length (%zu/%zu)\n",
|
||||||
dsize, mc->mc_dbx->md_vlen_min, mc->mc_dbx->md_vlen_max);
|
dsize, mc->mc_dbx->md_vlen_min, mc->mc_dbx->md_vlen_max);
|
||||||
if (unlikely(node_size_len(node_ks(node), dsize) <=
|
if (unlikely(node_size_len(node_ks(node), dsize) <=
|
||||||
mc->mc_txn->mt_env->me_leaf_nodemax))
|
mc->mc_txn->mt_env->me_leaf_nodemax) &&
|
||||||
|
mc->mc_dbi != FREE_DBI)
|
||||||
poor_page(mp, "too small data (%zu bytes) for bigdata-node", dsize);
|
poor_page(mp, "too small data (%zu bytes) for bigdata-node", dsize);
|
||||||
|
|
||||||
if ((mc->mc_checking & CC_RETIRING) == 0) {
|
if ((mc->mc_checking & CC_RETIRING) == 0) {
|
||||||
@ -19071,7 +19044,7 @@ __cold static int page_check(MDBX_cursor *const mc, const MDBX_page *const mp) {
|
|||||||
rc = bad_page(lp.page,
|
rc = bad_page(lp.page,
|
||||||
"too less n-pages %u for bigdata-node (%zu bytes)",
|
"too less n-pages %u for bigdata-node (%zu bytes)",
|
||||||
lp.page->mp_pages, dsize);
|
lp.page->mp_pages, dsize);
|
||||||
else
|
else if (mc->mc_dbi != FREE_DBI)
|
||||||
poor_page(lp.page,
|
poor_page(lp.page,
|
||||||
"extra n-pages %u for bigdata-node (%zu bytes)",
|
"extra n-pages %u for bigdata-node (%zu bytes)",
|
||||||
lp.page->mp_pages, dsize);
|
lp.page->mp_pages, dsize);
|
||||||
|
@ -1000,13 +1000,9 @@ struct MDBX_txn {
|
|||||||
/* Additional flag for sync_locked() */
|
/* Additional flag for sync_locked() */
|
||||||
#define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
|
#define MDBX_SHRINK_ALLOWED UINT32_C(0x40000000)
|
||||||
|
|
||||||
#define MDBX_TXN_UPDATE_GC 0x20 /* GC is being updated */
|
|
||||||
#define MDBX_TXN_FROZEN_RE 0x40 /* list of reclaimed-pgno must not be altered */
|
|
||||||
|
|
||||||
#define TXN_FLAGS \
|
#define TXN_FLAGS \
|
||||||
(MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \
|
(MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | \
|
||||||
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID | MDBX_TXN_UPDATE_GC | \
|
MDBX_TXN_HAS_CHILD | MDBX_TXN_INVALID)
|
||||||
MDBX_TXN_FROZEN_RE)
|
|
||||||
|
|
||||||
#if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \
|
#if (TXN_FLAGS & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) || \
|
||||||
((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \
|
((MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS | TXN_FLAGS) & \
|
||||||
@ -1147,6 +1143,9 @@ struct MDBX_cursor {
|
|||||||
#define C_SUB 0x04 /* Cursor is a sub-cursor */
|
#define C_SUB 0x04 /* Cursor is a sub-cursor */
|
||||||
#define C_DEL 0x08 /* last op was a cursor_del */
|
#define C_DEL 0x08 /* last op was a cursor_del */
|
||||||
#define C_UNTRACK 0x10 /* Un-track cursor when closing */
|
#define C_UNTRACK 0x10 /* Un-track cursor when closing */
|
||||||
|
#define C_GCU \
|
||||||
|
0x20 /* Preparation for a GC update is in progress, so pages \
|
||||||
|
* may be taken from the GC even for FREE_DBI */
|
||||||
uint8_t mc_flags;
|
uint8_t mc_flags;
|
||||||
|
|
||||||
/* Cursor checking flags. */
|
/* Cursor checking flags. */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user