mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-04 19:34:13 +08:00
mdbx: merge branch 'devel'.
This commit is contained in:
commit
116d14bb76
@ -77,15 +77,19 @@ field_alignment(unsigned alignment_baseline, size_t field_offset) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* read-thunk for UB-sanitizer */
|
/* read-thunk for UB-sanitizer */
|
||||||
static __pure_function __always_inline uint8_t peek_u8(const uint8_t *ptr) {
|
static __pure_function __always_inline uint8_t
|
||||||
|
peek_u8(const uint8_t *const __restrict ptr) {
|
||||||
return *ptr;
|
return *ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* write-thunk for UB-sanitizer */
|
/* write-thunk for UB-sanitizer */
|
||||||
static __always_inline void poke_u8(uint8_t *ptr, const uint8_t v) { *ptr = v; }
|
static __always_inline void poke_u8(uint8_t *const __restrict ptr,
|
||||||
|
const uint8_t v) {
|
||||||
|
*ptr = v;
|
||||||
|
}
|
||||||
|
|
||||||
static __pure_function __always_inline uint16_t
|
static __pure_function __always_inline uint16_t
|
||||||
unaligned_peek_u16(const unsigned expected_alignment, const void *ptr) {
|
unaligned_peek_u16(const unsigned expected_alignment, const void *const ptr) {
|
||||||
assert((uintptr_t)ptr % expected_alignment == 0);
|
assert((uintptr_t)ptr % expected_alignment == 0);
|
||||||
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(uint16_t)) == 0)
|
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(uint16_t)) == 0)
|
||||||
return *(const uint16_t *)ptr;
|
return *(const uint16_t *)ptr;
|
||||||
@ -97,8 +101,8 @@ unaligned_peek_u16(const unsigned expected_alignment, const void *ptr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline void
|
static __always_inline void
|
||||||
unaligned_poke_u16(const unsigned expected_alignment, void *ptr,
|
unaligned_poke_u16(const unsigned expected_alignment,
|
||||||
const uint16_t v) {
|
void *const __restrict ptr, const uint16_t v) {
|
||||||
assert((uintptr_t)ptr % expected_alignment == 0);
|
assert((uintptr_t)ptr % expected_alignment == 0);
|
||||||
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(v)) == 0)
|
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(v)) == 0)
|
||||||
*(uint16_t *)ptr = v;
|
*(uint16_t *)ptr = v;
|
||||||
@ -106,8 +110,8 @@ unaligned_poke_u16(const unsigned expected_alignment, void *ptr,
|
|||||||
memcpy(ptr, &v, sizeof(v));
|
memcpy(ptr, &v, sizeof(v));
|
||||||
}
|
}
|
||||||
|
|
||||||
static __pure_function __always_inline uint32_t
|
static __pure_function __always_inline uint32_t unaligned_peek_u32(
|
||||||
unaligned_peek_u32(const unsigned expected_alignment, const void *ptr) {
|
const unsigned expected_alignment, const void *const __restrict ptr) {
|
||||||
assert((uintptr_t)ptr % expected_alignment == 0);
|
assert((uintptr_t)ptr % expected_alignment == 0);
|
||||||
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(uint32_t)) == 0)
|
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(uint32_t)) == 0)
|
||||||
return *(const uint32_t *)ptr;
|
return *(const uint32_t *)ptr;
|
||||||
@ -125,8 +129,8 @@ unaligned_peek_u32(const unsigned expected_alignment, const void *ptr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline void
|
static __always_inline void
|
||||||
unaligned_poke_u32(const unsigned expected_alignment, void *ptr,
|
unaligned_poke_u32(const unsigned expected_alignment,
|
||||||
const uint32_t v) {
|
void *const __restrict ptr, const uint32_t v) {
|
||||||
assert((uintptr_t)ptr % expected_alignment == 0);
|
assert((uintptr_t)ptr % expected_alignment == 0);
|
||||||
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(v)) == 0)
|
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(v)) == 0)
|
||||||
*(uint32_t *)ptr = v;
|
*(uint32_t *)ptr = v;
|
||||||
@ -138,8 +142,8 @@ unaligned_poke_u32(const unsigned expected_alignment, void *ptr,
|
|||||||
memcpy(ptr, &v, sizeof(v));
|
memcpy(ptr, &v, sizeof(v));
|
||||||
}
|
}
|
||||||
|
|
||||||
static __pure_function __always_inline uint64_t
|
static __pure_function __always_inline uint64_t unaligned_peek_u64(
|
||||||
unaligned_peek_u64(const unsigned expected_alignment, const void *ptr) {
|
const unsigned expected_alignment, const void *const __restrict ptr) {
|
||||||
assert((uintptr_t)ptr % expected_alignment == 0);
|
assert((uintptr_t)ptr % expected_alignment == 0);
|
||||||
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(uint64_t)) == 0)
|
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(uint64_t)) == 0)
|
||||||
return *(const uint64_t *)ptr;
|
return *(const uint64_t *)ptr;
|
||||||
@ -157,8 +161,8 @@ unaligned_peek_u64(const unsigned expected_alignment, const void *ptr) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline void
|
static __always_inline void
|
||||||
unaligned_poke_u64(const unsigned expected_alignment, void *ptr,
|
unaligned_poke_u64(const unsigned expected_alignment,
|
||||||
const uint64_t v) {
|
void *const __restrict ptr, const uint64_t v) {
|
||||||
assert((uintptr_t)ptr % expected_alignment == 0);
|
assert((uintptr_t)ptr % expected_alignment == 0);
|
||||||
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(v)) == 0)
|
if (MDBX_UNALIGNED_OK || (expected_alignment % sizeof(v)) == 0)
|
||||||
*(uint64_t *)ptr = v;
|
*(uint64_t *)ptr = v;
|
||||||
@ -191,7 +195,8 @@ unaligned_poke_u64(const unsigned expected_alignment, void *ptr,
|
|||||||
unaligned_poke_u64(1, (char *)(ptr) + offsetof(struct, field), value)
|
unaligned_poke_u64(1, (char *)(ptr) + offsetof(struct, field), value)
|
||||||
|
|
||||||
/* Get the page number pointed to by a branch node */
|
/* Get the page number pointed to by a branch node */
|
||||||
static __pure_function __always_inline pgno_t node_pgno(const MDBX_node *node) {
|
static __pure_function __always_inline pgno_t
|
||||||
|
node_pgno(const MDBX_node *const __restrict node) {
|
||||||
pgno_t pgno = UNALIGNED_PEEK_32(node, MDBX_node, mn_pgno32);
|
pgno_t pgno = UNALIGNED_PEEK_32(node, MDBX_node, mn_pgno32);
|
||||||
if (sizeof(pgno) > 4)
|
if (sizeof(pgno) > 4)
|
||||||
pgno |= ((uint64_t)UNALIGNED_PEEK_8(node, MDBX_node, mn_extra)) << 32;
|
pgno |= ((uint64_t)UNALIGNED_PEEK_8(node, MDBX_node, mn_extra)) << 32;
|
||||||
@ -199,7 +204,8 @@ static __pure_function __always_inline pgno_t node_pgno(const MDBX_node *node) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Set the page number in a branch node */
|
/* Set the page number in a branch node */
|
||||||
static __always_inline void node_set_pgno(MDBX_node *node, pgno_t pgno) {
|
static __always_inline void node_set_pgno(MDBX_node *const __restrict node,
|
||||||
|
pgno_t pgno) {
|
||||||
assert(pgno >= MIN_PAGENO && pgno <= MAX_PAGENO);
|
assert(pgno >= MIN_PAGENO && pgno <= MAX_PAGENO);
|
||||||
|
|
||||||
UNALIGNED_POKE_32(node, MDBX_node, mn_pgno32, (uint32_t)pgno);
|
UNALIGNED_POKE_32(node, MDBX_node, mn_pgno32, (uint32_t)pgno);
|
||||||
@ -209,33 +215,38 @@ static __always_inline void node_set_pgno(MDBX_node *node, pgno_t pgno) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Get the size of the data in a leaf node */
|
/* Get the size of the data in a leaf node */
|
||||||
static __pure_function __always_inline size_t node_ds(const MDBX_node *node) {
|
static __pure_function __always_inline size_t
|
||||||
|
node_ds(const MDBX_node *const __restrict node) {
|
||||||
return UNALIGNED_PEEK_32(node, MDBX_node, mn_dsize);
|
return UNALIGNED_PEEK_32(node, MDBX_node, mn_dsize);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set the size of the data for a leaf node */
|
/* Set the size of the data for a leaf node */
|
||||||
static __always_inline void node_set_ds(MDBX_node *node, size_t size) {
|
static __always_inline void node_set_ds(MDBX_node *const __restrict node,
|
||||||
|
size_t size) {
|
||||||
assert(size < INT_MAX);
|
assert(size < INT_MAX);
|
||||||
UNALIGNED_POKE_32(node, MDBX_node, mn_dsize, (uint32_t)size);
|
UNALIGNED_POKE_32(node, MDBX_node, mn_dsize, (uint32_t)size);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The size of a key in a node */
|
/* The size of a key in a node */
|
||||||
static __pure_function __always_inline size_t node_ks(const MDBX_node *node) {
|
static __pure_function __always_inline size_t
|
||||||
|
node_ks(const MDBX_node *const __restrict node) {
|
||||||
return UNALIGNED_PEEK_16(node, MDBX_node, mn_ksize);
|
return UNALIGNED_PEEK_16(node, MDBX_node, mn_ksize);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Set the size of the key for a leaf node */
|
/* Set the size of the key for a leaf node */
|
||||||
static __always_inline void node_set_ks(MDBX_node *node, size_t size) {
|
static __always_inline void node_set_ks(MDBX_node *const __restrict node,
|
||||||
|
size_t size) {
|
||||||
assert(size < INT16_MAX);
|
assert(size < INT16_MAX);
|
||||||
UNALIGNED_POKE_16(node, MDBX_node, mn_ksize, (uint16_t)size);
|
UNALIGNED_POKE_16(node, MDBX_node, mn_ksize, (uint16_t)size);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __pure_function __always_inline uint8_t
|
static __pure_function __always_inline uint8_t
|
||||||
node_flags(const MDBX_node *node) {
|
node_flags(const MDBX_node *const __restrict node) {
|
||||||
return UNALIGNED_PEEK_8(node, MDBX_node, mn_flags);
|
return UNALIGNED_PEEK_8(node, MDBX_node, mn_flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline void node_set_flags(MDBX_node *node, uint8_t flags) {
|
static __always_inline void node_set_flags(MDBX_node *const __restrict node,
|
||||||
|
uint8_t flags) {
|
||||||
UNALIGNED_POKE_8(node, MDBX_node, mn_flags, flags);
|
UNALIGNED_POKE_8(node, MDBX_node, mn_flags, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -243,12 +254,14 @@ static __always_inline void node_set_flags(MDBX_node *node, uint8_t flags) {
|
|||||||
#define NODESIZE offsetof(MDBX_node, mn_data)
|
#define NODESIZE offsetof(MDBX_node, mn_data)
|
||||||
|
|
||||||
/* Address of the key for the node */
|
/* Address of the key for the node */
|
||||||
static __pure_function __always_inline void *node_key(const MDBX_node *node) {
|
static __pure_function __always_inline void *
|
||||||
|
node_key(const MDBX_node *const __restrict node) {
|
||||||
return (char *)node + NODESIZE;
|
return (char *)node + NODESIZE;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Address of the data for a node */
|
/* Address of the data for a node */
|
||||||
static __pure_function __always_inline void *node_data(const MDBX_node *node) {
|
static __pure_function __always_inline void *
|
||||||
|
node_data(const MDBX_node *const __restrict node) {
|
||||||
return (char *)node_key(node) + node_ks(node);
|
return (char *)node_key(node) + node_ks(node);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -263,7 +276,8 @@ static __pure_function __always_inline size_t node_size(const MDBX_val *key,
|
|||||||
return node_size_len(key ? key->iov_len : 0, value ? value->iov_len : 0);
|
return node_size_len(key ? key->iov_len : 0, value ? value->iov_len : 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __pure_function __always_inline pgno_t peek_pgno(const void *ptr) {
|
static __pure_function __always_inline pgno_t
|
||||||
|
peek_pgno(const void *const __restrict ptr) {
|
||||||
if (sizeof(pgno_t) == sizeof(uint32_t))
|
if (sizeof(pgno_t) == sizeof(uint32_t))
|
||||||
return (pgno_t)unaligned_peek_u32(1, ptr);
|
return (pgno_t)unaligned_peek_u32(1, ptr);
|
||||||
else if (sizeof(pgno_t) == sizeof(uint64_t))
|
else if (sizeof(pgno_t) == sizeof(uint64_t))
|
||||||
@ -275,7 +289,8 @@ static __pure_function __always_inline pgno_t peek_pgno(const void *ptr) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline void poke_pgno(void *ptr, const pgno_t pgno) {
|
static __always_inline void poke_pgno(void *const __restrict ptr,
|
||||||
|
const pgno_t pgno) {
|
||||||
if (sizeof(pgno) == sizeof(uint32_t))
|
if (sizeof(pgno) == sizeof(uint32_t))
|
||||||
unaligned_poke_u32(1, ptr, pgno);
|
unaligned_poke_u32(1, ptr, pgno);
|
||||||
else if (sizeof(pgno) == sizeof(uint64_t))
|
else if (sizeof(pgno) == sizeof(uint64_t))
|
||||||
@ -285,7 +300,7 @@ static __always_inline void poke_pgno(void *ptr, const pgno_t pgno) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static __pure_function __always_inline pgno_t
|
static __pure_function __always_inline pgno_t
|
||||||
node_largedata_pgno(const MDBX_node *node) {
|
node_largedata_pgno(const MDBX_node *const __restrict node) {
|
||||||
assert(node_flags(node) & F_BIGDATA);
|
assert(node_flags(node) & F_BIGDATA);
|
||||||
return peek_pgno(node_data(node));
|
return peek_pgno(node_data(node));
|
||||||
}
|
}
|
||||||
@ -1801,7 +1816,7 @@ static __hot int __must_check_result mdbx_pnl_insert_range(MDBX_PNL *ppl,
|
|||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool __hot mdbx_pnl_check(const MDBX_PNL pl, const pgno_t limit) {
|
static bool mdbx_pnl_check(const MDBX_PNL pl, const pgno_t limit) {
|
||||||
assert(limit >= MIN_PAGENO && limit <= MAX_PAGENO + 1);
|
assert(limit >= MIN_PAGENO && limit <= MAX_PAGENO + 1);
|
||||||
if (likely(MDBX_PNL_SIZE(pl))) {
|
if (likely(MDBX_PNL_SIZE(pl))) {
|
||||||
assert(MDBX_PNL_LEAST(pl) >= MIN_PAGENO);
|
assert(MDBX_PNL_LEAST(pl) >= MIN_PAGENO);
|
||||||
@ -2977,7 +2992,7 @@ static __cold void mdbx_kill_page(MDBX_env *env, MDBX_page *mp, pgno_t pgno,
|
|||||||
* If the page wasn't dirtied in this txn, just add it
|
* If the page wasn't dirtied in this txn, just add it
|
||||||
* to this txn's free list. */
|
* to this txn's free list. */
|
||||||
|
|
||||||
static __hot int mdbx_page_loose(MDBX_txn *txn, MDBX_page *mp) {
|
static int mdbx_page_loose(MDBX_txn *txn, MDBX_page *mp) {
|
||||||
const unsigned npages = IS_OVERFLOW(mp) ? mp->mp_pages : 1;
|
const unsigned npages = IS_OVERFLOW(mp) ? mp->mp_pages : 1;
|
||||||
const pgno_t pgno = mp->mp_pgno;
|
const pgno_t pgno = mp->mp_pgno;
|
||||||
|
|
||||||
@ -3057,7 +3072,7 @@ static __hot int mdbx_page_loose(MDBX_txn *txn, MDBX_page *mp) {
|
|||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __hot int mdbx_page_retire(MDBX_cursor *mc, MDBX_page *mp) {
|
static int mdbx_page_retire(MDBX_cursor *mc, MDBX_page *mp) {
|
||||||
const unsigned npages = IS_OVERFLOW(mp) ? mp->mp_pages : 1;
|
const unsigned npages = IS_OVERFLOW(mp) ? mp->mp_pages : 1;
|
||||||
const pgno_t pgno = mp->mp_pgno;
|
const pgno_t pgno = mp->mp_pgno;
|
||||||
MDBX_txn *const txn = mc->mc_txn;
|
MDBX_txn *const txn = mc->mc_txn;
|
||||||
@ -3469,15 +3484,15 @@ mdbx_meta_mostrecent(const enum meta_choise_mode mode, const MDBX_env *env) {
|
|||||||
return head;
|
return head;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __hot MDBX_meta *mdbx_meta_steady(const MDBX_env *env) {
|
static MDBX_meta *mdbx_meta_steady(const MDBX_env *env) {
|
||||||
return mdbx_meta_mostrecent(prefer_steady, env);
|
return mdbx_meta_mostrecent(prefer_steady, env);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __hot MDBX_meta *mdbx_meta_head(const MDBX_env *env) {
|
static MDBX_meta *mdbx_meta_head(const MDBX_env *env) {
|
||||||
return mdbx_meta_mostrecent(prefer_last, env);
|
return mdbx_meta_mostrecent(prefer_last, env);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __hot txnid_t mdbx_recent_committed_txnid(const MDBX_env *env) {
|
static txnid_t mdbx_recent_committed_txnid(const MDBX_env *env) {
|
||||||
while (true) {
|
while (true) {
|
||||||
const MDBX_meta *head = mdbx_meta_head(env);
|
const MDBX_meta *head = mdbx_meta_head(env);
|
||||||
const txnid_t recent = mdbx_meta_txnid_fluid(env, head);
|
const txnid_t recent = mdbx_meta_txnid_fluid(env, head);
|
||||||
@ -3488,7 +3503,7 @@ static __hot txnid_t mdbx_recent_committed_txnid(const MDBX_env *env) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static __hot txnid_t mdbx_recent_steady_txnid(const MDBX_env *env) {
|
static txnid_t mdbx_recent_steady_txnid(const MDBX_env *env) {
|
||||||
while (true) {
|
while (true) {
|
||||||
const MDBX_meta *head = mdbx_meta_steady(env);
|
const MDBX_meta *head = mdbx_meta_steady(env);
|
||||||
const txnid_t recent = mdbx_meta_txnid_fluid(env, head);
|
const txnid_t recent = mdbx_meta_txnid_fluid(env, head);
|
||||||
@ -3936,8 +3951,8 @@ __cold static int mdbx_wipe_steady(MDBX_env *env, const txnid_t last_steady) {
|
|||||||
#define MDBX_ALLOC_NEW 4
|
#define MDBX_ALLOC_NEW 4
|
||||||
#define MDBX_ALLOC_ALL (MDBX_ALLOC_CACHE | MDBX_ALLOC_GC | MDBX_ALLOC_NEW)
|
#define MDBX_ALLOC_ALL (MDBX_ALLOC_CACHE | MDBX_ALLOC_GC | MDBX_ALLOC_NEW)
|
||||||
|
|
||||||
static int mdbx_page_alloc(MDBX_cursor *mc, const unsigned num,
|
__hot static int mdbx_page_alloc(MDBX_cursor *mc, const unsigned num,
|
||||||
MDBX_page **const mp, int flags) {
|
MDBX_page **const mp, int flags) {
|
||||||
int rc;
|
int rc;
|
||||||
MDBX_txn *txn = mc->mc_txn;
|
MDBX_txn *txn = mc->mc_txn;
|
||||||
MDBX_env *env = txn->mt_env;
|
MDBX_env *env = txn->mt_env;
|
||||||
@ -4427,9 +4442,8 @@ __hot static void mdbx_page_copy(MDBX_page *dst, MDBX_page *src,
|
|||||||
* [in] mp the page being referenced. It must not be dirty.
|
* [in] mp the page being referenced. It must not be dirty.
|
||||||
* [out] ret the writable page, if any.
|
* [out] ret the writable page, if any.
|
||||||
* ret is unchanged if mp wasn't spilled. */
|
* ret is unchanged if mp wasn't spilled. */
|
||||||
__hot static int __must_check_result mdbx_page_unspill(MDBX_txn *txn,
|
static int __must_check_result mdbx_page_unspill(MDBX_txn *txn, MDBX_page *mp,
|
||||||
MDBX_page *mp,
|
MDBX_page **ret) {
|
||||||
MDBX_page **ret) {
|
|
||||||
MDBX_env *env = txn->mt_env;
|
MDBX_env *env = txn->mt_env;
|
||||||
pgno_t pgno = mp->mp_pgno, pn = pgno << 1;
|
pgno_t pgno = mp->mp_pgno, pn = pgno << 1;
|
||||||
|
|
||||||
@ -4884,7 +4898,8 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
|
|||||||
mdbx_assert(env, (flags & ~(MDBX_TXN_BEGIN_FLAGS | MDBX_TXN_SPILLS |
|
mdbx_assert(env, (flags & ~(MDBX_TXN_BEGIN_FLAGS | MDBX_TXN_SPILLS |
|
||||||
MDBX_WRITEMAP)) == 0);
|
MDBX_WRITEMAP)) == 0);
|
||||||
if (flags & MDBX_RDONLY) {
|
if (flags & MDBX_RDONLY) {
|
||||||
txn->mt_flags = MDBX_RDONLY | (env->me_flags & MDBX_NOTLS);
|
txn->mt_flags =
|
||||||
|
MDBX_RDONLY | (env->me_flags & (MDBX_NOTLS | MDBX_WRITEMAP));
|
||||||
MDBX_reader *r = txn->to.reader;
|
MDBX_reader *r = txn->to.reader;
|
||||||
STATIC_ASSERT(sizeof(size_t) == sizeof(r->mr_tid));
|
STATIC_ASSERT(sizeof(size_t) == sizeof(r->mr_tid));
|
||||||
if (likely(env->me_flags & MDBX_ENV_TXKEY)) {
|
if (likely(env->me_flags & MDBX_ENV_TXKEY)) {
|
||||||
@ -5827,39 +5842,40 @@ static __always_inline unsigned backlog_size(MDBX_txn *txn) {
|
|||||||
return MDBX_PNL_SIZE(txn->tw.reclaimed_pglist) + txn->tw.loose_count;
|
return MDBX_PNL_SIZE(txn->tw.reclaimed_pglist) + txn->tw.loose_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline unsigned gctree_backlog(MDBX_txn *txn) {
|
|
||||||
return /* for split upto root page */ txn->mt_dbs[FREE_DBI].md_depth +
|
|
||||||
/* for rebalance */ 2 + /* for grow */ 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* LY: Prepare a backlog of pages to modify GC itself,
|
/* LY: Prepare a backlog of pages to modify GC itself,
|
||||||
* while reclaiming is prohibited. It should be enough to prevent search
|
* while reclaiming is prohibited. It should be enough to prevent search
|
||||||
* in mdbx_page_alloc() during a deleting, when GC tree is unbalanced. */
|
* in mdbx_page_alloc() during a deleting, when GC tree is unbalanced. */
|
||||||
static int mdbx_prep_backlog(MDBX_txn *txn, MDBX_cursor *gc_cursor,
|
static int mdbx_prep_backlog(MDBX_txn *txn, MDBX_cursor *gc_cursor,
|
||||||
const size_t pnl_bytes) {
|
const size_t pnl_bytes) {
|
||||||
const unsigned linear = number_of_ovpages(
|
const unsigned linear4list = number_of_ovpages(txn->mt_env, pnl_bytes);
|
||||||
txn->mt_env,
|
const unsigned backlog4cow = txn->mt_dbs[FREE_DBI].md_depth;
|
||||||
pnl_bytes ? pnl_bytes : MDBX_PNL_SIZEOF(txn->tw.retired_pages));
|
const unsigned backlog4rebalance = backlog4cow + 1;
|
||||||
const unsigned backlog = linear + gctree_backlog(txn);
|
|
||||||
|
|
||||||
if (likely(
|
if (likely(linear4list == 1 &&
|
||||||
linear == 1 &&
|
backlog_size(txn) > (pnl_bytes
|
||||||
backlog_size(txn) >
|
? backlog4rebalance
|
||||||
(pnl_bytes
|
: (backlog4cow + backlog4rebalance))))
|
||||||
? backlog
|
|
||||||
: backlog + /* for COW */ txn->mt_dbs[FREE_DBI].md_depth)))
|
|
||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
|
|
||||||
|
mdbx_trace(">> pnl_bytes %zu, backlog %u, 4list %u, 4cow %u, 4rebalance %u",
|
||||||
|
pnl_bytes, backlog_size(txn), linear4list, backlog4cow,
|
||||||
|
backlog4rebalance);
|
||||||
|
|
||||||
gc_cursor->mc_flags &= ~C_RECLAIMING;
|
gc_cursor->mc_flags &= ~C_RECLAIMING;
|
||||||
|
|
||||||
int err = mdbx_cursor_touch(gc_cursor);
|
int err = mdbx_cursor_touch(gc_cursor);
|
||||||
if (err == MDBX_SUCCESS && linear > 1)
|
mdbx_trace("== after-touch, backlog %u, err %d", backlog_size(txn), err);
|
||||||
err = mdbx_page_alloc(gc_cursor, linear, nullptr, MDBX_ALLOC_ALL);
|
|
||||||
|
|
||||||
while (err == MDBX_SUCCESS && backlog_size(txn) < backlog)
|
if (linear4list > 1 && err == MDBX_SUCCESS) {
|
||||||
|
err = mdbx_page_alloc(gc_cursor, linear4list, nullptr,
|
||||||
|
MDBX_ALLOC_GC | MDBX_ALLOC_CACHE);
|
||||||
|
mdbx_trace("== after-4linear, backlog %u, err %d", backlog_size(txn), err);
|
||||||
|
}
|
||||||
|
|
||||||
|
while (backlog_size(txn) < backlog4cow + linear4list && err == MDBX_SUCCESS)
|
||||||
err = mdbx_page_alloc(gc_cursor, 1, NULL, MDBX_ALLOC_GC);
|
err = mdbx_page_alloc(gc_cursor, 1, NULL, MDBX_ALLOC_GC);
|
||||||
|
|
||||||
gc_cursor->mc_flags |= C_RECLAIMING;
|
gc_cursor->mc_flags |= C_RECLAIMING;
|
||||||
|
mdbx_trace("<< backlog %u, err %d", backlog_size(txn), err);
|
||||||
return (err != MDBX_NOTFOUND) ? err : MDBX_SUCCESS;
|
return (err != MDBX_NOTFOUND) ? err : MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5909,6 +5925,10 @@ retry:
|
|||||||
goto bailout;
|
goto bailout;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rc = mdbx_prep_backlog(txn, &mc, MDBX_PNL_SIZEOF(txn->tw.retired_pages));
|
||||||
|
if (unlikely(rc != MDBX_SUCCESS))
|
||||||
|
goto bailout;
|
||||||
|
|
||||||
unsigned settled = 0, cleaned_gc_slot = 0, reused_gc_slot = 0,
|
unsigned settled = 0, cleaned_gc_slot = 0, reused_gc_slot = 0,
|
||||||
filled_gc_slot = ~0u;
|
filled_gc_slot = ~0u;
|
||||||
txnid_t cleaned_gc_id = 0, gc_rid = txn->tw.last_reclaimed;
|
txnid_t cleaned_gc_id = 0, gc_rid = txn->tw.last_reclaimed;
|
||||||
@ -6116,8 +6136,11 @@ retry:
|
|||||||
mdbx_debug_extra_print(" %" PRIaPGNO, txn->tw.retired_pages[i]);
|
mdbx_debug_extra_print(" %" PRIaPGNO, txn->tw.retired_pages[i]);
|
||||||
mdbx_debug_extra_print("%s", "\n");
|
mdbx_debug_extra_print("%s", "\n");
|
||||||
}
|
}
|
||||||
if (unlikely(amount != MDBX_PNL_SIZE(txn->tw.reclaimed_pglist)))
|
if (unlikely(amount != MDBX_PNL_SIZE(txn->tw.reclaimed_pglist))) {
|
||||||
|
mdbx_trace("%s.reclaimed-list changed %u -> %u, retry", dbg_prefix_mode,
|
||||||
|
amount, (unsigned)MDBX_PNL_SIZE(txn->tw.reclaimed_pglist));
|
||||||
goto retry /* rare case, but avoids GC fragmentation and one loop. */;
|
goto retry /* rare case, but avoids GC fragmentation and one loop. */;
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -6550,7 +6573,7 @@ static int mdbx_flush_iov(MDBX_txn *const txn, struct iovec *iov,
|
|||||||
* [in] txn the transaction that's being committed
|
* [in] txn the transaction that's being committed
|
||||||
* [in] keep number of initial pages in dirtylist to keep dirty.
|
* [in] keep number of initial pages in dirtylist to keep dirty.
|
||||||
* Returns 0 on success, non-zero on failure. */
|
* Returns 0 on success, non-zero on failure. */
|
||||||
static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
|
__hot static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
|
||||||
struct iovec iov[MDBX_COMMIT_PAGES];
|
struct iovec iov[MDBX_COMMIT_PAGES];
|
||||||
const MDBX_DPL dl = (keep || txn->tw.loose_count > 1)
|
const MDBX_DPL dl = (keep || txn->tw.loose_count > 1)
|
||||||
? mdbx_dpl_sort(txn->tw.dirtylist)
|
? mdbx_dpl_sort(txn->tw.dirtylist)
|
||||||
@ -9514,14 +9537,14 @@ __hot static int mdbx_page_get(MDBX_cursor *mc, pgno_t pgno, MDBX_page **ret,
|
|||||||
MDBX_txn *txn = mc->mc_txn;
|
MDBX_txn *txn = mc->mc_txn;
|
||||||
if (unlikely(pgno >= txn->mt_next_pgno)) {
|
if (unlikely(pgno >= txn->mt_next_pgno)) {
|
||||||
mdbx_debug("page %" PRIaPGNO " not found", pgno);
|
mdbx_debug("page %" PRIaPGNO " not found", pgno);
|
||||||
txn->mt_flags |= MDBX_TXN_ERROR;
|
goto corrupted;
|
||||||
return MDBX_PAGE_NOTFOUND;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
MDBX_env *env = txn->mt_env;
|
MDBX_env *const env = txn->mt_env;
|
||||||
MDBX_page *p = NULL;
|
MDBX_page *p = NULL;
|
||||||
int level;
|
int level;
|
||||||
if ((txn->mt_flags & (MDBX_RDONLY | MDBX_WRITEMAP)) == 0) {
|
mdbx_assert(env, ((txn->mt_flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
|
||||||
|
if (unlikely((txn->mt_flags & (MDBX_RDONLY | MDBX_WRITEMAP)) == 0)) {
|
||||||
level = 1;
|
level = 1;
|
||||||
do {
|
do {
|
||||||
/* Spilled pages were dirtied in this txn and flushed
|
/* Spilled pages were dirtied in this txn and flushed
|
||||||
@ -9542,20 +9565,21 @@ mapped:
|
|||||||
p = pgno2page(env, pgno);
|
p = pgno2page(env, pgno);
|
||||||
|
|
||||||
done:
|
done:
|
||||||
txn = nullptr /* avoid future use */;
|
|
||||||
if (unlikely(p->mp_pgno != pgno)) {
|
if (unlikely(p->mp_pgno != pgno)) {
|
||||||
mdbx_error("mismatch pgno %" PRIaPGNO " (actual) != %" PRIaPGNO
|
mdbx_error("mismatch pgno %" PRIaPGNO " (actual) != %" PRIaPGNO
|
||||||
" (expected)",
|
" (expected)",
|
||||||
p->mp_pgno, pgno);
|
p->mp_pgno, pgno);
|
||||||
return MDBX_CORRUPTED;
|
goto corrupted;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unlikely(p->mp_upper < p->mp_lower || ((p->mp_lower | p->mp_upper) & 1) ||
|
if (likely(!IS_OVERFLOW(p))) {
|
||||||
PAGEHDRSZ + p->mp_upper > env->me_psize) &&
|
if (unlikely(p->mp_upper < p->mp_lower ||
|
||||||
!IS_OVERFLOW(p)) {
|
((p->mp_lower | p->mp_upper) & 1) ||
|
||||||
mdbx_error("invalid page lower(%u)/upper(%u), pg-limit %u", p->mp_lower,
|
PAGEHDRSZ + p->mp_upper > env->me_psize)) {
|
||||||
p->mp_upper, page_space(env));
|
mdbx_error("invalid page lower(%u)/upper(%u), pg-limit %u", p->mp_lower,
|
||||||
return MDBX_CORRUPTED;
|
p->mp_upper, page_space(env));
|
||||||
|
goto corrupted;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/* TODO: more checks here, including p->mp_validator */
|
/* TODO: more checks here, including p->mp_validator */
|
||||||
|
|
||||||
@ -9569,6 +9593,10 @@ done:
|
|||||||
if (lvl)
|
if (lvl)
|
||||||
*lvl = level;
|
*lvl = level;
|
||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
|
|
||||||
|
corrupted:
|
||||||
|
txn->mt_flags |= MDBX_TXN_ERROR;
|
||||||
|
return MDBX_CORRUPTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Finish mdbx_page_search() / mdbx_page_search_lowest().
|
/* Finish mdbx_page_search() / mdbx_page_search_lowest().
|
||||||
@ -10148,8 +10176,8 @@ static int mdbx_cursor_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Set the cursor on a specific data item. */
|
/* Set the cursor on a specific data item. */
|
||||||
__hot static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
|
static int mdbx_cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
|
||||||
MDBX_cursor_op op, int *exactp) {
|
MDBX_cursor_op op, int *exactp) {
|
||||||
int rc;
|
int rc;
|
||||||
MDBX_page *mp;
|
MDBX_page *mp;
|
||||||
MDBX_node *node = NULL;
|
MDBX_node *node = NULL;
|
||||||
@ -16197,9 +16225,8 @@ __hot static ptrdiff_t estimate(const MDBX_db *db,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
__hot int mdbx_estimate_distance(const MDBX_cursor *first,
|
int mdbx_estimate_distance(const MDBX_cursor *first, const MDBX_cursor *last,
|
||||||
const MDBX_cursor *last,
|
ptrdiff_t *distance_items) {
|
||||||
ptrdiff_t *distance_items) {
|
|
||||||
if (unlikely(first == NULL || last == NULL || distance_items == NULL))
|
if (unlikely(first == NULL || last == NULL || distance_items == NULL))
|
||||||
return MDBX_EINVAL;
|
return MDBX_EINVAL;
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user