mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-04 18:54:13 +08:00
mdbx: use page's mp_txnid for basic integrity checking.
Change-Id: I50d6f1251e4fd84e535a708e78dd24d84ec53780
This commit is contained in:
parent
d11bfef36b
commit
ccb45730f2
@ -2849,7 +2849,7 @@ static void mdbx_refund_loose(MDBX_txn *txn) {
|
|||||||
most -= 1;
|
most -= 1;
|
||||||
}
|
}
|
||||||
const unsigned refunded = txn->mt_next_pgno - most;
|
const unsigned refunded = txn->mt_next_pgno - most;
|
||||||
mdbx_verbose("refund-sorted %u pages %" PRIaPGNO " -> %" PRIaPGNO,
|
mdbx_verbose("refund-suitable %u pages %" PRIaPGNO " -> %" PRIaPGNO,
|
||||||
refunded, most, txn->mt_next_pgno);
|
refunded, most, txn->mt_next_pgno);
|
||||||
txn->tw.loose_count -= refunded;
|
txn->tw.loose_count -= refunded;
|
||||||
txn->tw.dirtyroom += refunded;
|
txn->tw.dirtyroom += refunded;
|
||||||
@ -2889,7 +2889,7 @@ static void mdbx_refund_loose(MDBX_txn *txn) {
|
|||||||
while (dl->length && dl[dl->length].pgno == txn->mt_next_pgno - 1 &&
|
while (dl->length && dl[dl->length].pgno == txn->mt_next_pgno - 1 &&
|
||||||
dl[dl->length].ptr->mp_flags == (P_LOOSE | P_DIRTY)) {
|
dl[dl->length].ptr->mp_flags == (P_LOOSE | P_DIRTY)) {
|
||||||
MDBX_page *dp = dl[dl->length].ptr;
|
MDBX_page *dp = dl[dl->length].ptr;
|
||||||
mdbx_verbose("refund-unsorted page %" PRIaPGNO, dp->mp_pgno);
|
mdbx_verbose("refund-sorted page %" PRIaPGNO, dp->mp_pgno);
|
||||||
mdbx_tassert(txn, dp->mp_pgno == dl[dl->length].pgno);
|
mdbx_tassert(txn, dp->mp_pgno == dl[dl->length].pgno);
|
||||||
dl->length -= 1;
|
dl->length -= 1;
|
||||||
}
|
}
|
||||||
@ -2958,7 +2958,7 @@ static __cold void mdbx_kill_page(MDBX_env *env, MDBX_page *mp, pgno_t pgno,
|
|||||||
mdbx_assert(env, pgno >= NUM_METAS && npages);
|
mdbx_assert(env, pgno >= NUM_METAS && npages);
|
||||||
if (IS_DIRTY(mp) || (env->me_flags & MDBX_WRITEMAP)) {
|
if (IS_DIRTY(mp) || (env->me_flags & MDBX_WRITEMAP)) {
|
||||||
const size_t bytes = pgno2bytes(env, npages);
|
const size_t bytes = pgno2bytes(env, npages);
|
||||||
memset(mp, 0, bytes);
|
memset(mp, -1, bytes);
|
||||||
mp->mp_pgno = pgno;
|
mp->mp_pgno = pgno;
|
||||||
if ((env->me_flags & MDBX_WRITEMAP) == 0)
|
if ((env->me_flags & MDBX_WRITEMAP) == 0)
|
||||||
mdbx_pwrite(env->me_lazy_fd, mp, bytes, pgno2bytes(env, pgno));
|
mdbx_pwrite(env->me_lazy_fd, mp, bytes, pgno2bytes(env, pgno));
|
||||||
@ -2996,6 +2996,7 @@ static int mdbx_page_loose(MDBX_txn *txn, MDBX_page *mp) {
|
|||||||
const unsigned npages = IS_OVERFLOW(mp) ? mp->mp_pages : 1;
|
const unsigned npages = IS_OVERFLOW(mp) ? mp->mp_pages : 1;
|
||||||
const pgno_t pgno = mp->mp_pgno;
|
const pgno_t pgno = mp->mp_pgno;
|
||||||
|
|
||||||
|
mp->mp_txnid = INVALID_TXNID;
|
||||||
if (txn->mt_parent) {
|
if (txn->mt_parent) {
|
||||||
mdbx_tassert(txn, (txn->mt_env->me_flags & MDBX_WRITEMAP) == 0);
|
mdbx_tassert(txn, (txn->mt_env->me_flags & MDBX_WRITEMAP) == 0);
|
||||||
mdbx_tassert(txn, mp != pgno2page(txn->mt_env, pgno));
|
mdbx_tassert(txn, mp != pgno2page(txn->mt_env, pgno));
|
||||||
@ -3597,6 +3598,8 @@ static __cold pgno_t mdbx_find_largest(MDBX_env *env, pgno_t largest) {
|
|||||||
|
|
||||||
/* Add a page to the txn's dirty list */
|
/* Add a page to the txn's dirty list */
|
||||||
static int __must_check_result mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp) {
|
static int __must_check_result mdbx_page_dirty(MDBX_txn *txn, MDBX_page *mp) {
|
||||||
|
mp->mp_txnid = INVALID_TXNID;
|
||||||
|
mp->mp_flags |= P_DIRTY;
|
||||||
const int rc = mdbx_dpl_append(txn->tw.dirtylist, mp->mp_pgno, mp);
|
const int rc = mdbx_dpl_append(txn->tw.dirtylist, mp->mp_pgno, mp);
|
||||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||||
txn->mt_flags |= MDBX_TXN_ERROR;
|
txn->mt_flags |= MDBX_TXN_ERROR;
|
||||||
@ -3997,6 +4000,8 @@ __hot static int mdbx_page_alloc(MDBX_cursor *mc, const unsigned num,
|
|||||||
mdbx_ensure(env, np->mp_pgno >= NUM_METAS);
|
mdbx_ensure(env, np->mp_pgno >= NUM_METAS);
|
||||||
VALGRIND_MAKE_MEM_UNDEFINED(page_data(np), page_space(txn->mt_env));
|
VALGRIND_MAKE_MEM_UNDEFINED(page_data(np), page_space(txn->mt_env));
|
||||||
ASAN_UNPOISON_MEMORY_REGION(page_data(np), page_space(txn->mt_env));
|
ASAN_UNPOISON_MEMORY_REGION(page_data(np), page_space(txn->mt_env));
|
||||||
|
np->mp_flags = P_DIRTY;
|
||||||
|
np->mp_txnid = INVALID_TXNID;
|
||||||
*mp = np;
|
*mp = np;
|
||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -4488,10 +4493,8 @@ static int __must_check_result mdbx_page_unspill(MDBX_txn *txn, MDBX_page *mp,
|
|||||||
* page remains spilled until child commits */
|
* page remains spilled until child commits */
|
||||||
|
|
||||||
int rc = mdbx_page_dirty(txn, np);
|
int rc = mdbx_page_dirty(txn, np);
|
||||||
if (likely(rc == MDBX_SUCCESS)) {
|
if (likely(rc == MDBX_SUCCESS))
|
||||||
np->mp_flags |= P_DIRTY;
|
|
||||||
*ret = np;
|
*ret = np;
|
||||||
}
|
|
||||||
return rc;
|
return rc;
|
||||||
}
|
}
|
||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
@ -4575,6 +4578,7 @@ __hot static int mdbx_page_touch(MDBX_cursor *mc) {
|
|||||||
|
|
||||||
mdbx_page_copy(np, mp, txn->mt_env->me_psize);
|
mdbx_page_copy(np, mp, txn->mt_env->me_psize);
|
||||||
np->mp_pgno = pgno;
|
np->mp_pgno = pgno;
|
||||||
|
np->mp_txnid = INVALID_TXNID;
|
||||||
np->mp_flags |= P_DIRTY;
|
np->mp_flags |= P_DIRTY;
|
||||||
|
|
||||||
done:
|
done:
|
||||||
@ -6619,7 +6623,7 @@ __hot static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
|
|||||||
(flush_end > dp->mp_pgno + npages) ? flush_end : dp->mp_pgno + npages;
|
(flush_end > dp->mp_pgno + npages) ? flush_end : dp->mp_pgno + npages;
|
||||||
*env->me_unsynced_pages += npages;
|
*env->me_unsynced_pages += npages;
|
||||||
dp->mp_flags &= ~P_DIRTY;
|
dp->mp_flags &= ~P_DIRTY;
|
||||||
dp->mp_validator = 0 /* TODO */;
|
dp->mp_txnid = txn->mt_txnid;
|
||||||
|
|
||||||
if ((env->me_flags & MDBX_WRITEMAP) == 0) {
|
if ((env->me_flags & MDBX_WRITEMAP) == 0) {
|
||||||
const size_t size = pgno2bytes(env, npages);
|
const size_t size = pgno2bytes(env, npages);
|
||||||
@ -9604,19 +9608,19 @@ __hot static int mdbx_page_get(MDBX_cursor *mc, pgno_t pgno, MDBX_page **ret,
|
|||||||
* back in from the map (but don't unspill it here,
|
* back in from the map (but don't unspill it here,
|
||||||
* leave that unless page_touch happens again). */
|
* leave that unless page_touch happens again). */
|
||||||
if (txn->tw.spill_pages && mdbx_pnl_exist(txn->tw.spill_pages, pgno << 1))
|
if (txn->tw.spill_pages && mdbx_pnl_exist(txn->tw.spill_pages, pgno << 1))
|
||||||
goto mapped;
|
goto spilled;
|
||||||
p = mdbx_dpl_find(txn->tw.dirtylist, pgno);
|
p = mdbx_dpl_find(txn->tw.dirtylist, pgno);
|
||||||
if (p)
|
if (p)
|
||||||
goto done;
|
goto dirty;
|
||||||
level++;
|
level++;
|
||||||
} while ((txn = txn->mt_parent) != NULL);
|
} while ((txn = txn->mt_parent) != NULL);
|
||||||
}
|
}
|
||||||
level = 0;
|
level = 0;
|
||||||
|
|
||||||
mapped:
|
spilled:
|
||||||
p = pgno2page(env, pgno);
|
p = pgno2page(env, pgno);
|
||||||
|
|
||||||
done:
|
dirty:
|
||||||
if (unlikely(p->mp_pgno != pgno)) {
|
if (unlikely(p->mp_pgno != pgno)) {
|
||||||
mdbx_error("mismatch pgno %" PRIaPGNO " (actual) != %" PRIaPGNO
|
mdbx_error("mismatch pgno %" PRIaPGNO " (actual) != %" PRIaPGNO
|
||||||
" (expected)",
|
" (expected)",
|
||||||
@ -9624,16 +9628,25 @@ done:
|
|||||||
goto corrupted;
|
goto corrupted;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (likely(!IS_OVERFLOW(p))) {
|
if (unlikely((p->mp_flags & (P_LOOSE | P_SUBP | P_META | P_DIRTY)) != 0 ||
|
||||||
if (unlikely(p->mp_upper < p->mp_lower ||
|
p->mp_txnid > mc->mc_txn->mt_txnid)) {
|
||||||
((p->mp_lower | p->mp_upper) & 1) ||
|
if (unlikely((mc->mc_txn->mt_flags & MDBX_RDONLY) != 0 ||
|
||||||
PAGEHDRSZ + p->mp_upper > env->me_psize)) {
|
(p->mp_flags & (P_LOOSE | P_SUBP | P_META | P_DIRTY)) !=
|
||||||
mdbx_error("invalid page lower(%u)/upper(%u), pg-limit %u", p->mp_lower,
|
P_DIRTY)) {
|
||||||
p->mp_upper, page_space(env));
|
mdbx_error("invalid page's flags (0x%x) or txnid %" PRIaTXN
|
||||||
|
" > (actual) %" PRIaTXN " (expected)",
|
||||||
|
p->mp_flags, p->mp_txnid, mc->mc_txn->mt_txnid);
|
||||||
goto corrupted;
|
goto corrupted;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* TODO: more checks here, including p->mp_validator */
|
|
||||||
|
if (unlikely(!IS_OVERFLOW(p) && (p->mp_upper < p->mp_lower ||
|
||||||
|
((p->mp_lower | p->mp_upper) & 1) != 0 ||
|
||||||
|
PAGEHDRSZ + p->mp_upper > env->me_psize))) {
|
||||||
|
mdbx_error("invalid page lower(%u)/upper(%u), pg-limit %u", p->mp_lower,
|
||||||
|
p->mp_upper, page_space(env));
|
||||||
|
goto corrupted;
|
||||||
|
}
|
||||||
|
|
||||||
if (mdbx_audit_enabled()) {
|
if (mdbx_audit_enabled()) {
|
||||||
int err = mdbx_page_check(env, p, true);
|
int err = mdbx_page_check(env, p, true);
|
||||||
@ -10908,6 +10921,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
|
|||||||
insert_key = insert_data = (rc != MDBX_SUCCESS);
|
insert_key = insert_data = (rc != MDBX_SUCCESS);
|
||||||
uint16_t fp_flags = P_LEAF | P_DIRTY;
|
uint16_t fp_flags = P_LEAF | P_DIRTY;
|
||||||
MDBX_page *fp = env->me_pbuf;
|
MDBX_page *fp = env->me_pbuf;
|
||||||
|
fp->mp_txnid = INVALID_TXNID;
|
||||||
if (insert_key) {
|
if (insert_key) {
|
||||||
/* The key does not exist */
|
/* The key does not exist */
|
||||||
mdbx_debug("inserting key at index %i", mc->mc_ki[mc->mc_top]);
|
mdbx_debug("inserting key at index %i", mc->mc_ki[mc->mc_top]);
|
||||||
@ -10995,6 +11009,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
|
|||||||
}
|
}
|
||||||
/* Is it dirty? */
|
/* Is it dirty? */
|
||||||
if (IS_DIRTY(omp)) {
|
if (IS_DIRTY(omp)) {
|
||||||
|
mdbx_cassert(mc, omp->mp_txnid > SAFE64_INVALID_THRESHOLD);
|
||||||
/* yes, overwrite it. Note in this case we don't
|
/* yes, overwrite it. Note in this case we don't
|
||||||
* bother to try shrinking the page if the new data
|
* bother to try shrinking the page if the new data
|
||||||
* is smaller than the overflow threshold. */
|
* is smaller than the overflow threshold. */
|
||||||
@ -11126,6 +11141,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
|
|||||||
case MDBX_CURRENT | MDBX_NODUPDATA:
|
case MDBX_CURRENT | MDBX_NODUPDATA:
|
||||||
case MDBX_CURRENT:
|
case MDBX_CURRENT:
|
||||||
fp->mp_flags |= P_DIRTY;
|
fp->mp_flags |= P_DIRTY;
|
||||||
|
fp->mp_txnid = INVALID_TXNID;
|
||||||
fp->mp_pgno = mp->mp_pgno;
|
fp->mp_pgno = mp->mp_pgno;
|
||||||
mc->mc_xcursor->mx_cursor.mc_pg[0] = fp;
|
mc->mc_xcursor->mx_cursor.mc_pg[0] = fp;
|
||||||
flags |= F_DUPDATA;
|
flags |= F_DUPDATA;
|
||||||
@ -11167,6 +11183,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
|
|||||||
}
|
}
|
||||||
if (mp != fp) {
|
if (mp != fp) {
|
||||||
mp->mp_flags = fp_flags | P_DIRTY;
|
mp->mp_flags = fp_flags | P_DIRTY;
|
||||||
|
mp->mp_txnid = INVALID_TXNID;
|
||||||
mp->mp_leaf2_ksize = fp->mp_leaf2_ksize;
|
mp->mp_leaf2_ksize = fp->mp_leaf2_ksize;
|
||||||
mp->mp_lower = fp->mp_lower;
|
mp->mp_lower = fp->mp_lower;
|
||||||
mdbx_cassert(mc, fp->mp_upper + offset <= UINT16_MAX);
|
mdbx_cassert(mc, fp->mp_upper + offset <= UINT16_MAX);
|
||||||
@ -11514,6 +11531,7 @@ static int mdbx_page_new(MDBX_cursor *mc, unsigned flags, unsigned num,
|
|||||||
mdbx_debug("allocated new page #%" PRIaPGNO ", size %u", np->mp_pgno,
|
mdbx_debug("allocated new page #%" PRIaPGNO ", size %u", np->mp_pgno,
|
||||||
mc->mc_txn->mt_env->me_psize);
|
mc->mc_txn->mt_env->me_psize);
|
||||||
np->mp_flags = (uint16_t)(flags | P_DIRTY);
|
np->mp_flags = (uint16_t)(flags | P_DIRTY);
|
||||||
|
np->mp_txnid = INVALID_TXNID;
|
||||||
np->mp_lower = 0;
|
np->mp_lower = 0;
|
||||||
np->mp_upper = (indx_t)(mc->mc_txn->mt_env->me_psize - PAGEHDRSZ);
|
np->mp_upper = (indx_t)(mc->mc_txn->mt_env->me_psize - PAGEHDRSZ);
|
||||||
|
|
||||||
@ -13540,6 +13558,7 @@ static int mdbx_page_split(MDBX_cursor *mc, const MDBX_val *newkey,
|
|||||||
}
|
}
|
||||||
copy->mp_pgno = mp->mp_pgno;
|
copy->mp_pgno = mp->mp_pgno;
|
||||||
copy->mp_flags = mp->mp_flags;
|
copy->mp_flags = mp->mp_flags;
|
||||||
|
copy->mp_txnid = INVALID_TXNID;
|
||||||
copy->mp_lower = 0;
|
copy->mp_lower = 0;
|
||||||
copy->mp_upper = (indx_t)page_space(env);
|
copy->mp_upper = (indx_t)page_space(env);
|
||||||
|
|
||||||
@ -14082,6 +14101,7 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
|
|||||||
mo = (MDBX_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]);
|
mo = (MDBX_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]);
|
||||||
memcpy(mo, omp, my->mc_env->me_psize);
|
memcpy(mo, omp, my->mc_env->me_psize);
|
||||||
mo->mp_pgno = my->mc_next_pgno;
|
mo->mp_pgno = my->mc_next_pgno;
|
||||||
|
mo->mp_txnid = MIN_TXNID;
|
||||||
my->mc_next_pgno += omp->mp_pages;
|
my->mc_next_pgno += omp->mp_pages;
|
||||||
my->mc_wlen[toggle] += my->mc_env->me_psize;
|
my->mc_wlen[toggle] += my->mc_env->me_psize;
|
||||||
if (omp->mp_pages > 1) {
|
if (omp->mp_pages > 1) {
|
||||||
@ -14146,6 +14166,7 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
|
|||||||
}
|
}
|
||||||
mo = (MDBX_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]);
|
mo = (MDBX_page *)(my->mc_wbuf[toggle] + my->mc_wlen[toggle]);
|
||||||
mdbx_page_copy(mo, mp, my->mc_env->me_psize);
|
mdbx_page_copy(mo, mp, my->mc_env->me_psize);
|
||||||
|
mo->mp_txnid = MIN_TXNID;
|
||||||
mo->mp_pgno = my->mc_next_pgno++;
|
mo->mp_pgno = my->mc_next_pgno++;
|
||||||
my->mc_wlen[toggle] += my->mc_env->me_psize;
|
my->mc_wlen[toggle] += my->mc_env->me_psize;
|
||||||
if (mc.mc_top) {
|
if (mc.mc_top) {
|
||||||
|
@ -211,6 +211,7 @@ typedef uint32_t pgno_t;
|
|||||||
typedef uint64_t txnid_t;
|
typedef uint64_t txnid_t;
|
||||||
#define PRIaTXN PRIi64
|
#define PRIaTXN PRIi64
|
||||||
#define MIN_TXNID UINT64_C(1)
|
#define MIN_TXNID UINT64_C(1)
|
||||||
|
#define INVALID_TXNID UINT64_MAX
|
||||||
/* LY: for testing non-atomic 64-bit txnid on 32-bit arches.
|
/* LY: for testing non-atomic 64-bit txnid on 32-bit arches.
|
||||||
* #define MDBX_TXNID_STEP (UINT32_MAX / 3) */
|
* #define MDBX_TXNID_STEP (UINT32_MAX / 3) */
|
||||||
#ifndef MDBX_TXNID_STEP
|
#ifndef MDBX_TXNID_STEP
|
||||||
@ -347,8 +348,7 @@ typedef struct MDBX_meta {
|
|||||||
typedef struct MDBX_page {
|
typedef struct MDBX_page {
|
||||||
union {
|
union {
|
||||||
struct MDBX_page *mp_next; /* for in-memory list of freed pages */
|
struct MDBX_page *mp_next; /* for in-memory list of freed pages */
|
||||||
uint64_t mp_validator; /* checksum of page content or a txnid during
|
uint64_t mp_txnid; /* txnid during which the page has been COW-ed */
|
||||||
* which the page has been updated */
|
|
||||||
};
|
};
|
||||||
uint16_t mp_leaf2_ksize; /* key size if this is a LEAF2 page */
|
uint16_t mp_leaf2_ksize; /* key size if this is a LEAF2 page */
|
||||||
#define P_BRANCH 0x01 /* branch page */
|
#define P_BRANCH 0x01 /* branch page */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user