mdbx: parent-page-txnid checking.

Change-Id: I6d37326c4ff2aa32587704b971bd650d9221b06f
This commit is contained in:
Leonid Yuriev 2020-09-02 01:16:05 +03:00
parent cd4f732a87
commit cd6aa4a708

View File

@ -3084,7 +3084,8 @@ enum {
static int mdbx_txn_end(MDBX_txn *txn, unsigned mode);
static int __must_check_result mdbx_page_get(MDBX_cursor *mc, pgno_t pgno,
MDBX_page **mp, int *lvl);
MDBX_page **mp, int *lvl,
const txnid_t pp_txnid);
static int __must_check_result mdbx_page_search_root(MDBX_cursor *mc,
const MDBX_val *key,
int modify);
@ -3134,7 +3135,8 @@ static void mdbx_node_shrink(MDBX_page *mp, unsigned indx);
static int __must_check_result mdbx_node_move(MDBX_cursor *csrc,
MDBX_cursor *cdst, int fromleft);
static int __must_check_result mdbx_node_read(MDBX_cursor *mc, MDBX_node *leaf,
MDBX_val *data);
MDBX_val *data,
const txnid_t pp_txnid);
static int __must_check_result mdbx_rebalance(MDBX_cursor *mc);
static int __must_check_result mdbx_update_key(MDBX_cursor *mc,
const MDBX_val *key);
@ -3179,7 +3181,8 @@ static int __must_check_result mdbx_cursor_init(MDBX_cursor *mc, MDBX_txn *txn,
MDBX_dbi dbi);
static int __must_check_result mdbx_xcursor_init0(MDBX_cursor *mc);
static int __must_check_result mdbx_xcursor_init1(MDBX_cursor *mc,
MDBX_node *node);
MDBX_node *node,
const MDBX_page *mp);
static int __must_check_result mdbx_xcursor_init2(MDBX_cursor *mc,
MDBX_xcursor *src_mx,
int force);
@ -4077,12 +4080,12 @@ mdbx_retire_pgno(MDBX_cursor *mc, const pgno_t pgno) {
if (mdbx_audit_enabled()) {
const unsigned save_flags = mc->mc_flags;
mc->mc_flags |= C_RETIRING;
rc = mdbx_page_get(mc, pgno, &mp, NULL);
rc = mdbx_page_get(mc, pgno, &mp, NULL, mc->mc_txn->mt_txnid);
if (likely(rc == MDBX_SUCCESS))
rc = mdbx_page_retire(mc, mp);
mc->mc_flags = (mc->mc_flags & ~C_RETIRING) | (save_flags & C_RETIRING);
} else {
rc = mdbx_page_get(mc, pgno, &mp, NULL);
rc = mdbx_page_get(mc, pgno, &mp, NULL, mc->mc_txn->mt_txnid);
if (likely(rc == MDBX_SUCCESS))
rc = mdbx_page_retire(mc, mp);
}
@ -4142,8 +4145,8 @@ mark_done:
if (pgno == P_INVALID)
continue;
int level;
if (unlikely((rc = mdbx_page_get(m0, pgno, &dp, &level)) !=
MDBX_SUCCESS))
if (unlikely((rc = mdbx_page_get(m0, pgno, &dp, &level,
txn->mt_txnid)) != MDBX_SUCCESS))
break;
if ((dp->mp_flags & Mask) == pflags && level <= 1)
dp->mp_flags ^= P_KEEP;
@ -4944,6 +4947,21 @@ __cold static int mdbx_wipe_steady(MDBX_env *env, const txnid_t last_steady) {
return MDBX_SUCCESS;
}
/* Derive from page `mp` the txnid value that callers hand over as the
 * "parent page txnid" argument when fetching pages referenced from `mp`.
 *
 *  - dirty page: the result is the current transaction id minus one
 *    (NOTE(review): presumably because any non-dirty page reachable from a
 *    page dirtied by this transaction must come from an earlier
 *    transaction - confirm);
 *  - non-dirty page: the page's own mp_txnid, with a zero value (possible
 *    in a legacy DB) replaced by MIN_TXNID. */
static __inline txnid_t pp_txnid4chk(const MDBX_page *mp, const MDBX_txn *txn) {
  if (IS_DIRTY(mp))
    return txn->mt_txnid - 1;

  /* maybe zero in legacy DB */
  if (!mp->mp_txnid)
    return MIN_TXNID;

  return mp->mp_txnid;
}
/* Return the txnid value to be used as a modification stamp for `txn`.
 *
 * Normally this is just txn->mt_txnid. When built with MDBX_DEBUG_LEGACY,
 * transactions whose id is below 2222 yield 0 instead
 * (NOTE(review): looks like a debugging aid that emulates the zero txnid
 * stamps of a legacy DB - confirm). */
static __inline txnid_t pp_txnid2chk(const MDBX_txn *txn) {
  const txnid_t current = txn->mt_txnid;
#ifdef MDBX_DEBUG_LEGACY
  if (current < 2222)
    return 0;
#endif
  return current;
}
/* Allocate page numbers and memory for writing. Maintain mt_last_reclaimed,
* mt_reclaimed_pglist and mt_next_pgno. Set MDBX_TXN_ERROR on failure.
*
@ -5156,7 +5174,7 @@ skip_cache:
if (unlikely((rc = mdbx_node_read(
&recur.outer,
page_node(np, recur.outer.mc_ki[recur.outer.mc_top]),
&data)) != MDBX_SUCCESS))
&data, pp_txnid4chk(np, txn))) != MDBX_SUCCESS))
goto fail;
if ((flags & MDBX_LIFORECLAIM) && !txn->tw.lifo_reclaimed) {
@ -7779,7 +7797,7 @@ __hot static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
(flush_end > dp->mp_pgno + npages) ? flush_end : dp->mp_pgno + npages;
*env->me_unsynced_pages += npages;
dp->mp_flags &= ~P_DIRTY;
dp->mp_txnid = txn->mt_txnid;
dp->mp_txnid = pp_txnid2chk(txn);
if ((env->me_flags & MDBX_WRITEMAP) == 0) {
const size_t size = pgno2bytes(env, npages);
@ -8199,7 +8217,7 @@ int mdbx_txn_commit(MDBX_txn *txn) {
goto fail;
}
MDBX_db *db = &txn->mt_dbs[i];
db->md_mod_txnid = txn->mt_txnid;
db->md_mod_txnid = pp_txnid2chk(txn);
data.iov_base = db;
WITH_CURSOR_TRACKING(couple.outer,
rc = mdbx_cursor_put(&couple.outer,
@ -8224,7 +8242,7 @@ int mdbx_txn_commit(MDBX_txn *txn) {
rc = mdbx_page_flush(txn, 0);
if (likely(rc == MDBX_SUCCESS)) {
if (txn->mt_dbs[MAIN_DBI].md_flags & DBI_DIRTY)
txn->mt_dbs[MAIN_DBI].md_mod_txnid = txn->mt_txnid;
txn->mt_dbs[MAIN_DBI].md_mod_txnid = pp_txnid2chk(txn);
MDBX_meta meta, *head = mdbx_meta_head(env);
meta.mm_magic_and_version = head->mm_magic_and_version;
@ -10839,7 +10857,7 @@ static int mdbx_cursor_push(MDBX_cursor *mc, MDBX_page *mp) {
*
* Returns 0 on success, non-zero on failure. */
__hot static int mdbx_page_get(MDBX_cursor *mc, pgno_t pgno, MDBX_page **ret,
int *lvl) {
int *lvl, const txnid_t pp_txnid) {
MDBX_txn *txn = mc->mc_txn;
if (unlikely(pgno >= txn->mt_next_pgno)) {
mdbx_debug("page %" PRIaPGNO " not found", pgno);
@ -10848,12 +10866,12 @@ __hot static int mdbx_page_get(MDBX_cursor *mc, pgno_t pgno, MDBX_page **ret,
MDBX_env *const env = txn->mt_env;
MDBX_page *p = NULL;
int level;
mdbx_assert(env, ((txn->mt_flags ^ env->me_flags) & MDBX_WRITEMAP) == 0);
mdbx_assert(env, pp_txnid >= MIN_TXNID && pp_txnid <= txn->mt_txnid);
const uint16_t illegal_bits = (txn->mt_flags & MDBX_TXN_RDONLY)
? P_LOOSE | P_SUBP | P_META | P_DIRTY
: P_LOOSE | P_SUBP | P_META;
const uint64_t txnid = txn->mt_txnid;
int level;
if (unlikely((txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0)) {
level = 1;
do {
@ -10882,11 +10900,15 @@ dirty:
goto corrupted;
}
if (unlikely((p->mp_flags & illegal_bits) != 0 ||
p->mp_txnid > ((p->mp_flags & P_DIRTY) ? UINT64_MAX : txnid))) {
mdbx_error("invalid page's flags (0x%x) or txnid %" PRIaTXN
" > (actual) %" PRIaTXN " (expected)",
p->mp_flags, p->mp_txnid, mc->mc_txn->mt_txnid);
if (unlikely(p->mp_txnid >
((p->mp_flags & P_DIRTY) ? UINT64_MAX : parentpage_txnid))) {
mdbx_error("invalid page's txnid %" PRIaTXN "> %" PRIaTXN " of parent page",
p->mp_txnid, parentpage_txnid);
goto corrupted;
}
if (unlikely((p->mp_flags & illegal_bits))) {
mdbx_error("invalid page's flags (0x%x)", p->mp_flags);
goto corrupted;
}
@ -10965,7 +10987,8 @@ __hot static int mdbx_page_search_root(MDBX_cursor *mc, const MDBX_val *key,
mdbx_cassert(mc, i < (int)page_numkeys(mp));
node = page_node(mp, i);
if (unlikely((rc = mdbx_page_get(mc, node_pgno(node), &mp, NULL)) != 0))
if (unlikely((rc = mdbx_page_get(mc, node_pgno(node), &mp, NULL,
pp_txnid4chk(mp, mc->mc_txn))) != 0))
return rc;
mc->mc_ki[mc->mc_top] = (indx_t)i;
@ -11042,7 +11065,10 @@ static int mdbx_fetch_sdb(MDBX_txn *txn, MDBX_dbi dbi) {
return MDBX_BAD_DBI;
if (unlikely((node_flags(node) & (F_DUPDATA | F_SUBDATA)) != F_SUBDATA))
return MDBX_INCOMPATIBLE; /* not a named DB */
rc = mdbx_node_read(&couple.outer, node, &data);
const txnid_t pp_txnid =
pp_txnid4chk(couple.outer.mc_pg[couple.outer.mc_top], txn);
rc = mdbx_node_read(&couple.outer, node, &data, pp_txnid);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@ -11057,6 +11083,9 @@ static int mdbx_fetch_sdb(MDBX_txn *txn, MDBX_dbi dbi) {
return MDBX_INCOMPATIBLE;
memcpy(db, data.iov_base, sizeof(MDBX_db));
mdbx_tassert(txn, txn->mt_txnid >= pp_txnid);
if (unlikely(db->md_mod_txnid > pp_txnid))
return MDBX_CORRUPTED;
rc = mdbx_setup_dbx(dbx, db, txn->mt_env->me_psize);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@ -11076,7 +11105,8 @@ __hot static int mdbx_page_search_lowest(MDBX_cursor *mc) {
MDBX_node *node = page_node(mp, 0);
int rc;
if (unlikely((rc = mdbx_page_get(mc, node_pgno(node), &mp, NULL)) != 0))
if (unlikely((rc = mdbx_page_get(mc, node_pgno(node), &mp, NULL,
pp_txnid4chk(mp, mc->mc_txn))) != 0))
return rc;
mc->mc_ki[mc->mc_top] = 0;
@ -11125,9 +11155,15 @@ __hot static int mdbx_page_search(MDBX_cursor *mc, const MDBX_val *key,
}
mdbx_cassert(mc, root >= NUM_METAS);
if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root)
if (unlikely((rc = mdbx_page_get(mc, root, &mc->mc_pg[0], NULL)) != 0))
if (!mc->mc_pg[0] || mc->mc_pg[0]->mp_pgno != root) {
if (unlikely((rc = mdbx_page_get(
mc, root, &mc->mc_pg[0], NULL,
(/* maybe zero in legacy DB */ mc->mc_db->md_mod_txnid &&
!(*mc->mc_dbistate & DBI_DIRTY))
? mc->mc_db->md_mod_txnid
: mc->mc_txn->mt_txnid)) != 0))
return rc;
}
mc->mc_snum = 1;
mc->mc_top = 0;
@ -11154,13 +11190,14 @@ __hot static int mdbx_page_search(MDBX_cursor *mc, const MDBX_val *key,
*
* Returns 0 on success, non-zero on failure. */
static __always_inline int mdbx_node_read(MDBX_cursor *mc, MDBX_node *node,
MDBX_val *data) {
MDBX_val *data,
const txnid_t pp_txnid) {
data->iov_len = node_ds(node);
data->iov_base = node_data(node);
if (unlikely(F_ISSET(node_flags(node), F_BIGDATA))) {
/* Read overflow data. */
MDBX_page *omp; /* overflow page */
int rc = mdbx_page_get(mc, node_largedata_pgno(node), &omp, NULL);
int rc = mdbx_page_get(mc, node_largedata_pgno(node), &omp, NULL, pp_txnid);
if (unlikely((rc != MDBX_SUCCESS))) {
mdbx_debug("read overflow page %" PRIaPGNO " failed",
node_largedata_pgno(node));
@ -11323,8 +11360,9 @@ static int mdbx_cursor_sibling(MDBX_cursor *mc, int move_right) {
}
mdbx_cassert(mc, IS_BRANCH(mc->mc_pg[mc->mc_top]));
indx = page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
if (unlikely((rc = mdbx_page_get(mc, node_pgno(indx), &mp, NULL)) != 0)) {
indx = page_node(mp = mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
if (unlikely((rc = mdbx_page_get(mc, node_pgno(indx), &mp, NULL,
pp_txnid4chk(mp, mc->mc_txn))) != 0)) {
/* mc will be inconsistent if caller does mc_snum++ as above */
mc->mc_flags &= ~(C_INITIALIZED | C_EOF);
return rc;
@ -11414,12 +11452,14 @@ skip:
node = page_node(mp, mc->mc_ki[mc->mc_top]);
if (F_ISSET(node_flags(node), F_DUPDATA)) {
rc = mdbx_xcursor_init1(mc, node);
rc = mdbx_xcursor_init1(mc, node, mp);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
}
if (data) {
if (unlikely((rc = mdbx_node_read(mc, node, data)) != MDBX_SUCCESS))
if (unlikely((rc = mdbx_node_read(mc, node, data,
pp_txnid4chk(mp, mc->mc_txn))) !=
MDBX_SUCCESS))
return rc;
if (F_ISSET(node_flags(node), F_DUPDATA)) {
@ -11506,12 +11546,14 @@ static int mdbx_cursor_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
node = page_node(mp, mc->mc_ki[mc->mc_top]);
if (F_ISSET(node_flags(node), F_DUPDATA)) {
rc = mdbx_xcursor_init1(mc, node);
rc = mdbx_xcursor_init1(mc, node, mp);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
}
if (data) {
if (unlikely((rc = mdbx_node_read(mc, node, data)) != MDBX_SUCCESS))
if (unlikely((rc = mdbx_node_read(mc, node, data,
pp_txnid4chk(mp, mc->mc_txn))) !=
MDBX_SUCCESS))
return rc;
if (F_ISSET(node_flags(node), F_DUPDATA)) {
@ -11695,7 +11737,7 @@ set1:
}
if (F_ISSET(node_flags(node), F_DUPDATA)) {
rc = mdbx_xcursor_init1(mc, node);
rc = mdbx_xcursor_init1(mc, node, mp);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
}
@ -11738,7 +11780,10 @@ set1:
}
}
MDBX_val olddata;
if (unlikely((rc = mdbx_node_read(mc, node, &olddata)) != MDBX_SUCCESS))
if (unlikely((rc = mdbx_node_read(
mc, node, &olddata,
pp_txnid4chk(mc->mc_pg[mc->mc_top], mc->mc_txn))) !=
MDBX_SUCCESS))
return rc;
rc = mc->mc_dbx->md_dcmp(&aligned_data, &olddata);
if (rc) {
@ -11750,7 +11795,10 @@ set1:
} else {
if (mc->mc_xcursor)
mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF);
if (unlikely((rc = mdbx_node_read(mc, node, data)) != MDBX_SUCCESS))
if (unlikely((rc = mdbx_node_read(
mc, node, data,
pp_txnid4chk(mc->mc_pg[mc->mc_top], mc->mc_txn))) !=
MDBX_SUCCESS))
return rc;
}
}
@ -11790,14 +11838,17 @@ static int mdbx_cursor_first(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) {
MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], 0);
if (likely(data)) {
if (F_ISSET(node_flags(node), F_DUPDATA)) {
rc = mdbx_xcursor_init1(mc, node);
rc = mdbx_xcursor_init1(mc, node, mc->mc_pg[mc->mc_top]);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
rc = mdbx_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
if (unlikely(rc))
return rc;
} else {
if (unlikely((rc = mdbx_node_read(mc, node, data)) != MDBX_SUCCESS))
if (unlikely((rc = mdbx_node_read(
mc, node, data,
pp_txnid4chk(mc->mc_pg[mc->mc_top], mc->mc_txn))) !=
MDBX_SUCCESS))
return rc;
}
}
@ -11834,14 +11885,17 @@ static int mdbx_cursor_last(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data) {
MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
if (likely(data)) {
if (F_ISSET(node_flags(node), F_DUPDATA)) {
rc = mdbx_xcursor_init1(mc, node);
rc = mdbx_xcursor_init1(mc, node, mc->mc_pg[mc->mc_top]);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
rc = mdbx_cursor_last(&mc->mc_xcursor->mx_cursor, data, NULL);
if (unlikely(rc))
return rc;
} else {
if (unlikely((rc = mdbx_node_read(mc, node, data)) != MDBX_SUCCESS))
if (unlikely((rc = mdbx_node_read(
mc, node, data,
pp_txnid4chk(mc->mc_pg[mc->mc_top], mc->mc_txn))) !=
MDBX_SUCCESS))
return rc;
}
}
@ -11887,7 +11941,7 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
if (data) {
if (F_ISSET(node_flags(node), F_DUPDATA)) {
if (unlikely(!(mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED))) {
rc = mdbx_xcursor_init1(mc, node);
rc = mdbx_xcursor_init1(mc, node, mp);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
rc = mdbx_cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL);
@ -11897,7 +11951,7 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
rc = mdbx_cursor_get(&mc->mc_xcursor->mx_cursor, data, NULL,
MDBX_GET_CURRENT);
} else {
rc = mdbx_node_read(mc, node, data);
rc = mdbx_node_read(mc, node, data, pp_txnid4chk(mp, mc->mc_txn));
}
if (unlikely(rc))
return rc;
@ -11998,7 +12052,8 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]);
if (!F_ISSET(node_flags(node), F_DUPDATA)) {
get_key_optional(node, key);
rc = mdbx_node_read(mc, node, data);
rc = mdbx_node_read(mc, node, data,
pp_txnid4chk(mc->mc_pg[mc->mc_top], mc->mc_txn));
break;
}
}
@ -12359,7 +12414,7 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
insert_key = insert_data = (rc != MDBX_SUCCESS);
uint16_t fp_flags = P_LEAF | P_DIRTY;
MDBX_page *fp = env->me_pbuf;
fp->mp_txnid = INVALID_TXNID;
fp->mp_txnid = pp_txnid2chk(mc->mc_txn);
if (insert_key) {
/* The key does not exist */
mdbx_debug("inserting key at index %i", mc->mc_ki[mc->mc_top]);
@ -12430,7 +12485,9 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
const pgno_t pg = node_largedata_pgno(node);
MDBX_page *omp;
if (unlikely((rc2 = mdbx_page_get(mc, pg, &omp, &level)) != 0))
if (unlikely((rc2 = mdbx_page_get(
mc, pg, &omp, &level,
pp_txnid4chk(mc->mc_pg[mc->mc_top], mc->mc_txn))) != 0))
return rc2;
ovpages = omp->mp_pages;
@ -12629,7 +12686,8 @@ int mdbx_cursor_put(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data,
offset = env->me_psize - (unsigned)olddata.iov_len;
flags |= F_DUPDATA | F_SUBDATA;
nested_dupdb.md_root = mp->mp_pgno;
nested_dupdb.md_seq = nested_dupdb.md_mod_txnid = 0;
nested_dupdb.md_seq = 0;
nested_dupdb.md_mod_txnid = pp_txnid2chk(mc->mc_txn);
sub_root = mp;
}
if (mp != fp) {
@ -12764,7 +12822,7 @@ new_sub:;
SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE);
if ((flags & MDBX_CURRENT) == 0) {
xflags -= MDBX_CURRENT;
rc2 = mdbx_xcursor_init1(mc, node);
rc2 = mdbx_xcursor_init1(mc, node, mc->mc_pg[mc->mc_top]);
if (unlikely(rc2 != MDBX_SUCCESS))
return rc2;
}
@ -12811,6 +12869,7 @@ new_sub:;
rc = mdbx_cursor_put(&mc->mc_xcursor->mx_cursor, data, &xdata, xflags);
if (flags & F_SUBDATA) {
void *db = node_data(node);
mc->mc_xcursor->mx_db.md_mod_txnid = pp_txnid2chk(mc->mc_txn);
memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDBX_db));
}
insert_data = (ecount != (size_t)mc->mc_xcursor->mx_db.md_entries);
@ -12900,6 +12959,7 @@ int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) {
if (node_flags(node) & F_SUBDATA) {
/* update subDB info */
void *db = node_data(node);
mc->mc_xcursor->mx_db.md_mod_txnid = pp_txnid2chk(mc->mc_txn);
memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDBX_db));
} else {
MDBX_cursor *m2;
@ -12952,9 +13012,9 @@ int mdbx_cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) {
/* add overflow pages to free list */
if (F_ISSET(node_flags(node), F_BIGDATA)) {
MDBX_page *omp;
if (unlikely(
(rc = mdbx_page_get(mc, node_largedata_pgno(node), &omp, NULL)) ||
(rc = mdbx_page_retire(mc, omp))))
if (unlikely((rc = mdbx_page_get(mc, node_largedata_pgno(node), &omp, NULL,
pp_txnid4chk(mp, mc->mc_txn))) ||
(rc = mdbx_page_retire(mc, omp))))
goto fail;
}
@ -13328,7 +13388,8 @@ static int mdbx_xcursor_init0(MDBX_cursor *mc) {
* [in] mc The main cursor whose sorted-dups cursor is to be initialized.
* [in] node The data containing the MDBX_db record for the sorted-dup database.
*/
static int mdbx_xcursor_init1(MDBX_cursor *mc, MDBX_node *node) {
static int mdbx_xcursor_init1(MDBX_cursor *mc, MDBX_node *node,
const MDBX_page *mp) {
MDBX_xcursor *mx = mc->mc_xcursor;
if (unlikely(mx == nullptr))
return MDBX_CORRUPTED;
@ -13337,6 +13398,13 @@ static int mdbx_xcursor_init1(MDBX_cursor *mc, MDBX_node *node) {
if (unlikely(node_ds(node) != sizeof(MDBX_db)))
return MDBX_CORRUPTED;
memcpy(&mx->mx_db, node_data(node), sizeof(MDBX_db));
const txnid_t pp_txnid = IS_DIRTY(mp) ? mc->mc_txn->mt_txnid : mp->mp_txnid;
if (unlikely(mx->mx_db.md_mod_txnid > pp_txnid)) {
mdbx_error("nested-db.md_mod_txnid (%" PRIaTXN ") > page-txnid (%" PRIaTXN
")",
mx->mx_db.md_mod_txnid, pp_txnid);
return MDBX_CORRUPTED;
}
mx->mx_cursor.mc_pg[0] = 0;
mx->mx_cursor.mc_snum = 0;
mx->mx_cursor.mc_top = 0;
@ -13351,6 +13419,7 @@ static int mdbx_xcursor_init1(MDBX_cursor *mc, MDBX_node *node) {
mx->mx_db.md_overflow_pages = 0;
mx->mx_db.md_entries = page_numkeys(fp);
mx->mx_db.md_root = fp->mp_pgno;
mx->mx_db.md_mod_txnid = mp->mp_txnid;
mx->mx_cursor.mc_snum = 1;
mx->mx_cursor.mc_top = 0;
mx->mx_cursor.mc_flags =
@ -14339,7 +14408,8 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
} else if (IS_BRANCH(mp) && nkeys == 1) {
mdbx_debug("%s", "collapsing root page!");
mc->mc_db->md_root = node_pgno(page_node(mp, 0));
rc = mdbx_page_get(mc, mc->mc_db->md_root, &mc->mc_pg[0], NULL);
rc = mdbx_page_get(mc, mc->mc_db->md_root, &mc->mc_pg[0], NULL,
pp_txnid4chk(mp, mc->mc_txn));
if (unlikely(rc != MDBX_SUCCESS))
return rc;
mc->mc_db->md_depth--;
@ -14401,7 +14471,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
if (mn.mc_ki[pre_top] > 0) {
rc = mdbx_page_get(
&mn, node_pgno(page_node(mn.mc_pg[pre_top], mn.mc_ki[pre_top] - 1)),
&left, NULL);
&left, NULL, pp_txnid4chk(mn.mc_pg[pre_top], mc->mc_txn));
if (unlikely(rc != MDBX_SUCCESS))
return rc;
mdbx_cassert(mc, PAGETYPE(left) == PAGETYPE(mc->mc_pg[mc->mc_top]));
@ -14409,7 +14479,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
if (mn.mc_ki[pre_top] + 1u < page_numkeys(mn.mc_pg[pre_top])) {
rc = mdbx_page_get(
&mn, node_pgno(page_node(mn.mc_pg[pre_top], mn.mc_ki[pre_top] + 1)),
&right, NULL);
&right, NULL, pp_txnid4chk(mn.mc_pg[pre_top], mc->mc_txn));
if (unlikely(rc != MDBX_SUCCESS))
return rc;
mdbx_cassert(mc, PAGETYPE(right) == PAGETYPE(mc->mc_pg[mc->mc_top]));
@ -14628,7 +14698,8 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
}
if ((options & C_RETIRING) == 0) {
MDBX_page *lp;
int err = mdbx_page_get(mc, node_largedata_pgno(node), &lp, NULL);
int err = mdbx_page_get(mc, node_largedata_pgno(node), &lp, NULL,
pp_txnid4chk(mp, mc->mc_txn));
if (unlikely(err != MDBX_SUCCESS))
return err;
mdbx_assert(env, IS_OVERFLOW(lp));
@ -14826,7 +14897,8 @@ static __cold int mdbx_cursor_check(MDBX_cursor *mc, unsigned options) {
return MDBX_CURSOR_FULL;
pgno_t pgno = node_pgno(node);
MDBX_page *np;
int rc = mdbx_page_get(mc, pgno, &np, NULL);
int rc =
mdbx_page_get(mc, pgno, &np, NULL, pp_txnid4chk(mp, mc->mc_txn));
mdbx_cassert(mc, rc == MDBX_SUCCESS);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@ -14934,7 +15006,7 @@ static int mdbx_cursor_del0(MDBX_cursor *mc) {
if (!(node_flags(node) & F_SUBDATA))
m3->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node);
} else {
rc = mdbx_xcursor_init1(m3, node);
rc = mdbx_xcursor_init1(m3, node, m3->mc_pg[m3->mc_top]);
if (unlikely(rc != MDBX_SUCCESS))
break;
m3->mc_xcursor->mx_cursor.mc_flags |= C_DEL;
@ -14964,7 +15036,7 @@ static int mdbx_cursor_del0(MDBX_cursor *mc) {
if (!(node_flags(node) & F_SUBDATA))
mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node);
} else {
rc = mdbx_xcursor_init1(mc, node);
rc = mdbx_xcursor_init1(mc, node, mc->mc_pg[mc->mc_top]);
if (likely(rc != MDBX_SUCCESS))
mc->mc_xcursor->mx_cursor.mc_flags |= C_DEL;
}
@ -15687,7 +15759,8 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
couple.outer.mc_flags = couple.inner.mx_cursor.mc_flags =
C_COPYING | C_SKIPORD;
rc = mdbx_page_get(&couple.outer, *pg, &couple.outer.mc_pg[0], NULL);
rc = mdbx_page_get(&couple.outer, *pg, &couple.outer.mc_pg[0], NULL,
my->mc_txn->mt_txnid);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
rc = mdbx_page_search_root(&couple.outer, NULL, MDBX_PS_FIRST);
@ -15732,7 +15805,8 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
const pgno_t pgno = node_largedata_pgno(node);
poke_pgno(node_data(node), my->mc_next_pgno);
rc = mdbx_page_get(&couple.outer, pgno, &omp, NULL);
rc = mdbx_page_get(&couple.outer, pgno, &omp, NULL,
pp_txnid4chk(mp, my->mc_txn));
if (unlikely(rc != MDBX_SUCCESS))
goto done;
if (my->mc_wlen[toggle] >= MDBX_WBUF) {
@ -15786,7 +15860,7 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
rc = mdbx_page_get(
&couple.outer,
node_pgno(page_node(mp, couple.outer.mc_ki[couple.outer.mc_top])),
&mp, NULL);
&mp, NULL, pp_txnid4chk(mp, my->mc_txn));
if (unlikely(rc != MDBX_SUCCESS))
goto done;
couple.outer.mc_top++;
@ -16949,7 +17023,8 @@ static int mdbx_drop0(MDBX_cursor *mc, int subs) {
MDBX_node *node = page_node(mp, i);
if (node_flags(node) & F_BIGDATA) {
MDBX_page *omp;
rc = mdbx_page_get(mc, node_largedata_pgno(node), &omp, NULL);
rc = mdbx_page_get(mc, node_largedata_pgno(node), &omp, NULL,
pp_txnid4chk(mp, mc->mc_txn));
if (unlikely(rc))
goto done;
mdbx_cassert(mc, IS_OVERFLOW(omp));
@ -16959,7 +17034,7 @@ static int mdbx_drop0(MDBX_cursor *mc, int subs) {
if (!mc->mc_db->md_overflow_pages && !subs)
break;
} else if (subs && (node_flags(node) & F_SUBDATA)) {
rc = mdbx_xcursor_init1(mc, node);
rc = mdbx_xcursor_init1(mc, node, mp);
if (unlikely(rc != MDBX_SUCCESS))
goto done;
rc = mdbx_drop0(&mc->mc_xcursor->mx_cursor, 0);
@ -17531,10 +17606,11 @@ static int __cold mdbx_walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const db,
const char *name, int deep);
/* Depth-first tree traversal. */
static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, pgno_t pgno,
const char *name, int deep) {
const char *name, int deep,
txnid_t parent_txnid) {
assert(pgno != P_INVALID);
MDBX_page *mp;
int rc = mdbx_page_get(ctx->mw_cursor, pgno, &mp, NULL);
int rc = mdbx_page_get(ctx->mw_cursor, pgno, &mp, NULL, parent_txnid);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
@ -17595,7 +17671,8 @@ static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, pgno_t pgno,
const pgno_t large_pgno = node_largedata_pgno(node);
MDBX_page *op;
rc = mdbx_page_get(ctx->mw_cursor, large_pgno, &op, NULL);
rc = mdbx_page_get(ctx->mw_cursor, large_pgno, &op, NULL,
pp_txnid4chk(mp, ctx->mw_txn));
if (unlikely(rc != MDBX_SUCCESS))
return rc;
rc = mdbx_page_check(ctx->mw_cursor, op, 0);
@ -17610,14 +17687,13 @@ static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, pgno_t pgno,
const size_t over_header = PAGEHDRSZ;
const size_t over_payload = node_ds(node);
const size_t over_unused =
pgno2bytes(ctx->mw_cursor->mc_txn->mt_env, op->mp_pages) -
over_payload - over_header;
const size_t over_unused = pgno2bytes(ctx->mw_txn->mt_env, op->mp_pages) -
over_payload - over_header;
rc = ctx->mw_visitor(
large_pgno, op->mp_pages, ctx->mw_user, deep, name,
pgno2bytes(ctx->mw_cursor->mc_txn->mt_env, op->mp_pages),
MDBX_page_large, 1, over_payload, over_header, over_unused);
rc = ctx->mw_visitor(large_pgno, op->mp_pages, ctx->mw_user, deep, name,
pgno2bytes(ctx->mw_txn->mt_env, op->mp_pages),
MDBX_page_large, 1, over_payload, over_header,
over_unused);
} break;
case F_SUBDATA /* sub-db */: {
@ -17690,8 +17766,8 @@ static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, pgno_t pgno,
}
rc = ctx->mw_visitor(mp->mp_pgno, 1, ctx->mw_user, deep, name,
ctx->mw_cursor->mc_txn->mt_env->me_psize, type, nkeys,
payload_size, header_size, unused_size + align_bytes);
ctx->mw_txn->mt_env->me_psize, type, nkeys, payload_size,
header_size, unused_size + align_bytes);
if (unlikely(rc != MDBX_SUCCESS))
return (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc;
@ -17702,7 +17778,8 @@ static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, pgno_t pgno,
MDBX_node *node = page_node(mp, i);
if (type == MDBX_page_branch) {
rc = mdbx_walk_tree(ctx, node_pgno(node), name, deep + 1);
rc = mdbx_walk_tree(ctx, node_pgno(node), name, deep + 1,
pp_txnid4chk(mp, ctx->mw_txn));
if (unlikely(rc != MDBX_SUCCESS)) {
if (rc != MDBX_RESULT_TRUE)
return rc;
@ -17749,7 +17826,8 @@ static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, pgno_t pgno,
assert(ctx->mw_cursor->mc_xcursor ==
&container_of(ctx->mw_cursor, MDBX_cursor_couple, outer)->inner);
ctx->mw_cursor = &ctx->mw_cursor->mc_xcursor->mx_cursor;
rc = mdbx_walk_tree(ctx, db.md_root, name, deep + 1);
rc = mdbx_walk_tree(ctx, db.md_root, name, deep + 1,
pp_txnid4chk(mp, ctx->mw_txn));
MDBX_xcursor *inner_xcursor =
container_of(ctx->mw_cursor, MDBX_xcursor, mx_cursor);
MDBX_cursor_couple *couple =
@ -17783,7 +17861,7 @@ static int __cold mdbx_walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const db,
}
couple.outer.mc_next = ctx->mw_cursor;
ctx->mw_cursor = &couple.outer;
rc = mdbx_walk_tree(ctx, db->md_root, name, deep);
rc = mdbx_walk_tree(ctx, db->md_root, name, deep, ctx->mw_txn->mt_txnid);
ctx->mw_cursor = couple.outer.mc_next;
return rc;
}