mdbx: add mdbx_page_check().

Change-Id: I8350d774f841726ca967be732593e33bfa8a6230
This commit is contained in:
Leonid Yuriev 2019-10-04 02:29:27 +03:00
parent 8768641872
commit 9e92ea2372

View File

@ -1604,6 +1604,9 @@ static int __must_check_result mdbx_update_key(MDBX_cursor *mc,
static void mdbx_cursor_pop(MDBX_cursor *mc); static void mdbx_cursor_pop(MDBX_cursor *mc);
static int __must_check_result mdbx_cursor_push(MDBX_cursor *mc, MDBX_page *mp); static int __must_check_result mdbx_cursor_push(MDBX_cursor *mc, MDBX_page *mp);
static int __must_check_result mdbx_page_check(MDBX_env *env,
const MDBX_page *const mp,
bool maybe_unfinished);
static int __must_check_result mdbx_cursor_check(MDBX_cursor *mc, bool pending); static int __must_check_result mdbx_cursor_check(MDBX_cursor *mc, bool pending);
static int __must_check_result mdbx_cursor_del0(MDBX_cursor *mc); static int __must_check_result mdbx_cursor_del0(MDBX_cursor *mc);
static int __must_check_result mdbx_del0(MDBX_txn *txn, MDBX_dbi dbi, static int __must_check_result mdbx_del0(MDBX_txn *txn, MDBX_dbi dbi,
@ -7868,11 +7871,14 @@ static MDBX_node *__hot mdbx_node_search(MDBX_cursor *mc, MDBX_val *key,
unsigned i = 0; unsigned i = 0;
if (IS_LEAF2(mp)) { if (IS_LEAF2(mp)) {
nodekey.iov_len = mc->mc_db->md_xsize; mdbx_cassert(mc, mp->mp_leaf2_ksize == mc->mc_db->md_xsize);
nodekey.iov_len = mp->mp_leaf2_ksize;
node = NODEPTR(mp, 0); /* fake */ node = NODEPTR(mp, 0); /* fake */
while (low <= high) { while (low <= high) {
i = (low + high) >> 1; i = (low + high) >> 1;
nodekey.iov_base = LEAF2KEY(mp, i, nodekey.iov_len); nodekey.iov_base = LEAF2KEY(mp, i, nodekey.iov_len);
mdbx_cassert(mc, (char *)mp + mc->mc_txn->mt_env->me_psize >=
(char *)nodekey.iov_base + nodekey.iov_len);
rc = cmp(key, &nodekey); rc = cmp(key, &nodekey);
mdbx_debug("found leaf index %u [%s], rc = %i", i, DKEY(&nodekey), rc); mdbx_debug("found leaf index %u [%s], rc = %i", i, DKEY(&nodekey), rc);
if (rc == 0) if (rc == 0)
@ -7889,6 +7895,8 @@ static MDBX_node *__hot mdbx_node_search(MDBX_cursor *mc, MDBX_val *key,
node = NODEPTR(mp, i); node = NODEPTR(mp, i);
nodekey.iov_len = NODEKSZ(node); nodekey.iov_len = NODEKSZ(node);
nodekey.iov_base = NODEKEY(node); nodekey.iov_base = NODEKEY(node);
mdbx_cassert(mc, (char *)mp + mc->mc_txn->mt_env->me_psize >=
(char *)nodekey.iov_base + nodekey.iov_len);
rc = cmp(key, &nodekey); rc = cmp(key, &nodekey);
if (IS_LEAF(mp)) if (IS_LEAF(mp))
@ -8037,6 +8045,12 @@ done:
} }
/* TODO: more checks here, including p->mp_validator */ /* TODO: more checks here, including p->mp_validator */
if (mdbx_audit_enabled()) {
int err = mdbx_page_check(env, p, true);
if (unlikely(err != MDBX_SUCCESS))
return err;
}
*ret = p; *ret = p;
if (lvl) if (lvl)
*lvl = level; *lvl = level;
@ -9477,9 +9491,11 @@ int mdbx_cursor_put(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data,
fp->mp_flags |= P_LEAF2; fp->mp_flags |= P_LEAF2;
fp->mp_leaf2_ksize = (uint16_t)data->iov_len; fp->mp_leaf2_ksize = (uint16_t)data->iov_len;
xdata.iov_len += 2 * data->iov_len; /* leave space for 2 more */ xdata.iov_len += 2 * data->iov_len; /* leave space for 2 more */
mdbx_cassert(mc, xdata.iov_len <= env->me_psize);
} else { } else {
xdata.iov_len += 2 * (sizeof(indx_t) + NODESIZE) + xdata.iov_len += 2 * (sizeof(indx_t) + NODESIZE) +
(dkey.iov_len & 1) + (data->iov_len & 1); (dkey.iov_len & 1) + (data->iov_len & 1);
mdbx_cassert(mc, xdata.iov_len <= env->me_psize);
} }
fp->mp_upper = (uint16_t)(xdata.iov_len - PAGEHDRSZ); fp->mp_upper = (uint16_t)(xdata.iov_len - PAGEHDRSZ);
olddata.iov_len = xdata.iov_len; /* pretend olddata is fp */ olddata.iov_len = xdata.iov_len; /* pretend olddata is fp */
@ -11457,6 +11473,133 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
static __cold int mdbx_page_check(MDBX_env *env, const MDBX_page *const mp,
bool maybe_unfinished) {
const unsigned nkeys = NUMKEYS(mp);
char *const end_of_page = (char *)mp + env->me_psize;
mdbx_assert(env, mp->mp_pgno >= MIN_PAGENO && mp->mp_pgno <= MAX_PAGENO);
if (unlikely(mp->mp_pgno < MIN_PAGENO || mp->mp_pgno > MAX_PAGENO))
return MDBX_CORRUPTED;
if (IS_OVERFLOW(mp)) {
mdbx_assert(env, mp->mp_pages >= 1 && mp->mp_pages < MAX_PAGENO / 2);
if (unlikely(mp->mp_pages < 1 && mp->mp_pages >= MAX_PAGENO / 2))
return MDBX_CORRUPTED;
mdbx_assert(env, mp->mp_pgno <= MAX_PAGENO - mp->mp_pages);
if (unlikely(mp->mp_pgno > MAX_PAGENO - mp->mp_pages))
return MDBX_CORRUPTED;
return MDBX_SUCCESS;
}
if (!(IS_DIRTY(mp) && maybe_unfinished)) {
mdbx_assert(env, nkeys >= 2 || !IS_BRANCH(mp));
if (unlikely(nkeys < 2 && IS_BRANCH(mp)))
return MDBX_CORRUPTED;
}
for (unsigned i = IS_LEAF(mp) ? 0 : 1; i < nkeys; ++i) {
const MDBX_node *const node = NODEPTR(mp, i);
if (IS_LEAF2(mp)) {
const size_t ksize = mp->mp_leaf2_ksize;
const char *const key = LEAF2KEY(mp, i, ksize);
mdbx_assert(env, key + ksize <= end_of_page);
if (unlikely(end_of_page < key + ksize))
return MDBX_CORRUPTED;
} else {
const char *node_end = (char *)node + NODESIZE;
mdbx_assert(env, node_end <= end_of_page);
if (unlikely(node_end > end_of_page))
return MDBX_CORRUPTED;
if (IS_LEAF(mp) || i > 0) {
size_t ksize = NODEKSZ(node);
char *key = NODEKEY(node);
mdbx_assert(env, key + ksize <= end_of_page);
if (unlikely(end_of_page < key + ksize))
return MDBX_CORRUPTED;
}
if (IS_BRANCH(mp))
continue;
if (node->mn_flags == F_BIGDATA /* data on large-page */) {
continue;
}
const size_t dsize = NODEDSZ(node);
const char *const data = NODEDATA(node);
mdbx_assert(env, data + dsize <= end_of_page);
if (unlikely(end_of_page < data + dsize))
return MDBX_CORRUPTED;
switch (node->mn_flags) {
default:
mdbx_assert(env, false);
return MDBX_CORRUPTED;
case 0 /* usual */:
break;
case F_SUBDATA /* sub-db */:
mdbx_assert(env, dsize == sizeof(MDBX_db));
if (unlikely(dsize != sizeof(MDBX_db)))
return MDBX_CORRUPTED;
break;
case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */:
mdbx_assert(env, dsize == sizeof(MDBX_db));
if (unlikely(dsize != sizeof(MDBX_db)))
return MDBX_CORRUPTED;
break;
case F_DUPDATA /* short sub-page */:
mdbx_assert(env, dsize > PAGEHDRSZ);
if (unlikely(dsize <= PAGEHDRSZ))
return MDBX_CORRUPTED;
else {
const MDBX_page *const sp = (MDBX_page *)data;
const char *const end_of_subpage = data + dsize;
const int nsubkeys = NUMKEYS(sp);
switch (sp->mp_flags & ~P_DIRTY /* ignore for sub-pages */) {
case P_LEAF | P_SUBP:
case P_LEAF | P_LEAF2 | P_SUBP:
break;
default:
mdbx_assert(env, false);
return MDBX_CORRUPTED;
}
for (int j = 0; j < nsubkeys; j++) {
if (IS_LEAF2(sp)) {
/* LEAF2 pages have no mp_ptrs[] or node headers */
size_t sub_ksize = sp->mp_leaf2_ksize;
char *sub_key = LEAF2KEY(sp, j, sub_ksize);
mdbx_assert(env, sub_key + sub_ksize <= end_of_subpage);
if (unlikely(end_of_subpage < sub_key + sub_ksize))
return MDBX_CORRUPTED;
} else {
mdbx_assert(env, IS_LEAF(sp));
if (unlikely(!IS_LEAF(sp)))
return MDBX_CORRUPTED;
const MDBX_node *const sub_node = NODEPTR(sp, j);
const char *sub_node_end = (char *)sub_node + NODESIZE;
mdbx_assert(env, sub_node_end <= end_of_subpage);
if (unlikely(sub_node_end > end_of_subpage))
return MDBX_CORRUPTED;
mdbx_assert(env, sub_node->mn_flags == 0);
if (unlikely(sub_node->mn_flags != 0))
return MDBX_CORRUPTED;
size_t sub_ksize = NODEKSZ(sub_node);
char *sub_key = NODEKEY(sub_node);
size_t sub_dsize = NODEDSZ(sub_node);
char *sub_data = NODEDATA(sub_node);
mdbx_assert(env, sub_key + sub_ksize <= end_of_subpage);
if (unlikely(end_of_subpage < sub_key + sub_ksize))
return MDBX_CORRUPTED;
mdbx_assert(env, sub_data + sub_dsize <= end_of_subpage);
if (unlikely(end_of_subpage < sub_data + sub_dsize))
return MDBX_CORRUPTED;
}
}
}
break;
}
}
}
return MDBX_SUCCESS;
}
static __cold int mdbx_cursor_check(MDBX_cursor *mc, bool pending) { static __cold int mdbx_cursor_check(MDBX_cursor *mc, bool pending) {
mdbx_cassert(mc, mc->mc_top == mc->mc_snum - 1); mdbx_cassert(mc, mc->mc_top == mc->mc_snum - 1);
if (unlikely(mc->mc_top != mc->mc_snum - 1)) if (unlikely(mc->mc_top != mc->mc_snum - 1))
@ -11491,6 +11634,10 @@ static __cold int mdbx_cursor_check(MDBX_cursor *mc, bool pending) {
return MDBX_CURSOR_FULL; return MDBX_CURSOR_FULL;
} }
int err = mdbx_page_check(mc->mc_txn->mt_env, mp, pending);
if (unlikely(err != MDBX_SUCCESS))
return err;
for (unsigned i = 0; i < numkeys; ++i) { for (unsigned i = 0; i < numkeys; ++i) {
MDBX_node *node = NODEPTR(mp, i); MDBX_node *node = NODEPTR(mp, i);
if (branch) { if (branch) {
@ -11507,6 +11654,9 @@ static __cold int mdbx_cursor_check(MDBX_cursor *mc, bool pending) {
mdbx_cassert(mc, nested_leaf == expect_nested_leaf); mdbx_cassert(mc, nested_leaf == expect_nested_leaf);
if (unlikely(nested_leaf != expect_nested_leaf)) if (unlikely(nested_leaf != expect_nested_leaf))
return MDBX_CURSOR_FULL; return MDBX_CURSOR_FULL;
err = mdbx_page_check(mc->mc_txn->mt_env, np, pending);
if (unlikely(err != MDBX_SUCCESS))
return err;
} }
} }
} }
@ -14135,6 +14285,10 @@ static int __cold mdbx_env_walk(mdbx_walk_ctx_t *ctx, const char *dbi,
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
return rc; return rc;
rc = mdbx_page_check(ctx->mw_cursor.mc_txn->mt_env, mp, false);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
const int nkeys = NUMKEYS(mp); const int nkeys = NUMKEYS(mp);
size_t header_size = IS_LEAF2(mp) ? PAGEHDRSZ : PAGEHDRSZ + mp->mp_lower; size_t header_size = IS_LEAF2(mp) ? PAGEHDRSZ : PAGEHDRSZ + mp->mp_lower;
size_t unused_size = SIZELEFT(mp); size_t unused_size = SIZELEFT(mp);
@ -14188,6 +14342,9 @@ static int __cold mdbx_env_walk(mdbx_walk_ctx_t *ctx, const char *dbi,
pgno_t large_pgno; pgno_t large_pgno;
memcpy(&large_pgno, NODEDATA(node), sizeof(pgno_t)); memcpy(&large_pgno, NODEDATA(node), sizeof(pgno_t));
rc = mdbx_page_get(&ctx->mw_cursor, large_pgno, &op, NULL); rc = mdbx_page_get(&ctx->mw_cursor, large_pgno, &op, NULL);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
rc = mdbx_page_check(ctx->mw_cursor.mc_txn->mt_env, op, false);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
return rc; return rc;