mdbx: rework mdbx_chk & tree-traversal.

Change-Id: Idc131539426fe0cbb97a105cff2d0a12b1496bfe
This commit is contained in:
Leonid Yuriev 2020-09-04 02:21:00 +03:00
parent 9a5df2b284
commit b1877d08ae
4 changed files with 293 additions and 232 deletions

7
mdbx.h
View File

@ -4098,14 +4098,15 @@ mdbx_env_get_oomfunc(const MDBX_env *env);
/** Page types for traverse the b-tree. /** Page types for traverse the b-tree.
* \see mdbx_env_pgwalk() \see MDBX_pgvisitor_func */ * \see mdbx_env_pgwalk() \see MDBX_pgvisitor_func */
enum MDBX_page_type_t { enum MDBX_page_type_t {
MDBX_page_void, MDBX_page_broken,
MDBX_page_meta, MDBX_page_meta,
MDBX_page_large, MDBX_page_large,
MDBX_page_branch, MDBX_page_branch,
MDBX_page_leaf, MDBX_page_leaf,
MDBX_page_dupfixed_leaf, MDBX_page_dupfixed_leaf,
MDBX_subpage_leaf, MDBX_subpage_leaf,
MDBX_subpage_dupfixed_leaf MDBX_subpage_dupfixed_leaf,
MDBX_subpage_broken,
}; };
#ifndef __cplusplus #ifndef __cplusplus
typedef enum MDBX_page_type_t MDBX_page_type_t; typedef enum MDBX_page_type_t MDBX_page_type_t;
@ -4122,7 +4123,7 @@ typedef enum MDBX_page_type_t MDBX_page_type_t;
typedef int MDBX_pgvisitor_func( typedef int MDBX_pgvisitor_func(
const uint64_t pgno, const unsigned number, void *const ctx, const int deep, const uint64_t pgno, const unsigned number, void *const ctx, const int deep,
const char *const dbi, const size_t page_size, const MDBX_page_type_t type, const char *const dbi, const size_t page_size, const MDBX_page_type_t type,
const size_t nentries, const size_t payload_bytes, const MDBX_error_t err, const size_t nentries, const size_t payload_bytes,
const size_t header_bytes, const size_t unused_bytes) cxx17_noexcept; const size_t header_bytes, const size_t unused_bytes) cxx17_noexcept;
/** B-tree traversal function. */ /** B-tree traversal function. */

View File

@ -581,12 +581,14 @@ number_of_ovpages(const MDBX_env *env, size_t bytes) {
return bytes2pgno(env, PAGEHDRSZ - 1 + bytes) + 1; return bytes2pgno(env, PAGEHDRSZ - 1 + bytes) + 1;
} }
__cold static int bad_page(const MDBX_page *mp, const char *fmt, ...) { __cold static int __printf_args(2, 3)
bad_page(const MDBX_page *mp, const char *fmt, ...) {
if (mdbx_log_enabled(MDBX_LOG_ERROR)) { if (mdbx_log_enabled(MDBX_LOG_ERROR)) {
static const MDBX_page *prev; static const MDBX_page *prev;
if (prev != mp) { if (prev != mp) {
prev = mp; prev = mp;
mdbx_debug_log(MDBX_LOG_ERROR, "badpage", 0, "#%u, page-txnid %zu\n", mdbx_debug_log(MDBX_LOG_ERROR, "badpage", 0,
"corrupted page #%u, mod-txnid %" PRIaTXN " \n",
mp->mp_pgno, mp->mp_txnid); mp->mp_pgno, mp->mp_txnid);
} }
@ -6968,7 +6970,7 @@ static __cold int mdbx_audit_ex(MDBX_txn *txn, unsigned retired_stored,
for (unsigned j = 0; j < page_numkeys(mp); j++) { for (unsigned j = 0; j < page_numkeys(mp); j++) {
MDBX_node *node = page_node(mp, j); MDBX_node *node = page_node(mp, j);
if (node_flags(node) == F_SUBDATA) { if (node_flags(node) == F_SUBDATA) {
if (unlikely(node_ds(node) < sizeof(MDBX_db))) if (unlikely(node_ds(node) != sizeof(MDBX_db)))
return MDBX_CORRUPTED; return MDBX_CORRUPTED;
MDBX_db db_copy, *db; MDBX_db db_copy, *db;
memcpy(db = &db_copy, node_data(node), sizeof(db_copy)); memcpy(db = &db_copy, node_data(node), sizeof(db_copy));
@ -10881,7 +10883,7 @@ __hot static int mdbx_page_get(MDBX_cursor *mc, pgno_t pgno, MDBX_page **ret,
int *lvl, const txnid_t pp_txnid) { int *lvl, const txnid_t pp_txnid) {
MDBX_txn *txn = mc->mc_txn; MDBX_txn *txn = mc->mc_txn;
if (unlikely(pgno >= txn->mt_next_pgno)) { if (unlikely(pgno >= txn->mt_next_pgno)) {
mdbx_error("page %" PRIaPGNO " beyond next-pgno", pgno); mdbx_error("page #%" PRIaPGNO " beyond next-pgno", pgno);
notfound: notfound:
*ret = nullptr; *ret = nullptr;
txn->mt_flags |= MDBX_TXN_ERROR; txn->mt_flags |= MDBX_TXN_ERROR;
@ -10922,7 +10924,7 @@ dirty:
if (unlikely(p->mp_pgno != pgno)) { if (unlikely(p->mp_pgno != pgno)) {
bad_page( bad_page(
p, "mismatch pgno %" PRIaPGNO " (actual) != %" PRIaPGNO " (expected)\n", p, "mismatch actual pgno (%" PRIaPGNO ") != expected (%" PRIaPGNO ")\n",
p->mp_pgno, pgno); p->mp_pgno, pgno);
goto notfound; goto notfound;
} }
@ -10943,8 +10945,8 @@ dirty:
((p->mp_lower | p->mp_upper) & 1) != 0 || ((p->mp_lower | p->mp_upper) & 1) != 0 ||
PAGEHDRSZ + p->mp_upper > env->me_psize) && PAGEHDRSZ + p->mp_upper > env->me_psize) &&
!IS_OVERFLOW(p))) { !IS_OVERFLOW(p))) {
bad_page(p, "invalid page lower(%u)/upper(%u), pg-limit %u\n", p->mp_lower, bad_page(p, "invalid page lower(%u)/upper(%u) with limit (%u)\n",
p->mp_upper, page_space(env)); p->mp_lower, p->mp_upper, page_space(env));
goto corrupted; goto corrupted;
} }
@ -11060,8 +11062,8 @@ static int mdbx_setup_dbx(MDBX_dbx *const dbx, const MDBX_db *const db,
if ((db->md_flags & (MDBX_DUPFIXED | MDBX_INTEGERDUP)) != 0 && db->md_xsize) { if ((db->md_flags & (MDBX_DUPFIXED | MDBX_INTEGERDUP)) != 0 && db->md_xsize) {
if (unlikely(db->md_xsize < dbx->md_vlen_min || if (unlikely(db->md_xsize < dbx->md_vlen_min ||
db->md_xsize > dbx->md_vlen_max)) { db->md_xsize > dbx->md_vlen_max)) {
mdbx_error("db->md_xsize (%u) < vlen_min || db->md_xsize > vlen_max", mdbx_error("db.md_xsize (%u) <> min/max value-length (%zu/%zu)",
db->md_xsize); db->md_xsize, dbx->md_vlen_min, dbx->md_vlen_max);
return MDBX_CORRUPTED; return MDBX_CORRUPTED;
} }
dbx->md_vlen_min = dbx->md_vlen_max = db->md_xsize; dbx->md_vlen_min = dbx->md_vlen_max = db->md_xsize;
@ -11096,7 +11098,7 @@ static int mdbx_fetch_sdb(MDBX_txn *txn, MDBX_dbi dbi) {
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
return rc; return rc;
if (unlikely(data.iov_len < sizeof(MDBX_db))) if (unlikely(data.iov_len != sizeof(MDBX_db)))
return MDBX_INCOMPATIBLE; /* not a named DB */ return MDBX_INCOMPATIBLE; /* not a named DB */
uint16_t md_flags = UNALIGNED_PEEK_16(data.iov_base, MDBX_db, md_flags); uint16_t md_flags = UNALIGNED_PEEK_16(data.iov_base, MDBX_db, md_flags);
@ -13495,9 +13497,10 @@ static int mdbx_xcursor_init1(MDBX_cursor *mc, MDBX_node *node,
} }
if (unlikely(mx->mx_db.md_xsize < mc->mc_dbx->md_vlen_min || if (unlikely(mx->mx_db.md_xsize < mc->mc_dbx->md_vlen_min ||
mx->mx_db.md_xsize > mc->mc_dbx->md_vlen_max)) { mx->mx_db.md_xsize > mc->mc_dbx->md_vlen_max)) {
mdbx_error("mismatched nested-db %u md_xsize < md_vlen_min || md_xsize > " mdbx_error("mismatched nested-db.md_xsize (%u) <> min/max value-length "
"md_vlen_max", "(%zu/%zu)",
mx->mx_db.md_xsize); mx->mx_db.md_xsize, mc->mc_dbx->md_vlen_min,
mc->mc_dbx->md_vlen_max);
return MDBX_CORRUPTED; return MDBX_CORRUPTED;
} }
mc->mc_db->md_xsize = mx->mx_db.md_xsize; mc->mc_db->md_xsize = mx->mx_db.md_xsize;
@ -14663,12 +14666,12 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
const unsigned nkeys = page_numkeys(mp); const unsigned nkeys = page_numkeys(mp);
char *const end_of_page = (char *)mp + env->me_psize; char *const end_of_page = (char *)mp + env->me_psize;
if (unlikely(mp->mp_pgno < MIN_PAGENO || mp->mp_pgno > MAX_PAGENO)) if (unlikely(mp->mp_pgno < MIN_PAGENO || mp->mp_pgno > MAX_PAGENO))
return bad_page(mp, "invalid pgno %u\n", mp->mp_pgno); return bad_page(mp, "invalid pgno (%u)\n", mp->mp_pgno);
if (IS_OVERFLOW(mp)) { if (IS_OVERFLOW(mp)) {
if (unlikely(mp->mp_pages < 1 && mp->mp_pages >= MAX_PAGENO / 2)) if (unlikely(mp->mp_pages < 1 && mp->mp_pages >= MAX_PAGENO / 2))
return bad_page(mp, "invalid overflow n-pages %u\n", mp->mp_pages); return bad_page(mp, "invalid overflow n-pages (%u)\n", mp->mp_pages);
if (unlikely(mp->mp_pgno > mc->mc_txn->mt_next_pgno - mp->mp_pages)) if (unlikely(mp->mp_pgno > mc->mc_txn->mt_next_pgno - mp->mp_pages))
return bad_page(mp, "overflow page %u beyond next-pgno\n", return bad_page(mp, "overflow page beyond (%u) next-pgno\n",
mp->mp_pgno + mp->mp_pages); mp->mp_pgno + mp->mp_pages);
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
@ -14676,7 +14679,7 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
int rc = MDBX_SUCCESS; int rc = MDBX_SUCCESS;
if ((options & C_UPDATING) == 0 || !IS_DIRTY(mp)) { if ((options & C_UPDATING) == 0 || !IS_DIRTY(mp)) {
if (unlikely(nkeys < 2 && IS_BRANCH(mp))) if (unlikely(nkeys < 2 && IS_BRANCH(mp)))
rc = bad_page(mp, "branch-page %u nkey < 2\n", nkeys); rc = bad_page(mp, "branch-page nkey (%u) < 2\n", nkeys);
} }
MDBX_val here, prev = {0, 0}; MDBX_val here, prev = {0, 0};
@ -14685,7 +14688,7 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
const size_t ksize = mp->mp_leaf2_ksize; const size_t ksize = mp->mp_leaf2_ksize;
char *const key = page_leaf2key(mp, i, ksize); char *const key = page_leaf2key(mp, i, ksize);
if (unlikely(end_of_page < key + ksize)) { if (unlikely(end_of_page < key + ksize)) {
rc = bad_page(mp, "leaf2-key %zu beyond page-end\n", rc = bad_page(mp, "leaf2-key beyond (%zu) page-end\n",
key + ksize - end_of_page); key + ksize - end_of_page);
continue; continue;
} }
@ -14694,9 +14697,9 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
if (unlikely(ksize != mc->mc_dbx->md_klen_min)) { if (unlikely(ksize != mc->mc_dbx->md_klen_min)) {
if (unlikely(ksize < mc->mc_dbx->md_klen_min || if (unlikely(ksize < mc->mc_dbx->md_klen_min ||
ksize > mc->mc_dbx->md_klen_max)) ksize > mc->mc_dbx->md_klen_max))
rc = bad_page(mp, rc = bad_page(
"leaf2-key %zu size < klen_min || size > klen_max\n", mp, "leaf2-key size (%zu) <> min/max key-length (%zu/%zu)\n",
ksize); ksize, mc->mc_dbx->md_klen_min, mc->mc_dbx->md_klen_max);
else else
mc->mc_dbx->md_klen_min = mc->mc_dbx->md_klen_max = ksize; mc->mc_dbx->md_klen_min = mc->mc_dbx->md_klen_max = ksize;
} }
@ -14712,14 +14715,15 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
const MDBX_node *const node = page_node(mp, i); const MDBX_node *const node = page_node(mp, i);
const char *node_end = (char *)node + NODESIZE; const char *node_end = (char *)node + NODESIZE;
if (unlikely(node_end > end_of_page)) { if (unlikely(node_end > end_of_page)) {
rc = bad_page(mp, "node %zu beyond page-end\n", node_end - end_of_page); rc = bad_page(mp, "node (%zu) beyond page-end\n",
node_end - end_of_page);
continue; continue;
} }
if (IS_LEAF(mp) || i > 0) { if (IS_LEAF(mp) || i > 0) {
size_t ksize = node_ks(node); size_t ksize = node_ks(node);
char *key = node_key(node); char *key = node_key(node);
if (unlikely(end_of_page < key + ksize)) { if (unlikely(end_of_page < key + ksize)) {
rc = bad_page(mp, "node-key %zu beyond page-end\n", rc = bad_page(mp, "node-key (%zu) beyond page-end\n",
key + ksize - end_of_page); key + ksize - end_of_page);
continue; continue;
} }
@ -14728,7 +14732,8 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
if (unlikely(ksize < mc->mc_dbx->md_klen_min || if (unlikely(ksize < mc->mc_dbx->md_klen_min ||
ksize > mc->mc_dbx->md_klen_max)) ksize > mc->mc_dbx->md_klen_max))
rc = bad_page( rc = bad_page(
mp, "node-key %zu size < klen_min || size > klen_max\n", ksize); mp, "node-key size (%zu) <> min/max key-length (%zu/%zu)\n",
ksize, mc->mc_dbx->md_klen_min, mc->mc_dbx->md_klen_max);
if ((options & C_SKIPORD) == 0) { if ((options & C_SKIPORD) == 0) {
here.iov_base = key; here.iov_base = key;
@ -14744,23 +14749,32 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
if ((options & C_RETIRING) == 0) { if ((options & C_RETIRING) == 0) {
const pgno_t ref = node_pgno(node); const pgno_t ref = node_pgno(node);
if (unlikely(ref < MIN_PAGENO || ref >= mc->mc_txn->mt_next_pgno)) if (unlikely(ref < MIN_PAGENO || ref >= mc->mc_txn->mt_next_pgno))
rc = bad_page(mp, "branch-node wrong pgno %u\n", ref); rc = bad_page(mp, "branch-node wrong pgno (%u)\n", ref);
} }
continue; continue;
} }
switch (node_flags(node)) { switch (node_flags(node)) {
default: default:
rc = bad_page(mp, "invalid node flags %u\n", node_flags(node)); rc = bad_page(mp, "invalid node flags (%u)\n", node_flags(node));
break; break;
case F_BIGDATA /* data on large-page */: { case F_BIGDATA /* data on large-page */:
case 0 /* usual */:
case F_SUBDATA /* sub-db */:
case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */:
case F_DUPDATA /* short sub-page */:
break;
}
if (node_flags(node) & F_BIGDATA) {
const size_t dsize = node_ds(node); const size_t dsize = node_ds(node);
if ((options & C_COPYING) == 0) { if ((options & C_COPYING) == 0) {
if (unlikely(dsize <= mc->mc_dbx->md_vlen_min || if (unlikely(dsize <= mc->mc_dbx->md_vlen_min ||
dsize > mc->mc_dbx->md_vlen_max)) dsize > mc->mc_dbx->md_vlen_max))
rc = bad_page( rc = bad_page(
mp, "big-node data %zu size <= vlen_min || size >= vlen_max\n", mp,
dsize); "big-node data size (%zu) <> min/max value-length (%zu/%zu)\n",
dsize, mc->mc_dbx->md_vlen_min, mc->mc_dbx->md_vlen_max);
} }
if ((options & C_RETIRING) == 0) { if ((options & C_RETIRING) == 0) {
MDBX_page *lp; MDBX_page *lp;
@ -14769,30 +14783,24 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
if (unlikely(err != MDBX_SUCCESS)) if (unlikely(err != MDBX_SUCCESS))
return err; return err;
if (unlikely(!IS_OVERFLOW(lp))) { if (unlikely(!IS_OVERFLOW(lp))) {
rc = bad_page(mp, "big-node refs to non-overflow page %u\n", rc = bad_page(mp, "big-node refs to non-overflow page (%u)\n",
lp->mp_pgno); lp->mp_pgno);
continue; continue;
} }
if (unlikely(number_of_ovpages(env, dsize) != lp->mp_pages)) if (unlikely(number_of_ovpages(env, dsize) != lp->mp_pages))
rc = bad_page( rc = bad_page(
mp, "big-node size %zu mismatch overflow npagse size %u\n", mp, "big-node size (%zu) mismatch overflow npagse size (%u)\n",
dsize, lp->mp_pages); dsize, lp->mp_pages);
} }
}
continue; continue;
case 0 /* usual */:
case F_SUBDATA /* sub-db */:
case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */:
case F_DUPDATA /* short sub-page */:
break;
} }
const size_t dsize = node_ds(node); const size_t dsize = node_ds(node);
const char *const data = node_data(node); const char *const data = node_data(node);
if (unlikely(end_of_page < data + dsize)) { if (unlikely(end_of_page < data + dsize)) {
rc = rc = bad_page(mp,
bad_page(mp, "node-data[%u of %u, %zu bytes] %zu beyond page end\n", "node-data(%u of %u, %zu bytes) beyond (%zu) page-end\n",
i, nkeys, dsize, data + dsize - end_of_page); i, nkeys, dsize, data + dsize - end_of_page);
continue; continue;
} }
@ -14805,27 +14813,27 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
if (unlikely(dsize < mc->mc_dbx->md_vlen_min || if (unlikely(dsize < mc->mc_dbx->md_vlen_min ||
dsize > mc->mc_dbx->md_vlen_max)) { dsize > mc->mc_dbx->md_vlen_max)) {
rc = bad_page( rc = bad_page(
mp, "node-data %zu size <= vlen_min || size >= vlen_max\n", mp, "node-data size (%zu) <> min/max value-length (%zu/%zu)\n",
dsize); dsize, mc->mc_dbx->md_vlen_min, mc->mc_dbx->md_vlen_max);
continue; continue;
} }
} }
break; break;
case F_SUBDATA /* sub-db */: case F_SUBDATA /* sub-db */:
if (unlikely(dsize < sizeof(MDBX_db))) { if (unlikely(dsize != sizeof(MDBX_db))) {
rc = bad_page(mp, "invalid sub-db record size %zu\n", dsize); rc = bad_page(mp, "invalid sub-db record size (%zu)\n", dsize);
continue; continue;
} }
break; break;
case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */: case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */:
if (unlikely(dsize != sizeof(MDBX_db))) { if (unlikely(dsize != sizeof(MDBX_db))) {
rc = bad_page(mp, "invalid nested-db record size %zu\n", dsize); rc = bad_page(mp, "invalid nested-db record size (%zu)\n", dsize);
continue; continue;
} }
break; break;
case F_DUPDATA /* short sub-page */: case F_DUPDATA /* short sub-page */:
if (unlikely(dsize <= PAGEHDRSZ)) { if (unlikely(dsize <= PAGEHDRSZ)) {
rc = bad_page(mp, "invalid nested-page record size %zu\n", dsize); rc = bad_page(mp, "invalid nested-page record size (%zu)\n", dsize);
continue; continue;
} else { } else {
const MDBX_page *const sp = (MDBX_page *)data; const MDBX_page *const sp = (MDBX_page *)data;
@ -14836,7 +14844,7 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
case P_LEAF | P_LEAF2 | P_SUBP: case P_LEAF | P_LEAF2 | P_SUBP:
break; break;
default: default:
rc = bad_page(mp, "invalid nested-page flags %uv", sp->mp_flags); rc = bad_page(mp, "invalid nested-page flags (%u)\n", sp->mp_flags);
continue; continue;
} }
@ -14847,7 +14855,7 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
size_t sub_ksize = sp->mp_leaf2_ksize; size_t sub_ksize = sp->mp_leaf2_ksize;
char *sub_key = page_leaf2key(sp, j, sub_ksize); char *sub_key = page_leaf2key(sp, j, sub_ksize);
if (unlikely(end_of_subpage < sub_key + sub_ksize)) { if (unlikely(end_of_subpage < sub_key + sub_ksize)) {
rc = bad_page(mp, "nested-leaf2-key %zu beyond nested-page\n", rc = bad_page(mp, "nested-leaf2-key beyond (%zu) nested-page\n",
sub_key + sub_ksize - end_of_subpage); sub_key + sub_ksize - end_of_subpage);
continue; continue;
} }
@ -14856,11 +14864,11 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
if (unlikely(sub_ksize != mc->mc_dbx->md_vlen_min)) { if (unlikely(sub_ksize != mc->mc_dbx->md_vlen_min)) {
if (unlikely(sub_ksize < mc->mc_dbx->md_vlen_min || if (unlikely(sub_ksize < mc->mc_dbx->md_vlen_min ||
sub_ksize > mc->mc_dbx->md_vlen_max)) { sub_ksize > mc->mc_dbx->md_vlen_max)) {
rc = bad_page( rc = bad_page(mp,
mp, "nested-leaf2-key size (%zu) <> min/max "
"nested-leaf2-key %zu size < vlen_min || size > " "value-length (%zu/%zu)\n",
"vlen_max\n", sub_ksize, mc->mc_dbx->md_vlen_min,
sub_ksize); mc->mc_dbx->md_vlen_max);
continue; continue;
} }
mc->mc_dbx->md_vlen_min = mc->mc_dbx->md_vlen_max = sub_ksize; mc->mc_dbx->md_vlen_min = mc->mc_dbx->md_vlen_max = sub_ksize;
@ -14878,12 +14886,12 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
const MDBX_node *const sub_node = page_node(sp, j); const MDBX_node *const sub_node = page_node(sp, j);
const char *sub_node_end = (char *)sub_node + NODESIZE; const char *sub_node_end = (char *)sub_node + NODESIZE;
if (unlikely(sub_node_end > end_of_subpage)) { if (unlikely(sub_node_end > end_of_subpage)) {
rc = bad_page(mp, "nested-node %zu beyond nested-page\n", rc = bad_page(mp, "nested-node beyond (%zu) nested-page\n",
end_of_subpage - sub_node_end); end_of_subpage - sub_node_end);
continue; continue;
} }
if (unlikely(node_flags(sub_node) != 0)) if (unlikely(node_flags(sub_node) != 0))
rc = bad_page(mp, "nested-node invalid flags %u\n", rc = bad_page(mp, "nested-node invalid flags (%u)\n",
node_flags(sub_node)); node_flags(sub_node));
size_t sub_ksize = node_ks(sub_node); size_t sub_ksize = node_ks(sub_node);
@ -14895,9 +14903,10 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
if (unlikely(sub_ksize < mc->mc_dbx->md_vlen_min || if (unlikely(sub_ksize < mc->mc_dbx->md_vlen_min ||
sub_ksize > mc->mc_dbx->md_vlen_max)) sub_ksize > mc->mc_dbx->md_vlen_max))
rc = bad_page(mp, rc = bad_page(mp,
"nested-node-key %zu size < vlen_min || size > " "nested-node-key size (%zu) <> min/max "
"vlen_max\n", "value-length (%zu/%zu)\n",
sub_ksize); sub_ksize, mc->mc_dbx->md_vlen_min,
mc->mc_dbx->md_vlen_max);
if ((options & C_SKIPORD) == 0) { if ((options & C_SKIPORD) == 0) {
sub_here.iov_len = sub_ksize; sub_here.iov_len = sub_ksize;
@ -14909,10 +14918,10 @@ static __cold int mdbx_page_check(MDBX_cursor *const mc,
} }
} }
if (unlikely(sub_dsize != 0)) if (unlikely(sub_dsize != 0))
rc = bad_page(mp, "nested-node non-empty data size %zu\n", rc = bad_page(mp, "nested-node non-empty data size (%zu)\n",
sub_dsize); sub_dsize);
if (unlikely(end_of_subpage < sub_key + sub_ksize)) if (unlikely(end_of_subpage < sub_key + sub_ksize))
rc = bad_page(mp, "nested-node-key %zu beyond nested-page\n", rc = bad_page(mp, "nested-node-key beyond (%zu) nested-page\n",
sub_key + sub_ksize - end_of_subpage); sub_key + sub_ksize - end_of_subpage);
} }
} }
@ -15906,7 +15915,7 @@ static int __cold mdbx_env_cwalk(mdbx_copy *my, pgno_t *pg, int flags) {
toggle = my->mc_toggle; toggle = my->mc_toggle;
} }
} else if (node_flags(node) & F_SUBDATA) { } else if (node_flags(node) & F_SUBDATA) {
if (node_ds(node) < sizeof(MDBX_db)) { if (node_ds(node) != sizeof(MDBX_db)) {
rc = MDBX_CORRUPTED; rc = MDBX_CORRUPTED;
goto done; goto done;
} }
@ -16859,7 +16868,7 @@ static int dbi_open(MDBX_txn *txn, const char *table_name, unsigned user_flags,
rc = MDBX_INCOMPATIBLE; rc = MDBX_INCOMPATIBLE;
goto early_bailout; goto early_bailout;
} }
if (unlikely(data.iov_len < sizeof(MDBX_db))) { if (unlikely(data.iov_len != sizeof(MDBX_db))) {
rc = MDBX_CORRUPTED; rc = MDBX_CORRUPTED;
goto early_bailout; goto early_bailout;
} }
@ -17682,47 +17691,63 @@ typedef struct mdbx_walk_ctx {
static int __cold mdbx_walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const db, static int __cold mdbx_walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const db,
const char *name, int deep); const char *name, int deep);
/* Classifies a page for the tree-walk visitor by its page flags.
 *
 * The flags are compared for exact equality with each known combination
 * (no masking), so a page carrying any additional flag bit is not matched
 * and is reported as broken.
 *
 * mp  Page to classify; may be a null pointer.
 *
 * Returns the MDBX_page_type_t matching mp->mp_flags, or MDBX_page_broken
 * when mp is null or its flags equal none of the listed combinations. */
static MDBX_page_type_t walk_page_type(const MDBX_page *mp) {
  /* No page at all is reported the same way as an unrecognized one. */
  if (!mp)
    return MDBX_page_broken;

  switch (mp->mp_flags) {
  case P_META:
    return MDBX_page_meta;
  case P_OVERFLOW:
    return MDBX_page_large;
  case P_BRANCH:
    return MDBX_page_branch;
  case P_LEAF:
    return MDBX_page_leaf;
  case P_LEAF | P_LEAF2:
    return MDBX_page_dupfixed_leaf;
  default:
    break;
  }

  return MDBX_page_broken;
}
/* Depth-first tree traversal. */ /* Depth-first tree traversal. */
static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, pgno_t pgno, static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, pgno_t pgno,
const char *name, int deep, const char *name, int deep,
txnid_t parent_txnid) { txnid_t parent_txnid) {
assert(pgno != P_INVALID); assert(pgno != P_INVALID);
MDBX_page *mp; MDBX_page *mp = nullptr;
int rc = mdbx_page_get(ctx->mw_cursor, pgno, &mp, NULL, parent_txnid); int err = mdbx_page_get(ctx->mw_cursor, pgno, &mp, NULL, parent_txnid);
if (unlikely(rc != MDBX_SUCCESS)) if (err == MDBX_SUCCESS)
return rc; err = mdbx_page_check(ctx->mw_cursor, mp, 0);
rc = mdbx_page_check(ctx->mw_cursor, mp, 0); MDBX_page_type_t type = walk_page_type(mp);
if (unlikely(rc != MDBX_SUCCESS)) const int nentries = (mp && !IS_OVERFLOW(mp)) ? page_numkeys(mp) : 1;
return rc; unsigned npages = (mp && IS_OVERFLOW(mp)) ? mp->mp_pages : 1;
size_t pagesize = pgno2bytes(ctx->mw_txn->mt_env, npages);
const int nkeys = page_numkeys(mp); size_t header_size = (mp && !IS_LEAF2(mp) && !IS_OVERFLOW(mp))
size_t header_size = IS_LEAF2(mp) ? PAGEHDRSZ : PAGEHDRSZ + mp->mp_lower; ? PAGEHDRSZ + mp->mp_lower
size_t unused_size = page_room(mp); : PAGEHDRSZ;
size_t payload_size = 0; size_t payload_size = 0;
size_t unused_size =
(mp && !IS_OVERFLOW(mp) ? page_room(mp) : pagesize - header_size) -
payload_size;
size_t align_bytes = 0; size_t align_bytes = 0;
MDBX_page_type_t type;
/* LY: Don't use mask here, e.g bitwise /* LY: Don't use mask here, e.g bitwise
* (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP). * (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP).
* Pages should not me marked dirty/loose or otherwise. */ * Pages should not me marked dirty/loose or otherwise. */
switch (mp->mp_flags) { switch (mp->mp_flags) {
case P_BRANCH:
type = MDBX_page_branch;
if (unlikely(nkeys < 2))
return MDBX_CORRUPTED;
break;
case P_LEAF:
type = MDBX_page_leaf;
break;
case P_LEAF | P_LEAF2:
type = MDBX_page_dupfixed_leaf;
break;
default: default:
return MDBX_CORRUPTED; err = MDBX_CORRUPTED;
break;
case P_BRANCH:
if (unlikely(nentries < 2))
err = MDBX_CORRUPTED;
case P_LEAF:
case P_LEAF | P_LEAF2:
break;
} }
for (int i = 0; i < nkeys; for (int i = 0; err == MDBX_SUCCESS && i < nentries;
align_bytes += ((payload_size + align_bytes) & 1), i++) { align_bytes += ((payload_size + align_bytes) & 1), i++) {
if (type == MDBX_page_dupfixed_leaf) { if (type == MDBX_page_dupfixed_leaf) {
/* LEAF2 pages have no mp_ptrs[] or node headers */ /* LEAF2 pages have no mp_ptrs[] or node headers */
@ -17740,56 +17765,57 @@ static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, pgno_t pgno,
assert(type == MDBX_page_leaf); assert(type == MDBX_page_leaf);
switch (node_flags(node)) { switch (node_flags(node)) {
case 0 /* usual node */: { case 0 /* usual node */:
payload_size += node_ds(node); payload_size += node_ds(node);
} break; break;
case F_BIGDATA /* long data on the large/overflow page */: { case F_BIGDATA /* long data on the large/overflow page */: {
payload_size += sizeof(pgno_t); payload_size += sizeof(pgno_t);
const pgno_t large_pgno = node_largedata_pgno(node); const pgno_t large_pgno = node_largedata_pgno(node);
MDBX_page *op;
rc = mdbx_page_get(ctx->mw_cursor, large_pgno, &op, NULL,
pp_txnid4chk(mp, ctx->mw_txn));
if (unlikely(rc != MDBX_SUCCESS))
return rc;
rc = mdbx_page_check(ctx->mw_cursor, op, 0);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
/* LY: Don't use mask here, e.g bitwise
* (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP).
* Pages should not me marked dirty/loose or otherwise. */
if (unlikely(P_OVERFLOW != op->mp_flags))
return MDBX_CORRUPTED;
const size_t over_header = PAGEHDRSZ;
const size_t over_payload = node_ds(node); const size_t over_payload = node_ds(node);
const size_t over_unused = pgno2bytes(ctx->mw_txn->mt_env, op->mp_pages) - const size_t over_header = PAGEHDRSZ;
over_payload - over_header; npages = 1;
rc = ctx->mw_visitor(large_pgno, op->mp_pages, ctx->mw_user, deep, name, MDBX_page *op;
pgno2bytes(ctx->mw_txn->mt_env, op->mp_pages), err = mdbx_page_get(ctx->mw_cursor, large_pgno, &op, NULL,
MDBX_page_large, 1, over_payload, over_header, pp_txnid4chk(mp, ctx->mw_txn));
over_unused); if (err == MDBX_SUCCESS)
err = mdbx_page_check(ctx->mw_cursor, op, 0);
if (err == MDBX_SUCCESS) {
/* LY: Don't use mask here, e.g bitwise
* (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP).
* Pages should not me marked dirty/loose or otherwise. */
if (P_OVERFLOW != op->mp_flags)
err = bad_page(mp, "wrong page type %d for large data", op->mp_flags);
else
npages = op->mp_pages;
}
pagesize = pgno2bytes(ctx->mw_txn->mt_env, npages);
const size_t over_unused = pagesize - over_payload - over_header;
err = ctx->mw_visitor(large_pgno, npages, ctx->mw_user, deep, name,
pagesize, MDBX_page_large, err, 1, over_payload,
over_header, over_unused);
} break; } break;
case F_SUBDATA /* sub-db */: { case F_SUBDATA /* sub-db */: {
const size_t namelen = node_ks(node); const size_t namelen = node_ks(node);
if (unlikely(namelen == 0 || node_ds(node) < sizeof(MDBX_db)))
return MDBX_CORRUPTED;
payload_size += node_ds(node); payload_size += node_ds(node);
if (unlikely(namelen == 0 || node_ds(node) != sizeof(MDBX_db)))
err = MDBX_CORRUPTED;
} break; } break;
case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */: { case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */:
if (unlikely(node_ds(node) != sizeof(MDBX_db)))
return MDBX_CORRUPTED;
payload_size += sizeof(MDBX_db); payload_size += sizeof(MDBX_db);
} break; if (unlikely(node_ds(node) != sizeof(MDBX_db)))
err = MDBX_CORRUPTED;
break;
case F_DUPDATA /* short sub-page */: { case F_DUPDATA /* short sub-page */: {
if (unlikely(node_ds(node) <= PAGEHDRSZ)) if (unlikely(node_ds(node) <= PAGEHDRSZ)) {
return MDBX_CORRUPTED; err = MDBX_CORRUPTED;
break;
}
MDBX_page *sp = node_data(node); MDBX_page *sp = node_data(node);
const int nsubkeys = page_numkeys(sp); const int nsubkeys = page_numkeys(sp);
@ -17808,10 +17834,11 @@ static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, pgno_t pgno,
subtype = MDBX_subpage_dupfixed_leaf; subtype = MDBX_subpage_dupfixed_leaf;
break; break;
default: default:
return MDBX_CORRUPTED; subtype = MDBX_subpage_broken;
err = MDBX_CORRUPTED;
} }
for (int j = 0; j < nsubkeys; for (int j = 0; err == MDBX_SUCCESS && j < nsubkeys;
subalign_bytes += ((subpayload_size + subalign_bytes) & 1), j++) { subalign_bytes += ((subpayload_size + subalign_bytes) & 1), j++) {
if (subtype == MDBX_subpage_dupfixed_leaf) { if (subtype == MDBX_subpage_dupfixed_leaf) {
@ -17822,13 +17849,14 @@ static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, pgno_t pgno,
MDBX_node *subnode = page_node(sp, j); MDBX_node *subnode = page_node(sp, j);
subpayload_size += NODESIZE + node_ks(subnode) + node_ds(subnode); subpayload_size += NODESIZE + node_ks(subnode) + node_ds(subnode);
if (unlikely(node_flags(subnode) != 0)) if (unlikely(node_flags(subnode) != 0))
return MDBX_CORRUPTED; err = MDBX_CORRUPTED;
} }
} }
rc = ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, name, node_ds(node), err =
subtype, nsubkeys, subpayload_size, subheader_size, ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, name, node_ds(node),
subunused_size + subalign_bytes); subtype, err, nsubkeys, subpayload_size,
subheader_size, subunused_size + subalign_bytes);
header_size += subheader_size; header_size += subheader_size;
unused_size += subunused_size; unused_size += subunused_size;
payload_size += subpayload_size; payload_size += subpayload_size;
@ -17836,32 +17864,29 @@ static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, pgno_t pgno,
} break; } break;
default: default:
return MDBX_CORRUPTED; err = MDBX_CORRUPTED;
} }
if (unlikely(rc != MDBX_SUCCESS))
return rc;
} }
rc = ctx->mw_visitor(mp->mp_pgno, 1, ctx->mw_user, deep, name, err = ctx->mw_visitor(mp->mp_pgno, 1, ctx->mw_user, deep, name,
ctx->mw_txn->mt_env->me_psize, type, nkeys, payload_size, ctx->mw_txn->mt_env->me_psize, type, err, nentries,
header_size, unused_size + align_bytes); payload_size, header_size, unused_size + align_bytes);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(err != MDBX_SUCCESS))
return (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc; return (err == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : err;
for (int i = 0; i < nkeys; i++) { for (int i = 0; err == MDBX_SUCCESS && i < nentries; i++) {
if (type == MDBX_page_dupfixed_leaf) if (type == MDBX_page_dupfixed_leaf)
continue; continue;
MDBX_node *node = page_node(mp, i); MDBX_node *node = page_node(mp, i);
if (type == MDBX_page_branch) { if (type == MDBX_page_branch) {
rc = mdbx_walk_tree(ctx, node_pgno(node), name, deep + 1, err = mdbx_walk_tree(ctx, node_pgno(node), name, deep + 1,
pp_txnid4chk(mp, ctx->mw_txn)); pp_txnid4chk(mp, ctx->mw_txn));
if (unlikely(rc != MDBX_SUCCESS)) { if (unlikely(err != MDBX_SUCCESS)) {
if (rc != MDBX_RESULT_TRUE) if (err == MDBX_RESULT_TRUE)
return rc; break;
break; return err;
} }
continue; continue;
} }
@ -17874,8 +17899,10 @@ static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, pgno_t pgno,
case F_SUBDATA /* sub-db */: { case F_SUBDATA /* sub-db */: {
const size_t namelen = node_ks(node); const size_t namelen = node_ks(node);
if (unlikely(namelen == 0 || node_ds(node) != sizeof(MDBX_db))) if (unlikely(namelen == 0 || node_ds(node) != sizeof(MDBX_db))) {
return MDBX_CORRUPTED; err = MDBX_CORRUPTED;
break;
}
char namebuf_onstask[64]; char namebuf_onstask[64];
char *const sub_name = (namelen < sizeof(namebuf_onstask)) char *const sub_name = (namelen < sizeof(namebuf_onstask))
@ -17885,37 +17912,33 @@ static int __cold mdbx_walk_tree(mdbx_walk_ctx_t *ctx, pgno_t pgno,
memcpy(sub_name, node_key(node), namelen); memcpy(sub_name, node_key(node), namelen);
sub_name[namelen] = 0; sub_name[namelen] = 0;
memcpy(&db, node_data(node), sizeof(db)); memcpy(&db, node_data(node), sizeof(db));
rc = mdbx_walk_sdb(ctx, &db, sub_name, deep + 1); err = mdbx_walk_sdb(ctx, &db, sub_name, deep + 1);
if (sub_name != namebuf_onstask) if (sub_name != namebuf_onstask)
mdbx_free(sub_name); mdbx_free(sub_name);
} else { } else {
rc = MDBX_ENOMEM; err = MDBX_ENOMEM;
} }
} break; } break;
case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */: case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */:
if (unlikely(node_ds(node) != sizeof(MDBX_db))) if (unlikely(node_ds(node) != sizeof(MDBX_db) ||
return MDBX_CORRUPTED; ctx->mw_cursor->mc_xcursor == NULL))
err = MDBX_CORRUPTED;
if (unlikely(ctx->mw_cursor->mc_xcursor == NULL)) else {
return MDBX_CORRUPTED; memcpy(&db, node_data(node), sizeof(db));
assert(ctx->mw_cursor->mc_xcursor ==
memcpy(&db, node_data(node), sizeof(db)); &container_of(ctx->mw_cursor, MDBX_cursor_couple, outer)->inner);
assert(ctx->mw_cursor->mc_xcursor == ctx->mw_cursor = &ctx->mw_cursor->mc_xcursor->mx_cursor;
&container_of(ctx->mw_cursor, MDBX_cursor_couple, outer)->inner); err = mdbx_walk_tree(ctx, db.md_root, name, deep + 1,
ctx->mw_cursor = &ctx->mw_cursor->mc_xcursor->mx_cursor; pp_txnid4chk(mp, ctx->mw_txn));
rc = mdbx_walk_tree(ctx, db.md_root, name, deep + 1, MDBX_xcursor *inner_xcursor =
pp_txnid4chk(mp, ctx->mw_txn)); container_of(ctx->mw_cursor, MDBX_xcursor, mx_cursor);
MDBX_xcursor *inner_xcursor = MDBX_cursor_couple *couple =
container_of(ctx->mw_cursor, MDBX_xcursor, mx_cursor); container_of(inner_xcursor, MDBX_cursor_couple, inner);
MDBX_cursor_couple *couple = ctx->mw_cursor = &couple->outer;
container_of(inner_xcursor, MDBX_cursor_couple, inner); }
ctx->mw_cursor = &couple->outer;
break; break;
} }
if (unlikely(rc != MDBX_SUCCESS))
return rc;
} }
return MDBX_SUCCESS; return MDBX_SUCCESS;
@ -17958,17 +17981,14 @@ int __cold mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor,
ctx.mw_dont_check_keys_ordering = dont_check_keys_ordering; ctx.mw_dont_check_keys_ordering = dont_check_keys_ordering;
rc = visitor(0, NUM_METAS, user, 0, MDBX_PGWALK_META, rc = visitor(0, NUM_METAS, user, 0, MDBX_PGWALK_META,
pgno2bytes(txn->mt_env, NUM_METAS), MDBX_page_meta, NUM_METAS, pgno2bytes(txn->mt_env, NUM_METAS), MDBX_page_meta, MDBX_SUCCESS,
sizeof(MDBX_meta) * NUM_METAS, PAGEHDRSZ * NUM_METAS, NUM_METAS, sizeof(MDBX_meta) * NUM_METAS, PAGEHDRSZ * NUM_METAS,
(txn->mt_env->me_psize - sizeof(MDBX_meta) - PAGEHDRSZ) * (txn->mt_env->me_psize - sizeof(MDBX_meta) - PAGEHDRSZ) *
NUM_METAS); NUM_METAS);
if (!MDBX_IS_ERROR(rc)) if (!MDBX_IS_ERROR(rc))
rc = mdbx_walk_sdb(&ctx, &txn->mt_dbs[FREE_DBI], MDBX_PGWALK_GC, 0); rc = mdbx_walk_sdb(&ctx, &txn->mt_dbs[FREE_DBI], MDBX_PGWALK_GC, 0);
if (!MDBX_IS_ERROR(rc)) if (!MDBX_IS_ERROR(rc))
rc = mdbx_walk_sdb(&ctx, &txn->mt_dbs[MAIN_DBI], MDBX_PGWALK_MAIN, 0); rc = mdbx_walk_sdb(&ctx, &txn->mt_dbs[MAIN_DBI], MDBX_PGWALK_MAIN, 0);
if (!MDBX_IS_ERROR(rc))
rc = visitor(P_INVALID, 0, user, INT_MIN, NULL, 0, MDBX_page_void, 0, 0, 0,
0);
return rc; return rc;
} }

View File

@ -1037,9 +1037,9 @@ extern uint8_t mdbx_runtime_flags;
extern uint8_t mdbx_loglevel; extern uint8_t mdbx_loglevel;
extern MDBX_debug_func *mdbx_debug_logger; extern MDBX_debug_func *mdbx_debug_logger;
MDBX_INTERNAL_FUNC void mdbx_debug_log(int level, const char *function, MDBX_INTERNAL_FUNC void __printf_args(4, 5)
int line, const char *fmt, ...) mdbx_debug_log(int level, const char *function, int line, const char *fmt,
__printf_args(4, 5); ...) __printf_args(4, 5);
MDBX_INTERNAL_FUNC void mdbx_debug_log_va(int level, const char *function, MDBX_INTERNAL_FUNC void mdbx_debug_log_va(int level, const char *function,
int line, const char *fmt, int line, const char *fmt,
va_list args); va_list args);

View File

@ -58,7 +58,7 @@ static void signal_handler(int sig) {
#define EXIT_INTERRUPTED (EXIT_FAILURE + 4) #define EXIT_INTERRUPTED (EXIT_FAILURE + 4)
#define EXIT_FAILURE_SYS (EXIT_FAILURE + 3) #define EXIT_FAILURE_SYS (EXIT_FAILURE + 3)
#define EXIT_FAILURE_MDB (EXIT_FAILURE + 2) #define EXIT_FAILURE_MDBX (EXIT_FAILURE + 2)
#define EXIT_FAILURE_CHECK_MAJOR (EXIT_FAILURE + 1) #define EXIT_FAILURE_CHECK_MAJOR (EXIT_FAILURE + 1)
#define EXIT_FAILURE_CHECK_MINOR EXIT_FAILURE #define EXIT_FAILURE_CHECK_MINOR EXIT_FAILURE
@ -117,19 +117,47 @@ static void __printf_args(1, 2) print(const char *msg, ...) {
} }
} }
static void __printf_args(1, 2) error(const char *msg, ...) { static void va_log(MDBX_log_level_t level, const char *msg, va_list args) {
total_problems++; static const char *const prefixes[] = {
"!!!fatal: ", " ! " /* error */, " ! " /* warning */,
" " /* notice */, " //" /* verbose */, " ///" /* debug */,
" ////" /* trace */
};
if (!quiet) { FILE *out = stdout;
va_list args; if (level <= MDBX_LOG_ERROR) {
total_problems++;
out = stderr;
}
if (!quiet && verbose + 1 >= (unsigned)level) {
fflush(nullptr); fflush(nullptr);
va_start(args, msg); fputs(prefixes[level], out);
fputs(" ! ", stderr); vfprintf(out, msg, args);
vfprintf(stderr, msg, args); if (msg[strlen(msg) - 1] != '\n')
va_end(args); fputc('\n', out);
fflush(nullptr); fflush(nullptr);
} }
if (level == MDBX_LOG_FATAL) {
exit(EXIT_FAILURE_MDBX);
abort();
}
}
static void __printf_args(1, 2) error(const char *msg, ...) {
va_list args;
va_start(args, msg);
va_log(MDBX_LOG_ERROR, msg, args);
va_end(args);
}
static void logger(MDBX_log_level_t level, const char *function, int line,
const char *msg, va_list args) {
(void)line;
(void)function;
if (level < MDBX_LOG_EXTRA)
va_log(level, msg, args);
} }
static int check_user_break(void) { static int check_user_break(void) {
@ -260,18 +288,15 @@ static size_t problems_pop(struct problem *list) {
static int pgvisitor(const uint64_t pgno, const unsigned pgnumber, static int pgvisitor(const uint64_t pgno, const unsigned pgnumber,
void *const ctx, const int deep, void *const ctx, const int deep,
const char *const dbi_name_or_tag, const size_t page_size, const char *const dbi_name_or_tag, const size_t page_size,
const MDBX_page_type_t pagetype, const size_t nentries, const MDBX_page_type_t pagetype, const MDBX_error_t err,
const size_t payload_bytes, const size_t header_bytes, const size_t nentries, const size_t payload_bytes,
const size_t unused_bytes) { const size_t header_bytes, const size_t unused_bytes) {
(void)ctx; (void)ctx;
if (deep > 42) { if (deep > 42) {
problem_add("deep", deep, "too large", nullptr); problem_add("deep", deep, "too large", nullptr);
return MDBX_CORRUPTED /* avoid infinite loop/recursion */; return MDBX_CORRUPTED /* avoid infinite loop/recursion */;
} }
if (pagetype == MDBX_page_void)
return MDBX_SUCCESS;
walk_dbi_t *dbi = pagemap_lookup_dbi(dbi_name_or_tag, false); walk_dbi_t *dbi = pagemap_lookup_dbi(dbi_name_or_tag, false);
if (!dbi) if (!dbi)
return MDBX_ENOMEM; return MDBX_ENOMEM;
@ -288,6 +313,13 @@ static int pgvisitor(const uint64_t pgno, const unsigned pgnumber,
pagetype_caption = "unknown"; pagetype_caption = "unknown";
dbi->pages.other += pgnumber; dbi->pages.other += pgnumber;
break; break;
case MDBX_page_broken:
pagetype_caption = "broken";
dbi->pages.other += pgnumber;
break;
case MDBX_subpage_broken:
pagetype_caption = "broken-subpage";
break;
case MDBX_page_meta: case MDBX_page_meta:
pagetype_caption = "meta"; pagetype_caption = "meta";
dbi->pages.other += pgnumber; dbi->pages.other += pgnumber;
@ -356,47 +388,51 @@ static int pgvisitor(const uint64_t pgno, const unsigned pgnumber,
: MDBX_SUCCESS; : MDBX_SUCCESS;
} }
if (unused_bytes > page_size) if (MDBX_IS_ERROR(err)) {
problem_add("page", pgno, "illegal unused-bytes", problem_add("page", pgno, "invalid/corrupted", "%s-page", pagetype_caption);
"%s-page: %u < %" PRIuPTR " < %u", pagetype_caption, 0, } else {
unused_bytes, envstat.ms_psize); if (unused_bytes > page_size)
problem_add("page", pgno, "illegal unused-bytes",
"%s-page: %u < %" PRIuPTR " < %u", pagetype_caption, 0,
unused_bytes, envstat.ms_psize);
if (header_bytes < (int)sizeof(long) || if (header_bytes < (int)sizeof(long) ||
(size_t)header_bytes >= envstat.ms_psize - sizeof(long)) (size_t)header_bytes >= envstat.ms_psize - sizeof(long))
problem_add("page", pgno, "illegal header-length", problem_add("page", pgno, "illegal header-length",
"%s-page: %" PRIuPTR " < %" PRIuPTR " < %" PRIuPTR, "%s-page: %" PRIuPTR " < %" PRIuPTR " < %" PRIuPTR,
pagetype_caption, sizeof(long), header_bytes, pagetype_caption, sizeof(long), header_bytes,
envstat.ms_psize - sizeof(long)); envstat.ms_psize - sizeof(long));
if (payload_bytes < 1) { if (payload_bytes < 1) {
if (nentries > 1) { if (nentries > 1) {
problem_add("page", pgno, "zero size-of-entry", problem_add("page", pgno, "zero size-of-entry",
"%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR " entries", "%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR " entries",
pagetype_caption, payload_bytes, nentries); pagetype_caption, payload_bytes, nentries);
/* if ((size_t)header_bytes + unused_bytes < page_size) { /* if ((size_t)header_bytes + unused_bytes < page_size) {
// LY: hush a misuse error // LY: hush a misuse error
page_bytes = page_size; page_bytes = page_size;
} */ } */
} else { } else {
problem_add("page", pgno, "empty", problem_add("page", pgno, "empty",
"%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR "%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR
" entries, deep %i", " entries, deep %i",
pagetype_caption, payload_bytes, nentries, deep); pagetype_caption, payload_bytes, nentries, deep);
dbi->pages.empty += 1; dbi->pages.empty += 1;
}
} }
}
if (pgnumber) { if (pgnumber) {
if (page_bytes != page_size) { if (page_bytes != page_size) {
problem_add("page", pgno, "misused", problem_add("page", pgno, "misused",
"%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR "%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR
"h + %" PRIuPTR "p + %" PRIuPTR "u), deep %i", "h + %" PRIuPTR "p + %" PRIuPTR "u), deep %i",
pagetype_caption, page_size, page_bytes, header_bytes, pagetype_caption, page_size, page_bytes, header_bytes,
payload_bytes, unused_bytes, deep); payload_bytes, unused_bytes, deep);
if (page_size > page_bytes) if (page_size > page_bytes)
dbi->lost_bytes += page_size - page_bytes; dbi->lost_bytes += page_size - page_bytes;
} else { } else {
dbi->payload_bytes += payload_bytes + header_bytes; dbi->payload_bytes += payload_bytes + header_bytes;
walk.total_payload_bytes += payload_bytes + header_bytes; walk.total_payload_bytes += payload_bytes + header_bytes;
}
} }
} }
@ -1026,11 +1062,15 @@ int main(int argc, char *argv[]) {
mdbx_version.git.tree, envname, mdbx_version.git.tree, envname,
(envflags & MDBX_RDONLY) ? "only" : "write"); (envflags & MDBX_RDONLY) ? "only" : "write");
fflush(nullptr); fflush(nullptr);
mdbx_setup_debug((verbose < MDBX_LOG_TRACE - 1)
? (MDBX_log_level_t)(verbose + 1)
: MDBX_LOG_TRACE,
MDBX_DBG_LEGACY_OVERLAP, logger);
rc = mdbx_env_create(&env); rc = mdbx_env_create(&env);
if (rc) { if (rc) {
error("mdbx_env_create failed, error %d %s\n", rc, mdbx_strerror(rc)); error("mdbx_env_create failed, error %d %s\n", rc, mdbx_strerror(rc));
return rc < 0 ? EXIT_FAILURE_MDB : EXIT_FAILURE_SYS; return rc < 0 ? EXIT_FAILURE_MDBX : EXIT_FAILURE_SYS;
} }
rc = mdbx_env_set_maxdbs(env, MDBX_MAX_DBI); rc = mdbx_env_set_maxdbs(env, MDBX_MAX_DBI);
@ -1460,7 +1500,7 @@ bailout:
if (rc) { if (rc) {
if (rc < 0) if (rc < 0)
return user_break ? EXIT_INTERRUPTED : EXIT_FAILURE_SYS; return user_break ? EXIT_INTERRUPTED : EXIT_FAILURE_SYS;
return EXIT_FAILURE_MDB; return EXIT_FAILURE_MDBX;
} }
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)