mdbx: enumeration for page types.

This commit is contained in:
Leo Yuriev 2018-08-28 16:41:34 +03:00
parent 3a2ea85c79
commit 3bc339ef19
3 changed files with 238 additions and 144 deletions

20
mdbx.h
View File

@ -1648,10 +1648,22 @@ typedef void MDBX_debug_func(int type, const char *function, int line,
LIBMDBX_API int mdbx_setup_debug(int flags, MDBX_debug_func *logger); LIBMDBX_API int mdbx_setup_debug(int flags, MDBX_debug_func *logger);
typedef int MDBX_pgvisitor_func(uint64_t pgno, unsigned pgnumber, void *ctx, typedef enum {
const char *dbi, const char *type, MDBX_page_void,
size_t nentries, size_t payload_bytes, MDBX_page_meta,
size_t header_bytes, size_t unused_bytes); MDBX_page_large,
MDBX_page_branch,
MDBX_page_leaf,
MDBX_page_dupfixed_leaf,
MDBX_subpage_leaf,
MDBX_subpage_dupfixed_leaf
} MDBX_page_type_t;
typedef int MDBX_pgvisitor_func(uint64_t pgno, unsigned number, void *ctx,
const char *dbi, size_t page_size,
MDBX_page_type_t type, size_t nentries,
size_t payload_bytes, size_t header_bytes,
size_t unused_bytes);
LIBMDBX_API int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, LIBMDBX_API int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor,
void *ctx); void *ctx);

View File

@ -1290,13 +1290,13 @@ static void mdbx_page_list(MDBX_page *mp) {
type = "Leaf page"; type = "Leaf page";
break; break;
case P_LEAF | P_SUBP: case P_LEAF | P_SUBP:
type = "Sub-page"; type = "Leaf sub-page";
break; break;
case P_LEAF | P_LEAF2: case P_LEAF | P_LEAF2:
type = "LEAF2 page"; type = "Leaf2 page";
break; break;
case P_LEAF | P_LEAF2 | P_SUBP: case P_LEAF | P_LEAF2 | P_SUBP:
type = "LEAF2 sub-page"; type = "Leaf2 sub-page";
break; break;
case P_OVERFLOW: case P_OVERFLOW:
mdbx_print("Overflow page %" PRIu64 " pages %u%s\n", pgno, mp->mp_pages, mdbx_print("Overflow page %" PRIu64 " pages %u%s\n", pgno, mp->mp_pages,
@ -11855,13 +11855,8 @@ typedef struct mdbx_walk_ctx {
/* Depth-first tree traversal. */ /* Depth-first tree traversal. */
static int __cold mdbx_env_walk(mdbx_walk_ctx_t *ctx, const char *dbi, static int __cold mdbx_env_walk(mdbx_walk_ctx_t *ctx, const char *dbi,
pgno_t pg, int deep) { pgno_t pgno, int deep) {
MDBX_page *mp; if (pgno == P_INVALID)
int rc, i, nkeys;
size_t header_size, unused_size, payload_size, align_bytes;
const char *type;
if (pg == P_INVALID)
return MDBX_SUCCESS; /* empty db */ return MDBX_SUCCESS; /* empty db */
MDBX_cursor mc; MDBX_cursor mc;
@ -11869,116 +11864,174 @@ static int __cold mdbx_env_walk(mdbx_walk_ctx_t *ctx, const char *dbi,
mc.mc_snum = 1; mc.mc_snum = 1;
mc.mc_txn = ctx->mw_txn; mc.mc_txn = ctx->mw_txn;
rc = mdbx_page_get(&mc, pg, &mp, NULL); MDBX_page *mp;
int rc = mdbx_page_get(&mc, pgno, &mp, NULL);
if (rc) if (rc)
return rc; return rc;
if (pg != mp->mp_pgno) if (pgno != mp->mp_pgno)
return MDBX_CORRUPTED; return MDBX_CORRUPTED;
nkeys = NUMKEYS(mp); const int nkeys = NUMKEYS(mp);
header_size = IS_LEAF2(mp) ? PAGEHDRSZ : PAGEHDRSZ + mp->mp_lower; size_t header_size = IS_LEAF2(mp) ? PAGEHDRSZ : PAGEHDRSZ + mp->mp_lower;
unused_size = SIZELEFT(mp); size_t unused_size = SIZELEFT(mp);
payload_size = 0; size_t payload_size = 0;
size_t align_bytes = 0;
MDBX_page_type_t type;
/* LY: Don't use mask here, e.g. bitwise /* LY: Don't use mask here, e.g. bitwise
* (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP). * (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP).
* Pages should not be marked dirty/loose or otherwise. */ * Pages should not be marked dirty/loose or otherwise. */
switch (mp->mp_flags) { switch (mp->mp_flags) {
case P_BRANCH: case P_BRANCH:
type = "branch"; type = MDBX_page_branch;
if (nkeys < 1) if (nkeys < 2)
return MDBX_CORRUPTED; return MDBX_CORRUPTED;
break; break;
case P_LEAF: case P_LEAF:
type = "leaf"; type = MDBX_page_leaf;
break;
case P_LEAF | P_SUBP:
type = "dupsort-subleaf";
break; break;
case P_LEAF | P_LEAF2: case P_LEAF | P_LEAF2:
type = "dupfixed-leaf"; type = MDBX_page_dupfixed_leaf;
break; break;
case P_LEAF | P_LEAF2 | P_SUBP:
type = "dupsort-dupfixed-subleaf";
break;
case P_META:
case P_OVERFLOW:
__fallthrough;
default: default:
return MDBX_CORRUPTED; return MDBX_CORRUPTED;
} }
for (align_bytes = i = 0; i < nkeys; for (int i = 0; i < nkeys;
align_bytes += ((payload_size + align_bytes) & 1), i++) { align_bytes += ((payload_size + align_bytes) & 1), i++) {
MDBX_node *node; if (type == MDBX_page_dupfixed_leaf) {
if (IS_LEAF2(mp)) {
/* LEAF2 pages have no mp_ptrs[] or node headers */ /* LEAF2 pages have no mp_ptrs[] or node headers */
payload_size += mp->mp_leaf2_ksize; payload_size += mp->mp_leaf2_ksize;
continue; continue;
} }
node = NODEPTR(mp, i); MDBX_node *node = NODEPTR(mp, i);
payload_size += NODESIZE + node->mn_ksize; payload_size += NODESIZE + NODEKSZ(node);
if (IS_BRANCH(mp)) { if (type == MDBX_page_branch) {
rc = mdbx_env_walk(ctx, dbi, NODEPGNO(node), deep); rc = mdbx_env_walk(ctx, dbi, NODEPGNO(node), deep);
if (rc) if (rc)
return rc; return rc;
continue; continue;
} }
assert(IS_LEAF(mp)); assert(type == MDBX_page_leaf);
if (node->mn_flags & F_BIGDATA) { switch (node->mn_flags) {
MDBX_page *omp; case 0 /* usual node */: {
pgno_t *opg; payload_size += NODEDSZ(node);
size_t over_header, over_payload, over_unused; } break;
case F_BIGDATA /* long data on the large/overflow page */: {
payload_size += sizeof(pgno_t); payload_size += sizeof(pgno_t);
opg = NODEDATA(node);
rc = mdbx_page_get(&mc, *opg, &omp, NULL); MDBX_page *op;
pgno_t large_pgno;
memcpy(&large_pgno, NODEDATA(node), sizeof(pgno_t));
rc = mdbx_page_get(&mc, large_pgno, &op, NULL);
if (rc) if (rc)
return rc; return rc;
if (*opg != omp->mp_pgno)
if (large_pgno != op->mp_pgno)
return MDBX_CORRUPTED; return MDBX_CORRUPTED;
/* LY: Don't use mask here, e.g. bitwise /* LY: Don't use mask here, e.g. bitwise
* (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP). * (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP).
* Pages should not be marked dirty/loose or otherwise. */ * Pages should not be marked dirty/loose or otherwise. */
if (P_OVERFLOW != omp->mp_flags) if (P_OVERFLOW != op->mp_flags)
return MDBX_CORRUPTED; return MDBX_CORRUPTED;
over_header = PAGEHDRSZ; const size_t over_header = PAGEHDRSZ;
over_payload = NODEDSZ(node); const size_t over_payload = NODEDSZ(node);
over_unused = pgno2bytes(ctx->mw_txn->mt_env, omp->mp_pages) - const size_t over_unused = pgno2bytes(ctx->mw_txn->mt_env, op->mp_pages) -
over_payload - over_header; over_payload - over_header;
rc = ctx->mw_visitor(*opg, omp->mp_pages, ctx->mw_user, dbi, rc = ctx->mw_visitor(large_pgno, op->mp_pages, ctx->mw_user, dbi,
"overflow-data", 1, over_payload, over_header, pgno2bytes(ctx->mw_txn->mt_env, op->mp_pages),
MDBX_page_large, 1, over_payload, over_header,
over_unused); over_unused);
if (rc) } break;
return rc;
continue;
}
payload_size += NODEDSZ(node); case F_SUBDATA /* sub-db */: {
if (node->mn_flags & F_SUBDATA) { const size_t namelen = NODEKSZ(node);
MDBX_db *db = NODEDATA(node); if (namelen == 0 || NODEDSZ(node) != sizeof(MDBX_db))
char *name = NULL; return MDBX_CORRUPTED;
payload_size += sizeof(MDBX_db);
if (!(node->mn_flags & F_DUPDATA)) { MDBX_db db;
name = NODEKEY(node); memcpy(&db, NODEDATA(node), sizeof(db));
ptrdiff_t namelen = (char *)db - name; char *name = memcpy(alloca(namelen + 1), NODEKEY(node), namelen);
name = memcpy(alloca(namelen + 1), name, namelen);
name[namelen] = 0; name[namelen] = 0;
rc = mdbx_env_walk(ctx, name, db.md_root, deep + 1);
} break;
case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */: {
if (NODEDSZ(node) != sizeof(MDBX_db))
return MDBX_CORRUPTED;
payload_size += sizeof(MDBX_db);
MDBX_db db;
memcpy(&db, NODEDATA(node), sizeof(db));
rc = mdbx_env_walk(ctx, dbi, db.md_root, deep + 1);
} break;
case F_DUPDATA /* short sub-page */: {
if (NODEDSZ(node) < PAGEHDRSZ)
return MDBX_CORRUPTED;
MDBX_page *sp = NODEDATA(node);
const int nsubkeys = NUMKEYS(sp);
size_t subheader_size =
IS_LEAF2(sp) ? PAGEHDRSZ : PAGEHDRSZ + sp->mp_lower;
size_t subunused_size = SIZELEFT(sp);
size_t subpayload_size = 0;
size_t subalign_bytes = 0;
MDBX_page_type_t subtype;
switch (sp->mp_flags & ~P_DIRTY /* ignore for sub-pages */) {
case P_LEAF | P_SUBP:
subtype = MDBX_subpage_leaf;
break;
case P_LEAF | P_LEAF2 | P_SUBP:
subtype = MDBX_subpage_dupfixed_leaf;
break;
default:
return MDBX_CORRUPTED;
} }
rc = mdbx_env_walk(ctx, (name && name[0]) ? name : dbi, db->md_root,
deep + 1); for (int j = 0; j < nsubkeys;
if (rc) subalign_bytes += ((subpayload_size + subalign_bytes) & 1), j++) {
return rc;
if (subtype == MDBX_subpage_dupfixed_leaf) {
/* LEAF2 pages have no mp_ptrs[] or node headers */
subpayload_size += sp->mp_leaf2_ksize;
} else {
assert(subtype == MDBX_subpage_leaf);
MDBX_node *subnode = NODEPTR(sp, j);
subpayload_size += NODESIZE + NODEKSZ(subnode) + NODEDSZ(subnode);
if (subnode->mn_flags != 0)
return MDBX_CORRUPTED;
} }
} }
return ctx->mw_visitor(mp->mp_pgno, 1, ctx->mw_user, dbi, type, nkeys, rc = ctx->mw_visitor(pgno, 0, ctx->mw_user, dbi, NODEDSZ(node), subtype,
nsubkeys, subpayload_size, subheader_size,
subunused_size + subalign_bytes);
header_size += subheader_size;
unused_size += subunused_size;
payload_size += subpayload_size;
align_bytes += subalign_bytes;
} break;
default:
return MDBX_CORRUPTED;
}
if (unlikely(rc))
return rc;
}
return ctx->mw_visitor(mp->mp_pgno, 1, ctx->mw_user, dbi,
ctx->mw_txn->mt_env->me_psize, type, nkeys,
payload_size, header_size, unused_size + align_bytes); payload_size, header_size, unused_size + align_bytes);
} }
@ -11998,16 +12051,17 @@ int __cold mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor,
ctx.mw_user = user; ctx.mw_user = user;
ctx.mw_visitor = visitor; ctx.mw_visitor = visitor;
int rc = visitor(0, NUM_METAS, user, "meta", "meta", NUM_METAS, int rc = visitor(
sizeof(MDBX_meta) * NUM_METAS, PAGEHDRSZ * NUM_METAS, 0, NUM_METAS, user, "meta", pgno2bytes(txn->mt_env, NUM_METAS),
(txn->mt_env->me_psize - sizeof(MDBX_meta) - PAGEHDRSZ) * MDBX_page_meta, NUM_METAS, sizeof(MDBX_meta) * NUM_METAS,
NUM_METAS); PAGEHDRSZ * NUM_METAS,
(txn->mt_env->me_psize - sizeof(MDBX_meta) - PAGEHDRSZ) * NUM_METAS);
if (!rc) if (!rc)
rc = mdbx_env_walk(&ctx, "free", txn->mt_dbs[FREE_DBI].md_root, 0); rc = mdbx_env_walk(&ctx, "free", txn->mt_dbs[FREE_DBI].md_root, 0);
if (!rc) if (!rc)
rc = mdbx_env_walk(&ctx, "main", txn->mt_dbs[MAIN_DBI].md_root, 0); rc = mdbx_env_walk(&ctx, "main", txn->mt_dbs[MAIN_DBI].md_root, 0);
if (!rc) if (!rc)
rc = visitor(P_INVALID, 0, user, NULL, NULL, 0, 0, 0, 0); rc = visitor(P_INVALID, 0, user, NULL, 0, MDBX_page_void, 0, 0, 0, 0);
return rc; return rc;
} }

View File

@ -241,33 +241,62 @@ static uint64_t problems_pop(struct problem *list) {
} }
static int pgvisitor(uint64_t pgno, unsigned pgnumber, void *ctx, static int pgvisitor(uint64_t pgno, unsigned pgnumber, void *ctx,
const char *dbi_name, const char *type, size_t nentries, const char *dbi_name, size_t page_size,
MDBX_page_type_t pagetype, size_t nentries,
size_t payload_bytes, size_t header_bytes, size_t payload_bytes, size_t header_bytes,
size_t unused_bytes) { size_t unused_bytes) {
(void)ctx; (void)ctx;
if (pagetype == MDBX_page_void)
return MDBX_SUCCESS;
if (type) {
uint64_t page_bytes = payload_bytes + header_bytes + unused_bytes; uint64_t page_bytes = payload_bytes + header_bytes + unused_bytes;
size_t page_size = (size_t)pgnumber * envstat.ms_psize;
walk_dbi_t *dbi = pagemap_lookup_dbi(dbi_name); walk_dbi_t *dbi = pagemap_lookup_dbi(dbi_name);
if (!dbi) if (!dbi)
return MDBX_ENOMEM; return MDBX_ENOMEM;
walk.pgcount += pgnumber;
const char *pagetype_caption;
switch (pagetype) {
default:
problem_add("page", pgno, "unknown page-type", "%u", (unsigned)pagetype);
pagetype_caption = "unknown";
break;
case MDBX_page_meta:
pagetype_caption = "meta";
break;
case MDBX_page_large:
pagetype_caption = "large";
break;
case MDBX_page_branch:
pagetype_caption = "branch";
break;
case MDBX_page_leaf:
pagetype_caption = "leaf";
break;
case MDBX_page_dupfixed_leaf:
pagetype_caption = "leaf-dupfixed";
break;
case MDBX_subpage_leaf:
pagetype_caption = "subleaf-dupsort";
break;
case MDBX_subpage_dupfixed_leaf:
pagetype_caption = "subleaf-dupfixed";
break;
}
if (verbose > 2 && (!only_subdb || strcmp(only_subdb, dbi_name) == 0)) { if (verbose > 2 && (!only_subdb || strcmp(only_subdb, dbi_name) == 0)) {
if (pgnumber == 1) if (pgnumber == 1)
print(" %s-page %" PRIu64, type, pgno); print(" %s-page %" PRIu64, pagetype_caption, pgno);
else else
print(" %s-span %" PRIu64 "[%u]", type, pgno, pgnumber); print(" %s-span %" PRIu64 "[%u]", pagetype_caption, pgno, pgnumber);
print(" of %s: header %" PRIiPTR ", payload %" PRIiPTR print(" of %s: header %" PRIiPTR ", payload %" PRIiPTR ", unused %" PRIiPTR
", unused %" PRIiPTR "\n", "\n",
dbi_name, header_bytes, payload_bytes, unused_bytes); dbi_name, header_bytes, payload_bytes, unused_bytes);
} }
walk.pgcount += pgnumber;
if (unused_bytes > page_size) if (unused_bytes > page_size)
problem_add("page", pgno, "illegal unused-bytes", problem_add("page", pgno, "illegal unused-bytes", "%u < %" PRIuPTR " < %u",
"%u < %" PRIuPTR " < %u", 0, unused_bytes, envstat.ms_psize); 0, unused_bytes, envstat.ms_psize);
if (header_bytes < (int)sizeof(long) || if (header_bytes < (int)sizeof(long) ||
(size_t)header_bytes >= envstat.ms_psize - sizeof(long)) (size_t)header_bytes >= envstat.ms_psize - sizeof(long))
@ -319,7 +348,6 @@ static int pgvisitor(uint64_t pgno, unsigned pgnumber, void *ctx,
++pgno; ++pgno;
} while (--pgnumber); } while (--pgnumber);
} }
}
return user_break ? MDBX_EINTR : MDBX_SUCCESS; return user_break ? MDBX_EINTR : MDBX_SUCCESS;
} }