mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-20 05:58:21 +08:00
lmdb: major rework of traversal b-tree for mdb_chk.
Change-Id: I9d382516f76092f44fc1a12d7554039582b87656
This commit is contained in:
parent
8ff2458003
commit
15e0600b6c
8
lmdb.h
8
lmdb.h
@ -1658,10 +1658,10 @@ typedef void MDB_debug_func(int type, const char *function, int line,
|
||||
|
||||
int mdb_setup_debug(int flags, MDB_debug_func* logger, long edge_txn);
|
||||
|
||||
typedef int MDB_pgwalk_func(size_t pgno, unsigned pgnumber, void* ctx,
|
||||
const char* dbi, char type,
|
||||
int payload_bytes, int header_bytes);
|
||||
int mdb_env_pgwalk(MDB_txn *txn, MDB_pgwalk_func* visitor, void* ctx);
|
||||
typedef int MDB_pgvisitor_func(size_t pgno, unsigned pgnumber, void* ctx,
|
||||
const char* dbi, const char *type,
|
||||
int payload_bytes, int header_bytes, int unused_bytes);
|
||||
int mdb_env_pgwalk(MDB_txn *txn, MDB_pgvisitor_func* visitor, void* ctx);
|
||||
|
||||
char* mdb_dkey(MDB_val *key, char *buf);
|
||||
|
||||
|
209
mdb.c
209
mdb.c
@ -1472,8 +1472,7 @@ mdb_page_list(MDB_page *mp)
|
||||
key.mv_data = node->mn_data;
|
||||
nsize = NODESIZE + key.mv_size;
|
||||
if (IS_BRANCH(mp)) {
|
||||
mdb_print("key %d: page %zu, %s\n", i, NODEPGNO(node),
|
||||
DKEY(&key));
|
||||
mdb_print("key %d: page %zu, %s\n", i, NODEPGNO(node), DKEY(&key));
|
||||
total += nsize;
|
||||
} else {
|
||||
if (F_ISSET(node->mn_flags, F_BIGDATA))
|
||||
@ -9794,154 +9793,141 @@ mdb_env_get_oomfunc(MDB_env *env)
|
||||
struct mdb_walk_ctx {
|
||||
MDB_txn *mw_txn;
|
||||
void *mw_user;
|
||||
MDB_pgwalk_func *mw_visitor;
|
||||
MDB_pgvisitor_func *mw_visitor;
|
||||
};
|
||||
|
||||
typedef struct mdb_walk_ctx mdb_walk_ctx_t;
|
||||
|
||||
|
||||
/** Depth-first tree traversal. */
|
||||
static int ESECT
|
||||
mdb_env_walk(mdb_walk_ctx_t *ctx, const char* dbi, pgno_t pg, int flags, int deep)
|
||||
{
|
||||
MDB_cursor mc;
|
||||
MDB_node *ni;
|
||||
MDB_page *mp;
|
||||
int rc;
|
||||
unsigned i;
|
||||
int rc, i, nkeys;
|
||||
unsigned header_size, unused_size, payload_size, align_bytes;
|
||||
const char* type;
|
||||
|
||||
/* Empty DB, nothing to do */
|
||||
if (pg == P_INVALID)
|
||||
return MDB_SUCCESS;
|
||||
return MDB_CORRUPTED;
|
||||
|
||||
if (deep < 2) {
|
||||
if ((rc = mdb_page_get(ctx->mw_txn, pg, &mp, NULL)) != 0)
|
||||
return rc;
|
||||
rc = ctx->mw_visitor(pg, 0, ctx->mw_user, dbi, 'R',
|
||||
ctx->mw_txn->mt_env->me_psize - PAGEHDRSZ - SIZELEFT(mp), PAGEHDRSZ);
|
||||
rc = mdb_page_get(ctx->mw_txn, pg, &mp, NULL);
|
||||
if (rc)
|
||||
return rc;
|
||||
if (pg != mp->mp_p.p_pgno)
|
||||
return MDB_CORRUPTED;
|
||||
|
||||
nkeys = NUMKEYS(mp);
|
||||
header_size = IS_LEAF2(mp) ? PAGEHDRSZ : PAGEBASE + mp->mp_lower;
|
||||
unused_size = SIZELEFT(mp);
|
||||
payload_size = 0;
|
||||
|
||||
/* LY: Don't use a mask here, e.g. a bitwise (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP).
|
||||
* Pages should not be marked dirty/loose or otherwise. */
|
||||
switch (mp->mp_flags) {
|
||||
case P_BRANCH:
|
||||
type = "branch";
|
||||
if (nkeys < 1)
|
||||
return MDB_CORRUPTED;
|
||||
break;
|
||||
case P_LEAF:
|
||||
type = "leaf";
|
||||
break;
|
||||
case P_LEAF|P_SUBP:
|
||||
type = "leaf-dupsort";
|
||||
break;
|
||||
case P_LEAF|P_LEAF2:
|
||||
/* #MDB_DUPFIXED records */
|
||||
type = "leaf-dupfixed";
|
||||
break;
|
||||
case P_LEAF|P_LEAF2|P_SUBP:
|
||||
/* #MDB_DUPSORT sub-pages */
|
||||
type = "leaf-dupfixed-dupsort";
|
||||
break;
|
||||
case P_META:
|
||||
case P_OVERFLOW:
|
||||
default:
|
||||
return MDB_CORRUPTED;
|
||||
}
|
||||
|
||||
mc.mc_snum = 1;
|
||||
mc.mc_top = 0;
|
||||
mc.mc_txn = ctx->mw_txn;
|
||||
|
||||
rc = mdb_page_get(ctx->mw_txn, pg, &mc.mc_pg[0], NULL);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
for (mp = mc.mc_pg[mc.mc_top]; IS_BRANCH(mp); ) {
|
||||
for (align_bytes = i = 0; i < nkeys;
|
||||
align_bytes += ((payload_size + align_bytes) & 1), i++) {
|
||||
MDB_node *node;
|
||||
|
||||
rc = ctx->mw_visitor(mp->mp_p.p_pgno, 1, ctx->mw_user, dbi, 'B',
|
||||
ctx->mw_txn->mt_env->me_psize - PAGEHDRSZ - SIZELEFT(mp), PAGEHDRSZ);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (NUMKEYS(mp) < 1)
|
||||
return MDB_CORRUPTED;
|
||||
|
||||
mdb_debug("branch page %zu has %u keys", mp->mp_pgno, NUMKEYS(mp));
|
||||
mdb_cassert(&mc, NUMKEYS(mp) > 1);
|
||||
mdb_debug("found index 0 to page %zu", NODEPGNO(NODEPTR(mp, 0)));
|
||||
|
||||
node = NODEPTR(mp, 0);
|
||||
if ((rc = mdb_page_get(mc.mc_txn, NODEPGNO(node), &mp, NULL)) != 0)
|
||||
return rc;
|
||||
|
||||
mc.mc_ki[mc.mc_top] = 0;
|
||||
if ((rc = mdb_cursor_push(&mc, mp)))
|
||||
return rc;
|
||||
if (IS_LEAF2(mp)) {
|
||||
/* LEAF2 pages have no mp_ptrs[] or node headers */
|
||||
payload_size += mp->mp_ksize;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!IS_LEAF(mp)) {
|
||||
mdb_debug("internal error, index points to a %02X page!?",
|
||||
mp->mp_flags);
|
||||
mc.mc_txn->mt_flags |= MDB_TXN_ERROR;
|
||||
return MDB_CORRUPTED;
|
||||
}
|
||||
node = NODEPTR(mp, i);
|
||||
payload_size += NODESIZE + node->mn_ksize;
|
||||
|
||||
mc.mc_flags |= C_INITIALIZED;
|
||||
mc.mc_flags &= ~C_EOF;
|
||||
|
||||
rc = ctx->mw_visitor(mp->mp_p.p_pgno, 1, ctx->mw_user, dbi, 'L',
|
||||
ctx->mw_txn->mt_env->me_psize - PAGEHDRSZ - SIZELEFT(mp), PAGEHDRSZ);
|
||||
if (IS_BRANCH(mp)) {
|
||||
rc = mdb_env_walk(ctx, dbi, NODEPGNO(node), flags, deep);
|
||||
if (rc)
|
||||
return rc;
|
||||
continue;
|
||||
}
|
||||
|
||||
while (mc.mc_snum > 0) {
|
||||
unsigned n;
|
||||
mp = mc.mc_pg[mc.mc_top];
|
||||
n = NUMKEYS(mp);
|
||||
|
||||
if (IS_LEAF(mp)) {
|
||||
if (!IS_LEAF2(mp) && !(flags & F_DUPDATA)) {
|
||||
for (i = 0; i < n; i++) {
|
||||
ni = NODEPTR(mp, i);
|
||||
if (ni->mn_flags & F_BIGDATA) {
|
||||
assert(IS_LEAF(mp));
|
||||
if (node->mn_ksize < 1)
|
||||
return MDB_CORRUPTED;
|
||||
if (node->mn_flags & F_BIGDATA) {
|
||||
MDB_page *omp;
|
||||
pgno_t *pg;
|
||||
pgno_t *opg;
|
||||
size_t over_header, over_payload, over_unused;
|
||||
|
||||
pg = NODEDATA(ni);
|
||||
rc = mdb_page_get(ctx->mw_txn, *pg, &omp, NULL);
|
||||
payload_size += sizeof(pgno_t);
|
||||
opg = NODEDATA(node);
|
||||
rc = mdb_page_get(ctx->mw_txn, *opg, &omp, NULL);
|
||||
if (rc)
|
||||
return rc;
|
||||
rc = ctx->mw_visitor(*pg, omp->mp_pages, ctx->mw_user, dbi, 'L',
|
||||
ctx->mw_txn->mt_env->me_psize - PAGEHDRSZ - SIZELEFT(mp), PAGEHDRSZ);
|
||||
if (*opg != omp->mp_p.p_pgno)
|
||||
return MDB_CORRUPTED;
|
||||
/* LY: Don't use a mask here, e.g. a bitwise (P_BRANCH|P_LEAF|P_LEAF2|P_META|P_OVERFLOW|P_SUBP).
|
||||
* Pages should not be marked dirty/loose or otherwise. */
|
||||
if (P_OVERFLOW != omp->mp_flags)
|
||||
return MDB_CORRUPTED;
|
||||
|
||||
over_header = PAGEHDRSZ;
|
||||
over_payload = NODEDSZ(node);
|
||||
over_unused = omp->mp_pages * ctx->mw_txn->mt_env->me_psize
|
||||
- over_payload - over_header;
|
||||
|
||||
rc = ctx->mw_visitor(*opg, omp->mp_pages, ctx->mw_user, dbi, "overflow-data",
|
||||
over_payload, over_header, over_unused);
|
||||
if (rc)
|
||||
return rc;
|
||||
} else if (ni->mn_flags & F_SUBDATA) {
|
||||
MDB_db *db = NODEDATA(ni);
|
||||
continue;
|
||||
}
|
||||
|
||||
payload_size += NODEDSZ(node);
|
||||
if (node->mn_flags & F_SUBDATA) {
|
||||
MDB_db *db = NODEDATA(node);
|
||||
char* name = NULL;
|
||||
if (! (ni->mn_flags & F_DUPDATA)) {
|
||||
name = NODEKEY(ni);
|
||||
|
||||
if (NODEDSZ(node) < 1)
|
||||
return MDB_CORRUPTED;
|
||||
if (! (node->mn_flags & F_DUPDATA)) {
|
||||
name = NODEKEY(node);
|
||||
int namelen = (char*) db - name;
|
||||
name = memcpy(alloca(namelen + 1), name, namelen);
|
||||
name[namelen] = 0;
|
||||
}
|
||||
rc = mdb_env_walk(ctx, (name && name[0]) ? name : dbi, db->md_root, ni->mn_flags & F_DUPDATA, deep + 1);
|
||||
rc = mdb_env_walk(ctx, (name && name[0]) ? name : dbi,
|
||||
db->md_root, node->mn_flags & F_DUPDATA, deep + 1);
|
||||
if (rc)
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
mc.mc_ki[mc.mc_top]++;
|
||||
if (mc.mc_ki[mc.mc_top] < n) {
|
||||
pgno_t pg;
|
||||
do {
|
||||
ni = NODEPTR(mp, mc.mc_ki[mc.mc_top]);
|
||||
pg = NODEPGNO(ni);
|
||||
rc = mdb_page_get(ctx->mw_txn, pg, &mp, NULL);
|
||||
if (rc)
|
||||
return rc;
|
||||
rc = ctx->mw_visitor(pg, 1, ctx->mw_user, dbi, IS_BRANCH(mp) ? 'B' : 'L',
|
||||
ctx->mw_txn->mt_env->me_psize - PAGEHDRSZ - SIZELEFT(mp), PAGEHDRSZ);
|
||||
if (rc)
|
||||
return rc;
|
||||
mc.mc_top++;
|
||||
mc.mc_snum++;
|
||||
mc.mc_ki[mc.mc_top] = 0;
|
||||
mc.mc_pg[mc.mc_top] = mp;
|
||||
}
|
||||
/* Whenever we advance to a sibling branch page,
|
||||
* we must proceed all the way down to its first leaf.
|
||||
*/
|
||||
while (IS_BRANCH(mp));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (! mc.mc_top)
|
||||
break;
|
||||
|
||||
mdb_cursor_pop(&mc);
|
||||
}
|
||||
return rc;
|
||||
return ctx->mw_visitor(mp->mp_p.p_pgno, 1, ctx->mw_user, dbi,
|
||||
type, payload_size, header_size, unused_size + align_bytes);
|
||||
}
|
||||
|
||||
int ESECT
|
||||
mdb_env_pgwalk(MDB_txn *txn, MDB_pgwalk_func* visitor, void* user)
|
||||
mdb_env_pgwalk(MDB_txn *txn, MDB_pgvisitor_func* visitor, void* user)
|
||||
{
|
||||
mdb_walk_ctx_t ctx;
|
||||
int rc;
|
||||
@ -9950,13 +9936,14 @@ mdb_env_pgwalk(MDB_txn *txn, MDB_pgwalk_func* visitor, void* user)
|
||||
ctx.mw_user = user;
|
||||
ctx.mw_visitor = visitor;
|
||||
|
||||
rc = visitor(0, 2, user, "meta", 'M', sizeof(MDB_meta), PAGEHDRSZ);
|
||||
if (! rc)
|
||||
rc = visitor(0, 2, user, "lmdb", "meta", sizeof(MDB_meta)*2, PAGEHDRSZ*2,
|
||||
(txn->mt_env->me_psize - sizeof(MDB_meta) - PAGEHDRSZ) *2);
|
||||
if (! rc && txn->mt_dbs[FREE_DBI].md_root != P_INVALID)
|
||||
rc = mdb_env_walk(&ctx, "free", txn->mt_dbs[FREE_DBI].md_root, 0, 0);
|
||||
if (! rc)
|
||||
if (! rc && txn->mt_dbs[MAIN_DBI].md_root != P_INVALID)
|
||||
rc = mdb_env_walk(&ctx, "main", txn->mt_dbs[MAIN_DBI].md_root, 0, 0);
|
||||
if (! rc)
|
||||
rc = visitor(P_INVALID, 0, user, NULL, 0, -1, 0);
|
||||
rc = visitor(P_INVALID, 0, user, NULL, NULL, -1, 0, 0);
|
||||
return rc;
|
||||
}
|
||||
|
||||
|
53
mdb_chk.c
53
mdb_chk.c
@ -134,6 +134,10 @@ static int pagemap_lookup_dbi(const char* dbi) {
|
||||
return last = -1;
|
||||
|
||||
walk.dbi_names[last] = strdup(dbi);
|
||||
|
||||
if (verbose > 2)
|
||||
print(" - found '%s' area\n", dbi);
|
||||
|
||||
return last;
|
||||
}
|
||||
|
||||
@ -201,28 +205,43 @@ static size_t problems_pop(struct problem* list) {
|
||||
}
|
||||
|
||||
static int pgvisitor(size_t pgno, unsigned pgnumber, void* ctx, const char* dbi,
|
||||
char type, int payload_bytes, int header_bytes)
|
||||
const char* type, int payload_bytes, int header_bytes, int unused_bytes)
|
||||
{
|
||||
if (pgnumber) {
|
||||
if (type) {
|
||||
size_t page_bytes = payload_bytes + header_bytes + unused_bytes;
|
||||
size_t page_size = pgnumber * stat.ms_psize;
|
||||
int index = pagemap_lookup_dbi(dbi);
|
||||
if (index < 0)
|
||||
return ENOMEM;
|
||||
|
||||
if (verbose > 3) {
|
||||
print((pgnumber < 2) ? " %s-page %zu" : " %s-span %zu..%zu (%u pages)",
|
||||
type, pgno, pgno + pgnumber - 1, pgnumber);
|
||||
print(" of %s: header %i, payload %i, unused %i\n",
|
||||
dbi, header_bytes, payload_bytes, unused_bytes);
|
||||
}
|
||||
|
||||
walk.pgcount += pgnumber;
|
||||
|
||||
if (unused_bytes < 0 || (size_t) unused_bytes > page_size)
|
||||
problem_add(pgno, "illegal unused-bytes", "(%zu < %i < %zu)",
|
||||
0, unused_bytes, stat.ms_psize);
|
||||
|
||||
if (header_bytes < sizeof(long) || header_bytes >= stat.ms_psize - sizeof(long))
|
||||
problem_add(pgno, "wrong header-length", "(%zu < %i < %zu)",
|
||||
sizeof(long), header_bytes, header_bytes >= stat.ms_psize - sizeof(long));
|
||||
problem_add(pgno, "illegal header-length", "(%zu < %i < %zu)",
|
||||
sizeof(long), header_bytes, stat.ms_psize - sizeof(long));
|
||||
else if (payload_bytes < 1)
|
||||
problem_add(pgno, "empty page", "(payload %zu bytes)", payload_bytes);
|
||||
else if (payload_bytes + header_bytes > pgnumber * stat.ms_psize)
|
||||
problem_add(pgno, "overflowed page", "(%zu + %zu > %zu)",
|
||||
payload_bytes, header_bytes, pgnumber * stat.ms_psize);
|
||||
problem_add(pgno, "empty page", "(payload %i bytes)", payload_bytes);
|
||||
|
||||
if (page_bytes != page_size)
|
||||
problem_add(pgno, "misused page", "(%zu != %zu (%ih + %ip + %iu))",
|
||||
page_size, page_bytes, header_bytes, payload_bytes, unused_bytes);
|
||||
else {
|
||||
walk.dbi_payload_bytes[index] += payload_bytes + header_bytes;
|
||||
walk.total_payload_bytes += payload_bytes + header_bytes;
|
||||
}
|
||||
|
||||
if (pgnumber) {
|
||||
do {
|
||||
if (pgno >= lastpgno)
|
||||
problem_add(pgno, "wrong page-no", "(> %zi)", lastpgno);
|
||||
@ -235,6 +254,7 @@ static int pgvisitor(size_t pgno, unsigned pgnumber, void* ctx, const char* dbi,
|
||||
++pgno;
|
||||
} while(--pgnumber);
|
||||
}
|
||||
}
|
||||
|
||||
return gotsignal ? EINTR : MDB_SUCCESS;
|
||||
}
|
||||
@ -285,7 +305,7 @@ static int handle_freedb(size_t record_number, MDB_val *key, MDB_val* data) {
|
||||
for (; i >= span && iptr[i - span] == pg; span++, pg++) ;
|
||||
}
|
||||
if (verbose > 2)
|
||||
print(" - transaction %zu, %zd pages, maxspan %zd%s\n",
|
||||
print(" transaction %zu, %zd pages, maxspan %zd%s\n",
|
||||
*(size_t *)key->mv_data, number, span, bad);
|
||||
if (verbose > 3) {
|
||||
int j = number - 1;
|
||||
@ -655,8 +675,11 @@ int main(int argc, char *argv[])
|
||||
info.me_mapsize / k, sf[i]);
|
||||
if (info.me_mapaddr)
|
||||
print(" - mapaddr %p\n", info.me_mapaddr);
|
||||
print(" - pagesize %u, max keysize %zu, max readers %u\n",
|
||||
stat.ms_psize, maxkeysize, info.me_maxreaders);
|
||||
print(" - pagesize %u, max keysize %zu (%s), max readers %u\n",
|
||||
stat.ms_psize, maxkeysize,
|
||||
(maxkeysize == 511) ? "default" :
|
||||
(maxkeysize == 0) ? "devel" : "custom",
|
||||
info.me_maxreaders);
|
||||
print(" - transactions: last %zu, bottom %zu, lag reading %zi\n", info.me_last_txnid,
|
||||
info.me_tail_txnid, info.me_last_txnid - info.me_tail_txnid);
|
||||
|
||||
@ -712,6 +735,9 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
|
||||
if (!dont_traversal) {
|
||||
struct problem* saved_list;
|
||||
size_t traversal_problems;
|
||||
|
||||
print("Traversal b-tree...\n");
|
||||
fflush(NULL);
|
||||
walk.pagemap = calloc(lastpgno, sizeof(*walk.pagemap));
|
||||
@ -721,7 +747,10 @@ int main(int argc, char *argv[])
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
saved_list = problems_push();
|
||||
rc = mdb_env_pgwalk(txn, pgvisitor, NULL);
|
||||
traversal_problems = problems_pop(saved_list);
|
||||
|
||||
if (rc) {
|
||||
if (rc == EINTR && gotsignal) {
|
||||
print(" - interrupted by signal\n");
|
||||
@ -759,7 +788,7 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
}
|
||||
print(" - summary: average fill %.1f%%, %zu problems\n",
|
||||
walk.total_payload_bytes * 100.0 / total_page_bytes, total_problems);
|
||||
walk.total_payload_bytes * 100.0 / total_page_bytes, traversal_problems);
|
||||
}
|
||||
} else if (verbose) {
|
||||
print("Skipping b-tree walk...\n");
|
||||
|
Loading…
x
Reference in New Issue
Block a user