mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-04 17:14:12 +08:00
lmdb: mdb: b-tree walk, page-map check in mdb_chk.
Change-Id: I6678b4d891c8fbfbc49ed600212f4ade39e25282
This commit is contained in:
parent
23720958b6
commit
f2703156f0
3
lmdb.h
3
lmdb.h
@ -1636,6 +1636,9 @@ typedef void MDB_debug_func(int type, const char *function, int line,
|
||||
|
||||
int mdb_setup_debug(int flags, MDB_debug_func* logger, long edge_txn);
|
||||
|
||||
/** @brief Callback invoked by #mdb_env_pgwalk() while it traverses the b-trees.
 *
 * @param[in] pgno     Number of the first page being reported.
 * @param[in] pgnumber Count of consecutive pages starting at pgno. The walker
 *                     passes 0 for notifications that do not account for a page
 *                     (the per-tree root notice and the final call).
 * @param[in] ctx      The user pointer that was handed to mdb_env_pgwalk().
 * @param[in] dbi      Name of the database the pages belong to ("meta", "free",
 *                     "main" or a named sub-database); NULL on the final call.
 * @param[in] type     Page kind tag chosen by the walker: 'M' for the meta
 *                     pages, 'R' for a tree-root notice, 'B' for a branch page,
 *                     'L' for leaf (and large-data) pages, 0 on the final call.
 * @return Zero to continue. Any non-zero value stops the walk and is returned
 *         to the caller of mdb_env_pgwalk().
 */
typedef int MDB_pgwalk_func(size_t pgno, unsigned pgnumber, void* ctx, const char* dbi, char type);
|
||||
/** @brief Report every page reachable from the free-list and main databases.
 *
 * The visitor is first called for the meta pages, then for the pages of the
 * free-list tree, then for the pages of the main tree (descending into nested
 * databases), and finally once more with pgno set to P_INVALID and a NULL name.
 *
 * @param[in] txn     Transaction whose snapshot is walked.
 * @param[in] visitor Callback invoked for the pages; see #MDB_pgwalk_func.
 * @param[in] ctx     Opaque pointer passed through to the visitor.
 * @return Zero on success, otherwise the first non-zero code produced by the
 *         visitor or by the page traversal.
 */
int mdb_env_pgwalk(MDB_txn *txn, MDB_pgwalk_func* visitor, void* ctx);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
163
mdb.c
163
mdb.c
@ -205,6 +205,7 @@ static MDB_INLINE void mdb_invalidate_cache(void *addr, int nbytes) {
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
#include <alloca.h>
|
||||
|
||||
#if defined(__sun) || defined(ANDROID)
|
||||
/* Most platforms have posix_memalign, older may only have memalign */
|
||||
@ -9865,4 +9866,166 @@ mdb_env_get_oomfunc(MDB_env *env)
|
||||
return env ? env->me_oom_func : NULL;
|
||||
}
|
||||
|
||||
struct mdb_walk_ctx {
|
||||
MDB_txn *mw_txn;
|
||||
void *mw_user;
|
||||
MDB_pgwalk_func *mw_visitor;
|
||||
};
|
||||
|
||||
typedef struct mdb_walk_ctx mdb_walk_ctx_t;
|
||||
|
||||
/** Depth-first tree traversal. */
|
||||
static int ESECT
|
||||
mdb_env_walk(mdb_walk_ctx_t *ctx, const char* dbi, pgno_t pg, int flags, int deep)
|
||||
{
|
||||
MDB_cursor mc;
|
||||
MDB_node *ni;
|
||||
MDB_page *mp;
|
||||
int rc;
|
||||
unsigned int i;
|
||||
|
||||
if (deep < 2) {
|
||||
rc = ctx->mw_visitor(pg, 0, ctx->mw_user, dbi, 'R');
|
||||
if (rc)
|
||||
return rc;
|
||||
}
|
||||
|
||||
/* Empty DB, nothing to do */
|
||||
if (pg == P_INVALID)
|
||||
return MDB_SUCCESS;
|
||||
|
||||
mc.mc_snum = 1;
|
||||
mc.mc_top = 0;
|
||||
mc.mc_txn = ctx->mw_txn;
|
||||
|
||||
rc = mdb_page_get(ctx->mw_txn, pg, &mc.mc_pg[0], NULL);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
for (mp = mc.mc_pg[mc.mc_top]; IS_BRANCH(mp); ) {
|
||||
MDB_node *node;
|
||||
|
||||
rc = ctx->mw_visitor(mp->mp_p.p_pgno, 1, ctx->mw_user, dbi, 'B');
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
if (NUMKEYS(mp) < 1)
|
||||
return MDB_CORRUPTED;
|
||||
|
||||
mdb_debug("branch page %zu has %u keys", mp->mp_pgno, NUMKEYS(mp));
|
||||
mdb_cassert(&mc, NUMKEYS(mp) > 1);
|
||||
mdb_debug("found index 0 to page %zu", NODEPGNO(NODEPTR(mp, 0)));
|
||||
|
||||
node = NODEPTR(mp, 0);
|
||||
|
||||
if ((rc = mdb_page_get(mc.mc_txn, NODEPGNO(node), &mp, NULL)) != 0)
|
||||
return rc;
|
||||
|
||||
mc.mc_ki[mc.mc_top] = 0;
|
||||
if ((rc = mdb_cursor_push(&mc, mp)))
|
||||
return rc;
|
||||
}
|
||||
|
||||
if (!IS_LEAF(mp)) {
|
||||
mdb_debug("internal error, index points to a %02X page!?",
|
||||
mp->mp_flags);
|
||||
mc.mc_txn->mt_flags |= MDB_TXN_ERROR;
|
||||
return MDB_CORRUPTED;
|
||||
}
|
||||
|
||||
mc.mc_flags |= C_INITIALIZED;
|
||||
mc.mc_flags &= ~C_EOF;
|
||||
|
||||
rc = ctx->mw_visitor(mp->mp_p.p_pgno, 1, ctx->mw_user, dbi, 'L');
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
while (mc.mc_snum > 0) {
|
||||
unsigned n;
|
||||
mp = mc.mc_pg[mc.mc_top];
|
||||
n = NUMKEYS(mp);
|
||||
|
||||
if (IS_LEAF(mp)) {
|
||||
if (!IS_LEAF2(mp) && !(flags & F_DUPDATA)) {
|
||||
for (i = 0; i < n; i++) {
|
||||
ni = NODEPTR(mp, i);
|
||||
if (ni->mn_flags & F_BIGDATA) {
|
||||
MDB_page *omp;
|
||||
pgno_t *pg;
|
||||
|
||||
pg = NODEDATA(ni);
|
||||
rc = mdb_page_get(ctx->mw_txn, *pg, &omp, NULL);
|
||||
if (rc)
|
||||
return rc;
|
||||
rc = ctx->mw_visitor(*pg, omp->mp_pages, ctx->mw_user, dbi, 'L');
|
||||
if (rc)
|
||||
return rc;
|
||||
} else if (ni->mn_flags & F_SUBDATA) {
|
||||
MDB_db *db = NODEDATA(ni);
|
||||
char* name = NULL;
|
||||
if (! (ni->mn_flags & F_DUPDATA)) {
|
||||
name = NODEKEY(ni);
|
||||
int namelen = (char*) db - name;
|
||||
name = memcpy(alloca(namelen + 1), name, namelen);
|
||||
name[namelen] = 0;
|
||||
}
|
||||
rc = mdb_env_walk(ctx, (name && name[0]) ? name : dbi, db->md_root, ni->mn_flags & F_DUPDATA, deep + 1);
|
||||
if (rc)
|
||||
return rc;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
mc.mc_ki[mc.mc_top]++;
|
||||
if (mc.mc_ki[mc.mc_top] < n) {
|
||||
pgno_t pg;
|
||||
do {
|
||||
ni = NODEPTR(mp, mc.mc_ki[mc.mc_top]);
|
||||
pg = NODEPGNO(ni);
|
||||
rc = mdb_page_get(ctx->mw_txn, pg, &mp, NULL);
|
||||
if (rc)
|
||||
return rc;
|
||||
rc = ctx->mw_visitor(pg, 1, ctx->mw_user, dbi, IS_BRANCH(mp) ? 'B' : 'L');
|
||||
if (rc)
|
||||
return rc;
|
||||
mc.mc_top++;
|
||||
mc.mc_snum++;
|
||||
mc.mc_ki[mc.mc_top] = 0;
|
||||
mc.mc_pg[mc.mc_top] = mp;
|
||||
}
|
||||
/* Whenever we advance to a sibling branch page,
|
||||
* we must proceed all the way down to its first leaf.
|
||||
*/
|
||||
while (IS_BRANCH(mp));
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (! mc.mc_top)
|
||||
break;
|
||||
|
||||
mdb_cursor_pop(&mc);
|
||||
}
|
||||
return rc;
|
||||
}
|
||||
|
||||
/** Report all pages of the environment snapshot seen by txn.
 *
 * Call order: the meta pages (page 0, two pages, type 'M'), the free-list
 * tree, the main tree, and a closing call with P_INVALID / NULL name.
 * The first non-zero result from any step ends the walk and is returned.
 */
int mdb_env_pgwalk(MDB_txn *txn, MDB_pgwalk_func* visitor, void* user)
{
	mdb_walk_ctx_t walk;
	int err;

	walk.mw_visitor = visitor;
	walk.mw_user = user;
	walk.mw_txn = txn;

	err = visitor(0, 2, user, "meta", 'M');
	if (err)
		return err;

	err = mdb_env_walk(&walk, "free", txn->mt_dbs[FREE_DBI].md_root, 0, 0);
	if (err)
		return err;

	err = mdb_env_walk(&walk, "main", txn->mt_dbs[MAIN_DBI].md_root, 0, 0);
	if (err)
		return err;

	/* Closing notification: no page, no database name. */
	return visitor(P_INVALID, 0, user, NULL, 0);
}
|
||||
|
||||
/** @} */
|
||||
|
148
mdb_chk.c
148
mdb_chk.c
@ -26,6 +26,7 @@
|
||||
#include <unistd.h>
|
||||
#include <signal.h>
|
||||
#include <stdarg.h>
|
||||
#include <malloc.h>
|
||||
|
||||
#include "lmdb.h"
|
||||
#include "midl.h"
|
||||
@ -52,13 +53,20 @@ static void signal_hanlder( int sig )
|
||||
gotsignal = 1;
|
||||
}
|
||||
|
||||
/* Capacity of the dbi_names[] / dbi_pages[] tables; pagemap_lookup_dbi()
 * reports failure once every slot is taken. */
#define MAX_DBI 32768
|
||||
|
||||
/* Database names indexed by the values stored in pagemap[]. Slot 0 is the
 * fixed "@gc" entry; the other slots are filled by pagemap_lookup_dbi(). */
const char* dbi_names[MAX_DBI] = { "@gc" };
|
||||
/* Page counters, one per dbi_names[] slot; pgvisitor() bumps the owner's
 * counter for each page it records in pagemap[]. */
size_t dbi_pages[MAX_DBI];
|
||||
/* One entry per page number, allocated zero-filled in main(): holds the
 * dbi_names[] index of the database that claimed the page, 0 if none did. */
short *pagemap;
|
||||
|
||||
MDB_env *env;
|
||||
MDB_txn *txn;
|
||||
MDB_envinfo info;
|
||||
MDB_stat stat;
|
||||
size_t maxkeysize, reclaimable_pages, freedb_pages;
|
||||
size_t maxkeysize, reclaimable_pages, freedb_pages, lastpgno;
|
||||
unsigned userdb_count;
|
||||
unsigned verbose = 1, quiet;
|
||||
/* Running total of the page counts reported to pgvisitor(). */
size_t pgcount;
|
||||
|
||||
static void print(const char* msg, ...) {
|
||||
if (! quiet) {
|
||||
@ -91,6 +99,23 @@ struct problem {
|
||||
struct problem* problems_list;
|
||||
size_t total_problems;
|
||||
|
||||
static int pagemap_lookup_dbi(const char* dbi) {
|
||||
static int last;
|
||||
|
||||
if (last > 0 && strcmp(dbi_names[last], dbi) == 0)
|
||||
return last;
|
||||
|
||||
for(last = 1; dbi_names[last] && last < MAX_DBI; ++last)
|
||||
if (strcmp(dbi_names[last], dbi) == 0)
|
||||
return last;
|
||||
|
||||
if (last == MAX_DBI)
|
||||
return last = -1;
|
||||
|
||||
dbi_names[last] = strdup(dbi);
|
||||
return last;
|
||||
}
|
||||
|
||||
static void problem_add(size_t entry_number, const char* msg, const char *extra, ...) {
|
||||
total_problems++;
|
||||
|
||||
@ -149,6 +174,31 @@ static size_t problems_pop(struct problem* list) {
|
||||
return total;
|
||||
}
|
||||
|
||||
static int pgvisitor(size_t pgno, unsigned pgnumber, void* ctx, const char* dbi, char type)
|
||||
{
|
||||
if (pgnumber) {
|
||||
pgcount += pgnumber;
|
||||
|
||||
int index = pagemap_lookup_dbi(dbi);
|
||||
if (index < 0)
|
||||
return ENOMEM;
|
||||
|
||||
do {
|
||||
if (pgno >= lastpgno)
|
||||
problem_add(pgno, "wrong page-no", "(> %zi)", lastpgno);
|
||||
else if (pagemap[pgno])
|
||||
problem_add(pgno, "page already used", "(in %s)", dbi_names[pagemap[pgno]]);
|
||||
else {
|
||||
pagemap[pgno] = index;
|
||||
dbi_pages[index] += 1;
|
||||
}
|
||||
++pgno;
|
||||
} while(--pgnumber);
|
||||
}
|
||||
|
||||
return MDB_SUCCESS;
|
||||
}
|
||||
|
||||
typedef long (visitor)(size_t record_number, MDB_val *key, MDB_val* data);
|
||||
static long process_db(MDB_dbi dbi, char *name, visitor *handler, int silent);
|
||||
|
||||
@ -362,7 +412,6 @@ static long process_db(MDB_dbi dbi, char *name, visitor *handler, int silent)
|
||||
if (record_count != ms.ms_entries )
|
||||
problem_add(record_count, "differentent number of entries",
|
||||
" (%zu != %zu)", record_count, ms.ms_entries);
|
||||
|
||||
bailout:
|
||||
problems_count = problems_pop(saved_list);
|
||||
if (! silent && verbose) {
|
||||
@ -388,6 +437,7 @@ int main(int argc, char *argv[])
|
||||
char *envname;
|
||||
int envflags = 0;
|
||||
long problems_maindb = 0, problems_freedb = 0, problems_deep = 0;
|
||||
size_t n;
|
||||
|
||||
if (argc < 2) {
|
||||
usage(prog);
|
||||
@ -463,7 +513,16 @@ int main(int argc, char *argv[])
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
if (! quiet && verbose) {
|
||||
lastpgno = info.me_last_pgno + 1;
|
||||
errno = 0;
|
||||
pagemap = calloc(lastpgno, sizeof(*pagemap));
|
||||
if (! pagemap) {
|
||||
rc = errno ? errno : ENOMEM;
|
||||
error("calloc failed, error %d %s\n", rc, mdb_strerror(rc));
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
if (verbose) {
|
||||
print(" - map size %zu (%.1fMb, %.1fGb)\n", info.me_mapsize,
|
||||
(double) info.me_mapsize / (1024 * 1024),
|
||||
(double) info.me_mapsize / (1024 * 1024 * 1024));
|
||||
@ -473,31 +532,6 @@ int main(int argc, char *argv[])
|
||||
stat.ms_psize, maxkeysize, info.me_maxreaders);
|
||||
print(" - last txn %zu, tail %zu (%zi)\n", info.me_last_txnid,
|
||||
info.me_tail_txnid, info.me_tail_txnid - info.me_last_txnid);
|
||||
|
||||
size_t value = info.me_mapsize / stat.ms_psize;
|
||||
double percent = value / 100.0;
|
||||
print(" - pages: %zu total", value);
|
||||
|
||||
value = info.me_last_pgno + 1;
|
||||
print(", allocated %zu (%.1f%%)", value, value / percent);
|
||||
|
||||
value = info.me_mapsize / stat.ms_psize - (info.me_last_pgno+1);
|
||||
print(", remained %zu (%.1f%%)", value, value / percent);
|
||||
|
||||
value = info.me_last_pgno + 1 - freedb_pages;
|
||||
print(", used now %zu (%.1f%%)", value, value / percent);
|
||||
|
||||
value = freedb_pages;
|
||||
print(", free %zu (%.1f%%)", value, value / percent);
|
||||
|
||||
value = freedb_pages - reclaimable_pages;
|
||||
print(", reading %zu (%.1f%%)", value, value / percent);
|
||||
|
||||
value = reclaimable_pages;
|
||||
print(", reclaimable %zu (%.1f%%)", value, value / percent);
|
||||
|
||||
value = info.me_mapsize / stat.ms_psize - (info.me_last_pgno + 1) + reclaimable_pages;
|
||||
print(", available %zu (%.1f%%)\n", value, value / percent);
|
||||
}
|
||||
|
||||
rc = mdb_txn_begin(env, NULL, MDB_RDONLY, &txn);
|
||||
@ -506,20 +540,74 @@ int main(int argc, char *argv[])
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
problems_maindb = process_db(-1, /* MAINT_DBI */ NULL, NULL, 0);
|
||||
print("Walking b-tree...\n");
|
||||
rc = mdb_env_pgwalk(txn, pgvisitor, NULL);
|
||||
if (rc) {
|
||||
error("mdb_env_pgwalk failed, error %d %s\n", rc, mdb_strerror(rc));
|
||||
goto bailout;
|
||||
}
|
||||
for( n = 0; n < lastpgno; ++n)
|
||||
if (! pagemap[n])
|
||||
dbi_pages[0] += 1;
|
||||
if (verbose) {
|
||||
print(" - dbi pages: %zu total", pgcount);
|
||||
if (verbose > 1)
|
||||
for (i = 1; i < MAX_DBI && dbi_names[i]; ++i)
|
||||
print(", %s %zu", dbi_names[i], dbi_pages[i]);
|
||||
print(", %s %zu\n", dbi_names[0], dbi_pages[0]);
|
||||
}
|
||||
|
||||
problems_maindb = process_db(-1, /* MAIN_DBI */ NULL, NULL, 0);
|
||||
problems_freedb = process_db(0 /* FREE_DBI */, "free", handle_freedb, 0);
|
||||
|
||||
if (verbose) {
|
||||
size_t value = info.me_mapsize / stat.ms_psize;
|
||||
double percent = value / 100.0;
|
||||
print(" - pages info: %zu total", value);
|
||||
print(", allocated %zu (%.1f%%)", lastpgno, lastpgno / percent);
|
||||
|
||||
if (verbose > 1) {
|
||||
value = info.me_mapsize / stat.ms_psize - lastpgno;
|
||||
print(", remained %zu (%.1f%%)", value, value / percent);
|
||||
|
||||
value = lastpgno - freedb_pages;
|
||||
print(", used %zu (%.1f%%)", value, value / percent);
|
||||
|
||||
print(", gc %zu (%.1f%%)", freedb_pages, freedb_pages / percent);
|
||||
|
||||
value = freedb_pages - reclaimable_pages;
|
||||
print(", reading %zu (%.1f%%)", value, value / percent);
|
||||
|
||||
print(", reclaimable %zu (%.1f%%)", reclaimable_pages, reclaimable_pages / percent);
|
||||
}
|
||||
|
||||
value = info.me_mapsize / stat.ms_psize - lastpgno + reclaimable_pages;
|
||||
print(", available %zu (%.1f%%)\n", value, value / percent);
|
||||
}
|
||||
|
||||
if (pgcount != lastpgno - freedb_pages) {
|
||||
error("used pages mismatch (%zu != %zu)\n", pgcount, lastpgno - freedb_pages);
|
||||
goto bailout;
|
||||
}
|
||||
if (dbi_pages[0] != freedb_pages) {
|
||||
error("gc pages mismatch (%zu != %zu)\n", dbi_pages[0], freedb_pages);
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
if (problems_maindb == 0 && problems_freedb == 0)
|
||||
problems_deep = process_db(-1, NULL, handle_maindb, 1);
|
||||
|
||||
mdb_txn_abort(txn);
|
||||
|
||||
if (! userdb_count && verbose)
|
||||
print("%s: %s does not contain multiple databases\n", prog, envname);
|
||||
|
||||
if (rc && ! quiet)
|
||||
if (rc)
|
||||
error("%s: %s: %s\n", prog, envname, mdb_strerror(rc));
|
||||
|
||||
bailout:
|
||||
mdb_env_close(env);
|
||||
free(pagemap);
|
||||
if (rc)
|
||||
return EXIT_FAILURE + 2;
|
||||
if (problems_maindb || problems_freedb)
|
||||
|
Loading…
x
Reference in New Issue
Block a user