mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-30 22:47:16 +08:00
mdbx: add env_open_for_recovery() (squashed).
Change-Id: I0151b21610def433745c33d1f6e0b66ce655d1a9
This commit is contained in:
parent
b321f67ed2
commit
d9daf2944d
9
mdbx.h
9
mdbx.h
@ -4232,6 +4232,15 @@ typedef int MDBX_pgvisitor_func(
|
||||
/** \brief B-tree traversal function. */
|
||||
LIBMDBX_API int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor,
|
||||
void *ctx, bool dont_check_keys_ordering);
|
||||
|
||||
/** \brief Open an environment instance using specific meta-page
|
||||
* for checking and recovery.
|
||||
*
|
||||
* This function mostly of internal API for `mdbx_chk` utility. */
|
||||
LIBMDBX_API int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname,
|
||||
unsigned target_meta,
|
||||
bool writeable);
|
||||
|
||||
/** @} B-tree Traversal */
|
||||
|
||||
/**** Attribute support functions for Nexenta
|
||||
|
160
src/core.c
160
src/core.c
@ -6163,42 +6163,65 @@ static int mdbx_txn_renew0(MDBX_txn *txn, const unsigned flags) {
|
||||
}
|
||||
|
||||
/* Seek & fetch the last meta */
|
||||
while (1) {
|
||||
MDBX_meta *const meta = mdbx_meta_head(env);
|
||||
mdbx_jitter4testing(false);
|
||||
const txnid_t snap = mdbx_meta_txnid_fluid(env, meta);
|
||||
mdbx_jitter4testing(false);
|
||||
if (likely(/* not recovery mode */ env->me_stuck_meta < 0)) {
|
||||
while (1) {
|
||||
MDBX_meta *const meta = mdbx_meta_head(env);
|
||||
mdbx_jitter4testing(false);
|
||||
const txnid_t snap = mdbx_meta_txnid_fluid(env, meta);
|
||||
mdbx_jitter4testing(false);
|
||||
if (likely(r)) {
|
||||
safe64_reset(&r->mr_txnid, false);
|
||||
r->mr_snapshot_pages_used = meta->mm_geo.next;
|
||||
r->mr_snapshot_pages_retired = meta->mm_pages_retired;
|
||||
safe64_write(&r->mr_txnid, snap);
|
||||
mdbx_jitter4testing(false);
|
||||
mdbx_assert(env, r->mr_pid == mdbx_getpid());
|
||||
mdbx_assert(
|
||||
env, r->mr_tid ==
|
||||
((env->me_flags & MDBX_NOTLS) ? 0 : mdbx_thread_self()));
|
||||
mdbx_assert(env, r->mr_txnid.inconsistent == snap);
|
||||
mdbx_compiler_barrier();
|
||||
env->me_lck->mti_readers_refresh_flag = true;
|
||||
mdbx_flush_incoherent_cpu_writeback();
|
||||
}
|
||||
mdbx_jitter4testing(true);
|
||||
|
||||
/* Snap the state from current meta-head */
|
||||
txn->mt_txnid = snap;
|
||||
txn->mt_geo = meta->mm_geo;
|
||||
memcpy(txn->mt_dbs, meta->mm_dbs, CORE_DBS * sizeof(MDBX_db));
|
||||
txn->mt_canary = meta->mm_canary;
|
||||
|
||||
/* LY: Retry on a race, ITS#7970. */
|
||||
mdbx_compiler_barrier();
|
||||
if (likely(meta == mdbx_meta_head(env) &&
|
||||
snap == mdbx_meta_txnid_fluid(env, meta) &&
|
||||
snap >= *env->me_oldest)) {
|
||||
mdbx_jitter4testing(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
/* recovery mode */
|
||||
MDBX_meta *const meta = METAPAGE(env, env->me_stuck_meta);
|
||||
txn->mt_txnid = mdbx_meta_txnid_stable(env, meta);
|
||||
txn->mt_geo = meta->mm_geo;
|
||||
memcpy(txn->mt_dbs, meta->mm_dbs, CORE_DBS * sizeof(MDBX_db));
|
||||
txn->mt_canary = meta->mm_canary;
|
||||
if (likely(r)) {
|
||||
safe64_reset(&r->mr_txnid, false);
|
||||
r->mr_snapshot_pages_used = meta->mm_geo.next;
|
||||
r->mr_snapshot_pages_retired = meta->mm_pages_retired;
|
||||
safe64_write(&r->mr_txnid, snap);
|
||||
r->mr_txnid.inconsistent = txn->mt_txnid;
|
||||
mdbx_jitter4testing(false);
|
||||
mdbx_assert(env, r->mr_pid == mdbx_getpid());
|
||||
mdbx_assert(
|
||||
env, r->mr_tid ==
|
||||
((env->me_flags & MDBX_NOTLS) ? 0 : mdbx_thread_self()));
|
||||
mdbx_assert(env, r->mr_txnid.inconsistent == snap);
|
||||
mdbx_assert(env, r->mr_txnid.inconsistent == txn->mt_txnid);
|
||||
mdbx_compiler_barrier();
|
||||
env->me_lck->mti_readers_refresh_flag = true;
|
||||
mdbx_flush_incoherent_cpu_writeback();
|
||||
}
|
||||
mdbx_jitter4testing(true);
|
||||
|
||||
/* Snap the state from current meta-head */
|
||||
txn->mt_txnid = snap;
|
||||
txn->mt_geo = meta->mm_geo;
|
||||
memcpy(txn->mt_dbs, meta->mm_dbs, CORE_DBS * sizeof(MDBX_db));
|
||||
txn->mt_canary = meta->mm_canary;
|
||||
|
||||
/* LY: Retry on a race, ITS#7970. */
|
||||
mdbx_compiler_barrier();
|
||||
if (likely(meta == mdbx_meta_head(env) &&
|
||||
snap == mdbx_meta_txnid_fluid(env, meta) &&
|
||||
snap >= *env->me_oldest)) {
|
||||
mdbx_jitter4testing(false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (unlikely(txn->mt_txnid < MIN_TXNID || txn->mt_txnid > MAX_TXNID)) {
|
||||
@ -6214,7 +6237,8 @@ static int mdbx_txn_renew0(MDBX_txn *txn, const unsigned flags) {
|
||||
} else {
|
||||
mdbx_assert(env, (flags & ~(MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_SPILLS |
|
||||
MDBX_WRITEMAP)) == 0);
|
||||
if (unlikely(txn->mt_owner == tid))
|
||||
if (unlikely(txn->mt_owner == tid ||
|
||||
/* not recovery mode */ env->me_stuck_meta >= 0))
|
||||
return MDBX_BUSY;
|
||||
MDBX_lockinfo *const lck = env->me_lck;
|
||||
if (lck && (env->me_flags & MDBX_NOTLS) == 0 &&
|
||||
@ -8578,7 +8602,9 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *dest,
|
||||
if (rc != MDBX_SUCCESS)
|
||||
continue;
|
||||
|
||||
if (mdbx_meta_ot(prefer_steady, env, dest, meta)) {
|
||||
if ((env->me_stuck_meta < 0)
|
||||
? mdbx_meta_ot(prefer_steady, env, dest, meta)
|
||||
: (meta_number == (unsigned)env->me_stuck_meta)) {
|
||||
*dest = *meta;
|
||||
if (!META_IS_STEADY(dest))
|
||||
loop_limit += 1; /* LY: should re-read to hush race with update */
|
||||
@ -8587,8 +8613,10 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *dest,
|
||||
}
|
||||
|
||||
if (dest->mm_psize == 0 ||
|
||||
(!META_IS_STEADY(dest) &&
|
||||
!meta_weak_acceptable(env, dest, lck_exclusive))) {
|
||||
((env->me_stuck_meta < 0)
|
||||
? (!META_IS_STEADY(dest) &&
|
||||
!meta_weak_acceptable(env, dest, lck_exclusive))
|
||||
: false)) {
|
||||
mdbx_error("%s", "no usable meta-pages, database is corrupted");
|
||||
if (rc == MDBX_SUCCESS) {
|
||||
/* TODO: try to restore the database by fully checking b-tree structure
|
||||
@ -9001,6 +9029,7 @@ int __cold mdbx_env_create(MDBX_env **penv) {
|
||||
env->me_dsync_fd = INVALID_HANDLE_VALUE;
|
||||
env->me_lfd = INVALID_HANDLE_VALUE;
|
||||
env->me_pid = mdbx_getpid();
|
||||
env->me_stuck_meta = -1;
|
||||
|
||||
int rc;
|
||||
const size_t os_psize = mdbx_syspagesize();
|
||||
@ -9450,7 +9479,8 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
|
||||
int err = mdbx_read_header(env, &meta, &filesize_before, lck_rc);
|
||||
if (unlikely(err != MDBX_SUCCESS)) {
|
||||
if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE || err != MDBX_ENODATA ||
|
||||
(env->me_flags & MDBX_RDONLY) != 0)
|
||||
(env->me_flags & MDBX_RDONLY) != 0 ||
|
||||
/* recovery mode */ env->me_stuck_meta >= 0)
|
||||
return err;
|
||||
|
||||
mdbx_debug("%s", "create new database");
|
||||
@ -9497,7 +9527,8 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
|
||||
const size_t used_aligned2os_bytes =
|
||||
ceil_powerof2(used_bytes, env->me_os_psize);
|
||||
if ((env->me_flags & MDBX_RDONLY) /* readonly */
|
||||
|| lck_rc != MDBX_RESULT_TRUE /* not exclusive */) {
|
||||
|| lck_rc != MDBX_RESULT_TRUE /* not exclusive */
|
||||
|| /* recovery mode */ env->me_stuck_meta >= 0) {
|
||||
/* use present params from db */
|
||||
const size_t pagesize = meta.mm_psize;
|
||||
err = mdbx_env_set_geometry(
|
||||
@ -9505,7 +9536,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
|
||||
meta.mm_geo.upper * pagesize, meta.mm_geo.grow * pagesize,
|
||||
meta.mm_geo.shrink * pagesize, meta.mm_psize);
|
||||
if (unlikely(err != MDBX_SUCCESS)) {
|
||||
mdbx_error("%s: err %d", "could not apply preconfigured db-geometry",
|
||||
mdbx_error("%s: err %d", "could not apply preconfigured geometry from db",
|
||||
err);
|
||||
return (err == MDBX_EINVAL) ? MDBX_INCOMPATIBLE : err;
|
||||
}
|
||||
@ -9651,12 +9682,17 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
|
||||
#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */
|
||||
|
||||
const unsigned meta_clash_mask = mdbx_meta_eq_mask(env);
|
||||
if (meta_clash_mask) {
|
||||
mdbx_error("meta-pages are clashed: mask 0x%d", meta_clash_mask);
|
||||
return MDBX_CORRUPTED;
|
||||
if (unlikely(meta_clash_mask)) {
|
||||
if (/* not recovery mode */ env->me_stuck_meta < 0) {
|
||||
mdbx_error("meta-pages are clashed: mask 0x%d", meta_clash_mask);
|
||||
return MDBX_CORRUPTED;
|
||||
} else {
|
||||
mdbx_warning("ignore meta-pages clashing (mask 0x%d) in recovery mode",
|
||||
meta_clash_mask);
|
||||
}
|
||||
}
|
||||
|
||||
while (1) {
|
||||
while (likely(/* not recovery mode */ env->me_stuck_meta < 0)) {
|
||||
MDBX_meta *const head = mdbx_meta_head(env);
|
||||
const txnid_t head_txnid = mdbx_meta_txnid_fluid(env, head);
|
||||
MDBX_meta *const steady = mdbx_meta_steady(env);
|
||||
@ -9771,21 +9807,22 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
|
||||
}
|
||||
if (env->me_dxb_mmap.current != env->me_dbgeo.now) {
|
||||
meta.mm_geo.now = bytes2pgno(env, env->me_dxb_mmap.current);
|
||||
mdbx_notice("update meta-geo to filesize %" PRIuPTR " bytes, %" PRIaPGNO
|
||||
" pages",
|
||||
mdbx_notice("need update meta-geo to filesize %" PRIuPTR
|
||||
" bytes, %" PRIaPGNO " pages",
|
||||
env->me_dxb_mmap.current, meta.mm_geo.now);
|
||||
}
|
||||
|
||||
if (memcmp(&meta.mm_geo, &head->mm_geo, sizeof(meta.mm_geo))) {
|
||||
if (env->me_flags & MDBX_RDONLY) {
|
||||
mdbx_warning(
|
||||
"skipped update meta.geo in read-only mode: from l%" PRIaPGNO
|
||||
"-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u, to l%" PRIaPGNO
|
||||
"-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u",
|
||||
head->mm_geo.lower, head->mm_geo.now, head->mm_geo.upper,
|
||||
head->mm_geo.shrink, head->mm_geo.grow, meta.mm_geo.lower,
|
||||
meta.mm_geo.now, meta.mm_geo.upper, meta.mm_geo.shrink,
|
||||
meta.mm_geo.grow);
|
||||
if ((env->me_flags & MDBX_RDONLY) != 0 ||
|
||||
/* recovery mode */ env->me_stuck_meta >= 0) {
|
||||
mdbx_warning("skipped update meta.geo in %s mode: from l%" PRIaPGNO
|
||||
"-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u, to l%" PRIaPGNO
|
||||
"-n%" PRIaPGNO "-u%" PRIaPGNO "/s%u-g%u",
|
||||
(env->me_stuck_meta < 0) ? "read-only" : "recovery",
|
||||
head->mm_geo.lower, head->mm_geo.now, head->mm_geo.upper,
|
||||
head->mm_geo.shrink, head->mm_geo.grow, meta.mm_geo.lower,
|
||||
meta.mm_geo.now, meta.mm_geo.upper, meta.mm_geo.shrink,
|
||||
meta.mm_geo.grow);
|
||||
} else {
|
||||
const txnid_t txnid = mdbx_meta_txnid_stable(env, head);
|
||||
const txnid_t next_txnid = safe64_txnid_next(txnid);
|
||||
@ -9822,8 +9859,10 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
|
||||
*env->me_discarded_tail = bytes2pgno(env, used_aligned2os_bytes);
|
||||
if (used_aligned2os_bytes < env->me_dxb_mmap.current) {
|
||||
#if defined(MADV_REMOVE)
|
||||
if (lck_rc && (env->me_flags & MDBX_WRITEMAP) != 0) {
|
||||
mdbx_notice("open-MADV_%s %u..%u", "REMOVE", *env->me_discarded_tail,
|
||||
if (lck_rc && (env->me_flags & MDBX_WRITEMAP) != 0 &&
|
||||
/* not recovery mode */ env->me_stuck_meta < 0) {
|
||||
mdbx_notice("open-MADV_%s %u..%u", "REMOVE (deallocate file space)",
|
||||
*env->me_discarded_tail,
|
||||
bytes2pgno(env, env->me_dxb_mmap.current));
|
||||
err =
|
||||
madvise(env->me_map + used_aligned2os_bytes,
|
||||
@ -10207,7 +10246,21 @@ static uint32_t merge_sync_flags(const uint32_t a, const uint32_t b) {
|
||||
return r;
|
||||
}
|
||||
|
||||
int __cold mdbx_env_open(MDBX_env *env, const char *pathname,
|
||||
__cold int mdbx_env_open_for_recovery(MDBX_env *env, const char *pathname,
|
||||
unsigned target_meta, bool writeable) {
|
||||
if (unlikely(target_meta >= NUM_METAS))
|
||||
return MDBX_EINVAL;
|
||||
int rc = check_env(env);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
|
||||
env->me_stuck_meta = (int8_t)target_meta;
|
||||
return mdbx_env_open(
|
||||
env, pathname, writeable ? MDBX_EXCLUSIVE : MDBX_EXCLUSIVE | MDBX_RDONLY,
|
||||
0);
|
||||
}
|
||||
|
||||
__cold int mdbx_env_open(MDBX_env *env, const char *pathname,
|
||||
MDBX_env_flags_t flags, mdbx_mode_t mode) {
|
||||
int rc = check_env(env);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
@ -10434,6 +10487,14 @@ int __cold mdbx_env_open(MDBX_env *env, const char *pathname,
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
if (unlikely(/* recovery mode */ env->me_stuck_meta >= 0) &&
|
||||
(lck_rc != /* exclusive */ MDBX_RESULT_TRUE ||
|
||||
(flags & MDBX_EXCLUSIVE) == 0)) {
|
||||
mdbx_error("%s", "recovery requires exclusive mode");
|
||||
rc = MDBX_BUSY;
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
mdbx_debug("opened dbenv %p", (void *)env);
|
||||
if (env->me_lck) {
|
||||
if (lck_rc == MDBX_RESULT_TRUE) {
|
||||
@ -10523,6 +10584,7 @@ bailout:
|
||||
|
||||
/* Destroy resources from mdbx_env_open(), clear our readers & DBIs */
|
||||
static int __cold mdbx_env_close0(MDBX_env *env) {
|
||||
env->me_stuck_meta = -1;
|
||||
if (!(env->me_flags & MDBX_ENV_ACTIVE)) {
|
||||
mdbx_ensure(env, env->me_lcklist_next == nullptr);
|
||||
return MDBX_SUCCESS;
|
||||
|
@ -929,17 +929,18 @@ struct MDBX_env {
|
||||
#define MDBX_DEPRECATED_MAPASYNC UINT32_C(0x100000)
|
||||
#define ENV_INTERNAL_FLAGS (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE | MDBX_ENV_TXKEY)
|
||||
uint32_t me_flags;
|
||||
mdbx_mmap_t me_dxb_mmap; /* The main data file */
|
||||
mdbx_mmap_t me_dxb_mmap; /* The main data file */
|
||||
#define me_map me_dxb_mmap.dxb
|
||||
#define me_lazy_fd me_dxb_mmap.fd
|
||||
mdbx_filehandle_t me_dsync_fd;
|
||||
mdbx_mmap_t me_lck_mmap; /* The lock file */
|
||||
mdbx_mmap_t me_lck_mmap; /* The lock file */
|
||||
#define me_lfd me_lck_mmap.fd
|
||||
#define me_lck me_lck_mmap.lck
|
||||
|
||||
unsigned me_psize; /* DB page size, inited from me_os_psize */
|
||||
unsigned me_psize2log; /* log2 of DB page size */
|
||||
unsigned me_os_psize; /* OS page size, from mdbx_syspagesize() */
|
||||
unsigned me_psize; /* DB page size, inited from me_os_psize */
|
||||
uint8_t me_psize2log; /* log2 of DB page size */
|
||||
int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */
|
||||
unsigned me_os_psize; /* OS page size, from mdbx_syspagesize() */
|
||||
unsigned me_maxreaders; /* size of the reader table */
|
||||
mdbx_fastmutex_t me_dbi_lock;
|
||||
MDBX_dbi me_numdbs; /* number of DBs opened */
|
||||
|
Loading…
x
Reference in New Issue
Block a user