mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-20 05:38:20 +08:00
mdbx: refine rollback while opening weak/invalid DB.
More for https://github.com/erthink/libmdbx/issues/217
This commit is contained in:
parent
8bdee27248
commit
108398c213
143
src/core.c
143
src/core.c
@ -11637,6 +11637,7 @@ __cold static int mdbx_setup_dxb(MDBX_env *env, const int lck_rc,
|
|||||||
env->me_poison_edge = bytes2pgno(env, env->me_dxb_mmap.limit);
|
env->me_poison_edge = bytes2pgno(env, env->me_dxb_mmap.limit);
|
||||||
#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */
|
#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */
|
||||||
|
|
||||||
|
//----------------------------------------- validate head & steady meta-pages
|
||||||
if (unlikely(env->me_stuck_meta >= 0)) {
|
if (unlikely(env->me_stuck_meta >= 0)) {
|
||||||
/* recovery mode */
|
/* recovery mode */
|
||||||
MDBX_meta clone;
|
MDBX_meta clone;
|
||||||
@ -11656,7 +11657,8 @@ __cold static int mdbx_setup_dxb(MDBX_env *env, const int lck_rc,
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE) {
|
if (lck_rc != /* lck exclusive */ MDBX_RESULT_TRUE) {
|
||||||
/* non-exclusive mode */
|
/* non-exclusive mode,
|
||||||
|
* meta-pages should be validated by the first process that opened the DB */
|
||||||
MDBX_meta *const head = mdbx_meta_head(env);
|
MDBX_meta *const head = mdbx_meta_head(env);
|
||||||
MDBX_meta *const steady = mdbx_meta_steady(env);
|
MDBX_meta *const steady = mdbx_meta_steady(env);
|
||||||
const txnid_t head_txnid = mdbx_meta_txnid_fluid(env, head);
|
const txnid_t head_txnid = mdbx_meta_txnid_fluid(env, head);
|
||||||
@ -11679,129 +11681,77 @@ __cold static int mdbx_setup_dxb(MDBX_env *env, const int lck_rc,
|
|||||||
mdbx_assert(env, lck_rc == MDBX_RESULT_TRUE);
|
mdbx_assert(env, lck_rc == MDBX_RESULT_TRUE);
|
||||||
/* exclusive mode */
|
/* exclusive mode */
|
||||||
|
|
||||||
MDBX_meta head_clone;
|
MDBX_meta const *const steady = mdbx_meta_steady(env);
|
||||||
MDBX_meta const *const head = mdbx_meta_head(env);
|
const txnid_t steady_txnid = mdbx_meta_txnid_fluid(env, steady);
|
||||||
err = mdbx_validate_meta_copy(env, head, &head_clone);
|
MDBX_meta steady_clone;
|
||||||
|
err = mdbx_validate_meta_copy(env, steady, &steady_clone);
|
||||||
if (unlikely(err != MDBX_SUCCESS)) {
|
if (unlikely(err != MDBX_SUCCESS)) {
|
||||||
mdbx_error("meta[%u] with %s txnid is corrupted",
|
mdbx_error("meta[%u] with %s txnid %" PRIaTXN
|
||||||
bytes2pgno(env, (uint8_t *)data_page(head) - env->me_map),
|
" is corrupted, %s needed",
|
||||||
"last");
|
bytes2pgno(env, (uint8_t *)steady - env->me_map), "steady",
|
||||||
|
steady_txnid, "manual recovery");
|
||||||
return MDBX_CORRUPTED;
|
return MDBX_CORRUPTED;
|
||||||
}
|
}
|
||||||
|
|
||||||
MDBX_meta steady_clone;
|
MDBX_meta const *const head = mdbx_meta_head(env);
|
||||||
MDBX_meta const *const steady = mdbx_meta_steady(env);
|
|
||||||
if (steady == head)
|
if (steady == head)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
err = mdbx_validate_meta_copy(env, steady, &steady_clone);
|
const pgno_t pgno = bytes2pgno(env, (uint8_t *)head - env->me_map);
|
||||||
if (unlikely(err != MDBX_SUCCESS)) {
|
const txnid_t head_txnid = mdbx_meta_txnid_fluid(env, head);
|
||||||
mdbx_error("meta[%u] with %s txnid is corrupted",
|
MDBX_meta head_clone;
|
||||||
bytes2pgno(env, (uint8_t *)data_page(steady) - env->me_map),
|
const bool head_valid =
|
||||||
"steady");
|
mdbx_validate_meta_copy(env, head, &head_clone) == MDBX_SUCCESS;
|
||||||
return MDBX_CORRUPTED;
|
if (unlikely(!head_valid)) {
|
||||||
|
mdbx_error("meta[%u] with %s txnid %" PRIaTXN
|
||||||
|
" is corrupted, %s needed",
|
||||||
|
pgno, "last", head_txnid, "rollback");
|
||||||
|
goto purge_meta_head;
|
||||||
}
|
}
|
||||||
|
|
||||||
const txnid_t head_txnid = mdbx_meta_txnid_fluid(env, head);
|
|
||||||
const txnid_t steady_txnid = mdbx_meta_txnid_fluid(env, steady);
|
|
||||||
mdbx_assert(env, head_txnid != head_txnid);
|
mdbx_assert(env, head_txnid != head_txnid);
|
||||||
if (head_txnid == steady_txnid)
|
if (head_txnid == steady_txnid)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
mdbx_assert(env, META_IS_STEADY(steady) && !META_IS_STEADY(head));
|
mdbx_assert(env, META_IS_STEADY(steady) && !META_IS_STEADY(head));
|
||||||
if (meta_bootid_match(head)) {
|
if (meta_bootid_match(head)) {
|
||||||
MDBX_meta clone = *head;
|
mdbx_warning(
|
||||||
err = mdbx_validate_meta(
|
"opening after an unclean shutdown, but boot-id(%016" PRIx64
|
||||||
env, &clone, data_page(head),
|
"-%016" PRIx64
|
||||||
bytes2pgno(env, (uint8_t *)data_page(head) - env->me_map), nullptr);
|
") is MATCH: rollback NOT needed, steady-sync NEEDED%s",
|
||||||
if (err == MDBX_SUCCESS) {
|
bootid.x, bootid.y,
|
||||||
mdbx_warning(
|
(env->me_flags & MDBX_RDONLY) ? ", but unable in read-only mode"
|
||||||
"opening after an unclean shutdown, but boot-id(%016" PRIx64
|
: "");
|
||||||
"-%016" PRIx64
|
if (env->me_flags & MDBX_RDONLY)
|
||||||
") is MATCH: rollback NOT needed, steady-sync NEEDED%s",
|
return MDBX_WANNA_RECOVERY;
|
||||||
bootid.x, bootid.y,
|
meta = head_clone;
|
||||||
(env->me_flags & MDBX_RDONLY) ? ", but unable in read-only mode"
|
atomic_store32(&env->me_lck->mti_unsynced_pages, meta.mm_geo.next,
|
||||||
: "");
|
mo_Relaxed);
|
||||||
if (env->me_flags & MDBX_RDONLY)
|
break;
|
||||||
return MDBX_WANNA_RECOVERY /* LY: could not recovery/sync */;
|
|
||||||
meta = clone;
|
|
||||||
atomic_store32(&env->me_lck->mti_unsynced_pages, meta.mm_geo.next,
|
|
||||||
mo_Relaxed);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
mdbx_warning("opening after an unclean shutdown, "
|
|
||||||
"but boot-id(%016" PRIx64 "-%016" PRIx64 ") is MATCH, "
|
|
||||||
"but last meta not valid, rollback needed",
|
|
||||||
bootid.x, bootid.y);
|
|
||||||
}
|
}
|
||||||
if (env->me_flags & MDBX_RDONLY) {
|
if (env->me_flags & MDBX_RDONLY) {
|
||||||
mdbx_error("rollback needed: (from head %" PRIaTXN
|
mdbx_error("rollback needed: (from head %" PRIaTXN
|
||||||
" to steady %" PRIaTXN "), but unable in read-only mode",
|
" to steady %" PRIaTXN "), but unable in read-only mode",
|
||||||
head_txnid, steady_txnid);
|
head_txnid, steady_txnid);
|
||||||
return MDBX_WANNA_RECOVERY /* LY: could not recovery/rollback */;
|
return MDBX_WANNA_RECOVERY;
|
||||||
}
|
}
|
||||||
|
|
||||||
const MDBX_meta *const meta0 = METAPAGE(env, 0);
|
purge_meta_head:
|
||||||
const MDBX_meta *const meta1 = METAPAGE(env, 1);
|
mdbx_notice("rollback: purge%s meta[%u] with%s txnid %" PRIaTXN,
|
||||||
const MDBX_meta *const meta2 = METAPAGE(env, 2);
|
head_valid ? "" : " invalid", pgno, head_valid ? " weak" : "",
|
||||||
txnid_t undo_txnid = 0 /* zero means undo is unneeded */;
|
head_txnid);
|
||||||
while (
|
err = mdbx_override_meta(env, pgno, 0, head_valid ? head : steady);
|
||||||
(head != meta0 && mdbx_meta_txnid_fluid(env, meta0) == undo_txnid) ||
|
|
||||||
(head != meta1 && mdbx_meta_txnid_fluid(env, meta1) == undo_txnid) ||
|
|
||||||
(head != meta2 && mdbx_meta_txnid_fluid(env, meta2) == undo_txnid))
|
|
||||||
undo_txnid = safe64_txnid_next(undo_txnid);
|
|
||||||
if (unlikely(undo_txnid >= steady_txnid)) {
|
|
||||||
mdbx_fatal("rollback failed: no suitable txnid (0,1,2) < %" PRIaTXN,
|
|
||||||
steady_txnid);
|
|
||||||
return MDBX_PANIC /* LY: could not recovery/rollback */;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* LY: rollback weak checkpoint */
|
|
||||||
mdbx_notice("rollback: from %" PRIaTXN ", to %" PRIaTXN " as %" PRIaTXN,
|
|
||||||
head_txnid, steady_txnid, undo_txnid);
|
|
||||||
mdbx_ensure(env, head_txnid == mdbx_meta_txnid_stable(env, head));
|
|
||||||
|
|
||||||
#if MDBX_ENABLE_PGOP_STAT
|
|
||||||
safe64_inc(&env->me_lck->mti_pgop_stat.wops, 1);
|
|
||||||
#endif /* MDBX_ENABLE_PGOP_STAT */
|
|
||||||
if (env->me_flags & MDBX_WRITEMAP) {
|
|
||||||
/* It is possible to update txnid without safe64_write(),
|
|
||||||
* since DB opened exclusive for now */
|
|
||||||
unaligned_poke_u64(4, (MDBX_meta *)head->mm_txnid_a, undo_txnid);
|
|
||||||
unaligned_poke_u64(4, (MDBX_meta *)head->mm_datasync_sign,
|
|
||||||
MDBX_DATASIGN_WEAK);
|
|
||||||
unaligned_poke_u64(4, (MDBX_meta *)head->mm_txnid_b, undo_txnid);
|
|
||||||
const size_t offset = (uint8_t *)data_page(head) - env->me_dxb_mmap.dxb;
|
|
||||||
const size_t paged_offset = floor_powerof2(offset, env->me_os_psize);
|
|
||||||
const size_t paged_length = ceil_powerof2(
|
|
||||||
env->me_psize + offset - paged_offset, env->me_os_psize);
|
|
||||||
err = mdbx_msync(&env->me_dxb_mmap, paged_offset, paged_length,
|
|
||||||
MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
|
|
||||||
} else {
|
|
||||||
MDBX_meta rollback = *head;
|
|
||||||
mdbx_meta_set_txnid(env, &rollback, undo_txnid);
|
|
||||||
unaligned_poke_u64(4, rollback.mm_datasync_sign, MDBX_DATASIGN_WEAK);
|
|
||||||
const mdbx_filehandle_t fd = (env->me_dsync_fd != INVALID_HANDLE_VALUE)
|
|
||||||
? env->me_dsync_fd
|
|
||||||
: env->me_lazy_fd;
|
|
||||||
err = mdbx_pwrite(fd, &rollback, sizeof(MDBX_meta),
|
|
||||||
(uint8_t *)head - (uint8_t *)env->me_map);
|
|
||||||
if (err == MDBX_SUCCESS && fd == env->me_lazy_fd)
|
|
||||||
err = mdbx_fsync(env->me_lazy_fd, MDBX_SYNC_DATA | MDBX_SYNC_IODQ);
|
|
||||||
}
|
|
||||||
if (err) {
|
if (err) {
|
||||||
mdbx_error("error %d rollback from %" PRIaTXN ", to %" PRIaTXN
|
mdbx_error("rollback: overwrite meta[%u] with txnid %" PRIaTXN
|
||||||
" as %" PRIaTXN,
|
", error %d",
|
||||||
err, head_txnid, steady_txnid, undo_txnid);
|
pgno, head_txnid, err);
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
mdbx_ensure(env, 0 == mdbx_meta_txnid_fluid(env, head));
|
||||||
mdbx_flush_incoherent_mmap(env->me_map, pgno2bytes(env, NUM_METAS),
|
|
||||||
env->me_os_psize);
|
|
||||||
mdbx_ensure(env, undo_txnid == mdbx_meta_txnid_fluid(env, head));
|
|
||||||
mdbx_ensure(env, 0 == mdbx_meta_eq_mask(env));
|
mdbx_ensure(env, 0 == mdbx_meta_eq_mask(env));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//---------------------------------------------------- shrink DB & update geo
|
||||||
const MDBX_meta *head = mdbx_meta_head(env);
|
const MDBX_meta *head = mdbx_meta_head(env);
|
||||||
if (lck_rc == /* lck exclusive */ MDBX_RESULT_TRUE) {
|
if (lck_rc == /* lck exclusive */ MDBX_RESULT_TRUE) {
|
||||||
/* re-check size after mmap */
|
/* re-check size after mmap */
|
||||||
@ -11870,6 +11820,7 @@ __cold static int mdbx_setup_dxb(MDBX_env *env, const int lck_rc,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//--------------------------------------------------- setup madvise/readahead
|
||||||
atomic_store32(&env->me_lck->mti_discarded_tail,
|
atomic_store32(&env->me_lck->mti_discarded_tail,
|
||||||
bytes2pgno(env, used_aligned2os_bytes), mo_Relaxed);
|
bytes2pgno(env, used_aligned2os_bytes), mo_Relaxed);
|
||||||
#if MDBX_ENABLE_MADVISE
|
#if MDBX_ENABLE_MADVISE
|
||||||
|
Loading…
x
Reference in New Issue
Block a user