mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-04 17:14:12 +08:00
mdbx: check-and-retry a mvcc-snapshot for unified page/buffer cache coherency.
Part 1 of 2 of the workaround for https://github.com/erthink/libmdbx/issues/269.
This commit is contained in:
parent
f84d9f6208
commit
00ed61c685
142
src/core.c
142
src/core.c
@ -3762,6 +3762,8 @@ static int __must_check_result mdbx_page_split(MDBX_cursor *mc,
|
|||||||
MDBX_val *const newdata,
|
MDBX_val *const newdata,
|
||||||
pgno_t newpgno, unsigned nflags);
|
pgno_t newpgno, unsigned nflags);
|
||||||
|
|
||||||
|
static bool meta_checktxnid(const MDBX_env *env, const MDBX_meta *meta,
|
||||||
|
bool report);
|
||||||
static int __must_check_result mdbx_validate_meta_copy(MDBX_env *env,
|
static int __must_check_result mdbx_validate_meta_copy(MDBX_env *env,
|
||||||
const MDBX_meta *meta,
|
const MDBX_meta *meta,
|
||||||
MDBX_meta *dest);
|
MDBX_meta *dest);
|
||||||
@ -6271,6 +6273,8 @@ static int mdbx_meta_unsteady(MDBX_env *env, const txnid_t last_steady,
|
|||||||
else
|
else
|
||||||
return mdbx_pwrite(fd, &wipe, sizeof(meta->mm_datasync_sign),
|
return mdbx_pwrite(fd, &wipe, sizeof(meta->mm_datasync_sign),
|
||||||
(uint8_t *)&meta->mm_datasync_sign - env->me_map);
|
(uint8_t *)&meta->mm_datasync_sign - env->me_map);
|
||||||
|
if (constmeta_txnid(env, meta) == last_steady)
|
||||||
|
mdbx_assert(env, meta_checktxnid(env, meta, true));
|
||||||
}
|
}
|
||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
@ -7548,6 +7552,102 @@ __cold int mdbx_thread_unregister(const MDBX_env *env) {
|
|||||||
return MDBX_SUCCESS;
|
return MDBX_SUCCESS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* check against https://github.com/erthink/libmdbx/issues/269 */
|
||||||
|
static bool meta_checktxnid(const MDBX_env *env, const MDBX_meta *meta,
|
||||||
|
bool report) {
|
||||||
|
const txnid_t meta_txnid = constmeta_txnid(env, meta);
|
||||||
|
const txnid_t freedb_mod_txnid = meta->mm_dbs[FREE_DBI].md_mod_txnid;
|
||||||
|
const txnid_t maindb_mod_txnid = meta->mm_dbs[MAIN_DBI].md_mod_txnid;
|
||||||
|
|
||||||
|
const pgno_t freedb_root_pgno = meta->mm_dbs[FREE_DBI].md_root;
|
||||||
|
const MDBX_page *freedb_root = (env->me_map && freedb_root_pgno != P_INVALID)
|
||||||
|
? pgno2page(env, freedb_root_pgno)
|
||||||
|
: nullptr;
|
||||||
|
|
||||||
|
const pgno_t maindb_root_pgno = meta->mm_dbs[MAIN_DBI].md_root;
|
||||||
|
const MDBX_page *maindb_root = (env->me_map && maindb_root_pgno != P_INVALID)
|
||||||
|
? pgno2page(env, maindb_root_pgno)
|
||||||
|
: nullptr;
|
||||||
|
|
||||||
|
const uint64_t magic_and_version =
|
||||||
|
unaligned_peek_u64(4, &meta->mm_magic_and_version);
|
||||||
|
bool ok = true;
|
||||||
|
if (unlikely(meta_txnid < freedb_mod_txnid ||
|
||||||
|
(!freedb_mod_txnid && freedb_root &&
|
||||||
|
likely(magic_and_version == MDBX_DATA_MAGIC)))) {
|
||||||
|
if (report)
|
||||||
|
mdbx_warning(
|
||||||
|
"catch invalid %sdb_mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN
|
||||||
|
"%s",
|
||||||
|
"free", freedb_mod_txnid, meta_txnid,
|
||||||
|
"(workaround for incoherent flaw of unified page/buffer cache)");
|
||||||
|
ok = false;
|
||||||
|
}
|
||||||
|
if (unlikely(meta_txnid < maindb_mod_txnid ||
|
||||||
|
(!maindb_mod_txnid && maindb_root &&
|
||||||
|
likely(magic_and_version == MDBX_DATA_MAGIC)))) {
|
||||||
|
if (report)
|
||||||
|
mdbx_warning(
|
||||||
|
"catch invalid %sdb_mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN
|
||||||
|
" %s",
|
||||||
|
"main", maindb_mod_txnid, meta_txnid,
|
||||||
|
"(workaround for incoherent flaw of unified page/buffer cache)");
|
||||||
|
ok = false;
|
||||||
|
}
|
||||||
|
if (likely(freedb_root && freedb_mod_txnid)) {
|
||||||
|
const txnid_t root_txnid = freedb_root->mp_txnid;
|
||||||
|
if (unlikely(root_txnid != freedb_mod_txnid)) {
|
||||||
|
if (report)
|
||||||
|
mdbx_warning(
|
||||||
|
"catch invalid root_page_txnid %" PRIaTXN
|
||||||
|
" for %sdb_mod_txnid %" PRIaTXN " %s",
|
||||||
|
root_txnid, "free", maindb_mod_txnid,
|
||||||
|
"(workaround for incoherent flaw of unified page/buffer cache)");
|
||||||
|
ok = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (likely(maindb_root && maindb_mod_txnid)) {
|
||||||
|
const txnid_t root_txnid = maindb_root->mp_txnid;
|
||||||
|
if (unlikely(root_txnid != maindb_mod_txnid)) {
|
||||||
|
if (report)
|
||||||
|
mdbx_warning(
|
||||||
|
"catch invalid root_page_txnid %" PRIaTXN
|
||||||
|
" for %sdb_mod_txnid %" PRIaTXN " %s",
|
||||||
|
root_txnid, "main", maindb_mod_txnid,
|
||||||
|
"(workaround for incoherent flaw of unified page/buffer cache)");
|
||||||
|
ok = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ok;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check with timeout as the workaround
|
||||||
|
* for https://github.com/erthink/libmdbx/issues/269 */
|
||||||
|
static int meta_waittxnid(const MDBX_env *env, const MDBX_meta *meta,
|
||||||
|
uint64_t *timestamp) {
|
||||||
|
if (likely(meta_checktxnid(env, (const MDBX_meta *)meta, !*timestamp)))
|
||||||
|
return MDBX_SUCCESS;
|
||||||
|
|
||||||
|
if (!*timestamp)
|
||||||
|
*timestamp = mdbx_osal_monotime();
|
||||||
|
else if (unlikely(mdbx_osal_monotime() - *timestamp > 65536 / 10)) {
|
||||||
|
mdbx_error("bailout waiting for valid snapshot %s",
|
||||||
|
"(workaround for incoherent flaw of unified page/buffer cache)");
|
||||||
|
return MDBX_CORRUPTED;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if defined(_WIN32) || defined(_WIN64)
|
||||||
|
SwitchToThread();
|
||||||
|
#elif defined(__linux__) || defined(__gnu_linux__) || defined(_UNIX03_SOURCE)
|
||||||
|
sched_yield();
|
||||||
|
#elif (defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 1)) || defined(_OPEN_THREADS)
|
||||||
|
pthread_yield();
|
||||||
|
#else
|
||||||
|
usleep(42);
|
||||||
|
#endif
|
||||||
|
return MDBX_RESULT_TRUE;
|
||||||
|
}
|
||||||
|
|
||||||
/* Common code for mdbx_txn_begin() and mdbx_txn_renew(). */
|
/* Common code for mdbx_txn_begin() and mdbx_txn_renew(). */
|
||||||
static int mdbx_txn_renew0(MDBX_txn *txn, const unsigned flags) {
|
static int mdbx_txn_renew0(MDBX_txn *txn, const unsigned flags) {
|
||||||
MDBX_env *env = txn->mt_env;
|
MDBX_env *env = txn->mt_env;
|
||||||
@ -7623,6 +7723,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, const unsigned flags) {
|
|||||||
|
|
||||||
/* Seek & fetch the last meta */
|
/* Seek & fetch the last meta */
|
||||||
if (likely(/* not recovery mode */ env->me_stuck_meta < 0)) {
|
if (likely(/* not recovery mode */ env->me_stuck_meta < 0)) {
|
||||||
|
uint64_t timestamp = 0;
|
||||||
while (1) {
|
while (1) {
|
||||||
volatile const MDBX_meta *const meta = meta_prefer_last(env);
|
volatile const MDBX_meta *const meta = meta_prefer_last(env);
|
||||||
mdbx_jitter4testing(false);
|
mdbx_jitter4testing(false);
|
||||||
@ -7644,6 +7745,8 @@ static int mdbx_txn_renew0(MDBX_txn *txn, const unsigned flags) {
|
|||||||
mdbx_assert(env, r->mr_txnid.weak == snap);
|
mdbx_assert(env, r->mr_txnid.weak == snap);
|
||||||
atomic_store32(&env->me_lck->mti_readers_refresh_flag, true,
|
atomic_store32(&env->me_lck->mti_readers_refresh_flag, true,
|
||||||
mo_AcquireRelease);
|
mo_AcquireRelease);
|
||||||
|
} else {
|
||||||
|
/* exclusive mode without lck */
|
||||||
}
|
}
|
||||||
mdbx_jitter4testing(true);
|
mdbx_jitter4testing(true);
|
||||||
|
|
||||||
@ -7664,8 +7767,14 @@ static int mdbx_txn_renew0(MDBX_txn *txn, const unsigned flags) {
|
|||||||
snap == meta_txnid(env, meta) &&
|
snap == meta_txnid(env, meta) &&
|
||||||
snap >= atomic_load64(&env->me_lck->mti_oldest_reader,
|
snap >= atomic_load64(&env->me_lck->mti_oldest_reader,
|
||||||
mo_AcquireRelease))) {
|
mo_AcquireRelease))) {
|
||||||
|
/* workaround for https://github.com/erthink/libmdbx/issues/269 */
|
||||||
|
rc = meta_waittxnid(env, (const MDBX_meta *)meta, &timestamp);
|
||||||
mdbx_jitter4testing(false);
|
mdbx_jitter4testing(false);
|
||||||
break;
|
if (likely(rc == MDBX_SUCCESS))
|
||||||
|
break;
|
||||||
|
if (likely(rc == MDBX_RESULT_TRUE))
|
||||||
|
continue;
|
||||||
|
goto bailout;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
@ -7745,6 +7854,14 @@ static int mdbx_txn_renew0(MDBX_txn *txn, const unsigned flags) {
|
|||||||
|
|
||||||
mdbx_jitter4testing(false);
|
mdbx_jitter4testing(false);
|
||||||
const MDBX_meta *meta = constmeta_prefer_last(env);
|
const MDBX_meta *meta = constmeta_prefer_last(env);
|
||||||
|
uint64_t timestamp = 0;
|
||||||
|
while ("workaround for https://github.com/erthink/libmdbx/issues/269") {
|
||||||
|
rc = meta_waittxnid(env, (const MDBX_meta *)meta, &timestamp);
|
||||||
|
if (likely(rc == MDBX_SUCCESS))
|
||||||
|
break;
|
||||||
|
if (unlikely(rc != MDBX_RESULT_TRUE))
|
||||||
|
goto bailout;
|
||||||
|
}
|
||||||
mdbx_jitter4testing(false);
|
mdbx_jitter4testing(false);
|
||||||
txn->mt_canary = meta->mm_canary;
|
txn->mt_canary = meta->mm_canary;
|
||||||
const txnid_t snap = constmeta_txnid(env, meta);
|
const txnid_t snap = constmeta_txnid(env, meta);
|
||||||
@ -10730,6 +10847,7 @@ __cold static MDBX_page *mdbx_meta_model(const MDBX_env *env, MDBX_page *model,
|
|||||||
model_meta->mm_dbs[MAIN_DBI].md_root = P_INVALID;
|
model_meta->mm_dbs[MAIN_DBI].md_root = P_INVALID;
|
||||||
meta_set_txnid(env, model_meta, MIN_TXNID + num);
|
meta_set_txnid(env, model_meta, MIN_TXNID + num);
|
||||||
unaligned_poke_u64(4, model_meta->mm_datasync_sign, meta_sign(model_meta));
|
unaligned_poke_u64(4, model_meta->mm_datasync_sign, meta_sign(model_meta));
|
||||||
|
mdbx_assert(env, meta_checktxnid(env, model_meta, true));
|
||||||
return (MDBX_page *)((uint8_t *)model + env->me_psize);
|
return (MDBX_page *)((uint8_t *)model + env->me_psize);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -10892,6 +11010,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
|
|||||||
goto fail;
|
goto fail;
|
||||||
}
|
}
|
||||||
meta_set_txnid(env, pending, txnid);
|
meta_set_txnid(env, pending, txnid);
|
||||||
|
mdbx_assert(env, meta_checktxnid(env, pending, true));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -10924,6 +11043,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
|
|||||||
rc = (flags & MDBX_SAFE_NOSYNC) ? MDBX_RESULT_TRUE /* carry non-steady */
|
rc = (flags & MDBX_SAFE_NOSYNC) ? MDBX_RESULT_TRUE /* carry non-steady */
|
||||||
: MDBX_RESULT_FALSE /* carry steady */;
|
: MDBX_RESULT_FALSE /* carry steady */;
|
||||||
}
|
}
|
||||||
|
mdbx_assert(env, meta_checktxnid(env, pending, true));
|
||||||
|
|
||||||
/* Steady or Weak */
|
/* Steady or Weak */
|
||||||
if (rc == MDBX_RESULT_FALSE /* carry steady */) {
|
if (rc == MDBX_RESULT_FALSE /* carry steady */) {
|
||||||
@ -11032,6 +11152,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
|
|||||||
/* LY: 'commit' the meta */
|
/* LY: 'commit' the meta */
|
||||||
meta_update_end(env, target, unaligned_peek_u64(4, pending->mm_txnid_b));
|
meta_update_end(env, target, unaligned_peek_u64(4, pending->mm_txnid_b));
|
||||||
mdbx_jitter4testing(true);
|
mdbx_jitter4testing(true);
|
||||||
|
mdbx_assert(env, meta_checktxnid(env, target, true));
|
||||||
} else {
|
} else {
|
||||||
/* dangerous case (target == head), only mm_datasync_sign could
|
/* dangerous case (target == head), only mm_datasync_sign could
|
||||||
* be updated, check assertions once again */
|
* be updated, check assertions once again */
|
||||||
@ -11081,6 +11202,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
|
|||||||
if (rc != MDBX_SUCCESS)
|
if (rc != MDBX_SUCCESS)
|
||||||
goto undo;
|
goto undo;
|
||||||
}
|
}
|
||||||
|
mdbx_assert(env, meta_checktxnid(env, target, true));
|
||||||
}
|
}
|
||||||
env->me_lck->mti_meta_sync_txnid.weak =
|
env->me_lck->mti_meta_sync_txnid.weak =
|
||||||
(uint32_t)unaligned_peek_u64(4, pending->mm_txnid_a) -
|
(uint32_t)unaligned_peek_u64(4, pending->mm_txnid_a) -
|
||||||
@ -11094,6 +11216,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
|
|||||||
pending->mm_geo.upper);
|
pending->mm_geo.upper);
|
||||||
if (MDBX_IS_ERROR(rc))
|
if (MDBX_IS_ERROR(rc))
|
||||||
goto fail;
|
goto fail;
|
||||||
|
mdbx_assert(env, meta_checktxnid(env, target, true));
|
||||||
}
|
}
|
||||||
|
|
||||||
MDBX_lockinfo *const lck = env->me_lck_mmap.lck;
|
MDBX_lockinfo *const lck = env->me_lck_mmap.lck;
|
||||||
@ -11552,7 +11675,16 @@ mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, intptr_t size_now,
|
|||||||
if (!inside_txn) {
|
if (!inside_txn) {
|
||||||
mdbx_assert(env, need_unlock);
|
mdbx_assert(env, need_unlock);
|
||||||
const MDBX_meta *head = constmeta_prefer_last(env);
|
const MDBX_meta *head = constmeta_prefer_last(env);
|
||||||
meta = *head;
|
|
||||||
|
uint64_t timestamp = 0;
|
||||||
|
while ("workaround for https://github.com/erthink/libmdbx/issues/269") {
|
||||||
|
meta = *head;
|
||||||
|
rc = meta_waittxnid(env, &meta, &timestamp);
|
||||||
|
if (likely(rc == MDBX_SUCCESS))
|
||||||
|
break;
|
||||||
|
if (unlikely(rc != MDBX_RESULT_TRUE))
|
||||||
|
goto bailout;
|
||||||
|
}
|
||||||
const txnid_t txnid = safe64_txnid_next(constmeta_txnid(env, &meta));
|
const txnid_t txnid = safe64_txnid_next(constmeta_txnid(env, &meta));
|
||||||
if (unlikely(txnid > MAX_TXNID)) {
|
if (unlikely(txnid > MAX_TXNID)) {
|
||||||
rc = MDBX_TXN_FULL;
|
rc = MDBX_TXN_FULL;
|
||||||
@ -12455,7 +12587,9 @@ __cold static int __must_check_result mdbx_override_meta(
|
|||||||
mdbx_meta_model(env, page, target);
|
mdbx_meta_model(env, page, target);
|
||||||
MDBX_meta *const model = page_meta(page);
|
MDBX_meta *const model = page_meta(page);
|
||||||
meta_set_txnid(env, model, txnid);
|
meta_set_txnid(env, model, txnid);
|
||||||
|
mdbx_assert(env, meta_checktxnid(env, model, true));
|
||||||
if (shape) {
|
if (shape) {
|
||||||
|
mdbx_assert(env, meta_checktxnid(env, shape, true));
|
||||||
model->mm_extra_flags = shape->mm_extra_flags;
|
model->mm_extra_flags = shape->mm_extra_flags;
|
||||||
model->mm_validator_id = shape->mm_validator_id;
|
model->mm_validator_id = shape->mm_validator_id;
|
||||||
model->mm_extra_pagehdr = shape->mm_extra_pagehdr;
|
model->mm_extra_pagehdr = shape->mm_extra_pagehdr;
|
||||||
@ -12464,6 +12598,7 @@ __cold static int __must_check_result mdbx_override_meta(
|
|||||||
memcpy(&model->mm_canary, &shape->mm_canary, sizeof(model->mm_canary));
|
memcpy(&model->mm_canary, &shape->mm_canary, sizeof(model->mm_canary));
|
||||||
memcpy(&model->mm_pages_retired, &shape->mm_pages_retired,
|
memcpy(&model->mm_pages_retired, &shape->mm_pages_retired,
|
||||||
sizeof(model->mm_pages_retired));
|
sizeof(model->mm_pages_retired));
|
||||||
|
mdbx_assert(env, meta_checktxnid(env, model, true));
|
||||||
}
|
}
|
||||||
unaligned_poke_u64(4, model->mm_datasync_sign, meta_sign(model));
|
unaligned_poke_u64(4, model->mm_datasync_sign, meta_sign(model));
|
||||||
rc = mdbx_validate_meta(env, model, page, target, nullptr);
|
rc = mdbx_validate_meta(env, model, page, target, nullptr);
|
||||||
@ -17367,6 +17502,7 @@ static int mdbx_rebalance(MDBX_cursor *mc) {
|
|||||||
if (nkeys == 0) {
|
if (nkeys == 0) {
|
||||||
mdbx_cassert(mc, IS_LEAF(mp));
|
mdbx_cassert(mc, IS_LEAF(mp));
|
||||||
mdbx_debug("%s", "tree is completely empty");
|
mdbx_debug("%s", "tree is completely empty");
|
||||||
|
mdbx_cassert(mc, (*mc->mc_dbistate & DBI_DIRTY) != 0);
|
||||||
mc->mc_db->md_root = P_INVALID;
|
mc->mc_db->md_root = P_INVALID;
|
||||||
mc->mc_db->md_depth = 0;
|
mc->mc_db->md_depth = 0;
|
||||||
mdbx_cassert(mc, mc->mc_db->md_branch_pages == 0 &&
|
mdbx_cassert(mc, mc->mc_db->md_branch_pages == 0 &&
|
||||||
@ -20172,6 +20308,7 @@ static int dbi_open(MDBX_txn *txn, const char *table_name, unsigned user_flags,
|
|||||||
|
|
||||||
dbiflags |= DBI_DIRTY | DBI_CREAT;
|
dbiflags |= DBI_DIRTY | DBI_CREAT;
|
||||||
txn->mt_flags |= MDBX_TXN_DIRTY;
|
txn->mt_flags |= MDBX_TXN_DIRTY;
|
||||||
|
mdbx_tassert(txn, (txn->mt_dbistate[MAIN_DBI] & DBI_DIRTY) != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Got info, register DBI in this txn */
|
/* Got info, register DBI in this txn */
|
||||||
@ -20459,6 +20596,7 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) {
|
|||||||
txn->mt_dbs[dbi].md_entries = 0;
|
txn->mt_dbs[dbi].md_entries = 0;
|
||||||
txn->mt_dbs[dbi].md_root = P_INVALID;
|
txn->mt_dbs[dbi].md_root = P_INVALID;
|
||||||
txn->mt_dbs[dbi].md_seq = 0;
|
txn->mt_dbs[dbi].md_seq = 0;
|
||||||
|
/* txn->mt_dbs[dbi].md_mod_txnid = txn->mt_txnid; */
|
||||||
txn->mt_flags |= MDBX_TXN_DIRTY;
|
txn->mt_flags |= MDBX_TXN_DIRTY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user