2024-05-19 22:07:58 +03:00
|
|
|
|
/// \copyright SPDX-License-Identifier: Apache-2.0
|
|
|
|
|
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2024
|
|
|
|
|
|
|
|
|
|
#include "internals.h"
|
|
|
|
|
|
|
|
|
|
/* check against https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
|
|
|
|
|
static bool coherency_check(const MDBX_env *env, const txnid_t txnid,
|
|
|
|
|
const volatile tree_t *trees,
|
|
|
|
|
const volatile meta_t *meta, bool report) {
|
|
|
|
|
const txnid_t freedb_mod_txnid = trees[FREE_DBI].mod_txnid;
|
|
|
|
|
const txnid_t maindb_mod_txnid = trees[MAIN_DBI].mod_txnid;
|
|
|
|
|
const pgno_t last_pgno = meta->geometry.now;
|
|
|
|
|
|
|
|
|
|
const pgno_t freedb_root_pgno = trees[FREE_DBI].root;
|
|
|
|
|
const page_t *freedb_root =
|
|
|
|
|
(env->dxb_mmap.base && freedb_root_pgno < last_pgno)
|
|
|
|
|
? pgno2page(env, freedb_root_pgno)
|
|
|
|
|
: nullptr;
|
|
|
|
|
|
|
|
|
|
const pgno_t maindb_root_pgno = trees[MAIN_DBI].root;
|
|
|
|
|
const page_t *maindb_root =
|
|
|
|
|
(env->dxb_mmap.base && maindb_root_pgno < last_pgno)
|
|
|
|
|
? pgno2page(env, maindb_root_pgno)
|
|
|
|
|
: nullptr;
|
|
|
|
|
const uint64_t magic_and_version =
|
|
|
|
|
unaligned_peek_u64_volatile(4, &meta->magic_and_version);
|
|
|
|
|
|
|
|
|
|
bool ok = true;
|
|
|
|
|
if (freedb_root_pgno != P_INVALID &&
|
|
|
|
|
unlikely(freedb_root_pgno >= last_pgno)) {
|
|
|
|
|
if (report)
|
|
|
|
|
WARNING(
|
2024-08-03 14:23:18 +03:00
|
|
|
|
"catch invalid %s-db root %" PRIaPGNO " for meta_txnid %" PRIaTXN
|
2024-05-19 22:07:58 +03:00
|
|
|
|
" %s",
|
|
|
|
|
"free", freedb_root_pgno, txnid,
|
|
|
|
|
(env->stuck_meta < 0)
|
|
|
|
|
? "(workaround for incoherent flaw of unified page/buffer cache)"
|
|
|
|
|
: "(wagering meta)");
|
|
|
|
|
ok = false;
|
|
|
|
|
}
|
|
|
|
|
if (maindb_root_pgno != P_INVALID &&
|
|
|
|
|
unlikely(maindb_root_pgno >= last_pgno)) {
|
|
|
|
|
if (report)
|
|
|
|
|
WARNING(
|
2024-08-03 14:23:18 +03:00
|
|
|
|
"catch invalid %s-db root %" PRIaPGNO " for meta_txnid %" PRIaTXN
|
2024-05-19 22:07:58 +03:00
|
|
|
|
" %s",
|
|
|
|
|
"main", maindb_root_pgno, txnid,
|
|
|
|
|
(env->stuck_meta < 0)
|
|
|
|
|
? "(workaround for incoherent flaw of unified page/buffer cache)"
|
|
|
|
|
: "(wagering meta)");
|
|
|
|
|
ok = false;
|
|
|
|
|
}
|
|
|
|
|
if (unlikely(txnid < freedb_mod_txnid ||
|
|
|
|
|
(!freedb_mod_txnid && freedb_root &&
|
|
|
|
|
likely(magic_and_version == MDBX_DATA_MAGIC)))) {
|
|
|
|
|
if (report)
|
|
|
|
|
WARNING(
|
2024-08-03 14:23:18 +03:00
|
|
|
|
"catch invalid %s-db.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN
|
2024-05-19 22:07:58 +03:00
|
|
|
|
" %s",
|
|
|
|
|
"free", freedb_mod_txnid, txnid,
|
|
|
|
|
(env->stuck_meta < 0)
|
|
|
|
|
? "(workaround for incoherent flaw of unified page/buffer cache)"
|
|
|
|
|
: "(wagering meta)");
|
|
|
|
|
ok = false;
|
|
|
|
|
}
|
|
|
|
|
if (unlikely(txnid < maindb_mod_txnid ||
|
|
|
|
|
(!maindb_mod_txnid && maindb_root &&
|
|
|
|
|
likely(magic_and_version == MDBX_DATA_MAGIC)))) {
|
|
|
|
|
if (report)
|
|
|
|
|
WARNING(
|
2024-08-03 14:23:18 +03:00
|
|
|
|
"catch invalid %s-db.mod_txnid %" PRIaTXN " for meta_txnid %" PRIaTXN
|
2024-05-19 22:07:58 +03:00
|
|
|
|
" %s",
|
|
|
|
|
"main", maindb_mod_txnid, txnid,
|
|
|
|
|
(env->stuck_meta < 0)
|
|
|
|
|
? "(workaround for incoherent flaw of unified page/buffer cache)"
|
|
|
|
|
: "(wagering meta)");
|
|
|
|
|
ok = false;
|
|
|
|
|
}
|
2024-08-07 17:38:53 +03:00
|
|
|
|
|
|
|
|
|
/* Проверяем отметки внутри корневых страниц только если сами страницы
|
|
|
|
|
* в пределах текущего отображения. Иначе возможны SIGSEGV до переноса
|
|
|
|
|
* вызова coherency_check_head() после dxb_resize() внутри txn_renew(). */
|
|
|
|
|
if (likely(freedb_root && freedb_mod_txnid &&
|
|
|
|
|
(size_t)ptr_dist(env->dxb_mmap.base, freedb_root) <
|
|
|
|
|
env->dxb_mmap.limit)) {
|
2024-05-19 22:07:58 +03:00
|
|
|
|
VALGRIND_MAKE_MEM_DEFINED(freedb_root, sizeof(freedb_root->txnid));
|
|
|
|
|
MDBX_ASAN_UNPOISON_MEMORY_REGION(freedb_root, sizeof(freedb_root->txnid));
|
|
|
|
|
const txnid_t root_txnid = freedb_root->txnid;
|
|
|
|
|
if (unlikely(root_txnid != freedb_mod_txnid)) {
|
|
|
|
|
if (report)
|
|
|
|
|
WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN
|
2024-08-03 14:23:18 +03:00
|
|
|
|
" for %s-db.mod_txnid %" PRIaTXN " %s",
|
2024-05-19 22:07:58 +03:00
|
|
|
|
freedb_root_pgno, root_txnid, "free", freedb_mod_txnid,
|
|
|
|
|
(env->stuck_meta < 0) ? "(workaround for incoherent flaw of "
|
|
|
|
|
"unified page/buffer cache)"
|
|
|
|
|
: "(wagering meta)");
|
|
|
|
|
ok = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
2024-08-07 17:38:53 +03:00
|
|
|
|
if (likely(maindb_root && maindb_mod_txnid &&
|
|
|
|
|
(size_t)ptr_dist(env->dxb_mmap.base, maindb_root) <
|
|
|
|
|
env->dxb_mmap.limit)) {
|
2024-05-19 22:07:58 +03:00
|
|
|
|
VALGRIND_MAKE_MEM_DEFINED(maindb_root, sizeof(maindb_root->txnid));
|
|
|
|
|
MDBX_ASAN_UNPOISON_MEMORY_REGION(maindb_root, sizeof(maindb_root->txnid));
|
|
|
|
|
const txnid_t root_txnid = maindb_root->txnid;
|
|
|
|
|
if (unlikely(root_txnid != maindb_mod_txnid)) {
|
|
|
|
|
if (report)
|
|
|
|
|
WARNING("catch invalid root_page %" PRIaPGNO " mod_txnid %" PRIaTXN
|
2024-08-03 14:23:18 +03:00
|
|
|
|
" for %s-db.mod_txnid %" PRIaTXN " %s",
|
2024-05-19 22:07:58 +03:00
|
|
|
|
maindb_root_pgno, root_txnid, "main", maindb_mod_txnid,
|
|
|
|
|
(env->stuck_meta < 0) ? "(workaround for incoherent flaw of "
|
|
|
|
|
"unified page/buffer cache)"
|
|
|
|
|
: "(wagering meta)");
|
|
|
|
|
ok = false;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (unlikely(!ok) && report)
|
|
|
|
|
env->lck->pgops.incoherence.weak =
|
|
|
|
|
(env->lck->pgops.incoherence.weak >= INT32_MAX)
|
|
|
|
|
? INT32_MAX
|
|
|
|
|
: env->lck->pgops.incoherence.weak + 1;
|
|
|
|
|
return ok;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
__cold int coherency_timeout(uint64_t *timestamp, intptr_t pgno,
|
|
|
|
|
const MDBX_env *env) {
|
|
|
|
|
if (likely(timestamp && *timestamp == 0))
|
|
|
|
|
*timestamp = osal_monotime();
|
|
|
|
|
else if (unlikely(!timestamp || osal_monotime() - *timestamp >
|
|
|
|
|
osal_16dot16_to_monotime(65536 / 10))) {
|
|
|
|
|
if (pgno >= 0 && pgno != env->stuck_meta)
|
|
|
|
|
ERROR("bailout waiting for %" PRIuSIZE " page arrival %s", pgno,
|
|
|
|
|
"(workaround for incoherent flaw of unified page/buffer cache)");
|
|
|
|
|
else if (env->stuck_meta < 0)
|
|
|
|
|
ERROR("bailout waiting for valid snapshot (%s)",
|
|
|
|
|
"workaround for incoherent flaw of unified page/buffer cache");
|
|
|
|
|
return MDBX_PROBLEM;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
osal_memory_fence(mo_AcquireRelease, true);
|
|
|
|
|
#if defined(_WIN32) || defined(_WIN64)
|
|
|
|
|
SwitchToThread();
|
|
|
|
|
#elif defined(__linux__) || defined(__gnu_linux__) || defined(_UNIX03_SOURCE)
|
|
|
|
|
sched_yield();
|
|
|
|
|
#elif (defined(_GNU_SOURCE) && __GLIBC_PREREQ(2, 1)) || defined(_OPEN_THREADS)
|
|
|
|
|
pthread_yield();
|
|
|
|
|
#else
|
|
|
|
|
usleep(42);
|
|
|
|
|
#endif
|
|
|
|
|
return MDBX_RESULT_TRUE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* check with timeout as the workaround
|
|
|
|
|
* for https://libmdbx.dqdkfa.ru/dead-github/issues/269 */
|
2024-08-11 00:00:22 +03:00
|
|
|
|
__hot int coherency_fetch_head(MDBX_txn *txn, const meta_ptr_t head,
|
2024-05-19 22:07:58 +03:00
|
|
|
|
uint64_t *timestamp) {
|
|
|
|
|
/* Copy the DB info and flags */
|
2024-08-11 00:00:22 +03:00
|
|
|
|
txn->txnid = head.txnid;
|
|
|
|
|
txn->geo = head.ptr_c->geometry;
|
2024-05-19 22:07:58 +03:00
|
|
|
|
memcpy(txn->dbs, &head.ptr_c->trees, sizeof(head.ptr_c->trees));
|
|
|
|
|
STATIC_ASSERT(sizeof(head.ptr_c->trees) == CORE_DBS * sizeof(tree_t));
|
|
|
|
|
VALGRIND_MAKE_MEM_UNDEFINED(txn->dbs + CORE_DBS,
|
|
|
|
|
txn->env->max_dbi - CORE_DBS);
|
2024-08-11 00:00:22 +03:00
|
|
|
|
txn->canary = head.ptr_c->canary;
|
2024-05-19 22:07:58 +03:00
|
|
|
|
|
|
|
|
|
if (unlikely(!coherency_check(txn->env, head.txnid, txn->dbs, head.ptr_v,
|
2024-08-11 00:00:22 +03:00
|
|
|
|
*timestamp == 0) ||
|
|
|
|
|
txn->txnid != meta_txnid(head.ptr_v)))
|
2024-05-19 22:07:58 +03:00
|
|
|
|
return coherency_timeout(timestamp, -1, txn->env);
|
|
|
|
|
|
2024-07-20 14:05:16 +03:00
|
|
|
|
if (unlikely(txn->dbs[FREE_DBI].flags != MDBX_INTEGERKEY)) {
|
|
|
|
|
if ((txn->dbs[FREE_DBI].flags & DB_PERSISTENT_FLAGS) != MDBX_INTEGERKEY ||
|
|
|
|
|
unaligned_peek_u64(4, &head.ptr_c->magic_and_version) ==
|
|
|
|
|
MDBX_DATA_MAGIC) {
|
2024-07-24 11:22:59 +03:00
|
|
|
|
ERROR("unexpected/invalid db-flags 0x%x for %s", txn->dbs[FREE_DBI].flags,
|
|
|
|
|
"GC/FreeDB");
|
2024-07-20 14:05:16 +03:00
|
|
|
|
return MDBX_INCOMPATIBLE;
|
|
|
|
|
}
|
|
|
|
|
txn->dbs[FREE_DBI].flags &= DB_PERSISTENT_FLAGS;
|
|
|
|
|
}
|
2024-05-19 22:07:58 +03:00
|
|
|
|
tASSERT(txn, txn->dbs[FREE_DBI].flags == MDBX_INTEGERKEY);
|
2024-08-03 14:23:18 +03:00
|
|
|
|
tASSERT(txn, check_table_flags(txn->dbs[MAIN_DBI].flags));
|
2024-05-19 22:07:58 +03:00
|
|
|
|
return MDBX_SUCCESS;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
int coherency_check_written(const MDBX_env *env, const txnid_t txnid,
|
|
|
|
|
const volatile meta_t *meta, const intptr_t pgno,
|
|
|
|
|
uint64_t *timestamp) {
|
|
|
|
|
const bool report = !(timestamp && *timestamp);
|
|
|
|
|
const txnid_t head_txnid = meta_txnid(meta);
|
2024-07-05 22:00:05 +03:00
|
|
|
|
if (likely(head_txnid >= MIN_TXNID && head_txnid >= txnid)) {
|
|
|
|
|
if (likely(
|
|
|
|
|
coherency_check(env, head_txnid, &meta->trees.gc, meta, report))) {
|
|
|
|
|
eASSERT(env, meta->trees.gc.flags == MDBX_INTEGERKEY);
|
2024-08-03 14:23:18 +03:00
|
|
|
|
eASSERT(env, check_table_flags(meta->trees.main.flags));
|
2024-07-05 22:00:05 +03:00
|
|
|
|
return MDBX_SUCCESS;
|
2024-05-19 22:07:58 +03:00
|
|
|
|
}
|
2024-07-05 22:00:05 +03:00
|
|
|
|
} else if (report) {
|
|
|
|
|
env->lck->pgops.incoherence.weak =
|
|
|
|
|
(env->lck->pgops.incoherence.weak >= INT32_MAX)
|
|
|
|
|
? INT32_MAX
|
|
|
|
|
: env->lck->pgops.incoherence.weak + 1;
|
|
|
|
|
WARNING("catch %s txnid %" PRIaTXN " for meta_%" PRIaPGNO " %s",
|
|
|
|
|
(head_txnid < MIN_TXNID) ? "invalid" : "unexpected", head_txnid,
|
|
|
|
|
bytes2pgno(env, ptr_dist(meta, env->dxb_mmap.base)),
|
|
|
|
|
"(workaround for incoherent flaw of unified page/buffer cache)");
|
2024-05-19 22:07:58 +03:00
|
|
|
|
}
|
2024-07-05 22:00:05 +03:00
|
|
|
|
return coherency_timeout(timestamp, pgno, env);
|
2024-05-19 22:07:58 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bool coherency_check_meta(const MDBX_env *env, const volatile meta_t *meta,
|
|
|
|
|
bool report) {
|
|
|
|
|
uint64_t timestamp = 0;
|
|
|
|
|
return coherency_check_written(env, 0, meta, -1,
|
|
|
|
|
report ? ×tamp : nullptr) == MDBX_SUCCESS;
|
|
|
|
|
}
|