mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-06 18:44:13 +08:00
mdbx: refine txnid-safety for 32-bit archs.
Change-Id: I21d3a50fbc7ae0c625c51e919cb214740c1e97cb
This commit is contained in:
parent
b3c2118eb4
commit
a0025d84fd
@ -273,59 +273,82 @@ size_t __hot mdbx_e2k_strnlen_bug_workaround(const char *s, size_t maxlen) {
|
|||||||
#endif /* Elbrus's memcmp() bug. */
|
#endif /* Elbrus's memcmp() bug. */
|
||||||
|
|
||||||
/*------------------------------------------------------------------------------
|
/*------------------------------------------------------------------------------
|
||||||
* safe write volatile txnid for non-64-bit and/or not-atomic platforms. */
|
* safe read/write volatile 64-bit fields on 32-bit architectures. */
|
||||||
|
|
||||||
#ifndef MDBX_64BIT_ATOMIC
|
static __inline bool safe64_is_valid(uint64_t v) {
|
||||||
#if MDBX_WORDBITS >= 64
|
#if MDBX_WORDBITS >= 64
|
||||||
#define MDBX_64BIT_ATOMIC 1
|
return v < SAFE64_INVALID_THRESHOLD;
|
||||||
#else
|
#else
|
||||||
#define MDBX_64BIT_ATOMIC 0
|
return (v >> 32) != UINT32_MAX;
|
||||||
#endif
|
#endif /* MDBX_WORDBITS */
|
||||||
#endif /* MDBX_64BIT_ATOMIC */
|
}
|
||||||
|
|
||||||
static __always_inline void safe_txnid_reset(volatile uint64_t *const ptr) {
|
static __inline bool safe64_is_valid_ptr(const mdbx_safe64_t *ptr) {
|
||||||
mdbx_compiler_barrier();
|
mdbx_compiler_barrier();
|
||||||
#if MDBX_64BIT_ATOMIC
|
#if MDBX_64BIT_ATOMIC
|
||||||
*ptr = UINT64_MAX;
|
return ptr->atomic < SAFE64_INVALID_THRESHOLD;
|
||||||
#else
|
#else
|
||||||
volatile uint32_t *const high_part =
|
return ptr->high != UINT32_MAX;
|
||||||
((volatile uint32_t *)ptr) + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__);
|
|
||||||
*high_part = UINT32_MAX;
|
|
||||||
#endif /* MDBX_64BIT_ATOMIC */
|
#endif /* MDBX_64BIT_ATOMIC */
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline void safe64_reset(mdbx_safe64_t *ptr) {
|
||||||
|
mdbx_compiler_barrier();
|
||||||
|
#if MDBX_64BIT_ATOMIC
|
||||||
|
ptr->atomic = UINT64_MAX;
|
||||||
|
#else
|
||||||
|
/* atomically make value >= SAFE64_INVALID_THRESHOLD */
|
||||||
|
ptr->high = UINT32_MAX;
|
||||||
|
#endif /* MDBX_64BIT_ATOMIC */
|
||||||
|
assert(ptr->inconsistent >= SAFE64_INVALID_THRESHOLD);
|
||||||
mdbx_flush_noncoherent_cpu_writeback();
|
mdbx_flush_noncoherent_cpu_writeback();
|
||||||
mdbx_jitter4testing(true);
|
mdbx_jitter4testing(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline void safe_txnid_set(volatile uint64_t *const ptr,
|
static __inline void safe64_write(mdbx_safe64_t *ptr, const uint64_t v) {
|
||||||
uint64_t v) {
|
|
||||||
mdbx_compiler_barrier();
|
mdbx_compiler_barrier();
|
||||||
assert(*ptr >= BAD_TXNID);
|
assert(ptr->inconsistent >= SAFE64_INVALID_THRESHOLD);
|
||||||
#if MDBX_64BIT_ATOMIC
|
#if MDBX_64BIT_ATOMIC
|
||||||
*ptr = v;
|
ptr->atomic = v;
|
||||||
#else
|
#else /* MDBX_64BIT_ATOMIC */
|
||||||
volatile uint32_t *const high_part =
|
/* update low-part but still value >= SAFE64_INVALID_THRESHOLD */
|
||||||
((volatile uint32_t *)ptr) + (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__);
|
ptr->low = (uint32_t)v;
|
||||||
volatile uint32_t *const lower_part =
|
assert(ptr->inconsistent >= SAFE64_INVALID_THRESHOLD);
|
||||||
((volatile uint32_t *)ptr) + (__BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__);
|
|
||||||
mdbx_jitter4testing(true);
|
|
||||||
*lower_part = (uint32_t)v;
|
|
||||||
mdbx_flush_noncoherent_cpu_writeback();
|
mdbx_flush_noncoherent_cpu_writeback();
|
||||||
mdbx_jitter4testing(true);
|
mdbx_jitter4testing(true);
|
||||||
*high_part = (uint32_t)(v >> 32);
|
/* update high-part from SAFE64_INVALID_THRESHOLD to actual value */
|
||||||
|
ptr->high = (uint32_t)(v >> 32);
|
||||||
#endif /* MDBX_64BIT_ATOMIC */
|
#endif /* MDBX_64BIT_ATOMIC */
|
||||||
mdbx_compiler_barrier();
|
assert(ptr->inconsistent == v);
|
||||||
|
mdbx_flush_noncoherent_cpu_writeback();
|
||||||
|
mdbx_jitter4testing(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline void safe_txnid_update(volatile uint64_t *const ptr,
|
static __always_inline uint64_t safe64_read(const mdbx_safe64_t *ptr) {
|
||||||
uint64_t v) {
|
|
||||||
#if MDBX_64BIT_ATOMIC
|
|
||||||
mdbx_compiler_barrier();
|
mdbx_compiler_barrier();
|
||||||
*ptr = v;
|
mdbx_jitter4testing(true);
|
||||||
#else
|
uint64_t v;
|
||||||
safe_txnid_reset(ptr);
|
#if MDBX_64BIT_ATOMIC
|
||||||
safe_txnid_set(ptr, v);
|
v = ptr->atomic;
|
||||||
|
#else /* MDBX_64BIT_ATOMIC */
|
||||||
|
uint32_t hi, lo;
|
||||||
|
do {
|
||||||
|
hi = ptr->high;
|
||||||
|
mdbx_compiler_barrier();
|
||||||
|
mdbx_jitter4testing(true);
|
||||||
|
lo = ptr->low;
|
||||||
|
mdbx_compiler_barrier();
|
||||||
|
mdbx_jitter4testing(true);
|
||||||
|
} while (unlikely(hi != ptr->high));
|
||||||
|
v = lo | (uint64_t)hi << 32;
|
||||||
#endif /* MDBX_64BIT_ATOMIC */
|
#endif /* MDBX_64BIT_ATOMIC */
|
||||||
mdbx_flush_noncoherent_cpu_writeback();
|
mdbx_jitter4testing(true);
|
||||||
|
return v;
|
||||||
|
}
|
||||||
|
|
||||||
|
static __inline void safe64_update(mdbx_safe64_t *ptr, const uint64_t v) {
|
||||||
|
safe64_reset(ptr);
|
||||||
|
safe64_write(ptr, v);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*----------------------------------------------------------------------------*/
|
/*----------------------------------------------------------------------------*/
|
||||||
@ -1851,8 +1874,8 @@ static void mdbx_page_list(MDBX_page *mp) {
|
|||||||
state);
|
state);
|
||||||
return;
|
return;
|
||||||
case P_META:
|
case P_META:
|
||||||
mdbx_print("Meta-page %" PRIu64 " txnid %" PRIu64 "\n", pgno,
|
mdbx_print("Meta-page %" PRIaPGNO " txnid %" PRIu64 "\n", pgno,
|
||||||
((MDBX_meta *)PAGEDATA(mp))->mm_txnid);
|
((MDBX_meta *)PAGEDATA(mp))->mm_txnid_a.inconsistent);
|
||||||
return;
|
return;
|
||||||
default:
|
default:
|
||||||
mdbx_print("Bad page %" PRIu64 " flags 0x%X\n", pgno, mp->mp_flags);
|
mdbx_print("Bad page %" PRIu64 " flags 0x%X\n", pgno, mp->mp_flags);
|
||||||
@ -2368,8 +2391,8 @@ bailout:
|
|||||||
static __inline txnid_t meta_txnid(const MDBX_env *env, const MDBX_meta *meta,
|
static __inline txnid_t meta_txnid(const MDBX_env *env, const MDBX_meta *meta,
|
||||||
bool allow_volatile) {
|
bool allow_volatile) {
|
||||||
mdbx_assert(env, meta >= METAPAGE(env, 0) || meta < METAPAGE_END(env));
|
mdbx_assert(env, meta >= METAPAGE(env, 0) || meta < METAPAGE_END(env));
|
||||||
txnid_t a = meta->mm_txnid_a;
|
txnid_t a = safe64_read(&meta->mm_txnid_a);
|
||||||
txnid_t b = meta->mm_txnid_b;
|
txnid_t b = safe64_read(&meta->mm_txnid_b);
|
||||||
if (allow_volatile)
|
if (allow_volatile)
|
||||||
return (a == b) ? a : 0;
|
return (a == b) ? a : 0;
|
||||||
mdbx_assert(env, a == b);
|
mdbx_assert(env, a == b);
|
||||||
@ -2389,27 +2412,30 @@ static __inline txnid_t mdbx_meta_txnid_fluid(const MDBX_env *env,
|
|||||||
static __inline void mdbx_meta_update_begin(const MDBX_env *env,
|
static __inline void mdbx_meta_update_begin(const MDBX_env *env,
|
||||||
MDBX_meta *meta, txnid_t txnid) {
|
MDBX_meta *meta, txnid_t txnid) {
|
||||||
mdbx_assert(env, meta >= METAPAGE(env, 0) || meta < METAPAGE_END(env));
|
mdbx_assert(env, meta >= METAPAGE(env, 0) || meta < METAPAGE_END(env));
|
||||||
mdbx_assert(env, meta->mm_txnid_a < txnid && meta->mm_txnid_b < txnid);
|
mdbx_assert(env, meta->mm_txnid_a.inconsistent < txnid &&
|
||||||
|
meta->mm_txnid_b.inconsistent < txnid);
|
||||||
(void)env;
|
(void)env;
|
||||||
safe_txnid_update(&meta->mm_txnid_a, txnid);
|
safe64_update(&meta->mm_txnid_a, txnid);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline void mdbx_meta_update_end(const MDBX_env *env, MDBX_meta *meta,
|
static __inline void mdbx_meta_update_end(const MDBX_env *env, MDBX_meta *meta,
|
||||||
txnid_t txnid) {
|
txnid_t txnid) {
|
||||||
mdbx_assert(env, meta >= METAPAGE(env, 0) || meta < METAPAGE_END(env));
|
mdbx_assert(env, meta >= METAPAGE(env, 0) || meta < METAPAGE_END(env));
|
||||||
mdbx_assert(env, meta->mm_txnid_a == txnid);
|
mdbx_assert(env, meta->mm_txnid_a.inconsistent == txnid);
|
||||||
mdbx_assert(env, meta->mm_txnid_b < txnid);
|
mdbx_assert(env, meta->mm_txnid_b.inconsistent < txnid);
|
||||||
(void)env;
|
(void)env;
|
||||||
mdbx_jitter4testing(true);
|
mdbx_jitter4testing(true);
|
||||||
safe_txnid_update(&meta->mm_txnid_b, txnid);
|
safe64_update(&meta->mm_txnid_b, txnid);
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline void mdbx_meta_set_txnid(const MDBX_env *env, MDBX_meta *meta,
|
static __inline void mdbx_meta_set_txnid(const MDBX_env *env, MDBX_meta *meta,
|
||||||
txnid_t txnid) {
|
txnid_t txnid) {
|
||||||
mdbx_assert(env, meta < METAPAGE(env, 0) || meta > METAPAGE_END(env));
|
mdbx_assert(env, meta < METAPAGE(env, 0) || meta > METAPAGE_END(env));
|
||||||
(void)env;
|
(void)env;
|
||||||
meta->mm_txnid_a = txnid;
|
/* update inconsistent since this function used ONLY for filling meta-image
|
||||||
meta->mm_txnid_b = txnid;
|
* for writing, but not the actual meta-page */
|
||||||
|
meta->mm_txnid_a.inconsistent = txnid;
|
||||||
|
meta->mm_txnid_b.inconsistent = txnid;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __inline uint64_t mdbx_meta_sign(const MDBX_meta *meta) {
|
static __inline uint64_t mdbx_meta_sign(const MDBX_meta *meta) {
|
||||||
@ -2569,7 +2595,7 @@ static txnid_t mdbx_find_oldest(MDBX_txn *txn) {
|
|||||||
for (unsigned i = 0; i < snap_nreaders; ++i) {
|
for (unsigned i = 0; i < snap_nreaders; ++i) {
|
||||||
if (lck->mti_readers[i].mr_pid) {
|
if (lck->mti_readers[i].mr_pid) {
|
||||||
/* mdbx_jitter4testing(true); */
|
/* mdbx_jitter4testing(true); */
|
||||||
const txnid_t snap = lck->mti_readers[i].mr_txnid;
|
const txnid_t snap = safe64_read(&lck->mti_readers[i].mr_txnid);
|
||||||
if (oldest > snap && last_oldest <= /* ignore pending updates */ snap) {
|
if (oldest > snap && last_oldest <= /* ignore pending updates */ snap) {
|
||||||
oldest = snap;
|
oldest = snap;
|
||||||
if (oldest == last_oldest)
|
if (oldest == last_oldest)
|
||||||
@ -2596,10 +2622,10 @@ static __cold pgno_t mdbx_find_largest(MDBX_env *env, pgno_t largest) {
|
|||||||
if (lck->mti_readers[i].mr_pid) {
|
if (lck->mti_readers[i].mr_pid) {
|
||||||
/* mdbx_jitter4testing(true); */
|
/* mdbx_jitter4testing(true); */
|
||||||
const pgno_t snap_pages = lck->mti_readers[i].mr_snapshot_pages_used;
|
const pgno_t snap_pages = lck->mti_readers[i].mr_snapshot_pages_used;
|
||||||
const txnid_t snap_txnid = lck->mti_readers[i].mr_txnid;
|
const txnid_t snap_txnid = safe64_read(&lck->mti_readers[i].mr_txnid);
|
||||||
mdbx_memory_barrier();
|
mdbx_memory_barrier();
|
||||||
if (unlikely(snap_pages != lck->mti_readers[i].mr_snapshot_pages_used ||
|
if (unlikely(snap_pages != lck->mti_readers[i].mr_snapshot_pages_used ||
|
||||||
snap_txnid != lck->mti_readers[i].mr_txnid))
|
snap_txnid != safe64_read(&lck->mti_readers[i].mr_txnid)))
|
||||||
goto retry;
|
goto retry;
|
||||||
if (largest < snap_pages &&
|
if (largest < snap_pages &&
|
||||||
lck->mti_oldest_reader <= /* ignore pending updates */ snap_txnid &&
|
lck->mti_oldest_reader <= /* ignore pending updates */ snap_txnid &&
|
||||||
@ -3596,7 +3622,8 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (likely(r)) {
|
if (likely(r)) {
|
||||||
if (unlikely(r->mr_pid != env->me_pid || r->mr_txnid < BAD_TXNID))
|
if (unlikely(r->mr_pid != env->me_pid ||
|
||||||
|
r->mr_txnid.inconsistent < SAFE64_INVALID_THRESHOLD))
|
||||||
return MDBX_BAD_RSLOT;
|
return MDBX_BAD_RSLOT;
|
||||||
} else if (env->me_lck) {
|
} else if (env->me_lck) {
|
||||||
unsigned slot, nreaders;
|
unsigned slot, nreaders;
|
||||||
@ -3651,7 +3678,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
|
|||||||
* that, it is safe for mdbx_env_close() to touch it.
|
* that, it is safe for mdbx_env_close() to touch it.
|
||||||
* When it will be closed, we can finally claim it. */
|
* When it will be closed, we can finally claim it. */
|
||||||
r->mr_pid = 0;
|
r->mr_pid = 0;
|
||||||
safe_txnid_reset(&r->mr_txnid);
|
safe64_reset(&r->mr_txnid);
|
||||||
if (slot == nreaders)
|
if (slot == nreaders)
|
||||||
env->me_lck->mti_numreaders = ++nreaders;
|
env->me_lck->mti_numreaders = ++nreaders;
|
||||||
r->mr_tid = tid;
|
r->mr_tid = tid;
|
||||||
@ -3670,14 +3697,14 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
|
|||||||
const txnid_t snap = mdbx_meta_txnid_fluid(env, meta);
|
const txnid_t snap = mdbx_meta_txnid_fluid(env, meta);
|
||||||
mdbx_jitter4testing(false);
|
mdbx_jitter4testing(false);
|
||||||
if (likely(r)) {
|
if (likely(r)) {
|
||||||
safe_txnid_reset(&r->mr_txnid);
|
safe64_reset(&r->mr_txnid);
|
||||||
r->mr_snapshot_pages_used = meta->mm_geo.next;
|
r->mr_snapshot_pages_used = meta->mm_geo.next;
|
||||||
r->mr_snapshot_pages_retired = meta->mm_pages_retired;
|
r->mr_snapshot_pages_retired = meta->mm_pages_retired;
|
||||||
safe_txnid_set(&r->mr_txnid, snap);
|
safe64_write(&r->mr_txnid, snap);
|
||||||
mdbx_jitter4testing(false);
|
mdbx_jitter4testing(false);
|
||||||
mdbx_assert(env, r->mr_pid == mdbx_getpid());
|
mdbx_assert(env, r->mr_pid == mdbx_getpid());
|
||||||
mdbx_assert(env, r->mr_tid == mdbx_thread_self());
|
mdbx_assert(env, r->mr_tid == mdbx_thread_self());
|
||||||
mdbx_assert(env, r->mr_txnid == snap);
|
mdbx_assert(env, r->mr_txnid.inconsistent == snap);
|
||||||
mdbx_compiler_barrier();
|
mdbx_compiler_barrier();
|
||||||
env->me_lck->mti_readers_refresh_flag = true;
|
env->me_lck->mti_readers_refresh_flag = true;
|
||||||
mdbx_flush_noncoherent_cpu_writeback();
|
mdbx_flush_noncoherent_cpu_writeback();
|
||||||
@ -3700,7 +3727,8 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (unlikely(txn->mt_txnid == 0 || txn->mt_txnid >= BAD_TXNID)) {
|
if (unlikely(txn->mt_txnid == 0 ||
|
||||||
|
txn->mt_txnid >= SAFE64_INVALID_THRESHOLD)) {
|
||||||
mdbx_error("environment corrupted by died writer, must shutdown!");
|
mdbx_error("environment corrupted by died writer, must shutdown!");
|
||||||
rc = MDBX_WANNA_RECOVERY;
|
rc = MDBX_WANNA_RECOVERY;
|
||||||
goto bailout;
|
goto bailout;
|
||||||
@ -3737,7 +3765,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
|
|||||||
#else
|
#else
|
||||||
txn->mt_txnid = snap + 1;
|
txn->mt_txnid = snap + 1;
|
||||||
#endif
|
#endif
|
||||||
if (unlikely(txn->mt_txnid >= BAD_TXNID)) {
|
if (unlikely(txn->mt_txnid >= SAFE64_INVALID_THRESHOLD)) {
|
||||||
mdbx_debug("txnid overflow!");
|
mdbx_debug("txnid overflow!");
|
||||||
rc = MDBX_TXN_FULL;
|
rc = MDBX_TXN_FULL;
|
||||||
goto bailout;
|
goto bailout;
|
||||||
@ -4077,13 +4105,14 @@ int mdbx_txn_info(MDBX_txn *txn, MDBX_txn_info *info, int scan_rlt) {
|
|||||||
retry:
|
retry:
|
||||||
if (lck->mti_readers[i].mr_pid) {
|
if (lck->mti_readers[i].mr_pid) {
|
||||||
mdbx_jitter4testing(true);
|
mdbx_jitter4testing(true);
|
||||||
const txnid_t snap_txnid = lck->mti_readers[i].mr_txnid;
|
const txnid_t snap_txnid =
|
||||||
|
safe64_read(&lck->mti_readers[i].mr_txnid);
|
||||||
const uint64_t snap_retired =
|
const uint64_t snap_retired =
|
||||||
lck->mti_readers[i].mr_snapshot_pages_retired;
|
lck->mti_readers[i].mr_snapshot_pages_retired;
|
||||||
mdbx_compiler_barrier();
|
mdbx_compiler_barrier();
|
||||||
if (unlikely(snap_txnid != lck->mti_readers[i].mr_txnid ||
|
if (unlikely(snap_retired !=
|
||||||
snap_retired !=
|
lck->mti_readers[i].mr_snapshot_pages_retired) ||
|
||||||
lck->mti_readers[i].mr_snapshot_pages_retired))
|
snap_txnid != safe64_read(&lck->mti_readers[i].mr_txnid))
|
||||||
goto retry;
|
goto retry;
|
||||||
if (snap_txnid > txn->mt_txnid && snap_txnid < next_reader) {
|
if (snap_txnid > txn->mt_txnid && snap_txnid < next_reader) {
|
||||||
next_reader = snap_txnid;
|
next_reader = snap_txnid;
|
||||||
@ -4115,7 +4144,7 @@ int mdbx_txn_info(MDBX_txn *txn, MDBX_txn_info *info, int scan_rlt) {
|
|||||||
bool exists = false;
|
bool exists = false;
|
||||||
for (unsigned i = 0; i < snap_nreaders; ++i) {
|
for (unsigned i = 0; i < snap_nreaders; ++i) {
|
||||||
if (lck->mti_readers[i].mr_pid &&
|
if (lck->mti_readers[i].mr_pid &&
|
||||||
txn->mt_txnid > lck->mti_readers[i].mr_txnid) {
|
txn->mt_txnid > safe64_read(&lck->mti_readers[i].mr_txnid)) {
|
||||||
exists = true;
|
exists = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -4220,11 +4249,11 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) {
|
|||||||
#endif
|
#endif
|
||||||
if (txn->mt_ro_reader) {
|
if (txn->mt_ro_reader) {
|
||||||
mdbx_ensure(env, /* paranoia is appropriate here */
|
mdbx_ensure(env, /* paranoia is appropriate here */
|
||||||
txn->mt_txnid == txn->mt_ro_reader->mr_txnid &&
|
txn->mt_txnid == txn->mt_ro_reader->mr_txnid.inconsistent &&
|
||||||
txn->mt_ro_reader->mr_txnid >=
|
txn->mt_ro_reader->mr_txnid.inconsistent >=
|
||||||
env->me_lck->mti_oldest_reader);
|
env->me_lck->mti_oldest_reader);
|
||||||
txn->mt_ro_reader->mr_snapshot_pages_used = 0;
|
txn->mt_ro_reader->mr_snapshot_pages_used = 0;
|
||||||
safe_txnid_reset(&txn->mt_ro_reader->mr_txnid);
|
safe64_reset(&txn->mt_ro_reader->mr_txnid);
|
||||||
if (mode & MDBX_END_SLOT) {
|
if (mode & MDBX_END_SLOT) {
|
||||||
if ((env->me_flags & MDBX_ENV_TXKEY) == 0)
|
if ((env->me_flags & MDBX_ENV_TXKEY) == 0)
|
||||||
txn->mt_ro_reader->mr_pid = 0;
|
txn->mt_ro_reader->mr_pid = 0;
|
||||||
@ -5635,7 +5664,8 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta,
|
|||||||
page.mp_meta.mm_psize);
|
page.mp_meta.mm_psize);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (page.mp_meta.mm_txnid_a != page.mp_meta.mm_txnid_b) {
|
if (safe64_read(&page.mp_meta.mm_txnid_a) !=
|
||||||
|
safe64_read(&page.mp_meta.mm_txnid_b)) {
|
||||||
mdbx_warning("meta[%u] not completely updated, skip it", meta_number);
|
mdbx_warning("meta[%u] not completely updated, skip it", meta_number);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -5657,7 +5687,7 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta,
|
|||||||
page.mp_meta.mm_dbs[FREE_DBI].md_root, page.mp_meta.mm_geo.lower,
|
page.mp_meta.mm_dbs[FREE_DBI].md_root, page.mp_meta.mm_geo.lower,
|
||||||
page.mp_meta.mm_geo.next, page.mp_meta.mm_geo.now,
|
page.mp_meta.mm_geo.next, page.mp_meta.mm_geo.now,
|
||||||
page.mp_meta.mm_geo.upper, page.mp_meta.mm_geo.grow,
|
page.mp_meta.mm_geo.upper, page.mp_meta.mm_geo.grow,
|
||||||
page.mp_meta.mm_geo.shrink, page.mp_meta.mm_txnid_a,
|
page.mp_meta.mm_geo.shrink, page.mp_meta.mm_txnid_a.inconsistent,
|
||||||
mdbx_durable_str(&page.mp_meta));
|
mdbx_durable_str(&page.mp_meta));
|
||||||
|
|
||||||
/* LY: check min-pages value */
|
/* LY: check min-pages value */
|
||||||
@ -5797,7 +5827,7 @@ static int __cold mdbx_read_header(MDBX_env *env, MDBX_meta *meta,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (page.mp_meta.mm_txnid_a == 0) {
|
if (safe64_read(&page.mp_meta.mm_txnid_a) == 0) {
|
||||||
mdbx_warning("meta[%u] has zero txnid, skip it", meta_number);
|
mdbx_warning("meta[%u] has zero txnid, skip it", meta_number);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -5943,8 +5973,10 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
|
|||||||
flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */
|
flags &= MDBX_WRITEMAP | MDBX_SHRINK_ALLOWED; /* force steady */
|
||||||
shrink = pending->mm_geo.now - bottom;
|
shrink = pending->mm_geo.now - bottom;
|
||||||
pending->mm_geo.now = bottom;
|
pending->mm_geo.now = bottom;
|
||||||
if (mdbx_meta_txnid_stable(env, head) == pending->mm_txnid_a)
|
if (mdbx_meta_txnid_stable(env, head) ==
|
||||||
mdbx_meta_set_txnid(env, pending, pending->mm_txnid_a + 1);
|
pending->mm_txnid_a.inconsistent)
|
||||||
|
mdbx_meta_set_txnid(env, pending,
|
||||||
|
pending->mm_txnid_a.inconsistent + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -5996,7 +6028,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
|
|||||||
}
|
}
|
||||||
|
|
||||||
MDBX_meta *target = nullptr;
|
MDBX_meta *target = nullptr;
|
||||||
if (mdbx_meta_txnid_stable(env, head) == pending->mm_txnid_a) {
|
if (mdbx_meta_txnid_stable(env, head) == pending->mm_txnid_a.inconsistent) {
|
||||||
mdbx_assert(env, memcmp(&head->mm_dbs, &pending->mm_dbs,
|
mdbx_assert(env, memcmp(&head->mm_dbs, &pending->mm_dbs,
|
||||||
sizeof(head->mm_dbs)) == 0);
|
sizeof(head->mm_dbs)) == 0);
|
||||||
mdbx_assert(env, memcmp(&head->mm_canary, &pending->mm_canary,
|
mdbx_assert(env, memcmp(&head->mm_canary, &pending->mm_canary,
|
||||||
@ -6027,8 +6059,8 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
|
|||||||
pending->mm_dbs[MAIN_DBI].md_root,
|
pending->mm_dbs[MAIN_DBI].md_root,
|
||||||
pending->mm_dbs[FREE_DBI].md_root, pending->mm_geo.lower,
|
pending->mm_dbs[FREE_DBI].md_root, pending->mm_geo.lower,
|
||||||
pending->mm_geo.next, pending->mm_geo.now, pending->mm_geo.upper,
|
pending->mm_geo.next, pending->mm_geo.now, pending->mm_geo.upper,
|
||||||
pending->mm_geo.grow, pending->mm_geo.shrink, pending->mm_txnid_a,
|
pending->mm_geo.grow, pending->mm_geo.shrink,
|
||||||
mdbx_durable_str(pending));
|
pending->mm_txnid_a.inconsistent, mdbx_durable_str(pending));
|
||||||
|
|
||||||
mdbx_debug("meta0: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO
|
mdbx_debug("meta0: %s, %s, txn_id %" PRIaTXN ", root %" PRIaPGNO
|
||||||
"/%" PRIaPGNO,
|
"/%" PRIaPGNO,
|
||||||
@ -6052,12 +6084,12 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
|
|||||||
|
|
||||||
mdbx_assert(env, ((env->me_flags ^ flags) & MDBX_WRITEMAP) == 0);
|
mdbx_assert(env, ((env->me_flags ^ flags) & MDBX_WRITEMAP) == 0);
|
||||||
mdbx_ensure(env, target == head || mdbx_meta_txnid_stable(env, target) <
|
mdbx_ensure(env, target == head || mdbx_meta_txnid_stable(env, target) <
|
||||||
pending->mm_txnid_a);
|
pending->mm_txnid_a.inconsistent);
|
||||||
if (env->me_flags & MDBX_WRITEMAP) {
|
if (env->me_flags & MDBX_WRITEMAP) {
|
||||||
mdbx_jitter4testing(true);
|
mdbx_jitter4testing(true);
|
||||||
if (likely(target != head)) {
|
if (likely(target != head)) {
|
||||||
/* LY: 'invalidate' the meta. */
|
/* LY: 'invalidate' the meta. */
|
||||||
mdbx_meta_update_begin(env, target, pending->mm_txnid_a);
|
mdbx_meta_update_begin(env, target, pending->mm_txnid_a.inconsistent);
|
||||||
target->mm_datasync_sign = MDBX_DATASIGN_WEAK;
|
target->mm_datasync_sign = MDBX_DATASIGN_WEAK;
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
/* debug: provoke failure to catch a violators, but don't touch mm_psize
|
/* debug: provoke failure to catch a violators, but don't touch mm_psize
|
||||||
@ -6078,14 +6110,14 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
|
|||||||
mdbx_flush_noncoherent_cpu_writeback();
|
mdbx_flush_noncoherent_cpu_writeback();
|
||||||
|
|
||||||
/* LY: 'commit' the meta */
|
/* LY: 'commit' the meta */
|
||||||
mdbx_meta_update_end(env, target, pending->mm_txnid_b);
|
mdbx_meta_update_end(env, target, pending->mm_txnid_b.inconsistent);
|
||||||
mdbx_jitter4testing(true);
|
mdbx_jitter4testing(true);
|
||||||
} else {
|
} else {
|
||||||
/* dangerous case (target == head), only mm_datasync_sign could
|
/* dangerous case (target == head), only mm_datasync_sign could
|
||||||
* me updated, check assertions once again */
|
* me updated, check assertions once again */
|
||||||
mdbx_ensure(env,
|
mdbx_ensure(env, mdbx_meta_txnid_stable(env, head) ==
|
||||||
mdbx_meta_txnid_stable(env, head) == pending->mm_txnid_a &&
|
pending->mm_txnid_a.inconsistent &&
|
||||||
!META_IS_STEADY(head) && META_IS_STEADY(pending));
|
!META_IS_STEADY(head) && META_IS_STEADY(pending));
|
||||||
mdbx_ensure(env, memcmp(&head->mm_geo, &pending->mm_geo,
|
mdbx_ensure(env, memcmp(&head->mm_geo, &pending->mm_geo,
|
||||||
sizeof(head->mm_geo)) == 0);
|
sizeof(head->mm_geo)) == 0);
|
||||||
mdbx_ensure(env, memcmp(&head->mm_dbs, &pending->mm_dbs,
|
mdbx_ensure(env, memcmp(&head->mm_dbs, &pending->mm_dbs,
|
||||||
@ -6761,7 +6793,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
|
|||||||
meta.mm_dbs[MAIN_DBI].md_root, meta.mm_dbs[FREE_DBI].md_root,
|
meta.mm_dbs[MAIN_DBI].md_root, meta.mm_dbs[FREE_DBI].md_root,
|
||||||
meta.mm_geo.lower, meta.mm_geo.next, meta.mm_geo.now,
|
meta.mm_geo.lower, meta.mm_geo.next, meta.mm_geo.now,
|
||||||
meta.mm_geo.upper, meta.mm_geo.grow, meta.mm_geo.shrink,
|
meta.mm_geo.upper, meta.mm_geo.grow, meta.mm_geo.shrink,
|
||||||
meta.mm_txnid_a, mdbx_durable_str(&meta));
|
meta.mm_txnid_a.inconsistent, mdbx_durable_str(&meta));
|
||||||
|
|
||||||
mdbx_setup_pagesize(env, meta.mm_psize);
|
mdbx_setup_pagesize(env, meta.mm_psize);
|
||||||
const size_t used_bytes = pgno2bytes(env, meta.mm_geo.next);
|
const size_t used_bytes = pgno2bytes(env, meta.mm_geo.next);
|
||||||
@ -6824,7 +6856,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
|
|||||||
meta.mm_dbs[MAIN_DBI].md_root, meta.mm_dbs[FREE_DBI].md_root,
|
meta.mm_dbs[MAIN_DBI].md_root, meta.mm_dbs[FREE_DBI].md_root,
|
||||||
meta.mm_geo.lower, meta.mm_geo.next, meta.mm_geo.now,
|
meta.mm_geo.lower, meta.mm_geo.next, meta.mm_geo.now,
|
||||||
meta.mm_geo.upper, meta.mm_geo.grow, meta.mm_geo.shrink,
|
meta.mm_geo.upper, meta.mm_geo.grow, meta.mm_geo.shrink,
|
||||||
meta.mm_txnid_a, mdbx_durable_str(&meta));
|
meta.mm_txnid_a.inconsistent, mdbx_durable_str(&meta));
|
||||||
}
|
}
|
||||||
mdbx_ensure(env, meta.mm_geo.now >= meta.mm_geo.next);
|
mdbx_ensure(env, meta.mm_geo.now >= meta.mm_geo.next);
|
||||||
} else {
|
} else {
|
||||||
@ -6896,7 +6928,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
|
|||||||
while (1) {
|
while (1) {
|
||||||
MDBX_meta *head = mdbx_meta_head(env);
|
MDBX_meta *head = mdbx_meta_head(env);
|
||||||
const txnid_t head_txnid = mdbx_meta_txnid_fluid(env, head);
|
const txnid_t head_txnid = mdbx_meta_txnid_fluid(env, head);
|
||||||
if (head_txnid == meta.mm_txnid_a)
|
if (head_txnid == meta.mm_txnid_a.inconsistent)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
if (lck_rc == /* lck exclusive */ MDBX_RESULT_TRUE) {
|
if (lck_rc == /* lck exclusive */ MDBX_RESULT_TRUE) {
|
||||||
@ -6904,7 +6936,7 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
|
|||||||
if (env->me_flags & MDBX_RDONLY) {
|
if (env->me_flags & MDBX_RDONLY) {
|
||||||
mdbx_error("rollback needed: (from head %" PRIaTXN
|
mdbx_error("rollback needed: (from head %" PRIaTXN
|
||||||
" to steady %" PRIaTXN "), but unable in read-only mode",
|
" to steady %" PRIaTXN "), but unable in read-only mode",
|
||||||
head_txnid, meta.mm_txnid_a);
|
head_txnid, meta.mm_txnid_a.inconsistent);
|
||||||
return MDBX_WANNA_RECOVERY /* LY: could not recovery/rollback */;
|
return MDBX_WANNA_RECOVERY /* LY: could not recovery/rollback */;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -6917,21 +6949,23 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
|
|||||||
(head != meta1 && mdbx_meta_txnid_fluid(env, meta1) == undo_txnid) ||
|
(head != meta1 && mdbx_meta_txnid_fluid(env, meta1) == undo_txnid) ||
|
||||||
(head != meta2 && mdbx_meta_txnid_fluid(env, meta2) == undo_txnid))
|
(head != meta2 && mdbx_meta_txnid_fluid(env, meta2) == undo_txnid))
|
||||||
undo_txnid += 1;
|
undo_txnid += 1;
|
||||||
if (unlikely(undo_txnid >= meta.mm_txnid_a)) {
|
if (unlikely(undo_txnid >= meta.mm_txnid_a.inconsistent)) {
|
||||||
mdbx_fatal("rollback failed: no suitable txnid (0,1,2) < %" PRIaTXN,
|
mdbx_fatal("rollback failed: no suitable txnid (0,1,2) < %" PRIaTXN,
|
||||||
meta.mm_txnid_a);
|
meta.mm_txnid_a.inconsistent);
|
||||||
return MDBX_PANIC /* LY: could not recovery/rollback */;
|
return MDBX_PANIC /* LY: could not recovery/rollback */;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* LY: rollback weak checkpoint */
|
/* LY: rollback weak checkpoint */
|
||||||
mdbx_trace("rollback: from %" PRIaTXN ", to %" PRIaTXN " as %" PRIaTXN,
|
mdbx_trace("rollback: from %" PRIaTXN ", to %" PRIaTXN " as %" PRIaTXN,
|
||||||
head_txnid, meta.mm_txnid_a, undo_txnid);
|
head_txnid, meta.mm_txnid_a.inconsistent, undo_txnid);
|
||||||
mdbx_ensure(env, head_txnid == mdbx_meta_txnid_stable(env, head));
|
mdbx_ensure(env, head_txnid == mdbx_meta_txnid_stable(env, head));
|
||||||
|
|
||||||
if (env->me_flags & MDBX_WRITEMAP) {
|
if (env->me_flags & MDBX_WRITEMAP) {
|
||||||
head->mm_txnid_a = undo_txnid;
|
/* It is possible to update txnid without safe64_write(),
|
||||||
|
* since DB opened exclusive for now */
|
||||||
|
head->mm_txnid_a.inconsistent = undo_txnid;
|
||||||
head->mm_datasync_sign = MDBX_DATASIGN_WEAK;
|
head->mm_datasync_sign = MDBX_DATASIGN_WEAK;
|
||||||
head->mm_txnid_b = undo_txnid;
|
head->mm_txnid_b.inconsistent = undo_txnid;
|
||||||
const size_t offset =
|
const size_t offset =
|
||||||
((uint8_t *)container_of(head, MDBX_page, mp_meta)) -
|
((uint8_t *)container_of(head, MDBX_page, mp_meta)) -
|
||||||
env->me_dxb_mmap.dxb;
|
env->me_dxb_mmap.dxb;
|
||||||
@ -12958,13 +12992,13 @@ int __cold mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn,
|
|||||||
|
|
||||||
arg->mi_self_latter_reader_txnid = arg->mi_latter_reader_txnid = 0;
|
arg->mi_self_latter_reader_txnid = arg->mi_latter_reader_txnid = 0;
|
||||||
if (env->me_lck) {
|
if (env->me_lck) {
|
||||||
MDBX_reader *r = env->me_lck->mti_readers;
|
MDBX_reader *rlt = env->me_lck->mti_readers;
|
||||||
arg->mi_self_latter_reader_txnid = arg->mi_latter_reader_txnid =
|
arg->mi_self_latter_reader_txnid = arg->mi_latter_reader_txnid =
|
||||||
arg->mi_recent_txnid;
|
arg->mi_recent_txnid;
|
||||||
for (unsigned i = 0; i < arg->mi_numreaders; ++i) {
|
for (unsigned i = 0; i < arg->mi_numreaders; ++i) {
|
||||||
const uint32_t pid = r[i].mr_pid;
|
const uint32_t pid = rlt[i].mr_pid;
|
||||||
if (pid) {
|
if (pid) {
|
||||||
const txnid_t txnid = r[i].mr_txnid;
|
const txnid_t txnid = safe64_read(&rlt[i].mr_txnid);
|
||||||
if (arg->mi_latter_reader_txnid > txnid)
|
if (arg->mi_latter_reader_txnid > txnid)
|
||||||
arg->mi_latter_reader_txnid = txnid;
|
arg->mi_latter_reader_txnid = txnid;
|
||||||
if (pid == env->me_pid && arg->mi_self_latter_reader_txnid > txnid)
|
if (pid == env->me_pid && arg->mi_self_latter_reader_txnid > txnid)
|
||||||
@ -13521,19 +13555,19 @@ int __cold mdbx_reader_list(MDBX_env *env, MDBX_reader_list_func *func,
|
|||||||
const uint32_t pid = r->mr_pid;
|
const uint32_t pid = r->mr_pid;
|
||||||
if (!pid)
|
if (!pid)
|
||||||
continue;
|
continue;
|
||||||
txnid_t txnid = r->mr_txnid;
|
txnid_t txnid = safe64_read(&r->mr_txnid);
|
||||||
const size_t tid = r->mr_tid;
|
const size_t tid = r->mr_tid;
|
||||||
const pgno_t pages_used = r->mr_snapshot_pages_used;
|
const pgno_t pages_used = r->mr_snapshot_pages_used;
|
||||||
const uint64_t reader_pages_retired = r->mr_snapshot_pages_retired;
|
const uint64_t reader_pages_retired = r->mr_snapshot_pages_retired;
|
||||||
mdbx_compiler_barrier();
|
mdbx_compiler_barrier();
|
||||||
if (unlikely(pid != r->mr_pid || txnid != r->mr_txnid ||
|
if (unlikely(tid != r->mr_tid ||
|
||||||
tid != r->mr_tid ||
|
|
||||||
pages_used != r->mr_snapshot_pages_used ||
|
pages_used != r->mr_snapshot_pages_used ||
|
||||||
reader_pages_retired != r->mr_snapshot_pages_retired))
|
reader_pages_retired != r->mr_snapshot_pages_retired ||
|
||||||
|
txnid != safe64_read(&r->mr_txnid) || pid != r->mr_pid))
|
||||||
goto retry_reader;
|
goto retry_reader;
|
||||||
|
|
||||||
mdbx_assert(env, txnid > 0);
|
mdbx_assert(env, txnid > 0);
|
||||||
if (txnid >= BAD_TXNID)
|
if (txnid >= SAFE64_INVALID_THRESHOLD)
|
||||||
txnid = 0;
|
txnid = 0;
|
||||||
|
|
||||||
size_t bytes_used = 0;
|
size_t bytes_used = 0;
|
||||||
@ -13700,7 +13734,7 @@ int __cold mdbx_reader_check0(MDBX_env *env, int rdt_locked, int *dead) {
|
|||||||
for (unsigned j = i; j < snap_nreaders; j++) {
|
for (unsigned j = i; j < snap_nreaders; j++) {
|
||||||
if (lck->mti_readers[j].mr_pid == pid) {
|
if (lck->mti_readers[j].mr_pid == pid) {
|
||||||
mdbx_debug("clear stale reader pid %" PRIuPTR " txn %" PRIaTXN,
|
mdbx_debug("clear stale reader pid %" PRIuPTR " txn %" PRIaTXN,
|
||||||
(size_t)pid, lck->mti_readers[j].mr_txnid);
|
(size_t)pid, lck->mti_readers[j].mr_txnid.inconsistent);
|
||||||
lck->mti_readers[j].mr_pid = 0;
|
lck->mti_readers[j].mr_pid = 0;
|
||||||
lck->mti_readers_refresh_flag = true;
|
lck->mti_readers_refresh_flag = true;
|
||||||
count++;
|
count++;
|
||||||
@ -13782,15 +13816,15 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
|
|||||||
if (MDBX_IS_ERROR(mdbx_reader_check0(env, false, NULL)))
|
if (MDBX_IS_ERROR(mdbx_reader_check0(env, false, NULL)))
|
||||||
break;
|
break;
|
||||||
|
|
||||||
MDBX_reader *const rtbl = env->me_lck->mti_readers;
|
MDBX_reader *const rlt = env->me_lck->mti_readers;
|
||||||
MDBX_reader *asleep = nullptr;
|
MDBX_reader *asleep = nullptr;
|
||||||
for (int i = env->me_lck->mti_numreaders; --i >= 0;) {
|
for (int i = env->me_lck->mti_numreaders; --i >= 0;) {
|
||||||
if (rtbl[i].mr_pid) {
|
if (rlt[i].mr_pid) {
|
||||||
mdbx_jitter4testing(true);
|
mdbx_jitter4testing(true);
|
||||||
const txnid_t snap = rtbl[i].mr_txnid;
|
const txnid_t snap = safe64_read(&rlt[i].mr_txnid);
|
||||||
if (oldest > snap && laggard <= /* ignore pending updates */ snap) {
|
if (oldest > snap && laggard <= /* ignore pending updates */ snap) {
|
||||||
oldest = snap;
|
oldest = snap;
|
||||||
asleep = &rtbl[i];
|
asleep = &rlt[i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -13813,7 +13847,7 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
|
|||||||
|
|
||||||
uint32_t pid = asleep->mr_pid;
|
uint32_t pid = asleep->mr_pid;
|
||||||
size_t tid = asleep->mr_tid;
|
size_t tid = asleep->mr_tid;
|
||||||
if (asleep->mr_txnid != laggard || pid <= 0)
|
if (safe64_read(&asleep->mr_txnid) != laggard || pid <= 0)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
const txnid_t gap =
|
const txnid_t gap =
|
||||||
@ -13825,7 +13859,7 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
if (rc) {
|
if (rc) {
|
||||||
safe_txnid_reset(&asleep->mr_txnid);
|
safe64_reset(&asleep->mr_txnid);
|
||||||
if (rc > 1) {
|
if (rc > 1) {
|
||||||
asleep->mr_tid = 0;
|
asleep->mr_tid = 0;
|
||||||
asleep->mr_pid = 0;
|
asleep->mr_pid = 0;
|
||||||
|
@ -153,6 +153,14 @@
|
|||||||
#define MDBX_WORDBITS 32
|
#define MDBX_WORDBITS 32
|
||||||
#endif /* MDBX_WORDBITS */
|
#endif /* MDBX_WORDBITS */
|
||||||
|
|
||||||
|
#ifndef MDBX_64BIT_ATOMIC
|
||||||
|
#if MDBX_WORDBITS >= 64
|
||||||
|
#define MDBX_64BIT_ATOMIC 1
|
||||||
|
#else
|
||||||
|
#define MDBX_64BIT_ATOMIC 0
|
||||||
|
#endif
|
||||||
|
#endif /* MDBX_64BIT_ATOMIC */
|
||||||
|
|
||||||
/* Some platforms define the EOWNERDEAD error code even though they
|
/* Some platforms define the EOWNERDEAD error code even though they
|
||||||
* don't support Robust Mutexes. Compile with -DMDBX_USE_ROBUST=0. */
|
* don't support Robust Mutexes. Compile with -DMDBX_USE_ROBUST=0. */
|
||||||
#ifndef MDBX_USE_ROBUST
|
#ifndef MDBX_USE_ROBUST
|
||||||
@ -263,7 +271,6 @@ typedef uint32_t pgno_t;
|
|||||||
typedef uint64_t txnid_t;
|
typedef uint64_t txnid_t;
|
||||||
#define PRIaTXN PRIi64
|
#define PRIaTXN PRIi64
|
||||||
#define MIN_TXNID UINT64_C(1)
|
#define MIN_TXNID UINT64_C(1)
|
||||||
#define BAD_TXNID UINT64_C(0xffffFFFF00000000)
|
|
||||||
|
|
||||||
/* Used for offsets within a single page.
|
/* Used for offsets within a single page.
|
||||||
* Since memory pages are typically 4 or 8KB in size, 12-13 bits,
|
* Since memory pages are typically 4 or 8KB in size, 12-13 bits,
|
||||||
@ -276,6 +283,27 @@ typedef uint16_t indx_t;
|
|||||||
/* Core structures for database and shared memory (i.e. format definition) */
|
/* Core structures for database and shared memory (i.e. format definition) */
|
||||||
#pragma pack(push, 1)
|
#pragma pack(push, 1)
|
||||||
|
|
||||||
|
typedef union mdbx_safe64 {
|
||||||
|
volatile uint64_t inconsistent;
|
||||||
|
#if MDBX_64BIT_ATOMIC
|
||||||
|
volatile uint64_t atomic;
|
||||||
|
#else
|
||||||
|
struct {
|
||||||
|
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||||
|
volatile uint32_t low;
|
||||||
|
volatile uint32_t high;
|
||||||
|
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||||
|
volatile uint32_t high;
|
||||||
|
volatile uint32_t low;
|
||||||
|
#else
|
||||||
|
#error "FIXME: Unsupported byte order"
|
||||||
|
#endif /* __BYTE_ORDER__ */
|
||||||
|
};
|
||||||
|
#endif /* MDBX_64BIT_ATOMIC */
|
||||||
|
} mdbx_safe64_t;
|
||||||
|
|
||||||
|
#define SAFE64_INVALID_THRESHOLD UINT64_C(0xffffFFFF00000000)
|
||||||
|
|
||||||
/* Information about a single database in the environment. */
|
/* Information about a single database in the environment. */
|
||||||
typedef struct MDBX_db {
|
typedef struct MDBX_db {
|
||||||
uint16_t md_flags; /* see mdbx_dbi_open */
|
uint16_t md_flags; /* see mdbx_dbi_open */
|
||||||
@ -310,7 +338,7 @@ typedef struct MDBX_meta {
|
|||||||
uint64_t mm_magic_and_version;
|
uint64_t mm_magic_and_version;
|
||||||
|
|
||||||
/* txnid that committed this page, the first of a two-phase-update pair */
|
/* txnid that committed this page, the first of a two-phase-update pair */
|
||||||
volatile txnid_t mm_txnid_a;
|
mdbx_safe64_t mm_txnid_a;
|
||||||
|
|
||||||
uint16_t mm_extra_flags; /* extra DB flags, zero (nothing) for now */
|
uint16_t mm_extra_flags; /* extra DB flags, zero (nothing) for now */
|
||||||
uint8_t mm_validator_id; /* ID of checksum and page validation method,
|
uint8_t mm_validator_id; /* ID of checksum and page validation method,
|
||||||
@ -336,7 +364,7 @@ typedef struct MDBX_meta {
|
|||||||
volatile uint64_t mm_datasync_sign;
|
volatile uint64_t mm_datasync_sign;
|
||||||
|
|
||||||
/* txnid that committed this page, the second of a two-phase-update pair */
|
/* txnid that committed this page, the second of a two-phase-update pair */
|
||||||
volatile txnid_t mm_txnid_b;
|
mdbx_safe64_t mm_txnid_b;
|
||||||
|
|
||||||
/* Number of non-meta pages which were put in GC after COW. May be 0 in case
|
/* Number of non-meta pages which were put in GC after COW. May be 0 in case
|
||||||
* DB was previously handled by libmdbx without corresponding feature.
|
* DB was previously handled by libmdbx without corresponding feature.
|
||||||
@ -448,7 +476,7 @@ typedef struct MDBX_reader {
|
|||||||
* anything; all we need to know is which version of the DB they
|
* anything; all we need to know is which version of the DB they
|
||||||
* started from so we can avoid overwriting any data used in that
|
* started from so we can avoid overwriting any data used in that
|
||||||
* particular version. */
|
* particular version. */
|
||||||
volatile uint64_t /* txnid_t */ mr_txnid;
|
mdbx_safe64_t /* txnid_t */ mr_txnid;
|
||||||
|
|
||||||
/* The information we store in a single slot of the reader table.
|
/* The information we store in a single slot of the reader table.
|
||||||
* In addition to a transaction ID, we also record the process and
|
* In addition to a transaction ID, we also record the process and
|
||||||
|
Loading…
x
Reference in New Issue
Block a user