mdbx: rework mdbx_flush_incoherent_mmap().

This commit is contained in:
Leonid Yuriev 2019-11-13 22:12:09 +03:00
parent fe40af160d
commit c09fbc2ad2
3 changed files with 78 additions and 58 deletions

View File

@ -704,7 +704,7 @@ static __inline void safe64_reset(mdbx_safe64_t *ptr, bool single_writer) {
ptr->high = UINT32_MAX; ptr->high = UINT32_MAX;
#endif /* MDBX_64BIT_ATOMIC */ #endif /* MDBX_64BIT_ATOMIC */
assert(ptr->inconsistent >= SAFE64_INVALID_THRESHOLD); assert(ptr->inconsistent >= SAFE64_INVALID_THRESHOLD);
mdbx_flush_noncoherent_cpu_writeback(); mdbx_flush_incoherent_cpu_writeback();
mdbx_jitter4testing(true); mdbx_jitter4testing(true);
} }
@ -718,7 +718,7 @@ static __inline bool safe64_reset_compare(mdbx_safe64_t *ptr, txnid_t compare) {
* if a new transaction was started (i.e. if `mr_txnid` was changed). */ * if a new transaction was started (i.e. if `mr_txnid` was changed). */
#if MDBX_64BIT_CAS #if MDBX_64BIT_CAS
bool rc = atomic_cas64(&ptr->inconsistent, compare, UINT64_MAX); bool rc = atomic_cas64(&ptr->inconsistent, compare, UINT64_MAX);
mdbx_flush_noncoherent_cpu_writeback(); mdbx_flush_incoherent_cpu_writeback();
#else #else
/* LY: There is no gold ratio here since shared mutex is too costly, /* LY: There is no gold ratio here since shared mutex is too costly,
* in such way we must acquire/release it for every update of mr_txnid, * in such way we must acquire/release it for every update of mr_txnid,
@ -745,13 +745,13 @@ static __inline void safe64_write(mdbx_safe64_t *ptr, const uint64_t v) {
/* update low-part but still value >= SAFE64_INVALID_THRESHOLD */ /* update low-part but still value >= SAFE64_INVALID_THRESHOLD */
ptr->low = (uint32_t)v; ptr->low = (uint32_t)v;
assert(ptr->inconsistent >= SAFE64_INVALID_THRESHOLD); assert(ptr->inconsistent >= SAFE64_INVALID_THRESHOLD);
mdbx_flush_noncoherent_cpu_writeback(); mdbx_flush_incoherent_cpu_writeback();
mdbx_jitter4testing(true); mdbx_jitter4testing(true);
/* update high-part from SAFE64_INVALID_THRESHOLD to actual value */ /* update high-part from SAFE64_INVALID_THRESHOLD to actual value */
ptr->high = (uint32_t)(v >> 32); ptr->high = (uint32_t)(v >> 32);
#endif /* MDBX_64BIT_ATOMIC */ #endif /* MDBX_64BIT_ATOMIC */
assert(ptr->inconsistent == v); assert(ptr->inconsistent == v);
mdbx_flush_noncoherent_cpu_writeback(); mdbx_flush_incoherent_cpu_writeback();
mdbx_jitter4testing(true); mdbx_jitter4testing(true);
} }
@ -1247,7 +1247,7 @@ static int uniq_poke(const mdbx_mmap_t *pending, mdbx_mmap_t *scan,
<< 24 | << 24 |
*abra >> 40; *abra >> 40;
scan->lck->mti_bait_uniqueness = cadabra; scan->lck->mti_bait_uniqueness = cadabra;
mdbx_flush_noncoherent_cpu_writeback(); mdbx_flush_incoherent_cpu_writeback();
*abra = *abra * UINT64_C(6364136223846793005) + 1; *abra = *abra * UINT64_C(6364136223846793005) + 1;
return uniq_peek(pending, scan); return uniq_peek(pending, scan);
} }
@ -3399,7 +3399,7 @@ static txnid_t mdbx_find_oldest(MDBX_txn *txn) {
txnid_t oldest = edge; txnid_t oldest = edge;
lck->mti_readers_refresh_flag = nothing_changed; lck->mti_readers_refresh_flag = nothing_changed;
mdbx_flush_noncoherent_cpu_writeback(); mdbx_flush_incoherent_cpu_writeback();
const unsigned snap_nreaders = lck->mti_numreaders; const unsigned snap_nreaders = lck->mti_numreaders;
for (unsigned i = 0; i < snap_nreaders; ++i) { for (unsigned i = 0; i < snap_nreaders; ++i) {
if (lck->mti_readers[i].mr_pid) { if (lck->mti_readers[i].mr_pid) {
@ -4724,7 +4724,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) {
mdbx_assert(env, r->mr_txnid.inconsistent == snap); mdbx_assert(env, r->mr_txnid.inconsistent == snap);
mdbx_compiler_barrier(); mdbx_compiler_barrier();
env->me_lck->mti_readers_refresh_flag = true; env->me_lck->mti_readers_refresh_flag = true;
mdbx_flush_noncoherent_cpu_writeback(); mdbx_flush_incoherent_cpu_writeback();
} }
mdbx_jitter4testing(true); mdbx_jitter4testing(true);
@ -5297,7 +5297,7 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) {
slot->mr_snapshot_pages_used = 0; slot->mr_snapshot_pages_used = 0;
safe64_reset(&slot->mr_txnid, false); safe64_reset(&slot->mr_txnid, false);
env->me_lck->mti_readers_refresh_flag = true; env->me_lck->mti_readers_refresh_flag = true;
mdbx_flush_noncoherent_cpu_writeback(); mdbx_flush_incoherent_cpu_writeback();
} else { } else {
mdbx_assert(env, slot->mr_pid == env->me_pid); mdbx_assert(env, slot->mr_pid == env->me_pid);
mdbx_assert(env, mdbx_assert(env,
@ -6325,16 +6325,14 @@ static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
int rc = mdbx_flush_iov(txn, iov, iov_items, iov_off, iov_bytes); int rc = mdbx_flush_iov(txn, iov, iov_items, iov_off, iov_bytes);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
return rc; return rc;
#if MDBX_CPU_CACHE_MMAP_NONCOHERENT
#if defined(__linux__) || defined(__gnu_linux__) #if defined(__linux__) || defined(__gnu_linux__)
if (mdbx_linux_kernel_version >= 0x02060b00) if (mdbx_linux_kernel_version >= 0x02060b00)
/* Linux kernels older than version 2.6.11 ignore the addr and nbytes /* Linux kernels older than version 2.6.11 ignore the addr and nbytes
* arguments, making this function fairly expensive. Therefore, the * arguments, making this function fairly expensive. Therefore, the
* whole cache is always flushed. */ * whole cache is always flushed. */
#endif /* Linux */ #endif /* Linux */
mdbx_invalidate_mmap_noncoherent_cache(env->me_map + iov_off, mdbx_flush_incoherent_mmap(env->me_map + iov_off, iov_bytes,
iov_bytes); env->me_os_psize);
#endif /* MDBX_CPU_CACHE_MMAP_NONCOHERENT */
iov_items = 0; iov_items = 0;
iov_bytes = 0; iov_bytes = 0;
} }
@ -6353,17 +6351,16 @@ static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) {
return rc; return rc;
} }
#if MDBX_CPU_CACHE_MMAP_NONCOHERENT && \ #if defined(__linux__) || defined(__gnu_linux__)
(defined(__linux__) || defined(__gnu_linux__))
if ((env->me_flags & MDBX_WRITEMAP) == 0 && if ((env->me_flags & MDBX_WRITEMAP) == 0 &&
mdbx_linux_kernel_version < 0x02060b00) mdbx_linux_kernel_version < 0x02060b00)
/* Linux kernels older than version 2.6.11 ignore the addr and nbytes /* Linux kernels older than version 2.6.11 ignore the addr and nbytes
* arguments, making this function fairly expensive. Therefore, the * arguments, making this function fairly expensive. Therefore, the
* whole cache is always flushed. */ * whole cache is always flushed. */
mdbx_invalidate_mmap_noncoherent_cache( mdbx_flush_incoherent_mmap(env->me_map + pgno2bytes(env, flush_begin),
env->me_map + pgno2bytes(env, flush_begin), pgno2bytes(env, flush_end - flush_begin),
pgno2bytes(env, flush_end - flush_begin)); env->me_os_psize);
#endif /* MDBX_CPU_CACHE_MMAP_NONCOHERENT && Linux */ #endif /* Linux */
/* TODO: use flush_begin & flush_end for msync() & sync_file_range(). */ /* TODO: use flush_begin & flush_end for msync() & sync_file_range(). */
(void)flush_begin; (void)flush_begin;
@ -7336,7 +7333,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
target->mm_canary = pending->mm_canary; target->mm_canary = pending->mm_canary;
target->mm_pages_retired = pending->mm_pages_retired; target->mm_pages_retired = pending->mm_pages_retired;
mdbx_jitter4testing(true); mdbx_jitter4testing(true);
mdbx_flush_noncoherent_cpu_writeback(); mdbx_flush_incoherent_cpu_writeback();
/* LY: 'commit' the meta */ /* LY: 'commit' the meta */
mdbx_meta_update_end(env, target, pending->mm_txnid_b.inconsistent); mdbx_meta_update_end(env, target, pending->mm_txnid_b.inconsistent);
@ -7355,7 +7352,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
sizeof(head->mm_canary)) == 0); sizeof(head->mm_canary)) == 0);
} }
target->mm_datasync_sign = pending->mm_datasync_sign; target->mm_datasync_sign = pending->mm_datasync_sign;
mdbx_flush_noncoherent_cpu_writeback(); mdbx_flush_incoherent_cpu_writeback();
mdbx_jitter4testing(true); mdbx_jitter4testing(true);
} else { } else {
rc = mdbx_pwrite(env->me_fd, pending, sizeof(MDBX_meta), rc = mdbx_pwrite(env->me_fd, pending, sizeof(MDBX_meta),
@ -7369,7 +7366,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags,
(uint8_t *)target - env->me_map); (uint8_t *)target - env->me_map);
goto fail; goto fail;
} }
mdbx_invalidate_mmap_noncoherent_cache(target, sizeof(MDBX_meta)); mdbx_flush_incoherent_mmap(target, sizeof(MDBX_meta), env->me_os_psize);
} }
/* LY: step#3 - sync meta-pages. */ /* LY: step#3 - sync meta-pages. */
@ -8217,8 +8214,8 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) {
return err; return err;
} }
mdbx_invalidate_mmap_noncoherent_cache(env->me_map, mdbx_flush_incoherent_mmap(env->me_map, pgno2bytes(env, NUM_METAS),
pgno2bytes(env, NUM_METAS)); env->me_os_psize);
mdbx_ensure(env, undo_txnid == mdbx_meta_txnid_fluid(env, head)); mdbx_ensure(env, undo_txnid == mdbx_meta_txnid_fluid(env, head));
mdbx_ensure(env, 0 == mdbx_meta_eq_mask(env)); mdbx_ensure(env, 0 == mdbx_meta_eq_mask(env));
continue; continue;
@ -15229,7 +15226,7 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) {
asleep->mr_pid = 0; asleep->mr_pid = 0;
} }
lck->mti_readers_refresh_flag = true; lck->mti_readers_refresh_flag = true;
mdbx_flush_noncoherent_cpu_writeback(); mdbx_flush_incoherent_cpu_writeback();
} }
} }
@ -16685,7 +16682,9 @@ __dll_export
" MDBX_USE_OFDLOCKS=" MDBX_USE_OFDLOCKS_CONFIG " MDBX_USE_OFDLOCKS=" MDBX_USE_OFDLOCKS_CONFIG
#endif /* !Windows */ #endif /* !Windows */
" MDBX_CACHELINE_SIZE=" STRINGIFY(MDBX_CACHELINE_SIZE) " MDBX_CACHELINE_SIZE=" STRINGIFY(MDBX_CACHELINE_SIZE)
" MDBX_CPU_WRITEBACK_IS_COHERENT=" STRINGIFY(MDBX_CPU_WRITEBACK_IS_COHERENT) " MDBX_CPU_WRITEBACK_INCOHERENT=" STRINGIFY(MDBX_CPU_WRITEBACK_INCOHERENT)
" MDBX_MMAP_INCOHERENT_CPU_CACHE=" STRINGIFY(MDBX_MMAP_INCOHERENT_CPU_CACHE)
" MDBX_MMAP_INCOHERENT_FILE_WRITE=" STRINGIFY(MDBX_MMAP_INCOHERENT_FILE_WRITE)
" MDBX_UNALIGNED_OK=" STRINGIFY(MDBX_UNALIGNED_OK) " MDBX_UNALIGNED_OK=" STRINGIFY(MDBX_UNALIGNED_OK)
" MDBX_PNL_ASCENDING=" STRINGIFY(MDBX_PNL_ASCENDING) " MDBX_PNL_ASCENDING=" STRINGIFY(MDBX_PNL_ASCENDING)
, ,

View File

@ -158,31 +158,6 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
#include "options.h" #include "options.h"
/*----------------------------------------------------------------------------*/
/* Cache coherence and invalidation */
/* mdbx_flush_noncoherent_cpu_writeback() is placed after stores that other
 * parties must observe. Depending on the MDBX_CPU_WRITEBACK_IS_COHERENT build
 * option it expands either to mdbx_memory_barrier() (option is zero) or to
 * mdbx_compiler_barrier() (option is non-zero). */
#if !MDBX_CPU_WRITEBACK_IS_COHERENT
#define mdbx_flush_noncoherent_cpu_writeback() mdbx_memory_barrier()
#else
#define mdbx_flush_noncoherent_cpu_writeback() mdbx_compiler_barrier()
#endif /* !MDBX_CPU_WRITEBACK_IS_COHERENT */
/* Invalidates the CPU data cache for the range [addr, addr + nbytes).
 *
 * When MDBX_CPU_CACHE_MMAP_NONCOHERENT is zero this is a no-op and both
 * arguments are merely marked as used. Otherwise cacheflush() with DCACHE
 * is called for the range; the build fails if DCACHE is not defined. */
static __maybe_unused __inline void
mdbx_invalidate_mmap_noncoherent_cache(void *addr, size_t nbytes) {
#if !MDBX_CPU_CACHE_MMAP_NONCOHERENT
  /* Coherent mapping: nothing to invalidate. */
  (void)addr;
  (void)nbytes;
#elif defined(DCACHE)
  /* MIPS has cache coherency issues.
   * Note: for any nbytes >= on-chip cache size, the entire cache is
   * flushed. */
  cacheflush(addr, nbytes, DCACHE);
#else
#error "Oops, cacheflush() not available"
#endif /* MDBX_CPU_CACHE_MMAP_NONCOHERENT */
}
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
/* Basic constants and types */ /* Basic constants and types */
@ -1133,6 +1108,44 @@ MDBX_INTERNAL_FUNC void mdbx_assert_fail(const MDBX_env *env, const char *msg,
#define assert(expr) mdbx_assert(NULL, expr) #define assert(expr) mdbx_assert(NULL, expr)
#endif #endif
/*----------------------------------------------------------------------------*/
/* Cache coherence and mmap invalidation */
/* mdbx_flush_incoherent_cpu_writeback() is placed after stores that other
 * parties must observe. Depending on the MDBX_CPU_WRITEBACK_INCOHERENT build
 * option it expands either to mdbx_compiler_barrier() (option is zero) or to
 * mdbx_memory_barrier() (option is non-zero). */
#if !MDBX_CPU_WRITEBACK_INCOHERENT
#define mdbx_flush_incoherent_cpu_writeback() mdbx_compiler_barrier()
#else
#define mdbx_flush_incoherent_cpu_writeback() mdbx_memory_barrier()
#endif /* !MDBX_CPU_WRITEBACK_INCOHERENT */
/* Brings the memory-mapped view of [addr, addr + nbytes) back in sync on
 * platforms where it may be incoherent.
 *
 * addr     - start of the affected range inside the mapping.
 * nbytes   - length of the affected range in bytes.
 * pagesize - page size used to widen the range to whole pages for msync();
 *            the masking below assumes it is a power of two.
 *
 * Two independent build options select what is done:
 *  - MDBX_MMAP_INCOHERENT_FILE_WRITE: msync(MS_SYNC | MS_INVALIDATE) over
 *    the page-aligned span covering the range. A failure is only checked
 *    by mdbx_assert(); the error code is otherwise discarded.
 *  - MDBX_MMAP_INCOHERENT_CPU_CACHE: cacheflush() with DCACHE for the range;
 *    the build fails if DCACHE is not defined.
 * With both options zero the function does nothing. */
static __inline void mdbx_flush_incoherent_mmap(void *addr, size_t nbytes,
                                                const intptr_t pagesize) {
#if !MDBX_MMAP_INCOHERENT_FILE_WRITE && !MDBX_MMAP_INCOHERENT_CPU_CACHE
  /* Fully coherent configuration: the range is not needed at all. */
  (void)addr;
  (void)nbytes;
#endif

#if MDBX_MMAP_INCOHERENT_FILE_WRITE
  /* Round the start down and the end up to page boundaries. */
  const intptr_t page_mask = -pagesize;
  char *const first_page = (char *)(page_mask & (intptr_t)addr);
  char *const past_last_page =
      (char *)(page_mask & (intptr_t)((char *)addr + nbytes + pagesize - 1));
  const int msync_err =
      msync(first_page, past_last_page - first_page, MS_SYNC | MS_INVALIDATE)
          ? errno
          : 0;
  mdbx_assert(nullptr, msync_err == 0);
  (void)msync_err;
#else
  (void)pagesize;
#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */

#if MDBX_MMAP_INCOHERENT_CPU_CACHE
#ifndef DCACHE
#error "Oops, cacheflush() not available"
#else
  /* MIPS has cache coherency issues.
   * Note: for any nbytes >= on-chip cache size, the entire cache is
   * flushed. */
  cacheflush(addr, nbytes, DCACHE);
#endif /* DCACHE */
#endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */
}
/*----------------------------------------------------------------------------*/ /*----------------------------------------------------------------------------*/
/* Internal prototypes */ /* Internal prototypes */

View File

@ -114,26 +114,34 @@
//------------------------------------------------------------------------------ //------------------------------------------------------------------------------
#ifndef MDBX_CPU_WRITEBACK_IS_COHERENT #ifndef MDBX_CPU_WRITEBACK_INCOHERENT
#if defined(__ia32__) || defined(__e2k__) || defined(__hppa) || \ #if defined(__ia32__) || defined(__e2k__) || defined(__hppa) || \
defined(__hppa__) defined(__hppa__)
#define MDBX_CPU_WRITEBACK_IS_COHERENT 1 #define MDBX_CPU_WRITEBACK_INCOHERENT 0
#else #else
#define MDBX_CPU_WRITEBACK_IS_COHERENT 0 #define MDBX_CPU_WRITEBACK_INCOHERENT 1
#endif #endif
#endif /* MDBX_CPU_WRITEBACK_IS_COHERENT */ #endif /* MDBX_CPU_WRITEBACK_INCOHERENT */
#ifndef MDBX_CPU_CACHE_MMAP_NONCOHERENT #ifndef MDBX_MMAP_INCOHERENT_FILE_WRITE
#ifdef __OpenBSD__
#define MDBX_MMAP_INCOHERENT_FILE_WRITE 1
#else
#define MDBX_MMAP_INCOHERENT_FILE_WRITE 0
#endif
#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */
#ifndef MDBX_MMAP_INCOHERENT_CPU_CACHE
#if defined(__mips) || defined(__mips__) || defined(__mips64) || \ #if defined(__mips) || defined(__mips__) || defined(__mips64) || \
defined(__mips64__) || defined(_M_MRX000) || defined(_MIPS_) || \ defined(__mips64__) || defined(_M_MRX000) || defined(_MIPS_) || \
defined(__MWERKS__) || defined(__sgi) defined(__MWERKS__) || defined(__sgi)
/* MIPS has cache coherency issues. */ /* MIPS has cache coherency issues. */
#define MDBX_CPU_CACHE_MMAP_NONCOHERENT 1 #define MDBX_MMAP_INCOHERENT_CPU_CACHE 1
#else #else
/* LY: assume no relevant mmap/dcache issues. */ /* LY: assume no relevant mmap/dcache issues. */
#define MDBX_CPU_CACHE_MMAP_NONCOHERENT 0 #define MDBX_MMAP_INCOHERENT_CPU_CACHE 0
#endif #endif
#endif /* MDBX_CPU_CACHE_MMAP_NONCOHERENT */ #endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */
#ifndef MDBX_64BIT_ATOMIC #ifndef MDBX_64BIT_ATOMIC
#if MDBX_WORDBITS >= 64 #if MDBX_WORDBITS >= 64