From c09fbc2ad212f07b7b5f675788534908d48c0a99 Mon Sep 17 00:00:00 2001 From: Leonid Yuriev Date: Wed, 13 Nov 2019 22:12:09 +0300 Subject: [PATCH] mdbx: refork mdbx_flush_incoherent_mmap(). --- src/elements/core.c | 49 +++++++++++++++---------------- src/elements/internals.h | 63 ++++++++++++++++++++++++---------------- src/elements/options.h | 24 ++++++++++----- 3 files changed, 78 insertions(+), 58 deletions(-) diff --git a/src/elements/core.c b/src/elements/core.c index 0cd72c3f..ff47d19c 100644 --- a/src/elements/core.c +++ b/src/elements/core.c @@ -704,7 +704,7 @@ static __inline void safe64_reset(mdbx_safe64_t *ptr, bool single_writer) { ptr->high = UINT32_MAX; #endif /* MDBX_64BIT_ATOMIC */ assert(ptr->inconsistent >= SAFE64_INVALID_THRESHOLD); - mdbx_flush_noncoherent_cpu_writeback(); + mdbx_flush_incoherent_cpu_writeback(); mdbx_jitter4testing(true); } @@ -718,7 +718,7 @@ static __inline bool safe64_reset_compare(mdbx_safe64_t *ptr, txnid_t compare) { * if a new transaction was started (i.e. if `mr_txnid` was changed). */ #if MDBX_64BIT_CAS bool rc = atomic_cas64(&ptr->inconsistent, compare, UINT64_MAX); - mdbx_flush_noncoherent_cpu_writeback(); + mdbx_flush_incoherent_cpu_writeback(); #else /* LY: There is no gold ratio here since shared mutex is too costly, * in such way we must acquire/release it for every update of mr_txnid, @@ -745,13 +745,13 @@ static __inline void safe64_write(mdbx_safe64_t *ptr, const uint64_t v) { /* update low-part but still value >= SAFE64_INVALID_THRESHOLD */ ptr->low = (uint32_t)v; assert(ptr->inconsistent >= SAFE64_INVALID_THRESHOLD); - mdbx_flush_noncoherent_cpu_writeback(); + mdbx_flush_incoherent_cpu_writeback(); mdbx_jitter4testing(true); /* update high-part from SAFE64_INVALID_THRESHOLD to actual value */ ptr->high = (uint32_t)(v >> 32); #endif /* MDBX_64BIT_ATOMIC */ assert(ptr->inconsistent == v); - mdbx_flush_noncoherent_cpu_writeback(); + mdbx_flush_incoherent_cpu_writeback(); mdbx_jitter4testing(true); } @@ -1247,7 +1247,7 @@ static int uniq_poke(const mdbx_mmap_t *pending, mdbx_mmap_t *scan, << 24 | *abra >> 40; scan->lck->mti_bait_uniqueness = cadabra; - mdbx_flush_noncoherent_cpu_writeback(); + mdbx_flush_incoherent_cpu_writeback(); *abra = *abra * UINT64_C(6364136223846793005) + 1; return uniq_peek(pending, scan); } @@ -3399,7 +3399,7 @@ static txnid_t mdbx_find_oldest(MDBX_txn *txn) { txnid_t oldest = edge; lck->mti_readers_refresh_flag = nothing_changed; - mdbx_flush_noncoherent_cpu_writeback(); + mdbx_flush_incoherent_cpu_writeback(); const unsigned snap_nreaders = lck->mti_numreaders; for (unsigned i = 0; i < snap_nreaders; ++i) { if (lck->mti_readers[i].mr_pid) { @@ -4724,7 +4724,7 @@ static int mdbx_txn_renew0(MDBX_txn *txn, unsigned flags) { mdbx_assert(env, r->mr_txnid.inconsistent == snap); mdbx_compiler_barrier(); env->me_lck->mti_readers_refresh_flag = true; - mdbx_flush_noncoherent_cpu_writeback(); + mdbx_flush_incoherent_cpu_writeback(); } mdbx_jitter4testing(true); @@ -5297,7 +5297,7 @@ static int mdbx_txn_end(MDBX_txn *txn, unsigned mode) { slot->mr_snapshot_pages_used = 0; safe64_reset(&slot->mr_txnid, false); env->me_lck->mti_readers_refresh_flag = true; - mdbx_flush_noncoherent_cpu_writeback(); + mdbx_flush_incoherent_cpu_writeback(); } else { mdbx_assert(env, slot->mr_pid == env->me_pid); mdbx_assert(env, @@ -6325,16 +6325,14 @@ static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) { int rc = mdbx_flush_iov(txn, iov, iov_items, iov_off, iov_bytes); if (unlikely(rc != MDBX_SUCCESS)) return rc; -#if MDBX_CPU_CACHE_MMAP_NONCOHERENT #if defined(__linux__) || defined(__gnu_linux__) if (mdbx_linux_kernel_version >= 0x02060b00) /* Linux kernels older than version 2.6.11 ignore the addr and nbytes * arguments, making this function fairly expensive. Therefore, the * whole cache is always flushed. */ #endif /* Linux */ - mdbx_invalidate_mmap_noncoherent_cache(env->me_map + iov_off, - iov_bytes); -#endif /* MDBX_CPU_CACHE_MMAP_NONCOHERENT */ + mdbx_flush_incoherent_mmap(env->me_map + iov_off, iov_bytes, + env->me_os_psize); iov_items = 0; iov_bytes = 0; } @@ -6353,17 +6351,16 @@ static int mdbx_page_flush(MDBX_txn *txn, const unsigned keep) { return rc; } -#if MDBX_CPU_CACHE_MMAP_NONCOHERENT && \ - (defined(__linux__) || defined(__gnu_linux__)) +#if defined(__linux__) || defined(__gnu_linux__) if ((env->me_flags & MDBX_WRITEMAP) == 0 && mdbx_linux_kernel_version < 0x02060b00) /* Linux kernels older than version 2.6.11 ignore the addr and nbytes * arguments, making this function fairly expensive. Therefore, the * whole cache is always flushed. */ - mdbx_invalidate_mmap_noncoherent_cache( - env->me_map + pgno2bytes(env, flush_begin), - pgno2bytes(env, flush_end - flush_begin)); -#endif /* MDBX_CPU_CACHE_MMAP_NONCOHERENT && Linux */ + mdbx_flush_incoherent_mmap(env->me_map + pgno2bytes(env, flush_begin), + pgno2bytes(env, flush_end - flush_begin), + env->me_os_psize); +#endif /* Linux */ /* TODO: use flush_begin & flush_end for msync() & sync_file_range(). */ (void)flush_begin; @@ -7336,7 +7333,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags, target->mm_canary = pending->mm_canary; target->mm_pages_retired = pending->mm_pages_retired; mdbx_jitter4testing(true); - mdbx_flush_noncoherent_cpu_writeback(); + mdbx_flush_incoherent_cpu_writeback(); /* LY: 'commit' the meta */ mdbx_meta_update_end(env, target, pending->mm_txnid_b.inconsistent); @@ -7355,7 +7352,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags, sizeof(head->mm_canary)) == 0); } target->mm_datasync_sign = pending->mm_datasync_sign; - mdbx_flush_noncoherent_cpu_writeback(); + mdbx_flush_incoherent_cpu_writeback(); mdbx_jitter4testing(true); } else { rc = mdbx_pwrite(env->me_fd, pending, sizeof(MDBX_meta), @@ -7369,7 +7366,7 @@ static int mdbx_sync_locked(MDBX_env *env, unsigned flags, (uint8_t *)target - env->me_map); goto fail; } - mdbx_invalidate_mmap_noncoherent_cache(target, sizeof(MDBX_meta)); + mdbx_flush_incoherent_mmap(target, sizeof(MDBX_meta), env->me_os_psize); } /* LY: step#3 - sync meta-pages. */ @@ -8217,8 +8214,8 @@ static int __cold mdbx_setup_dxb(MDBX_env *env, const int lck_rc) { return err; } - mdbx_invalidate_mmap_noncoherent_cache(env->me_map, - pgno2bytes(env, NUM_METAS)); + mdbx_flush_incoherent_mmap(env->me_map, pgno2bytes(env, NUM_METAS), + env->me_os_psize); mdbx_ensure(env, undo_txnid == mdbx_meta_txnid_fluid(env, head)); mdbx_ensure(env, 0 == mdbx_meta_eq_mask(env)); continue; @@ -15229,7 +15226,7 @@ static txnid_t __cold mdbx_oomkick(MDBX_env *env, const txnid_t laggard) { asleep->mr_pid = 0; } lck->mti_readers_refresh_flag = true; - mdbx_flush_noncoherent_cpu_writeback(); + mdbx_flush_incoherent_cpu_writeback(); } } @@ -16685,7 +16682,9 @@ __dll_export " MDBX_USE_OFDLOCKS=" MDBX_USE_OFDLOCKS_CONFIG #endif /* !Windows */ " MDBX_CACHELINE_SIZE=" STRINGIFY(MDBX_CACHELINE_SIZE) - " MDBX_CPU_WRITEBACK_IS_COHERENT=" STRINGIFY(MDBX_CPU_WRITEBACK_IS_COHERENT) + " MDBX_CPU_WRITEBACK_INCOHERENT=" STRINGIFY(MDBX_CPU_WRITEBACK_INCOHERENT) + " MDBX_MMAP_INCOHERENT_CPU_CACHE=" STRINGIFY(MDBX_MMAP_INCOHERENT_CPU_CACHE) + " MDBX_MMAP_INCOHERENT_FILE_WRITE=" STRINGIFY(MDBX_MMAP_INCOHERENT_FILE_WRITE) " MDBX_UNALIGNED_OK=" STRINGIFY(MDBX_UNALIGNED_OK) " MDBX_PNL_ASCENDING=" STRINGIFY(MDBX_PNL_ASCENDING) , diff --git a/src/elements/internals.h b/src/elements/internals.h index e1fdc5c1..0eb157a0 100644 --- a/src/elements/internals.h +++ b/src/elements/internals.h @@ -158,31 +158,6 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor; #include "options.h" -/*----------------------------------------------------------------------------*/ -/* Cache coherence and invalidation */ - -#if MDBX_CPU_WRITEBACK_IS_COHERENT -#define mdbx_flush_noncoherent_cpu_writeback() mdbx_compiler_barrier() -#else -#define mdbx_flush_noncoherent_cpu_writeback() mdbx_memory_barrier() -#endif - -static __maybe_unused __inline void -mdbx_invalidate_mmap_noncoherent_cache(void *addr, size_t nbytes) { -#if MDBX_CPU_CACHE_MMAP_NONCOHERENT -#ifdef DCACHE - /* MIPS has cache coherency issues. - * Note: for any nbytes >= on-chip cache size, entire is flushed. */ - cacheflush(addr, nbytes, DCACHE); -#else -#error "Oops, cacheflush() not available" -#endif /* DCACHE */ -#else /* MDBX_CPU_CACHE_MMAP_NONCOHERENT */ - (void)addr; - (void)nbytes; -#endif /* MDBX_CPU_CACHE_MMAP_NONCOHERENT */ -} - /*----------------------------------------------------------------------------*/ /* Basic constants and types */ @@ -1133,6 +1108,44 @@ MDBX_INTERNAL_FUNC void mdbx_assert_fail(const MDBX_env *env, const char *msg, #define assert(expr) mdbx_assert(NULL, expr) #endif +/*----------------------------------------------------------------------------*/ +/* Cache coherence and mmap invalidation */ + +#if MDBX_CPU_WRITEBACK_INCOHERENT +#define mdbx_flush_incoherent_cpu_writeback() mdbx_memory_barrier() +#else +#define mdbx_flush_incoherent_cpu_writeback() mdbx_compiler_barrier() +#endif /* MDBX_CPU_WRITEBACK_INCOHERENT */ + +static __inline void mdbx_flush_incoherent_mmap(void *addr, size_t nbytes, + const intptr_t pagesize) { +#if MDBX_MMAP_INCOHERENT_FILE_WRITE + char *const begin = (char *)(-pagesize & (intptr_t)addr); + char *const end = + (char *)(-pagesize & (intptr_t)((char *)addr + nbytes + pagesize - 1)); + int err = msync(begin, end - begin, MS_SYNC | MS_INVALIDATE) ? errno : 0; + mdbx_assert(nullptr, err == 0); + (void)err; +#else + (void)pagesize; +#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ + +#if MDBX_MMAP_INCOHERENT_CPU_CACHE +#ifdef DCACHE + /* MIPS has cache coherency issues. + * Note: for any nbytes >= on-chip cache size, entire is flushed. */ + cacheflush(addr, nbytes, DCACHE); +#else +#error "Oops, cacheflush() not available" +#endif /* DCACHE */ +#endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */ + +#if !MDBX_MMAP_INCOHERENT_FILE_WRITE && !MDBX_MMAP_INCOHERENT_CPU_CACHE + (void)addr; + (void)nbytes; +#endif +} + /*----------------------------------------------------------------------------*/ /* Internal prototypes */ diff --git a/src/elements/options.h b/src/elements/options.h index a1d4eef4..2694e920 100644 --- a/src/elements/options.h +++ b/src/elements/options.h @@ -114,26 +114,34 @@ //------------------------------------------------------------------------------ -#ifndef MDBX_CPU_WRITEBACK_IS_COHERENT +#ifndef MDBX_CPU_WRITEBACK_INCOHERENT #if defined(__ia32__) || defined(__e2k__) || defined(__hppa) || \ defined(__hppa__) -#define MDBX_CPU_WRITEBACK_IS_COHERENT 1 +#define MDBX_CPU_WRITEBACK_INCOHERENT 0 #else -#define MDBX_CPU_WRITEBACK_IS_COHERENT 0 +#define MDBX_CPU_WRITEBACK_INCOHERENT 1 #endif -#endif /* MDBX_CPU_WRITEBACK_IS_COHERENT */ +#endif /* MDBX_CPU_WRITEBACK_INCOHERENT */ -#ifndef MDBX_CPU_CACHE_MMAP_NONCOHERENT +#ifndef MDBX_MMAP_INCOHERENT_FILE_WRITE +#ifdef __OpenBSD__ +#define MDBX_MMAP_INCOHERENT_FILE_WRITE 1 +#else +#define MDBX_MMAP_INCOHERENT_FILE_WRITE 0 +#endif +#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ + +#ifndef MDBX_MMAP_INCOHERENT_CPU_CACHE #if defined(__mips) || defined(__mips__) || defined(__mips64) || \ defined(__mips64__) || defined(_M_MRX000) || defined(_MIPS_) || \ defined(__MWERKS__) || defined(__sgi) /* MIPS has cache coherency issues. */ -#define MDBX_CPU_CACHE_MMAP_NONCOHERENT 1 +#define MDBX_MMAP_INCOHERENT_CPU_CACHE 1 #else /* LY: assume no relevant mmap/dcache issues. */ -#define MDBX_CPU_CACHE_MMAP_NONCOHERENT 0 +#define MDBX_MMAP_INCOHERENT_CPU_CACHE 0 #endif -#endif /* MDBX_CPU_CACHE_MMAP_NONCOHERENT */ +#endif /* MDBX_MMAP_INCOHERENT_CPU_CACHE */ #ifndef MDBX_64BIT_ATOMIC #if MDBX_WORDBITS >= 64