mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-02 00:04:12 +08:00
mdbx: use C11 atomics if available instead of legacy memory barriers.
This is done to better support architectures with a weak/relaxed memory consistency model (ARM, AARCH64, PPC, MIPS, RISC-V, etc.). Change-Id: Iee831c8dc564f1d027ff84b0d6daa559325d5a9b
This commit is contained in:
parent
bc33875a9e
commit
9f0ff865e8
1
.github/actions/spelling/expect.txt
vendored
1
.github/actions/spelling/expect.txt
vendored
@ -205,6 +205,7 @@ cpflags
|
||||
cplus
|
||||
cplusplus
|
||||
cpp
|
||||
cppreference
|
||||
cpuid
|
||||
CROSSCOMPILING
|
||||
crtdbg
|
||||
|
@ -48,6 +48,7 @@ New features:
|
||||
- more effective refunding/compactification especially for the loosed page cache.
|
||||
- Added `MDBX_ENABLE_REFUND` and `MDBX_PNL_ASCENDING` internal/advanced build options.
|
||||
- Added `mdbx_default_pagesize()` function.
|
||||
- Better support for architectures with a weak/relaxed memory consistency model (ARM, AARCH64, PPC, MIPS, RISC-V, etc.) by means of [C11 atomics](https://en.cppreference.com/w/c/atomic).
|
||||
|
||||
Fixes:
|
||||
|
||||
|
911
src/core.c
911
src/core.c
File diff suppressed because it is too large
Load Diff
124
src/internals.h
124
src/internals.h
@ -186,6 +186,31 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Basic constants and types */
|
||||
|
||||
typedef union {
|
||||
volatile uint32_t weak;
|
||||
#ifdef MDBX_HAVE_C11ATOMICS
|
||||
volatile _Atomic uint32_t c11a;
|
||||
#endif /* MDBX_HAVE_C11ATOMICS */
|
||||
} MDBX_atomic_uint32_t;
|
||||
|
||||
typedef union {
|
||||
volatile uint64_t weak;
|
||||
#if defined(MDBX_HAVE_C11ATOMICS) && (MDBX_64BIT_CAS || MDBX_64BIT_ATOMIC)
|
||||
volatile _Atomic uint64_t c11a;
|
||||
#endif
|
||||
#if !defined(MDBX_HAVE_C11ATOMICS) || !MDBX_64BIT_CAS || !MDBX_64BIT_ATOMIC
|
||||
__anonymous_struct_extension__ struct {
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
MDBX_atomic_uint32_t low, high;
|
||||
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
MDBX_atomic_uint32_t high, low;
|
||||
#else
|
||||
#error "FIXME: Unsupported byte order"
|
||||
#endif /* __BYTE_ORDER__ */
|
||||
};
|
||||
#endif
|
||||
} MDBX_atomic_uint64_t;
|
||||
|
||||
/* The minimum number of keys required in a database page.
|
||||
* Setting this to a larger value will place a smaller bound on the
|
||||
* maximum size of a data item. Data items larger than this size will
|
||||
@ -224,6 +249,7 @@ extern LIBMDBX_API const char *const mdbx_sourcery_anchor;
|
||||
* MDBX uses 32 bit for page numbers. This limits database
|
||||
* size up to 2^44 bytes, in case of 4K pages. */
|
||||
typedef uint32_t pgno_t;
|
||||
typedef MDBX_atomic_uint32_t atomic_pgno_t;
|
||||
#define PRIaPGNO PRIu32
|
||||
#define MAX_PAGENO UINT32_C(0x7FFFffff)
|
||||
#define MIN_PAGENO NUM_METAS
|
||||
@ -232,6 +258,7 @@ typedef uint32_t pgno_t;
|
||||
|
||||
/* A transaction ID. */
|
||||
typedef uint64_t txnid_t;
|
||||
typedef MDBX_atomic_uint64_t atomic_txnid_t;
|
||||
#define PRIaTXN PRIi64
|
||||
#define MIN_TXNID UINT64_C(1)
|
||||
#define MAX_TXNID (SAFE64_INVALID_THRESHOLD - 1)
|
||||
@ -258,24 +285,6 @@ typedef uint16_t indx_t;
|
||||
/* Core structures for database and shared memory (i.e. format definition) */
|
||||
#pragma pack(push, 1)
|
||||
|
||||
typedef union mdbx_safe64 {
|
||||
volatile uint64_t inconsistent;
|
||||
#if MDBX_64BIT_ATOMIC
|
||||
volatile uint64_t atomic;
|
||||
#endif /* MDBX_64BIT_ATOMIC */
|
||||
__anonymous_struct_extension__ struct {
|
||||
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||||
volatile uint32_t low;
|
||||
volatile uint32_t high;
|
||||
#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
|
||||
volatile uint32_t high;
|
||||
volatile uint32_t low;
|
||||
#else
|
||||
#error "FIXME: Unsupported byte order"
|
||||
#endif /* __BYTE_ORDER__ */
|
||||
};
|
||||
} mdbx_safe64_t;
|
||||
|
||||
/* Information about a single database in the environment. */
|
||||
typedef struct MDBX_db {
|
||||
uint16_t md_flags; /* see mdbx_dbi_open */
|
||||
@ -478,7 +487,7 @@ typedef struct MDBX_reader {
|
||||
* anything; all we need to know is which version of the DB they
|
||||
* started from so we can avoid overwriting any data used in that
|
||||
* particular version. */
|
||||
mdbx_safe64_t /* txnid_t */ mr_txnid;
|
||||
MDBX_atomic_uint64_t /* txnid_t */ mr_txnid;
|
||||
|
||||
/* The information we store in a single slot of the reader table.
|
||||
* In addition to a transaction ID, we also record the process and
|
||||
@ -490,23 +499,18 @@ typedef struct MDBX_reader {
|
||||
* opening the lock file. */
|
||||
|
||||
/* The thread ID of the thread owning this txn. */
|
||||
#if MDBX_WORDBITS >= 64
|
||||
volatile uint64_t mr_tid;
|
||||
#else
|
||||
volatile uint32_t mr_tid;
|
||||
volatile uint32_t mr_aba_curer; /* CSN to resolve ABA_problems on 32-bit arch,
|
||||
unused for now */
|
||||
#endif
|
||||
MDBX_atomic_uint64_t mr_tid;
|
||||
|
||||
/* The process ID of the process owning this reader txn. */
|
||||
volatile uint32_t mr_pid;
|
||||
MDBX_atomic_uint32_t mr_pid;
|
||||
|
||||
/* The number of pages used in the reader's MVCC snapshot,
|
||||
* i.e. the value of meta->mm_geo.next and txn->mt_next_pgno */
|
||||
volatile pgno_t mr_snapshot_pages_used;
|
||||
atomic_pgno_t mr_snapshot_pages_used;
|
||||
/* Number of retired pages at the time this reader starts transaction. So,
|
||||
* at any time the difference mm_pages_retired - mr_snapshot_pages_retired
|
||||
* will give the number of pages which this reader restraining from reuse. */
|
||||
volatile uint64_t mr_snapshot_pages_retired;
|
||||
MDBX_atomic_uint64_t mr_snapshot_pages_retired;
|
||||
} MDBX_reader;
|
||||
|
||||
/* The header for the reader table (a memory-mapped lock file). */
|
||||
@ -519,25 +523,25 @@ typedef struct MDBX_lockinfo {
|
||||
uint32_t mti_os_and_format;
|
||||
|
||||
/* Flags which environment was opened. */
|
||||
volatile uint32_t mti_envmode;
|
||||
MDBX_atomic_uint32_t mti_envmode;
|
||||
|
||||
/* Threshold of un-synced-with-disk pages for auto-sync feature,
|
||||
* zero means no-threshold, i.e. auto-sync is disabled. */
|
||||
volatile pgno_t mti_autosync_threshold;
|
||||
atomic_pgno_t mti_autosync_threshold;
|
||||
|
||||
/* Low 32-bit of txnid with which meta-pages was synced,
|
||||
* i.e. for sync-polling in the MDBX_NOMETASYNC mode. */
|
||||
volatile uint32_t mti_meta_sync_txnid;
|
||||
MDBX_atomic_uint32_t mti_meta_sync_txnid;
|
||||
|
||||
/* Period for timed auto-sync feature, i.e. at the every steady checkpoint
|
||||
* the mti_unsynced_timeout sets to the current_time + mti_autosync_period.
|
||||
* The time value is represented in a suitable system-dependent form, for
|
||||
* example clock_gettime(CLOCK_BOOTTIME) or clock_gettime(CLOCK_MONOTONIC).
|
||||
* Zero means timed auto-sync is disabled. */
|
||||
volatile uint64_t mti_autosync_period;
|
||||
MDBX_atomic_uint64_t mti_autosync_period;
|
||||
|
||||
/* Marker to distinguish uniqueness of DB/CLK.*/
|
||||
volatile uint64_t mti_bait_uniqueness;
|
||||
MDBX_atomic_uint64_t mti_bait_uniqueness;
|
||||
|
||||
alignas(MDBX_CACHELINE_SIZE) /* cacheline ---------------------------------*/
|
||||
|
||||
@ -546,21 +550,21 @@ typedef struct MDBX_lockinfo {
|
||||
mdbx_ipclock_t mti_wlock;
|
||||
#endif /* MDBX_LOCKING > 0 */
|
||||
|
||||
volatile txnid_t mti_oldest_reader;
|
||||
atomic_txnid_t mti_oldest_reader;
|
||||
|
||||
/* Timestamp of the last steady sync. Value is represented in a suitable
|
||||
* system-dependent form, for example clock_gettime(CLOCK_BOOTTIME) or
|
||||
* clock_gettime(CLOCK_MONOTONIC). */
|
||||
volatile uint64_t mti_sync_timestamp;
|
||||
MDBX_atomic_uint64_t mti_sync_timestamp;
|
||||
|
||||
/* Number un-synced-with-disk pages for auto-sync feature. */
|
||||
volatile pgno_t mti_unsynced_pages;
|
||||
atomic_pgno_t mti_unsynced_pages;
|
||||
|
||||
/* Number of page which was discarded last time by madvise(MADV_FREE). */
|
||||
volatile pgno_t mti_discarded_tail;
|
||||
atomic_pgno_t mti_discarded_tail;
|
||||
|
||||
/* Timestamp of the last readers check. */
|
||||
volatile uint64_t mti_reader_check_timestamp;
|
||||
MDBX_atomic_uint64_t mti_reader_check_timestamp;
|
||||
|
||||
alignas(MDBX_CACHELINE_SIZE) /* cacheline ---------------------------------*/
|
||||
|
||||
@ -572,8 +576,8 @@ typedef struct MDBX_lockinfo {
|
||||
/* The number of slots that have been used in the reader table.
|
||||
* This always records the maximum count, it is not decremented
|
||||
* when readers release their slots. */
|
||||
volatile unsigned mti_numreaders;
|
||||
volatile unsigned mti_readers_refresh_flag;
|
||||
MDBX_atomic_uint32_t mti_numreaders;
|
||||
MDBX_atomic_uint32_t mti_readers_refresh_flag;
|
||||
|
||||
#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || \
|
||||
(!defined(__cplusplus) && defined(_MSC_VER))
|
||||
@ -912,7 +916,7 @@ typedef struct MDBX_cursor_couple {
|
||||
/* The database environment. */
|
||||
struct MDBX_env {
|
||||
#define MDBX_ME_SIGNATURE UINT32_C(0x9A899641)
|
||||
uint32_t me_signature;
|
||||
MDBX_atomic_uint32_t me_signature;
|
||||
/* Failed to update the meta page. Probably an I/O error. */
|
||||
#define MDBX_FATAL_ERROR UINT32_C(0x80000000)
|
||||
/* Some fields are initialized. */
|
||||
@ -958,11 +962,11 @@ struct MDBX_env {
|
||||
mdbx_ipclock_t *me_wlock;
|
||||
#endif /* MDBX_LOCKING > 0 */
|
||||
|
||||
MDBX_dbx *me_dbxs; /* array of static DB info */
|
||||
uint16_t *me_dbflags; /* array of flags from MDBX_db.md_flags */
|
||||
unsigned *me_dbiseqs; /* array of dbi sequence numbers */
|
||||
volatile txnid_t *me_oldest; /* ID of oldest reader last time we looked */
|
||||
MDBX_page *me_dp_reserve; /* list of malloc'd blocks for re-use */
|
||||
MDBX_dbx *me_dbxs; /* array of static DB info */
|
||||
uint16_t *me_dbflags; /* array of flags from MDBX_db.md_flags */
|
||||
unsigned *me_dbiseqs; /* array of dbi sequence numbers */
|
||||
atomic_txnid_t *me_oldest; /* ID of oldest reader last time we looked */
|
||||
MDBX_page *me_dp_reserve; /* list of malloc'd blocks for re-use */
|
||||
/* PNL of pages that became unused in a write txn */
|
||||
MDBX_PNL me_retired_pages;
|
||||
/* Number of freelist items that can fit in a single overflow page */
|
||||
@ -970,12 +974,12 @@ struct MDBX_env {
|
||||
unsigned me_branch_nodemax; /* max size of a branch-node */
|
||||
uint32_t me_live_reader; /* have liveness lock in reader table */
|
||||
void *me_userctx; /* User-settable context */
|
||||
volatile uint64_t *me_sync_timestamp;
|
||||
volatile uint64_t *me_autosync_period;
|
||||
volatile pgno_t *me_unsynced_pages;
|
||||
volatile pgno_t *me_autosync_threshold;
|
||||
volatile pgno_t *me_discarded_tail;
|
||||
volatile uint32_t *me_meta_sync_txnid;
|
||||
MDBX_atomic_uint64_t *me_sync_timestamp;
|
||||
MDBX_atomic_uint64_t *me_autosync_period;
|
||||
atomic_pgno_t *me_unsynced_pages;
|
||||
atomic_pgno_t *me_autosync_threshold;
|
||||
atomic_pgno_t *me_discarded_tail;
|
||||
MDBX_atomic_uint32_t *me_meta_sync_txnid;
|
||||
MDBX_hsr_func *me_hsr_callback; /* Callback for kicking laggard readers */
|
||||
unsigned me_dp_reserve_len;
|
||||
struct {
|
||||
@ -992,13 +996,13 @@ struct MDBX_env {
|
||||
#if MDBX_LOCKING > 0
|
||||
mdbx_ipclock_t wlock;
|
||||
#endif /* MDBX_LOCKING > 0 */
|
||||
txnid_t oldest;
|
||||
uint64_t sync_timestamp;
|
||||
uint64_t autosync_period;
|
||||
pgno_t autosync_pending;
|
||||
pgno_t autosync_threshold;
|
||||
pgno_t discarded_tail;
|
||||
uint32_t meta_sync_txnid;
|
||||
atomic_txnid_t oldest;
|
||||
MDBX_atomic_uint64_t sync_timestamp;
|
||||
MDBX_atomic_uint64_t autosync_period;
|
||||
atomic_pgno_t autosync_pending;
|
||||
atomic_pgno_t autosync_threshold;
|
||||
atomic_pgno_t discarded_tail;
|
||||
MDBX_atomic_uint32_t meta_sync_txnid;
|
||||
} me_lckless_stub;
|
||||
#if MDBX_DEBUG
|
||||
MDBX_assert_func *me_assert_func; /* Callback for assertion failures */
|
||||
|
@ -496,7 +496,7 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_destroy(MDBX_env *env,
|
||||
|
||||
mdbx_assert(env, rc == 0);
|
||||
if (rc == 0) {
|
||||
const bool synced = env->me_lck_mmap.lck->mti_unsynced_pages == 0;
|
||||
const bool synced = env->me_lck_mmap.lck->mti_unsynced_pages.weak == 0;
|
||||
mdbx_munmap(&env->me_lck_mmap);
|
||||
if (synced)
|
||||
rc = ftruncate(env->me_lfd, 0) ? errno : 0;
|
||||
|
@ -265,23 +265,25 @@ mdbx_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) {
|
||||
if (env->me_lck) {
|
||||
/* Scan LCK for threads of the current process */
|
||||
const MDBX_reader *const begin = env->me_lck->mti_readers;
|
||||
const MDBX_reader *const end = begin + env->me_lck->mti_numreaders;
|
||||
const MDBX_reader *const end =
|
||||
begin + atomic_load32(&env->me_lck->mti_numreaders, mo_AcquireRelease);
|
||||
const uintptr_t WriteTxnOwner = env->me_txn0 ? env->me_txn0->mt_owner : 0;
|
||||
for (const MDBX_reader *reader = begin; reader < end; ++reader) {
|
||||
if (reader->mr_pid != env->me_pid || !reader->mr_tid) {
|
||||
if (reader->mr_pid.weak != env->me_pid || !reader->mr_tid.weak) {
|
||||
skip_lck:
|
||||
continue;
|
||||
}
|
||||
if (reader->mr_tid == CurrentTid || reader->mr_tid == WriteTxnOwner)
|
||||
if (reader->mr_tid.weak == CurrentTid ||
|
||||
reader->mr_tid.weak == WriteTxnOwner)
|
||||
goto skip_lck;
|
||||
if (env->me_flags & MDBX_NOTLS) {
|
||||
/* Skip duplicates in no-tls mode */
|
||||
for (const MDBX_reader *scan = reader; --scan >= begin;)
|
||||
if (scan->mr_tid == reader->mr_tid)
|
||||
if (scan->mr_tid.weak == reader->mr_tid.weak)
|
||||
goto skip_lck;
|
||||
}
|
||||
|
||||
rc = suspend_and_append(array, (mdbx_tid_t)reader->mr_tid);
|
||||
rc = suspend_and_append(array, (mdbx_tid_t)reader->mr_tid.weak);
|
||||
if (rc != MDBX_SUCCESS) {
|
||||
bailout_lck:
|
||||
(void)mdbx_resume_threads_after_remap(*array);
|
||||
@ -599,7 +601,7 @@ MDBX_INTERNAL_FUNC int mdbx_lck_destroy(MDBX_env *env,
|
||||
if (env->me_map)
|
||||
mdbx_munmap(&env->me_dxb_mmap);
|
||||
if (env->me_lck) {
|
||||
const bool synced = env->me_lck_mmap.lck->mti_unsynced_pages == 0;
|
||||
const bool synced = env->me_lck_mmap.lck->mti_unsynced_pages.weak == 0;
|
||||
mdbx_munmap(&env->me_lck_mmap);
|
||||
if (synced && !inprocess_neighbor && env->me_lfd != INVALID_HANDLE_VALUE &&
|
||||
mdbx_lck_upgrade(env) == MDBX_SUCCESS)
|
||||
|
32
src/osal.h
32
src/osal.h
@ -465,15 +465,17 @@ extern void mdbx_osal_jitter(bool tiny);
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* Atomics */
|
||||
|
||||
#if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && __has_include(<cstdatomic>)
|
||||
#if defined(__cplusplus) && !defined(__STDC_NO_ATOMICS__) && (__has_include(<cstdatomic>) || __has_extension(cxx_atomic))
|
||||
#include <cstdatomic>
|
||||
#elif !defined(__cplusplus) && (__STDC_VERSION__ >= 201112L) && \
|
||||
#define MDBX_HAVE_C11ATOMICS
|
||||
#elif !defined(__cplusplus) && \
|
||||
(__STDC_VERSION__ >= 201112L || __has_extension(c_atomic)) && \
|
||||
!defined(__STDC_NO_ATOMICS__) && \
|
||||
(__GNUC_PREREQ(4, 9) || __CLANG_PREREQ(3, 8) || \
|
||||
!(defined(__GNUC__) || defined(__clang__)))
|
||||
#include <stdatomic.h>
|
||||
#define MDBX_HAVE_C11ATOMICS
|
||||
#elif defined(__GNUC__) || defined(__clang__)
|
||||
/* LY: nothing required */
|
||||
#elif defined(_MSC_VER)
|
||||
#pragma warning(disable : 4163) /* 'xyz': not available as an intrinsic */
|
||||
#pragma warning(disable : 4133) /* 'function': incompatible types - from \
|
||||
@ -509,14 +511,6 @@ static __maybe_unused __inline void mdbx_compiler_barrier(void) {
|
||||
_ReadWriteBarrier();
|
||||
#elif defined(__INTEL_COMPILER) /* LY: Intel Compiler may mimic GCC and MSC */
|
||||
__memory_barrier();
|
||||
if (type > MDBX_BARRIER_COMPILER)
|
||||
#if defined(__ia64__) || defined(__ia64) || defined(_M_IA64)
|
||||
__mf();
|
||||
#elif defined(__i386__) || defined(__x86_64__)
|
||||
_mm_mfence();
|
||||
#else
|
||||
#error "Unknown target for Intel Compiler, please report to us."
|
||||
#endif
|
||||
#elif defined(__SUNPRO_C) || defined(__sun) || defined(sun)
|
||||
__compiler_barrier();
|
||||
#elif (defined(_HPUX_SOURCE) || defined(__hpux) || defined(__HP_aCC)) && \
|
||||
@ -531,21 +525,23 @@ static __maybe_unused __inline void mdbx_compiler_barrier(void) {
|
||||
}
|
||||
|
||||
static __maybe_unused __inline void mdbx_memory_barrier(void) {
|
||||
#if __has_extension(c_atomic) && !defined(__STDC_NO_ATOMICS__)
|
||||
atomic_thread_fence(__ATOMIC_SEQ_CST);
|
||||
#ifdef MDBX_HAVE_C11ATOMICS
|
||||
atomic_thread_fence(memory_order_seq_cst);
|
||||
#elif defined(__ATOMIC_SEQ_CST)
|
||||
#ifdef __clang__
|
||||
__c11_atomic_thread_fence(__ATOMIC_SEQ_CST);
|
||||
#else
|
||||
__atomic_thread_fence(__ATOMIC_SEQ_CST);
|
||||
#endif
|
||||
#elif defined(__clang__) || defined(__GNUC__)
|
||||
__sync_synchronize();
|
||||
#elif defined(_MSC_VER)
|
||||
#elif defined(_WIN32) || defined(_WIN64)
|
||||
MemoryBarrier();
|
||||
#elif defined(__INTEL_COMPILER) /* LY: Intel Compiler may mimic GCC and MSC */
|
||||
#if defined(__ia64__) || defined(__ia64) || defined(_M_IA64)
|
||||
__mf();
|
||||
#elif defined(__i386__) || defined(__x86_64__)
|
||||
#if defined(__ia32__)
|
||||
_mm_mfence();
|
||||
#else
|
||||
#error "Unknown target for Intel Compiler, please report to us."
|
||||
__mf();
|
||||
#endif
|
||||
#elif defined(__SUNPRO_C) || defined(__sun) || defined(sun)
|
||||
__machine_rw_barrier();
|
||||
|
Loading…
x
Reference in New Issue
Block a user