mdbx: use single pointer to lck-less stub.

Change-Id: I80542423386bd54cac574d1678af2620f147c769
This commit is contained in:
Leonid Yuriev 2021-04-28 03:44:54 +03:00
parent 7d249c97ad
commit e57e42d0f8
4 changed files with 230 additions and 264 deletions

File diff suppressed because it is too large. (Load Diff)

View File

@@ -1133,7 +1133,7 @@ struct MDBX_env {
mdbx_filehandle_t me_dsync_fd; mdbx_filehandle_t me_dsync_fd;
mdbx_mmap_t me_lck_mmap; /* The lock file */ mdbx_mmap_t me_lck_mmap; /* The lock file */
#define me_lfd me_lck_mmap.fd #define me_lfd me_lck_mmap.fd
#define me_lck me_lck_mmap.lck struct MDBX_lockinfo *me_lck;
unsigned me_psize; /* DB page size, initialized from me_os_psize */ unsigned me_psize; /* DB page size, initialized from me_os_psize */
unsigned me_leaf_nodemax; /* max size of a leaf-node */ unsigned me_leaf_nodemax; /* max size of a leaf-node */
@@ -1151,22 +1151,12 @@ struct MDBX_env {
MDBX_dbx *me_dbxs; /* array of static DB info */ MDBX_dbx *me_dbxs; /* array of static DB info */
uint16_t *me_dbflags; /* array of flags from MDBX_db.md_flags */ uint16_t *me_dbflags; /* array of flags from MDBX_db.md_flags */
unsigned *me_dbiseqs; /* array of dbi sequence numbers */ unsigned *me_dbiseqs; /* array of dbi sequence numbers */
atomic_txnid_t *me_oldest; /* ID of oldest reader last time we looked */ unsigned
/* Number of freelist items that can fit in a single overflow page */ me_maxgc_ov1page; /* Number of pgno_t fit in a single overflow page */
unsigned me_maxgc_ov1page;
uint32_t me_live_reader; /* have liveness lock in reader table */ uint32_t me_live_reader; /* have liveness lock in reader table */
void *me_userctx; /* User-settable context */ void *me_userctx; /* User-settable context */
MDBX_atomic_uint64_t *me_sync_timestamp;
MDBX_atomic_uint64_t *me_autosync_period;
atomic_pgno_t *me_unsynced_pages;
atomic_pgno_t *me_autosync_threshold;
atomic_pgno_t *me_discarded_tail;
pgno_t *me_readahead_anchor;
MDBX_atomic_uint32_t *me_meta_sync_txnid;
#if MDBX_ENABLE_PGOP_STAT
MDBX_pgop_stat_t *me_pgop_stat;
#endif /* MDBX_ENABLE_PGOP_STAT*/
MDBX_hsr_func *me_hsr_callback; /* Callback for kicking laggard readers */ MDBX_hsr_func *me_hsr_callback; /* Callback for kicking laggard readers */
struct { struct {
unsigned dp_reserve_limit; unsigned dp_reserve_limit;
unsigned rp_augment_limit; unsigned rp_augment_limit;
@@ -1216,11 +1206,6 @@ struct MDBX_env {
/* PNL of pages that became unused in a write txn */ /* PNL of pages that became unused in a write txn */
MDBX_PNL me_retired_pages; MDBX_PNL me_retired_pages;
/* write-txn lock */
#if MDBX_LOCKING > 0
mdbx_ipclock_t *me_wlock;
#endif /* MDBX_LOCKING > 0 */
#if defined(_WIN32) || defined(_WIN64) #if defined(_WIN32) || defined(_WIN64)
MDBX_srwlock me_remap_guard; MDBX_srwlock me_remap_guard;
/* Workaround for LockFileEx and WriteFile multithread bug */ /* Workaround for LockFileEx and WriteFile multithread bug */
@@ -1229,23 +1214,6 @@ struct MDBX_env {
mdbx_fastmutex_t me_remap_guard; mdbx_fastmutex_t me_remap_guard;
#endif #endif
struct {
#if MDBX_LOCKING > 0
mdbx_ipclock_t wlock;
#endif /* MDBX_LOCKING > 0 */
atomic_txnid_t oldest;
MDBX_atomic_uint64_t sync_timestamp;
MDBX_atomic_uint64_t autosync_period;
atomic_pgno_t autosync_pending;
atomic_pgno_t autosync_threshold;
atomic_pgno_t discarded_tail;
pgno_t readahead_anchor;
MDBX_atomic_uint32_t meta_sync_txnid;
#if MDBX_ENABLE_PGOP_STAT
MDBX_pgop_stat_t pgop_stat;
#endif /* MDBX_ENABLE_PGOP_STAT*/
} me_lckless_stub;
/* -------------------------------------------------------------- debugging */ /* -------------------------------------------------------------- debugging */
#if MDBX_DEBUG #if MDBX_DEBUG
@@ -1257,6 +1225,12 @@ struct MDBX_env {
#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)
pgno_t me_poison_edge; pgno_t me_poison_edge;
#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */
/* ------------------------------------------------- stub for lck-less mode */
alignas(MDBX_CACHELINE_SIZE) uint64_t
me_lckless_stub[((sizeof(MDBX_lockinfo) + MDBX_CACHELINE_SIZE - 1) &
~(MDBX_CACHELINE_SIZE - 1)) /
8];
}; };
#ifndef __cplusplus #ifndef __cplusplus

View File

@@ -474,8 +474,8 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_destroy(MDBX_env *env,
int rc = MDBX_SUCCESS; int rc = MDBX_SUCCESS;
struct stat lck_info; struct stat lck_info;
if (env->me_lfd != INVALID_HANDLE_VALUE && !inprocess_neighbor && MDBX_lockinfo *lck = env->me_lck_mmap.lck;
env->me_lck && if (env->me_lfd != INVALID_HANDLE_VALUE && !inprocess_neighbor && lck &&
/* try get exclusive access */ /* try get exclusive access */
lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, OFF_T_MAX) == 0 && lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, OFF_T_MAX) == 0 &&
/* if LCK was not removed */ /* if LCK was not removed */
@@ -489,14 +489,14 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_destroy(MDBX_env *env,
if (env->me_sysv_ipc.semid != -1) if (env->me_sysv_ipc.semid != -1)
rc = semctl(env->me_sysv_ipc.semid, 2, IPC_RMID) ? errno : 0; rc = semctl(env->me_sysv_ipc.semid, 2, IPC_RMID) ? errno : 0;
#else #else
rc = mdbx_ipclock_destroy(&env->me_lck->mti_rlock); rc = mdbx_ipclock_destroy(&lck->mti_rlock);
if (rc == 0) if (rc == 0)
rc = mdbx_ipclock_destroy(&env->me_lck->mti_wlock); rc = mdbx_ipclock_destroy(&lck->mti_wlock);
#endif /* MDBX_LOCKING */ #endif /* MDBX_LOCKING */
mdbx_assert(env, rc == 0); mdbx_assert(env, rc == 0);
if (rc == 0) { if (rc == 0) {
const bool synced = env->me_lck_mmap.lck->mti_unsynced_pages.weak == 0; const bool synced = lck->mti_unsynced_pages.weak == 0;
mdbx_munmap(&env->me_lck_mmap); mdbx_munmap(&env->me_lck_mmap);
if (synced) if (synced)
rc = ftruncate(env->me_lfd, 0) ? errno : 0; rc = ftruncate(env->me_lfd, 0) ? errno : 0;
@@ -617,9 +617,9 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_init(MDBX_env *env,
/* don't initialize semaphores twice */ /* don't initialize semaphores twice */
(void)inprocess_neighbor; (void)inprocess_neighbor;
if (global_uniqueness_flag == MDBX_RESULT_TRUE) { if (global_uniqueness_flag == MDBX_RESULT_TRUE) {
if (sem_init(&env->me_lck->mti_rlock, true, 1)) if (sem_init(&env->me_lck_mmap.lck->mti_rlock, true, 1))
return errno; return errno;
if (sem_init(&env->me_lck->mti_wlock, true, 1)) if (sem_init(&env->me_lck_mmap.lck->mti_wlock, true, 1))
return errno; return errno;
} }
return MDBX_SUCCESS; return MDBX_SUCCESS;
@@ -688,10 +688,10 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_init(MDBX_env *env,
if (rc && rc != ENOTSUP) if (rc && rc != ENOTSUP)
goto bailout; goto bailout;
rc = pthread_mutex_init(&env->me_lck->mti_rlock, &ma); rc = pthread_mutex_init(&env->me_lck_mmap.lck->mti_rlock, &ma);
if (rc) if (rc)
goto bailout; goto bailout;
rc = pthread_mutex_init(&env->me_lck->mti_wlock, &ma); rc = pthread_mutex_init(&env->me_lck_mmap.lck->mti_wlock, &ma);
bailout: bailout:
pthread_mutexattr_destroy(&ma); pthread_mutexattr_destroy(&ma);
@@ -708,7 +708,7 @@ static int __cold mdbx_ipclock_failed(MDBX_env *env, mdbx_ipclock_t *ipc,
if (err == EOWNERDEAD) { if (err == EOWNERDEAD) {
/* We own the mutex. Clean up after dead previous owner. */ /* We own the mutex. Clean up after dead previous owner. */
const bool rlocked = (env->me_lck && ipc == &env->me_lck->mti_rlock); const bool rlocked = ipc == &env->me_lck->mti_rlock;
rc = MDBX_SUCCESS; rc = MDBX_SUCCESS;
if (!rlocked) { if (!rlocked) {
if (unlikely(env->me_txn)) { if (unlikely(env->me_txn)) {
@@ -785,7 +785,7 @@ static int mdbx_ipclock_lock(MDBX_env *env, mdbx_ipclock_t *ipc,
} else if (sem_wait(ipc)) } else if (sem_wait(ipc))
rc = errno; rc = errno;
#elif MDBX_LOCKING == MDBX_LOCKING_SYSV #elif MDBX_LOCKING == MDBX_LOCKING_SYSV
struct sembuf op = {.sem_num = (ipc != env->me_wlock), struct sembuf op = {.sem_num = (ipc != &env->me_lck->mti_wlock),
.sem_op = -1, .sem_op = -1,
.sem_flg = dont_wait ? IPC_NOWAIT | SEM_UNDO : SEM_UNDO}; .sem_flg = dont_wait ? IPC_NOWAIT | SEM_UNDO : SEM_UNDO};
int rc; int rc;
@@ -818,8 +818,9 @@ static int mdbx_ipclock_unlock(MDBX_env *env, mdbx_ipclock_t *ipc) {
if (unlikely(*ipc != (pid_t)env->me_pid)) if (unlikely(*ipc != (pid_t)env->me_pid))
return EPERM; return EPERM;
*ipc = 0; *ipc = 0;
struct sembuf op = { struct sembuf op = {.sem_num = (ipc != &env->me_lck->mti_wlock),
.sem_num = (ipc != env->me_wlock), .sem_op = 1, .sem_flg = SEM_UNDO}; .sem_op = 1,
.sem_flg = SEM_UNDO};
int rc = semop(env->me_sysv_ipc.semid, &op, 1) ? errno : MDBX_SUCCESS; int rc = semop(env->me_sysv_ipc.semid, &op, 1) ? errno : MDBX_SUCCESS;
#else #else
#error "FIXME" #error "FIXME"
@@ -847,14 +848,14 @@ MDBX_INTERNAL_FUNC void mdbx_rdt_unlock(MDBX_env *env) {
int mdbx_txn_lock(MDBX_env *env, bool dont_wait) { int mdbx_txn_lock(MDBX_env *env, bool dont_wait) {
mdbx_trace("%swait %s", dont_wait ? "dont-" : "", ">>"); mdbx_trace("%swait %s", dont_wait ? "dont-" : "", ">>");
mdbx_jitter4testing(true); mdbx_jitter4testing(true);
int rc = mdbx_ipclock_lock(env, env->me_wlock, dont_wait); int rc = mdbx_ipclock_lock(env, &env->me_lck->mti_wlock, dont_wait);
mdbx_trace("<< rc %d", rc); mdbx_trace("<< rc %d", rc);
return MDBX_IS_ERROR(rc) ? rc : MDBX_SUCCESS; return MDBX_IS_ERROR(rc) ? rc : MDBX_SUCCESS;
} }
void mdbx_txn_unlock(MDBX_env *env) { void mdbx_txn_unlock(MDBX_env *env) {
mdbx_trace("%s", ">>"); mdbx_trace("%s", ">>");
int rc = mdbx_ipclock_unlock(env, env->me_wlock); int rc = mdbx_ipclock_unlock(env, &env->me_lck->mti_wlock);
mdbx_trace("<< rc %d", rc); mdbx_trace("<< rc %d", rc);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
mdbx_panic("%s() failed: err %d\n", __func__, rc); mdbx_panic("%s() failed: err %d\n", __func__, rc);

View File

@@ -262,11 +262,12 @@ MDBX_INTERNAL_FUNC int
mdbx_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) { mdbx_suspend_threads_before_remap(MDBX_env *env, mdbx_handle_array_t **array) {
const uintptr_t CurrentTid = GetCurrentThreadId(); const uintptr_t CurrentTid = GetCurrentThreadId();
int rc; int rc;
if (env->me_lck) { if (env->me_lck_mmap.lck) {
/* Scan LCK for threads of the current process */ /* Scan LCK for threads of the current process */
const MDBX_reader *const begin = env->me_lck->mti_readers; const MDBX_reader *const begin = env->me_lck_mmap.lck->mti_readers;
const MDBX_reader *const end = const MDBX_reader *const end =
begin + atomic_load32(&env->me_lck->mti_numreaders, mo_AcquireRelease); begin +
atomic_load32(&env->me_lck_mmap.lck->mti_numreaders, mo_AcquireRelease);
const uintptr_t WriteTxnOwner = env->me_txn0 ? env->me_txn0->mt_owner : 0; const uintptr_t WriteTxnOwner = env->me_txn0 ? env->me_txn0->mt_owner : 0;
for (const MDBX_reader *reader = begin; reader < end; ++reader) { for (const MDBX_reader *reader = begin; reader < end; ++reader) {
if (reader->mr_pid.weak != env->me_pid || !reader->mr_tid.weak) { if (reader->mr_pid.weak != env->me_pid || !reader->mr_tid.weak) {
@@ -599,7 +600,7 @@ MDBX_INTERNAL_FUNC int mdbx_lck_destroy(MDBX_env *env,
* STATUS_USER_MAPPED_FILE/ERROR_USER_MAPPED_FILE */ * STATUS_USER_MAPPED_FILE/ERROR_USER_MAPPED_FILE */
if (env->me_map) if (env->me_map)
mdbx_munmap(&env->me_dxb_mmap); mdbx_munmap(&env->me_dxb_mmap);
if (env->me_lck) { if (env->me_lck_mmap.lck) {
const bool synced = env->me_lck_mmap.lck->mti_unsynced_pages.weak == 0; const bool synced = env->me_lck_mmap.lck->mti_unsynced_pages.weak == 0;
mdbx_munmap(&env->me_lck_mmap); mdbx_munmap(&env->me_lck_mmap);
if (synced && !inprocess_neighbor && env->me_lfd != INVALID_HANDLE_VALUE && if (synced && !inprocess_neighbor && env->me_lfd != INVALID_HANDLE_VALUE &&