mdbx: use SysV semaphores on systems without shared mutexes.

Change-Id: Ib2ad9ed137ab76999a2a8e832f9f77ff1a0788ca
This commit is contained in:
Leonid Yuriev 2019-11-11 12:48:31 +03:00
parent 2f45c37320
commit 0dc544fefd
4 changed files with 140 additions and 26 deletions

View File

@ -7494,7 +7494,7 @@ int __cold mdbx_env_create(MDBX_env **penv) {
goto bailout;
}
#if MDBX_LOCKING > 0
#if MDBX_LOCKING > MDBX_LOCKING_SYSV
rc = mdbx_ipclock_stub(&env->me_lckless_stub.wlock);
#endif /* MDBX_LOCKING */
if (unlikely(rc != MDBX_SUCCESS)) {
@ -8338,7 +8338,7 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
env->me_maxreaders = UINT_MAX;
#if MDBX_LOCKING > 0
env->me_wlock = &env->me_lckless_stub.wlock;
#endif /* MDBX_LOCKING */
#endif /* MDBX_LOCKING > 0 */
mdbx_debug("lck-setup:%s%s%s", " lck-less",
(env->me_flags & MDBX_RDONLY) ? " readonly" : "",
(rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative");
@ -8472,7 +8472,7 @@ static int __cold mdbx_setup_lck(MDBX_env *env, char *lck_pathname,
env->me_meta_sync_txnid = &lck->mti_meta_sync_txnid;
#if MDBX_LOCKING > 0
env->me_wlock = &lck->mti_wlock;
#endif /* MDBX_LOCKING */
#endif /* MDBX_LOCKING > 0 */
return lck_seize_rc;
}
@ -8704,6 +8704,14 @@ int __cold mdbx_env_open(MDBX_env *env, const char *path, unsigned flags,
if (rc != MDBX_SUCCESS)
goto bailout;
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
env->me_sysv_ipc.key = ftok(dxb_pathname, 42);
if (env->me_sysv_ipc.key == -1) {
rc = errno;
goto bailout;
}
#endif /* MDBX_LOCKING */
const int lck_rc = mdbx_setup_lck(env, lck_pathname, mode);
if (MDBX_IS_ERROR(lck_rc)) {
rc = lck_rc;
@ -8930,7 +8938,7 @@ int __cold mdbx_env_close_ex(MDBX_env *env, int dont_sync) {
mdbx_fastmutex_destroy(&env->me_remap_guard) == MDBX_SUCCESS);
#endif /* Windows */
#if MDBX_LOCKING > 0
#if MDBX_LOCKING > MDBX_LOCKING_SYSV
mdbx_ensure(env, mdbx_ipclock_destroy(&env->me_lckless_stub.wlock) == 0);
#endif /* MDBX_LOCKING */

View File

@ -204,9 +204,11 @@
#endif /* MDBX_64BIT_CAS */
/* Identifiers of the inter-process locking back-ends that MDBX_LOCKING may be
 * set to.
 *
 * The numeric values are significant, not merely distinct:
 *  - `MDBX_LOCKING > 0` guards the mdbx_ipclock_t lock members and pointers;
 *  - `MDBX_LOCKING > MDBX_LOCKING_SYSV` guards the declarations of
 *    mdbx_ipclock_stub() and mdbx_ipclock_destroy().
 * So keep MDBX_LOCKING_WIN32FILES non-positive and MDBX_LOCKING_SYSV smaller
 * than every other positive identifier when adding or renumbering entries.
 *
 * NOTE(review): some conditionals compare MDBX_LOCKING with
 * MDBX_LOCKING_FUTEX, which is not defined among the constants here
 * (MDBX_LOCKING_BENAPHORE looks like the intended name) - confirm. */
#define MDBX_LOCKING_WIN32FILES -1 /* chosen for _WIN32/_WIN64 builds */
#define MDBX_LOCKING_SYSV 5 /* SystemV IPC semaphores */
#define MDBX_LOCKING_POSIX1988 1988 /* POSIX-1 Shared anonymous semaphores */
#define MDBX_LOCKING_POSIX2001 2001 /* POSIX-2001 Shared Mutexes */
#define MDBX_LOCKING_POSIX2008 2008 /* POSIX-2008 Robust Mutexes */
#define MDBX_LOCKING_BENAPHORE 1995 /* BeOS Benaphores, aka Futexes */
#if defined(_WIN32) || defined(_WIN64)
#define MDBX_LOCKING MDBX_LOCKING_WIN32FILES
@ -227,8 +229,10 @@
#else
#define MDBX_LOCKING MDBX_LOCKING_POSIX2001
#endif
#else
#elif defined(__sun) || defined(__SVR4) || defined(__svr4__)
#define MDBX_LOCKING MDBX_LOCKING_POSIX1988
#else
#define MDBX_LOCKING MDBX_LOCKING_SYSV
#endif
#define MDBX_LOCKING_CONFIG "AUTO=" STRINGIFY(MDBX_LOCKING)
#else
@ -507,8 +511,18 @@ typedef struct MDBX_page {
#pragma pack(pop)
#if MDBX_LOCKING > 0
#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \
#if MDBX_LOCKING == MDBX_LOCKING_WIN32FILES
#define MDBX_CLOCK_SIGN UINT32_C(0xF10C)
typedef void mdbx_ipclock_t;
#elif MDBX_LOCKING == MDBX_LOCKING_SYSV
#define MDBX_CLOCK_SIGN UINT32_C(0xF18D)
typedef mdbx_pid_t mdbx_ipclock_t;
#ifndef EOWNERDEAD
#define EOWNERDEAD MDBX_RESULT_TRUE
#endif
#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \
MDBX_LOCKING == MDBX_LOCKING_POSIX2008
#define MDBX_CLOCK_SIGN UINT32_C(0x8017)
typedef pthread_mutex_t mdbx_ipclock_t;
@ -517,13 +531,11 @@ typedef pthread_mutex_t mdbx_ipclock_t;
typedef sem_t mdbx_ipclock_t;
#else
#error "FIXME"
#endif
#endif /* MDBX_LOCKING */
#if MDBX_LOCKING > MDBX_LOCKING_SYSV
MDBX_INTERNAL_FUNC int mdbx_ipclock_stub(mdbx_ipclock_t *ipc);
MDBX_INTERNAL_FUNC int mdbx_ipclock_destroy(mdbx_ipclock_t *ipc);
#else
#define MDBX_CLOCK_SIGN UINT32_C(0xF10C)
#endif /* MDBX_LOCKING */
/* Reader Lock Table
@ -642,10 +654,10 @@ typedef struct MDBX_lockinfo {
alignas(MDBX_CACHELINE_SIZE) /* cacheline ---------------------------------*/
/* Write transation lok. */
/* Write transaction lock. */
#if MDBX_LOCKING > 0
mdbx_ipclock_t mti_wlock;
#endif /* MDBX_LOCKING */
#endif /* MDBX_LOCKING > 0 */
volatile txnid_t mti_oldest_reader;
@ -668,7 +680,7 @@ typedef struct MDBX_lockinfo {
/* Readers registration lock. */
#if MDBX_LOCKING > 0
mdbx_ipclock_t mti_rlock;
#endif /* MDBX_LOCKING */
#endif /* MDBX_LOCKING > 0 */
/* The number of slots that have been used in the reader table.
* This always records the maximum count, it is not decremented
@ -1020,9 +1032,16 @@ struct MDBX_env {
MDBX_txn *me_txn0; /* prealloc'd write transaction */
/* write-txn lock */
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
union {
key_t key;
int semid;
} me_sysv_ipc;
#endif /* MDBX_LOCKING == MDBX_LOCKING_SYSV */
#if MDBX_LOCKING > 0
mdbx_ipclock_t *me_wlock;
#endif /* MDBX_LOCKING */
#endif /* MDBX_LOCKING > 0 */
MDBX_dbx *me_dbxs; /* array of static DB info */
uint16_t *me_dbflags; /* array of flags from MDBX_db.md_flags */
@ -1050,7 +1069,7 @@ struct MDBX_env {
struct {
#if MDBX_LOCKING > 0
mdbx_ipclock_t wlock;
#endif /* MDBX_LOCKING */
#endif /* MDBX_LOCKING > 0 */
txnid_t oldest;
uint64_t sync_timestamp;
uint64_t autosync_period;

View File

@ -13,6 +13,7 @@
*/
#include "internals.h"
#include <sys/sem.h>
/*----------------------------------------------------------------------------*/
/* global constructor/destructor */
@ -195,7 +196,7 @@ MDBX_INTERNAL_FUNC int mdbx_rpid_check(MDBX_env *env, uint32_t pid) {
/*---------------------------------------------------------------------------*/
#if MDBX_LOCKING > 0
#if MDBX_LOCKING > MDBX_LOCKING_SYSV
MDBX_INTERNAL_FUNC int mdbx_ipclock_stub(mdbx_ipclock_t *ipc) {
#if MDBX_LOCKING == MDBX_LOCKING_POSIX1988
return sem_init(ipc, false, 1) ? errno : 0;
@ -217,7 +218,7 @@ MDBX_INTERNAL_FUNC int mdbx_ipclock_destroy(mdbx_ipclock_t *ipc) {
#error "FIXME"
#endif
}
#endif /* MDBX_LOCKING */
#endif /* MDBX_LOCKING > MDBX_LOCKING_SYSV */
MDBX_INTERNAL_FUNC int __cold mdbx_lck_seize(MDBX_env *env) {
assert(env->me_fd != INVALID_HANDLE_VALUE);
@ -365,7 +366,10 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_destroy(MDBX_env *env,
OFF_T_MAX) == 0) {
mdbx_verbose("%s: got exclusive, drown locks", __func__);
#if MDBX_LOCKING > 0
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
if (env->me_sysv_ipc.semid != -1)
rc = semctl(env->me_sysv_ipc.semid, 2, IPC_RMID) ? errno : 0;
#else
rc = mdbx_ipclock_destroy(&env->me_lck->mti_rlock);
if (rc == 0)
rc = mdbx_ipclock_destroy(&env->me_lck->mti_wlock);
@ -431,8 +435,58 @@ MDBX_INTERNAL_FUNC int __cold mdbx_lck_init(MDBX_env *env,
return MDBX_SUCCESS /* currently don't need any initialization
if LCK already opened/used inside current process */
;
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
int semid = -1;
if (global_uniqueness_flag) {
struct stat st;
if (fstat(env->me_fd, &st))
return errno;
sysv_retry_create:
semid = semget(env->me_sysv_ipc.key, 2,
IPC_CREAT | IPC_EXCL |
(st.st_mode & (S_IRWXU | S_IRWXG | S_IRWXO)));
if (unlikely(semid == -1)) {
int err = errno;
if (err != EEXIST)
return err;
#if MDBX_LOCKING == MDBX_LOCKING_POSIX1988
/* remove and re-create semaphore set */
semid = semget(env->me_sysv_ipc.key, 2, 0);
if (semid == -1) {
err = errno;
if (err != ENOENT)
return err;
goto sysv_retry_create;
}
if (semctl(semid, 2, IPC_RMID)) {
err = errno;
if (err != EIDRM)
return err;
}
goto sysv_retry_create;
}
unsigned short val_array[2] = {1, 1};
if (semctl(semid, 2, SETALL, val_array))
return errno;
} else {
semid = semget(env->me_sysv_ipc.key, 2, 0);
if (semid == -1)
return errno;
/* check read & write access */
struct semid_ds data[2];
if (semctl(semid, 2, IPC_STAT, data) || semctl(semid, 2, IPC_SET, data))
return errno;
}
env->me_sysv_ipc.semid = semid;
return MDBX_SUCCESS;
#elif MDBX_LOCKING == MDBX_LOCKING_FUTEX
#warning "TODO"
#elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988
/* don't initialize semaphores twice */
if (global_uniqueness_flag == MDBX_RESULT_TRUE) {
@ -519,7 +573,7 @@ bailout:
static int __cold mdbx_ipclock_failed(MDBX_env *env, mdbx_ipclock_t *ipc,
const int err) {
int rc = err;
#if MDBX_LOCKING == MDBX_LOCKING_POSIX2008
#if MDBX_LOCKING == MDBX_LOCKING_POSIX2008 || MDBX_LOCKING == MDBX_LOCKING_SYSV
if (err == EOWNERDEAD) {
/* We own the mutex. Clean up after dead previous owner. */
@ -533,12 +587,15 @@ static int __cold mdbx_ipclock_failed(MDBX_env *env, mdbx_ipclock_t *ipc,
rc = MDBX_PANIC;
}
}
mdbx_notice("%cmutex owner died, %s", (rlocked ? 'r' : 'w'),
mdbx_notice("%clock owner died, %s", (rlocked ? 'r' : 'w'),
(rc ? "this process' env is hosed" : "recovering"));
int check_rc = mdbx_reader_check0(env, rlocked, NULL);
check_rc = (check_rc == MDBX_SUCCESS) ? MDBX_RESULT_TRUE : check_rc;
#if MDBX_LOCKING == MDBX_LOCKING_SYSV
rc = (rc == MDBX_SUCCESS) ? check_rc : rc;
#else
#if defined(PTHREAD_MUTEX_ROBUST) || defined(pthread_mutex_consistent)
int mreco_rc = pthread_mutex_consistent(ipc);
#elif defined(PTHREAD_MUTEX_ROBUST_NP) || defined(pthread_mutex_consistent_np)
@ -551,17 +608,21 @@ static int __cold mdbx_ipclock_failed(MDBX_env *env, mdbx_ipclock_t *ipc,
check_rc = (mreco_rc == 0) ? check_rc : mreco_rc;
if (unlikely(mreco_rc))
mdbx_error("mutex recovery failed, %s", mdbx_strerror(mreco_rc));
mdbx_error("lock recovery failed, %s", mdbx_strerror(mreco_rc));
rc = (rc == MDBX_SUCCESS) ? check_rc : rc;
if (MDBX_IS_ERROR(rc))
pthread_mutex_unlock(ipc);
#endif /* MDBX_LOCKING == MDBX_LOCKING_POSIX2008 */
return rc;
}
#elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001
(void)ipc;
#elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988
(void)ipc;
#elif MDBX_LOCKING == MDBX_LOCKING_FUTEX
#warning "TODO"
(void)ipc;
#else
#error "FIXME"
#endif /* MDBX_LOCKING */
@ -588,6 +649,19 @@ static int mdbx_ipclock_lock(MDBX_env *env, mdbx_ipclock_t *ipc,
}
} else if (sem_wait(ipc))
rc = errno;
#elif MDBX_LOCKING == MDBX_LOCKING_SYSV
struct sembuf op = {.sem_num = (ipc != env->me_wlock),
.sem_op = -1,
.sem_flg = dont_wait ? IPC_NOWAIT | SEM_UNDO : SEM_UNDO};
int rc;
if (semop(env->me_sysv_ipc.semid, &op, 1)) {
rc = errno;
if (dont_wait && rc == EAGAIN)
rc = MDBX_BUSY;
} else {
rc = *ipc ? EOWNERDEAD : MDBX_SUCCESS;
*ipc = env->me_pid;
}
#else
#error "FIXME"
#endif /* MDBX_LOCKING */
@ -597,12 +671,21 @@ static int mdbx_ipclock_lock(MDBX_env *env, mdbx_ipclock_t *ipc,
return rc;
}
static int mdbx_ipclock_unlock(mdbx_ipclock_t *ipc) {
static int mdbx_ipclock_unlock(MDBX_env *env, mdbx_ipclock_t *ipc) {
#if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \
MDBX_LOCKING == MDBX_LOCKING_POSIX2008
int rc = pthread_mutex_unlock(ipc);
(void)env;
#elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988
int rc = sem_post(ipc) ? errno : MDBX_SUCCESS;
(void)env;
#elif MDBX_LOCKING == MDBX_LOCKING_SYSV
if (unlikely(*ipc != (pid_t)env->me_pid))
return EPERM;
*ipc = 0;
struct sembuf op = {
.sem_num = (ipc != env->me_wlock), .sem_op = 1, .sem_flg = SEM_UNDO};
int rc = semop(env->me_sysv_ipc.semid, &op, 1) ? errno : MDBX_SUCCESS;
#else
#error "FIXME"
#endif /* MDBX_LOCKING */
@ -619,7 +702,7 @@ MDBX_INTERNAL_FUNC int mdbx_rdt_lock(MDBX_env *env) {
MDBX_INTERNAL_FUNC void mdbx_rdt_unlock(MDBX_env *env) {
mdbx_trace("%s", ">>");
int rc = mdbx_ipclock_unlock(&env->me_lck->mti_rlock);
int rc = mdbx_ipclock_unlock(env, &env->me_lck->mti_rlock);
mdbx_trace("<< rc %d", rc);
if (unlikely(rc != MDBX_SUCCESS))
mdbx_panic("%s() failed: errcode %d\n", __func__, rc);
@ -636,7 +719,7 @@ int mdbx_txn_lock(MDBX_env *env, bool dont_wait) {
void mdbx_txn_unlock(MDBX_env *env) {
mdbx_trace("%s", ">>");
int rc = mdbx_ipclock_unlock(env->me_wlock);
int rc = mdbx_ipclock_unlock(env, env->me_wlock);
mdbx_trace("<< rc %d", rc);
if (unlikely(rc != MDBX_SUCCESS))
mdbx_panic("%s() failed: errcode %d\n", __func__, rc);

View File

@ -125,6 +125,9 @@
#if defined(__sun) || defined(__SVR4) || defined(__svr4__)
#include <kstat.h>
/* On Solaris, it's easier to add a missing prototype rather than find a
* combination of #defines that break nothing. */
__extern_C key_t ftok(const char *, int);
#endif /* SunOS/Solaris */
#if defined(_WIN32) || defined(_WIN64)
@ -194,6 +197,7 @@ static inline void *mdbx_realloc(void *ptr, size_t bytes) {
#include <semaphore.h>
#include <signal.h>
#include <sys/file.h>
#include <sys/ipc.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/stat.h>