lmdb: Make SysV semaphores robust. Cleanup MDB_ROBUST.

Backport from master: 66e3f5139105822196aea6a6e63596d25734222a.

Change-Id: Idda165be19ee95c2a0839a180a2ece395502ce26
This commit is contained in:
Hallvard Furuseth 2015-01-12 21:02:29 +01:00 committed by Leo Yuriev
parent 3015e8d03e
commit eb885ab910
2 changed files with 66 additions and 46 deletions

4
lmdb.h
View File

@ -109,7 +109,9 @@
* The transaction becomes "long-lived" as above until a check
* for stale readers is performed or the lockfile is reset,
* since the process may not remove it from the lockfile.
* Except write-transactions on Unix with MDB_ROBUST or on Windows.
*
* This does not apply to write transactions if the system clears
* stale writers; see above.
*
* - If you do that anyway, do a periodic check for stale readers. Or
* close the environment once in a while, so the lockfile can get reset.

108
mdb.c
View File

@ -208,7 +208,7 @@ union semun {
#define MDB_DEVEL 0
#endif
#if defined(_WIN32) || (defined(EOWNERDEAD) && !defined(MDB_USE_SYSV_SEM))
#if defined(_WIN32) || defined(MDB_USE_SYSV_SEM) || defined(EOWNERDEAD)
#define MDB_ROBUST_SUPPORTED 1
#endif
@ -222,6 +222,16 @@ union semun {
# define mdb_func_ "<mdb_unknown>"
#endif
/* Internal error codes, not exposed outside liblmdb */
#define MDB_NO_ROOT (MDB_LAST_ERRCODE + 10)
/* MDB_OWNERDEAD: lock result meaning "the mutex was acquired, but its
 * previous owner died while holding it". The value depends on the
 * locking primitive in use:
 *  - Windows: the WAIT_ABANDONED wait result.
 *  - SysV semaphores: a private code placed after MDB_LAST_ERRCODE.
 *  - otherwise: the system's EOWNERDEAD.
 */
#ifdef _WIN32
#define MDB_OWNERDEAD ((int) WAIT_ABANDONED)
#elif defined MDB_USE_SYSV_SEM
#define MDB_OWNERDEAD (MDB_LAST_ERRCODE + 11)
#else
#define MDB_OWNERDEAD EOWNERDEAD
#endif
#ifdef _WIN32
#define MDB_USE_HASH 1
#define MDB_PIDLOCK 0
@ -237,7 +247,6 @@ typedef HANDLE mdb_mutex_t;
#define pthread_key_delete(x) TlsFree(x)
#define pthread_getspecific(x) TlsGetValue(x)
#define pthread_setspecific(x,y) (TlsSetValue(x,y) ? 0 : ErrCode())
#define pthread_mutex_consistent(mutex) 0
#define pthread_mutex_unlock(x) ReleaseMutex(*x)
#define pthread_mutex_lock(x) WaitForSingleObject(*x, INFINITE)
#define pthread_cond_signal(x) SetEvent(*x)
@ -247,6 +256,7 @@ typedef HANDLE mdb_mutex_t;
#define MDB_MUTEX(env, rw) ((env)->me_##rw##mutex)
#define LOCK_MUTEX0(mutex) WaitForSingleObject(mutex, INFINITE)
#define UNLOCK_MUTEX(mutex) ReleaseMutex(mutex)
#define mdb_mutex_consistent(mutex) 0
#define getpid() GetCurrentProcessId()
#define MDB_FDATASYNC(fd) (!FlushFileBuffers(fd))
#define MDB_MSYNC(addr,len,flags) (!FlushViewOfFile(addr,len))
@ -274,6 +284,7 @@ typedef HANDLE mdb_mutex_t;
/** Shared mutex built on one semaphore of a SysV semaphore set. */
typedef struct mdb_mutex {
/* ID of the semaphore set, as passed to semop() */
int semid;
/* Index of this mutex's semaphore within the set (used as sem_num) */
int semnum;
/* Points at this mutex's "locked" flag: set to 1 once the semaphore
 * has been acquired and reset to 0 just before it is released.
 */
int *locked;
} mdb_mutex_t;
#define MDB_MUTEX(env, rw) (&(env)->me_##rw##mutex)
@ -281,19 +292,28 @@ typedef struct mdb_mutex {
/** Unlock a SysV-semaphore mutex.
 * Clears the mutex's "locked" flag first and only then increments the
 * semaphore (with SEM_UNDO), so the flag is already 0 when the next
 * locker acquires the semaphore after a normal unlock.
 * The result of semop() is not checked.
 */
#define UNLOCK_MUTEX(mutex) do { \
struct sembuf sb = { 0, 1, SEM_UNDO }; \
sb.sem_num = (mutex)->semnum; \
*(mutex)->locked = 0; \
semop((mutex)->semid, &sb, 1); \
} while(0)
/** Lock a SysV-semaphore mutex, detecting a dead previous owner.
 *
 * Decrements the semaphore with SEM_UNDO, retrying for as long as
 * semop() fails with EINTR. Once the semaphore is held, the mutex's
 * "locked" flag is inspected: UNLOCK_MUTEX() clears it before releasing,
 * so a flag that is still set means the semaphore was given back without
 * a regular unlock (presumably by the SEM_UNDO adjustment after the
 * owner exited). The flag is then set to mark the mutex as held by us.
 *
 * @param[in] sem the mutex to lock
 * @return #MDB_SUCCESS on a clean acquire, #MDB_OWNERDEAD if the mutex
 * was acquired but the "locked" flag was still set, otherwise the errno
 * value of the failed semop() call (the mutex is not held in that case).
 */
static int
mdb_sem_wait(mdb_mutex_t *sem)
{
	int rc, *locked = sem->locked;
	struct sembuf sb = { 0, -1, SEM_UNDO };

	sb.sem_num = sem->semnum;
	do {
		if (!semop(sem->semid, &sb, 1)) {
			/* We hold the semaphore; report whether the last
			 * owner released it properly.
			 */
			rc = *locked ? MDB_OWNERDEAD : MDB_SUCCESS;
			*locked = 1;
			break;
		}
	} while ((rc = errno) == EINTR);	/* retry only when interrupted */
	return rc;
}
#define mdb_mutex_consistent(mutex) 0
#else
/** Pointer/HANDLE type of shared mutex/semaphore.
*/
@ -308,6 +328,9 @@ typedef pthread_mutex_t mdb_mutex_t;
/** Unlock the reader or writer mutex.
*/
#define UNLOCK_MUTEX(mutex) pthread_mutex_unlock(mutex)
/** Mark mutex-protected data as repaired, after death of previous owner.
*/
#define mdb_mutex_consistent(mutex) pthread_mutex_consistent(mutex)
#endif /* MDB_USE_SYSV_SEM */
/** Get the error code for the last failed system function.
@ -336,11 +359,17 @@ typedef pthread_mutex_t mdb_mutex_t;
#if defined(_WIN32)
#define MNAME_LEN 32
#elif defined(MDB_USE_SYSV_SEM)
#define MNAME_LEN 0
#define MNAME_LEN (sizeof(int))
#else
#define MNAME_LEN (sizeof(pthread_mutex_t))
#endif
#ifdef MDB_USE_SYSV_SEM
#define SYSV_SEM_FLAG 1 /**< SysV sems in lockfile format */
#else
#define SYSV_SEM_FLAG 0
#endif
/** @} */
#ifdef MDB_ROBUST_SUPPORTED
@ -667,6 +696,7 @@ typedef struct MDB_txbody {
char mtb_rmname[MNAME_LEN];
#elif defined(MDB_USE_SYSV_SEM)
int mtb_semid;
int mtb_rlocked;
#else
/** Mutex protecting access to this table.
* This is the #MDB_MUTEX(env,r) reader table lock.
@ -695,22 +725,25 @@ typedef struct MDB_txninfo {
#define mti_rmname mt1.mtb.mtb_rmname
#define mti_txnid mt1.mtb.mtb_txnid
#define mti_numreaders mt1.mtb.mtb_numreaders
char pad[(sizeof(MDB_txbody)+CACHELINE-1) & ~(CACHELINE-1)];
} mt1;
#ifdef MDB_USE_SYSV_SEM
#define mti_semid mt1.mtb.mtb_semid
#else
#define mti_rlocked mt1.mtb.mtb_rlocked
#endif
char pad[(sizeof(MDB_txbody)+CACHELINE-1) & ~(CACHELINE-1)];
} mt1;
union {
#if defined(_WIN32)
char mt2_wmname[MNAME_LEN];
#define mti_wmname mt2.mt2_wmname
#elif defined MDB_USE_SYSV_SEM
int mt2_wlocked;
#define mti_wlocked mt2.mt2_wlocked
#else
pthread_mutex_t mt2_wmutex;
#define mti_wmutex mt2.mt2_wmutex
#endif
char pad[(MNAME_LEN+CACHELINE-1) & ~(CACHELINE-1)];
} mt2;
#endif
MDB_reader mti_readers[1];
} MDB_txninfo;
@ -719,7 +752,7 @@ typedef struct MDB_txninfo {
((uint32_t) \
((MDB_LOCK_VERSION) \
/* Flags which describe functionality */ \
+ (((MNAME_LEN) == 0) << 18) /* MDB_USE_SYSV_SEM */ \
+ (SYSV_SEM_FLAG << 18) \
+ (((MDB_PIDLOCK) != 0) << 16)))
/** @} */
@ -2824,17 +2857,8 @@ mdb_txn_renew0(MDB_txn *txn)
if (ti) {
if (LOCK_MUTEX(rc, env, MDB_MUTEX(env, w)))
return rc;
#ifdef MDB_USE_SYSV_SEM
meta = env->me_metas[ mdb_env_pick_meta(env) ];
txn->mt_txnid = meta->mm_txnid;
/* Update mti_txnid like mdb_mutex_failed() would,
* in case last writer crashed before updating it.
*/
ti->mti_txnid = txn->mt_txnid;
#else
txn->mt_txnid = ti->mti_txnid;
meta = env->me_metas[txn->mt_txnid & 1];
#endif
} else {
meta = env->me_metas[ mdb_env_pick_meta(env) ];
txn->mt_txnid = meta->mm_txnid;
@ -4790,6 +4814,10 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
int fdflags;
# define MDB_CLOEXEC 0
#endif
#endif
#ifdef MDB_USE_SYSV_SEM
int semid;
union semun semu;
#endif
int rc;
off_t size, rsize;
@ -4904,17 +4932,10 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
env->me_wmutex = CreateMutex(&mdb_all_sa, FALSE, env->me_txns->mti_wmname);
if (!env->me_wmutex) goto fail_errno;
#elif defined(MDB_USE_SYSV_SEM)
union semun semu;
unsigned short vals[2] = {1, 1};
int semid = semget(IPC_PRIVATE, 2, mode);
semid = semget(IPC_PRIVATE, 2, mode);
if (semid < 0)
goto fail_errno;
env->me_rmutex.semid = semid;
env->me_wmutex.semid = semid;
env->me_rmutex.semnum = 0;
env->me_wmutex.semnum = 1;
semu.array = vals;
if (semctl(semid, 0, SETALL, semu) < 0)
goto fail_errno;
@ -4941,8 +4962,6 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
} else {
#ifdef MDB_USE_SYSV_SEM
struct semid_ds buf;
union semun semu;
int semid;
#endif
if (env->me_txns->mti_magic != MDB_MAGIC) {
DPUTS("lock region has invalid magic");
@ -4967,20 +4986,23 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
#elif defined(MDB_USE_SYSV_SEM)
semid = env->me_txns->mti_semid;
semu.buf = &buf;
/* check for read access */
if (semctl(semid, 0, IPC_STAT, semu) < 0)
goto fail_errno;
/* check for write access */
if (semctl(semid, 0, IPC_SET, semu) < 0)
goto fail_errno;
env->me_rmutex.semid = semid;
env->me_wmutex.semid = semid;
env->me_rmutex.semnum = 0;
env->me_wmutex.semnum = 1;
#endif
}
#ifdef MDB_USE_SYSV_SEM
env->me_rmutex.semid = semid;
env->me_wmutex.semid = semid;
env->me_rmutex.semnum = 0;
env->me_wmutex.semnum = 1;
env->me_rmutex.locked = &env->me_txns->mti_rlocked;
env->me_wmutex.locked = &env->me_txns->mti_wlocked;
#endif
return MDB_SUCCESS;
fail_errno:
@ -6575,7 +6597,6 @@ int
mdb_cursor_put(MDB_cursor *mc, MDB_val *key, MDB_val *data,
unsigned int flags)
{
enum { MDB_NO_ROOT = MDB_LAST_ERRCODE+10 }; /* internal code */
MDB_env *env;
MDB_node *leaf = NULL;
MDB_page *fp, *mp;
@ -10044,7 +10065,7 @@ static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead)
#ifdef MDB_ROBUST_SUPPORTED
/** Handle #LOCK_MUTEX0() failure.
* With #MDB_ROBUST, try to repair the lock file if the mutex owner died.
* Try to repair the lock file if the mutex owner died.
* @param[in] env the environment handle
* @param[in] mutex LOCK_MUTEX0() mutex
* @param[in] rc LOCK_MUTEX0() error (nonzero)
@ -10053,11 +10074,8 @@ static int mdb_reader_check0(MDB_env *env, int rlocked, int *dead)
static int mdb_mutex_failed(MDB_env *env, mdb_mutex_t *mutex, int rc)
{
int toggle, rlocked, rc2;
#ifndef _WIN32
enum { WAIT_ABANDONED = EOWNERDEAD };
#endif
if (rc == (int) WAIT_ABANDONED) {
if (rc == MDB_OWNERDEAD) {
/* We own the mutex. Clean up after dead previous owner. */
rc = MDB_SUCCESS;
rlocked = (mutex == MDB_MUTEX(env, r));
@ -10078,7 +10096,7 @@ static int mdb_mutex_failed(MDB_env *env, mdb_mutex_t *mutex, int rc)
(rc ? "this process' env is hosed" : "recovering")));
rc2 = mdb_reader_check0(env, rlocked, NULL);
if (rc2 == 0)
rc2 = pthread_mutex_consistent(mutex);
rc2 = mdb_mutex_consistent(mutex);
if (rc || (rc = rc2)) {
DPRINTF(("LOCK_MUTEX recovery failed, %s", mdb_strerror(rc)));
UNLOCK_MUTEX(mutex);