mdbx: prevents use the env after a fork().

Change-Id: I9c86500ac008a7be7eb16aeff2610fa5cfa22b84
This commit is contained in:
Leo Yuriev 2015-11-22 13:23:43 +03:00
parent 7e2000ef7a
commit bf81e86d93

128
mdb.c
View File

@ -1033,7 +1033,7 @@ enum {
#define MDB_END_UPDATE 0x10 /**< update env state (DBIs) */
#define MDB_END_FREE 0x20 /**< free txn unless it is #MDB_env.%me_txn0 */
#define MDB_END_SLOT MDB_NOTLS /**< release any reader slot if #MDB_NOTLS */
static void mdb_txn_end(MDB_txn *txn, unsigned mode);
static int mdb_txn_end(MDB_txn *txn, unsigned mode);
static int mdb_page_get(MDB_txn *txn, pgno_t pgno, MDB_page **mp, int *lvl);
static int mdb_page_search_root(MDB_cursor *mc,
@ -2700,6 +2700,11 @@ mdb_txn_renew0(MDB_txn *txn)
uint16_t x;
int rc, new_notls = 0;
if (unlikely(env->me_pid != getpid())) {
env->me_flags |= MDB_FATAL_ERROR;
return MDB_PANIC;
}
if ((flags &= MDB_TXN_RDONLY) != 0) {
struct MDB_rthc *rthc = NULL;
MDB_reader *r = NULL;
@ -2889,6 +2894,11 @@ mdb_txn_begin(MDB_env *env, MDB_txn *parent, unsigned flags, MDB_txn **ret)
if (unlikely(env->me_signature != MDBX_ME_SIGNATURE))
return MDB_VERSION_MISMATCH;
if (unlikely(env->me_pid != getpid())) {
env->me_flags |= MDB_FATAL_ERROR;
return MDB_PANIC;
}
flags &= MDB_TXN_BEGIN_FLAGS;
flags |= env->me_flags & MDB_WRITEMAP;
@ -3064,12 +3074,17 @@ mdbx_txn_straggler(MDB_txn *txn, int *percent)
* @param[in] txn the transaction handle to end
* @param[in] mode why and how to end the transaction
*/
static void
static int
mdb_txn_end(MDB_txn *txn, unsigned mode)
{
MDB_env *env = txn->mt_env;
static const char *const names[] = MDB_END_NAMES;
if (unlikely(txn->mt_env->me_pid != getpid())) {
env->me_flags |= MDB_FATAL_ERROR;
return MDB_PANIC;
}
/* Export or close DBI handles opened in this txn */
mdb_dbis_update(txn, mode & MDB_END_UPDATE);
@ -3139,6 +3154,8 @@ mdb_txn_end(MDB_txn *txn, unsigned mode)
txn->mt_signature = 0;
free(txn);
}
return MDB_SUCCESS;
}
int
@ -3154,8 +3171,7 @@ mdb_txn_reset(MDB_txn *txn)
if (unlikely(!(txn->mt_flags & MDB_TXN_RDONLY)))
return EINVAL;
mdb_txn_end(txn, MDB_END_RESET);
return MDB_SUCCESS;
return mdb_txn_end(txn, MDB_END_RESET);
}
int
@ -3170,8 +3186,7 @@ mdb_txn_abort(MDB_txn *txn)
if (txn->mt_child)
mdb_txn_abort(txn->mt_child);
mdb_txn_end(txn, MDB_END_ABORT|MDB_END_SLOT|MDB_END_FREE);
return MDB_SUCCESS;
return mdb_txn_end(txn, MDB_END_ABORT|MDB_END_SLOT|MDB_END_FREE);
}
static int
@ -3649,6 +3664,11 @@ mdb_txn_commit(MDB_txn *txn)
if(unlikely(txn->mt_signature != MDBX_MT_SIGNATURE))
return MDB_VERSION_MISMATCH;
if (unlikely(txn->mt_env->me_pid != getpid())) {
txn->mt_env->me_flags |= MDB_FATAL_ERROR;
return MDB_PANIC;
}
/* mdb_txn_end() mode for a commit which writes nothing */
end_mode = MDB_END_EMPTY_COMMIT|MDB_END_UPDATE|MDB_END_SLOT|MDB_END_FREE;
@ -3864,8 +3884,7 @@ mdb_txn_commit(MDB_txn *txn)
end_mode = MDB_END_COMMITTED|MDB_END_UPDATE;
done:
mdb_txn_end(txn, end_mode);
return MDB_SUCCESS;
return mdb_txn_end(txn, end_mode);
fail:
mdb_txn_abort(txn);
@ -4205,14 +4224,16 @@ mdb_env_map(MDB_env *env, void *addr)
return errno;
}
if (flags & MDB_NORDAHEAD) {
unsigned madvise_flags = MADV_DONTFORK;
if (flags & MDB_NORDAHEAD)
/* Turn off readahead. It's harmful when the DB is larger than RAM. */
#ifdef MADV_RANDOM
madvise(env->me_map, env->me_mapsize, MADV_RANDOM);
#elif defined(POSIX_MADV_RANDOM)
posix_madvise(env->me_map, env->me_mapsize, POSIX_MADV_RANDOM);
#endif /* MADV_RANDOM & POSIX_MADV_RANDOM */
}
madvise_flags |= MADV_RANDOM;
if (madvise(env->me_map, env->me_mapsize, madvise_flags))
return errno;
#ifdef MADV_DONTDUMP
madvise(env->me_map, env->me_mapsize, MADV_DONTDUMP);
#endif
/* Can happen because the address argument to mmap() is just a
* hint. mmap() can pick another, e.g. if the range is in use.
@ -4438,7 +4459,7 @@ mdb_env_reader_destr(void *ptr)
mdb_ensure(NULL, pthread_mutex_lock(&mdb_rthc_lock) == 0);
reader = rthc->rc_reader;
if (reader) {
if (reader && reader->mr_pid == getpid()) {
mdb_ensure(NULL, reader->mr_rthc == rthc);
rthc->rc_reader = NULL;
reader->mr_rthc = NULL;
@ -4611,7 +4632,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
if (rc == EROFS && (env->me_flags & MDB_RDONLY)) {
return MDB_SUCCESS;
}
goto fail_errno;
return rc;
}
/* Lose record locks when exec*() */
@ -4621,20 +4642,20 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
if (!(env->me_flags & MDB_NOTLS)) {
rc = pthread_key_create(&env->me_txkey, mdb_env_reader_destr);
if (rc)
goto fail;
return rc;
env->me_flags |= MDB_ENV_TXKEY;
}
/* Try to get exclusive lock. If we succeed, then
* nobody is using the lock region and we should initialize it.
*/
if ((rc = mdb_env_excl_lock(env, excl))) goto fail;
if ((rc = mdb_env_excl_lock(env, excl))) return rc;
size = lseek(env->me_lfd, 0, SEEK_END);
if (size == -1) goto fail_errno;
if (size == -1) return errno;
rsize = (env->me_maxreaders-1) * sizeof(MDB_reader) + sizeof(MDB_txninfo);
if (size < rsize && *excl > 0) {
if (ftruncate(env->me_lfd, rsize) != 0) goto fail_errno;
if (ftruncate(env->me_lfd, rsize) != 0) return errno;
} else {
rsize = size;
size = rsize - sizeof(MDB_txninfo);
@ -4643,9 +4664,16 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
m = mmap(NULL, rsize, PROT_READ|PROT_WRITE, MAP_SHARED, env->me_lfd, 0);
if (m == MAP_FAILED)
goto fail_errno;
return errno;
env->me_txns = m;
if (madvise(env->me_txns, rsize, MADV_DONTFORK | MADV_WILLNEED))
return errno;
#ifdef MADV_DODUMP
madvise(env->me_txns, rsize, MADV_DODUMP);
#endif
if (*excl > 0) {
pthread_mutexattr_t mattr;
@ -4656,7 +4684,7 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
#endif /* MDB_USE_ROBUST */
|| (rc = pthread_mutex_init(&env->me_txns->mti_rmutex, &mattr))
|| (rc = pthread_mutex_init(&env->me_txns->mti_wmutex, &mattr)))
goto fail;
return rc;
pthread_mutexattr_destroy(&mattr);
env->me_txns->mti_magic = MDB_MAGIC;
@ -4666,27 +4694,19 @@ mdb_env_setup_locks(MDB_env *env, char *lpath, int mode, int *excl)
} else {
if (env->me_txns->mti_magic != MDB_MAGIC) {
mdb_debug("lock region has invalid magic");
rc = MDB_INVALID;
goto fail;
return MDB_INVALID;
}
if (env->me_txns->mti_format != MDB_LOCK_FORMAT) {
mdb_debug("lock region has format+version 0x%x, expected 0x%x",
env->me_txns->mti_format, MDB_LOCK_FORMAT);
rc = MDB_VERSION_MISMATCH;
goto fail;
return MDB_VERSION_MISMATCH;
}
rc = errno;
if (rc && rc != EACCES && rc != EAGAIN) {
goto fail;
}
if (rc && rc != EACCES && rc != EAGAIN)
return rc;
}
return MDB_SUCCESS;
fail_errno:
rc = errno;
fail:
return rc;
}
/** The name of the lock file in the DB environment */
@ -4909,22 +4929,25 @@ mdb_env_close0(MDB_env *env)
* data owned by this process (me_close_readers and
* our readers), and clear each reader atomically.
*/
mdb_ensure(env, pthread_mutex_lock(&mdb_rthc_lock) == 0);
for (i = env->me_close_readers; --i >= 0; ) {
MDB_reader *reader = &env->me_txns->mti_readers[i];
if (reader->mr_pid == pid) {
struct MDB_rthc *rthc = reader->mr_rthc;
if (rthc) {
mdb_ensure(env, rthc->rc_reader == reader);
rthc->rc_reader = NULL;
reader->mr_rthc = NULL;
free(rthc);
if (pid == getpid()) {
mdb_ensure(env, pthread_mutex_lock(&mdb_rthc_lock) == 0);
for (i = env->me_close_readers; --i >= 0; ) {
MDB_reader *reader = &env->me_txns->mti_readers[i];
if (reader->mr_pid == pid) {
struct MDB_rthc *rthc = reader->mr_rthc;
if (rthc) {
mdb_ensure(env, rthc->rc_reader == reader);
rthc->rc_reader = NULL;
reader->mr_rthc = NULL;
free(rthc);
}
reader->mr_pid = 0;
}
reader->mr_pid = 0;
}
mdb_coherent_barrier();
mdb_ensure(env, pthread_mutex_unlock(&mdb_rthc_lock) == 0);
}
mdb_coherent_barrier();
mdb_ensure(env, pthread_mutex_unlock(&mdb_rthc_lock) == 0);
munmap((void *)env->me_txns, (env->me_maxreaders-1)*sizeof(MDB_reader)+sizeof(MDB_txninfo));
env->me_txns = NULL;
@ -9171,7 +9194,9 @@ mdb_env_copyfd0(MDB_env *env, HANDLE fd)
return rc;
/* We must start the actual read txn after blocking writers */
mdb_txn_end(txn, MDB_END_RESET_TMP);
rc = mdb_txn_end(txn, MDB_END_RESET_TMP);
if (rc)
return rc;
/* Temporarily block writers until we snapshot the meta pages */
wmutex = MDB_MUTEX(env, w);
@ -10016,6 +10041,11 @@ mdb_reader_check0(MDB_env *env, int rlocked, int *dead)
pid_t *pids, pid;
int rc = MDB_SUCCESS, count = 0;
if (unlikely(env->me_pid != getpid())) {
env->me_flags |= MDB_FATAL_ERROR;
return MDB_PANIC;
}
rdrs = env->me_txns->mti_numreaders;
pids = malloc((rdrs+1) * sizeof(pid_t));
if (!pids)