mdbx: add locking while open/close/drop dbi-handles (avoid dbi-related races).

This avoids races and collisions between threads when opening,
closing and deleting DBI-handles.

Unfortunately, this does not resolve the collision in the case where one thread
is closing a DBI-handle while another thread is performing a transaction.

Change-Id: I48c3ffb11a8f83739fae1712db3476645f573e09
This commit is contained in:
Leo Yuriev 2017-05-28 18:50:09 +03:00
parent bd5d092a2b
commit 0f676db491
2 changed files with 54 additions and 18 deletions

View File

@ -603,6 +603,7 @@ struct MDBX_env {
unsigned me_maxreaders; /* size of the reader table */ unsigned me_maxreaders; /* size of the reader table */
/* Max MDBX_lockinfo.mti_numreaders of interest to mdbx_env_close() */ /* Max MDBX_lockinfo.mti_numreaders of interest to mdbx_env_close() */
unsigned me_close_readers; unsigned me_close_readers;
mdbx_fastmutex_t me_dbi_lock;
MDBX_dbi me_numdbs; /* number of DBs opened */ MDBX_dbi me_numdbs; /* number of DBs opened */
MDBX_dbi me_maxdbs; /* size of the DB table */ MDBX_dbi me_maxdbs; /* size of the DB table */
mdbx_pid_t me_pid; /* process ID of this env */ mdbx_pid_t me_pid; /* process ID of this env */

View File

@ -3814,6 +3814,10 @@ int __cold mdbx_env_create(MDBX_env **penv) {
} }
mdbx_env_setup_limits(env, env->me_os_psize); mdbx_env_setup_limits(env, env->me_os_psize);
rc = mdbx_fastmutex_init(&env->me_dbi_lock);
if (unlikely(rc != MDBX_SUCCESS))
goto bailout;
VALGRIND_CREATE_MEMPOOL(env, 0, 0); VALGRIND_CREATE_MEMPOOL(env, 0, 0);
env->me_signature = MDBX_ME_SIGNATURE; env->me_signature = MDBX_ME_SIGNATURE;
*penv = env; *penv = env;
@ -4507,6 +4511,7 @@ int __cold mdbx_env_close_ex(MDBX_env *env, int dont_sync) {
} }
mdbx_env_close0(env); mdbx_env_close0(env);
mdbx_ensure(env, mdbx_fastmutex_destroy(&env->me_dbi_lock) == MDBX_SUCCESS);
env->me_signature = 0; env->me_signature = 0;
free(env); free(env);
@ -9106,7 +9111,8 @@ int mdbx_dbi_open_ex(MDBX_txn *txn, const char *table_name, unsigned user_flags,
} }
/* Fail, if no free slot and max hit */ /* Fail, if no free slot and max hit */
if (unlikely(slot >= txn->mt_env->me_maxdbs)) MDBX_env *env = txn->mt_env;
if (unlikely(slot >= env->me_maxdbs))
return MDBX_DBS_FULL; return MDBX_DBS_FULL;
/* Cannot mix named table with some main-table flags */ /* Cannot mix named table with some main-table flags */
@ -9137,7 +9143,11 @@ int mdbx_dbi_open_ex(MDBX_txn *txn, const char *table_name, unsigned user_flags,
if (unlikely(!namedup)) if (unlikely(!namedup))
return MDBX_ENOMEM; return MDBX_ENOMEM;
/* FIXME: lock here (to avoid races !!!) */ int err = mdbx_fastmutex_acquire(&env->me_dbi_lock);
if (unlikely(err != MDBX_SUCCESS)) {
free(namedup);
return err;
}
unsigned dbflag = DB_NEW | DB_VALID | DB_USRVALID; unsigned dbflag = DB_NEW | DB_VALID | DB_USRVALID;
if (unlikely(rc)) { if (unlikely(rc)) {
@ -9165,7 +9175,7 @@ int mdbx_dbi_open_ex(MDBX_txn *txn, const char *table_name, unsigned user_flags,
txn->mt_dbxs[slot].md_cmp = nullptr; txn->mt_dbxs[slot].md_cmp = nullptr;
txn->mt_dbxs[slot].md_dcmp = nullptr; txn->mt_dbxs[slot].md_dcmp = nullptr;
txn->mt_dbflags[slot] = dbflag; txn->mt_dbflags[slot] = dbflag;
txn->mt_dbiseqs[slot] = (txn->mt_env->me_dbiseqs[slot] += 1); txn->mt_dbiseqs[slot] = (env->me_dbiseqs[slot] += 1);
txn->mt_dbs[slot] = *(MDBX_db *)data.iov_base; txn->mt_dbs[slot] = *(MDBX_db *)data.iov_base;
rc = mdbx_dbi_bind(txn, slot, user_flags, keycmp, datacmp); rc = mdbx_dbi_bind(txn, slot, user_flags, keycmp, datacmp);
@ -9183,7 +9193,7 @@ int mdbx_dbi_open_ex(MDBX_txn *txn, const char *table_name, unsigned user_flags,
txn->mt_numdbs++; txn->mt_numdbs++;
} }
/* FIXME: unlock here (to avoid races !!!) */ mdbx_ensure(env, mdbx_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS);
return rc; return rc;
} }
@ -9218,13 +9228,11 @@ int __cold mdbx_dbi_stat(MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *arg,
return mdbx_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg); return mdbx_stat0(txn->mt_env, &txn->mt_dbs[dbi], arg);
} }
int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { static int mdbx_dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) {
char *ptr;
if (unlikely(dbi < CORE_DBS || dbi >= env->me_maxdbs)) if (unlikely(dbi < CORE_DBS || dbi >= env->me_maxdbs))
return MDBX_EINVAL; return MDBX_EINVAL;
/* FIXME: locking to avoid races ? */ char *ptr = env->me_dbxs[dbi].md_name.iov_base;
ptr = env->me_dbxs[dbi].md_name.iov_base;
/* If there was no name, this was already closed */ /* If there was no name, this was already closed */
if (unlikely(!ptr)) if (unlikely(!ptr))
return MDBX_BAD_DBI; return MDBX_BAD_DBI;
@ -9237,6 +9245,18 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) {
return MDBX_SUCCESS; return MDBX_SUCCESS;
} }
/* Close a DBI-handle while holding env->me_dbi_lock.
 *
 * The handle range is validated first; then the lock is taken, the actual
 * close is delegated to mdbx_dbi_close_locked(), and the lock is released.
 *
 * Returns MDBX_EINVAL when dbi is below CORE_DBS or not below
 * env->me_maxdbs, the error from mdbx_fastmutex_acquire() when the lock
 * cannot be taken, otherwise whatever mdbx_dbi_close_locked() returns. */
int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) {
  /* Reject out-of-range handles before touching the lock. */
  if (unlikely(dbi < CORE_DBS || dbi >= env->me_maxdbs))
    return MDBX_EINVAL;

  const int lock_err = mdbx_fastmutex_acquire(&env->me_dbi_lock);
  if (unlikely(lock_err != MDBX_SUCCESS))
    return lock_err;

  const int close_rc = mdbx_dbi_close_locked(env, dbi);
  /* The release result is checked via mdbx_ensure(), not returned. */
  mdbx_ensure(env, mdbx_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS);
  return close_rc;
}
int mdbx_dbi_flags(MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags) { int mdbx_dbi_flags(MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags) {
if (unlikely(!txn || !flags)) if (unlikely(!txn || !flags))
return MDBX_EINVAL; return MDBX_EINVAL;
@ -9344,9 +9364,6 @@ static int mdbx_drop0(MDBX_cursor *mc, int subs) {
} }
int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, int del) { int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, int del) {
MDBX_cursor *mc, *m2;
int rc;
if (unlikely(1 < (unsigned)del || !txn)) if (unlikely(1 < (unsigned)del || !txn))
return MDBX_EINVAL; return MDBX_EINVAL;
@ -9362,25 +9379,41 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, int del) {
if (unlikely(F_ISSET(txn->mt_flags, MDBX_TXN_RDONLY))) if (unlikely(F_ISSET(txn->mt_flags, MDBX_TXN_RDONLY)))
return MDBX_EACCESS; return MDBX_EACCESS;
rc = mdbx_cursor_open(txn, dbi, &mc); MDBX_cursor *mc;
if (unlikely(rc)) int rc = mdbx_cursor_open(txn, dbi, &mc);
if (unlikely(rc != MDBX_SUCCESS))
return rc; return rc;
/* FIXME: locking to avoid races ? */ MDBX_env *env = txn->mt_env;
rc = mdbx_fastmutex_acquire(&env->me_dbi_lock);
if (unlikely(rc != MDBX_SUCCESS)) {
mdbx_cursor_close(mc);
return rc;
}
if (unlikely(!TXN_DBI_EXIST(txn, dbi, DB_USRVALID))) {
rc = MDBX_EINVAL;
goto bailout;
}
if (unlikely(TXN_DBI_CHANGED(txn, dbi))) {
rc = MDBX_BAD_DBI;
goto bailout;
}
rc = mdbx_drop0(mc, mc->mc_db->md_flags & MDBX_DUPSORT); rc = mdbx_drop0(mc, mc->mc_db->md_flags & MDBX_DUPSORT);
/* Invalidate the dropped DB's cursors */ /* Invalidate the dropped DB's cursors */
for (m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) for (MDBX_cursor *m2 = txn->mt_cursors[dbi]; m2; m2 = m2->mc_next)
m2->mc_flags &= ~(C_INITIALIZED | C_EOF); m2->mc_flags &= ~(C_INITIALIZED | C_EOF);
if (unlikely(rc)) if (unlikely(rc))
goto leave; goto bailout;
/* Can't delete the main DB */ /* Can't delete the main DB */
if (del && dbi >= CORE_DBS) { if (del && dbi >= CORE_DBS) {
rc = mdbx_del0(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, F_SUBDATA); rc = mdbx_del0(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, F_SUBDATA);
if (likely(!rc)) { if (likely(!rc)) {
txn->mt_dbflags[dbi] = DB_STALE; txn->mt_dbflags[dbi] = DB_STALE;
mdbx_dbi_close(txn->mt_env, dbi); mdbx_dbi_close_locked(env, dbi);
} else { } else {
txn->mt_flags |= MDBX_TXN_ERROR; txn->mt_flags |= MDBX_TXN_ERROR;
} }
@ -9397,8 +9430,10 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, int del) {
txn->mt_flags |= MDBX_TXN_DIRTY; txn->mt_flags |= MDBX_TXN_DIRTY;
} }
leave:
bailout:
mdbx_cursor_close(mc); mdbx_cursor_close(mc);
mdbx_ensure(env, mdbx_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS);
return rc; return rc;
} }