mdbx: rework mdbx_dbi_open().

This commit is contained in:
Leo Yuriev 2017-05-15 21:18:52 +03:00
parent 5fdad46cb9
commit ed46246931
2 changed files with 116 additions and 150 deletions

52
mdbx.h
View File

@ -1137,58 +1137,6 @@ LIBMDBX_API int mdbx_dbi_close(MDB_env *env, MDB_dbi dbi);
*/ */
LIBMDBX_API int mdbx_drop(MDB_txn *txn, MDB_dbi dbi, int del); LIBMDBX_API int mdbx_drop(MDB_txn *txn, MDB_dbi dbi, int del);
/* Set a custom key comparison function for a database.
*
* The comparison function is called whenever it is necessary to compare a
* key specified by the application with a key currently stored in the
* database. If no comparison function is specified, and no special key
* flags were specified with mdbx_dbi_open(), the keys are compared
* lexically, with shorter keys collating before longer keys.
*
* Warning: This function must be called before any data access functions
* are used, otherwise data corruption may occur. The same comparison
* function must be used by every program accessing the database, every
* time the database is used.
*
* [in] txn A transaction handle returned by mdbx_txn_begin()
* [in] dbi A database handle returned by mdbx_dbi_open()
* [in] cmp A MDB_cmp_func function
*
* Returns A non-zero error value on failure and 0 on success. Some possible
* errors are:
* - EINVAL - an invalid parameter was specified.
*/
LIBMDBX_API int mdbx_set_compare(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp);
/* Set a custom data comparison function for a MDB_DUPSORT database.
*
* This comparison function is called whenever it is necessary to compare a
* data item specified by the application with a data item currently stored
* in the database. This function only takes effect if the database was
* opened with the MDB_DUPSORT flag. If no comparison function is specified,
* and no special key flags were specified with mdbx_dbi_open(), the data
* items are compared lexically, with shorter items collating before longer
* items.
*
* Warning: This function must be called before any data access functions
* are used, otherwise data corruption may occur. The same comparison
* function must be used by every program accessing the database, every
* time the database is used.
*
* [in] txn A transaction handle returned by mdbx_txn_begin()
* [in] dbi A database handle returned by mdbx_dbi_open()
* [in] cmp A MDB_cmp_func function
*
* Returns A non-zero error value on failure and 0 on success. Some possible
* errors are:
* - EINVAL - an invalid parameter was specified.
*/
LIBMDBX_API int mdbx_set_dupsort(MDB_txn *txn, MDB_dbi dbi, MDB_cmp_func *cmp);
/* Get items from a database. /* Get items from a database.
* *
* This function retrieves key/data pairs from the database. The address * This function retrieves key/data pairs from the database. The address

View File

@ -2435,12 +2435,11 @@ size_t mdbx_txn_id(MDB_txn *txn) {
/** Export or close DBI handles opened in this txn. */ /** Export or close DBI handles opened in this txn. */
static void mdbx_dbis_update(MDB_txn *txn, int keep) { static void mdbx_dbis_update(MDB_txn *txn, int keep) {
int i;
MDB_dbi n = txn->mt_numdbs; MDB_dbi n = txn->mt_numdbs;
MDB_env *env = txn->mt_env; MDB_env *env = txn->mt_env;
unsigned char *tdbflags = txn->mt_dbflags; unsigned char *tdbflags = txn->mt_dbflags;
for (i = n; --i >= CORE_DBS;) { for (unsigned i = n; --i >= CORE_DBS;) {
if (tdbflags[i] & DB_NEW) { if (tdbflags[i] & DB_NEW) {
if (keep) { if (keep) {
env->me_dbflags[i] = txn->mt_dbs[i].md_flags | MDB_VALID; env->me_dbflags[i] = txn->mt_dbs[i].md_flags | MDB_VALID;
@ -8706,144 +8705,177 @@ static MDB_cmp_func *mdbx_default_datacmp(unsigned flags) {
: mdbx_cmp_memn)); : mdbx_cmp_memn));
} }
/** Set the default comparison functions for a database. static int mdbx_dbi_bind(MDB_txn *txn, const MDB_dbi dbi, unsigned user_flags,
* Called immediately after a database is opened to set the defaults. MDB_cmp_func *keycmp, MDB_cmp_func *datacmp) {
* The user can then override them with #mdbx_set_compare() or /* LY: so, accepting only three cases for the table's flags:
* #mdbx_set_dupsort(). * 1) user_flags and both comparators are zero
* @param[in] txn A transaction handle returned by #mdbx_txn_begin() * = assume that a by-default mode/flags is requested for reading;
* @param[in] dbi A database handle returned by #mdbx_dbi_open() * 2) user_flags exactly the same
* = assume that the target mode/flags are requested properly;
* 3) user_flags differ, but the table is empty and MDB_CREATE is provided
* = assume that this is a proper create request with custom flags;
*/ */
static void mdbx_default_cmp(MDB_txn *txn, MDB_dbi dbi) { if ((user_flags ^ txn->mt_dbs[dbi].md_flags) & PERSISTENT_FLAGS) {
unsigned flags = txn->mt_dbs[dbi].md_flags; /* flags differ, check other conditions */
txn->mt_dbxs[dbi].md_cmp = mdbx_default_keycmp(flags); if (!user_flags && (!keycmp || keycmp == txn->mt_dbxs[dbi].md_cmp) &&
txn->mt_dbxs[dbi].md_dcmp = mdbx_default_datacmp(flags); (!datacmp || datacmp == txn->mt_dbxs[dbi].md_dcmp)) {
/* no comparators were provided and flags are zero,
* so this appears to be case #1 above */
user_flags = txn->mt_dbs[dbi].md_flags;
} else if ((user_flags & MDB_CREATE) && txn->mt_dbs[dbi].md_entries == 0) {
if (txn->mt_flags & MDB_TXN_RDONLY)
return /* FIXME: return extended info */ MDBX_EACCESS;
/* make sure flags changes get committed */
txn->mt_dbs[dbi].md_flags = user_flags & PERSISTENT_FLAGS;
txn->mt_flags |= MDB_TXN_DIRTY;
} else {
return /* FIXME: return extended info */ MDB_INCOMPATIBLE;
}
}
if (!txn->mt_dbxs[dbi].md_cmp || MDB_DEBUG) {
if (!keycmp)
keycmp = mdbx_default_keycmp(user_flags);
assert(!txn->mt_dbxs[dbi].md_cmp || txn->mt_dbxs[dbi].md_cmp == keycmp);
txn->mt_dbxs[dbi].md_cmp = keycmp;
}
if (!txn->mt_dbxs[dbi].md_dcmp || MDB_DEBUG) {
if (!datacmp)
datacmp = mdbx_default_datacmp(user_flags);
assert(!txn->mt_dbxs[dbi].md_dcmp || txn->mt_dbxs[dbi].md_dcmp == datacmp);
txn->mt_dbxs[dbi].md_dcmp = datacmp;
}
return MDB_SUCCESS;
} }
int mdbx_dbi_open(MDB_txn *txn, const char *name, unsigned flags, int mdbx_dbi_open_ex(MDB_txn *txn, const char *table_name, unsigned user_flags,
MDB_dbi *dbi) { MDB_dbi *dbi, MDB_cmp_func *keycmp,
MDB_val key, data; MDB_cmp_func *datacmp) {
MDB_dbi i; if (unlikely(!txn || !dbi || (user_flags & ~VALID_FLAGS) != 0))
MDB_cursor mc;
int rc, dbflag, exact;
unsigned unused = 0, seq;
char *namedup;
size_t len;
if (unlikely(!txn || !dbi))
return MDBX_EINVAL; return MDBX_EINVAL;
if (unlikely(txn->mt_signature != MDBX_MT_SIGNATURE)) if (unlikely(txn->mt_signature != MDBX_MT_SIGNATURE))
return MDBX_EBADSIGN; return MDBX_EBADSIGN;
if (unlikely(flags & ~VALID_FLAGS))
return MDBX_EINVAL;
if (unlikely(txn->mt_flags & MDB_TXN_BLOCKED)) if (unlikely(txn->mt_flags & MDB_TXN_BLOCKED))
return MDB_BAD_TXN; return MDB_BAD_TXN;
/* main DB? */ /* main table? */
if (!name) { if (!table_name) {
*dbi = MAIN_DBI; *dbi = MAIN_DBI;
if (flags & PERSISTENT_FLAGS) { return mdbx_dbi_bind(txn, MAIN_DBI, user_flags, keycmp, datacmp);
uint16_t f2 = flags & PERSISTENT_FLAGS;
/* make sure flag changes get committed */
if ((txn->mt_dbs[MAIN_DBI].md_flags | f2) !=
txn->mt_dbs[MAIN_DBI].md_flags) {
txn->mt_dbs[MAIN_DBI].md_flags |= f2;
txn->mt_flags |= MDB_TXN_DIRTY;
}
}
mdbx_default_cmp(txn, MAIN_DBI);
return MDB_SUCCESS;
} }
if (txn->mt_dbxs[MAIN_DBI].md_cmp == NULL) { if (txn->mt_dbxs[MAIN_DBI].md_cmp == NULL) {
mdbx_default_cmp(txn, MAIN_DBI); txn->mt_dbxs[MAIN_DBI].md_cmp =
mdbx_default_keycmp(txn->mt_dbs[MAIN_DBI].md_flags);
txn->mt_dbxs[MAIN_DBI].md_dcmp =
mdbx_default_datacmp(txn->mt_dbs[MAIN_DBI].md_flags);
} }
/* Is the DB already open? */ /* Is the DB already open? */
len = strlen(name); size_t len = strlen(table_name);
for (i = CORE_DBS; i < txn->mt_numdbs; i++) { MDB_dbi scan, slot = txn->mt_numdbs;
if (!txn->mt_dbxs[i].md_name.mv_size) { for (scan = txn->mt_numdbs; --scan >= CORE_DBS;) {
if (!txn->mt_dbxs[scan].md_name.mv_size) {
/* Remember this free slot */ /* Remember this free slot */
if (!unused) slot = scan;
unused = i;
continue; continue;
} }
if (len == txn->mt_dbxs[i].md_name.mv_size && if (len == txn->mt_dbxs[scan].md_name.mv_size &&
!strncmp(name, txn->mt_dbxs[i].md_name.mv_data, len)) { !strncmp(table_name, txn->mt_dbxs[scan].md_name.mv_data, len)) {
*dbi = i; *dbi = scan;
return MDB_SUCCESS; return mdbx_dbi_bind(txn, scan, user_flags, keycmp, datacmp);
} }
} }
/* If no free slot and max hit, fail */ /* Fail, if no free slot and max hit */
if (!unused && unlikely(txn->mt_numdbs >= txn->mt_env->me_maxdbs)) if (unlikely(slot >= txn->mt_env->me_maxdbs))
return MDB_DBS_FULL; return MDB_DBS_FULL;
/* Cannot mix named databases with some mainDB flags */ /* Cannot mix named table with some main-table flags */
if (unlikely(txn->mt_dbs[MAIN_DBI].md_flags & (MDB_DUPSORT | MDB_INTEGERKEY))) if (unlikely(txn->mt_dbs[MAIN_DBI].md_flags & (MDB_DUPSORT | MDB_INTEGERKEY)))
return (flags & MDB_CREATE) ? MDB_INCOMPATIBLE : MDB_NOTFOUND; return (user_flags & MDB_CREATE) ? MDB_INCOMPATIBLE : MDB_NOTFOUND;
/* Find the DB info */ /* Find the DB info */
dbflag = DB_NEW | DB_VALID | DB_USRVALID; int exact = 0;
exact = 0; MDB_val key, data;
key.mv_size = len; key.mv_size = len;
key.mv_data = (void *)name; key.mv_data = (void *)table_name;
MDB_cursor mc;
mdbx_cursor_init(&mc, txn, MAIN_DBI, NULL); mdbx_cursor_init(&mc, txn, MAIN_DBI, NULL);
rc = mdbx_cursor_set(&mc, &key, &data, MDB_SET, &exact); int rc = mdbx_cursor_set(&mc, &key, &data, MDB_SET, &exact);
if (likely(rc == MDB_SUCCESS)) { if (unlikely(rc != MDB_SUCCESS)) {
/* make sure this is actually a DB */ if (rc != MDB_NOTFOUND || !(user_flags & MDB_CREATE))
return rc;
} else {
/* make sure this is actually a table */
MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]); MDB_node *node = NODEPTR(mc.mc_pg[mc.mc_top], mc.mc_ki[mc.mc_top]);
if (unlikely((node->mn_flags & (F_DUPDATA | F_SUBDATA)) != F_SUBDATA)) if (unlikely((node->mn_flags & (F_DUPDATA | F_SUBDATA)) != F_SUBDATA))
return MDB_INCOMPATIBLE; return MDB_INCOMPATIBLE;
} else if (!(rc == MDB_NOTFOUND && (flags & MDB_CREATE))) {
return rc;
} }
/* FIXME: locking to avoid races ? */
/* Done here so we cannot fail after creating a new DB */ /* Done here so we cannot fail after creating a new DB */
if (unlikely((namedup = mdbx_strdup(name)) == NULL)) char *namedup = mdbx_strdup(table_name);
if (unlikely(!namedup))
return MDBX_ENOMEM; return MDBX_ENOMEM;
/* FIXME: lock here (to avoid races !!!) */
unsigned dbflag = DB_NEW | DB_VALID | DB_USRVALID;
if (unlikely(rc)) { if (unlikely(rc)) {
MDB_db db_dummy;
/* MDB_NOTFOUND and MDB_CREATE: Create new DB */ /* MDB_NOTFOUND and MDB_CREATE: Create new DB */
assert(rc == MDB_NOTFOUND);
MDB_db db_dummy;
memset(&db_dummy, 0, sizeof(db_dummy)); memset(&db_dummy, 0, sizeof(db_dummy));
db_dummy.md_root = P_INVALID; db_dummy.md_root = P_INVALID;
db_dummy.md_flags = flags & PERSISTENT_FLAGS; db_dummy.md_flags = user_flags & PERSISTENT_FLAGS;
data.mv_size = sizeof(db_dummy); data.mv_size = sizeof(db_dummy);
data.mv_data = &db_dummy; data.mv_data = &db_dummy;
WITH_CURSOR_TRACKING(mc, rc = mdbx_cursor_put(&mc, &key, &data, F_SUBDATA)); WITH_CURSOR_TRACKING(mc, rc = mdbx_cursor_put(&mc, &key, &data,
F_SUBDATA | MDB_NOOVERWRITE));
if (unlikely(rc != MDB_SUCCESS))
goto bailout;
dbflag |= DB_DIRTY; dbflag |= DB_DIRTY;
} }
if (unlikely(rc)) {
free(namedup);
} else {
/* Got info, register DBI in this txn */ /* Got info, register DBI in this txn */
unsigned slot = unused ? unused : txn->mt_numdbs;
txn->mt_dbxs[slot].md_name.mv_data = namedup; txn->mt_dbxs[slot].md_name.mv_data = namedup;
txn->mt_dbxs[slot].md_name.mv_size = len; txn->mt_dbxs[slot].md_name.mv_size = len;
txn->mt_dbxs[slot].md_cmp = nullptr;
txn->mt_dbxs[slot].md_dcmp = nullptr;
txn->mt_dbflags[slot] = dbflag; txn->mt_dbflags[slot] = dbflag;
/* txn-> and env-> are the same in read txns, use txn->mt_dbiseqs[slot] = (txn->mt_env->me_dbiseqs[slot] += 1);
* tmp variable to avoid undefined assignment
*/
seq = ++txn->mt_env->me_dbiseqs[slot];
txn->mt_dbiseqs[slot] = seq;
memcpy(&txn->mt_dbs[slot], data.mv_data, sizeof(MDB_db)); memcpy(&txn->mt_dbs[slot], data.mv_data, sizeof(MDB_db));
rc = mdbx_dbi_bind(txn, slot, user_flags, keycmp, datacmp);
if (unlikely(rc != MDB_SUCCESS)) {
assert((dbflag & DB_DIRTY) == 0);
/* cleanup slot */
txn->mt_dbxs[slot].md_name.mv_data = NULL;
txn->mt_dbxs[slot].md_name.mv_size = 0;
txn->mt_dbflags[slot] = 0;
bailout:
free(namedup);
} else {
*dbi = slot; *dbi = slot;
mdbx_default_cmp(txn, slot); if (slot == txn->mt_numdbs)
if (!unused) {
txn->mt_numdbs++; txn->mt_numdbs++;
} }
}
/* FIXME: unlock here (to avoid races !!!) */
return rc; return rc;
} }
int mdbx_dbi_open(MDB_txn *txn, const char *table_name, unsigned table_flags,
MDB_dbi *dbi) {
return mdbx_dbi_open_ex(txn, table_name, table_flags, dbi, nullptr, nullptr);
}
int __cold mdbx_dbi_stat(MDB_txn *txn, MDB_dbi dbi, MDBX_stat *arg, int __cold mdbx_dbi_stat(MDB_txn *txn, MDB_dbi dbi, MDBX_stat *arg,
size_t bytes) { size_t bytes) {
if (unlikely(!arg || !txn)) if (unlikely(!arg || !txn))
@ -10214,20 +10246,6 @@ int mdbx_is_dirty(const MDB_txn *txn, const void *ptr) {
return MDBX_RESULT_TRUE; return MDBX_RESULT_TRUE;
} }
int mdbx_dbi_open_ex(MDB_txn *txn, const char *name, unsigned flags,
MDB_dbi *pdbi, MDB_cmp_func *keycmp,
MDB_cmp_func *datacmp) {
int rc = mdbx_dbi_open(txn, name, flags, pdbi);
if (likely(rc == MDB_SUCCESS)) {
MDB_dbi dbi = *pdbi;
unsigned md_flags = txn->mt_dbs[dbi].md_flags;
txn->mt_dbxs[dbi].md_cmp = keycmp ? keycmp : mdbx_default_keycmp(md_flags);
txn->mt_dbxs[dbi].md_dcmp =
datacmp ? datacmp : mdbx_default_datacmp(md_flags);
}
return rc;
}
int mdbx_dbi_sequence(MDB_txn *txn, MDB_dbi dbi, uint64_t *result, int mdbx_dbi_sequence(MDB_txn *txn, MDB_dbi dbi, uint64_t *result,
uint64_t increment) { uint64_t increment) {
if (unlikely(!txn)) if (unlikely(!txn))