mdbx: Merge branch 'devel'.

This commit is contained in:
Leo Yuriev 2016-11-27 12:18:53 +03:00
commit b7cb7d6046
6 changed files with 156 additions and 158 deletions

109
lmdb.h
View File

@ -664,9 +664,6 @@ int mdb_env_create(MDB_env **env);
* </ul>
*/
int mdb_env_open(MDB_env *env, const char *path, unsigned flags, mode_t mode);
#if MDBX_MODE_ENABLED
int mdbx_env_open_ex(MDB_env *env, const char *path, unsigned flags, mode_t mode, int *exclusive);
#endif /* MDBX_MODE_ENABLED */
/** @brief Copy an LMDB environment to the specified path.
*
* This function may be used to make a backup of an existing environment.
@ -749,9 +746,6 @@ int mdb_env_copyfd2(MDB_env *env, mdb_filehandle_t fd, unsigned flags);
* where the statistics will be copied
*/
int mdb_env_stat(MDB_env *env, MDB_stat *stat);
#if MDBX_MODE_ENABLED
int mdbx_env_stat(MDB_env *env, MDBX_stat *stat, size_t bytes);
#endif /* MDBX_MODE_ENABLED */
/** @brief Return information about the LMDB environment.
*
@ -760,9 +754,6 @@ int mdbx_env_stat(MDB_env *env, MDBX_stat *stat, size_t bytes);
* where the information will be copied
*/
int mdb_env_info(MDB_env *env, MDB_envinfo *info);
#if MDBX_MODE_ENABLED
int mdbx_env_info(MDB_env *env, MDBX_envinfo *info, size_t bytes);
#endif /* MDBX_MODE_ENABLED */
/** @brief Flush the data buffers to disk.
*
@ -799,9 +790,6 @@ int mdb_env_sync(MDB_env *env, int force);
* checkpoint (meta-page update) will be rolled back to guarantee consistency.
*/
void mdb_env_close(MDB_env *env);
#if MDBX_MODE_ENABLED
int mdbx_env_close_ex(MDB_env *env, int dont_sync);
#endif /* MDBX_MODE_ENABLED */
/** @brief Set environment flags.
*
@ -988,27 +976,6 @@ typedef void MDB_assert_func(MDB_env *env, const char *msg,
*/
int mdb_env_set_assert(MDB_env *env, MDB_assert_func *func);
#if MDBX_MODE_ENABLED
/** @brief Set threshold to force flush the data buffers to disk,
* even of #MDB_NOSYNC, #MDB_NOMETASYNC and #MDB_MAPASYNC flags
* in the environment.
*
* Data is always written to disk when #mdb_txn_commit() is called,
* but the operating system may keep it buffered. LMDB always flushes
* the OS buffers upon commit as well, unless the environment was
* opened with #MDB_NOSYNC or in part #MDB_NOMETASYNC.
*
* The default is 0, than mean no any threshold checked,
* and no additional flush will be made.
*
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] bytes The size in bytes of summary changes
* when a synchronous flush would be made.
* @return A non-zero error value on failure and 0 on success.
*/
int mdbx_env_set_syncbytes(MDB_env *env, size_t bytes);
#endif /* MDBX_MODE_ENABLED */
/** @brief Create a transaction for use with the environment.
*
* The transaction handle may be discarded using #mdb_txn_abort() or #mdb_txn_commit().
@ -1213,9 +1180,6 @@ int mdb_dbi_open(MDB_txn *txn, const char *name, unsigned flags, MDB_dbi *dbi);
* </ul>
*/
int mdb_stat(MDB_txn *txn, MDB_dbi dbi, MDB_stat *stat);
#if MDBX_MODE_ENABLED
int mdbx_stat(MDB_txn *txn, MDB_dbi dbi, MDBX_stat *stat, size_t bytes);
#endif /* MDBX_MODE_ENABLED */
/** @brief Retrieve the DB flags for a database handle.
*
@ -1664,86 +1628,15 @@ int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx);
* @return 0 on success, non-zero on failure.
*/
int mdb_reader_check(MDB_env *env, int *dead);
#if MDBX_MODE_ENABLED
/** @brief Returns a lag of the reading.
*
* Returns an information for estimate how much given read-only
* transaction is lagging relative the to actual head.
*
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[out] percent Percentage of page allocation in the database.
* @return Number of transactions committed after the given was started for read, or -1 on failure.
*/
int mdbx_txn_straggler(MDB_txn *txn, int *percent);
/** @brief A callback function for killing a laggard readers,
* but also could waiting ones. Called in case of MDB_MAP_FULL error.
*
* @param[in] env An environment handle returned by #mdb_env_create().
* @param[in] pid pid of the reader process.
* @param[in] thread_id thread_id of the reader thread.
* @param[in] txn Transaction number on which stalled.
* @param[in] gap a lag from the last commited txn.
* @param[in] retry a retry number, less that zero for notify end of OOM-loop.
* @return -1 on failure (reader is not killed),
* 0 on a race condition (no such reader),
* 1 on success (reader was killed),
* >1 on success (reader was SURE killed).
*/
typedef int (MDBX_oom_func)(MDB_env *env, int pid, void* thread_id, size_t txn, unsigned gap, int retry);
/** @brief Set the OOM callback.
*
* Callback will be called only on out-of-pages case for killing
* a laggard readers to allowing reclaiming of freeDB.
*
* @param[in] env An environment handle returned by #mdb_env_create().
* @param[in] oomfunc A #MDBX_oom_func function or NULL to disable.
*/
void mdbx_env_set_oomfunc(MDB_env *env, MDBX_oom_func *oom_func);
/** @brief Get the current oom_func callback.
*
* Callback will be called only on out-of-pages case for killing
* a laggard readers to allowing reclaiming of freeDB.
*
* @param[in] env An environment handle returned by #mdb_env_create().
* @return A #MDBX_oom_func function or NULL if disabled.
*/
MDBX_oom_func* mdbx_env_get_oomfunc(MDB_env *env);
#endif /* MDBX_MODE_ENABLED */
/** @} */
#if MDBX_MODE_ENABLED
#define MDBX_DBG_ASSERT 1
#define MDBX_DBG_PRINT 2
#define MDBX_DBG_TRACE 4
#define MDBX_DBG_EXTRA 8
#define MDBX_DBG_AUDIT 16
#define MDBX_DBG_EDGE 32
/* LY: a "don't touch" value */
#define MDBX_DBG_DNT (-1L)
typedef void MDBX_debug_func(int type, const char *function, int line,
const char *msg, va_list args);
int mdbx_setup_debug(int flags, MDBX_debug_func* logger, long edge_txn);
typedef int MDBX_pgvisitor_func(size_t pgno, unsigned pgnumber, void* ctx,
const char* dbi, const char *type, int nentries,
int payload_bytes, int header_bytes, int unused_bytes);
int mdbx_env_pgwalk(MDB_txn *txn, MDBX_pgvisitor_func* visitor, void* ctx);
#endif /* MDBX_MODE_ENABLED */
char* mdb_dkey(MDB_val *key, char *buf);
#ifdef __cplusplus
}
#endif
/** @page tools LMDB Command Line Tools
The following describes the command line tools that are available for LMDBX.
The following describes the command line tools that are available for LMDB.
\li \ref mdb_chk_1
\li \ref mdb_copy_1
\li \ref mdb_dump_1

41
mdb.c
View File

@ -805,11 +805,17 @@ typedef struct MDB_dbx {
void *md_relctx; /**< user-provided context for md_rel */
} MDB_dbx;
/* MDBX_MODE_SALT is XORed into the MDBX_MT_SIGNATURE, MDBX_MC_SIGNATURE and
 * MDBX_ME_SIGNATURE values. It is 0 when MDBX_MODE_ENABLED is non-zero and a
 * fixed non-zero constant otherwise, so the two build modes produce different
 * signature values.
 * NOTE(review): presumably this lets the signature checks reject handles
 * created by code built in the other mode - confirm. */
#if MDBX_MODE_ENABLED
# define MDBX_MODE_SALT 0
#else
# define MDBX_MODE_SALT 1115449266
#endif
/** A database transaction.
* Every operation requires a transaction handle.
*/
struct MDB_txn {
#define MDBX_MT_SIGNATURE 0x706C553B
#define MDBX_MT_SIGNATURE (0x706C553B^MDBX_MODE_SALT)
unsigned mt_signature;
MDB_txn *mt_parent; /**< parent of a nested txn */
/** Nested txn under this txn, set together with flag #MDB_TXN_HAS_CHILD */
@ -914,7 +920,7 @@ struct MDB_xcursor;
* (A node with #F_DUPDATA but no #F_SUBDATA contains a subpage).
*/
struct MDB_cursor {
#define MDBX_MC_SIGNATURE 0xFE05D5B1
#define MDBX_MC_SIGNATURE (0xFE05D5B1^MDBX_MODE_SALT)
unsigned mc_signature;
/** Next cursor on this DB in this txn */
MDB_cursor *mc_next;
@ -980,7 +986,7 @@ struct MDB_rthc {
};
/** The database environment. */
struct MDB_env {
#define MDBX_ME_SIGNATURE 0x9A899641
#define MDBX_ME_SIGNATURE (0x9A899641^MDBX_MODE_SALT)
unsigned me_signature;
HANDLE me_fd; /**< The main data file */
HANDLE me_lfd; /**< The lock file */
@ -3097,35 +3103,6 @@ mdb_dbis_update(MDB_txn *txn, int keep)
env->me_numdbs = n;
}
ATTRIBUTE_NO_SANITIZE_THREAD /* LY: avoid tsan-trap by me_txn, mm_last_pg and mt_next_pgno */
int mdbx_txn_straggler(MDB_txn *txn, int *percent)
{
MDB_env *env;
MDB_meta *meta;
txnid_t lag;
if(unlikely(!txn))
return -EINVAL;
if(unlikely(txn->mt_signature != MDBX_MT_SIGNATURE))
return MDB_VERSION_MISMATCH;
if (unlikely(! txn->mt_u.reader))
return -1;
env = txn->mt_env;
meta = mdb_meta_head_r(env);
if (percent) {
size_t maxpg = env->me_maxpg;
size_t last = meta->mm_last_pg + 1;
if (env->me_txn)
last = env->me_txn0->mt_next_pgno;
*percent = (last * 100ull + maxpg / 2) / maxpg;
}
lag = meta->mm_txnid - txn->mt_u.reader->mr_txnid;
return (0 > (long) lag) ? ~0u >> 1: lag;
}
/** End a transaction, except successful commit of a nested transaction.
* May be called twice for readonly txns: First reset it, then abort.
* @param[in] txn the transaction handle to end

View File

@ -74,9 +74,13 @@ struct {
short *pagemap;
size_t total_payload_bytes;
size_t pgcount;
} walk = {
.dbi_names = { "@gc" }
};
} walk;
/* Stores "@gc" in walk.dbi_names[0].
 * Marked with the GCC/Clang `constructor` attribute, so the toolchain runs it
 * automatically before main(); nothing in the program has to call it. */
static __attribute__((constructor))
void init_walk(void)
{
walk.dbi_names[0] = "@gc";
}
size_t total_unused_bytes;
int exclusive = 2;

42
mdbx.c
View File

@ -53,8 +53,6 @@ mdbx_setup_debug(int flags, MDBX_debug_func* logger, long edge_txn) {
static txnid_t __cold
mdbx_oomkick(MDB_env *env, txnid_t oldest)
{
mdb_debug("DB size maxed out");
#if MDBX_MODE_ENABLED
int retry;
txnid_t snap;
mdb_debug("DB size maxed out");
@ -107,10 +105,6 @@ mdbx_oomkick(MDB_env *env, txnid_t oldest)
/* LY: notify end of oom-loop */
env->me_oom_func(env, 0, 0, oldest, 0, -retry);
}
#else
(void) oldest;
(void) mdb_reader_check(env, NULL);
#endif /* MDBX_MODE_ENABLED */
return mdb_find_oldest(env, NULL);
}
@ -141,14 +135,40 @@ mdbx_env_get_oomfunc(MDB_env *env)
? env->me_oom_func : NULL;
}
struct mdb_walk_ctx {
/** Report how far a reader transaction lags behind the head meta page and,
 * optionally, how much of the page budget is in use.
 *
 * @param[in] txn Transaction to inspect. A NULL handle, a handle with a wrong
 * mt_signature, or one without mt_u.reader is rejected (see return values).
 * @param[out] percent Optional. When non-NULL it receives the used page count
 * as a percentage of env->me_maxpg, rounded to nearest.
 * @return meta->mm_txnid minus the reader's mr_txnid, converted to int;
 * ~0u >> 1 when that difference is negative as a long;
 * -EINVAL for a NULL txn; MDB_VERSION_MISMATCH for a bad signature;
 * -1 when the txn has no reader slot.
 *
 * NOTE(review): the public header documents only "-1 on failure", while this
 * function has three distinct error returns - callers testing for -1 alone
 * will miss the other two.
 */
ATTRIBUTE_NO_SANITIZE_THREAD /* LY: avoid tsan-trap by me_txn, mm_last_pg and mt_next_pgno */
int mdbx_txn_straggler(MDB_txn *txn, int *percent)
{
MDB_env *env;
MDB_meta *meta;
txnid_t lag;
/* Validate the handle before touching any of its fields. */
if(unlikely(!txn))
return -EINVAL;
if(unlikely(txn->mt_signature != MDBX_MT_SIGNATURE))
return MDB_VERSION_MISMATCH;
/* The lag is computed from the reader slot, so a txn without one cannot be measured. */
if (unlikely(! txn->mt_u.reader))
return -1;
env = txn->mt_env;
/* presumably the most recent meta page - confirm against mdb_meta_head_r() */
meta = mdb_meta_head_r(env);
if (percent) {
size_t maxpg = env->me_maxpg;
size_t last = meta->mm_last_pg + 1;
/* When env->me_txn is set, me_txn0's mt_next_pgno replaces the meta-based value
 * (presumably a write txn is in progress - confirm). */
if (env->me_txn)
last = env->me_txn0->mt_next_pgno;
/* Adding maxpg / 2 before dividing rounds to nearest instead of truncating.
 * NOTE(review): no guard against maxpg == 0 is visible here. */
*percent = (last * 100ull + maxpg / 2) / maxpg;
}
lag = meta->mm_txnid - txn->mt_u.reader->mr_txnid;
/* A difference that is negative when viewed as long is reported as ~0u >> 1
 * (all bits of unsigned set except the top one); otherwise lag is converted
 * to the int return type. */
return (0 > (long) lag) ? ~0u >> 1: lag;
}
typedef struct mdb_walk_ctx {
MDB_txn *mw_txn;
void *mw_user;
MDBX_pgvisitor_func *mw_visitor;
};
typedef struct mdb_walk_ctx mdb_walk_ctx_t;
} mdb_walk_ctx_t;
/** Depth-first tree traversal. */
static int __cold

106
mdbx.h
View File

@ -50,6 +50,11 @@
# define _GNU_SOURCE
#endif
/** @defgroup mdbx MDBX API
* @{
* @brief libmdbx - Extended version of LMDB
*/
#define mdb_version mdbx_version
#define mdb_strerror mdbx_strerror
#define mdb_env_create mdbx_env_create
@ -61,7 +66,6 @@
#define mdb_env_copyfd2 mdbx_env_copyfd2
#define mdb_env_sync mdbx_env_sync
#define mdb_env_close mdbx_env_close
#define mdb_env_close_ex mdbx_env_close_ex
#define mdb_env_set_flags mdbx_env_set_flags
#define mdb_env_get_flags mdbx_env_get_flags
#define mdb_env_get_path mdbx_env_get_path
@ -113,4 +117,104 @@
#include "./lmdb.h"
#ifdef __cplusplus
extern "C" {
#endif
int mdbx_env_open_ex(MDB_env *env, const char *path, unsigned flags, mode_t mode, int *exclusive);
int mdbx_env_stat(MDB_env *env, MDBX_stat *stat, size_t bytes);
int mdbx_stat(MDB_txn *txn, MDB_dbi dbi, MDBX_stat *stat, size_t bytes);
int mdbx_env_info(MDB_env *env, MDBX_envinfo *info, size_t bytes);
int mdbx_env_close_ex(MDB_env *env, int dont_sync);
/** @brief Set the threshold at which the data buffers are forcibly flushed to disk,
* even if the #MDB_NOSYNC, #MDB_NOMETASYNC or #MDB_MAPASYNC flags
* are set in the environment.
*
* Data is always written to disk when #mdb_txn_commit() is called,
* but the operating system may keep it buffered. LMDB always flushes
* the OS buffers upon commit as well, unless the environment was
* opened with #MDB_NOSYNC or in part #MDB_NOMETASYNC.
*
* The default is 0, which means that no threshold is checked
* and no additional flush will be made.
*
* @param[in] env An environment handle returned by #mdb_env_create()
* @param[in] bytes The total size in bytes of accumulated changes
* at which a synchronous flush will be made.
* @return A non-zero error value on failure and 0 on success.
*/
int mdbx_env_set_syncbytes(MDB_env *env, size_t bytes);
/** @brief Returns a lag of the reading.
*
* Returns information for estimating how far the given read-only
* transaction is lagging relative to the actual head.
*
* @param[in] txn A transaction handle returned by #mdb_txn_begin()
* @param[out] percent Percentage of page allocation in the database.
* @return Number of transactions committed after the given one was started for read, or -1 on failure.
*/
int mdbx_txn_straggler(MDB_txn *txn, int *percent);
/** @brief A callback function for killing laggard readers,
* though it could also wait for them. Called in case of an MDB_MAP_FULL error.
*
* @param[in] env An environment handle returned by #mdb_env_create().
* @param[in] pid pid of the reader process.
* @param[in] thread_id thread_id of the reader thread.
* @param[in] txn Transaction number on which stalled.
* @param[in] gap the lag from the last committed txn.
* @param[in] retry a retry number, less than zero to notify the end of the OOM loop.
* @return -1 on failure (reader is not killed),
* 0 on a race condition (no such reader),
* 1 on success (reader was killed),
* >1 on success (reader was SURE killed).
*/
typedef int (MDBX_oom_func)(MDB_env *env, int pid, void* thread_id, size_t txn, unsigned gap, int retry);
/** @brief Set the OOM callback.
*
* The callback will be called only in the out-of-pages case, to kill
* laggard readers and allow reclaiming of freeDB.
*
* @param[in] env An environment handle returned by #mdb_env_create().
* @param[in] oom_func A #MDBX_oom_func function or NULL to disable.
*/
void mdbx_env_set_oomfunc(MDB_env *env, MDBX_oom_func *oom_func);
/** @brief Get the current oom_func callback.
*
* The callback will be called only in the out-of-pages case, to kill
* laggard readers and allow reclaiming of freeDB.
*
* @param[in] env An environment handle returned by #mdb_env_create().
* @return A #MDBX_oom_func function or NULL if disabled.
*/
MDBX_oom_func* mdbx_env_get_oomfunc(MDB_env *env);
#define MDBX_DBG_ASSERT 1
#define MDBX_DBG_PRINT 2
#define MDBX_DBG_TRACE 4
#define MDBX_DBG_EXTRA 8
#define MDBX_DBG_AUDIT 16
#define MDBX_DBG_EDGE 32
/* LY: a "don't touch" value */
#define MDBX_DBG_DNT (-1L)
typedef void MDBX_debug_func(int type, const char *function, int line,
const char *msg, va_list args);
int mdbx_setup_debug(int flags, MDBX_debug_func* logger, long edge_txn);
typedef int MDBX_pgvisitor_func(size_t pgno, unsigned pgnumber, void* ctx,
const char* dbi, const char *type, int nentries,
int payload_bytes, int header_bytes, int unused_bytes);
int mdbx_env_pgwalk(MDB_txn *txn, MDBX_pgvisitor_func* visitor, void* ctx);
/** @} */
#ifdef __cplusplus
}
#endif
#endif /* _MDBX_H_ */

View File

@ -74,7 +74,7 @@ int main(int argc,char * argv[])
env_oflags = 0;
}
/* LY: especially here we always needs MDB_NOSYNC
* for testing mdb_env_close_ex() and "redo-to-steady" on open. */
* for testing mdbx_env_close_ex() and "redo-to-steady" on open. */
env_oflags |= MDB_NOSYNC;
E(mdb_env_open(env, DBPATH, env_oflags, 0664));
@ -159,7 +159,7 @@ int main(int argc,char * argv[])
mdb_dbi_close(env, dbi);
/********************* LY: kept DB dirty ****************/
mdb_env_close_ex(env, 1);
mdbx_env_close_ex(env, 1);
E(mdb_env_create(&env));
E(mdb_env_set_maxdbs(env, 4));
E(mdb_env_open(env, DBPATH, env_oflags, 0664));
@ -194,7 +194,7 @@ int main(int argc,char * argv[])
mdb_txn_abort(txn);
mdb_dbi_close(env, dbi);
mdb_env_close_ex(env, 0);
mdbx_env_close_ex(env, 0);
return 0;
}