mdbx: add latency gathering for commit stages.

Change-Id: If68ceb6e69e5e565ce9de0fd9a80424b6da280c5
This commit is contained in:
Leonid Yuriev 2020-10-15 19:23:14 +03:00
parent 7cf92b66cf
commit f73a8a8680
3 changed files with 81 additions and 10 deletions

View File

@ -12,6 +12,7 @@ TODO:
Added features: Added features:
- Provided package for [buildroot](https://buildroot.org/). - Provided package for [buildroot](https://buildroot.org/).
- Added `mdbx_env_delete()` for deleting environment files in a proper and multiprocess-safe way. - Added `mdbx_env_delete()` for deleting environment files in a proper and multiprocess-safe way.
- Added `mdbx_txn_commit_ex()`, which commits a transaction and collects latency information.
Fixes: Fixes:

36
mdbx.h
View File

@ -2921,6 +2921,38 @@ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_txn_flags(const MDBX_txn *txn);
MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API uint64_t MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API uint64_t
mdbx_txn_id(const MDBX_txn *txn); mdbx_txn_id(const MDBX_txn *txn);
/** \brief Latency of commit stages, in units of 1/65536 of a second.
* \details Filled in by mdbx_txn_commit_ex() when a non-NULL pointer is
* passed. The per-stage fields are reported as zero when the corresponding
* stage timestamps were not taken.
* \warning This structure may be changed in future releases.
* \see mdbx_txn_commit_ex() */
struct MDBX_commit_latency {
/** \brief Duration of preparation (committing child transactions, updating
* sub-database records and destroying cursors). */
uint32_t preparation;
/** \brief Duration of GC/freeDB handling & update.
* \note NOTE(review): when the internal audit is enabled, the implementation
* appears to move the end-of-GC timestamp to the end of the audit, so this
* value would also include the audit duration - confirm. */
uint32_t gc;
/** \brief Duration of the internal audit, if enabled (zero otherwise). */
uint32_t audit;
/** \brief Duration of writing dirty/modified data pages. */
uint32_t write;
/** \brief Duration of syncing written data to the disk/storage. */
uint32_t sync;
/** \brief Duration of transaction ending (releasing resources). */
uint32_t ending;
/** \brief The total duration of a commit. */
uint32_t whole;
};
/* In C a struct tag is not a type name by itself, so provide a typedef that
* lets `MDBX_commit_latency` be used without the `struct` keyword. In C++ the
* tag is already usable as a type name, hence the guard. */
#ifndef __cplusplus
/** \ingroup c_statinfo */
typedef struct MDBX_commit_latency MDBX_commit_latency;
#endif
/** \brief Commit all the operations of a transaction into the database and
* collect latency information.
* \see mdbx_txn_commit()
* \ingroup c_statinfo
* \warning This function may be changed in future releases.
*
* \param [in] txn      The transaction handle to commit.
* \param [out] latency Optional pointer to a structure that receives the
*                      durations of the commit stages. May be NULL, in which
*                      case nothing is reported (this is how
*                      mdbx_txn_commit() invokes this function).
*
* \returns The result of the commit; see mdbx_txn_commit() for the
*          documented return codes. */
LIBMDBX_API int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency);
/** \brief Commit all the operations of a transaction into the database. /** \brief Commit all the operations of a transaction into the database.
* \ingroup c_transactions * \ingroup c_transactions
* *
@ -2958,7 +2990,9 @@ mdbx_txn_id(const MDBX_txn *txn);
* \retval MDBX_ENOSPC No more disk space. * \retval MDBX_ENOSPC No more disk space.
* \retval MDBX_EIO A system-level I/O error occurred. * \retval MDBX_EIO A system-level I/O error occurred.
* \retval MDBX_ENOMEM Out of memory. */ * \retval MDBX_ENOMEM Out of memory. */
LIBMDBX_API int mdbx_txn_commit(MDBX_txn *txn); LIBMDBX_INLINE_API(int, mdbx_txn_commit, (MDBX_txn * txn)) {
return mdbx_txn_commit_ex(txn, NULL);
}
/** \brief Abandon all the operations of the transaction instead of saving them. /** \brief Abandon all the operations of the transaction instead of saving them.
* \ingroup c_transactions * \ingroup c_transactions

View File

@ -8025,12 +8025,18 @@ static __always_inline bool mdbx_txn_dbi_exists(MDBX_txn *txn, MDBX_dbi dbi,
return mdbx_txn_import_dbi(txn, dbi); return mdbx_txn_import_dbi(txn, dbi);
} }
int mdbx_txn_commit(MDBX_txn *txn) { int mdbx_txn_commit(MDBX_txn *txn) { return __inline_mdbx_txn_commit(txn); }
int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
STATIC_ASSERT(MDBX_TXN_FINISHED == STATIC_ASSERT(MDBX_TXN_FINISHED ==
MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD - MDBX_TXN_ERROR); MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD - MDBX_TXN_ERROR);
const uint64_t ts_0 = latency ? mdbx_osal_monotime() : 0;
uint64_t ts_1 = 0, ts_2 = 0, ts_3 = 0, ts_4 = 0;
uint32_t audit_duration = 0;
int rc = check_txn(txn, MDBX_TXN_FINISHED); int rc = check_txn(txn, MDBX_TXN_FINISHED);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
return rc; goto provide_latency;
if (unlikely(txn->mt_flags & MDBX_TXN_ERROR)) { if (unlikely(txn->mt_flags & MDBX_TXN_ERROR)) {
rc = MDBX_RESULT_TRUE; rc = MDBX_RESULT_TRUE;
@ -8041,7 +8047,8 @@ int mdbx_txn_commit(MDBX_txn *txn) {
#if MDBX_ENV_CHECKPID #if MDBX_ENV_CHECKPID
if (unlikely(env->me_pid != mdbx_getpid())) { if (unlikely(env->me_pid != mdbx_getpid())) {
env->me_flags |= MDBX_FATAL_ERROR; env->me_flags |= MDBX_FATAL_ERROR;
return MDBX_PANIC; rc = MDBX_PANIC;
goto provide_latency;
} }
#endif /* MDBX_ENV_CHECKPID */ #endif /* MDBX_ENV_CHECKPID */
@ -8052,7 +8059,7 @@ int mdbx_txn_commit(MDBX_txn *txn) {
goto done; goto done;
if (txn->mt_child) { if (txn->mt_child) {
rc = mdbx_txn_commit(txn->mt_child); rc = mdbx_txn_commit_ex(txn->mt_child, NULL);
mdbx_tassert(txn, txn->mt_child == NULL); mdbx_tassert(txn, txn->mt_child == NULL);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
goto fail; goto fail;
@ -8107,6 +8114,7 @@ int mdbx_txn_commit(MDBX_txn *txn) {
parent->mt_dbistate[i] = txn->mt_dbistate[i] | (parent->mt_dbistate[i] & parent->mt_dbistate[i] = txn->mt_dbistate[i] | (parent->mt_dbistate[i] &
(DBI_CREAT | DBI_FRESH)); (DBI_CREAT | DBI_FRESH));
} }
ts_1 = latency ? mdbx_osal_monotime() : 0;
/* Remove refunded pages from parent's dirty & spill lists */ /* Remove refunded pages from parent's dirty & spill lists */
MDBX_DPL dst = mdbx_dpl_sort(parent->tw.dirtylist); MDBX_DPL dst = mdbx_dpl_sort(parent->tw.dirtylist);
@ -8278,6 +8286,7 @@ int mdbx_txn_commit(MDBX_txn *txn) {
parent->mt_flags |= MDBX_TXN_SPILLS; parent->mt_flags |= MDBX_TXN_SPILLS;
} }
ts_2 = latency ? mdbx_osal_monotime() : 0;
/* Append our loose page list to parent's */ /* Append our loose page list to parent's */
if (txn->tw.loose_pages) { if (txn->tw.loose_pages) {
MDBX_page **lp = &parent->tw.loose_pages; MDBX_page **lp = &parent->tw.loose_pages;
@ -8299,8 +8308,6 @@ int mdbx_txn_commit(MDBX_txn *txn) {
env->me_txn = parent; env->me_txn = parent;
parent->mt_child = NULL; parent->mt_child = NULL;
txn->mt_signature = 0;
mdbx_free(txn);
mdbx_tassert(parent, mdbx_dirtylist_check(parent)); mdbx_tassert(parent, mdbx_dirtylist_check(parent));
/* Scan parent's loose page for suitable for refund */ /* Scan parent's loose page for suitable for refund */
@ -8310,8 +8317,13 @@ int mdbx_txn_commit(MDBX_txn *txn) {
break; break;
} }
} }
ts_4 = ts_3 = latency ? mdbx_osal_monotime() : 0;
txn->mt_signature = 0;
mdbx_free(txn);
mdbx_tassert(parent, mdbx_dirtylist_check(parent)); mdbx_tassert(parent, mdbx_dirtylist_check(parent));
return MDBX_SUCCESS; rc = MDBX_SUCCESS;
goto provide_latency;
} }
mdbx_tassert(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == mdbx_tassert(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length ==
@ -8360,17 +8372,23 @@ int mdbx_txn_commit(MDBX_txn *txn) {
} }
} }
ts_1 = latency ? mdbx_osal_monotime() : 0;
rc = mdbx_update_gc(txn); rc = mdbx_update_gc(txn);
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
goto fail; goto fail;
ts_2 = latency ? mdbx_osal_monotime() : 0;
if (mdbx_audit_enabled()) { if (mdbx_audit_enabled()) {
rc = mdbx_audit_ex(txn, MDBX_PNL_SIZE(txn->tw.retired_pages), true); rc = mdbx_audit_ex(txn, MDBX_PNL_SIZE(txn->tw.retired_pages), true);
const uint64_t audit_end = mdbx_osal_monotime();
audit_duration = mdbx_osal_monotime_to_16dot16(audit_end - ts_2);
ts_2 = audit_end;
if (unlikely(rc != MDBX_SUCCESS)) if (unlikely(rc != MDBX_SUCCESS))
goto fail; goto fail;
} }
rc = mdbx_page_flush(txn, 0); rc = mdbx_page_flush(txn, 0);
ts_3 = latency ? mdbx_osal_monotime() : 0;
if (likely(rc == MDBX_SUCCESS)) { if (likely(rc == MDBX_SUCCESS)) {
if (txn->mt_dbs[MAIN_DBI].md_flags & DBI_DIRTY) if (txn->mt_dbs[MAIN_DBI].md_flags & DBI_DIRTY)
txn->mt_dbs[MAIN_DBI].md_mod_txnid = pp_txnid2chk(txn); txn->mt_dbs[MAIN_DBI].md_mod_txnid = pp_txnid2chk(txn);
@ -8392,6 +8410,7 @@ int mdbx_txn_commit(MDBX_txn *txn) {
rc = mdbx_sync_locked( rc = mdbx_sync_locked(
env, env->me_flags | txn->mt_flags | MDBX_SHRINK_ALLOWED, &meta); env, env->me_flags | txn->mt_flags | MDBX_SHRINK_ALLOWED, &meta);
} }
ts_4 = latency ? mdbx_osal_monotime() : 0;
if (unlikely(rc != MDBX_SUCCESS)) { if (unlikely(rc != MDBX_SUCCESS)) {
env->me_flags |= MDBX_FATAL_ERROR; env->me_flags |= MDBX_FATAL_ERROR;
goto fail; goto fail;
@ -8400,11 +8419,28 @@ int mdbx_txn_commit(MDBX_txn *txn) {
end_mode = MDBX_END_COMMITTED | MDBX_END_UPDATE | MDBX_END_EOTDONE; end_mode = MDBX_END_COMMITTED | MDBX_END_UPDATE | MDBX_END_EOTDONE;
done: done:
return mdbx_txn_end(txn, end_mode); rc = mdbx_txn_end(txn, end_mode);
provide_latency:
if (latency) {
latency->audit = audit_duration;
latency->preparation =
ts_1 ? mdbx_osal_monotime_to_16dot16(ts_1 - ts_0) : 0;
latency->gc =
(ts_1 && ts_2) ? mdbx_osal_monotime_to_16dot16(ts_2 - ts_1) : 0;
latency->write =
(ts_2 && ts_3) ? mdbx_osal_monotime_to_16dot16(ts_3 - ts_2) : 0;
latency->sync =
(ts_3 && ts_4) ? mdbx_osal_monotime_to_16dot16(ts_4 - ts_3) : 0;
const uint64_t ts_5 = mdbx_osal_monotime();
latency->ending = ts_4 ? mdbx_osal_monotime_to_16dot16(ts_5 - ts_4) : 0;
latency->whole = mdbx_osal_monotime_to_16dot16(ts_5 - ts_0);
}
return rc;
fail: fail:
mdbx_txn_abort(txn); mdbx_txn_abort(txn);
return rc; goto provide_latency;
} }
static __cold int static __cold int