mdbx: add mdbx_txn_info().

Change-Id: I7bef500c23899874c996694b7cc52a38366730f0
This commit is contained in:
Leonid Yuriev 2019-09-21 13:08:23 +03:00
parent 0b500798df
commit 83fbcb660f
2 changed files with 172 additions and 3 deletions

65
mdbx.h
View File

@ -2048,6 +2048,69 @@ LIBMDBX_API int mdbx_env_set_assert(MDBX_env *env, MDBX_assert_func *func);
LIBMDBX_API int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, unsigned flags,
MDBX_txn **txn);
/* Information about a transaction, as reported by mdbx_txn_info().
 *
 * Several fields have a different meaning for READ-ONLY and for WRITE
 * transactions; both meanings are given with each field. Fields marked
 * "needs scan_rlt" are complete only when mdbx_txn_info() is called with a
 * non-zero scan_rlt argument. */
typedef struct MDBX_txn_info {
  /* The ID of the transaction. For a READ-ONLY transaction this corresponds
   * to the MVCC-snapshot being read. */
  uint64_t txn_id;

  /* READ-ONLY: the lag from the most recent MVCC-snapshot, i.e. the number of
   * transactions committed since this read transaction started.
   * WRITE (needs scan_rlt): the lag of the oldest reader from the current
   * transaction, i.e. at least 1 if any reader is running. */
  uint64_t txn_reader_lag;

  /* Space used by this transaction, i.e. the size corresponding to the last
   * used database page. */
  uint64_t txn_space_used;

  /* Current size of the database file. */
  uint64_t txn_space_limit_soft;

  /* Upper bound for the size of the database file, i.e. the value of the
   * "size_upper" argument of the appropriate mdbx_env_set_geometry() call. */
  uint64_t txn_space_limit_hard;

  /* READ-ONLY: the total size of the database pages that were retired by
   * write transactions committed after the reader's MVCC-snapshot, i.e. the
   * space that would be freed for reuse once the reader releases its
   * MVCC-snapshot by completing the read transaction.
   * WRITE: the summarized size of the database pages retired so far due to
   * Copy-On-Write during this transaction. */
  uint64_t txn_space_retired;

  /* READ-ONLY: the space still available to writer(s), i.e. the amount that
   * must be exhausted before there is a reason to call the OOM-killer for
   * this read transaction.
   * WRITE: the space inside the transaction that is left before the
   * MDBX_TXN_FULL error. */
  uint64_t txn_space_leftover;

  /* READ-ONLY (needs scan_rlt): the retired distance to the next more recent
   * reader, i.e. the space that actually becomes available for reuse when
   * only this transaction is finished.
   * WRITE: the summarized size of the dirty database pages generated during
   * this transaction. */
  uint64_t txn_space_dirty;
} MDBX_txn_info;
/* Return information about the MDBX transaction.
 *
 * [in] txn       A transaction handle returned by mdbx_txn_begin().
 * [out] info     The address of an MDBX_txn_info structure
 *                where the information will be copied.
 * [in] scan_rlt  The boolean flag that controls the scan of the read lock
 *                table to provide complete information. Such a scan is
 *                relatively expensive and you can avoid it if the
 *                corresponding fields are not needed (see the description of
 *                MDBX_txn_info above).
 *
 * Returns A non-zero error value on failure and 0 on success. */
LIBMDBX_API int mdbx_txn_info(MDBX_txn *txn, MDBX_txn_info *info, int scan_rlt);
/* Returns the transaction's MDBX_env
*
* [in] txn A transaction handle returned by mdbx_txn_begin() */
@ -2059,7 +2122,7 @@ LIBMDBX_API MDBX_env *mdbx_txn_env(MDBX_txn *txn);
*
* [in] txn A transaction handle returned by mdbx_txn_begin()
*
* Returns A transaction flags, valid if input is an active transaction,
* Returns A transaction flags, valid if input is a valid transaction,
* otherwise -1. */
LIBMDBX_API int mdbx_txn_flags(MDBX_txn *txn);

View File

@ -3903,8 +3903,115 @@ int mdbx_txn_begin(MDBX_env *env, MDBX_txn *parent, unsigned flags,
return rc;
}
/* Fill *info with a description of the transaction txn.
 *
 * [in] txn       The transaction to describe; validated by check_txn().
 * [out] info     Receives the result; must not be NULL.
 * [in] scan_rlt  Non-zero to additionally walk the reader table reachable via
 *                env->me_lck. For a read-only transaction this refines
 *                txn_space_dirty, for a write transaction it provides
 *                txn_reader_lag.
 *
 * Returns MDBX_SUCCESS on success, the error reported by check_txn(),
 * MDBX_EINVAL when info is NULL, or (only when built with MDBX_TXN_CHECKPID)
 * MDBX_PANIC when the environment's stored pid differs from mdbx_getpid(). */
int mdbx_txn_info(MDBX_txn *txn, MDBX_txn_info *info, int scan_rlt) {
/* NOTE(review): the mask is MDBX_TXN_BLOCKED with MDBX_TXN_HAS_CHILD
 * subtracted, so presumably a transaction that merely has a child is still
 * accepted here; confirm against check_txn(). */
int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD);
if (unlikely(rc != MDBX_SUCCESS))
return rc;
if (unlikely(!info))
return MDBX_EINVAL;
MDBX_env *const env = txn->mt_env;
#if MDBX_TXN_CHECKPID
/* A pid mismatch marks the environment with MDBX_FATAL_ERROR and fails. */
if (unlikely(env->me_pid != mdbx_getpid())) {
env->me_flags |= MDBX_FATAL_ERROR;
return MDBX_PANIC;
}
#endif /* MDBX_TXN_CHECKPID */
/* These two fields are taken from the transaction itself in both modes. */
info->txn_id = txn->mt_txnid;
info->txn_space_used = pgno2bytes(env, txn->mt_geo.next);
if (txn->mt_flags & MDBX_RDONLY) {
/* Read-only transaction: sizes and lag are measured against the head meta. */
const MDBX_meta *head_meta;
txnid_t head_txnid;
uint64_t head_retired;
do {
/* Fetch info from the volatile head; the loop repeats until both the head
 * meta pointer and its txnid are the same after the reads as before them. */
head_meta = mdbx_meta_head(env);
head_txnid = mdbx_meta_txnid_fluid(env, head_meta);
head_retired = head_meta->mm_pages_retired;
info->txn_space_limit_soft = pgno2bytes(env, head_meta->mm_geo.now);
info->txn_space_limit_hard = pgno2bytes(env, head_meta->mm_geo.upper);
info->txn_space_leftover =
pgno2bytes(env, head_meta->mm_geo.now - head_meta->mm_geo.next);
mdbx_compiler_barrier();
} while (unlikely(head_meta != mdbx_meta_head(env) ||
head_txnid != mdbx_meta_txnid_fluid(env, head_meta)));
/* Lag is the distance between the head txnid and this snapshot's txnid. */
info->txn_reader_lag = head_txnid - info->txn_id;
/* Both stay zero unless this transaction has a reader slot and the head has
 * a larger retired-pages counter than the one recorded in that slot. */
info->txn_space_dirty = info->txn_space_retired = 0;
if (txn->mt_ro_reader &&
head_retired > txn->mt_ro_reader->mr_snapshot_pages_retired) {
/* Without a scan txn_space_dirty is reported equal to txn_space_retired. */
info->txn_space_dirty = info->txn_space_retired = pgno2bytes(
env, (pgno_t)(head_retired -
txn->mt_ro_reader->mr_snapshot_pages_retired));
MDBX_lockinfo *const lck = env->me_lck;
/* The scan is skipped when the lag is 0 or 1 or when there is no lock
 * table. */
if (scan_rlt && info->txn_reader_lag > 1 && lck) {
/* find next more recent reader */
txnid_t next_reader = head_txnid;
const unsigned snap_nreaders = lck->mti_numreaders;
for (unsigned i = 0; i < snap_nreaders; ++i) {
/* Slots with a zero mr_pid are skipped. The slot's txnid and retired counter
 * are read, then re-read after a compiler barrier; the read is retried until
 * both pairs agree. */
retry:
if (lck->mti_readers[i].mr_pid) {
mdbx_jitter4testing(true);
const txnid_t snap_txnid = lck->mti_readers[i].mr_txnid;
const uint64_t snap_retired =
lck->mti_readers[i].mr_snapshot_pages_retired;
mdbx_compiler_barrier();
if (unlikely(snap_txnid != lck->mti_readers[i].mr_txnid ||
snap_retired !=
lck->mti_readers[i].mr_snapshot_pages_retired))
goto retry;
/* Keep the smallest txnid that is above this transaction's and below the
 * current candidate; txn_space_dirty becomes the retired distance to it. */
if (snap_txnid > txn->mt_txnid && snap_txnid < next_reader) {
next_reader = snap_txnid;
info->txn_space_dirty = pgno2bytes(
env, (pgno_t)(snap_retired -
txn->mt_ro_reader->mr_snapshot_pages_retired));
}
}
}
}
}
} else {
/* Write transaction: everything comes from the transaction's own state. */
info->txn_space_limit_soft = pgno2bytes(env, txn->mt_geo.now);
info->txn_space_limit_hard = pgno2bytes(env, txn->mt_geo.upper);
info->txn_space_retired =
pgno2bytes(env, MDBX_PNL_SIZE(txn->mt_retired_pages));
info->txn_space_leftover = pgno2bytes(env, txn->mt_dirtyroom);
info->txn_space_dirty =
pgno2bytes(env, MDBX_DPL_TXNFULL - txn->mt_dirtyroom);
/* INT64_MAX is what is reported when the reader table is not scanned. */
info->txn_reader_lag = INT64_MAX;
MDBX_lockinfo *const lck = env->me_lck;
if (scan_rlt && lck) {
/* With no reader slots the oldest snapshot is this txnid, i.e. lag 0. */
txnid_t oldest_snapshot = txn->mt_txnid;
const unsigned snap_nreaders = lck->mti_numreaders;
if (snap_nreaders) {
oldest_snapshot = mdbx_find_oldest(txn);
if (oldest_snapshot == txn->mt_txnid - 1) {
/* check if there is at least one reader */
bool exists = false;
for (unsigned i = 0; i < snap_nreaders; ++i) {
if (lck->mti_readers[i].mr_pid &&
txn->mt_txnid > lck->mti_readers[i].mr_txnid) {
exists = true;
break;
}
}
/* No slot holds a pid with a txnid below this one: bump the value so the
 * lag computed below is 0 instead of 1. */
oldest_snapshot += !exists;
}
}
info->txn_reader_lag = txn->mt_txnid - oldest_snapshot;
}
}
return MDBX_SUCCESS;
}
/* Returns the transaction's MDBX_env.
 *
 * [in] txn  A transaction handle returned by mdbx_txn_begin()
 *
 * Returns txn->mt_env, or NULL when txn is NULL, when the transaction's
 * signature is not MDBX_MT_SIGNATURE, or when the signature of the
 * environment it points to is not MDBX_ME_SIGNATURE. */
MDBX_env *mdbx_txn_env(MDBX_txn *txn) {
  /* One flat condition, evaluated left to right: the environment signature
   * is examined only after the handle itself has been found non-NULL and
   * correctly signed, and it is examined for every such handle. */
  if (unlikely(!txn || txn->mt_signature != MDBX_MT_SIGNATURE ||
               txn->mt_env->me_signature != MDBX_ME_SIGNATURE)) {
    return NULL;
  }
  return txn->mt_env;
}
@ -3918,7 +4025,6 @@ uint64_t mdbx_txn_id(MDBX_txn *txn) {
/* Returns the flags of a transaction.
 *
 * [in] txn  A transaction handle returned by mdbx_txn_begin()
 *
 * Returns txn->mt_flags, or -1 when txn is NULL or its signature is not
 * MDBX_MT_SIGNATURE. */
int mdbx_txn_flags(MDBX_txn *txn) {
  /* Short-circuit keeps the signature read behind the NULL test. */
  const int handle_is_bad = !txn || txn->mt_signature != MDBX_MT_SIGNATURE;
  if (unlikely(handle_is_bad)) {
    return -1;
  }
  return txn->mt_flags;
}