mdbx: preliminary draft of mdbx_get_cached() (!!! NOT TESTED AT ALL !!!)

This commit is contained in:
Леонид Юрьев (Leonid Yuriev)
2025-09-06 01:18:01 +03:00
parent 5d12764a8f
commit 16c994413c
2 changed files with 243 additions and 0 deletions

44
mdbx.h
View File

@@ -4923,6 +4923,50 @@ LIBMDBX_API int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MD
* \retval MDBX_EINVAL An invalid parameter was specified. */
LIBMDBX_API int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data);
/** \brief Caller-owned single-value cache slot for \ref mdbx_get_cached.
 * \ingroup c_crud
 *
 * Field semantics as implemented (draft API — confirm before release):
 *  - `data` holds the most recently looked-up value; `iov_base == NULL`
 *    encodes a cached "not found" result;
 *  - `trunk_txnid` is the txnid that last modified the cached value;
 *  - `last_confirmed_txnid` is the newest committed MVCC snapshot for which
 *    the cached value has been validated (invariant:
 *    `trunk_txnid <= last_confirmed_txnid`).
 *
 * Must be initialized with \ref mdbx_init_cache before first use. */
typedef struct MDBX_cache_entry {
  MDBX_val data;
  uint64_t trunk_txnid, last_confirmed_txnid;
} MDBX_cache_entry;
/** \brief Resets a \ref MDBX_cache_entry to its pristine state: no cached
 * value and nothing confirmed (all txnid marks zeroed).
 * \ingroup c_crud */
LIBMDBX_INLINE_API(void, mdbx_init_cache, (MDBX_cache_entry * entry)) {
  entry->trunk_txnid = 0;
  entry->last_confirmed_txnid = 0;
  entry->data.iov_len = 0;
  entry->data.iov_base = NULL;
}
/** \brief Classification of how \ref mdbx_get_cached obtained (or failed to
 * obtain) the value, returned in \ref MDBX_cache_result.
 * \ingroup c_crud */
typedef enum MDBX_cache_status {
  MDBX_CACHE_DIRTY = 0,     /**< value reflects an uncommitted state (or no entry
                                 was supplied); result was NOT stored in the cache */
  MDBX_CACHE_CONFIRMED = 1, /**< cached value re-validated for the current snapshot;
                                 only `last_confirmed_txnid` was advanced */
  MDBX_CACHE_REFRESHED = 2, /**< cache entry was overwritten with a freshly read,
                                 committed value */
  MDBX_CACHE_HIT = 3,       /**< fast path: cached value returned without touching
                                 the b-tree */
  MDBX_CACHE_BEHIND = -1,   /**< txn snapshot is older than the cached MVCC-range;
                                 the cache was bypassed via a plain get */
  MDBX_CACHE_ERROR = -2,    /**< a failure occurred; see
                                 \ref MDBX_cache_result "errcode" */
} MDBX_cache_status_t;
/** \brief Paired result of \ref mdbx_get_cached: the usual MDBX error code
 * plus a \ref MDBX_cache_status describing the cache interaction.
 * \ingroup c_crud
 *
 * \note `errcode` may be \ref MDBX_NOTFOUND (or another non-success code from
 * the underlying get) even when `status` is not \ref MDBX_CACHE_ERROR, e.g.
 * on the DIRTY/BEHIND delegation paths. */
typedef struct MDBX_cache_result {
  MDBX_error_t errcode;       /**< \ref MDBX_SUCCESS, \ref MDBX_NOTFOUND or a failure code */
  MDBX_cache_status_t status; /**< how the value was (not) obtained, see
                                   \ref MDBX_cache_status */
} MDBX_cache_result_t;
/** \brief Gets an item from a table, using a caller-provided single-entry
 * cache to skip the b-tree lookup when the cached value is provably still
 * valid for the transaction's MVCC snapshot.
 * \ingroup c_crud
 *
 * Behaves like \ref mdbx_get, but additionally consults and maintains
 * `entry` (which must be initialized via \ref mdbx_init_cache). With a NULL
 * `entry` it degenerates to a plain \ref mdbx_get.
 *
 * \note `data` must not point at `entry->data` (rejected with MDBX_EINVAL).
 *
 * \warning Preliminary draft, NOT TESTED — the result carries both an error
 * code and a \ref MDBX_cache_status, see \ref MDBX_cache_result. */
LIBMDBX_API MDBX_cache_result_t mdbx_get_cached(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data,
                                                MDBX_cache_entry *entry);
/** \brief Store items into a table.
* \ingroup c_crud
*

View File

@@ -133,6 +133,205 @@ int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data
return MDBX_SUCCESS;
}
/* Out-of-line instantiation of the header's inline initializer, so the symbol
 * is also exported from the library for consumers that do not inline it. */
LIBMDBX_API void mdbx_init_cache(MDBX_cache_entry *entry) { __inline_mdbx_init_cache(entry); }
MDBX_MAYBE_UNUSED static inline bool is_outside_dxb(const MDBX_txn *txn, const void *ptr) {
const MDBX_env *env = txn->env;
const ptrdiff_t offset = ptr_dist(ptr, env->dxb_mmap.base);
return offset < 0 || (size_t)offset >= pgno2bytes(env, txn->geo.first_unallocated);
}
/* Returns true when page `mp` carries a txnid stamp not older than the current
 * transaction, i.e. it was touched by a not-yet-committed (relative to `txn`)
 * writer. Preconditions (asserted): `txn` is a write transaction and `mp`
 * lies inside the allocated part of the map. */
MDBX_MAYBE_UNUSED static inline bool is_not_commited(const MDBX_txn *txn, const page_t *mp) {
  /* caller must hold a write transaction */
  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
  /* the page must lie within [map base, first_unallocated) */
  tASSERT(txn, mp >= (const page_t *)txn->env->dxb_mmap.base &&
                   mp < (const page_t *)(ptr_disp(txn->env->dxb_mmap.base,
                                                  pgno2bytes(txn->env, txn->geo.first_unallocated))));
  return mp->txnid >= txn->txnid;
}
MDBX_MAYBE_UNUSED static inline bool is_inside_dxb_and_commited(const MDBX_txn *txn, const void *ptr) {
return !is_outside_dxb(txn, ptr) && !is_not_commited(txn, ptr2page(txn->env, ptr));
}
/* Packs an error code and a cache status into the by-value result pair. */
static inline MDBX_cache_result_t cache_result(int err, MDBX_cache_status_t status) {
  return (MDBX_cache_result_t){.errcode = err, .status = status};
}
/* Shorthand for a failure result: status is always MDBX_CACHE_ERROR and the
 * code must be a genuine error, not a success/boolean pseudo-code. */
static inline MDBX_cache_result_t cache_error(int err) {
  assert(!(err == MDBX_SUCCESS || err == MDBX_RESULT_TRUE));
  return cache_result(err, MDBX_CACHE_ERROR);
}
/* Cached point-lookup: behaves like mdbx_get(), but consults and maintains the
 * caller-provided single-value cache `entry`.
 *
 * Cache-entry protocol (as implemented below):
 *  - entry->data: the last known value; iov_base == NULL encodes "not found";
 *  - entry->trunk_txnid: txnid that last modified the cached value;
 *  - entry->last_confirmed_txnid: the newest committed snapshot for which the
 *    cached value has been validated; invariant: trunk <= last_confirmed.
 *
 * The returned pair carries the usual MDBX error code plus a cache status
 * telling how the value was obtained (HIT/CONFIRMED/REFRESHED) or why the
 * cache was not applicable (DIRTY/BEHIND/ERROR). */
MDBX_cache_result_t mdbx_get_cached(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data,
                                    MDBX_cache_entry *entry) {
  DKBUF_DEBUG;
  DEBUG("===> cached-get db %u key [%s]", dbi, DKEY_DEBUG(key));

  /* Without an entry this degenerates to a plain mdbx_get(); report DIRTY
   * since nothing was (nor could be) cached. */
  if (unlikely(!entry))
    return cache_result(mdbx_get(txn, dbi, key, data), MDBX_CACHE_DIRTY);

  /* `data` must be a distinct output buffer: it is copied into entry->data. */
  if (unlikely(!key || !data || data == &entry->data))
    return cache_error(LOG_IFERR(MDBX_EINVAL));

  /* A broken invariant means the entry is corrupted or was never initialized. */
  if (unlikely(entry->trunk_txnid > entry->last_confirmed_txnid))
    return cache_error(LOG_IFERR(MDBX_INVALID));

  STATIC_ASSERT(MDBX_TXN_BLOCKED > MDBX_TXN_PARKED);
  int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_PARKED /* avoid resume parked transaction until slow-path */);
  if (unlikely(err != MDBX_SUCCESS))
    return cache_error(LOG_IFERR(err));

  if (unlikely(txn->txnid < entry->trunk_txnid))
    /* the used/read MVCC-snapshot is behind the cached MVCC-range. */
    return cache_result(mdbx_get(txn, dbi, key, data), MDBX_CACHE_BEHIND);

  if (likely(txn->txnid <= entry->last_confirmed_txnid)) {
    /* cache-hit fast-path: the snapshot lies inside the validated range. */
    *data = entry->data;
    return cache_result(data->iov_base ? MDBX_SUCCESS : MDBX_NOTFOUND, MDBX_CACHE_HIT);
  }

  /* Slow path: the snapshot is newer than the validated range, so the tree
   * must be (at least partially) consulted. */
  if (unlikely(txn->flags & MDBX_TXN_PARKED)) {
    err = mdbx_txn_unpark((MDBX_txn *)txn, false);
    if (unlikely(err != MDBX_SUCCESS))
      return cache_error(LOG_IFERR(err));
  }
  err = dbi_check(txn, dbi);
  if (unlikely(err != MDBX_SUCCESS))
    return cache_error(LOG_IFERR(err));

  /* Newest txnid whose changes are certainly committed as observed by this
   * txn: the snapshot itself for readers, the previous commit for writers. */
  const uint64_t committed_snapshot_txnid = (txn->flags & MDBX_TXN_RDONLY) ? txn->txnid : txn->txnid - xMDBX_TXNID_STEP;
  txnid_t trunk_txnid = txn->txnid;
  if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) {
    err = tbl_fetch((MDBX_txn *)txn, dbi);
    if (unlikely(err != MDBX_SUCCESS)) {
      if (err == MDBX_NOTFOUND) {
        /* the corresponding table has been deleted */
      not_found:
        /* Shared "no value" tail; `err` is MDBX_NOTFOUND on every route here.
         * FIX: the `goto` sites below now set it explicitly — previously they
         * arrived with the stale MDBX_SUCCESS of the preceding call, which
         * contradicted the HIT path reporting MDBX_NOTFOUND for absence. */
        data->iov_base = nullptr;
        data->iov_len = 0;
        if (trunk_txnid > committed_snapshot_txnid)
          return cache_result(err, MDBX_CACHE_DIRTY);
        if (entry->trunk_txnid == trunk_txnid) {
          tASSERT(txn, trunk_txnid < committed_snapshot_txnid && trunk_txnid <= entry->last_confirmed_txnid);
          tASSERT(txn, !entry->data.iov_base && !entry->data.iov_len);
          entry->last_confirmed_txnid = committed_snapshot_txnid;
          return cache_result(err, MDBX_CACHE_CONFIRMED);
        }
        tASSERT(txn, trunk_txnid <= committed_snapshot_txnid && trunk_txnid > entry->last_confirmed_txnid &&
                         trunk_txnid > entry->trunk_txnid);
        entry->data = *data;
        entry->trunk_txnid = trunk_txnid;
        entry->last_confirmed_txnid = committed_snapshot_txnid;
        return cache_result(err, MDBX_CACHE_REFRESHED);
      }
      return cache_error(LOG_IFERR(err));
    }
  }

  if (txn->dbs[dbi].mod_txnid /* tree->mod_txnid maybe zero in a legacy DB */)
    trunk_txnid = txn->dbs[dbi].mod_txnid;
  if ((txn->flags & MDBX_TXN_RDONLY) == 0) {
    const MDBX_txn *scan = txn;
    do
      if ((scan->flags & MDBX_TXN_DIRTY) && (dbi == MAIN_DBI || (scan->dbi_state[dbi] & DBI_DIRTY))) {
        /* After commits of nested transactions, mod_txnid may exceed front */
        trunk_txnid = scan->front_txnid;
        break;
      }
    while (unlikely((scan = scan->parent) != nullptr));
  }

  if (trunk_txnid <= entry->last_confirmed_txnid) {
    tASSERT(txn, (txn->dbi_state[dbi] & DBI_DIRTY) == 0);
  cache_confirmed:
    /* The cached value is still authoritative — just extend the confirmed range. */
    tASSERT(txn, trunk_txnid < committed_snapshot_txnid && trunk_txnid <= entry->last_confirmed_txnid);
    tASSERT(txn, trunk_txnid == entry->trunk_txnid);
    *data = entry->data;
    entry->last_confirmed_txnid = committed_snapshot_txnid;
    /* NOTE(review): is_inside_dxb_and_commited() asserts a write transaction,
     * but this path looks reachable for read-only txns too — confirm. */
    tASSERT(txn, !data->iov_base || is_inside_dxb_and_commited(txn, data->iov_base));
    return cache_result(data->iov_base ? MDBX_SUCCESS : MDBX_NOTFOUND, MDBX_CACHE_CONFIRMED);
  }

  if (unlikely(txn->dbs[dbi].root == P_INVALID)) {
    /* the corresponding table is empty now */
    err = MDBX_NOTFOUND;
    goto not_found;
  }

  cursor_couple_t cx;
  err = cursor_init(&cx.outer, txn, dbi);
  if (unlikely(err != MDBX_SUCCESS))
    return cache_error(LOG_IFERR(err));
  alignkey_t aligned;
  err = check_key(&cx.outer, key, &aligned);
  if (unlikely(err != MDBX_SUCCESS))
    return cache_error(LOG_IFERR(err));

  /* Manual descent from the root, tracking the newest page-txnid seen along
   * the search path (trunk_txnid) and short-circuiting to `cache_confirmed`
   * as soon as no page on the path can be newer than the validated range. */
  cx.outer.top = 0;
  cx.outer.ki[0] = 0;
  err = page_get(&cx.outer, txn->dbs[dbi].root, &cx.outer.pg[0], trunk_txnid);
  if (unlikely(err != MDBX_SUCCESS))
    return cache_error(LOG_IFERR(err));
  page_t *mp = cx.outer.pg[0];
  if ((trunk_txnid = mp->txnid) <= entry->last_confirmed_txnid)
    goto cache_confirmed;
  intptr_t ki = page_numkeys(mp) - 1;
  while (is_branch(mp)) {
    /* NOTE(review): this searches with the raw `key`, while the leaf lookup
     * below uses `aligned.key` — confirm this is intentional. */
    const struct node_search_result nsr = node_search(&cx.outer, key);
    if (likely(nsr.node))
      ki = cx.outer.ki[cx.outer.top] + (intptr_t)nsr.exact - 1;
    err = page_get(&cx.outer, node_pgno(page_node(mp, ki)), &mp, trunk_txnid);
    if (unlikely(err != MDBX_SUCCESS))
      return cache_error(LOG_IFERR(err));
    if ((trunk_txnid = mp->txnid) <= entry->last_confirmed_txnid)
      goto cache_confirmed;
    ki = page_numkeys(mp) - 1;
    err = cursor_push(&cx.outer, mp, ki);
    if (unlikely(err != MDBX_SUCCESS))
      return cache_error(LOG_IFERR(err));
  }

  if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(&cx.outer, mp))) {
    ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", mp->pgno, mp->flags);
    err = MDBX_CORRUPTED;
    return cache_error(LOG_IFERR(err));
  }

  struct node_search_result nsr = node_search(&cx.outer, &aligned.key);
  if (!nsr.exact) {
    err = MDBX_NOTFOUND;
    goto not_found;
  }
  if (unlikely(node_flags(nsr.node) & N_DUP)) {
    /* TODO: It is possible to implement support, but need to think through the usage scenarios */
    err = MDBX_EMULTIVAL;
    return cache_error(LOG_IFERR(err));
  }
  err = node_read(&cx.outer, nsr.node, data, mp);
  if (unlikely(err != MDBX_SUCCESS))
    return cache_error(LOG_IFERR(err));

  if (trunk_txnid > committed_snapshot_txnid) {
    /* Value stems from (possibly) uncommitted changes: usable, but must not
     * be cached. */
    tASSERT(txn, trunk_txnid > entry->last_confirmed_txnid && trunk_txnid > entry->trunk_txnid);
    return cache_result(MDBX_SUCCESS, MDBX_CACHE_DIRTY);
  }

  /* Committed value newer than the cached one: refresh the entry. */
  tASSERT(txn, is_inside_dxb_and_commited(txn, data->iov_base));
  tASSERT(txn, trunk_txnid <= committed_snapshot_txnid && trunk_txnid > entry->last_confirmed_txnid &&
                   trunk_txnid > entry->trunk_txnid);
  entry->data = *data;
  entry->trunk_txnid = trunk_txnid;
  entry->last_confirmed_txnid = committed_snapshot_txnid;
  return cache_result(MDBX_SUCCESS, MDBX_CACHE_REFRESHED);
}
/*----------------------------------------------------------------------------*/
int mdbx_canary_put(MDBX_txn *txn, const MDBX_canary *canary) {