diff --git a/CMakeLists.txt b/CMakeLists.txt index 1b7e9835..ec9e12cf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -67,6 +67,7 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-dbi.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-env.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-extra.c" + AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-get-cached.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-key-transform.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-misc.c" AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-opts.c" @@ -782,6 +783,7 @@ else() "${MDBX_SOURCE_DIR}/api-dbi.c" "${MDBX_SOURCE_DIR}/api-env.c" "${MDBX_SOURCE_DIR}/api-extra.c" + "${MDBX_SOURCE_DIR}/api-get-cached.c" "${MDBX_SOURCE_DIR}/api-key-transform.c" "${MDBX_SOURCE_DIR}/api-misc.c" "${MDBX_SOURCE_DIR}/api-opts.c" diff --git a/mdbx.h b/mdbx.h index 488feeef..d2879d18 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4923,6 +4923,50 @@ LIBMDBX_API int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MD * \retval MDBX_EINVAL An invalid parameter was specified. 
*/
 LIBMDBX_API int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data);
+/** \brief Caller-owned cache slot for \ref mdbx_get_cached(): a value and the txnid range it is confirmed for.
+ * \ingroup c_crud
+ */
+typedef struct MDBX_cache_entry {
+  MDBX_val data; /**< Cached value; a NULL `iov_base` means the key is cached as absent. */
+  uint64_t trunk_txnid, last_confirmed_txnid; /**< First and last txnid of the range the value is confirmed for. */
+} MDBX_cache_entry;
+
+/** \brief Resets a cache entry to the empty state: no value and a zero txnid range.
+ * \ingroup c_crud
+ */
+LIBMDBX_INLINE_API(void, mdbx_init_cache, (MDBX_cache_entry * entry)) {
+  entry->data.iov_base = NULL;
+  entry->data.iov_len = 0;
+  entry->trunk_txnid = 0;
+  entry->last_confirmed_txnid = 0;
+}
+
+/** \brief Tells how \ref mdbx_get_cached() obtained its result and what happened to the cache entry.
+ * \ingroup c_crud
+ */
+typedef enum MDBX_cache_status {
+  MDBX_CACHE_DIRTY = 0,     /**< Read from the DB and not cached: no entry given, or the data is not committed yet. */
+  MDBX_CACHE_CONFIRMED = 1, /**< Cached value is still actual, its confirmed txnid range has been extended. */
+  MDBX_CACHE_REFRESHED = 2, /**< Read from the DB and stored into the cache entry. */
+  MDBX_CACHE_HIT = 3,       /**< Taken from the cache entry, the snapshot is within the confirmed txnid range. */
+  MDBX_CACHE_BEHIND = -1,   /**< Read from the DB since the snapshot is older than the cached range; entry untouched. */
+  MDBX_CACHE_ERROR = -2,    /**< The lookup failed, see the error code. */
+} MDBX_cache_status_t;
+
+/** \brief Result of \ref mdbx_get_cached(): an error code paired with the cache status.
+ * \ingroup c_crud
+ */
+typedef struct MDBX_cache_result {
+  MDBX_error_t errcode;       /**< MDBX_SUCCESS, MDBX_NOTFOUND or another error code. */
+  MDBX_cache_status_t status; /**< How the result was obtained, see \ref MDBX_cache_status. */
+} MDBX_cache_result_t;
+
+/** \brief Gets the value of a key like \ref mdbx_get(), using and updating a caller-provided cache entry.
+ * \ingroup c_crud
+ */
+LIBMDBX_API MDBX_cache_result_t mdbx_get_cached(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data,
+                                                MDBX_cache_entry *entry);
+
 /** \brief Store items into a table.
* \ingroup c_crud
  *
diff --git a/src/alloy.c b/src/alloy.c
index 9c3cab5e..f2d73b47 100644
--- a/src/alloy.c
+++ b/src/alloy.c
@@ -10,6 +10,7 @@
 #include "api-dbi.c"
 #include "api-env.c"
 #include "api-extra.c"
+#include "api-get-cached.c"
 #include "api-key-transform.c"
 #include "api-misc.c"
 #include "api-opts.c"
diff --git a/src/api-get-cached.c b/src/api-get-cached.c
new file mode 100644
index 00000000..2dec718d
--- /dev/null
+++ b/src/api-get-cached.c
@@ -0,0 +1,242 @@
+/// \copyright SPDX-License-Identifier: Apache-2.0
+/// \author Леонид Юрьев aka Leonid Yuriev \date 2025
+
+#include "internals.h"
+
+/* Exported out-of-line wrapper around the inline implementation of mdbx_init_cache(). */
+LIBMDBX_API void mdbx_init_cache(MDBX_cache_entry *entry) { __inline_mdbx_init_cache(entry); }
+
+/* Returns true if `ptr` lies outside of [dxb_mmap.base, dxb_mmap.base + pgno2bytes(first_unallocated)),
+ * i.e. it does not point into the part of the DB mapping allocated as seen by the transaction. */
+static inline bool is_outside_dxb(const MDBX_txn *txn, const void *ptr) {
+  const MDBX_env *env = txn->env;
+  const ptrdiff_t offset = ptr_dist(ptr, env->dxb_mmap.base);
+  return offset < 0 || (size_t)offset >= pgno2bytes(env, txn->geo.first_unallocated);
+}
+
+/* Returns true if the page's txnid is not less than the txnid of the write transaction `txn`,
+ * i.e. the page does not belong to an already committed snapshot.
+ * Must be called only for write transactions and for pages inside the DB mapping (both are asserted). */
+static inline bool is_not_commited(const MDBX_txn *txn, const page_t *mp) {
+  tASSERT(txn, (txn->flags & MDBX_TXN_RDONLY) == 0);
+  tASSERT(txn, mp >= (const page_t *)txn->env->dxb_mmap.base &&
+                   mp < (const page_t *)(ptr_disp(txn->env->dxb_mmap.base,
+                                                  pgno2bytes(txn->env, txn->geo.first_unallocated))));
+  return mp->txnid >= txn->txnid;
+}
+
+/* Assertion helper: returns true if `ptr` points into the DB mapping and into a page of a committed snapshot. */
+MDBX_MAYBE_UNUSED static inline bool is_inside_dxb_and_commited(const MDBX_txn *txn, const void *ptr) {
+  if (is_outside_dxb(txn, ptr))
+    return false;
+  /* A read-only txn is checked against its own snapshot (committed_snapshot_txnid == txn->txnid in
+   * mdbx_get_cached()), and is_not_commited() asserts a write txn, so it must be consulted for write txns only. */
+  if (txn->flags & MDBX_TXN_RDONLY)
+    return true;
+  return !is_not_commited(txn, ptr2page(txn->env, ptr));
+}
+
+/* Packs an error code and a cache status into the by-value result. */
+static inline MDBX_cache_result_t cache_result(int err, MDBX_cache_status_t status) {
+  MDBX_cache_result_t result = {.errcode = err, .status = status};
+  return result;
+}
+
+/* Builds a failure result with the MDBX_CACHE_ERROR status; `err` must be a real error code. */
+static inline MDBX_cache_result_t cache_error(int err) {
+  assert(err != MDBX_SUCCESS && err != MDBX_RESULT_TRUE);
+  return cache_result(err, MDBX_CACHE_ERROR);
+}
+
+/* Gets the value of `key` from the table `dbi`, using and maintaining the caller-provided cache `entry`.
+ *
+ * The entry holds a value (NULL iov_base means "no such key") together with the txnid range
+ * [trunk_txnid, last_confirmed_txnid] for which it is confirmed. The returned status is:
+ *  - MDBX_CACHE_HIT: the txn's snapshot is inside the range, the cached value is returned without reading the DB;
+ *  - MDBX_CACHE_BEHIND: the snapshot is older than the range, the result comes from mdbx_get(), entry is untouched;
+ *  - MDBX_CACHE_CONFIRMED: the table, or a page on the path to the key, was not changed after
+ *    last_confirmed_txnid, so the cached value is returned and the range is extended to the committed snapshot;
+ *  - MDBX_CACHE_REFRESHED: the value (or its absence) was read from the DB and stored into the entry;
+ *  - MDBX_CACHE_DIRTY: the result was read from the DB but not cached, since `entry` is NULL
+ *    or the data was changed after the last committed snapshot (i.e. by a write transaction in progress);
+ *  - MDBX_CACHE_ERROR: the lookup failed, `errcode` holds the reason.
+ *
+ * `errcode` is MDBX_SUCCESS when the key is found and MDBX_NOTFOUND when the key or the whole table is absent;
+ * for the mdbx_get() fallbacks it is whatever mdbx_get() returned. Errors detected here: MDBX_EINVAL for a NULL
+ * `key`/`data` or `data` aliasing `entry->data`, MDBX_INVALID for an entry with trunk_txnid > last_confirmed_txnid,
+ * MDBX_EMULTIVAL for a multi-value (N_DUP) item, plus ones from the transaction, table and page checks. */
+MDBX_cache_result_t mdbx_get_cached(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data,
+                                    MDBX_cache_entry *entry) {
+  DKBUF_DEBUG;
+  DEBUG("===> cached-get db %u key [%s]", dbi, DKEY_DEBUG(key));
+
+  if (unlikely(!entry))
+    return cache_result(mdbx_get(txn, dbi, key, data), MDBX_CACHE_DIRTY);
+
+  if (unlikely(!key || !data || data == &entry->data))
+    return cache_error(LOG_IFERR(MDBX_EINVAL));
+
+  if (unlikely(entry->trunk_txnid > entry->last_confirmed_txnid))
+    return cache_error(LOG_IFERR(MDBX_INVALID));
+
+  STATIC_ASSERT(MDBX_TXN_BLOCKED > MDBX_TXN_PARKED);
+  int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_PARKED /* avoid resume parked transaction until slow-path */);
+  if (unlikely(err != MDBX_SUCCESS))
+    return cache_error(LOG_IFERR(err));
+
+  if (unlikely(txn->txnid < entry->trunk_txnid))
+    /* the used/read MVCC-snapshot is behind the cached MVCC-range. */
+    return cache_result(mdbx_get(txn, dbi, key, data), MDBX_CACHE_BEHIND);
+
+  if (likely(txn->txnid <= entry->last_confirmed_txnid)) {
+    /* cache hit fast-path */
+    *data = entry->data;
+    return cache_result(data->iov_base ? MDBX_SUCCESS : MDBX_NOTFOUND, MDBX_CACHE_HIT);
+  }
+
+  if (unlikely(txn->flags & MDBX_TXN_PARKED)) {
+    err = mdbx_txn_unpark((MDBX_txn *)txn, false);
+    if (unlikely(err != MDBX_SUCCESS))
+      return cache_error(LOG_IFERR(err));
+  }
+
+  err = dbi_check(txn, dbi);
+  if (unlikely(err != MDBX_SUCCESS))
+    return cache_error(LOG_IFERR(err));
+
+  /* the newest snapshot whose data is committed: the txn's own one for a reader, the previous one for a writer */
+  const uint64_t committed_snapshot_txnid = (txn->flags & MDBX_TXN_RDONLY) ? txn->txnid : txn->txnid - xMDBX_TXNID_STEP;
+  txnid_t trunk_txnid = txn->txnid;
+  if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) {
+    err = tbl_fetch((MDBX_txn *)txn, dbi);
+    if (unlikely(err != MDBX_SUCCESS)) {
+      if (err == MDBX_NOTFOUND) {
+        /* the corresponding table has been deleted */
+      not_found:
+        /* This label is also reached by `goto` from the paths below where `err` is MDBX_SUCCESS,
+         * so MDBX_NOTFOUND is reported explicitly rather than via `err`. */
+        data->iov_base = nullptr;
+        data->iov_len = 0;
+        if (trunk_txnid > committed_snapshot_txnid)
+          return cache_result(MDBX_NOTFOUND, MDBX_CACHE_DIRTY);
+        if (entry->trunk_txnid == trunk_txnid) {
+          tASSERT(txn, trunk_txnid <= committed_snapshot_txnid && trunk_txnid <= entry->last_confirmed_txnid);
+          tASSERT(txn, !entry->data.iov_base && !entry->data.iov_len);
+          entry->last_confirmed_txnid = committed_snapshot_txnid;
+          return cache_result(MDBX_NOTFOUND, MDBX_CACHE_CONFIRMED);
+        }
+        tASSERT(txn, trunk_txnid <= committed_snapshot_txnid && trunk_txnid > entry->last_confirmed_txnid &&
+                         trunk_txnid > entry->trunk_txnid);
+        entry->data = *data;
+        entry->trunk_txnid = trunk_txnid;
+        entry->last_confirmed_txnid = committed_snapshot_txnid;
+        return cache_result(MDBX_NOTFOUND, MDBX_CACHE_REFRESHED);
+      }
+      return cache_error(LOG_IFERR(err));
+    }
+  }
+
+  if (txn->dbs[dbi].mod_txnid /* tree->mod_txnid maybe zero in a legacy DB */)
+    trunk_txnid = txn->dbs[dbi].mod_txnid;
+  if ((txn->flags & MDBX_TXN_RDONLY) == 0) {
+    const MDBX_txn *scan = txn;
+    do
+      if ((scan->flags & MDBX_TXN_DIRTY) && (dbi == MAIN_DBI || (scan->dbi_state[dbi] & DBI_DIRTY))) {
+        /* After committing nested transactions, mod_txnid may be greater than front */
+        trunk_txnid = scan->front_txnid;
+        break;
+      }
+    while (unlikely((scan = scan->parent) != nullptr));
+  }
+
+  if (trunk_txnid <= entry->last_confirmed_txnid) {
+    tASSERT(txn, (txn->dbi_state[dbi] & DBI_DIRTY) == 0);
+  cache_confirmed:
+    /* Here trunk_txnid is the txnid of the table or of a page on the path to the key, which may be greater than
+     * the cached (leaf-level) trunk and may be equal to the last committed snapshot of a write transaction. */
+    tASSERT(txn, trunk_txnid <= committed_snapshot_txnid && trunk_txnid <= entry->last_confirmed_txnid);
+    tASSERT(txn, trunk_txnid >= entry->trunk_txnid);
+    *data = entry->data;
+    entry->last_confirmed_txnid = committed_snapshot_txnid;
+    tASSERT(txn, !data->iov_base || is_inside_dxb_and_commited(txn, data->iov_base));
+    return cache_result(data->iov_base ? MDBX_SUCCESS : MDBX_NOTFOUND, MDBX_CACHE_CONFIRMED);
+  }
+
+  if (unlikely(txn->dbs[dbi].root == P_INVALID)) {
+    /* the corresponding table is empty now */
+    goto not_found;
+  }
+
+  cursor_couple_t cx;
+  err = cursor_init(&cx.outer, txn, dbi);
+  if (unlikely(err != MDBX_SUCCESS))
+    return cache_error(LOG_IFERR(err));
+
+  alignkey_t aligned;
+  err = check_key(&cx.outer, key, &aligned);
+  if (unlikely(err != MDBX_SUCCESS))
+    return cache_error(LOG_IFERR(err));
+
+  cx.outer.top = 0;
+  cx.outer.ki[0] = 0;
+  err = page_get(&cx.outer, txn->dbs[dbi].root, &cx.outer.pg[0], trunk_txnid);
+  if (unlikely(err != MDBX_SUCCESS))
+    return cache_error(LOG_IFERR(err));
+
+  page_t *mp = cx.outer.pg[0];
+  if ((trunk_txnid = mp->txnid) <= entry->last_confirmed_txnid)
+    goto cache_confirmed;
+
+  /* descend to the leaf, stopping as soon as a page not changed after last_confirmed_txnid is met */
+  intptr_t ki = page_numkeys(mp) - 1;
+  while (is_branch(mp)) {
+    /* search by the checked/aligned key, the same one as used for the leaf below */
+    const struct node_search_result nsr = node_search(&cx.outer, &aligned.key);
+    if (likely(nsr.node))
+      ki = cx.outer.ki[cx.outer.top] + (intptr_t)nsr.exact - 1;
+    err = page_get(&cx.outer, node_pgno(page_node(mp, ki)), &mp, trunk_txnid);
+    if (unlikely(err != MDBX_SUCCESS))
+      return cache_error(LOG_IFERR(err));
+
+    if ((trunk_txnid = mp->txnid) <= entry->last_confirmed_txnid)
+      goto cache_confirmed;
+
+    ki = page_numkeys(mp) - 1;
+    err = cursor_push(&cx.outer, mp, ki);
+    if (unlikely(err != MDBX_SUCCESS))
+      return cache_error(LOG_IFERR(err));
+  }
+
+  if (!MDBX_DISABLE_VALIDATION && unlikely(!check_leaf_type(&cx.outer, mp))) {
+    ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", mp->pgno, mp->flags);
+    err = MDBX_CORRUPTED;
+    return cache_error(LOG_IFERR(err));
+  }
+
+  struct node_search_result nsr = node_search(&cx.outer, &aligned.key);
+  if (!nsr.exact)
+    goto not_found;
+
+  if (unlikely(node_flags(nsr.node) & N_DUP)) {
+    /* TODO: It is possible to implement support, but need to think through the usage scenarios */
+    err = MDBX_EMULTIVAL;
+    return cache_error(LOG_IFERR(err));
+  }
+
+  err = node_read(&cx.outer, nsr.node, data, mp);
+  if (unlikely(err != MDBX_SUCCESS))
+    return cache_error(LOG_IFERR(err));
+
+  if (trunk_txnid > committed_snapshot_txnid) {
+    tASSERT(txn, trunk_txnid > entry->last_confirmed_txnid && trunk_txnid > entry->trunk_txnid);
+    return cache_result(MDBX_SUCCESS, MDBX_CACHE_DIRTY);
+  }
+
+  tASSERT(txn, is_inside_dxb_and_commited(txn, data->iov_base));
+  tASSERT(txn, trunk_txnid <= committed_snapshot_txnid && trunk_txnid > entry->last_confirmed_txnid &&
+                   trunk_txnid > entry->trunk_txnid);
+  entry->data = *data;
+  entry->trunk_txnid = trunk_txnid;
+  entry->last_confirmed_txnid = committed_snapshot_txnid;
+  return cache_result(MDBX_SUCCESS, MDBX_CACHE_REFRESHED);
+}