mirror of
https://github.com/isar/libmdbx.git
synced 2025-01-15 19:04:31 +08:00
mdbx: выделение API-функций в api-файлы.
This commit is contained in:
parent
4607184999
commit
ba6df2bb6d
@ -60,12 +60,18 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/README.md"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/mdbx.h++"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/CMakeLists.txt"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/alloy.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-cold.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-copy.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-cursor.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-dbi.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-env.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-extra.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-key-transform.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-misc.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-opts.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-range-estimate.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-txn-data.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/api-txn.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/atomics-ops.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/atomics-types.h"
|
||||
@ -74,9 +80,7 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cogs.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cogs.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/coherency.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cold.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/config.h.in"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/copy.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cursor.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/cursor.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dbi.c"
|
||||
@ -86,7 +90,6 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dpl.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dpl.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/dxb.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/env-opts.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/env.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/essentials.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/gc-get.c"
|
||||
@ -111,7 +114,6 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/mdbx.c++"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/meta.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/meta.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/misc.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/mvcc-readers.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/node.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/node.h"
|
||||
@ -124,12 +126,11 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-iov.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-ops.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-ops.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/page-search.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tree-search.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/pnl.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/pnl.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/preface.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/proto.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/range-estimate.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/refund.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/sort.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/spill.c"
|
||||
@ -145,7 +146,7 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/stat.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/wingetopt.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tools/wingetopt.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tree.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/tree-ops.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/txl.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/txl.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/txn.c"
|
||||
@ -156,7 +157,8 @@ if(EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/.git"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/walk.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/walk.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/windows-import.c"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/windows-import.h")
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/src/windows-import.h"
|
||||
AND EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/test/CMakeLists.txt")
|
||||
set(MDBX_AMALGAMATED_SOURCE FALSE)
|
||||
find_program(GIT git)
|
||||
if(NOT GIT)
|
||||
@ -755,10 +757,17 @@ else()
|
||||
list(
|
||||
APPEND
|
||||
LIBMDBX_SOURCES
|
||||
"${MDBX_SOURCE_DIR}/api-cold.c"
|
||||
"${MDBX_SOURCE_DIR}/api-copy.c"
|
||||
"${MDBX_SOURCE_DIR}/api-cursor.c"
|
||||
"${MDBX_SOURCE_DIR}/api-dbi.c"
|
||||
"${MDBX_SOURCE_DIR}/api-env.c"
|
||||
"${MDBX_SOURCE_DIR}/api-extra.c"
|
||||
"${MDBX_SOURCE_DIR}/api-key-transform.c"
|
||||
"${MDBX_SOURCE_DIR}/api-misc.c"
|
||||
"${MDBX_SOURCE_DIR}/api-opts.c"
|
||||
"${MDBX_SOURCE_DIR}/api-range-estimate.c"
|
||||
"${MDBX_SOURCE_DIR}/api-txn-data.c"
|
||||
"${MDBX_SOURCE_DIR}/api-txn.c"
|
||||
"${MDBX_SOURCE_DIR}/atomics-ops.h"
|
||||
"${MDBX_SOURCE_DIR}/atomics-types.h"
|
||||
@ -767,8 +776,6 @@ else()
|
||||
"${MDBX_SOURCE_DIR}/cogs.c"
|
||||
"${MDBX_SOURCE_DIR}/cogs.h"
|
||||
"${MDBX_SOURCE_DIR}/coherency.c"
|
||||
"${MDBX_SOURCE_DIR}/cold.c"
|
||||
"${MDBX_SOURCE_DIR}/copy.c"
|
||||
"${MDBX_SOURCE_DIR}/cursor.c"
|
||||
"${MDBX_SOURCE_DIR}/cursor.h"
|
||||
"${MDBX_SOURCE_DIR}/dbi.c"
|
||||
@ -776,7 +783,6 @@ else()
|
||||
"${MDBX_SOURCE_DIR}/dpl.c"
|
||||
"${MDBX_SOURCE_DIR}/dpl.h"
|
||||
"${MDBX_SOURCE_DIR}/dxb.c"
|
||||
"${MDBX_SOURCE_DIR}/env-opts.c"
|
||||
"${MDBX_SOURCE_DIR}/env.c"
|
||||
"${MDBX_SOURCE_DIR}/essentials.h"
|
||||
"${MDBX_SOURCE_DIR}/gc-get.c"
|
||||
@ -792,7 +798,6 @@ else()
|
||||
"${MDBX_SOURCE_DIR}/logging_and_debug.h"
|
||||
"${MDBX_SOURCE_DIR}/meta.c"
|
||||
"${MDBX_SOURCE_DIR}/meta.h"
|
||||
"${MDBX_SOURCE_DIR}/misc.c"
|
||||
"${MDBX_SOURCE_DIR}/mvcc-readers.c"
|
||||
"${MDBX_SOURCE_DIR}/node.c"
|
||||
"${MDBX_SOURCE_DIR}/node.h"
|
||||
@ -804,12 +809,11 @@ else()
|
||||
"${MDBX_SOURCE_DIR}/page-iov.h"
|
||||
"${MDBX_SOURCE_DIR}/page-ops.c"
|
||||
"${MDBX_SOURCE_DIR}/page-ops.h"
|
||||
"${MDBX_SOURCE_DIR}/page-search.c"
|
||||
"${MDBX_SOURCE_DIR}/tree-search.c"
|
||||
"${MDBX_SOURCE_DIR}/pnl.c"
|
||||
"${MDBX_SOURCE_DIR}/pnl.h"
|
||||
"${MDBX_SOURCE_DIR}/preface.h"
|
||||
"${MDBX_SOURCE_DIR}/proto.h"
|
||||
"${MDBX_SOURCE_DIR}/range-estimate.c"
|
||||
"${MDBX_SOURCE_DIR}/refund.c"
|
||||
"${MDBX_SOURCE_DIR}/sort.h"
|
||||
"${MDBX_SOURCE_DIR}/spill.c"
|
||||
@ -817,7 +821,7 @@ else()
|
||||
"${MDBX_SOURCE_DIR}/table.c"
|
||||
"${MDBX_SOURCE_DIR}/tls.c"
|
||||
"${MDBX_SOURCE_DIR}/tls.h"
|
||||
"${MDBX_SOURCE_DIR}/tree.c"
|
||||
"${MDBX_SOURCE_DIR}/tree-ops.c"
|
||||
"${MDBX_SOURCE_DIR}/txl.c"
|
||||
"${MDBX_SOURCE_DIR}/txl.h"
|
||||
"${MDBX_SOURCE_DIR}/txn.c"
|
||||
|
16
src/alloy.c
16
src/alloy.c
@ -4,22 +4,26 @@
|
||||
#define xMDBX_ALLOY 1 /* alloyed build */
|
||||
#include "internals.h" /* must be included first */
|
||||
|
||||
#include "api-cold.c"
|
||||
#include "api-copy.c"
|
||||
#include "api-cursor.c"
|
||||
#include "api-dbi.c"
|
||||
#include "api-env.c"
|
||||
#include "api-extra.c"
|
||||
#include "api-key-transform.c"
|
||||
#include "api-misc.c"
|
||||
#include "api-opts.c"
|
||||
#include "api-range-estimate.c"
|
||||
#include "api-txn-data.c"
|
||||
#include "api-txn.c"
|
||||
#include "audit.c"
|
||||
#include "chk.c"
|
||||
#include "cogs.c"
|
||||
#include "coherency.c"
|
||||
#include "cold.c"
|
||||
#include "copy.c"
|
||||
#include "cursor.c"
|
||||
#include "dbi.c"
|
||||
#include "dpl.c"
|
||||
#include "dxb.c"
|
||||
#include "env-opts.c"
|
||||
#include "env.c"
|
||||
#include "gc-get.c"
|
||||
#include "gc-put.c"
|
||||
@ -29,21 +33,19 @@
|
||||
#include "lck.c"
|
||||
#include "logging_and_debug.c"
|
||||
#include "meta.c"
|
||||
#include "misc.c"
|
||||
#include "mvcc-readers.c"
|
||||
#include "node.c"
|
||||
#include "osal.c"
|
||||
#include "page-get.c"
|
||||
#include "page-iov.c"
|
||||
#include "page-ops.c"
|
||||
#include "page-search.c"
|
||||
#include "pnl.c"
|
||||
#include "range-estimate.c"
|
||||
#include "refund.c"
|
||||
#include "spill.c"
|
||||
#include "table.c"
|
||||
#include "tls.c"
|
||||
#include "tree.c"
|
||||
#include "tree-ops.c"
|
||||
#include "tree-search.c"
|
||||
#include "txl.c"
|
||||
#include "txn.c"
|
||||
#include "utils.c"
|
||||
|
@ -128,111 +128,6 @@ __cold int mdbx_env_get_valsize4page_max(const MDBX_env *env, MDBX_db_flags_t fl
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
__cold static void stat_add(const tree_t *db, MDBX_stat *const st, const size_t bytes) {
|
||||
st->ms_depth += db->height;
|
||||
st->ms_branch_pages += db->branch_pages;
|
||||
st->ms_leaf_pages += db->leaf_pages;
|
||||
st->ms_overflow_pages += db->large_pages;
|
||||
st->ms_entries += db->items;
|
||||
if (likely(bytes >= offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid)))
|
||||
st->ms_mod_txnid = (st->ms_mod_txnid > db->mod_txnid) ? st->ms_mod_txnid : db->mod_txnid;
|
||||
}
|
||||
|
||||
__cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) {
|
||||
memset(st, 0, bytes);
|
||||
|
||||
int err = check_txn(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
cursor_couple_t cx;
|
||||
err = cursor_init(&cx.outer, (MDBX_txn *)txn, MAIN_DBI);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
const MDBX_env *const env = txn->env;
|
||||
st->ms_psize = env->ps;
|
||||
TXN_FOREACH_DBI_FROM(txn, dbi,
|
||||
/* assuming GC is internal and not subject for accounting */ MAIN_DBI) {
|
||||
if ((txn->dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID)
|
||||
stat_add(txn->dbs + dbi, st, bytes);
|
||||
}
|
||||
|
||||
if (!(txn->dbs[MAIN_DBI].flags & MDBX_DUPSORT) && txn->dbs[MAIN_DBI].items /* TODO: use `md_subs` field */) {
|
||||
|
||||
/* scan and account not opened named tables */
|
||||
err = tree_search(&cx.outer, nullptr, Z_FIRST);
|
||||
while (err == MDBX_SUCCESS) {
|
||||
const page_t *mp = cx.outer.pg[cx.outer.top];
|
||||
for (size_t i = 0; i < page_numkeys(mp); i++) {
|
||||
const node_t *node = page_node(mp, i);
|
||||
if (node_flags(node) != N_TREE)
|
||||
continue;
|
||||
if (unlikely(node_ds(node) != sizeof(tree_t))) {
|
||||
ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid table node size", node_ds(node));
|
||||
return MDBX_CORRUPTED;
|
||||
}
|
||||
|
||||
/* skip opened and already accounted */
|
||||
const MDBX_val name = {node_key(node), node_ks(node)};
|
||||
TXN_FOREACH_DBI_USER(txn, dbi) {
|
||||
if ((txn->dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID &&
|
||||
env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[dbi].name) == 0) {
|
||||
node = nullptr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (node) {
|
||||
tree_t db;
|
||||
memcpy(&db, node_data(node), sizeof(db));
|
||||
stat_add(&db, st, bytes);
|
||||
}
|
||||
}
|
||||
err = cursor_sibling_right(&cx.outer);
|
||||
}
|
||||
if (unlikely(err != MDBX_NOTFOUND))
|
||||
return err;
|
||||
}
|
||||
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
__cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_stat *dest, size_t bytes) {
|
||||
if (unlikely(!dest))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid);
|
||||
if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid)
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
if (likely(txn)) {
|
||||
if (env && unlikely(txn->env != env))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
return LOG_IFERR(stat_acc(txn, dest, bytes));
|
||||
}
|
||||
|
||||
int err = check_env(env, true);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return LOG_IFERR(err);
|
||||
|
||||
if (env->txn && env_txn0_owned(env))
|
||||
/* inside write-txn */
|
||||
return LOG_IFERR(stat_acc(env->txn, dest, bytes));
|
||||
|
||||
MDBX_txn *tmp_txn;
|
||||
err = mdbx_txn_begin((MDBX_env *)env, nullptr, MDBX_TXN_RDONLY, &tmp_txn);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return LOG_IFERR(err);
|
||||
|
||||
const int rc = stat_acc(tmp_txn, dest, bytes);
|
||||
err = mdbx_txn_abort(tmp_txn);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return LOG_IFERR(err);
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
static size_t estimate_rss(size_t database_bytes) {
|
||||
return database_bytes + database_bytes / 64 + (512 + MDBX_WORDBITS * 16) * MEGABYTE;
|
||||
}
|
324
src/api-dbi.c
Normal file
324
src/api-dbi.c
Normal file
@ -0,0 +1,324 @@
|
||||
/// \copyright SPDX-License-Identifier: Apache-2.0
|
||||
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2024
|
||||
|
||||
#include "internals.h"
|
||||
|
||||
int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi) {
|
||||
return LOG_IFERR(dbi_open(txn, name, flags, dbi, nullptr, nullptr));
|
||||
}
|
||||
|
||||
int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp,
|
||||
MDBX_cmp_func *datacmp) {
|
||||
return LOG_IFERR(dbi_open(txn, name, flags, dbi, keycmp, datacmp));
|
||||
}
|
||||
|
||||
static int dbi_open_cstr(MDBX_txn *txn, const char *name_cstr, MDBX_db_flags_t flags, MDBX_dbi *dbi,
|
||||
MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) {
|
||||
MDBX_val thunk, *name;
|
||||
if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || name_cstr == MDBX_CHK_META)
|
||||
name = (void *)name_cstr;
|
||||
else {
|
||||
thunk.iov_len = strlen(name_cstr);
|
||||
thunk.iov_base = (void *)name_cstr;
|
||||
name = &thunk;
|
||||
}
|
||||
return dbi_open(txn, name, flags, dbi, keycmp, datacmp);
|
||||
}
|
||||
|
||||
int mdbx_dbi_open(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi) {
|
||||
return LOG_IFERR(dbi_open_cstr(txn, name, flags, dbi, nullptr, nullptr));
|
||||
}
|
||||
|
||||
int mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp,
|
||||
MDBX_cmp_func *datacmp) {
|
||||
return LOG_IFERR(dbi_open_cstr(txn, name, flags, dbi, keycmp, datacmp));
|
||||
}
|
||||
|
||||
__cold int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) {
|
||||
int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
cursor_couple_t cx;
|
||||
rc = cursor_init(&cx.outer, txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (txn->dbs[dbi].height) {
|
||||
cx.outer.next = txn->cursors[dbi];
|
||||
txn->cursors[dbi] = &cx.outer;
|
||||
rc = tree_drop(&cx.outer, dbi == MAIN_DBI || (cx.outer.tree->flags & MDBX_DUPSORT));
|
||||
txn->cursors[dbi] = cx.outer.next;
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
/* Invalidate the dropped DB's cursors */
|
||||
for (MDBX_cursor *mc = txn->cursors[dbi]; mc; mc = mc->next)
|
||||
be_poor(mc);
|
||||
|
||||
if (!del || dbi < CORE_DBS) {
|
||||
/* reset the DB record, mark it dirty */
|
||||
txn->dbi_state[dbi] |= DBI_DIRTY;
|
||||
txn->dbs[dbi].height = 0;
|
||||
txn->dbs[dbi].branch_pages = 0;
|
||||
txn->dbs[dbi].leaf_pages = 0;
|
||||
txn->dbs[dbi].large_pages = 0;
|
||||
txn->dbs[dbi].items = 0;
|
||||
txn->dbs[dbi].root = P_INVALID;
|
||||
txn->dbs[dbi].sequence = 0;
|
||||
/* txn->dbs[dbi].mod_txnid = txn->txnid; */
|
||||
txn->flags |= MDBX_TXN_DIRTY;
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
MDBX_env *const env = txn->env;
|
||||
MDBX_val name = env->kvs[dbi].name;
|
||||
rc = cursor_init(&cx.outer, txn, MAIN_DBI);
|
||||
if (likely(rc == MDBX_SUCCESS)) {
|
||||
rc = cursor_seek(&cx.outer, &name, nullptr, MDBX_SET).err;
|
||||
if (likely(rc == MDBX_SUCCESS)) {
|
||||
cx.outer.next = txn->cursors[MAIN_DBI];
|
||||
txn->cursors[MAIN_DBI] = &cx.outer;
|
||||
rc = cursor_del(&cx.outer, N_TREE);
|
||||
txn->cursors[MAIN_DBI] = cx.outer.next;
|
||||
if (likely(rc == MDBX_SUCCESS)) {
|
||||
tASSERT(txn, txn->dbi_state[MAIN_DBI] & DBI_DIRTY);
|
||||
tASSERT(txn, txn->flags & MDBX_TXN_DIRTY);
|
||||
txn->dbi_state[dbi] = DBI_LINDO | DBI_OLDEN;
|
||||
rc = osal_fastmutex_acquire(&env->dbi_lock);
|
||||
if (likely(rc == MDBX_SUCCESS))
|
||||
return LOG_IFERR(dbi_close_release(env, dbi));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
txn->flags |= MDBX_TXN_ERROR;
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
__cold int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name_cstr) {
|
||||
MDBX_val thunk, *name;
|
||||
if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || name_cstr == MDBX_CHK_META)
|
||||
name = (void *)name_cstr;
|
||||
else {
|
||||
thunk.iov_len = strlen(name_cstr);
|
||||
thunk.iov_base = (void *)name_cstr;
|
||||
name = &thunk;
|
||||
}
|
||||
return mdbx_dbi_rename2(txn, dbi, name);
|
||||
}
|
||||
|
||||
__cold int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *new_name) {
|
||||
int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(new_name == MDBX_CHK_MAIN || new_name->iov_base == MDBX_CHK_MAIN || new_name == MDBX_CHK_GC ||
|
||||
new_name->iov_base == MDBX_CHK_GC || new_name == MDBX_CHK_META || new_name->iov_base == MDBX_CHK_META))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
if (unlikely(dbi < CORE_DBS))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
rc = dbi_check(txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
rc = osal_fastmutex_acquire(&txn->env->dbi_lock);
|
||||
if (likely(rc == MDBX_SUCCESS)) {
|
||||
struct dbi_rename_result pair = dbi_rename_locked(txn, dbi, *new_name);
|
||||
if (pair.defer)
|
||||
pair.defer->next = nullptr;
|
||||
dbi_defer_release(txn->env, pair.defer);
|
||||
rc = pair.err;
|
||||
}
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) {
|
||||
int rc = check_env(env, true);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(dbi < CORE_DBS))
|
||||
return (dbi == MAIN_DBI) ? MDBX_SUCCESS : LOG_IFERR(MDBX_BAD_DBI);
|
||||
|
||||
if (unlikely(dbi >= env->max_dbi))
|
||||
return LOG_IFERR(MDBX_BAD_DBI);
|
||||
|
||||
if (unlikely(dbi < CORE_DBS || dbi >= env->max_dbi))
|
||||
return LOG_IFERR(MDBX_BAD_DBI);
|
||||
|
||||
rc = osal_fastmutex_acquire(&env->dbi_lock);
|
||||
if (likely(rc == MDBX_SUCCESS && dbi < env->n_dbi)) {
|
||||
retry:
|
||||
if (env->basal_txn && (env->dbs_flags[dbi] & DB_VALID) && (env->basal_txn->flags & MDBX_TXN_FINISHED) == 0) {
|
||||
/* LY: Опасный код, так как env->txn может быть изменено в другом потоке.
|
||||
* К сожалению тут нет надежного решения и может быть падение при неверном
|
||||
* использовании API (вызове mdbx_dbi_close конкурентно с завершением
|
||||
* пишущей транзакции).
|
||||
*
|
||||
* Для минимизации вероятности падения сначала проверяем dbi-флаги
|
||||
* в basal_txn, а уже после в env->txn. Таким образом, падение может быть
|
||||
* только при коллизии с завершением вложенной транзакции.
|
||||
*
|
||||
* Альтернативно можно попробовать выполнять обновление/put записи в
|
||||
* mainDb соответствующей таблице закрываемого хендла. Семантически это
|
||||
* верный путь, но проблема в текущем API, в котором исторически dbi-хендл
|
||||
* живет и закрывается вне транзакции. Причем проблема не только в том,
|
||||
* что нет указателя на текущую пишущую транзакцию, а в том что
|
||||
* пользователь точно не ожидает что закрытие хендла приведет к
|
||||
* скрытой/непрозрачной активности внутри транзакции потенциально
|
||||
* выполняемой в другом потоке. Другими словами, проблема может быть
|
||||
* только при неверном использовании API и если пользователь это
|
||||
* допускает, то точно не будет ожидать скрытых действий внутри
|
||||
* транзакции, и поэтому этот путь потенциально более опасен. */
|
||||
const MDBX_txn *const hazard = env->txn;
|
||||
osal_compiler_barrier();
|
||||
if ((dbi_state(env->basal_txn, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) {
|
||||
bailout_dirty_dbi:
|
||||
osal_fastmutex_release(&env->dbi_lock);
|
||||
return LOG_IFERR(MDBX_DANGLING_DBI);
|
||||
}
|
||||
osal_memory_barrier();
|
||||
if (unlikely(hazard != env->txn))
|
||||
goto retry;
|
||||
if (hazard != env->basal_txn && hazard && (hazard->flags & MDBX_TXN_FINISHED) == 0 &&
|
||||
hazard->signature == txn_signature &&
|
||||
(dbi_state(hazard, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO)
|
||||
goto bailout_dirty_dbi;
|
||||
osal_compiler_barrier();
|
||||
if (unlikely(hazard != env->txn))
|
||||
goto retry;
|
||||
}
|
||||
rc = dbi_close_release(env, dbi);
|
||||
}
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, unsigned *state) {
|
||||
if (unlikely(!flags || !state))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR - MDBX_TXN_PARKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
*flags = 0;
|
||||
*state = 0;
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
rc = dbi_check(txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
*flags = 0;
|
||||
*state = 0;
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
*flags = txn->dbs[dbi].flags & DB_PERSISTENT_FLAGS;
|
||||
*state = txn->dbi_state[dbi] & (DBI_FRESH | DBI_CREAT | DBI_DIRTY | DBI_STALE);
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
static void stat_get(const tree_t *db, MDBX_stat *st, size_t bytes) {
|
||||
st->ms_depth = db->height;
|
||||
st->ms_branch_pages = db->branch_pages;
|
||||
st->ms_leaf_pages = db->leaf_pages;
|
||||
st->ms_overflow_pages = db->large_pages;
|
||||
st->ms_entries = db->items;
|
||||
if (likely(bytes >= offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid)))
|
||||
st->ms_mod_txnid = db->mod_txnid;
|
||||
}
|
||||
|
||||
__cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, size_t bytes) {
|
||||
if (unlikely(!dest))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
|
||||
rc = dbi_check(txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
|
||||
const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid);
|
||||
if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) {
|
||||
rc = MDBX_EINVAL;
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
if (unlikely(txn->flags & MDBX_TXN_BLOCKED)) {
|
||||
rc = MDBX_BAD_TXN;
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) {
|
||||
rc = tbl_fetch((MDBX_txn *)txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
dest->ms_psize = txn->env->ps;
|
||||
stat_get(&txn->dbs[dbi], dest, bytes);
|
||||
return MDBX_SUCCESS;
|
||||
|
||||
bailout:
|
||||
memset(dest, 0, bytes);
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
__cold int mdbx_enumerate_tables(const MDBX_txn *txn, MDBX_table_enum_func *func, void *ctx) {
|
||||
if (unlikely(!func))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
cursor_couple_t cx;
|
||||
rc = cursor_init(&cx.outer, txn, MAIN_DBI);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
cx.outer.next = txn->cursors[MAIN_DBI];
|
||||
txn->cursors[MAIN_DBI] = &cx.outer;
|
||||
for (rc = outer_first(&cx.outer, nullptr, nullptr); rc == MDBX_SUCCESS;
|
||||
rc = outer_next(&cx.outer, nullptr, nullptr, MDBX_NEXT_NODUP)) {
|
||||
node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]);
|
||||
if (node_flags(node) != N_TREE)
|
||||
continue;
|
||||
if (unlikely(node_ds(node) != sizeof(tree_t))) {
|
||||
ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid dupsort sub-tree node size",
|
||||
(unsigned)node_ds(node));
|
||||
rc = MDBX_CORRUPTED;
|
||||
break;
|
||||
}
|
||||
|
||||
tree_t reside;
|
||||
const tree_t *tree = memcpy(&reside, node_data(node), sizeof(reside));
|
||||
const MDBX_val name = {node_key(node), node_ks(node)};
|
||||
const MDBX_env *const env = txn->env;
|
||||
MDBX_dbi dbi = 0;
|
||||
for (size_t i = CORE_DBS; i < env->n_dbi; ++i) {
|
||||
if (i >= txn->n_dbi || !(env->dbs_flags[i] & DB_VALID))
|
||||
continue;
|
||||
if (env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[i].name))
|
||||
continue;
|
||||
|
||||
tree = dbi_dig(txn, i, &reside);
|
||||
dbi = (MDBX_dbi)i;
|
||||
break;
|
||||
}
|
||||
|
||||
MDBX_stat stat;
|
||||
stat_get(tree, &stat, sizeof(stat));
|
||||
rc = func(ctx, txn, &name, tree->flags, &stat, dbi);
|
||||
if (rc != MDBX_SUCCESS)
|
||||
goto bailout;
|
||||
}
|
||||
rc = (rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc;
|
||||
|
||||
bailout:
|
||||
txn->cursors[MAIN_DBI] = cx.outer.next;
|
||||
return LOG_IFERR(rc);
|
||||
}
|
105
src/api-env.c
105
src/api-env.c
@ -1315,3 +1315,108 @@ __cold int mdbx_env_sync_ex(MDBX_env *env, bool force, bool nonblock) {
|
||||
|
||||
return LOG_IFERR(env_sync(env, force, nonblock));
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
static void stat_add(const tree_t *db, MDBX_stat *const st, const size_t bytes) {
|
||||
st->ms_depth += db->height;
|
||||
st->ms_branch_pages += db->branch_pages;
|
||||
st->ms_leaf_pages += db->leaf_pages;
|
||||
st->ms_overflow_pages += db->large_pages;
|
||||
st->ms_entries += db->items;
|
||||
if (likely(bytes >= offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid)))
|
||||
st->ms_mod_txnid = (st->ms_mod_txnid > db->mod_txnid) ? st->ms_mod_txnid : db->mod_txnid;
|
||||
}
|
||||
|
||||
static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) {
|
||||
memset(st, 0, bytes);
|
||||
|
||||
int err = check_txn(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
cursor_couple_t cx;
|
||||
err = cursor_init(&cx.outer, (MDBX_txn *)txn, MAIN_DBI);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return err;
|
||||
|
||||
const MDBX_env *const env = txn->env;
|
||||
st->ms_psize = env->ps;
|
||||
TXN_FOREACH_DBI_FROM(txn, dbi,
|
||||
/* assuming GC is internal and not subject for accounting */ MAIN_DBI) {
|
||||
if ((txn->dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID)
|
||||
stat_add(txn->dbs + dbi, st, bytes);
|
||||
}
|
||||
|
||||
if (!(txn->dbs[MAIN_DBI].flags & MDBX_DUPSORT) && txn->dbs[MAIN_DBI].items /* TODO: use `md_subs` field */) {
|
||||
|
||||
/* scan and account not opened named tables */
|
||||
err = tree_search(&cx.outer, nullptr, Z_FIRST);
|
||||
while (err == MDBX_SUCCESS) {
|
||||
const page_t *mp = cx.outer.pg[cx.outer.top];
|
||||
for (size_t i = 0; i < page_numkeys(mp); i++) {
|
||||
const node_t *node = page_node(mp, i);
|
||||
if (node_flags(node) != N_TREE)
|
||||
continue;
|
||||
if (unlikely(node_ds(node) != sizeof(tree_t))) {
|
||||
ERROR("%s/%d: %s %zu", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid table node size", node_ds(node));
|
||||
return MDBX_CORRUPTED;
|
||||
}
|
||||
|
||||
/* skip opened and already accounted */
|
||||
const MDBX_val name = {node_key(node), node_ks(node)};
|
||||
TXN_FOREACH_DBI_USER(txn, dbi) {
|
||||
if ((txn->dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID &&
|
||||
env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[dbi].name) == 0) {
|
||||
node = nullptr;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (node) {
|
||||
tree_t db;
|
||||
memcpy(&db, node_data(node), sizeof(db));
|
||||
stat_add(&db, st, bytes);
|
||||
}
|
||||
}
|
||||
err = cursor_sibling_right(&cx.outer);
|
||||
}
|
||||
if (unlikely(err != MDBX_NOTFOUND))
|
||||
return err;
|
||||
}
|
||||
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
__cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_stat *dest, size_t bytes) {
|
||||
if (unlikely(!dest))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid);
|
||||
if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid)
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
if (likely(txn)) {
|
||||
if (env && unlikely(txn->env != env))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
return LOG_IFERR(stat_acc(txn, dest, bytes));
|
||||
}
|
||||
|
||||
int err = check_env(env, true);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return LOG_IFERR(err);
|
||||
|
||||
if (env->txn && env_txn0_owned(env))
|
||||
/* inside write-txn */
|
||||
return LOG_IFERR(stat_acc(env->txn, dest, bytes));
|
||||
|
||||
MDBX_txn *tmp_txn;
|
||||
err = mdbx_txn_begin((MDBX_env *)env, nullptr, MDBX_TXN_RDONLY, &tmp_txn);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return LOG_IFERR(err);
|
||||
|
||||
const int rc = stat_acc(tmp_txn, dest, bytes);
|
||||
err = mdbx_txn_abort(tmp_txn);
|
||||
if (unlikely(err != MDBX_SUCCESS))
|
||||
return LOG_IFERR(err);
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
@ -72,6 +72,65 @@ __cold int mdbx_reader_check(MDBX_env *env, int *dead) {
|
||||
return LOG_IFERR(mvcc_cleanup_dead(env, false, dead));
|
||||
}
|
||||
|
||||
__cold int mdbx_thread_register(const MDBX_env *env) {
|
||||
int rc = check_env(env, true);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(!env->lck_mmap.lck))
|
||||
return LOG_IFERR((env->flags & MDBX_EXCLUSIVE) ? MDBX_EINVAL : MDBX_EPERM);
|
||||
|
||||
if (unlikely((env->flags & ENV_TXKEY) == 0)) {
|
||||
eASSERT(env, env->flags & MDBX_NOSTICKYTHREADS);
|
||||
return LOG_IFERR(MDBX_EINVAL) /* MDBX_NOSTICKYTHREADS mode */;
|
||||
}
|
||||
|
||||
eASSERT(env, (env->flags & (MDBX_NOSTICKYTHREADS | ENV_TXKEY)) == ENV_TXKEY);
|
||||
reader_slot_t *r = thread_rthc_get(env->me_txkey);
|
||||
if (unlikely(r != nullptr)) {
|
||||
eASSERT(env, r->pid.weak == env->pid);
|
||||
eASSERT(env, r->tid.weak == osal_thread_self());
|
||||
if (unlikely(r->pid.weak != env->pid))
|
||||
return LOG_IFERR(MDBX_BAD_RSLOT);
|
||||
return MDBX_RESULT_TRUE /* already registered */;
|
||||
}
|
||||
|
||||
return LOG_IFERR(mvcc_bind_slot((MDBX_env *)env).err);
|
||||
}
|
||||
|
||||
__cold int mdbx_thread_unregister(const MDBX_env *env) {
|
||||
int rc = check_env(env, true);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(!env->lck_mmap.lck))
|
||||
return MDBX_RESULT_TRUE;
|
||||
|
||||
if (unlikely((env->flags & ENV_TXKEY) == 0)) {
|
||||
eASSERT(env, env->flags & MDBX_NOSTICKYTHREADS);
|
||||
return MDBX_RESULT_TRUE /* MDBX_NOSTICKYTHREADS mode */;
|
||||
}
|
||||
|
||||
eASSERT(env, (env->flags & (MDBX_NOSTICKYTHREADS | ENV_TXKEY)) == ENV_TXKEY);
|
||||
reader_slot_t *r = thread_rthc_get(env->me_txkey);
|
||||
if (unlikely(r == nullptr))
|
||||
return MDBX_RESULT_TRUE /* not registered */;
|
||||
|
||||
eASSERT(env, r->pid.weak == env->pid);
|
||||
eASSERT(env, r->tid.weak == osal_thread_self());
|
||||
if (unlikely(r->pid.weak != env->pid || r->tid.weak != osal_thread_self()))
|
||||
return LOG_IFERR(MDBX_BAD_RSLOT);
|
||||
|
||||
eASSERT(env, r->txnid.weak >= SAFE64_INVALID_THRESHOLD);
|
||||
if (unlikely(r->txnid.weak < SAFE64_INVALID_THRESHOLD))
|
||||
return LOG_IFERR(MDBX_BUSY) /* transaction is still active */;
|
||||
|
||||
atomic_store32(&r->pid, 0, mo_Relaxed);
|
||||
atomic_store32(&env->lck->rdt_refresh_flag, true, mo_AcquireRelease);
|
||||
thread_rthc_set(env->me_txkey, nullptr);
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
/*------------------------------------------------------------------------------
|
||||
* Locking API */
|
||||
|
||||
|
@ -142,6 +142,9 @@ __hot static ptrdiff_t estimate(const tree_t *tree, diff_t *const __restrict dr)
|
||||
}
|
||||
}
|
||||
|
||||
/*------------------------------------------------------------------------------
|
||||
* Range-Estimation API */
|
||||
|
||||
__hot int mdbx_estimate_distance(const MDBX_cursor *first, const MDBX_cursor *last, ptrdiff_t *distance_items) {
|
||||
if (unlikely(first == nullptr || last == nullptr || distance_items == nullptr))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
449
src/api-txn-data.c
Normal file
449
src/api-txn-data.c
Normal file
@ -0,0 +1,449 @@
|
||||
/// \copyright SPDX-License-Identifier: Apache-2.0
|
||||
/// \author Леонид Юрьев aka Leonid Yuriev <leo@yuriev.ru> \date 2015-2024
|
||||
|
||||
#include "internals.h"
|
||||
|
||||
__cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_t *mask) {
|
||||
if (unlikely(!mask))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
*mask = 0;
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
cursor_couple_t cx;
|
||||
rc = cursor_init(&cx.outer, txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
if ((cx.outer.tree->flags & MDBX_DUPSORT) == 0)
|
||||
return MDBX_RESULT_TRUE;
|
||||
|
||||
MDBX_val key, data;
|
||||
rc = outer_first(&cx.outer, &key, &data);
|
||||
while (rc == MDBX_SUCCESS) {
|
||||
const node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]);
|
||||
const tree_t *db = node_data(node);
|
||||
const unsigned flags = node_flags(node);
|
||||
switch (flags) {
|
||||
case N_BIG:
|
||||
case 0:
|
||||
/* single-value entry, deep = 0 */
|
||||
*mask |= 1 << 0;
|
||||
break;
|
||||
case N_DUP:
|
||||
/* single sub-page, deep = 1 */
|
||||
*mask |= 1 << 1;
|
||||
break;
|
||||
case N_DUP | N_TREE:
|
||||
/* sub-tree */
|
||||
*mask |= 1 << UNALIGNED_PEEK_16(db, tree_t, height);
|
||||
break;
|
||||
default:
|
||||
ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid node-size", flags);
|
||||
return LOG_IFERR(MDBX_CORRUPTED);
|
||||
}
|
||||
rc = outer_next(&cx.outer, &key, &data, MDBX_NEXT_NODUP);
|
||||
}
|
||||
|
||||
return LOG_IFERR((rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc);
|
||||
}
|
||||
|
||||
int mdbx_canary_get(const MDBX_txn *txn, MDBX_canary *canary) {
|
||||
if (unlikely(canary == nullptr))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
memset(canary, 0, sizeof(*canary));
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
*canary = txn->canary;
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data) {
|
||||
DKBUF_DEBUG;
|
||||
DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key));
|
||||
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(!key || !data))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
cursor_couple_t cx;
|
||||
rc = cursor_init(&cx.outer, txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
return LOG_IFERR(cursor_seek(&cx.outer, (MDBX_val *)key, data, MDBX_SET).err);
|
||||
}
|
||||
|
||||
int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data) {
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(!key || !data))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
if (unlikely(txn->flags & MDBX_TXN_BLOCKED))
|
||||
return LOG_IFERR(MDBX_BAD_TXN);
|
||||
|
||||
cursor_couple_t cx;
|
||||
rc = cursor_init(&cx.outer, txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
return LOG_IFERR(cursor_ops(&cx.outer, key, data, MDBX_SET_LOWERBOUND));
|
||||
}
|
||||
|
||||
int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, size_t *values_count) {
|
||||
DKBUF_DEBUG;
|
||||
DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key));
|
||||
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(!key || !data))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
cursor_couple_t cx;
|
||||
rc = cursor_init(&cx.outer, txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
rc = cursor_seek(&cx.outer, key, data, MDBX_SET_KEY).err;
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
if (values_count)
|
||||
*values_count = 0;
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
if (values_count) {
|
||||
*values_count = 1;
|
||||
if (inner_pointed(&cx.outer))
|
||||
*values_count =
|
||||
(sizeof(*values_count) >= sizeof(cx.inner.nested_tree.items) || cx.inner.nested_tree.items <= PTRDIFF_MAX)
|
||||
? (size_t)cx.inner.nested_tree.items
|
||||
: PTRDIFF_MAX;
|
||||
}
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
int mdbx_canary_put(MDBX_txn *txn, const MDBX_canary *canary) {
|
||||
int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (likely(canary)) {
|
||||
if (txn->canary.x == canary->x && txn->canary.y == canary->y && txn->canary.z == canary->z)
|
||||
return MDBX_SUCCESS;
|
||||
txn->canary.x = canary->x;
|
||||
txn->canary.y = canary->y;
|
||||
txn->canary.z = canary->z;
|
||||
}
|
||||
txn->canary.v = txn->txnid;
|
||||
txn->flags |= MDBX_TXN_DIRTY;
|
||||
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
/* Функция сообщает находится ли указанный адрес в "грязной" странице у
|
||||
* заданной пишущей транзакции. В конечном счете это позволяет избавиться от
|
||||
* лишнего копирования данных из НЕ-грязных страниц.
|
||||
*
|
||||
* "Грязные" страницы - это те, которые уже были изменены в ходе пишущей
|
||||
* транзакции. Соответственно, какие-либо дальнейшие изменения могут привести
|
||||
* к перезаписи таких страниц. Поэтому все функции, выполняющие изменения, в
|
||||
* качестве аргументов НЕ должны получать указатели на данные в таких
|
||||
* страницах. В свою очередь "НЕ грязные" страницы перед модификацией будут
|
||||
* скопированы.
|
||||
*
|
||||
* Другими словами, данные из "грязных" страниц должны быть либо скопированы
|
||||
* перед передачей в качестве аргументов для дальнейших модификаций, либо
|
||||
* отвергнуты на стадии проверки корректности аргументов.
|
||||
*
|
||||
* Таким образом, функция позволяет как избавится от лишнего копирования,
|
||||
* так и выполнить более полную проверку аргументов.
|
||||
*
|
||||
* ВАЖНО: Передаваемый указатель должен указывать на начало данных. Только
|
||||
* так гарантируется что актуальный заголовок страницы будет физически
|
||||
* расположен в той-же странице памяти, в том числе для многостраничных
|
||||
* P_LARGE страниц с длинными данными. */
|
||||
int mdbx_is_dirty(const MDBX_txn *txn, const void *ptr) {
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
const MDBX_env *env = txn->env;
|
||||
const ptrdiff_t offset = ptr_dist(ptr, env->dxb_mmap.base);
|
||||
if (offset >= 0) {
|
||||
const pgno_t pgno = bytes2pgno(env, offset);
|
||||
if (likely(pgno < txn->geo.first_unallocated)) {
|
||||
const page_t *page = pgno2page(env, pgno);
|
||||
if (unlikely(page->pgno != pgno || (page->flags & P_ILL_BITS) != 0)) {
|
||||
/* The ptr pointed into middle of a large page,
|
||||
* not to the beginning of a data. */
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
}
|
||||
return ((txn->flags & MDBX_TXN_RDONLY) || !is_modifable(txn, page)) ? MDBX_RESULT_FALSE : MDBX_RESULT_TRUE;
|
||||
}
|
||||
if ((size_t)offset < env->dxb_mmap.limit) {
|
||||
/* Указатель адресует что-то в пределах mmap, но за границей
|
||||
* распределенных страниц. Такое может случится если mdbx_is_dirty()
|
||||
* вызывается после операции, в ходе которой грязная страница была
|
||||
* возвращена в нераспределенное пространство. */
|
||||
return (txn->flags & MDBX_TXN_RDONLY) ? LOG_IFERR(MDBX_EINVAL) : MDBX_RESULT_TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Страница вне используемого mmap-диапазона, т.е. либо в функцию был
|
||||
* передан некорректный адрес, либо адрес в теневой странице, которая была
|
||||
* выделена посредством malloc().
|
||||
*
|
||||
* Для режима MDBX_WRITE_MAP режима страница однозначно "не грязная",
|
||||
* а для режимов без MDBX_WRITE_MAP однозначно "не чистая". */
|
||||
return (txn->flags & (MDBX_WRITEMAP | MDBX_TXN_RDONLY)) ? LOG_IFERR(MDBX_EINVAL) : MDBX_RESULT_TRUE;
|
||||
}
|
||||
|
||||
int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, const MDBX_val *data) {
|
||||
int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(!key))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
if (unlikely(dbi <= FREE_DBI))
|
||||
return LOG_IFERR(MDBX_BAD_DBI);
|
||||
|
||||
if (unlikely(txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED)))
|
||||
return LOG_IFERR((txn->flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS : MDBX_BAD_TXN);
|
||||
|
||||
cursor_couple_t cx;
|
||||
rc = cursor_init(&cx.outer, txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
MDBX_val proxy;
|
||||
MDBX_cursor_op op = MDBX_SET;
|
||||
unsigned flags = MDBX_ALLDUPS;
|
||||
if (data) {
|
||||
proxy = *data;
|
||||
data = &proxy;
|
||||
op = MDBX_GET_BOTH;
|
||||
flags = 0;
|
||||
}
|
||||
rc = cursor_seek(&cx.outer, (MDBX_val *)key, (MDBX_val *)data, op).err;
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
cx.outer.next = txn->cursors[dbi];
|
||||
txn->cursors[dbi] = &cx.outer;
|
||||
rc = cursor_del(&cx.outer, flags);
|
||||
txn->cursors[dbi] = cx.outer.next;
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, MDBX_put_flags_t flags) {
|
||||
int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(!key || !data))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
if (unlikely(dbi <= FREE_DBI))
|
||||
return LOG_IFERR(MDBX_BAD_DBI);
|
||||
|
||||
if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | MDBX_ALLDUPS | MDBX_RESERVE | MDBX_APPEND |
|
||||
MDBX_APPENDDUP | MDBX_CURRENT | MDBX_MULTIPLE)))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
if (unlikely(txn->flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED)))
|
||||
return LOG_IFERR((txn->flags & MDBX_TXN_RDONLY) ? MDBX_EACCESS : MDBX_BAD_TXN);
|
||||
|
||||
cursor_couple_t cx;
|
||||
rc = cursor_init(&cx.outer, txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
cx.outer.next = txn->cursors[dbi];
|
||||
txn->cursors[dbi] = &cx.outer;
|
||||
|
||||
/* LY: support for update (explicit overwrite) */
|
||||
if (flags & MDBX_CURRENT) {
|
||||
rc = cursor_seek(&cx.outer, (MDBX_val *)key, nullptr, MDBX_SET).err;
|
||||
if (likely(rc == MDBX_SUCCESS) && (txn->dbs[dbi].flags & MDBX_DUPSORT) && (flags & MDBX_ALLDUPS) == 0) {
|
||||
/* LY: allows update (explicit overwrite) only for unique keys */
|
||||
node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]);
|
||||
if (node_flags(node) & N_DUP) {
|
||||
tASSERT(txn, inner_pointed(&cx.outer) && cx.outer.subcur->nested_tree.items > 1);
|
||||
rc = MDBX_EMULTIVAL;
|
||||
if ((flags & MDBX_NOOVERWRITE) == 0) {
|
||||
flags -= MDBX_CURRENT;
|
||||
rc = cursor_del(&cx.outer, MDBX_ALLDUPS);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (likely(rc == MDBX_SUCCESS))
|
||||
rc = cursor_put_checklen(&cx.outer, key, data, flags);
|
||||
txn->cursors[dbi] = cx.outer.next;
|
||||
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------
|
||||
|
||||
/* Позволяет обновить или удалить существующую запись с получением
|
||||
* в old_data предыдущего значения данных. При этом если new_data равен
|
||||
* нулю, то выполняется удаление, иначе обновление/вставка.
|
||||
*
|
||||
* Текущее значение может находиться в уже измененной (грязной) странице.
|
||||
* В этом случае страница будет перезаписана при обновлении, а само старое
|
||||
* значение утрачено. Поэтому исходно в old_data должен быть передан
|
||||
* дополнительный буфер для копирования старого значения.
|
||||
* Если переданный буфер слишком мал, то функция вернет -1, установив
|
||||
* old_data->iov_len в соответствующее значение.
|
||||
*
|
||||
* Для не-уникальных ключей также возможен второй сценарий использования,
|
||||
* когда посредством old_data из записей с одинаковым ключом для
|
||||
* удаления/обновления выбирается конкретная. Для выбора этого сценария
|
||||
* во flags следует одновременно указать MDBX_CURRENT и MDBX_NOOVERWRITE.
|
||||
* Именно эта комбинация выбрана, так как она лишена смысла, и этим позволяет
|
||||
* идентифицировать запрос такого сценария.
|
||||
*
|
||||
* Функция может быть замещена соответствующими операциями с курсорами
|
||||
* после двух доработок (TODO):
|
||||
* - внешняя аллокация курсоров, в том числе на стеке (без malloc).
|
||||
* - получения dirty-статуса страницы по адресу (знать о MUTABLE/WRITEABLE).
|
||||
*/
|
||||
|
||||
int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *new_data, MDBX_val *old_data,
|
||||
MDBX_put_flags_t flags, MDBX_preserve_func preserver, void *preserver_context) {
|
||||
int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(!key || !old_data || old_data == new_data))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
if (unlikely(old_data->iov_base == nullptr && old_data->iov_len))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
if (unlikely(new_data == nullptr && (flags & (MDBX_CURRENT | MDBX_RESERVE)) != MDBX_CURRENT))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
if (unlikely(dbi <= FREE_DBI))
|
||||
return LOG_IFERR(MDBX_BAD_DBI);
|
||||
|
||||
if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | MDBX_RESERVE | MDBX_APPEND |
|
||||
MDBX_APPENDDUP | MDBX_CURRENT)))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
cursor_couple_t cx;
|
||||
rc = cursor_init(&cx.outer, txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
cx.outer.next = txn->cursors[dbi];
|
||||
txn->cursors[dbi] = &cx.outer;
|
||||
|
||||
MDBX_val present_key = *key;
|
||||
if (F_ISSET(flags, MDBX_CURRENT | MDBX_NOOVERWRITE)) {
|
||||
/* в old_data значение для выбора конкретного дубликата */
|
||||
if (unlikely(!(txn->dbs[dbi].flags & MDBX_DUPSORT))) {
|
||||
rc = MDBX_EINVAL;
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
/* убираем лишний бит, он был признаком запрошенного режима */
|
||||
flags -= MDBX_NOOVERWRITE;
|
||||
|
||||
rc = cursor_seek(&cx.outer, &present_key, old_data, MDBX_GET_BOTH).err;
|
||||
if (rc != MDBX_SUCCESS)
|
||||
goto bailout;
|
||||
} else {
|
||||
/* в old_data буфер для сохранения предыдущего значения */
|
||||
if (unlikely(new_data && old_data->iov_base == new_data->iov_base))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
MDBX_val present_data;
|
||||
rc = cursor_seek(&cx.outer, &present_key, &present_data, MDBX_SET_KEY).err;
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
old_data->iov_base = nullptr;
|
||||
old_data->iov_len = 0;
|
||||
if (rc != MDBX_NOTFOUND || (flags & MDBX_CURRENT))
|
||||
goto bailout;
|
||||
} else if (flags & MDBX_NOOVERWRITE) {
|
||||
rc = MDBX_KEYEXIST;
|
||||
*old_data = present_data;
|
||||
goto bailout;
|
||||
} else {
|
||||
page_t *page = cx.outer.pg[cx.outer.top];
|
||||
if (txn->dbs[dbi].flags & MDBX_DUPSORT) {
|
||||
if (flags & MDBX_CURRENT) {
|
||||
/* disallow update/delete for multi-values */
|
||||
node_t *node = page_node(page, cx.outer.ki[cx.outer.top]);
|
||||
if (node_flags(node) & N_DUP) {
|
||||
tASSERT(txn, inner_pointed(&cx.outer) && cx.outer.subcur->nested_tree.items > 1);
|
||||
if (cx.outer.subcur->nested_tree.items > 1) {
|
||||
rc = MDBX_EMULTIVAL;
|
||||
goto bailout;
|
||||
}
|
||||
}
|
||||
/* В LMDB флажок MDBX_CURRENT здесь приведет
|
||||
* к замене данных без учета MDBX_DUPSORT сортировки,
|
||||
* но здесь это в любом случае допустимо, так как мы
|
||||
* проверили что для ключа есть только одно значение. */
|
||||
}
|
||||
}
|
||||
|
||||
if (is_modifable(txn, page)) {
|
||||
if (new_data && cmp_lenfast(&present_data, new_data) == 0) {
|
||||
/* если данные совпадают, то ничего делать не надо */
|
||||
*old_data = *new_data;
|
||||
goto bailout;
|
||||
}
|
||||
rc = preserver ? preserver(preserver_context, old_data, present_data.iov_base, present_data.iov_len)
|
||||
: MDBX_SUCCESS;
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
} else {
|
||||
*old_data = present_data;
|
||||
}
|
||||
flags |= MDBX_CURRENT;
|
||||
}
|
||||
}
|
||||
|
||||
if (likely(new_data))
|
||||
rc = cursor_put_checklen(&cx.outer, key, new_data, flags);
|
||||
else
|
||||
rc = cursor_del(&cx.outer, flags & MDBX_ALLDUPS);
|
||||
|
||||
bailout:
|
||||
txn->cursors[dbi] = cx.outer.next;
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
static int default_value_preserver(void *context, MDBX_val *target, const void *src, size_t bytes) {
|
||||
(void)context;
|
||||
if (unlikely(target->iov_len < bytes)) {
|
||||
target->iov_base = nullptr;
|
||||
target->iov_len = bytes;
|
||||
return MDBX_RESULT_TRUE;
|
||||
}
|
||||
memcpy(target->iov_base, src, target->iov_len = bytes);
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
int mdbx_replace(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *new_data, MDBX_val *old_data,
|
||||
MDBX_put_flags_t flags) {
|
||||
return mdbx_replace_ex(txn, dbi, key, new_data, old_data, flags, default_value_preserver, nullptr);
|
||||
}
|
1203
src/api-txn.c
1203
src/api-txn.c
File diff suppressed because it is too large
Load Diff
333
src/dbi.c
333
src/dbi.c
@ -132,7 +132,7 @@ __noinline int dbi_import(MDBX_txn *txn, const size_t dbi) {
|
||||
return MDBX_BAD_DBI;
|
||||
}
|
||||
|
||||
static int defer_and_release(MDBX_env *const env, defer_free_item_t *const chain) {
|
||||
int dbi_defer_release(MDBX_env *const env, defer_free_item_t *const chain) {
|
||||
size_t length = 0;
|
||||
defer_free_item_t *obsolete_chain = nullptr;
|
||||
#if MDBX_ENABLE_DBI_LOCKFREE
|
||||
@ -229,7 +229,7 @@ int dbi_update(MDBX_txn *txn, int keep) {
|
||||
eASSERT(env, !env->dbs_flags[i] && !env->kvs[i].name.iov_len && !env->kvs[i].name.iov_base);
|
||||
}
|
||||
env->n_dbi = (unsigned)i;
|
||||
defer_and_release(env, defer_chain);
|
||||
dbi_defer_release(env, defer_chain);
|
||||
}
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
@ -594,25 +594,7 @@ int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned user_flags, MDB
|
||||
return rc;
|
||||
}
|
||||
|
||||
static int dbi_open_cstr(MDBX_txn *txn, const char *name_cstr, MDBX_db_flags_t flags, MDBX_dbi *dbi,
|
||||
MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) {
|
||||
MDBX_val thunk, *name;
|
||||
if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || name_cstr == MDBX_CHK_META)
|
||||
name = (void *)name_cstr;
|
||||
else {
|
||||
thunk.iov_len = strlen(name_cstr);
|
||||
thunk.iov_base = (void *)name_cstr;
|
||||
name = &thunk;
|
||||
}
|
||||
return dbi_open(txn, name, flags, dbi, keycmp, datacmp);
|
||||
}
|
||||
|
||||
struct dbi_rename_result {
|
||||
defer_free_item_t *defer;
|
||||
int err;
|
||||
};
|
||||
|
||||
__cold static struct dbi_rename_result dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name) {
|
||||
__cold struct dbi_rename_result dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name) {
|
||||
struct dbi_rename_result pair;
|
||||
pair.defer = nullptr;
|
||||
pair.err = dbi_check(txn, dbi);
|
||||
@ -690,259 +672,6 @@ static defer_free_item_t *dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) {
|
||||
return defer_item;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
/* API */
|
||||
|
||||
int mdbx_dbi_open(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi) {
|
||||
return LOG_IFERR(dbi_open_cstr(txn, name, flags, dbi, nullptr, nullptr));
|
||||
}
|
||||
|
||||
int mdbx_dbi_open2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi) {
|
||||
return LOG_IFERR(dbi_open(txn, name, flags, dbi, nullptr, nullptr));
|
||||
}
|
||||
|
||||
int mdbx_dbi_open_ex(MDBX_txn *txn, const char *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp,
|
||||
MDBX_cmp_func *datacmp) {
|
||||
return LOG_IFERR(dbi_open_cstr(txn, name, flags, dbi, keycmp, datacmp));
|
||||
}
|
||||
|
||||
int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp,
|
||||
MDBX_cmp_func *datacmp) {
|
||||
return LOG_IFERR(dbi_open(txn, name, flags, dbi, keycmp, datacmp));
|
||||
}
|
||||
|
||||
__cold int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) {
|
||||
int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
cursor_couple_t cx;
|
||||
rc = cursor_init(&cx.outer, txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (txn->dbs[dbi].height) {
|
||||
cx.outer.next = txn->cursors[dbi];
|
||||
txn->cursors[dbi] = &cx.outer;
|
||||
rc = tree_drop(&cx.outer, dbi == MAIN_DBI || (cx.outer.tree->flags & MDBX_DUPSORT));
|
||||
txn->cursors[dbi] = cx.outer.next;
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
/* Invalidate the dropped DB's cursors */
|
||||
for (MDBX_cursor *mc = txn->cursors[dbi]; mc; mc = mc->next)
|
||||
be_poor(mc);
|
||||
|
||||
if (!del || dbi < CORE_DBS) {
|
||||
/* reset the DB record, mark it dirty */
|
||||
txn->dbi_state[dbi] |= DBI_DIRTY;
|
||||
txn->dbs[dbi].height = 0;
|
||||
txn->dbs[dbi].branch_pages = 0;
|
||||
txn->dbs[dbi].leaf_pages = 0;
|
||||
txn->dbs[dbi].large_pages = 0;
|
||||
txn->dbs[dbi].items = 0;
|
||||
txn->dbs[dbi].root = P_INVALID;
|
||||
txn->dbs[dbi].sequence = 0;
|
||||
/* txn->dbs[dbi].mod_txnid = txn->txnid; */
|
||||
txn->flags |= MDBX_TXN_DIRTY;
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
MDBX_env *const env = txn->env;
|
||||
MDBX_val name = env->kvs[dbi].name;
|
||||
rc = cursor_init(&cx.outer, txn, MAIN_DBI);
|
||||
if (likely(rc == MDBX_SUCCESS)) {
|
||||
rc = cursor_seek(&cx.outer, &name, nullptr, MDBX_SET).err;
|
||||
if (likely(rc == MDBX_SUCCESS)) {
|
||||
cx.outer.next = txn->cursors[MAIN_DBI];
|
||||
txn->cursors[MAIN_DBI] = &cx.outer;
|
||||
rc = cursor_del(&cx.outer, N_TREE);
|
||||
txn->cursors[MAIN_DBI] = cx.outer.next;
|
||||
if (likely(rc == MDBX_SUCCESS)) {
|
||||
tASSERT(txn, txn->dbi_state[MAIN_DBI] & DBI_DIRTY);
|
||||
tASSERT(txn, txn->flags & MDBX_TXN_DIRTY);
|
||||
txn->dbi_state[dbi] = DBI_LINDO | DBI_OLDEN;
|
||||
rc = osal_fastmutex_acquire(&env->dbi_lock);
|
||||
if (likely(rc == MDBX_SUCCESS))
|
||||
return LOG_IFERR(defer_and_release(env, dbi_close_locked(env, dbi)));
|
||||
}
|
||||
}
|
||||
}
|
||||
txn->flags |= MDBX_TXN_ERROR;
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
__cold int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name_cstr) {
|
||||
MDBX_val thunk, *name;
|
||||
if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || name_cstr == MDBX_CHK_META)
|
||||
name = (void *)name_cstr;
|
||||
else {
|
||||
thunk.iov_len = strlen(name_cstr);
|
||||
thunk.iov_base = (void *)name_cstr;
|
||||
name = &thunk;
|
||||
}
|
||||
return LOG_IFERR(mdbx_dbi_rename2(txn, dbi, name));
|
||||
}
|
||||
|
||||
int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) {
|
||||
int rc = check_env(env, true);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(dbi < CORE_DBS))
|
||||
return (dbi == MAIN_DBI) ? MDBX_SUCCESS : LOG_IFERR(MDBX_BAD_DBI);
|
||||
|
||||
if (unlikely(dbi >= env->max_dbi))
|
||||
return LOG_IFERR(MDBX_BAD_DBI);
|
||||
|
||||
if (unlikely(dbi < CORE_DBS || dbi >= env->max_dbi))
|
||||
return LOG_IFERR(MDBX_BAD_DBI);
|
||||
|
||||
rc = osal_fastmutex_acquire(&env->dbi_lock);
|
||||
if (likely(rc == MDBX_SUCCESS && dbi < env->n_dbi)) {
|
||||
retry:
|
||||
if (env->basal_txn && (env->dbs_flags[dbi] & DB_VALID) && (env->basal_txn->flags & MDBX_TXN_FINISHED) == 0) {
|
||||
/* LY: Опасный код, так как env->txn может быть изменено в другом потоке.
|
||||
* К сожалению тут нет надежного решения и может быть падение при неверном
|
||||
* использовании API (вызове mdbx_dbi_close конкурентно с завершением
|
||||
* пишущей транзакции).
|
||||
*
|
||||
* Для минимизации вероятности падения сначала проверяем dbi-флаги
|
||||
* в basal_txn, а уже после в env->txn. Таким образом, падение может быть
|
||||
* только при коллизии с завершением вложенной транзакции.
|
||||
*
|
||||
* Альтернативно можно попробовать выполнять обновление/put записи в
|
||||
* mainDb соответствующей таблице закрываемого хендла. Семантически это
|
||||
* верный путь, но проблема в текущем API, в котором исторически dbi-хендл
|
||||
* живет и закрывается вне транзакции. Причем проблема не только в том,
|
||||
* что нет указателя на текущую пишущую транзакцию, а в том что
|
||||
* пользователь точно не ожидает что закрытие хендла приведет к
|
||||
* скрытой/непрозрачной активности внутри транзакции потенциально
|
||||
* выполняемой в другом потоке. Другими словами, проблема может быть
|
||||
* только при неверном использовании API и если пользователь это
|
||||
* допускает, то точно не будет ожидать скрытых действий внутри
|
||||
* транзакции, и поэтому этот путь потенциально более опасен. */
|
||||
const MDBX_txn *const hazard = env->txn;
|
||||
osal_compiler_barrier();
|
||||
if ((dbi_state(env->basal_txn, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO) {
|
||||
bailout_dirty_dbi:
|
||||
osal_fastmutex_release(&env->dbi_lock);
|
||||
return LOG_IFERR(MDBX_DANGLING_DBI);
|
||||
}
|
||||
osal_memory_barrier();
|
||||
if (unlikely(hazard != env->txn))
|
||||
goto retry;
|
||||
if (hazard != env->basal_txn && hazard && (hazard->flags & MDBX_TXN_FINISHED) == 0 &&
|
||||
hazard->signature == txn_signature &&
|
||||
(dbi_state(hazard, dbi) & (DBI_LINDO | DBI_DIRTY | DBI_CREAT)) > DBI_LINDO)
|
||||
goto bailout_dirty_dbi;
|
||||
osal_compiler_barrier();
|
||||
if (unlikely(hazard != env->txn))
|
||||
goto retry;
|
||||
}
|
||||
rc = defer_and_release(env, dbi_close_locked(env, dbi));
|
||||
}
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, unsigned *state) {
|
||||
if (unlikely(!flags || !state))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR - MDBX_TXN_PARKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
*flags = 0;
|
||||
*state = 0;
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
rc = dbi_check(txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
*flags = 0;
|
||||
*state = 0;
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
*flags = txn->dbs[dbi].flags & DB_PERSISTENT_FLAGS;
|
||||
*state = txn->dbi_state[dbi] & (DBI_FRESH | DBI_CREAT | DBI_DIRTY | DBI_STALE);
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
__cold int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *new_name) {
|
||||
int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(new_name == MDBX_CHK_MAIN || new_name->iov_base == MDBX_CHK_MAIN || new_name == MDBX_CHK_GC ||
|
||||
new_name->iov_base == MDBX_CHK_GC || new_name == MDBX_CHK_META || new_name->iov_base == MDBX_CHK_META))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
if (unlikely(dbi < CORE_DBS))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
rc = dbi_check(txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
rc = osal_fastmutex_acquire(&txn->env->dbi_lock);
|
||||
if (likely(rc == MDBX_SUCCESS)) {
|
||||
struct dbi_rename_result pair = dbi_rename_locked(txn, dbi, *new_name);
|
||||
if (pair.defer)
|
||||
pair.defer->next = nullptr;
|
||||
defer_and_release(txn->env, pair.defer);
|
||||
rc = pair.err;
|
||||
}
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
static void stat_get(const tree_t *db, MDBX_stat *st, size_t bytes) {
|
||||
st->ms_depth = db->height;
|
||||
st->ms_branch_pages = db->branch_pages;
|
||||
st->ms_leaf_pages = db->leaf_pages;
|
||||
st->ms_overflow_pages = db->large_pages;
|
||||
st->ms_entries = db->items;
|
||||
if (likely(bytes >= offsetof(MDBX_stat, ms_mod_txnid) + sizeof(st->ms_mod_txnid)))
|
||||
st->ms_mod_txnid = db->mod_txnid;
|
||||
}
|
||||
|
||||
__cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, size_t bytes) {
|
||||
if (unlikely(!dest))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
|
||||
rc = dbi_check(txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
|
||||
const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid);
|
||||
if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) {
|
||||
rc = MDBX_EINVAL;
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
if (unlikely(txn->flags & MDBX_TXN_BLOCKED)) {
|
||||
rc = MDBX_BAD_TXN;
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
if (unlikely(txn->dbi_state[dbi] & DBI_STALE)) {
|
||||
rc = tbl_fetch((MDBX_txn *)txn, dbi);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
dest->ms_psize = txn->env->ps;
|
||||
stat_get(&txn->dbs[dbi], dest, bytes);
|
||||
return MDBX_SUCCESS;
|
||||
|
||||
bailout:
|
||||
memset(dest, 0, bytes);
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
__cold const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, tree_t *fallback) {
|
||||
const MDBX_txn *dig = txn;
|
||||
do {
|
||||
@ -966,58 +695,4 @@ __cold const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, tree_t *fall
|
||||
return fallback;
|
||||
}
|
||||
|
||||
__cold int mdbx_enumerate_tables(const MDBX_txn *txn, MDBX_table_enum_func *func, void *ctx) {
|
||||
if (unlikely(!func))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
cursor_couple_t cx;
|
||||
rc = cursor_init(&cx.outer, txn, MAIN_DBI);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
cx.outer.next = txn->cursors[MAIN_DBI];
|
||||
txn->cursors[MAIN_DBI] = &cx.outer;
|
||||
for (rc = outer_first(&cx.outer, nullptr, nullptr); rc == MDBX_SUCCESS;
|
||||
rc = outer_next(&cx.outer, nullptr, nullptr, MDBX_NEXT_NODUP)) {
|
||||
node_t *node = page_node(cx.outer.pg[cx.outer.top], cx.outer.ki[cx.outer.top]);
|
||||
if (node_flags(node) != N_TREE)
|
||||
continue;
|
||||
if (unlikely(node_ds(node) != sizeof(tree_t))) {
|
||||
ERROR("%s/%d: %s %u", "MDBX_CORRUPTED", MDBX_CORRUPTED, "invalid dupsort sub-tree node size",
|
||||
(unsigned)node_ds(node));
|
||||
rc = MDBX_CORRUPTED;
|
||||
break;
|
||||
}
|
||||
|
||||
tree_t reside;
|
||||
const tree_t *tree = memcpy(&reside, node_data(node), sizeof(reside));
|
||||
const MDBX_val name = {node_key(node), node_ks(node)};
|
||||
const MDBX_env *const env = txn->env;
|
||||
MDBX_dbi dbi = 0;
|
||||
for (size_t i = CORE_DBS; i < env->n_dbi; ++i) {
|
||||
if (i >= txn->n_dbi || !(env->dbs_flags[i] & DB_VALID))
|
||||
continue;
|
||||
if (env->kvs[MAIN_DBI].clc.k.cmp(&name, &env->kvs[i].name))
|
||||
continue;
|
||||
|
||||
tree = dbi_dig(txn, i, &reside);
|
||||
dbi = (MDBX_dbi)i;
|
||||
break;
|
||||
}
|
||||
|
||||
MDBX_stat stat;
|
||||
stat_get(tree, &stat, sizeof(stat));
|
||||
rc = func(ctx, txn, &name, tree->flags, &stat, dbi);
|
||||
if (rc != MDBX_SUCCESS)
|
||||
goto bailout;
|
||||
}
|
||||
rc = (rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc;
|
||||
|
||||
bailout:
|
||||
txn->cursors[MAIN_DBI] = cx.outer.next;
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
int dbi_close_release(MDBX_env *env, MDBX_dbi dbi) { return dbi_defer_release(env, dbi_close_locked(env, dbi)); }
|
||||
|
14
src/dbi.h
14
src/dbi.h
@ -124,4 +124,18 @@ MDBX_INTERNAL int dbi_open(MDBX_txn *txn, const MDBX_val *const name, unsigned u
|
||||
MDBX_INTERNAL int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, MDBX_cmp_func *keycmp,
|
||||
MDBX_cmp_func *datacmp);
|
||||
|
||||
typedef struct defer_free_item {
|
||||
struct defer_free_item *next;
|
||||
uint64_t timestamp;
|
||||
} defer_free_item_t;
|
||||
|
||||
MDBX_INTERNAL int dbi_defer_release(MDBX_env *const env, defer_free_item_t *const chain);
|
||||
MDBX_INTERNAL int dbi_close_release(MDBX_env *env, MDBX_dbi dbi);
|
||||
MDBX_INTERNAL const tree_t *dbi_dig(const MDBX_txn *txn, const size_t dbi, tree_t *fallback);
|
||||
|
||||
struct dbi_rename_result {
|
||||
defer_free_item_t *defer;
|
||||
int err;
|
||||
};
|
||||
|
||||
MDBX_INTERNAL struct dbi_rename_result dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name);
|
||||
|
@ -315,11 +315,6 @@ struct cursor_couple {
|
||||
subcur_t inner;
|
||||
};
|
||||
|
||||
struct defer_free_item {
|
||||
struct defer_free_item *next;
|
||||
uint64_t timestamp;
|
||||
};
|
||||
|
||||
enum env_flags {
|
||||
/* Failed to update the meta page. Probably an I/O error. */
|
||||
ENV_FATAL_ERROR = INT32_MIN /* 0x80000000 */,
|
||||
|
@ -300,82 +300,6 @@ __cold MDBX_INTERNAL int mvcc_cleanup_dead(MDBX_env *env, int rdt_locked, int *d
|
||||
return rc;
|
||||
}
|
||||
|
||||
int txn_park(MDBX_txn *txn, bool autounpark) {
|
||||
reader_slot_t *const rslot = txn->to.reader;
|
||||
tASSERT(txn, (txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) == MDBX_TXN_RDONLY);
|
||||
tASSERT(txn, txn->to.reader->tid.weak < MDBX_TID_TXN_OUSTED);
|
||||
if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) != MDBX_TXN_RDONLY))
|
||||
return MDBX_BAD_TXN;
|
||||
|
||||
const uint32_t pid = atomic_load32(&rslot->pid, mo_Relaxed);
|
||||
const uint64_t tid = atomic_load64(&rslot->tid, mo_Relaxed);
|
||||
const uint64_t txnid = atomic_load64(&rslot->txnid, mo_Relaxed);
|
||||
if (unlikely(pid != txn->env->pid)) {
|
||||
ERROR("unexpected pid %u%s%u", pid, " != must ", txn->env->pid);
|
||||
return MDBX_PROBLEM;
|
||||
}
|
||||
if (unlikely(tid != txn->owner || txnid != txn->txnid)) {
|
||||
ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%0zx"
|
||||
" and/or txn-id %" PRIaTXN "%s%" PRIaTXN,
|
||||
tid, " != must ", txn->owner, txnid, " != must ", txn->txnid);
|
||||
return MDBX_BAD_RSLOT;
|
||||
}
|
||||
|
||||
atomic_store64(&rslot->tid, MDBX_TID_TXN_PARKED, mo_AcquireRelease);
|
||||
atomic_store32(&txn->env->lck->rdt_refresh_flag, true, mo_Relaxed);
|
||||
txn->flags += autounpark ? MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK : MDBX_TXN_PARKED;
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
int txn_unpark(MDBX_txn *txn) {
|
||||
if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) !=
|
||||
(MDBX_TXN_RDONLY | MDBX_TXN_PARKED)))
|
||||
return MDBX_BAD_TXN;
|
||||
|
||||
for (reader_slot_t *const rslot = txn->to.reader; rslot; atomic_yield()) {
|
||||
const uint32_t pid = atomic_load32(&rslot->pid, mo_Relaxed);
|
||||
uint64_t tid = safe64_read(&rslot->tid);
|
||||
uint64_t txnid = safe64_read(&rslot->txnid);
|
||||
if (unlikely(pid != txn->env->pid)) {
|
||||
ERROR("unexpected pid %u%s%u", pid, " != expected ", txn->env->pid);
|
||||
return MDBX_PROBLEM;
|
||||
}
|
||||
if (unlikely(tid == MDBX_TID_TXN_OUSTED || txnid >= SAFE64_INVALID_THRESHOLD))
|
||||
break;
|
||||
if (unlikely(tid != MDBX_TID_TXN_PARKED || txnid != txn->txnid)) {
|
||||
ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%" PRIx64 " and/or txn-id %" PRIaTXN "%s%" PRIaTXN, tid, " != must ",
|
||||
MDBX_TID_TXN_OUSTED, txnid, " != must ", txn->txnid);
|
||||
break;
|
||||
}
|
||||
if (unlikely((txn->flags & MDBX_TXN_ERROR)))
|
||||
break;
|
||||
|
||||
#if MDBX_64BIT_CAS
|
||||
if (unlikely(!atomic_cas64(&rslot->tid, MDBX_TID_TXN_PARKED, txn->owner)))
|
||||
continue;
|
||||
#else
|
||||
atomic_store32(&rslot->tid.high, (uint32_t)((uint64_t)txn->owner >> 32), mo_Relaxed);
|
||||
if (unlikely(!atomic_cas32(&rslot->tid.low, (uint32_t)MDBX_TID_TXN_PARKED, (uint32_t)txn->owner))) {
|
||||
atomic_store32(&rslot->tid.high, (uint32_t)(MDBX_TID_TXN_PARKED >> 32), mo_AcquireRelease);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
txnid = safe64_read(&rslot->txnid);
|
||||
tid = safe64_read(&rslot->tid);
|
||||
if (unlikely(txnid != txn->txnid || tid != txn->owner)) {
|
||||
ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%zx"
|
||||
" and/or txn-id %" PRIaTXN "%s%" PRIaTXN,
|
||||
tid, " != must ", txn->owner, txnid, " != must ", txn->txnid);
|
||||
break;
|
||||
}
|
||||
txn->flags &= ~(MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK);
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
int err = txn_end(txn, TXN_END_OUSTED | TXN_END_RESET | TXN_END_UPDATE);
|
||||
return err ? err : MDBX_OUSTED;
|
||||
}
|
||||
|
||||
__cold txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) {
|
||||
DEBUG("DB size maxed out by reading #%" PRIaTXN, straggler);
|
||||
osal_memory_fence(mo_AcquireRelease, false);
|
||||
@ -488,64 +412,3 @@ __cold txnid_t mvcc_kick_laggards(MDBX_env *env, const txnid_t straggler) {
|
||||
}
|
||||
return oldest;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
__cold int mdbx_thread_register(const MDBX_env *env) {
|
||||
int rc = check_env(env, true);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(!env->lck_mmap.lck))
|
||||
return LOG_IFERR((env->flags & MDBX_EXCLUSIVE) ? MDBX_EINVAL : MDBX_EPERM);
|
||||
|
||||
if (unlikely((env->flags & ENV_TXKEY) == 0)) {
|
||||
eASSERT(env, env->flags & MDBX_NOSTICKYTHREADS);
|
||||
return LOG_IFERR(MDBX_EINVAL) /* MDBX_NOSTICKYTHREADS mode */;
|
||||
}
|
||||
|
||||
eASSERT(env, (env->flags & (MDBX_NOSTICKYTHREADS | ENV_TXKEY)) == ENV_TXKEY);
|
||||
reader_slot_t *r = thread_rthc_get(env->me_txkey);
|
||||
if (unlikely(r != nullptr)) {
|
||||
eASSERT(env, r->pid.weak == env->pid);
|
||||
eASSERT(env, r->tid.weak == osal_thread_self());
|
||||
if (unlikely(r->pid.weak != env->pid))
|
||||
return LOG_IFERR(MDBX_BAD_RSLOT);
|
||||
return MDBX_RESULT_TRUE /* already registered */;
|
||||
}
|
||||
|
||||
return LOG_IFERR(mvcc_bind_slot((MDBX_env *)env).err);
|
||||
}
|
||||
|
||||
__cold int mdbx_thread_unregister(const MDBX_env *env) {
|
||||
int rc = check_env(env, true);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(!env->lck_mmap.lck))
|
||||
return MDBX_RESULT_TRUE;
|
||||
|
||||
if (unlikely((env->flags & ENV_TXKEY) == 0)) {
|
||||
eASSERT(env, env->flags & MDBX_NOSTICKYTHREADS);
|
||||
return MDBX_RESULT_TRUE /* MDBX_NOSTICKYTHREADS mode */;
|
||||
}
|
||||
|
||||
eASSERT(env, (env->flags & (MDBX_NOSTICKYTHREADS | ENV_TXKEY)) == ENV_TXKEY);
|
||||
reader_slot_t *r = thread_rthc_get(env->me_txkey);
|
||||
if (unlikely(r == nullptr))
|
||||
return MDBX_RESULT_TRUE /* not registered */;
|
||||
|
||||
eASSERT(env, r->pid.weak == env->pid);
|
||||
eASSERT(env, r->tid.weak == osal_thread_self());
|
||||
if (unlikely(r->pid.weak != env->pid || r->tid.weak != osal_thread_self()))
|
||||
return LOG_IFERR(MDBX_BAD_RSLOT);
|
||||
|
||||
eASSERT(env, r->txnid.weak >= SAFE64_INVALID_THRESHOLD);
|
||||
if (unlikely(r->txnid.weak < SAFE64_INVALID_THRESHOLD))
|
||||
return LOG_IFERR(MDBX_BUSY) /* transaction is still active */;
|
||||
|
||||
atomic_store32(&r->pid, 0, mo_Relaxed);
|
||||
atomic_store32(&env->lck->rdt_refresh_flag, true, mo_AcquireRelease);
|
||||
thread_rthc_set(env->me_txkey, nullptr);
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
@ -46,6 +46,7 @@ MDBX_INTERNAL int txn_renew(MDBX_txn *txn, unsigned flags);
|
||||
MDBX_INTERNAL int txn_park(MDBX_txn *txn, bool autounpark);
|
||||
MDBX_INTERNAL int txn_unpark(MDBX_txn *txn);
|
||||
MDBX_INTERNAL int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits);
|
||||
MDBX_INTERNAL void txn_done_cursors(MDBX_txn *txn, const bool merge);
|
||||
|
||||
#define TXN_END_NAMES \
|
||||
{"committed", "empty-commit", "abort", "reset", "fail-begin", "fail-beginchild", "ousted", nullptr}
|
||||
@ -67,6 +68,8 @@ enum {
|
||||
};
|
||||
MDBX_INTERNAL int txn_end(MDBX_txn *txn, unsigned mode);
|
||||
MDBX_INTERNAL int txn_write(MDBX_txn *txn, iov_ctx_t *ctx);
|
||||
MDBX_INTERNAL void txn_take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency);
|
||||
MDBX_INTERNAL void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_retired_len);
|
||||
|
||||
/* env.c */
|
||||
MDBX_INTERNAL int env_open(MDBX_env *env, mdbx_mode_t mode);
|
||||
|
942
src/txn.c
942
src/txn.c
@ -7,7 +7,7 @@ __hot txnid_t txn_snapshot_oldest(const MDBX_txn *const txn) {
|
||||
return mvcc_shapshot_oldest(txn->env, txn->tw.troika.txnid[txn->tw.troika.prefer_steady]);
|
||||
}
|
||||
|
||||
static void done_cursors(MDBX_txn *txn, const bool merge) {
|
||||
void txn_done_cursors(MDBX_txn *txn, const bool merge) {
|
||||
tASSERT(txn, txn->cursors[FREE_DBI] == nullptr);
|
||||
TXN_FOREACH_DBI_FROM(txn, i, /* skip FREE_DBI */ 1) {
|
||||
MDBX_cursor *mc = txn->cursors[i];
|
||||
@ -65,7 +65,7 @@ int txn_write(MDBX_txn *txn, iov_ctx_t *ctx) {
|
||||
}
|
||||
|
||||
/* Merge child txn into parent */
|
||||
static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_retired_len) {
|
||||
void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_retired_len) {
|
||||
tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0);
|
||||
dpl_t *const src = dpl_sort(txn);
|
||||
|
||||
@ -395,7 +395,7 @@ static void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t
|
||||
}
|
||||
}
|
||||
|
||||
static void take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency) {
|
||||
void txn_take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency) {
|
||||
MDBX_env *const env = txn->env;
|
||||
if (MDBX_ENABLE_PROFGC) {
|
||||
pgop_stat_t *const ptr = &env->lck->pgops;
|
||||
@ -432,409 +432,6 @@ static void take_gcprof(MDBX_txn *txn, MDBX_commit_latency *latency) {
|
||||
memset(&latency->gc_prof, 0, sizeof(latency->gc_prof));
|
||||
}
|
||||
|
||||
int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) {
|
||||
STATIC_ASSERT(MDBX_TXN_FINISHED == MDBX_TXN_BLOCKED - MDBX_TXN_HAS_CHILD - MDBX_TXN_ERROR - MDBX_TXN_PARKED);
|
||||
const uint64_t ts_0 = latency ? osal_monotime() : 0;
|
||||
uint64_t ts_1 = 0, ts_2 = 0, ts_3 = 0, ts_4 = 0, ts_5 = 0, gc_cputime = 0;
|
||||
|
||||
int rc = check_txn(txn, MDBX_TXN_FINISHED);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
if (rc == MDBX_BAD_TXN && (txn->flags & MDBX_TXN_RDONLY)) {
|
||||
rc = MDBX_RESULT_TRUE;
|
||||
goto fail;
|
||||
}
|
||||
bailout:
|
||||
if (latency)
|
||||
memset(latency, 0, sizeof(*latency));
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
MDBX_env *const env = txn->env;
|
||||
if (MDBX_ENV_CHECKPID && unlikely(env->pid != osal_getpid())) {
|
||||
env->flags |= ENV_FATAL_ERROR;
|
||||
rc = MDBX_PANIC;
|
||||
goto bailout;
|
||||
}
|
||||
|
||||
if (unlikely(txn->flags & MDBX_TXN_ERROR)) {
|
||||
rc = MDBX_RESULT_TRUE;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
/* txn_end() mode for a commit which writes nothing */
|
||||
unsigned end_mode = TXN_END_PURE_COMMIT | TXN_END_UPDATE | TXN_END_SLOT | TXN_END_FREE;
|
||||
if (unlikely(txn->flags & MDBX_TXN_RDONLY))
|
||||
goto done;
|
||||
|
||||
if ((txn->flags & MDBX_NOSTICKYTHREADS) && unlikely(txn->owner != osal_thread_self())) {
|
||||
rc = MDBX_THREAD_MISMATCH;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (txn->nested) {
|
||||
rc = mdbx_txn_commit_ex(txn->nested, nullptr);
|
||||
tASSERT(txn, txn->nested == nullptr);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (unlikely(txn != env->txn)) {
|
||||
DEBUG("%s", "attempt to commit unknown transaction");
|
||||
rc = MDBX_EINVAL;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (txn->parent) {
|
||||
tASSERT(txn, audit_ex(txn, 0, false) == 0);
|
||||
eASSERT(env, txn != env->basal_txn);
|
||||
MDBX_txn *const parent = txn->parent;
|
||||
eASSERT(env, parent->signature == txn_signature);
|
||||
eASSERT(env, parent->nested == txn && (parent->flags & MDBX_TXN_HAS_CHILD) != 0);
|
||||
eASSERT(env, dpl_check(txn));
|
||||
|
||||
if (txn->tw.dirtylist->length == 0 && !(txn->flags & MDBX_TXN_DIRTY) && parent->n_dbi == txn->n_dbi) {
|
||||
TXN_FOREACH_DBI_ALL(txn, i) {
|
||||
tASSERT(txn, (txn->dbi_state[i] & DBI_DIRTY) == 0);
|
||||
if ((txn->dbi_state[i] & DBI_STALE) && !(parent->dbi_state[i] & DBI_STALE))
|
||||
tASSERT(txn, memcmp(&parent->dbs[i], &txn->dbs[i], sizeof(tree_t)) == 0);
|
||||
}
|
||||
|
||||
tASSERT(txn, memcmp(&parent->geo, &txn->geo, sizeof(parent->geo)) == 0);
|
||||
tASSERT(txn, memcmp(&parent->canary, &txn->canary, sizeof(parent->canary)) == 0);
|
||||
tASSERT(txn, !txn->tw.spilled.list || MDBX_PNL_GETSIZE(txn->tw.spilled.list) == 0);
|
||||
tASSERT(txn, txn->tw.loose_count == 0);
|
||||
|
||||
/* fast completion of pure nested transaction */
|
||||
VERBOSE("fast-complete pure nested txn %" PRIaTXN, txn->txnid);
|
||||
end_mode = TXN_END_PURE_COMMIT | TXN_END_SLOT | TXN_END_FREE;
|
||||
goto done;
|
||||
}
|
||||
|
||||
/* Preserve space for spill list to avoid parent's state corruption
|
||||
* if allocation fails. */
|
||||
const size_t parent_retired_len = (uintptr_t)parent->tw.retired_pages;
|
||||
tASSERT(txn, parent_retired_len <= MDBX_PNL_GETSIZE(txn->tw.retired_pages));
|
||||
const size_t retired_delta = MDBX_PNL_GETSIZE(txn->tw.retired_pages) - parent_retired_len;
|
||||
if (retired_delta) {
|
||||
rc = pnl_need(&txn->tw.relist, retired_delta);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if (txn->tw.spilled.list) {
|
||||
if (parent->tw.spilled.list) {
|
||||
rc = pnl_need(&parent->tw.spilled.list, MDBX_PNL_GETSIZE(txn->tw.spilled.list));
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto fail;
|
||||
}
|
||||
spill_purge(txn);
|
||||
}
|
||||
|
||||
if (unlikely(txn->tw.dirtylist->length + parent->tw.dirtylist->length > parent->tw.dirtylist->detent &&
|
||||
!dpl_reserve(parent, txn->tw.dirtylist->length + parent->tw.dirtylist->length))) {
|
||||
rc = MDBX_ENOMEM;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
//-------------------------------------------------------------------------
|
||||
|
||||
parent->tw.gc.reclaimed = txn->tw.gc.reclaimed;
|
||||
txn->tw.gc.reclaimed = nullptr;
|
||||
|
||||
parent->tw.retired_pages = txn->tw.retired_pages;
|
||||
txn->tw.retired_pages = nullptr;
|
||||
|
||||
pnl_free(parent->tw.relist);
|
||||
parent->tw.relist = txn->tw.relist;
|
||||
txn->tw.relist = nullptr;
|
||||
parent->tw.gc.time_acc = txn->tw.gc.time_acc;
|
||||
parent->tw.gc.last_reclaimed = txn->tw.gc.last_reclaimed;
|
||||
|
||||
parent->geo = txn->geo;
|
||||
parent->canary = txn->canary;
|
||||
parent->flags |= txn->flags & MDBX_TXN_DIRTY;
|
||||
|
||||
/* Move loose pages to parent */
|
||||
#if MDBX_ENABLE_REFUND
|
||||
parent->tw.loose_refund_wl = txn->tw.loose_refund_wl;
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
parent->tw.loose_count = txn->tw.loose_count;
|
||||
parent->tw.loose_pages = txn->tw.loose_pages;
|
||||
|
||||
/* Merge our cursors into parent's and close them */
|
||||
done_cursors(txn, true);
|
||||
end_mode |= TXN_END_EOTDONE;
|
||||
|
||||
/* Update parent's DBs array */
|
||||
eASSERT(env, parent->n_dbi == txn->n_dbi);
|
||||
TXN_FOREACH_DBI_ALL(txn, dbi) {
|
||||
if (txn->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)) {
|
||||
parent->dbs[dbi] = txn->dbs[dbi];
|
||||
/* preserve parent's status */
|
||||
const uint8_t state = txn->dbi_state[dbi] | (parent->dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY));
|
||||
DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", dbi, (parent->dbi_state[dbi] != state) ? "update" : "still",
|
||||
parent->dbi_state[dbi], state);
|
||||
parent->dbi_state[dbi] = state;
|
||||
} else {
|
||||
eASSERT(env, txn->dbi_state[dbi] == (parent->dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY)));
|
||||
}
|
||||
}
|
||||
|
||||
if (latency) {
|
||||
ts_1 = osal_monotime();
|
||||
ts_2 = /* no gc-update */ ts_1;
|
||||
ts_3 = /* no audit */ ts_2;
|
||||
ts_4 = /* no write */ ts_3;
|
||||
ts_5 = /* no sync */ ts_4;
|
||||
}
|
||||
txn_merge(parent, txn, parent_retired_len);
|
||||
env->txn = parent;
|
||||
parent->nested = nullptr;
|
||||
tASSERT(parent, dpl_check(parent));
|
||||
|
||||
#if MDBX_ENABLE_REFUND
|
||||
txn_refund(parent);
|
||||
if (ASSERT_ENABLED()) {
|
||||
/* Check parent's loose pages not suitable for refund */
|
||||
for (page_t *lp = parent->tw.loose_pages; lp; lp = page_next(lp)) {
|
||||
tASSERT(parent, lp->pgno < parent->tw.loose_refund_wl && lp->pgno + 1 < parent->geo.first_unallocated);
|
||||
MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *));
|
||||
VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *));
|
||||
}
|
||||
/* Check parent's reclaimed pages not suitable for refund */
|
||||
if (MDBX_PNL_GETSIZE(parent->tw.relist))
|
||||
tASSERT(parent, MDBX_PNL_MOST(parent->tw.relist) + 1 < parent->geo.first_unallocated);
|
||||
}
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
|
||||
txn->signature = 0;
|
||||
osal_free(txn);
|
||||
tASSERT(parent, audit_ex(parent, 0, false) == 0);
|
||||
rc = MDBX_SUCCESS;
|
||||
goto provide_latency;
|
||||
}
|
||||
|
||||
if (!txn->tw.dirtylist) {
|
||||
tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
|
||||
} else {
|
||||
tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
||||
tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length ==
|
||||
(txn->parent ? txn->parent->tw.dirtyroom : env->options.dp_limit));
|
||||
}
|
||||
done_cursors(txn, false);
|
||||
end_mode |= TXN_END_EOTDONE;
|
||||
|
||||
if ((!txn->tw.dirtylist || txn->tw.dirtylist->length == 0) &&
|
||||
(txn->flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS)) == 0) {
|
||||
TXN_FOREACH_DBI_ALL(txn, i) { tASSERT(txn, !(txn->dbi_state[i] & DBI_DIRTY)); }
|
||||
#if defined(MDBX_NOSUCCESS_EMPTY_COMMIT) && MDBX_NOSUCCESS_EMPTY_COMMIT
|
||||
rc = txn_end(txn, end_mode);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto fail;
|
||||
rc = MDBX_RESULT_TRUE;
|
||||
goto provide_latency;
|
||||
#else
|
||||
goto done;
|
||||
#endif /* MDBX_NOSUCCESS_EMPTY_COMMIT */
|
||||
}
|
||||
|
||||
DEBUG("committing txn %" PRIaTXN " %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid, (void *)txn,
|
||||
(void *)env, txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root);
|
||||
|
||||
if (txn->n_dbi > CORE_DBS) {
|
||||
/* Update table root pointers */
|
||||
cursor_couple_t cx;
|
||||
rc = cursor_init(&cx.outer, txn, MAIN_DBI);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto fail;
|
||||
cx.outer.next = txn->cursors[MAIN_DBI];
|
||||
txn->cursors[MAIN_DBI] = &cx.outer;
|
||||
TXN_FOREACH_DBI_USER(txn, i) {
|
||||
if ((txn->dbi_state[i] & DBI_DIRTY) == 0)
|
||||
continue;
|
||||
tree_t *const db = &txn->dbs[i];
|
||||
DEBUG("update main's entry for sub-db %zu, mod_txnid %" PRIaTXN " -> %" PRIaTXN, i, db->mod_txnid, txn->txnid);
|
||||
/* Может быть mod_txnid > front после коммита вложенных тразакций */
|
||||
db->mod_txnid = txn->txnid;
|
||||
MDBX_val data = {db, sizeof(tree_t)};
|
||||
rc = cursor_put(&cx.outer, &env->kvs[i].name, &data, N_TREE);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
txn->cursors[MAIN_DBI] = cx.outer.next;
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
txn->cursors[MAIN_DBI] = cx.outer.next;
|
||||
}
|
||||
|
||||
ts_1 = latency ? osal_monotime() : 0;
|
||||
|
||||
gcu_t gcu_ctx;
|
||||
gc_cputime = latency ? osal_cputime(nullptr) : 0;
|
||||
rc = gc_update_init(txn, &gcu_ctx);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto fail;
|
||||
rc = gc_update(txn, &gcu_ctx);
|
||||
gc_cputime = latency ? osal_cputime(nullptr) - gc_cputime : 0;
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto fail;
|
||||
|
||||
tASSERT(txn, txn->tw.loose_count == 0);
|
||||
txn->dbs[FREE_DBI].mod_txnid = (txn->dbi_state[FREE_DBI] & DBI_DIRTY) ? txn->txnid : txn->dbs[FREE_DBI].mod_txnid;
|
||||
|
||||
txn->dbs[MAIN_DBI].mod_txnid = (txn->dbi_state[MAIN_DBI] & DBI_DIRTY) ? txn->txnid : txn->dbs[MAIN_DBI].mod_txnid;
|
||||
|
||||
ts_2 = latency ? osal_monotime() : 0;
|
||||
ts_3 = ts_2;
|
||||
if (AUDIT_ENABLED()) {
|
||||
rc = audit_ex(txn, MDBX_PNL_GETSIZE(txn->tw.retired_pages), true);
|
||||
ts_3 = osal_monotime();
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto fail;
|
||||
}
|
||||
|
||||
bool need_flush_for_nometasync = false;
|
||||
const meta_ptr_t head = meta_recent(env, &txn->tw.troika);
|
||||
const uint32_t meta_sync_txnid = atomic_load32(&env->lck->meta_sync_txnid, mo_Relaxed);
|
||||
/* sync prev meta */
|
||||
if (head.is_steady && meta_sync_txnid != (uint32_t)head.txnid) {
|
||||
/* Исправление унаследованного от LMDB недочета:
|
||||
*
|
||||
* Всё хорошо, если все процессы работающие с БД не используют WRITEMAP.
|
||||
* Тогда мета-страница (обновленная, но не сброшенная на диск) будет
|
||||
* сохранена в результате fdatasync() при записи данных этой транзакции.
|
||||
*
|
||||
* Всё хорошо, если все процессы работающие с БД используют WRITEMAP
|
||||
* без MDBX_AVOID_MSYNC.
|
||||
* Тогда мета-страница (обновленная, но не сброшенная на диск) будет
|
||||
* сохранена в результате msync() при записи данных этой транзакции.
|
||||
*
|
||||
* Если же в процессах работающих с БД используется оба метода, как sync()
|
||||
* в режиме MDBX_WRITEMAP, так и записи через файловый дескриптор, то
|
||||
* становится невозможным обеспечить фиксацию на диске мета-страницы
|
||||
* предыдущей транзакции и данных текущей транзакции, за счет одной
|
||||
* sync-операцией выполняемой после записи данных текущей транзакции.
|
||||
* Соответственно, требуется явно обновлять мета-страницу, что полностью
|
||||
* уничтожает выгоду от NOMETASYNC. */
|
||||
const uint32_t txnid_dist = ((txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) ? MDBX_NOMETASYNC_LAZY_FD
|
||||
: MDBX_NOMETASYNC_LAZY_WRITEMAP;
|
||||
/* Смысл "магии" в том, чтобы избежать отдельного вызова fdatasync()
|
||||
* или msync() для гарантированной фиксации на диске мета-страницы,
|
||||
* которая была "лениво" отправлена на запись в предыдущей транзакции,
|
||||
* но не сброшена на диск из-за активного режима MDBX_NOMETASYNC. */
|
||||
if (
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
!env->ioring.overlapped_fd &&
|
||||
#endif
|
||||
meta_sync_txnid == (uint32_t)head.txnid - txnid_dist)
|
||||
need_flush_for_nometasync = true;
|
||||
else {
|
||||
rc = meta_sync(env, head);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
ERROR("txn-%s: error %d", "presync-meta", rc);
|
||||
goto fail;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (txn->tw.dirtylist) {
|
||||
tASSERT(txn, (txn->flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC);
|
||||
tASSERT(txn, txn->tw.loose_count == 0);
|
||||
|
||||
mdbx_filehandle_t fd =
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
env->ioring.overlapped_fd ? env->ioring.overlapped_fd : env->lazy_fd;
|
||||
(void)need_flush_for_nometasync;
|
||||
#else
|
||||
(need_flush_for_nometasync || env->dsync_fd == INVALID_HANDLE_VALUE ||
|
||||
txn->tw.dirtylist->length > env->options.writethrough_threshold ||
|
||||
atomic_load64(&env->lck->unsynced_pages, mo_Relaxed))
|
||||
? env->lazy_fd
|
||||
: env->dsync_fd;
|
||||
#endif /* Windows */
|
||||
|
||||
iov_ctx_t write_ctx;
|
||||
rc = iov_init(txn, &write_ctx, txn->tw.dirtylist->length, txn->tw.dirtylist->pages_including_loose, fd, false);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
ERROR("txn-%s: error %d", "iov-init", rc);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
rc = txn_write(txn, &write_ctx);
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
ERROR("txn-%s: error %d", "write", rc);
|
||||
goto fail;
|
||||
}
|
||||
} else {
|
||||
tASSERT(txn, (txn->flags & MDBX_WRITEMAP) != 0 && !MDBX_AVOID_MSYNC);
|
||||
env->lck->unsynced_pages.weak += txn->tw.writemap_dirty_npages;
|
||||
if (!env->lck->eoos_timestamp.weak)
|
||||
env->lck->eoos_timestamp.weak = osal_monotime();
|
||||
}
|
||||
|
||||
/* TODO: use ctx.flush_begin & ctx.flush_end for range-sync */
|
||||
ts_4 = latency ? osal_monotime() : 0;
|
||||
|
||||
meta_t meta;
|
||||
memcpy(meta.magic_and_version, head.ptr_c->magic_and_version, 8);
|
||||
meta.reserve16 = head.ptr_c->reserve16;
|
||||
meta.validator_id = head.ptr_c->validator_id;
|
||||
meta.extra_pagehdr = head.ptr_c->extra_pagehdr;
|
||||
unaligned_poke_u64(4, meta.pages_retired,
|
||||
unaligned_peek_u64(4, head.ptr_c->pages_retired) + MDBX_PNL_GETSIZE(txn->tw.retired_pages));
|
||||
meta.geometry = txn->geo;
|
||||
meta.trees.gc = txn->dbs[FREE_DBI];
|
||||
meta.trees.main = txn->dbs[MAIN_DBI];
|
||||
meta.canary = txn->canary;
|
||||
memcpy(&meta.dxbid, &head.ptr_c->dxbid, sizeof(meta.dxbid));
|
||||
|
||||
txnid_t commit_txnid = txn->txnid;
|
||||
#if MDBX_ENABLE_BIGFOOT
|
||||
if (gcu_ctx.bigfoot > txn->txnid) {
|
||||
commit_txnid = gcu_ctx.bigfoot;
|
||||
TRACE("use @%" PRIaTXN " (+%zu) for commit bigfoot-txn", commit_txnid, (size_t)(commit_txnid - txn->txnid));
|
||||
}
|
||||
#endif
|
||||
meta.unsafe_sign = DATASIGN_NONE;
|
||||
meta_set_txnid(env, &meta, commit_txnid);
|
||||
|
||||
rc = dxb_sync_locked(env, env->flags | txn->flags | txn_shrink_allowed, &meta, &txn->tw.troika);
|
||||
|
||||
ts_5 = latency ? osal_monotime() : 0;
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
env->flags |= ENV_FATAL_ERROR;
|
||||
ERROR("txn-%s: error %d", "sync", rc);
|
||||
goto fail;
|
||||
}
|
||||
|
||||
end_mode = TXN_END_COMMITTED | TXN_END_UPDATE | TXN_END_EOTDONE;
|
||||
|
||||
done:
|
||||
if (latency)
|
||||
take_gcprof(txn, latency);
|
||||
rc = txn_end(txn, end_mode);
|
||||
|
||||
provide_latency:
|
||||
if (latency) {
|
||||
latency->preparation = ts_1 ? osal_monotime_to_16dot16(ts_1 - ts_0) : 0;
|
||||
latency->gc_wallclock = (ts_2 > ts_1) ? osal_monotime_to_16dot16(ts_2 - ts_1) : 0;
|
||||
latency->gc_cputime = gc_cputime ? osal_monotime_to_16dot16(gc_cputime) : 0;
|
||||
latency->audit = (ts_3 > ts_2) ? osal_monotime_to_16dot16(ts_3 - ts_2) : 0;
|
||||
latency->write = (ts_4 > ts_3) ? osal_monotime_to_16dot16(ts_4 - ts_3) : 0;
|
||||
latency->sync = (ts_5 > ts_4) ? osal_monotime_to_16dot16(ts_5 - ts_4) : 0;
|
||||
const uint64_t ts_6 = osal_monotime();
|
||||
latency->ending = ts_5 ? osal_monotime_to_16dot16(ts_6 - ts_5) : 0;
|
||||
latency->whole = osal_monotime_to_16dot16_noUnderflow(ts_6 - ts_0);
|
||||
}
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
fail:
|
||||
txn->flags |= MDBX_TXN_ERROR;
|
||||
if (latency)
|
||||
take_gcprof(txn, latency);
|
||||
txn_abort(txn);
|
||||
goto provide_latency;
|
||||
}
|
||||
|
||||
int txn_abort(MDBX_txn *txn) {
|
||||
if (txn->flags & MDBX_TXN_RDONLY)
|
||||
/* LY: don't close DBI-handles */
|
||||
@ -1260,7 +857,7 @@ int txn_end(MDBX_txn *txn, unsigned mode) {
|
||||
txn->dbs[MAIN_DBI].root, txn->dbs[FREE_DBI].root);
|
||||
|
||||
if (!(mode & TXN_END_EOTDONE)) /* !(already closed cursors) */
|
||||
done_cursors(txn, false);
|
||||
txn_done_cursors(txn, false);
|
||||
|
||||
int rc = MDBX_SUCCESS;
|
||||
if (txn->flags & MDBX_TXN_RDONLY) {
|
||||
@ -1388,461 +985,6 @@ int txn_end(MDBX_txn *txn, unsigned mode) {
|
||||
return rc;
|
||||
}
|
||||
|
||||
/*----------------------------------------------------------------------------*/
|
||||
|
||||
int mdbx_txn_renew(MDBX_txn *txn) {
|
||||
if (unlikely(!txn))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
if (unlikely(txn->signature != txn_signature))
|
||||
return LOG_IFERR(MDBX_EBADSIGN);
|
||||
|
||||
if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
if (unlikely(txn->owner != 0 || !(txn->flags & MDBX_TXN_FINISHED))) {
|
||||
int rc = mdbx_txn_reset(txn);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return rc;
|
||||
}
|
||||
|
||||
int rc = txn_renew(txn, MDBX_TXN_RDONLY);
|
||||
if (rc == MDBX_SUCCESS) {
|
||||
tASSERT(txn, txn->owner == (txn->flags & MDBX_NOSTICKYTHREADS) ? 0 : osal_thread_self());
|
||||
DEBUG("renew txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid,
|
||||
(txn->flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)txn->env, txn->dbs[MAIN_DBI].root,
|
||||
txn->dbs[FREE_DBI].root);
|
||||
}
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
int mdbx_txn_set_userctx(MDBX_txn *txn, void *ctx) {
|
||||
int rc = check_txn(txn, MDBX_TXN_FINISHED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
txn->userctx = ctx;
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
void *mdbx_txn_get_userctx(const MDBX_txn *txn) { return check_txn(txn, MDBX_TXN_FINISHED) ? nullptr : txn->userctx; }
|
||||
|
||||
int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, MDBX_txn **ret, void *context) {
|
||||
if (unlikely(!ret))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
*ret = nullptr;
|
||||
|
||||
if (unlikely((flags & ~txn_rw_begin_flags) && (parent || (flags & ~txn_ro_begin_flags))))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
int rc = check_env(env, true);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(env->flags & MDBX_RDONLY & ~flags)) /* write txn in RDONLY env */
|
||||
return LOG_IFERR(MDBX_EACCESS);
|
||||
|
||||
MDBX_txn *txn = nullptr;
|
||||
if (parent) {
|
||||
/* Nested transactions: Max 1 child, write txns only, no writemap */
|
||||
rc = check_txn_rw(parent, MDBX_TXN_RDONLY | MDBX_WRITEMAP | MDBX_TXN_BLOCKED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (env->options.spill_parent4child_denominator) {
|
||||
/* Spill dirty-pages of parent to provide dirtyroom for child txn */
|
||||
rc = txn_spill(parent, nullptr, parent->tw.dirtylist->length / env->options.spill_parent4child_denominator);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
tASSERT(parent, audit_ex(parent, 0, false) == 0);
|
||||
|
||||
flags |= parent->flags & (txn_rw_begin_flags | MDBX_TXN_SPILLS | MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP);
|
||||
} else if ((flags & MDBX_TXN_RDONLY) == 0) {
|
||||
/* Reuse preallocated write txn. However, do not touch it until
|
||||
* txn_renew() succeeds, since it currently may be active. */
|
||||
txn = env->basal_txn;
|
||||
goto renew;
|
||||
}
|
||||
|
||||
const intptr_t bitmap_bytes =
|
||||
#if MDBX_ENABLE_DBI_SPARSE
|
||||
ceil_powerof2(env->max_dbi, CHAR_BIT * sizeof(txn->dbi_sparse[0])) / CHAR_BIT;
|
||||
#else
|
||||
0;
|
||||
#endif /* MDBX_ENABLE_DBI_SPARSE */
|
||||
STATIC_ASSERT(sizeof(txn->tw) > sizeof(txn->to));
|
||||
const size_t base =
|
||||
(flags & MDBX_TXN_RDONLY) ? sizeof(MDBX_txn) - sizeof(txn->tw) + sizeof(txn->to) : sizeof(MDBX_txn);
|
||||
const size_t size = base +
|
||||
((flags & MDBX_TXN_RDONLY) ? (size_t)bitmap_bytes + env->max_dbi * sizeof(txn->dbi_seqs[0]) : 0) +
|
||||
env->max_dbi * (sizeof(txn->dbs[0]) + sizeof(txn->cursors[0]) + sizeof(txn->dbi_state[0]));
|
||||
txn = osal_malloc(size);
|
||||
if (unlikely(txn == nullptr))
|
||||
return LOG_IFERR(MDBX_ENOMEM);
|
||||
#if MDBX_DEBUG
|
||||
memset(txn, 0xCD, size);
|
||||
VALGRIND_MAKE_MEM_UNDEFINED(txn, size);
|
||||
#endif /* MDBX_DEBUG */
|
||||
MDBX_ANALYSIS_ASSUME(size > base);
|
||||
memset(txn, 0, (MDBX_GOOFY_MSVC_STATIC_ANALYZER && base > size) ? size : base);
|
||||
txn->dbs = ptr_disp(txn, base);
|
||||
txn->cursors = ptr_disp(txn->dbs, env->max_dbi * sizeof(txn->dbs[0]));
|
||||
#if MDBX_DEBUG
|
||||
txn->cursors[FREE_DBI] = nullptr; /* avoid SIGSEGV in an assertion later */
|
||||
#endif
|
||||
txn->dbi_state = ptr_disp(txn, size - env->max_dbi * sizeof(txn->dbi_state[0]));
|
||||
txn->flags = flags;
|
||||
txn->env = env;
|
||||
|
||||
if (parent) {
|
||||
tASSERT(parent, dpl_check(parent));
|
||||
#if MDBX_ENABLE_DBI_SPARSE
|
||||
txn->dbi_sparse = parent->dbi_sparse;
|
||||
#endif /* MDBX_ENABLE_DBI_SPARSE */
|
||||
txn->dbi_seqs = parent->dbi_seqs;
|
||||
txn->geo = parent->geo;
|
||||
rc = dpl_alloc(txn);
|
||||
if (likely(rc == MDBX_SUCCESS)) {
|
||||
const size_t len = MDBX_PNL_GETSIZE(parent->tw.relist) + parent->tw.loose_count;
|
||||
txn->tw.relist = pnl_alloc((len > MDBX_PNL_INITIAL) ? len : MDBX_PNL_INITIAL);
|
||||
if (unlikely(!txn->tw.relist))
|
||||
rc = MDBX_ENOMEM;
|
||||
}
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
nested_failed:
|
||||
pnl_free(txn->tw.relist);
|
||||
dpl_free(txn);
|
||||
osal_free(txn);
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
/* Move loose pages to reclaimed list */
|
||||
if (parent->tw.loose_count) {
|
||||
do {
|
||||
page_t *lp = parent->tw.loose_pages;
|
||||
tASSERT(parent, lp->flags == P_LOOSE);
|
||||
rc = pnl_insert_span(&parent->tw.relist, lp->pgno, 1);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
goto nested_failed;
|
||||
MDBX_ASAN_UNPOISON_MEMORY_REGION(&page_next(lp), sizeof(page_t *));
|
||||
VALGRIND_MAKE_MEM_DEFINED(&page_next(lp), sizeof(page_t *));
|
||||
parent->tw.loose_pages = page_next(lp);
|
||||
/* Remove from dirty list */
|
||||
page_wash(parent, dpl_exist(parent, lp->pgno), lp, 1);
|
||||
} while (parent->tw.loose_pages);
|
||||
parent->tw.loose_count = 0;
|
||||
#if MDBX_ENABLE_REFUND
|
||||
parent->tw.loose_refund_wl = 0;
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
tASSERT(parent, dpl_check(parent));
|
||||
}
|
||||
txn->tw.dirtyroom = parent->tw.dirtyroom;
|
||||
txn->tw.dirtylru = parent->tw.dirtylru;
|
||||
|
||||
dpl_sort(parent);
|
||||
if (parent->tw.spilled.list)
|
||||
spill_purge(parent);
|
||||
|
||||
tASSERT(txn, MDBX_PNL_ALLOCLEN(txn->tw.relist) >= MDBX_PNL_GETSIZE(parent->tw.relist));
|
||||
memcpy(txn->tw.relist, parent->tw.relist, MDBX_PNL_SIZEOF(parent->tw.relist));
|
||||
eASSERT(env, pnl_check_allocated(txn->tw.relist, (txn->geo.first_unallocated /* LY: intentional assignment
|
||||
here, only for assertion */
|
||||
= parent->geo.first_unallocated) -
|
||||
MDBX_ENABLE_REFUND));
|
||||
|
||||
txn->tw.gc.time_acc = parent->tw.gc.time_acc;
|
||||
txn->tw.gc.last_reclaimed = parent->tw.gc.last_reclaimed;
|
||||
if (parent->tw.gc.reclaimed) {
|
||||
txn->tw.gc.reclaimed = parent->tw.gc.reclaimed;
|
||||
parent->tw.gc.reclaimed = (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.gc.reclaimed);
|
||||
}
|
||||
|
||||
txn->tw.retired_pages = parent->tw.retired_pages;
|
||||
parent->tw.retired_pages = (void *)(intptr_t)MDBX_PNL_GETSIZE(parent->tw.retired_pages);
|
||||
|
||||
txn->txnid = parent->txnid;
|
||||
txn->front_txnid = parent->front_txnid + 1;
|
||||
#if MDBX_ENABLE_REFUND
|
||||
txn->tw.loose_refund_wl = 0;
|
||||
#endif /* MDBX_ENABLE_REFUND */
|
||||
txn->canary = parent->canary;
|
||||
parent->flags |= MDBX_TXN_HAS_CHILD;
|
||||
parent->nested = txn;
|
||||
txn->parent = parent;
|
||||
txn->owner = parent->owner;
|
||||
txn->tw.troika = parent->tw.troika;
|
||||
|
||||
txn->cursors[FREE_DBI] = nullptr;
|
||||
txn->cursors[MAIN_DBI] = nullptr;
|
||||
txn->dbi_state[FREE_DBI] = parent->dbi_state[FREE_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY);
|
||||
txn->dbi_state[MAIN_DBI] = parent->dbi_state[MAIN_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY);
|
||||
memset(txn->dbi_state + CORE_DBS, 0, (txn->n_dbi = parent->n_dbi) - CORE_DBS);
|
||||
memcpy(txn->dbs, parent->dbs, sizeof(txn->dbs[0]) * CORE_DBS);
|
||||
|
||||
tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length ==
|
||||
(parent->parent ? parent->parent->tw.dirtyroom : parent->env->options.dp_limit));
|
||||
tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length ==
|
||||
(txn->parent ? txn->parent->tw.dirtyroom : txn->env->options.dp_limit));
|
||||
env->txn = txn;
|
||||
tASSERT(parent, parent->cursors[FREE_DBI] == nullptr);
|
||||
rc = parent->cursors[MAIN_DBI] ? cursor_shadow(parent->cursors[MAIN_DBI], txn, MAIN_DBI) : MDBX_SUCCESS;
|
||||
if (AUDIT_ENABLED() && ASSERT_ENABLED()) {
|
||||
txn->signature = txn_signature;
|
||||
tASSERT(txn, audit_ex(txn, 0, false) == 0);
|
||||
}
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
txn_end(txn, TXN_END_FAIL_BEGINCHILD);
|
||||
} else { /* MDBX_TXN_RDONLY */
|
||||
txn->dbi_seqs = ptr_disp(txn->cursors, env->max_dbi * sizeof(txn->cursors[0]));
|
||||
#if MDBX_ENABLE_DBI_SPARSE
|
||||
txn->dbi_sparse = ptr_disp(txn->dbi_state, -bitmap_bytes);
|
||||
#endif /* MDBX_ENABLE_DBI_SPARSE */
|
||||
renew:
|
||||
rc = txn_renew(txn, flags);
|
||||
}
|
||||
|
||||
if (unlikely(rc != MDBX_SUCCESS)) {
|
||||
if (txn != env->basal_txn)
|
||||
osal_free(txn);
|
||||
} else {
|
||||
if (flags & (MDBX_TXN_RDONLY_PREPARE - MDBX_TXN_RDONLY))
|
||||
eASSERT(env, txn->flags == (MDBX_TXN_RDONLY | MDBX_TXN_FINISHED));
|
||||
else if (flags & MDBX_TXN_RDONLY)
|
||||
eASSERT(env, (txn->flags & ~(MDBX_NOSTICKYTHREADS | MDBX_TXN_RDONLY | MDBX_WRITEMAP |
|
||||
/* Win32: SRWL flag */ txn_shrink_allowed)) == 0);
|
||||
else {
|
||||
eASSERT(env, (txn->flags & ~(MDBX_NOSTICKYTHREADS | MDBX_WRITEMAP | txn_shrink_allowed | MDBX_NOMETASYNC |
|
||||
MDBX_SAFE_NOSYNC | MDBX_TXN_SPILLS)) == 0);
|
||||
assert(!txn->tw.spilled.list && !txn->tw.spilled.least_removed);
|
||||
}
|
||||
txn->signature = txn_signature;
|
||||
txn->userctx = context;
|
||||
*ret = txn;
|
||||
DEBUG("begin txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->txnid,
|
||||
(flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)env, txn->dbs[MAIN_DBI].root,
|
||||
txn->dbs[FREE_DBI].root);
|
||||
}
|
||||
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
int mdbx_txn_info(const MDBX_txn *txn, MDBX_txn_info *info, bool scan_rlt) {
|
||||
int rc = check_txn(txn, MDBX_TXN_FINISHED);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if (unlikely(!info))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
MDBX_env *const env = txn->env;
|
||||
#if MDBX_ENV_CHECKPID
|
||||
if (unlikely(env->pid != osal_getpid())) {
|
||||
env->flags |= ENV_FATAL_ERROR;
|
||||
return LOG_IFERR(MDBX_PANIC);
|
||||
}
|
||||
#endif /* MDBX_ENV_CHECKPID */
|
||||
|
||||
info->txn_id = txn->txnid;
|
||||
info->txn_space_used = pgno2bytes(env, txn->geo.first_unallocated);
|
||||
|
||||
if (txn->flags & MDBX_TXN_RDONLY) {
|
||||
meta_ptr_t head;
|
||||
uint64_t head_retired;
|
||||
troika_t troika = meta_tap(env);
|
||||
do {
|
||||
/* fetch info from volatile head */
|
||||
head = meta_recent(env, &troika);
|
||||
head_retired = unaligned_peek_u64_volatile(4, head.ptr_v->pages_retired);
|
||||
info->txn_space_limit_soft = pgno2bytes(env, head.ptr_v->geometry.now);
|
||||
info->txn_space_limit_hard = pgno2bytes(env, head.ptr_v->geometry.upper);
|
||||
info->txn_space_leftover = pgno2bytes(env, head.ptr_v->geometry.now - head.ptr_v->geometry.first_unallocated);
|
||||
} while (unlikely(meta_should_retry(env, &troika)));
|
||||
|
||||
info->txn_reader_lag = head.txnid - info->txn_id;
|
||||
info->txn_space_dirty = info->txn_space_retired = 0;
|
||||
uint64_t reader_snapshot_pages_retired = 0;
|
||||
if (txn->to.reader &&
|
||||
((txn->flags & MDBX_TXN_PARKED) == 0 || safe64_read(&txn->to.reader->tid) != MDBX_TID_TXN_OUSTED) &&
|
||||
head_retired >
|
||||
(reader_snapshot_pages_retired = atomic_load64(&txn->to.reader->snapshot_pages_retired, mo_Relaxed))) {
|
||||
info->txn_space_dirty = info->txn_space_retired =
|
||||
pgno2bytes(env, (pgno_t)(head_retired - reader_snapshot_pages_retired));
|
||||
|
||||
size_t retired_next_reader = 0;
|
||||
lck_t *const lck = env->lck_mmap.lck;
|
||||
if (scan_rlt && info->txn_reader_lag > 1 && lck) {
|
||||
/* find next more recent reader */
|
||||
txnid_t next_reader = head.txnid;
|
||||
const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease);
|
||||
for (size_t i = 0; i < snap_nreaders; ++i) {
|
||||
retry:
|
||||
if (atomic_load32(&lck->rdt[i].pid, mo_AcquireRelease)) {
|
||||
jitter4testing(true);
|
||||
const uint64_t snap_tid = safe64_read(&lck->rdt[i].tid);
|
||||
const txnid_t snap_txnid = safe64_read(&lck->rdt[i].txnid);
|
||||
const uint64_t snap_retired = atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_AcquireRelease);
|
||||
if (unlikely(snap_retired != atomic_load64(&lck->rdt[i].snapshot_pages_retired, mo_Relaxed)) ||
|
||||
snap_txnid != safe64_read(&lck->rdt[i].txnid) || snap_tid != safe64_read(&lck->rdt[i].tid))
|
||||
goto retry;
|
||||
if (snap_txnid <= txn->txnid) {
|
||||
retired_next_reader = 0;
|
||||
break;
|
||||
}
|
||||
if (snap_txnid < next_reader && snap_tid >= MDBX_TID_TXN_OUSTED) {
|
||||
next_reader = snap_txnid;
|
||||
retired_next_reader = pgno2bytes(
|
||||
env, (pgno_t)(snap_retired - atomic_load64(&txn->to.reader->snapshot_pages_retired, mo_Relaxed)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
info->txn_space_dirty = retired_next_reader;
|
||||
}
|
||||
} else {
|
||||
info->txn_space_limit_soft = pgno2bytes(env, txn->geo.now);
|
||||
info->txn_space_limit_hard = pgno2bytes(env, txn->geo.upper);
|
||||
info->txn_space_retired =
|
||||
pgno2bytes(env, txn->nested ? (size_t)txn->tw.retired_pages : MDBX_PNL_GETSIZE(txn->tw.retired_pages));
|
||||
info->txn_space_leftover = pgno2bytes(env, txn->tw.dirtyroom);
|
||||
info->txn_space_dirty =
|
||||
pgno2bytes(env, txn->tw.dirtylist ? txn->tw.dirtylist->pages_including_loose
|
||||
: (txn->tw.writemap_dirty_npages + txn->tw.writemap_spilled_npages));
|
||||
info->txn_reader_lag = INT64_MAX;
|
||||
lck_t *const lck = env->lck_mmap.lck;
|
||||
if (scan_rlt && lck) {
|
||||
txnid_t oldest_snapshot = txn->txnid;
|
||||
const size_t snap_nreaders = atomic_load32(&lck->rdt_length, mo_AcquireRelease);
|
||||
if (snap_nreaders) {
|
||||
oldest_snapshot = txn_snapshot_oldest(txn);
|
||||
if (oldest_snapshot == txn->txnid - 1) {
|
||||
/* check if there is at least one reader */
|
||||
bool exists = false;
|
||||
for (size_t i = 0; i < snap_nreaders; ++i) {
|
||||
if (atomic_load32(&lck->rdt[i].pid, mo_Relaxed) && txn->txnid > safe64_read(&lck->rdt[i].txnid)) {
|
||||
exists = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
oldest_snapshot += !exists;
|
||||
}
|
||||
}
|
||||
info->txn_reader_lag = txn->txnid - oldest_snapshot;
|
||||
}
|
||||
}
|
||||
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
MDBX_env *mdbx_txn_env(const MDBX_txn *txn) {
|
||||
if (unlikely(!txn || txn->signature != txn_signature || txn->env->signature.weak != env_signature))
|
||||
return nullptr;
|
||||
return txn->env;
|
||||
}
|
||||
|
||||
uint64_t mdbx_txn_id(const MDBX_txn *txn) {
|
||||
if (unlikely(!txn || txn->signature != txn_signature))
|
||||
return 0;
|
||||
return txn->txnid;
|
||||
}
|
||||
|
||||
MDBX_txn_flags_t mdbx_txn_flags(const MDBX_txn *txn) {
|
||||
STATIC_ASSERT(
|
||||
(MDBX_TXN_INVALID & (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | MDBX_TXN_HAS_CHILD |
|
||||
txn_gc_drained | txn_shrink_allowed | txn_rw_begin_flags | txn_ro_begin_flags)) == 0);
|
||||
if (unlikely(!txn || txn->signature != txn_signature))
|
||||
return MDBX_TXN_INVALID;
|
||||
assert(0 == (int)(txn->flags & MDBX_TXN_INVALID));
|
||||
|
||||
MDBX_txn_flags_t flags = txn->flags;
|
||||
if (F_ISSET(flags, MDBX_TXN_PARKED | MDBX_TXN_RDONLY) && txn->to.reader &&
|
||||
safe64_read(&txn->to.reader->tid) == MDBX_TID_TXN_OUSTED)
|
||||
flags |= MDBX_TXN_OUSTED;
|
||||
return flags;
|
||||
}
|
||||
|
||||
int mdbx_txn_reset(MDBX_txn *txn) {
|
||||
int rc = check_txn(txn, 0);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
/* This call is only valid for read-only txns */
|
||||
if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0))
|
||||
return LOG_IFERR(MDBX_EINVAL);
|
||||
|
||||
/* LY: don't close DBI-handles */
|
||||
rc = txn_end(txn, TXN_END_RESET | TXN_END_UPDATE);
|
||||
if (rc == MDBX_SUCCESS) {
|
||||
tASSERT(txn, txn->signature == txn_signature);
|
||||
tASSERT(txn, txn->owner == 0);
|
||||
}
|
||||
return LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
int mdbx_txn_break(MDBX_txn *txn) {
|
||||
do {
|
||||
int rc = check_txn(txn, 0);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
txn->flags |= MDBX_TXN_ERROR;
|
||||
if (txn->flags & MDBX_TXN_RDONLY)
|
||||
break;
|
||||
txn = txn->nested;
|
||||
} while (txn);
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
int mdbx_txn_abort(MDBX_txn *txn) {
|
||||
int rc = check_txn(txn, 0);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
rc = check_env(txn->env, true);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
if ((txn->flags & (MDBX_TXN_RDONLY | MDBX_NOSTICKYTHREADS)) == MDBX_NOSTICKYTHREADS &&
|
||||
unlikely(txn->owner != osal_thread_self())) {
|
||||
mdbx_txn_break(txn);
|
||||
return LOG_IFERR(MDBX_THREAD_MISMATCH);
|
||||
}
|
||||
|
||||
return LOG_IFERR(txn_abort(txn));
|
||||
}
|
||||
|
||||
int mdbx_txn_park(MDBX_txn *txn, bool autounpark) {
|
||||
STATIC_ASSERT(MDBX_TXN_BLOCKED > MDBX_TXN_ERROR);
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
if (unlikely((txn->flags & MDBX_TXN_RDONLY) == 0))
|
||||
return LOG_IFERR(MDBX_TXN_INVALID);
|
||||
|
||||
if (unlikely((txn->flags & MDBX_TXN_ERROR))) {
|
||||
rc = txn_end(txn, TXN_END_RESET | TXN_END_UPDATE);
|
||||
return LOG_IFERR(rc ? rc : MDBX_OUSTED);
|
||||
}
|
||||
|
||||
return LOG_IFERR(txn_park(txn, autounpark));
|
||||
}
|
||||
|
||||
int mdbx_txn_unpark(MDBX_txn *txn, bool restart_if_ousted) {
|
||||
STATIC_ASSERT(MDBX_TXN_BLOCKED > MDBX_TXN_PARKED + MDBX_TXN_ERROR);
|
||||
int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_PARKED - MDBX_TXN_ERROR);
|
||||
if (unlikely(rc != MDBX_SUCCESS))
|
||||
return LOG_IFERR(rc);
|
||||
if (unlikely(!F_ISSET(txn->flags, MDBX_TXN_RDONLY | MDBX_TXN_PARKED)))
|
||||
return MDBX_SUCCESS;
|
||||
|
||||
rc = txn_unpark(txn);
|
||||
if (likely(rc != MDBX_OUSTED) || !restart_if_ousted)
|
||||
return LOG_IFERR(rc);
|
||||
|
||||
tASSERT(txn, txn->flags & MDBX_TXN_FINISHED);
|
||||
rc = txn_renew(txn, MDBX_TXN_RDONLY);
|
||||
return (rc == MDBX_SUCCESS) ? MDBX_RESULT_TRUE : LOG_IFERR(rc);
|
||||
}
|
||||
|
||||
int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits) {
|
||||
tASSERT(txn, (bad_bits & MDBX_TXN_PARKED) && (txn->flags & bad_bits));
|
||||
/* Здесь осознано заложено отличие в поведении припаркованных транзакций:
|
||||
@ -1859,3 +1001,79 @@ int txn_check_badbits_parked(const MDBX_txn *txn, int bad_bits) {
|
||||
tASSERT(txn, bad_bits == MDBX_TXN_BLOCKED || bad_bits == MDBX_TXN_BLOCKED - MDBX_TXN_ERROR);
|
||||
return mdbx_txn_unpark((MDBX_txn *)txn, false);
|
||||
}
|
||||
|
||||
int txn_park(MDBX_txn *txn, bool autounpark) {
|
||||
reader_slot_t *const rslot = txn->to.reader;
|
||||
tASSERT(txn, (txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) == MDBX_TXN_RDONLY);
|
||||
tASSERT(txn, txn->to.reader->tid.weak < MDBX_TID_TXN_OUSTED);
|
||||
if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) != MDBX_TXN_RDONLY))
|
||||
return MDBX_BAD_TXN;
|
||||
|
||||
const uint32_t pid = atomic_load32(&rslot->pid, mo_Relaxed);
|
||||
const uint64_t tid = atomic_load64(&rslot->tid, mo_Relaxed);
|
||||
const uint64_t txnid = atomic_load64(&rslot->txnid, mo_Relaxed);
|
||||
if (unlikely(pid != txn->env->pid)) {
|
||||
ERROR("unexpected pid %u%s%u", pid, " != must ", txn->env->pid);
|
||||
return MDBX_PROBLEM;
|
||||
}
|
||||
if (unlikely(tid != txn->owner || txnid != txn->txnid)) {
|
||||
ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%0zx"
|
||||
" and/or txn-id %" PRIaTXN "%s%" PRIaTXN,
|
||||
tid, " != must ", txn->owner, txnid, " != must ", txn->txnid);
|
||||
return MDBX_BAD_RSLOT;
|
||||
}
|
||||
|
||||
atomic_store64(&rslot->tid, MDBX_TID_TXN_PARKED, mo_AcquireRelease);
|
||||
atomic_store32(&txn->env->lck->rdt_refresh_flag, true, mo_Relaxed);
|
||||
txn->flags += autounpark ? MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK : MDBX_TXN_PARKED;
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
int txn_unpark(MDBX_txn *txn) {
|
||||
if (unlikely((txn->flags & (MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD | MDBX_TXN_RDONLY | MDBX_TXN_PARKED)) !=
|
||||
(MDBX_TXN_RDONLY | MDBX_TXN_PARKED)))
|
||||
return MDBX_BAD_TXN;
|
||||
|
||||
for (reader_slot_t *const rslot = txn->to.reader; rslot; atomic_yield()) {
|
||||
const uint32_t pid = atomic_load32(&rslot->pid, mo_Relaxed);
|
||||
uint64_t tid = safe64_read(&rslot->tid);
|
||||
uint64_t txnid = safe64_read(&rslot->txnid);
|
||||
if (unlikely(pid != txn->env->pid)) {
|
||||
ERROR("unexpected pid %u%s%u", pid, " != expected ", txn->env->pid);
|
||||
return MDBX_PROBLEM;
|
||||
}
|
||||
if (unlikely(tid == MDBX_TID_TXN_OUSTED || txnid >= SAFE64_INVALID_THRESHOLD))
|
||||
break;
|
||||
if (unlikely(tid != MDBX_TID_TXN_PARKED || txnid != txn->txnid)) {
|
||||
ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%" PRIx64 " and/or txn-id %" PRIaTXN "%s%" PRIaTXN, tid, " != must ",
|
||||
MDBX_TID_TXN_OUSTED, txnid, " != must ", txn->txnid);
|
||||
break;
|
||||
}
|
||||
if (unlikely((txn->flags & MDBX_TXN_ERROR)))
|
||||
break;
|
||||
|
||||
#if MDBX_64BIT_CAS
|
||||
if (unlikely(!atomic_cas64(&rslot->tid, MDBX_TID_TXN_PARKED, txn->owner)))
|
||||
continue;
|
||||
#else
|
||||
atomic_store32(&rslot->tid.high, (uint32_t)((uint64_t)txn->owner >> 32), mo_Relaxed);
|
||||
if (unlikely(!atomic_cas32(&rslot->tid.low, (uint32_t)MDBX_TID_TXN_PARKED, (uint32_t)txn->owner))) {
|
||||
atomic_store32(&rslot->tid.high, (uint32_t)(MDBX_TID_TXN_PARKED >> 32), mo_AcquireRelease);
|
||||
continue;
|
||||
}
|
||||
#endif
|
||||
txnid = safe64_read(&rslot->txnid);
|
||||
tid = safe64_read(&rslot->tid);
|
||||
if (unlikely(txnid != txn->txnid || tid != txn->owner)) {
|
||||
ERROR("unexpected thread-id 0x%" PRIx64 "%s0x%zx"
|
||||
" and/or txn-id %" PRIaTXN "%s%" PRIaTXN,
|
||||
tid, " != must ", txn->owner, txnid, " != must ", txn->txnid);
|
||||
break;
|
||||
}
|
||||
txn->flags &= ~(MDBX_TXN_PARKED | MDBX_TXN_AUTOUNPARK);
|
||||
return MDBX_SUCCESS;
|
||||
}
|
||||
|
||||
int err = txn_end(txn, TXN_END_OUSTED | TXN_END_RESET | TXN_END_UPDATE);
|
||||
return err ? err : MDBX_OUSTED;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user