diff --git a/CMakeLists.txt b/CMakeLists.txt index 33e6233d..0f10b877 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ ## -## Copyright 2020-2023 Leonid Yuriev +## Copyright 2020-2024 Leonid Yuriev ## and other libmdbx authors: please see AUTHORS file. ## All rights reserved. ## diff --git a/COPYRIGHT b/COPYRIGHT index 352beaed..d6111220 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,4 +1,4 @@ -Copyright 2015-2023 Leonid Yuriev . +Copyright 2015-2024 Leonid Yuriev . Copyright 2011-2015 Howard Chu, Symas Corp. Copyright 2015,2016 Peter-Service R&D LLC. All rights reserved. diff --git a/ChangeLog.md b/ChangeLog.md index 6978e6b1..32367fc6 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -4,6 +4,86 @@ ChangeLog English version [by Google](https://gitflic-ru.translate.goog/project/erthink/libmdbx/blob?file=ChangeLog.md&_x_tr_sl=ru&_x_tr_tl=en) and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md). +## v0.12.10 "СЭМ" от 2024-03-12 + +Поддерживающий выпуск с исправлением обнаруженных ошибок и устранением недочетов +в память Героя России гвардии майора Дмитрия Семёнова с позывным "СЭМ". + +``` +git diff' stat: 19 commits, 57 files changed, 751 insertions(+), 331 deletions(-) +Signed-off-by: Леонид Юрьев (Leonid Yuriev) +``` + +Благодарности: + + - [Dvir H](https://t.me/Dvirsw) за [сообщение](https://t.me/libmdbx/5368) + об ошибке `MDBX_CORRUPTED` в сценарии работы в режиме `MDBX_DUPFIXED` и нечетной длинной + мульти-значений, с предоставлением точного минимального сценария воспроизведения. + +Значимые исправления и доработки: + + - Устранение унаследованной от LMDB ошибки приводящей к повреждению БД при использовании `MDBX_DUPFIXED`. + Проблема была выявлена при расширении тестов сценариями с длинными мульти-значениями в режиме `MDBX_DUPFIXED`. + Сообщений о проявлении этой ошибки в эксплуатационных/продуктовых средах не поступало. + + На LEAF2-страниц, используемых для компактного хранения мульти-значений фиксированной длины, + выполнялось резервирование места без учета возможности превышения размера страниц, + с последующим переполнением, повреждением структуры БД и/или повреждением содержимого ОЗУ. + + Вероятность проявления ошибки существенно увеличивалась с увеличением размера/длины + мульти-значений/дубликатов и уменьшением размера страницы БД. Поэтому при использовании `MDBX_INTEGERDUP` + вероятность проявления близка к нулю, а сценарий такого проявления найти не удалось. + + В MDBX ошибка присутствовала с момента отделения проекта от LMDB, + где эта ошибка присутствует более 11 лети, по настоящее время. + + - Исправление ложной ошибки `MDBX_CORRUPTED (-30796)` в сценарии работы + в режиме `MDBX_DUPFIXED` и нечетной длинной мульти-значений. + + - Исправление недочета корректировки сопутствующих курсоров при разделении страницы + по сценарию добавления пустой страницы слева. + + - Доработка `rebalance()` ради уменьшения WAF. Новый функционал, включая + контролируемую пользователем опцию `enum MDBX_option_t`, будет доступен + в выпусках ветки `0.13.x`, а в этом выпуске доработка сводится к тактике + не-вовленичения чистой страницы при нехватке запаса страниц в ходе обновления GC, + за счет ухудшения баланса дерева страниц. + + - Устранение упущения приводящего к нелогичной ситуации + `me_dxb_mmap.curren > me_dxb_mmap.limit` при "дребезге" размера БД. + В текущем понимании, последствий кроме срабатывания assert-проверки нет, а + вероятность проявления близка к нулю. + + - Исправление в функционале обхода дерева, используемого утилитой + `mdbx_chk`, подсчета места затраченного на выравнивание на вложенной + под-странице, в случае нечетного количества dupfixed-элементов нечетного + размера. + Сообщений о проявлении этой ошибки в эксплуатационных/продуктовых средах не поступало. + + - Исправление assert-проверки внутри `check_txn()` для случая завершенных транзакций в режиме `MDBX_NO_TLS`. + Последствий ошибки, кроме срабатывания assert-проверки в отладочных сборках, нет. + + - Устранение ошибки при открытии БД на файловой системе только-для-чтения. + + - Удалены излишне строгие проверки в утилите `mdbx_chk`, которые + приводили к ложно-позитивным ошибкам при проверке БД после серии + последних доработок. Подробности см в комментариях к коммитам [781b3f64d52b73cbaeb00a55811d1247c25624a8](https://gitflic.ru/project/erthink/libmdbx/commit/781b3f64d52b73cbaeb00a55811d1247c25624a8) + и [0741c81cfd8dc0864fcf55e04192b2207c8f68f7](https://gitflic.ru/project/erthink/libmdbx/commit/0741c81cfd8dc0864fcf55e04192b2207c8f68f7). + +Прочее: + + - Расширение стохастического теста dupfixed-сценариями. + - Корректировка условия в assert-проверке для `MDBX_TXN_DRAINED_GC`. + - Добавление в jitter-сценарий простого теста сброса и перезапуска читающих транзакций. + - Вынужденное продолжение очистки/рефакторинга унаследованных ребусов в `cursor_put_nochecklen()`. + - Фиксация транзакции при ошибках теста для последующего анализа содержимого БД. + - Сопутствующий рефакторинг `node_shrink()` для ясности исходного кода. + - Приведение в соответствие протоколируемых имен тестов опциям командной строки. + - Добавление cmoke-теста `extra/dupfixed_addodd`. + + +-------------------------------------------------------------------------------- + ## v0.12.9 "Ясень-4" от 2023-12-11 diff --git a/cmake/compiler.cmake b/cmake/compiler.cmake index 762ea1bc..73cd3502 100644 --- a/cmake/compiler.cmake +++ b/cmake/compiler.cmake @@ -1,4 +1,4 @@ -## Copyright (c) 2012-2023 Leonid Yuriev . +## Copyright (c) 2012-2024 Leonid Yuriev . ## ## Licensed under the Apache License, Version 2.0 (the "License"); ## you may not use this file except in compliance with the License. diff --git a/cmake/profile.cmake b/cmake/profile.cmake index f13b6976..cf6bf87b 100644 --- a/cmake/profile.cmake +++ b/cmake/profile.cmake @@ -1,4 +1,4 @@ -## Copyright (c) 2012-2023 Leonid Yuriev . +## Copyright (c) 2012-2024 Leonid Yuriev . ## ## Licensed under the Apache License, Version 2.0 (the "License"); ## you may not use this file except in compliance with the License. diff --git a/cmake/utils.cmake b/cmake/utils.cmake index aa8aef01..0fa57845 100644 --- a/cmake/utils.cmake +++ b/cmake/utils.cmake @@ -1,4 +1,4 @@ -## Copyright (c) 2012-2023 Leonid Yuriev . +## Copyright (c) 2012-2024 Leonid Yuriev . ## ## Licensed under the Apache License, Version 2.0 (the "License"); ## you may not use this file except in compliance with the License. diff --git a/mdbx.h b/mdbx.h index 15122715..43960abf 100644 --- a/mdbx.h +++ b/mdbx.h @@ -25,7 +25,7 @@ _The Future will (be) [Positive](https://www.ptsecurity.com). Всё будет \section copyright LICENSE & COPYRIGHT -\authors Copyright (c) 2015-2023, Leonid Yuriev +\authors Copyright (c) 2015-2024, Leonid Yuriev and other _libmdbx_ authors: please see [AUTHORS](./AUTHORS) file. \copyright Redistribution and use in source and binary forms, with or without diff --git a/mdbx.h++ b/mdbx.h++ index b77d851a..dbe94755 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -1,7 +1,7 @@ /// \file mdbx.h++ /// \brief The libmdbx C++ API header file. /// -/// \author Copyright (c) 2020-2023, Leonid Yuriev . +/// \author Copyright (c) 2020-2024, Leonid Yuriev . /// \copyright SPDX-License-Identifier: Apache-2.0 /// /// Tested with: diff --git a/packages/buildroot/0001-package-libmdbx-new-package-library-database.patch b/packages/buildroot/0001-package-libmdbx-new-package-library-database.patch index 2dfe7ee3..40b50c7e 100644 --- a/packages/buildroot/0001-package-libmdbx-new-package-library-database.patch +++ b/packages/buildroot/0001-package-libmdbx-new-package-library-database.patch @@ -1,4 +1,4 @@ -From 3efdf07a80f750c23de126ac80e78fb0545a1b63 Mon Sep 17 00:00:00 2001 +From 22732cf074188912caea6da6795c36098efb7eb5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 23 Oct 2023 18:07:13 +0300 @@ -13,11 +13,10 @@ This patch adds libmdbx: focused on creating unique lightweight solutions. - libmdbx surpasses the legendary LMDB (Lightning Memory-Mapped Database) in terms of reliability, features and performance. - - https://gitflic.ru/project/erthink/libmdbx + - more information at https://gitflic.ru/project/erthink/libmdbx -The v0.12.8 "Vladimir Utkin" is stable release of frontward _libmdbx_ -branch with new superior features on the day of 100 anniversary of the birth -of the outstanding Soviet and Russian scientist and engineer Vladimir Fedorovich Utkin. +The v0.12.10 "SEM" is stable release of _libmdbx_ branch with new superior features, +in memory of the Hero of Russia Guard Major Dmitry Semenov with the call sign "SEM". The complete ChangeLog: https://gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md @@ -112,18 +111,18 @@ index 0000000000..a9a4ac45c5 + !BR2_TOOLCHAIN_GCC_AT_LEAST_4_4 diff --git a/package/libmdbx/libmdbx.hash b/package/libmdbx/libmdbx.hash new file mode 100644 -index 0000000000..82cf28d6a9 +index 0000000000..1dbdef7c69 --- /dev/null +++ b/package/libmdbx/libmdbx.hash @@ -0,0 +1,5 @@ +# Hashes from: https://libmdbx.dqdkfa.ru/release/SHA256SUMS -+sha256 c78c56c53708bbfc519bf53ebf520d1f09d30ee6427a4bedf713316696e671d0 libmdbx-amalgamated-0.12.8.tar.xz ++sha256 4637e06768a9a8fc7577e6e458e045ad1f9a8baee74996a3c88bc0ad64cbcf67 libmdbx-amalgamated-0.12.10.tar.xz + +# Locally calculated +sha256 310fe25c858a9515fc8c8d7d1f24a67c9496f84a91e0a0e41ea9975b1371e569 LICENSE diff --git a/package/libmdbx/libmdbx.mk b/package/libmdbx/libmdbx.mk new file mode 100644 -index 0000000000..d198fe5b22 +index 0000000000..36df45b64f --- /dev/null +++ b/package/libmdbx/libmdbx.mk @@ -0,0 +1,42 @@ @@ -133,7 +132,7 @@ index 0000000000..d198fe5b22 +# +################################################################################ + -+LIBMDBX_VERSION = 0.12.8 ++LIBMDBX_VERSION = 0.12.10 +LIBMDBX_SOURCE = libmdbx-amalgamated-$(LIBMDBX_VERSION).tar.xz +LIBMDBX_SITE = https://libmdbx.dqdkfa.ru/release +LIBMDBX_SUPPORTS_IN_SOURCE_BUILD = NO @@ -170,5 +169,5 @@ index 0000000000..d198fe5b22 + +$(eval $(cmake-package)) -- -2.42.0 +2.43.2 diff --git a/src/alloy.c b/src/alloy.c index 7d0cf636..d79f269c 100644 --- a/src/alloy.c +++ b/src/alloy.c @@ -1,5 +1,5 @@ /* - * Copyright 2015-2023 Leonid Yuriev + * Copyright 2015-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/src/base.h b/src/base.h index b8a243e8..1c5871f6 100644 --- a/src/base.h +++ b/src/base.h @@ -1,5 +1,5 @@ /* - * Copyright 2015-2023 Leonid Yuriev + * Copyright 2015-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/src/core.c b/src/core.c index 9d142343..bca12197 100644 --- a/src/core.c +++ b/src/core.c @@ -1,5 +1,5 @@ /* - * Copyright 2015-2023 Leonid Yuriev . + * Copyright 2015-2024 Leonid Yuriev . * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * @@ -3334,7 +3334,7 @@ static int __must_check_result node_add_leaf2(MDBX_cursor *mc, size_t indx, const MDBX_val *key); static void node_del(MDBX_cursor *mc, size_t ksize); -static void node_shrink(MDBX_page *mp, size_t indx); +static MDBX_node *node_shrink(MDBX_page *mp, size_t indx, MDBX_node *node); static int __must_check_result node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, bool fromleft); static int __must_check_result node_read(MDBX_cursor *mc, const MDBX_node *leaf, @@ -5589,6 +5589,14 @@ __cold static void meta_troika_dump(const MDBX_env *env, /*----------------------------------------------------------------------------*/ +static __inline MDBX_CONST_FUNCTION MDBX_lockinfo * +lckless_stub(const MDBX_env *env) { + uintptr_t stub = (uintptr_t)&env->x_lckless_stub; + /* align to avoid false-positive alarm from UndefinedBehaviorSanitizer */ + stub = (stub + MDBX_CACHELINE_SIZE - 1) & ~(MDBX_CACHELINE_SIZE - 1); + return (MDBX_lockinfo *)stub; +} + /* Find oldest txnid still referenced. */ static txnid_t find_oldest_reader(MDBX_env *const env, const txnid_t steady) { const uint32_t nothing_changed = MDBX_STRING_TETRAD("None"); @@ -5596,7 +5604,7 @@ static txnid_t find_oldest_reader(MDBX_env *const env, const txnid_t steady) { MDBX_lockinfo *const lck = env->me_lck_mmap.lck; if (unlikely(lck == NULL /* exclusive without-lck mode */)) { - eASSERT(env, env->me_lck == (void *)&env->x_lckless_stub); + eASSERT(env, env->me_lck == lckless_stub(env)); env->me_lck->mti_readers_refresh_flag.weak = nothing_changed; return env->me_lck->mti_oldest_reader.weak = steady; } @@ -6195,6 +6203,7 @@ __cold static int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, #endif /* MDBX_ENABLE_MADVISE */ rc = osal_mresize(mresize_flags, &env->me_dxb_mmap, size_bytes, limit_bytes); + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); #if MDBX_ENABLE_MADVISE if (rc == MDBX_SUCCESS) { @@ -6220,6 +6229,7 @@ __cold static int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, bailout: if (rc == MDBX_SUCCESS) { + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); eASSERT(env, limit_bytes == env->me_dxb_mmap.limit); eASSERT(env, size_bytes <= env->me_dxb_mmap.filesize); if (mode == explicit_resize) @@ -6250,6 +6260,7 @@ bailout: "present %" PRIuPTR " -> %" PRIuPTR ", " "limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d", prev_size, size_bytes, prev_limit, limit_bytes, rc); + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); } if (!env->me_dxb_mmap.base) { env->me_flags |= MDBX_FATAL_ERROR; @@ -7287,7 +7298,7 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *const mc, const size_t num, //--------------------------------------------------------------------------- if (unlikely(!is_gc_usable(txn, mc, flags))) { - eASSERT(env, txn->mt_flags & MDBX_TXN_DRAINED_GC); + eASSERT(env, (txn->mt_flags & MDBX_TXN_DRAINED_GC) || num > 1); goto no_gc; } @@ -7949,7 +7960,7 @@ __hot static int page_touch(MDBX_cursor *mc) { np->mp_txnid = txn->mt_front; return MDBX_SUCCESS; } - tASSERT(txn, !IS_OVERFLOW(mp)); + tASSERT(txn, !IS_OVERFLOW(mp) && !IS_SUBP(mp)); if (IS_FROZEN(txn, mp)) { /* CoW the page */ @@ -8894,8 +8905,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { mo_AcquireRelease); } else { /* exclusive mode without lck */ - eASSERT(env, !env->me_lck_mmap.lck && - env->me_lck == (void *)&env->x_lckless_stub); + eASSERT(env, !env->me_lck_mmap.lck && env->me_lck == lckless_stub(env)); } jitter4testing(true); @@ -9061,6 +9071,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { const size_t used_bytes = pgno2bytes(env, txn->mt_next_pgno); const size_t required_bytes = (txn->mt_flags & MDBX_TXN_RDONLY) ? used_bytes : size_bytes; + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); if (unlikely(required_bytes > env->me_dxb_mmap.current)) { /* Размер БД (для пишущих транзакций) или используемых данных (для * читающих транзакций) больше предыдущего/текущего размера внутри @@ -9078,6 +9089,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { txn->mt_geo.upper, implicit_grow); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); } else if (unlikely(size_bytes < env->me_dxb_mmap.current)) { /* Размер БД меньше предыдущего/текущего размера внутри процесса, можно * уменьшить, но всё сложнее: @@ -9103,11 +9115,15 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { rc = osal_fastmutex_acquire(&env->me_remap_guard); #endif if (likely(rc == MDBX_SUCCESS)) { + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); rc = osal_filesize(env->me_dxb_mmap.fd, &env->me_dxb_mmap.filesize); if (likely(rc == MDBX_SUCCESS)) { eASSERT(env, env->me_dxb_mmap.filesize >= required_bytes); if (env->me_dxb_mmap.current > env->me_dxb_mmap.filesize) - env->me_dxb_mmap.current = (size_t)env->me_dxb_mmap.filesize; + env->me_dxb_mmap.current = + (env->me_dxb_mmap.limit < env->me_dxb_mmap.filesize) + ? env->me_dxb_mmap.limit + : (size_t)env->me_dxb_mmap.filesize; } #if defined(_WIN32) || defined(_WIN64) osal_srwlock_ReleaseShared(&env->me_remap_guard); @@ -9172,10 +9188,11 @@ static __always_inline int check_txn(const MDBX_txn *txn, int bad_bits) { if (unlikely(txn->mt_flags & bad_bits)) return MDBX_BAD_TXN; - tASSERT(txn, (txn->mt_flags & MDBX_NOTLS) == - ((txn->mt_flags & MDBX_TXN_RDONLY) - ? txn->mt_env->me_flags & MDBX_NOTLS - : 0)); + tASSERT(txn, (txn->mt_flags & MDBX_TXN_FINISHED) || + (txn->mt_flags & MDBX_NOTLS) == + ((txn->mt_flags & MDBX_TXN_RDONLY) + ? txn->mt_env->me_flags & MDBX_NOTLS + : 0)); #if MDBX_TXN_CHECKOWNER STATIC_ASSERT(MDBX_NOTLS > MDBX_TXN_FINISHED + MDBX_TXN_RDONLY); if (unlikely(txn->mt_owner != osal_thread_self()) && @@ -10234,7 +10251,9 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx) { const size_t for_all_before_touch = for_relist + for_tree_before_touch; const size_t for_all_after_touch = for_relist + for_tree_after_touch; - if (likely(for_relist < 2 && gcu_backlog_size(txn) > for_all_before_touch)) + if (likely(for_relist < 2 && gcu_backlog_size(txn) > for_all_before_touch) && + (ctx->cursor.mc_snum == 0 || + IS_MODIFIABLE(txn, ctx->cursor.mc_pg[ctx->cursor.mc_top]))) return MDBX_SUCCESS; TRACE(">> retired-stored %zu, left %zi, backlog %zu, need %zu (4list %zu, " @@ -13078,13 +13097,23 @@ __cold static void setup_pagesize(MDBX_env *env, const size_t pagesize) { leaf_nodemax > (intptr_t)(sizeof(MDBX_db) + NODESIZE + 42) && leaf_nodemax >= branch_nodemax && leaf_nodemax < (int)UINT16_MAX && leaf_nodemax % 2 == 0); - env->me_leaf_nodemax = (unsigned)leaf_nodemax; - env->me_branch_nodemax = (unsigned)branch_nodemax; + env->me_leaf_nodemax = (uint16_t)leaf_nodemax; + env->me_branch_nodemax = (uint16_t)branch_nodemax; env->me_psize2log = (uint8_t)log2n_powerof2(pagesize); eASSERT(env, pgno2bytes(env, 1) == pagesize); eASSERT(env, bytes2pgno(env, pagesize + pagesize) == 2); recalculate_merge_threshold(env); + /* TODO: recalculate me_subpage_xyz values from MDBX_opt_subpage_xyz. */ + env->me_subpage_limit = env->me_leaf_nodemax - NODESIZE; + env->me_subpage_room_threshold = 0; + env->me_subpage_reserve_prereq = env->me_leaf_nodemax; + env->me_subpage_reserve_limit = env->me_subpage_limit / 42; + eASSERT(env, + env->me_subpage_reserve_prereq > + env->me_subpage_room_threshold + env->me_subpage_reserve_limit); + eASSERT(env, env->me_leaf_nodemax >= env->me_subpage_limit + NODESIZE); + const pgno_t max_pgno = bytes2pgno(env, MAX_MAPSIZE); if (!env->me_options.flags.non_auto.dp_limit) { /* auto-setup dp_limit by "The42" ;-) */ @@ -13114,14 +13143,6 @@ __cold static void setup_pagesize(MDBX_env *env, const size_t pagesize) { env->me_options.dp_initial = env->me_options.dp_limit; } -static __inline MDBX_CONST_FUNCTION MDBX_lockinfo * -lckless_stub(const MDBX_env *env) { - uintptr_t stub = (uintptr_t)&env->x_lckless_stub; - /* align to avoid false-positive alarm from UndefinedBehaviorSanitizer */ - stub = (stub + MDBX_CACHELINE_SIZE - 1) & ~(MDBX_CACHELINE_SIZE - 1); - return (MDBX_lockinfo *)stub; -} - __cold int mdbx_env_create(MDBX_env **penv) { if (unlikely(!penv)) return MDBX_EINVAL; @@ -15230,6 +15251,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, if (rc == MDBX_RESULT_TRUE) { env->me_incore = true; NOTICE("%s", "in-core database"); + rc = MDBX_SUCCESS; } else if (unlikely(rc != MDBX_SUCCESS)) { ERROR("check_fs_incore(), err %d", rc); goto bailout; @@ -15691,8 +15713,12 @@ __hot static __always_inline int page_get_checker_lite(const uint16_t ILL, if (((ILL & P_OVERFLOW) || !IS_OVERFLOW(page)) && (ILL & (P_BRANCH | P_LEAF | P_LEAF2)) == 0) { - if (unlikely(page->mp_upper < page->mp_lower || - ((page->mp_lower | page->mp_upper) & 1) || + /* Контроль четности page->mp_upper тут либо приводит к ложным ошибкам, + * либо слишком дорог по количеству операций. Заковырка в том, что mp_upper + * может быть нечетным на LEAF2-страницах, при нечетном количестве элементов + * нечетной длины. Поэтому четность page->mp_upper здесь не проверяется, но + * соответствующие полные проверки есть в page_check(). */ + if (unlikely(page->mp_upper < page->mp_lower || (page->mp_lower & 1) || PAGEHDRSZ + page->mp_upper > txn->mt_env->me_psize)) return bad_page(page, "invalid page' lower(%u)/upper(%u) with limit %zu\n", @@ -17272,6 +17298,26 @@ static __hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, return rc; } +static size_t leaf2_reserve(const MDBX_env *const env, size_t host_page_room, + size_t subpage_len, size_t item_len) { + eASSERT(env, (subpage_len & 1) == 0); + eASSERT(env, + env->me_subpage_reserve_prereq > env->me_subpage_room_threshold + + env->me_subpage_reserve_limit && + env->me_leaf_nodemax >= env->me_subpage_limit + NODESIZE); + size_t reserve = 0; + for (size_t n = 0; + n < 5 && reserve + item_len <= env->me_subpage_reserve_limit && + EVEN(subpage_len + item_len) <= env->me_subpage_limit && + host_page_room >= + env->me_subpage_reserve_prereq + EVEN(subpage_len + item_len); + ++n) { + subpage_len += item_len; + reserve += item_len; + } + return reserve + (subpage_len & 1); +} + static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, unsigned flags) { int err; @@ -17339,11 +17385,11 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, rc = MDBX_NO_ROOT; } else if ((flags & MDBX_CURRENT) == 0) { bool exact = false; - MDBX_val lastkey, olddata; + MDBX_val last_key, old_data; if ((flags & MDBX_APPEND) && mc->mc_db->md_entries > 0) { - rc = cursor_last(mc, &lastkey, &olddata); + rc = cursor_last(mc, &last_key, &old_data); if (likely(rc == MDBX_SUCCESS)) { - const int cmp = mc->mc_dbx->md_cmp(key, &lastkey); + const int cmp = mc->mc_dbx->md_cmp(key, &last_key); if (likely(cmp > 0)) { mc->mc_ki[mc->mc_top]++; /* step forward for appending */ rc = MDBX_NOTFOUND; @@ -17358,7 +17404,7 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, } else { struct cursor_set_result csr = /* olddata may not be updated in case LEAF2-page of dupfixed-subDB */ - cursor_set(mc, (MDBX_val *)key, &olddata, MDBX_SET); + cursor_set(mc, (MDBX_val *)key, &old_data, MDBX_SET); rc = csr.err; exact = csr.exact; } @@ -17366,14 +17412,14 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (exact) { if (unlikely(flags & MDBX_NOOVERWRITE)) { DEBUG("duplicate key [%s]", DKEY_DEBUG(key)); - *data = olddata; + *data = old_data; return MDBX_KEYEXIST; } if (unlikely(mc->mc_flags & C_SUB)) { /* nested subtree of DUPSORT-database with the same key, * nothing to update */ eASSERT(env, data->iov_len == 0 && - (olddata.iov_len == 0 || + (old_data.iov_len == 0 || /* olddata may not be updated in case LEAF2-page of dupfixed-subDB */ (mc->mc_db->md_flags & MDBX_DUPFIXED))); @@ -17389,8 +17435,8 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, exact = false; } else if (!(flags & (MDBX_RESERVE | MDBX_MULTIPLE))) { /* checking for early exit without dirtying pages */ - if (unlikely(eq_fast(data, &olddata))) { - cASSERT(mc, mc->mc_dbx->md_dcmp(data, &olddata) == 0); + if (unlikely(eq_fast(data, &old_data))) { + cASSERT(mc, mc->mc_dbx->md_dcmp(data, &old_data) == 0); if (mc->mc_xcursor) { if (flags & MDBX_NODUPDATA) return MDBX_KEYEXIST; @@ -17400,7 +17446,7 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, /* the same data, nothing to update */ return MDBX_SUCCESS; } - cASSERT(mc, mc->mc_dbx->md_dcmp(data, &olddata) != 0); + cASSERT(mc, mc->mc_dbx->md_dcmp(data, &old_data) != 0); } } } else if (unlikely(rc != MDBX_NOTFOUND)) @@ -17408,17 +17454,16 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, } mc->mc_flags &= ~C_DEL; - MDBX_val xdata, *rdata = data; - size_t mcount = 0, dcount = 0; + MDBX_val xdata, *ref_data = data; + size_t *batch_dupfixed_done = nullptr, batch_dupfixed_given = 0; if (unlikely(flags & MDBX_MULTIPLE)) { - dcount = data[1].iov_len; - data[1].iov_len = 0 /* reset done item counter */; - rdata = &xdata; - xdata.iov_len = data->iov_len * dcount; + batch_dupfixed_given = data[1].iov_len; + batch_dupfixed_done = &data[1].iov_len; + *batch_dupfixed_done = 0; } /* Cursor is positioned, check for room in the dirty list */ - err = cursor_touch(mc, key, rdata); + err = cursor_touch(mc, key, ref_data); if (unlikely(err)) return err; @@ -17447,13 +17492,13 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, mc->mc_xcursor->mx_dbx.md_klen_min = mc->mc_xcursor->mx_dbx.md_klen_max = data->iov_len); + if (mc->mc_flags & C_SUB) + npr.page->mp_flags |= P_LEAF2; } - if ((mc->mc_db->md_flags & (MDBX_DUPSORT | MDBX_DUPFIXED)) == MDBX_DUPFIXED) - npr.page->mp_flags |= P_LEAF2; mc->mc_flags |= C_INITIALIZED; } - MDBX_val dkey, olddata; + MDBX_val old_singledup, old_data; MDBX_db nested_dupdb; MDBX_page *sub_root = nullptr; bool insert_key, insert_data; @@ -17461,19 +17506,19 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, MDBX_page *fp = env->me_pbuf; fp->mp_txnid = mc->mc_txn->mt_front; insert_key = insert_data = (rc != MDBX_SUCCESS); - dkey.iov_base = nullptr; + old_singledup.iov_base = nullptr; if (insert_key) { /* The key does not exist */ DEBUG("inserting key at index %i", mc->mc_ki[mc->mc_top]); if ((mc->mc_db->md_flags & MDBX_DUPSORT) && node_size(key, data) > env->me_leaf_nodemax) { /* Too big for a node, insert in sub-DB. Set up an empty - * "old sub-page" for prep_subDB to expand to a full page. */ + * "old sub-page" for convert_to_subtree to expand to a full page. */ fp->mp_leaf2_ksize = (mc->mc_db->md_flags & MDBX_DUPFIXED) ? (uint16_t)data->iov_len : 0; fp->mp_lower = fp->mp_upper = 0; - olddata.iov_len = PAGEHDRSZ; - goto prep_subDB; + old_data.iov_len = PAGEHDRSZ; + goto convert_to_subtree; } } else { /* there's only a key anyway, so this is a no-op */ @@ -17518,7 +17563,8 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (unlikely(err != MDBX_SUCCESS)) return err; } - MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + MDBX_node *const node = + page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); /* Large/Overflow page overwrites need special handling */ if (unlikely(node_flags(node) & F_BIGDATA)) { @@ -17592,19 +17638,18 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if ((err = page_retire(mc, lp.page)) != MDBX_SUCCESS) return err; } else { - olddata.iov_len = node_ds(node); - olddata.iov_base = node_data(node); - cASSERT(mc, ptr_disp(olddata.iov_base, olddata.iov_len) <= + old_data.iov_len = node_ds(node); + old_data.iov_base = node_data(node); + cASSERT(mc, ptr_disp(old_data.iov_base, old_data.iov_len) <= ptr_disp(mc->mc_pg[mc->mc_top], env->me_psize)); /* DB has dups? */ if (mc->mc_db->md_flags & MDBX_DUPSORT) { /* Prepare (sub-)page/sub-DB to accept the new item, if needed. * fp: old sub-page or a header faking it. - * mp: new (sub-)page. offset: growth in page size. - * xdata: node data with new page or DB. */ - size_t i; - size_t offset = 0; + * mp: new (sub-)page. + * xdata: node data with new sub-page or sub-DB. */ + size_t growth = 0; /* growth in page size.*/ MDBX_page *mp = fp = xdata.iov_base = env->me_pbuf; mp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno; @@ -17612,19 +17657,19 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (!(node_flags(node) & F_DUPDATA)) { /* does data match? */ if (flags & MDBX_APPENDDUP) { - const int cmp = mc->mc_dbx->md_dcmp(data, &olddata); - cASSERT(mc, cmp != 0 || eq_fast(data, &olddata)); + const int cmp = mc->mc_dbx->md_dcmp(data, &old_data); + cASSERT(mc, cmp != 0 || eq_fast(data, &old_data)); if (unlikely(cmp <= 0)) return MDBX_EKEYMISMATCH; - } else if (eq_fast(data, &olddata)) { - cASSERT(mc, mc->mc_dbx->md_dcmp(data, &olddata) == 0); + } else if (eq_fast(data, &old_data)) { + cASSERT(mc, mc->mc_dbx->md_dcmp(data, &old_data) == 0); if (flags & MDBX_NODUPDATA) return MDBX_KEYEXIST; /* data is match exactly byte-to-byte, nothing to update */ rc = MDBX_SUCCESS; - if (likely((flags & MDBX_MULTIPLE) == 0)) - return rc; - goto continue_multiple; + if (unlikely(batch_dupfixed_done)) + goto batch_dupfixed_continue; + return rc; } /* Just overwrite the current item */ @@ -17634,62 +17679,143 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, } /* Back up original data item */ - memcpy(dkey.iov_base = fp + 1, olddata.iov_base, - dkey.iov_len = olddata.iov_len); + memcpy(old_singledup.iov_base = fp + 1, old_data.iov_base, + old_singledup.iov_len = old_data.iov_len); /* Make sub-page header for the dup items, with dummy body */ fp->mp_flags = P_LEAF | P_SUBP; fp->mp_lower = 0; - xdata.iov_len = PAGEHDRSZ + dkey.iov_len + data->iov_len; + xdata.iov_len = PAGEHDRSZ + old_data.iov_len + data->iov_len; if (mc->mc_db->md_flags & MDBX_DUPFIXED) { fp->mp_flags |= P_LEAF2; fp->mp_leaf2_ksize = (uint16_t)data->iov_len; - xdata.iov_len += 2 * data->iov_len; /* leave space for 2 more */ - cASSERT(mc, xdata.iov_len <= env->me_psize); + /* Будем создавать LEAF2-страницу, как минимум с двумя элементами. + * При коротких значениях и наличии свободного места можно сделать + * некоторое резервирование места, чтобы при последующих добавлениях + * не сразу расширять созданную под-страницу. + * Резервирование в целом сомнительно (см ниже), но может сработать + * в плюс (а если в минус то несущественный) при коротких ключах. */ + xdata.iov_len += leaf2_reserve( + env, page_room(mc->mc_pg[mc->mc_top]) + old_data.iov_len, + xdata.iov_len, data->iov_len); + cASSERT(mc, (xdata.iov_len & 1) == 0); } else { xdata.iov_len += 2 * (sizeof(indx_t) + NODESIZE) + - (dkey.iov_len & 1) + (data->iov_len & 1); - cASSERT(mc, xdata.iov_len <= env->me_psize); + (old_data.iov_len & 1) + (data->iov_len & 1); } + cASSERT(mc, (xdata.iov_len & 1) == 0); fp->mp_upper = (uint16_t)(xdata.iov_len - PAGEHDRSZ); - olddata.iov_len = xdata.iov_len; /* pretend olddata is fp */ + old_data.iov_len = xdata.iov_len; /* pretend olddata is fp */ } else if (node_flags(node) & F_SUBDATA) { /* Data is on sub-DB, just store it */ flags |= F_DUPDATA | F_SUBDATA; - goto put_sub; + goto dupsort_put; } else { /* Data is on sub-page */ - fp = olddata.iov_base; + fp = old_data.iov_base; switch (flags) { default: - if (!(mc->mc_db->md_flags & MDBX_DUPFIXED)) { - offset = node_size(data, nullptr) + sizeof(indx_t); - break; + growth = IS_LEAF2(fp) ? fp->mp_leaf2_ksize + : (node_size(data, nullptr) + sizeof(indx_t)); + if (page_room(fp) >= growth) { + /* На текущей под-странице есть место для добавления элемента. + * Оптимальнее продолжить использовать эту страницу, ибо + * добавление вложенного дерева увеличит WAF на одну страницу. */ + goto continue_subpage; } - offset = fp->mp_leaf2_ksize; - if (page_room(fp) < offset) { - offset *= 4; /* space for 4 more */ - break; - } - /* FALLTHRU: Big enough MDBX_DUPFIXED sub-page */ - __fallthrough; + /* На текущей под-странице нет места для еще одного элемента. + * Можно либо увеличить эту под-страницу, либо вынести куст + * значений во вложенное дерево. + * + * Продолжать использовать текущую под-страницу возможно + * только пока и если размер после добавления элемента будет + * меньше me_leaf_nodemax. Соответственно, при превышении + * просто сразу переходим на вложенное дерево. */ + xdata.iov_len = old_data.iov_len + (growth += growth & 1); + if (xdata.iov_len > env->me_subpage_limit) + goto convert_to_subtree; + + /* Можно либо увеличить под-страницу, в том числе с некоторым + * запасом, либо перейти на вложенное поддерево. + * + * Резервирование места на под-странице представляется сомнительным: + * - Резервирование увеличит рыхлость страниц, в том числе + * вероятность разделения основной/гнездовой страницы; + * - Сложно предсказать полезный размер резервирования, + * особенно для не-MDBX_DUPFIXED; + * - Наличие резерва позволяет съекономить только на перемещении + * части элементов основной/гнездовой страницы при последующих + * добавлениях в нее элементов. Причем после первого изменения + * размера под-страницы, её тело будет примыкать + * к неиспользуемому месту на основной/гнездовой странице, + * поэтому последующие последовательные добавления потребуют + * только передвижения в mp_ptrs[]. + * + * Соответственно, более важным/определяющим представляется + * своевременный переход к вложеному дереву, но тут достаточно + * сложный конфликт интересов: + * - При склонности к переходу к вложенным деревьям, суммарно + * в БД будет большее кол-во более рыхлых страниц. Это увеличит + * WAF, а также RAF при последовательных чтениях большой БД. + * Однако, при коротких ключах и большом кол-ве + * дубликатов/мультизначений, плотность ключей в листовых + * страницах основного дерева будет выше. Соответственно, будет + * пропорционально меньше branch-страниц. Поэтому будет выше + * вероятность оседания/не-вымывания страниц основного дерева из + * LRU-кэша, а также попадания в write-back кэш при записи. + * - Наоботот, при склонности к использованию под-страниц, будут + * наблюдаться обратные эффекты. Плюс некоторые накладные расходы + * на лишнее копирование данных под-страниц в сценариях + * нескольких обонвлений дубликатов одного куста в одной + * транзакции. + * + * Суммарно наиболее рациональным представляется такая тактика: + * - Вводим три порога subpage_limit, subpage_room_threshold + * и subpage_reserve_prereq, которые могут быть + * заданы/скорректированы пользователем в ‰ от me_leaf_nodemax; + * - Используем под-страницу пока её размер меньше subpage_limit + * и на основной/гнездовой странице не-менее + * subpage_room_threshold свободного места; + * - Резервируем место только для 1-3 коротких dupfixed-элементов, + * расширяя размер под-страницы на размер кэш-линии ЦПУ, но + * только если на странице не менее subpage_reserve_prereq + * свободного места. + * - По-умолчанию устанавливаем: + * subpage_limit = me_leaf_nodemax (1000‰); + * subpage_room_threshold = 0; + * subpage_reserve_prereq = me_leaf_nodemax (1000‰). + */ + if (IS_LEAF2(fp)) + growth += leaf2_reserve( + env, page_room(mc->mc_pg[mc->mc_top]) + old_data.iov_len, + xdata.iov_len, data->iov_len); + break; + case MDBX_CURRENT | MDBX_NODUPDATA: case MDBX_CURRENT: + continue_subpage: fp->mp_txnid = mc->mc_txn->mt_front; fp->mp_pgno = mp->mp_pgno; mc->mc_xcursor->mx_cursor.mc_pg[0] = fp; flags |= F_DUPDATA; - goto put_sub; + goto dupsort_put; } - xdata.iov_len = olddata.iov_len + offset; + xdata.iov_len = old_data.iov_len + growth; + cASSERT(mc, (xdata.iov_len & 1) == 0); } fp_flags = fp->mp_flags; - if (node_size_len(node_ks(node), xdata.iov_len) > - env->me_leaf_nodemax) { + if (xdata.iov_len > env->me_subpage_limit || + node_size_len(node_ks(node), xdata.iov_len) > + env->me_leaf_nodemax || + (env->me_subpage_room_threshold && + page_room(mc->mc_pg[mc->mc_top]) + + node_size_len(node_ks(node), old_data.iov_len) < + env->me_subpage_room_threshold + + node_size_len(node_ks(node), xdata.iov_len))) { /* Too big for a sub-page, convert to sub-DB */ + convert_to_subtree: fp_flags &= ~P_SUBP; - prep_subDB: nested_dupdb.md_xsize = 0; nested_dupdb.md_flags = flags_db2sub(mc->mc_db->md_flags); if (mc->mc_db->md_flags & MDBX_DUPFIXED) { @@ -17708,8 +17834,9 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (unlikely(par.err != MDBX_SUCCESS)) return par.err; mc->mc_db->md_leaf_pages += 1; - cASSERT(mc, env->me_psize > olddata.iov_len); - offset = env->me_psize - (unsigned)olddata.iov_len; + cASSERT(mc, env->me_psize > old_data.iov_len); + growth = env->me_psize - (unsigned)old_data.iov_len; + cASSERT(mc, (growth & 1) == 0); flags |= F_DUPDATA | F_SUBDATA; nested_dupdb.md_root = mp->mp_pgno; nested_dupdb.md_seq = 0; @@ -17721,29 +17848,33 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, mp->mp_txnid = mc->mc_txn->mt_front; mp->mp_leaf2_ksize = fp->mp_leaf2_ksize; mp->mp_lower = fp->mp_lower; - cASSERT(mc, fp->mp_upper + offset <= UINT16_MAX); - mp->mp_upper = (indx_t)(fp->mp_upper + offset); + cASSERT(mc, fp->mp_upper + growth < UINT16_MAX); + mp->mp_upper = fp->mp_upper + (indx_t)growth; if (unlikely(fp_flags & P_LEAF2)) { memcpy(page_data(mp), page_data(fp), page_numkeys(fp) * fp->mp_leaf2_ksize); + cASSERT(mc, + (((mp->mp_leaf2_ksize & page_numkeys(mp)) ^ mp->mp_upper) & + 1) == 0); } else { + cASSERT(mc, (mp->mp_upper & 1) == 0); memcpy(ptr_disp(mp, mp->mp_upper + PAGEHDRSZ), ptr_disp(fp, fp->mp_upper + PAGEHDRSZ), - olddata.iov_len - fp->mp_upper - PAGEHDRSZ); + old_data.iov_len - fp->mp_upper - PAGEHDRSZ); memcpy(mp->mp_ptrs, fp->mp_ptrs, page_numkeys(fp) * sizeof(mp->mp_ptrs[0])); - for (i = 0; i < page_numkeys(fp); i++) { - cASSERT(mc, mp->mp_ptrs[i] + offset <= UINT16_MAX); - mp->mp_ptrs[i] += (indx_t)offset; + for (size_t i = 0; i < page_numkeys(fp); i++) { + cASSERT(mc, mp->mp_ptrs[i] + growth <= UINT16_MAX); + mp->mp_ptrs[i] += (indx_t)growth; } } } if (!insert_key) node_del(mc, 0); - rdata = &xdata; + ref_data = &xdata; flags |= F_DUPDATA; - goto new_sub; + goto insert_node; } /* MDBX passes F_SUBDATA in 'flags' to write a DB record */ @@ -17751,15 +17882,15 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, return MDBX_INCOMPATIBLE; current: - if (data->iov_len == olddata.iov_len) { + if (data->iov_len == old_data.iov_len) { cASSERT(mc, EVEN(key->iov_len) == EVEN(node_ks(node))); /* same size, just replace it. Note that we could * also reuse this node if the new data is smaller, * but instead we opt to shrink the node in that case. */ if (flags & MDBX_RESERVE) - data->iov_base = olddata.iov_base; + data->iov_base = old_data.iov_base; else if (!(mc->mc_flags & C_SUB)) - memcpy(olddata.iov_base, data->iov_base, data->iov_len); + memcpy(old_data.iov_base, data->iov_base, data->iov_len); else { cASSERT(mc, page_numkeys(mc->mc_pg[mc->mc_top]) == 1); cASSERT(mc, PAGETYPE_COMPAT(mc->mc_pg[mc->mc_top]) == P_LEAF); @@ -17784,14 +17915,15 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, node_del(mc, 0); } - rdata = data; + ref_data = data; -new_sub:; +insert_node:; const unsigned naf = flags & NODE_ADD_FLAGS; - size_t nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) ? key->iov_len - : leaf_size(env, key, rdata); + size_t nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) + ? key->iov_len + : leaf_size(env, key, ref_data); if (page_room(mc->mc_pg[mc->mc_top]) < nsize) { - rc = page_split(mc, key, rdata, P_INVALID, + rc = page_split(mc, key, ref_data, P_INVALID, insert_key ? naf : naf | MDBX_SPLIT_REPLACE); if (rc == MDBX_SUCCESS && AUDIT_ENABLED()) rc = insert_key ? cursor_check(mc) : cursor_check_updating(mc); @@ -17799,25 +17931,25 @@ new_sub:; /* There is room already in this leaf page. */ if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { cASSERT(mc, !(naf & (F_BIGDATA | F_SUBDATA | F_DUPDATA)) && - rdata->iov_len == 0); + ref_data->iov_len == 0); rc = node_add_leaf2(mc, mc->mc_ki[mc->mc_top], key); } else - rc = node_add_leaf(mc, mc->mc_ki[mc->mc_top], key, rdata, naf); + rc = node_add_leaf(mc, mc->mc_ki[mc->mc_top], key, ref_data, naf); if (likely(rc == 0)) { /* Adjust other cursors pointing to mp */ const MDBX_dbi dbi = mc->mc_dbi; - const size_t i = mc->mc_top; - MDBX_page *const mp = mc->mc_pg[i]; + const size_t top = mc->mc_top; + MDBX_page *const mp = mc->mc_pg[top]; for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? &m2->mc_xcursor->mx_cursor : m2; - if (m3 == mc || m3->mc_snum < mc->mc_snum || m3->mc_pg[i] != mp) + if (m3 == mc || m3->mc_snum < mc->mc_snum || m3->mc_pg[top] != mp) continue; - if (m3->mc_ki[i] >= mc->mc_ki[i]) - m3->mc_ki[i] += insert_key; + if (m3->mc_ki[top] >= mc->mc_ki[top]) + m3->mc_ki[top] += insert_key; if (XCURSOR_INITED(m3)) - XCURSOR_REFRESH(m3, mp, m3->mc_ki[i]); + XCURSOR_REFRESH(m3, mp, m3->mc_ki[top]); } } } @@ -17828,18 +17960,18 @@ new_sub:; * size limits on dupdata. The actual data fields of the child * DB are all zero size. */ if (flags & F_DUPDATA) { - unsigned xflags; - size_t ecount; - put_sub: - xdata.iov_len = 0; - xdata.iov_base = nullptr; + MDBX_val empty; + dupsort_put: + empty.iov_len = 0; + empty.iov_base = nullptr; MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); #define SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE 1 STATIC_ASSERT( (MDBX_NODUPDATA >> SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE) == MDBX_NOOVERWRITE); - xflags = MDBX_CURRENT | ((flags & MDBX_NODUPDATA) >> - SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE); + unsigned xflags = + MDBX_CURRENT | ((flags & MDBX_NODUPDATA) >> + SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE); if ((flags & MDBX_CURRENT) == 0) { xflags -= MDBX_CURRENT; err = cursor_xinit1(mc, node, mc->mc_pg[mc->mc_top]); @@ -17849,80 +17981,78 @@ new_sub:; if (sub_root) mc->mc_xcursor->mx_cursor.mc_pg[0] = sub_root; /* converted, write the original data first */ - if (dkey.iov_base) { - rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, - xflags); + if (old_singledup.iov_base) { + rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, &old_singledup, + &empty, xflags); if (unlikely(rc)) - goto bad_sub; + goto dupsort_error; } if (!(node_flags(node) & F_SUBDATA) || sub_root) { /* Adjust other cursors pointing to mp */ - MDBX_cursor *m2; - MDBX_xcursor *mx = mc->mc_xcursor; - size_t i = mc->mc_top; - MDBX_page *mp = mc->mc_pg[i]; + MDBX_xcursor *const mx = mc->mc_xcursor; + const size_t top = mc->mc_top; + MDBX_page *const mp = mc->mc_pg[top]; const intptr_t nkeys = page_numkeys(mp); - for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { + for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; + m2 = m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (!(m2->mc_flags & C_INITIALIZED)) continue; - if (m2->mc_pg[i] == mp) { - if (m2->mc_ki[i] == mc->mc_ki[i]) { - err = cursor_xinit2(m2, mx, dkey.iov_base != nullptr); + if (m2->mc_pg[top] == mp) { + if (m2->mc_ki[top] == mc->mc_ki[top]) { + err = cursor_xinit2(m2, mx, old_singledup.iov_base != nullptr); if (unlikely(err != MDBX_SUCCESS)) return err; - } else if (!insert_key && m2->mc_ki[i] < nkeys) { - XCURSOR_REFRESH(m2, mp, m2->mc_ki[i]); + } else if (!insert_key && m2->mc_ki[top] < nkeys) { + XCURSOR_REFRESH(m2, mp, m2->mc_ki[top]); } } } } cASSERT(mc, mc->mc_xcursor->mx_db.md_entries < PTRDIFF_MAX); - ecount = (size_t)mc->mc_xcursor->mx_db.md_entries; + const size_t probe = (size_t)mc->mc_xcursor->mx_db.md_entries; #define SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND 1 STATIC_ASSERT((MDBX_APPENDDUP >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND) == MDBX_APPEND); xflags |= (flags & MDBX_APPENDDUP) >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND; - rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, data, &xdata, + rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, data, &empty, xflags); if (flags & F_SUBDATA) { void *db = node_data(node); mc->mc_xcursor->mx_db.md_mod_txnid = mc->mc_txn->mt_txnid; memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDBX_db)); } - insert_data = (ecount != (size_t)mc->mc_xcursor->mx_db.md_entries); + insert_data = (probe != (size_t)mc->mc_xcursor->mx_db.md_entries); } /* Increment count unless we just replaced an existing item. */ if (insert_data) mc->mc_db->md_entries++; if (insert_key) { - /* Invalidate txn if we created an empty sub-DB */ - if (unlikely(rc)) - goto bad_sub; + if (unlikely(rc != MDBX_SUCCESS)) + goto dupsort_error; /* If we succeeded and the key didn't exist before, * make sure the cursor is marked valid. */ mc->mc_flags |= C_INITIALIZED; } - if (unlikely(flags & MDBX_MULTIPLE)) { - if (likely(rc == MDBX_SUCCESS)) { - continue_multiple: - mcount++; + if (likely(rc == MDBX_SUCCESS)) { + if (unlikely(batch_dupfixed_done)) { + batch_dupfixed_continue: /* let caller know how many succeeded, if any */ - data[1].iov_len = mcount; - if (mcount < dcount) { + if ((*batch_dupfixed_done += 1) < batch_dupfixed_given) { data[0].iov_base = ptr_disp(data[0].iov_base, data[0].iov_len); insert_key = insert_data = false; - dkey.iov_base = nullptr; + old_singledup.iov_base = nullptr; goto more; } } + if (AUDIT_ENABLED()) + rc = cursor_check(mc); } - if (rc == MDBX_SUCCESS && AUDIT_ENABLED()) - rc = cursor_check(mc); return rc; - bad_sub: + + dupsort_error: if (unlikely(rc == MDBX_KEYEXIST)) { /* should not happen, we deleted that item */ ERROR("Unexpected %i error while put to nested dupsort's hive", rc); @@ -18086,6 +18216,7 @@ static __hot int cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { return rc; MDBX_page *mp = mc->mc_pg[mc->mc_top]; + cASSERT(mc, IS_MODIFIABLE(mc->mc_txn, mp)); if (!MDBX_DISABLE_VALIDATION && unlikely(!CHECK_LEAF_TYPE(mc, mp))) { ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", mp->mp_pgno, mp->mp_flags); @@ -18104,7 +18235,7 @@ static __hot int cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { if (!(node_flags(node) & F_SUBDATA)) mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node); rc = cursor_del(&mc->mc_xcursor->mx_cursor, 0); - if (unlikely(rc)) + if (unlikely(rc != MDBX_SUCCESS)) return rc; /* If sub-DB still has entries, we're done */ if (mc->mc_xcursor->mx_db.md_entries) { @@ -18113,11 +18244,10 @@ static __hot int cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { mc->mc_xcursor->mx_db.md_mod_txnid = mc->mc_txn->mt_txnid; memcpy(node_data(node), &mc->mc_xcursor->mx_db, sizeof(MDBX_db)); } else { - /* shrink fake page */ - node_shrink(mp, mc->mc_ki[mc->mc_top]); - node = page_node(mp, mc->mc_ki[mc->mc_top]); + /* shrink sub-page */ + node = node_shrink(mp, mc->mc_ki[mc->mc_top], node); mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node); - /* fix other sub-DB cursors pointed at fake pages on this page */ + /* fix other sub-DB cursors pointed at sub-pages on this page */ for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) @@ -18344,6 +18474,7 @@ __hot static int __must_check_result node_add_leaf2(MDBX_cursor *mc, const size_t ksize = mc->mc_db->md_xsize; cASSERT(mc, ksize == key->iov_len); const size_t nkeys = page_numkeys(mp); + cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); /* Just using these for counting */ const intptr_t lower = mp->mp_lower + sizeof(indx_t); @@ -18363,6 +18494,8 @@ __hot static int __must_check_result node_add_leaf2(MDBX_cursor *mc, memmove(ptr_disp(ptr, ksize), ptr, diff * ksize); /* insert new key */ memcpy(ptr, key->iov_base, ksize); + + cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); return MDBX_SUCCESS; } @@ -18529,6 +18662,7 @@ __hot static void node_del(MDBX_cursor *mc, size_t ksize) { mp->mp_lower -= sizeof(indx_t); cASSERT(mc, (size_t)UINT16_MAX - mp->mp_upper >= ksize - sizeof(indx_t)); mp->mp_upper += (indx_t)(ksize - sizeof(indx_t)); + cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); return; } @@ -18568,35 +18702,28 @@ __hot static void node_del(MDBX_cursor *mc, size_t ksize) { /* Compact the main page after deleting a node on a subpage. * [in] mp The main page to operate on. * [in] indx The index of the subpage on the main page. */ -static void node_shrink(MDBX_page *mp, size_t indx) { - MDBX_node *node; - MDBX_page *sp, *xp; - size_t nsize, delta, len, ptr; - intptr_t i; - - node = page_node(mp, indx); - sp = (MDBX_page *)node_data(node); - delta = page_room(sp); - assert(delta > 0); +static MDBX_node *node_shrink(MDBX_page *mp, size_t indx, MDBX_node *node) { + assert(node = page_node(mp, indx)); + MDBX_page *sp = (MDBX_page *)node_data(node); + assert(IS_SUBP(sp) && page_numkeys(sp) > 0); + const size_t delta = + EVEN_FLOOR(page_room(sp) /* avoid the node uneven-sized */); + if (unlikely(delta) == 0) + return node; /* Prepare to shift upward, set len = length(subpage part to shift) */ - if (IS_LEAF2(sp)) { - delta &= /* do not make the node uneven-sized */ ~(size_t)1; - if (unlikely(delta) == 0) - return; - nsize = node_ds(node) - delta; - assert(nsize % 1 == 0); - len = nsize; - } else { - xp = ptr_disp(sp, delta); /* destination subpage */ - for (i = page_numkeys(sp); --i >= 0;) { + size_t nsize = node_ds(node) - delta, len = nsize; + assert(nsize % 1 == 0); + if (!IS_LEAF2(sp)) { + len = PAGEHDRSZ; + MDBX_page *xp = ptr_disp(sp, delta); /* destination subpage */ + for (intptr_t i = page_numkeys(sp); --i >= 0;) { assert(sp->mp_ptrs[i] >= delta); xp->mp_ptrs[i] = (indx_t)(sp->mp_ptrs[i] - delta); } - nsize = node_ds(node) - delta; - len = PAGEHDRSZ; } - sp->mp_upper = sp->mp_lower; + assert(sp->mp_upper >= sp->mp_lower + delta); + sp->mp_upper -= (indx_t)delta; sp->mp_pgno = mp->mp_pgno; node_set_ds(node, nsize); @@ -18604,15 +18731,17 @@ static void node_shrink(MDBX_page *mp, size_t indx) { void *const base = ptr_disp(mp, mp->mp_upper + PAGEHDRSZ); memmove(ptr_disp(base, delta), base, ptr_dist(sp, base) + len); - ptr = mp->mp_ptrs[indx]; - for (i = page_numkeys(mp); --i >= 0;) { - if (mp->mp_ptrs[i] <= ptr) { + const size_t pivot = mp->mp_ptrs[indx]; + for (intptr_t i = page_numkeys(mp); --i >= 0;) { + if (mp->mp_ptrs[i] <= pivot) { assert((size_t)UINT16_MAX - mp->mp_ptrs[i] >= delta); mp->mp_ptrs[i] += (indx_t)delta; } } assert((size_t)UINT16_MAX - mp->mp_upper >= delta); mp->mp_upper += (indx_t)delta; + + return ptr_disp(node, delta); } /* Initial setup of a sorted-dups cursor. @@ -19484,7 +19613,6 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { IS_LEAF(cdst->mc_pg[cdst->mc_db->md_depth - 1])); cASSERT(csrc, csrc->mc_snum < csrc->mc_db->md_depth || IS_LEAF(csrc->mc_pg[csrc->mc_db->md_depth - 1])); - cASSERT(cdst, page_room(pdst) >= page_used(cdst->mc_txn->mt_env, psrc)); const int pagetype = PAGETYPE_WHOLE(psrc); /* Move all nodes from src to dst */ @@ -19495,7 +19623,9 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { size_t j = dst_nkeys; if (unlikely(pagetype & P_LEAF2)) { /* Mark dst as dirty. */ - if (unlikely(rc = page_touch(cdst))) + rc = page_touch(cdst); + cASSERT(cdst, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) return rc; key.iov_len = csrc->mc_db->md_xsize; @@ -19503,6 +19633,7 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { size_t i = 0; do { rc = node_add_leaf2(cdst, j++, &key); + cASSERT(cdst, rc != MDBX_RESULT_TRUE); if (unlikely(rc != MDBX_SUCCESS)) return rc; key.iov_base = ptr_disp(key.iov_base, key.iov_len); @@ -19516,7 +19647,8 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { cursor_copy(csrc, &mn); /* must find the lowest key below src */ rc = page_search_lowest(&mn); - if (unlikely(rc)) + cASSERT(csrc, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) return rc; const MDBX_page *mp = mn.mc_pg[mn.mc_top]; @@ -19541,7 +19673,9 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { } /* Mark dst as dirty. */ - if (unlikely(rc = page_touch(cdst))) + rc = page_touch(cdst); + cASSERT(cdst, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) return rc; size_t i = 0; @@ -19555,6 +19689,7 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { cASSERT(csrc, node_flags(srcnode) == 0); rc = node_add_branch(cdst, j++, &key, node_pgno(srcnode)); } + cASSERT(cdst, rc != MDBX_RESULT_TRUE); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -19581,7 +19716,8 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { if (csrc->mc_ki[csrc->mc_top] == 0) { const MDBX_val nullkey = {0, 0}; rc = update_key(csrc, &nullkey); - if (unlikely(rc)) { + cASSERT(csrc, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) { csrc->mc_top++; return rc; } @@ -19616,7 +19752,8 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { } rc = page_retire(csrc, (MDBX_page *)psrc); - if (unlikely(rc)) + cASSERT(csrc, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) return rc; cASSERT(cdst, cdst->mc_db->md_entries > 0); @@ -19629,7 +19766,7 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { const uint16_t save_depth = cdst->mc_db->md_depth; cursor_pop(cdst); rc = rebalance(cdst); - if (unlikely(rc)) + if (unlikely(rc != MDBX_SUCCESS)) return rc; cASSERT(cdst, cdst->mc_db->md_entries > 0); @@ -19817,11 +19954,9 @@ static int rebalance(MDBX_cursor *mc) { mc->mc_snum = 0; mc->mc_top = 0; mc->mc_flags &= ~C_INITIALIZED; - - rc = page_retire(mc, mp); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } else if (IS_BRANCH(mp) && nkeys == 1) { + return page_retire(mc, mp); + } + if (IS_BRANCH(mp) && nkeys == 1) { DEBUG("%s", "collapsing root page!"); mc->mc_db->md_root = node_pgno(page_node(mp, 0)); rc = page_get(mc, mc->mc_db->md_root, &mc->mc_pg[0], mp->mp_txnid); @@ -19854,15 +19989,10 @@ static int rebalance(MDBX_cursor *mc) { PAGETYPE_WHOLE(mc->mc_pg[mc->mc_top]) == pagetype); cASSERT(mc, mc->mc_snum < mc->mc_db->md_depth || IS_LEAF(mc->mc_pg[mc->mc_db->md_depth - 1])); - - rc = page_retire(mc, mp); - if (likely(rc == MDBX_SUCCESS)) - rc = page_touch(mc); - return rc; - } else { - DEBUG("root page %" PRIaPGNO " doesn't need rebalancing (flags 0x%x)", - mp->mp_pgno, mp->mp_flags); + return page_retire(mc, mp); } + DEBUG("root page %" PRIaPGNO " doesn't need rebalancing (flags 0x%x)", + mp->mp_pgno, mp->mp_flags); return MDBX_SUCCESS; } @@ -19911,6 +20041,7 @@ static int rebalance(MDBX_cursor *mc) { const size_t right_nkeys = right ? page_numkeys(right) : 0; bool involve = false; retry: + cASSERT(mc, mc->mc_snum > 1); if (left_room > room_threshold && left_room >= right_room && (IS_MODIFIABLE(mc->mc_txn, left) || involve)) { /* try merge with left */ @@ -19982,7 +20113,18 @@ retry: return MDBX_SUCCESS; } - if (likely(!involve)) { + /* Заглушено в ветке v0.12.x, будет работать в v0.13.1 и далее. + * + * if (mc->mc_txn->mt_env->me_options.prefer_waf_insteadof_balance && + * likely(room_threshold > 0)) { + * room_threshold = 0; + * goto retry; + * } + */ + if (likely(!involve) && + (likely(mc->mc_dbi != FREE_DBI) || mc->mc_txn->tw.loose_pages || + MDBX_PNL_GETSIZE(mc->mc_txn->tw.relist) || (mc->mc_flags & C_GCU) || + (mc->mc_txn->mt_flags & MDBX_TXN_DRAINED_GC) || room_threshold)) { involve = true; goto retry; } @@ -20072,8 +20214,7 @@ __cold static int page_check(const MDBX_cursor *const mc, break; } - if (unlikely(mp->mp_upper < mp->mp_lower || - ((mp->mp_lower | mp->mp_upper) & 1) || + if (unlikely(mp->mp_upper < mp->mp_lower || (mp->mp_lower & 1) || PAGEHDRSZ + mp->mp_upper > env->me_psize)) rc = bad_page(mp, "invalid page lower(%u)/upper(%u) with limit %zu\n", mp->mp_lower, mp->mp_upper, page_space(env)); @@ -20089,11 +20230,6 @@ __cold static int page_check(const MDBX_cursor *const mc, bad_page(mp, "%s-page nkeys (%zu) < %u\n", IS_BRANCH(mp) ? "branch" : "leaf", nkeys, 1 + IS_BRANCH(mp)); } - if (!IS_LEAF2(mp) && unlikely(PAGEHDRSZ + mp->mp_upper + - nkeys * sizeof(MDBX_node) + nkeys - 1 > - env->me_psize)) - rc = bad_page(mp, "invalid page upper (%u) for nkeys %zu with limit %zu\n", - mp->mp_upper, nkeys, page_space(env)); const size_t ksize_max = keysize_max(env->me_psize, 0); const size_t leaf2_ksize = mp->mp_leaf2_ksize; @@ -20102,8 +20238,20 @@ __cold static int page_check(const MDBX_cursor *const mc, (mc->mc_db->md_flags & MDBX_DUPFIXED) == 0)) rc = bad_page(mp, "unexpected leaf2-page (db-flags 0x%x)\n", mc->mc_db->md_flags); - if (unlikely(leaf2_ksize < 1 || leaf2_ksize > ksize_max)) - rc = bad_page(mp, "invalid leaf2-key length (%zu)\n", leaf2_ksize); + else if (unlikely(leaf2_ksize != mc->mc_db->md_xsize)) + rc = bad_page(mp, "invalid leaf2_ksize %zu\n", leaf2_ksize); + else if (unlikely(((leaf2_ksize & nkeys) ^ mp->mp_upper) & 1)) + rc = bad_page( + mp, "invalid page upper (%u) for nkeys %zu with leaf2-length %zu\n", + mp->mp_upper, nkeys, leaf2_ksize); + } else { + if (unlikely((mp->mp_upper & 1) || PAGEHDRSZ + mp->mp_upper + + nkeys * sizeof(MDBX_node) + + nkeys - 1 > + env->me_psize)) + rc = + bad_page(mp, "invalid page upper (%u) for nkeys %zu with limit %zu\n", + mp->mp_upper, nkeys, page_space(env)); } MDBX_val here, prev = {0, 0}; @@ -20111,7 +20259,7 @@ __cold static int page_check(const MDBX_cursor *const mc, if (IS_LEAF2(mp)) { const char *const key = page_leaf2key(mp, i, leaf2_ksize); if (unlikely(end_of_page < key + leaf2_ksize)) { - rc = bad_page(mp, "leaf2-key beyond (%zu) page-end\n", + rc = bad_page(mp, "leaf2-item beyond (%zu) page-end\n", key + leaf2_ksize - end_of_page); continue; } @@ -20120,7 +20268,7 @@ __cold static int page_check(const MDBX_cursor *const mc, if (unlikely(leaf2_ksize < mc->mc_dbx->md_klen_min || leaf2_ksize > mc->mc_dbx->md_klen_max)) rc = bad_page( - mp, "leaf2-key size (%zu) <> min/max key-length (%zu/%zu)\n", + mp, "leaf2-item size (%zu) <> min/max length (%zu/%zu)\n", leaf2_ksize, mc->mc_dbx->md_klen_min, mc->mc_dbx->md_klen_max); else mc->mc_dbx->md_klen_min = mc->mc_dbx->md_klen_max = leaf2_ksize; @@ -20129,7 +20277,7 @@ __cold static int page_check(const MDBX_cursor *const mc, here.iov_base = (void *)key; here.iov_len = leaf2_ksize; if (prev.iov_base && unlikely(mc->mc_dbx->md_cmp(&prev, &here) >= 0)) - rc = bad_page(mp, "leaf2-key #%zu wrong order (%s >= %s)\n", i, + rc = bad_page(mp, "leaf2-item #%zu wrong order (%s >= %s)\n", i, DKEY(&prev), DVAL(&here)); prev = here; } @@ -20540,6 +20688,8 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, DKBUF; MDBX_page *const mp = mc->mc_pg[mc->mc_top]; + cASSERT(mc, (mp->mp_flags & P_ILL_BITS) == 0); + const size_t newindx = mc->mc_ki[mc->mc_top]; size_t nkeys = page_numkeys(mp); if (AUDIT_ENABLED()) { @@ -20631,7 +20781,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, /* It is reasonable and possible to split the page at the begin */ if (unlikely(newindx < minkeys)) { split_indx = minkeys; - if (newindx == 0 && foliage == 0 && !(naf & MDBX_SPLIT_REPLACE)) { + if (newindx == 0 && !(naf & MDBX_SPLIT_REPLACE)) { split_indx = 0; /* Checking for ability of splitting by the left-side insertion * of a pure page with the new key */ @@ -20651,10 +20801,19 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, } else get_key(page_node(mp, 0), &sepkey); cASSERT(mc, mc->mc_dbx->md_cmp(newkey, &sepkey) < 0); - /* Avoiding rare complex cases of split the parent page */ - if (page_room(mn.mc_pg[ptop]) < branch_size(env, &sepkey)) + /* Avoiding rare complex cases of nested split the parent page(s) */ + if (page_room(mc->mc_pg[ptop]) < branch_size(env, &sepkey)) split_indx = minkeys; } + if (foliage) { + TRACE("pure-left: foliage %u, top %i, ptop %zu, split_indx %zi, " + "minkeys %zi, sepkey %s, parent-room %zu, need4split %zu", + foliage, mc->mc_top, ptop, split_indx, minkeys, + DKEY_DEBUG(&sepkey), page_room(mc->mc_pg[ptop]), + branch_size(env, &sepkey)); + TRACE("pure-left: newkey %s, newdata %s, newindx %zu", + DKEY_DEBUG(newkey), DVAL_DEBUG(newdata), newindx); + } } } @@ -20667,9 +20826,10 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, sepkey = *newkey; } else if (unlikely(pure_left)) { /* newindx == split_indx == 0 */ - TRACE("no-split, but add new pure page at the %s", "left/before"); + TRACE("pure-left: no-split, but add new pure page at the %s", + "left/before"); cASSERT(mc, newindx == 0 && split_indx == 0 && minkeys == 1); - TRACE("old-first-key is %s", DKEY_DEBUG(&sepkey)); + TRACE("pure-left: old-first-key is %s", DKEY_DEBUG(&sepkey)); } else { if (IS_LEAF2(sister)) { /* Move half of the keys to the right sibling */ @@ -20700,6 +20860,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, mp->mp_lower += sizeof(indx_t); cASSERT(mc, mp->mp_upper >= ksize - sizeof(indx_t)); mp->mp_upper -= (indx_t)(ksize - sizeof(indx_t)); + cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); } else { memcpy(sister->mp_ptrs, split, distance * ksize); void *const ins = page_leaf2key(sister, distance, ksize); @@ -20712,6 +20873,8 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, sister->mp_upper -= (indx_t)(ksize - sizeof(indx_t)); cASSERT(mc, distance <= (int)UINT16_MAX); mc->mc_ki[mc->mc_top] = (indx_t)distance; + cASSERT(mc, + (((ksize & page_numkeys(sister)) ^ sister->mp_upper) & 1) == 0); } if (AUDIT_ENABLED()) { @@ -20880,18 +21043,20 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, } } else if (unlikely(pure_left)) { MDBX_page *ptop_page = mc->mc_pg[ptop]; - DEBUG("adding to parent page %u node[%u] left-leaf page #%u key %s", + TRACE("pure-left: adding to parent page %u node[%u] left-leaf page #%u key " + "%s", ptop_page->mp_pgno, mc->mc_ki[ptop], sister->mp_pgno, DKEY(mc->mc_ki[ptop] ? newkey : NULL)); - mc->mc_top--; + assert(mc->mc_top == ptop + 1); + mc->mc_top = (uint8_t)ptop; rc = node_add_branch(mc, mc->mc_ki[ptop], mc->mc_ki[ptop] ? newkey : NULL, sister->mp_pgno); cASSERT(mc, mp == mc->mc_pg[ptop + 1] && newindx == mc->mc_ki[ptop + 1] && ptop == mc->mc_top); if (likely(rc == MDBX_SUCCESS) && mc->mc_ki[ptop] == 0) { - DEBUG("update prev-first key on parent %s", DKEY(&sepkey)); MDBX_node *node = page_node(mc->mc_pg[ptop], 1); + TRACE("pure-left: update prev-first key on parent to %s", DKEY(&sepkey)); cASSERT(mc, node_ks(node) == 0 && node_pgno(node) == mp->mp_pgno); cASSERT(mc, mc->mc_top == ptop && mc->mc_ki[ptop] == 0); mc->mc_ki[ptop] = 1; @@ -20899,6 +21064,9 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, cASSERT(mc, mc->mc_top == ptop && mc->mc_ki[ptop] == 1); cASSERT(mc, mp == mc->mc_pg[ptop + 1] && newindx == mc->mc_ki[ptop + 1]); mc->mc_ki[ptop] = 0; + } else { + TRACE("pure-left: no-need-update prev-first key on parent %s", + DKEY(&sepkey)); } mc->mc_top++; @@ -20947,7 +21115,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, &sepkey); if (mc->mc_dbx->md_cmp(newkey, &sepkey) < 0) { mc->mc_top -= (uint8_t)i; - DEBUG("update new-first on parent [%i] page %u key %s", + DEBUG("pure-left: update new-first on parent [%i] page %u key %s", mc->mc_ki[mc->mc_top], mc->mc_pg[mc->mc_top]->mp_pgno, DKEY(newkey)); rc = update_key(mc, newkey); @@ -20958,7 +21126,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, break; } } - } else if (tmp_ki_copy /* !IS_LEAF2(mp) */) { + } else if (tmp_ki_copy) { /* !IS_LEAF2(mp) */ /* Move nodes */ mc->mc_pg[mc->mc_top] = sister; i = split_indx; @@ -21077,7 +21245,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, m3->mc_ki[k + 1] = m3->mc_ki[k]; m3->mc_pg[k + 1] = m3->mc_pg[k]; } - m3->mc_ki[0] = m3->mc_ki[0] >= nkeys; + m3->mc_ki[0] = m3->mc_ki[0] >= nkeys + pure_left; m3->mc_pg[0] = mc->mc_pg[0]; m3->mc_snum++; m3->mc_top++; @@ -23529,8 +23697,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, (mp ? page_room(mp) : pagesize - header_size) - payload_size; size_t align_bytes = 0; - for (size_t i = 0; err == MDBX_SUCCESS && i < nentries; - align_bytes += ((payload_size + align_bytes) & 1), ++i) { + for (size_t i = 0; err == MDBX_SUCCESS && i < nentries; ++i) { if (type == MDBX_page_dupfixed_leaf) { /* LEAF2 pages have no mp_ptrs[] or node headers */ payload_size += mp->mp_leaf2_ksize; @@ -23538,23 +23705,26 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, } MDBX_node *node = page_node(mp, i); - payload_size += NODESIZE + node_ks(node); + const size_t node_key_size = node_ks(node); + payload_size += NODESIZE + node_key_size; if (type == MDBX_page_branch) { assert(i > 0 || node_ks(node) == 0); + align_bytes += node_key_size & 1; continue; } + const size_t node_data_size = node_ds(node); assert(type == MDBX_page_leaf); switch (node_flags(node)) { case 0 /* usual node */: - payload_size += node_ds(node); + payload_size += node_data_size; + align_bytes += (node_key_size + node_data_size) & 1; break; case F_BIGDATA /* long data on the large/overflow page */: { - payload_size += sizeof(pgno_t); const pgno_t large_pgno = node_largedata_pgno(node); - const size_t over_payload = node_ds(node); + const size_t over_payload = node_data_size; const size_t over_header = PAGEHDRSZ; npages = 1; @@ -23573,27 +23743,31 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, over_payload, over_header, over_unused); if (unlikely(rc != MDBX_SUCCESS)) return (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc; + payload_size += sizeof(pgno_t); + align_bytes += node_key_size & 1; } break; case F_SUBDATA /* sub-db */: { - const size_t namelen = node_ks(node); - payload_size += node_ds(node); - if (unlikely(namelen == 0 || node_ds(node) != sizeof(MDBX_db))) { + const size_t namelen = node_key_size; + if (unlikely(namelen == 0 || node_data_size != sizeof(MDBX_db))) { assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } + header_size += node_data_size; + align_bytes += (node_key_size + node_data_size) & 1; } break; case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */: - payload_size += sizeof(MDBX_db); - if (unlikely(node_ds(node) != sizeof(MDBX_db))) { + if (unlikely(node_data_size != sizeof(MDBX_db))) { assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } + header_size += node_data_size; + align_bytes += (node_key_size + node_data_size) & 1; break; case F_DUPDATA /* short sub-page */: { - if (unlikely(node_ds(node) <= PAGEHDRSZ)) { + if (unlikely(node_data_size <= PAGEHDRSZ || (node_data_size & 1))) { assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; break; @@ -23621,16 +23795,17 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, err = MDBX_CORRUPTED; } - for (size_t j = 0; err == MDBX_SUCCESS && j < nsubkeys; - subalign_bytes += ((subpayload_size + subalign_bytes) & 1), ++j) { - + for (size_t j = 0; err == MDBX_SUCCESS && j < nsubkeys; ++j) { if (subtype == MDBX_subpage_dupfixed_leaf) { /* LEAF2 pages have no mp_ptrs[] or node headers */ subpayload_size += sp->mp_leaf2_ksize; } else { assert(subtype == MDBX_subpage_leaf); - MDBX_node *subnode = page_node(sp, j); - subpayload_size += NODESIZE + node_ks(subnode) + node_ds(subnode); + const MDBX_node *subnode = page_node(sp, j); + const size_t subnode_size = node_ks(subnode) + node_ds(subnode); + subheader_size += NODESIZE; + subpayload_size += subnode_size; + subalign_bytes += subnode_size & 1; if (unlikely(node_flags(subnode) != 0)) { assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; @@ -23639,7 +23814,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, } const int rc = - ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, name, node_ds(node), + ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, name, node_data_size, subtype, err, nsubkeys, subpayload_size, subheader_size, subunused_size + subalign_bytes); if (unlikely(rc != MDBX_SUCCESS)) @@ -23647,7 +23822,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, header_size += subheader_size; unused_size += subunused_size; payload_size += subpayload_size; - align_bytes += subalign_bytes; + align_bytes += subalign_bytes + (node_key_size & 1); } break; default: diff --git a/src/internals.h b/src/internals.h index 3d1fd706..30af6413 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1,5 +1,5 @@ /* - * Copyright 2015-2023 Leonid Yuriev + * Copyright 2015-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * @@ -1376,8 +1376,12 @@ struct MDBX_env { struct MDBX_lockinfo *me_lck; unsigned me_psize; /* DB page size, initialized from me_os_psize */ - unsigned me_leaf_nodemax; /* max size of a leaf-node */ - unsigned me_branch_nodemax; /* max size of a branch-node */ + uint16_t me_leaf_nodemax; /* max size of a leaf-node */ + uint16_t me_branch_nodemax; /* max size of a branch-node */ + uint16_t me_subpage_limit; + uint16_t me_subpage_room_threshold; + uint16_t me_subpage_reserve_prereq; + uint16_t me_subpage_reserve_limit; atomic_pgno_t me_mlocked_pgno; uint8_t me_psize2log; /* log2 of DB page size */ int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */ diff --git a/src/lck-posix.c b/src/lck-posix.c index 17c50ddd..d2bd6c1c 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -1,5 +1,5 @@ /* - * Copyright 2015-2023 Leonid Yuriev + * Copyright 2015-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/src/lck-windows.c b/src/lck-windows.c index 8ffccb1b..16ec0b3b 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -1,5 +1,5 @@ /* - * Copyright 2015-2023 Leonid Yuriev + * Copyright 2015-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/src/man1/mdbx_chk.1 b/src/man1/mdbx_chk.1 index aa4e9868..7b182325 100644 --- a/src/man1/mdbx_chk.1 +++ b/src/man1/mdbx_chk.1 @@ -1,6 +1,6 @@ -.\" Copyright 2015-2023 Leonid Yuriev . +.\" Copyright 2015-2024 Leonid Yuriev . .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_CHK 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_CHK 1 "2024-03-13" "MDBX 0.12.10" .SH NAME mdbx_chk \- MDBX checking tool .SH SYNOPSIS diff --git a/src/man1/mdbx_copy.1 b/src/man1/mdbx_copy.1 index 4e67a5b8..33a017fd 100644 --- a/src/man1/mdbx_copy.1 +++ b/src/man1/mdbx_copy.1 @@ -1,8 +1,8 @@ -.\" Copyright 2015-2023 Leonid Yuriev . +.\" Copyright 2015-2024 Leonid Yuriev . .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_COPY 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_COPY 1 "2024-03-13" "MDBX 0.12.10" .SH NAME mdbx_copy \- MDBX environment copy tool .SH SYNOPSIS diff --git a/src/man1/mdbx_drop.1 b/src/man1/mdbx_drop.1 index 425eecd2..70d8355c 100644 --- a/src/man1/mdbx_drop.1 +++ b/src/man1/mdbx_drop.1 @@ -1,7 +1,7 @@ -.\" Copyright 2021-2023 Leonid Yuriev . +.\" Copyright 2021-2024 Leonid Yuriev . .\" Copyright 2014-2021 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_DROP 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_DROP 1 "2024-03-13" "MDBX 0.12.10" .SH NAME mdbx_drop \- MDBX database delete tool .SH SYNOPSIS diff --git a/src/man1/mdbx_dump.1 b/src/man1/mdbx_dump.1 index d236b93c..c809d7d3 100644 --- a/src/man1/mdbx_dump.1 +++ b/src/man1/mdbx_dump.1 @@ -1,8 +1,8 @@ -.\" Copyright 2015-2023 Leonid Yuriev . +.\" Copyright 2015-2024 Leonid Yuriev . .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_DUMP 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_DUMP 1 "2024-03-13" "MDBX 0.12.10" .SH NAME mdbx_dump \- MDBX environment export tool .SH SYNOPSIS diff --git a/src/man1/mdbx_load.1 b/src/man1/mdbx_load.1 index ae8e7596..675865cd 100644 --- a/src/man1/mdbx_load.1 +++ b/src/man1/mdbx_load.1 @@ -1,8 +1,8 @@ -.\" Copyright 2015-2023 Leonid Yuriev . +.\" Copyright 2015-2024 Leonid Yuriev . .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_LOAD 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_LOAD 1 "2024-03-13" "MDBX 0.12.10" .SH NAME mdbx_load \- MDBX environment import tool .SH SYNOPSIS diff --git a/src/man1/mdbx_stat.1 b/src/man1/mdbx_stat.1 index c330d2e6..38cd735c 100644 --- a/src/man1/mdbx_stat.1 +++ b/src/man1/mdbx_stat.1 @@ -1,8 +1,8 @@ -.\" Copyright 2015-2023 Leonid Yuriev . +.\" Copyright 2015-2024 Leonid Yuriev . .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_STAT 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_STAT 1 "2024-03-13" "MDBX 0.12.10" .SH NAME mdbx_stat \- MDBX environment status tool .SH SYNOPSIS diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 31c1e6a8..2f9cad49 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -1,5 +1,5 @@ // -// Copyright (c) 2020-2023, Leonid Yuriev . +// Copyright (c) 2020-2024, Leonid Yuriev . // SPDX-License-Identifier: Apache-2.0 // // Non-inline part of the libmdbx C++ API diff --git a/src/mdbx_chk.c b/src/mdbx_chk.c index a8c97372..e0b25d3d 100644 --- a/src/mdbx_chk.c +++ b/src/mdbx_chk.c @@ -1,7 +1,7 @@ /* mdbx_chk.c - memory-mapped database check tool */ /* - * Copyright 2015-2023 Leonid Yuriev + * Copyright 2015-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * @@ -537,26 +537,14 @@ static int pgvisitor(const uint64_t pgno, const unsigned pgnumber, data_tree_problems += !is_gc_tree; gc_tree_problems += is_gc_tree; } - if (payload_bytes < 1) { - if (nentries > 1) { - problem_add("page", pgno, "zero size-of-entry", - "%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR " entries", - pagetype_caption, payload_bytes, nentries); - /* if ((size_t)header_bytes + unused_bytes < page_size) { - // LY: hush a misuse error - page_bytes = page_size; - } */ - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } else { - problem_add("page", pgno, "empty", - "%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR - " entries, deep %i", - pagetype_caption, payload_bytes, nentries, deep); - dbi->pages.empty += 1; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } + if (nentries < 1 || (pagetype == MDBX_page_branch && nentries < 2)) { + problem_add("page", pgno, nentries ? "half-empty" : "empty", + "%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR + " entries, deep %i", + pagetype_caption, payload_bytes, nentries, deep); + dbi->pages.empty += 1; + data_tree_problems += !is_gc_tree; + gc_tree_problems += is_gc_tree; } if (pgnumber) { @@ -611,7 +599,7 @@ static int handle_freedb(const uint64_t record_number, const MDBX_val *key, problem_add("entry", txnid, "wrong idl size", "%" PRIuPTR, data->iov_len); size_t number = (data->iov_len >= sizeof(pgno_t)) ? *iptr++ : 0; - if (number < 1 || number > MDBX_PGL_LIMIT) + if (number > MDBX_PGL_LIMIT) problem_add("entry", txnid, "wrong idl length", "%" PRIuPTR, number); else if ((number + 1) * sizeof(pgno_t) > data->iov_len) { problem_add("entry", txnid, "trimmed idl", diff --git a/src/mdbx_copy.c b/src/mdbx_copy.c index 52adc312..e73f143a 100644 --- a/src/mdbx_copy.c +++ b/src/mdbx_copy.c @@ -1,7 +1,7 @@ /* mdbx_copy.c - memory-mapped database backup tool */ /* - * Copyright 2015-2023 Leonid Yuriev + * Copyright 2015-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/src/mdbx_drop.c b/src/mdbx_drop.c index 859710a6..b3107218 100644 --- a/src/mdbx_drop.c +++ b/src/mdbx_drop.c @@ -1,7 +1,7 @@ /* mdbx_drop.c - memory-mapped database delete tool */ /* - * Copyright 2021-2023 Leonid Yuriev + * Copyright 2021-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * * Copyright 2016-2021 Howard Chu, Symas Corp. diff --git a/src/mdbx_dump.c b/src/mdbx_dump.c index 21a695e2..f918919b 100644 --- a/src/mdbx_dump.c +++ b/src/mdbx_dump.c @@ -1,7 +1,7 @@ /* mdbx_dump.c - memory-mapped database dump tool */ /* - * Copyright 2015-2023 Leonid Yuriev + * Copyright 2015-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/src/mdbx_load.c b/src/mdbx_load.c index 552fedc8..ac9bd787 100644 --- a/src/mdbx_load.c +++ b/src/mdbx_load.c @@ -1,7 +1,7 @@ /* mdbx_load.c - memory-mapped database load tool */ /* - * Copyright 2015-2023 Leonid Yuriev + * Copyright 2015-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/src/mdbx_stat.c b/src/mdbx_stat.c index adedc13e..2059972d 100644 --- a/src/mdbx_stat.c +++ b/src/mdbx_stat.c @@ -1,7 +1,7 @@ /* mdbx_stat.c - memory-mapped database status tool */ /* - * Copyright 2015-2023 Leonid Yuriev + * Copyright 2015-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/src/osal.c b/src/osal.c index b07565b4..a1de5be6 100644 --- a/src/osal.c +++ b/src/osal.c @@ -1,7 +1,7 @@ /* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */ /* - * Copyright 2015-2023 Leonid Yuriev + * Copyright 2015-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * @@ -1826,8 +1826,8 @@ MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle, #else struct statvfs info; if (err != MDBX_ENOFILE) { - if (statvfs(pathname, &info) == 0 && (info.f_flag & ST_RDONLY) == 0) - return err; + if (statvfs(pathname, &info) == 0) + return (info.f_flag & ST_RDONLY) ? MDBX_SUCCESS : err; if (errno != MDBX_ENOFILE) return errno; } @@ -2571,7 +2571,7 @@ retry_mapview:; ptr_disp(map->base, size), ((map->current < map->limit) ? map->current : map->limit) - size); } - map->current = size; + map->current = (size < map->limit) ? size : map->limit; } if (limit == map->limit) @@ -2732,6 +2732,7 @@ retry_mapview:; map->base = ptr; } map->limit = limit; + map->current = size; #if MDBX_ENABLE_MADVISE #ifdef MADV_DONTFORK diff --git a/src/osal.h b/src/osal.h index 3d45da4b..9ee2b335 100644 --- a/src/osal.h +++ b/src/osal.h @@ -1,7 +1,7 @@ /* https://en.wikipedia.org/wiki/Operating_system_abstraction_layer */ /* - * Copyright 2015-2023 Leonid Yuriev + * Copyright 2015-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f6901916..4cef005c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -72,6 +72,10 @@ if(UNIX AND NOT SUBPROJECT) target_include_directories(test_extra_upsert_alldups PRIVATE "${PROJECT_SOURCE_DIR}") target_link_libraries(test_extra_upsert_alldups ${TOOL_MDBX_LIB}) + add_executable(test_extra_dupfixed_addodd extra/dupfixed_addodd.c) + target_include_directories(test_extra_dupfixed_addodd PRIVATE "${PROJECT_SOURCE_DIR}") + target_link_libraries(test_extra_dupfixed_addodd ${TOOL_MDBX_LIB}) + if(MDBX_BUILD_CXX) add_executable(test_extra_maindb_ordinal extra/maindb_ordinal.c++) target_include_directories(test_extra_maindb_ordinal PRIVATE "${PROJECT_SOURCE_DIR}") @@ -168,6 +172,7 @@ else() if(UNIX AND NOT SUBPROJECT) add_test(NAME extra_upsert_alldups COMMAND test_extra_upsert_alldups) + add_test(NAME extra_dupfixed_addodd COMMAND test_extra_dupfixed_addodd) if(MDBX_BUILD_CXX) add_test(NAME extra_maindb_ordinal COMMAND test_extra_maindb_ordinal) add_test(NAME extra_dupfixed_multiple COMMAND test_extra_dupfixed_multiple) diff --git a/test/append.c++ b/test/append.c++ index d2486001..6a4ac5a1 100644 --- a/test/append.c++ +++ b/test/append.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/base.h++ b/test/base.h++ index 60ab7812..7f605ba1 100644 --- a/test/base.h++ +++ b/test/base.h++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/cases.c++ b/test/cases.c++ index 97421e7d..307bc37d 100644 --- a/test/cases.c++ +++ b/test/cases.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/chrono.c++ b/test/chrono.c++ index 4d53b60d..2099b105 100644 --- a/test/chrono.c++ +++ b/test/chrono.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/chrono.h++ b/test/chrono.h++ index 5d29b1c2..c4c7cd6e 100644 --- a/test/chrono.h++ +++ b/test/chrono.h++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/config.c++ b/test/config.c++ index 922f7b37..2e0a76bc 100644 --- a/test/config.c++ +++ b/test/config.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/config.h++ b/test/config.h++ index f57dce7c..6d3b6ce8 100644 --- a/test/config.h++ +++ b/test/config.h++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/dead.c++ b/test/dead.c++ index d0f8cb09..6d372d7b 100644 --- a/test/dead.c++ +++ b/test/dead.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/extra/dupfixed_addodd.c b/test/extra/dupfixed_addodd.c new file mode 100644 index 00000000..da9ba944 --- /dev/null +++ b/test/extra/dupfixed_addodd.c @@ -0,0 +1,93 @@ +/* + * @Dvirsw (https://t.me/Dvirsw) + * I think there is a bug with DUPFIXED. The following code fails. + * + * https://t.me/libmdbx/5368 + */ + +#include +#include + +#include "mdbx.h" +#include +#include +#include +#include +#include +#include +#include +#include + +int main() { + int rc; + MDBX_env *env = NULL; + MDBX_dbi dbi = 0; + MDBX_val key, data; + MDBX_txn *txn = NULL; + + rc = mdbx_env_create(&env); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_env_create: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + + rc = mdbx_env_set_maxdbs(env, 1); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_env_create: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + + rc = mdbx_env_open(env, "./example-db", + MDBX_NOSUBDIR | MDBX_COALESCE | MDBX_LIFORECLAIM, 0664); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_env_open: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + + rc = mdbx_txn_begin(env, NULL, 0, &txn); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_txn_begin: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + + rc = mdbx_dbi_open(txn, "test", MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_CREATE, + &dbi); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_dbi_open: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + + char key_bytes[32] = {0}; + key.iov_len = 32; + key.iov_base = key_bytes; + + // Another put after this will fail. + unsigned char idx; + for (idx = 0; idx < 129; idx++) { + char data_bytes[15] = {idx}; + data.iov_len = 15; + data.iov_base = data_bytes; + rc = mdbx_put(txn, dbi, &key, &data, 0); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_put: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + } + + // This will fail and exit. + char data_bytes[15] = {idx}; + data.iov_len = 15; + data.iov_base = data_bytes; + rc = mdbx_put(txn, dbi, &key, &data, 0); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_put: (%d) %s\n", rc, mdbx_strerror(rc)); + fprintf(stderr, "expected failure\n"); + exit(EXIT_FAILURE); + } + + rc = mdbx_txn_commit(txn); + if (rc) { + fprintf(stderr, "mdbx_txn_commit: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } +} diff --git a/test/extra/pcrf/pcrf_test.c b/test/extra/pcrf/pcrf_test.c index 1d1f1e7e..51f2bbff 100644 --- a/test/extra/pcrf/pcrf_test.c +++ b/test/extra/pcrf/pcrf_test.c @@ -1,5 +1,5 @@ /* - * Copyright 2016-2023 Leonid Yuriev . + * Copyright 2016-2024 Leonid Yuriev . * Copyright 2015 Vladimir Romanov * , Yota Lab. * diff --git a/test/hill.c++ b/test/hill.c++ index 79234b7d..8a32fc46 100644 --- a/test/hill.c++ +++ b/test/hill.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/jitter.c++ b/test/jitter.c++ index b25599b0..c266a3ac 100644 --- a/test/jitter.c++ +++ b/test/jitter.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * @@ -99,6 +99,37 @@ bool testcase_jitter::run() { jitter_delay(); txn_begin(true); fetch_canary(); + if (flipcoin()) { + MDBX_txn_info info; + err = mdbx_txn_reset(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_reset()", err); + err = mdbx_txn_info(txn_guard.get(), &info, false); + if (err != MDBX_BAD_TXN) + failure_perror("mdbx_txn_info(MDBX_BAD_TXN)", err); + err = mdbx_txn_reset(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_reset(again)", err); + err = mdbx_txn_break(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_break()", err); + + err = mdbx_txn_abort(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_abort()", err); + txn_guard.release(); + txn_begin(true); + err = mdbx_txn_reset(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_reset()", err); + + err = mdbx_txn_renew(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_renew()", err); + err = mdbx_txn_info(txn_guard.get(), &info, false); + if (err) + failure_perror("mdbx_txn_info()", err); + } jitter_delay(); txn_end(flipcoin()); } diff --git a/test/keygen.c++ b/test/keygen.c++ index a6d20f33..f8e02181 100644 --- a/test/keygen.c++ +++ b/test/keygen.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/keygen.h++ b/test/keygen.h++ index 9e2410fd..4d72df8b 100644 --- a/test/keygen.h++ +++ b/test/keygen.h++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/log.c++ b/test/log.c++ index 04dad84d..8c013c84 100644 --- a/test/log.c++ +++ b/test/log.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/log.h++ b/test/log.h++ index aa111ac9..385a7c63 100644 --- a/test/log.h++ +++ b/test/log.h++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index 900c1319..a0c3bd66 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -432,6 +432,12 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ --keygen.seed=${seed} + caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --keygen.seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=9 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --keygen.seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=11 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} split=24 caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ @@ -446,6 +452,12 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ --keygen.seed=${seed} + caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --keygen.seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=10 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --keygen.seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=12 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} split=16 caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ @@ -468,6 +480,12 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ --keygen.seed=${seed} + caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --keygen.seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=9 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --keygen.seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=11 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} if [ "$EXTRA" != "no" ]; then split=10 @@ -491,6 +509,12 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ --keygen.seed=${seed} + caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --keygen.seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=13 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --keygen.seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=16 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} fi split=4 @@ -506,6 +530,12 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ --keygen.seed=${seed} + caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --keygen.seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=21 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --keygen.seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=32 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} done # options loop=$((loop + 1)) if [ -n "$LOOPS" ] && [ $loop -ge "$LOOPS" ]; then break; fi diff --git a/test/main.c++ b/test/main.c++ index 2b8ff655..7634cabf 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/nested.c++ b/test/nested.c++ index 48299c79..5d056a8a 100644 --- a/test/nested.c++ +++ b/test/nested.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/osal-unix.c++ b/test/osal-unix.c++ index 094d6769..f22bc2ed 100644 --- a/test/osal-unix.c++ +++ b/test/osal-unix.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/osal-windows.c++ b/test/osal-windows.c++ index 24cde253..5df510c6 100644 --- a/test/osal-windows.c++ +++ b/test/osal-windows.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/osal.h++ b/test/osal.h++ index ef3b5562..bab97d86 100644 --- a/test/osal.h++ +++ b/test/osal.h++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/test.c++ b/test/test.c++ index 77c90c0a..f3906fed 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * @@ -24,9 +24,9 @@ const char *testcase2str(const actor_testcase testcase) { case ac_hill: return "hill"; case ac_deadread: - return "deadread"; + return "dead.reader"; case ac_deadwrite: - return "deadwrite"; + return "dead.writer"; case ac_jitter: return "jitter"; case ac_try: @@ -733,6 +733,18 @@ void testcase::speculum_check_iterator(const char *where, const char *stage, mdbx_dump_val(&v, dump_value, sizeof(dump_value))); } +void testcase::failure(const char *fmt, ...) const { + va_list ap; + va_start(ap, fmt); + fflush(nullptr); + logging::output_nocheckloglevel_ap(logging::failure, fmt, ap); + va_end(ap); + fflush(nullptr); + if (txn_guard) + mdbx_txn_commit(const_cast(this)->txn_guard.release()); + exit(EXIT_FAILURE); +} + #if SPECULUM_CURSORS void testcase::speculum_check_cursor(const char *where, const char *stage, const testcase::SET::const_iterator &it, diff --git a/test/test.h++ b/test/test.h++ index 6158ba66..07a8d941 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * @@ -277,6 +277,7 @@ protected: void signal(); bool should_continue(bool check_timeout_only = false) const; + void failure(const char *fmt, ...) const; void generate_pair(const keygen::serial_t serial, keygen::buffer &out_key, keygen::buffer &out_value, keygen::serial_t data_age) { keyvalue_maker.pair(serial, out_key, out_value, data_age, false); diff --git a/test/ttl.c++ b/test/ttl.c++ index a7049022..80eef068 100644 --- a/test/ttl.c++ +++ b/test/ttl.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/utils.c++ b/test/utils.c++ index 71d56eb8..daf5e582 100644 --- a/test/utils.c++ +++ b/test/utils.c++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. * diff --git a/test/utils.h++ b/test/utils.h++ index 4e91226d..4210f552 100644 --- a/test/utils.h++ +++ b/test/utils.h++ @@ -1,5 +1,5 @@ /* - * Copyright 2017-2023 Leonid Yuriev + * Copyright 2017-2024 Leonid Yuriev * and other libmdbx authors: please see AUTHORS file. * All rights reserved. *