From e9ad618b58530ee6e998371f71d9110772daa506 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 23 Apr 2023 20:23:04 +0300 Subject: [PATCH 001/137] =?UTF-8?q?mdbx:=20=D0=BD=D0=B0=D1=87=D0=B0=D0=BB?= =?UTF-8?q?=D0=BE=20=D0=B2=D0=B5=D1=82=D0=BA=D0=B8=20`0.13`,=20=D1=81=20?= =?UTF-8?q?=D0=BD=D0=BE=D0=B2=D1=8B=D0=BC=20=D1=84=D1=83=D0=BD=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D0=BE=D0=BD=D0=B0=D0=BB=D0=BE=D0=BC=20=D0=B8=20=D0=B8?= =?UTF-8?q?=D0=B7=D0=BC=D0=B5=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=D0=BC=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Планируется очистка от функций и возможностей, ранее объявленных устаревшими. В частности, будет удалена поддержка пользовательских функций сравнения, которые были объявлены устаревшими начиная с версии 0.9, более 33 месяцев назад. --- ChangeLog.md | 9 +++++++++ mdbx.h | 4 ++-- src/man1/mdbx_chk.1 | 2 +- src/man1/mdbx_copy.1 | 2 +- src/man1/mdbx_drop.1 | 2 +- src/man1/mdbx_dump.1 | 2 +- src/man1/mdbx_load.1 | 2 +- src/man1/mdbx_stat.1 | 2 +- 8 files changed, 17 insertions(+), 8 deletions(-) diff --git a/ChangeLog.md b/ChangeLog.md index 5a8f0af6..53ca0059 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -5,6 +5,14 @@ English version [by Google](https://gitflic-ru.translate.goog/project/erthink/li and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md). +## v0.13.0 at 2023-04-23 + +Не выпуск, а начало ветки `0.13` с новым функционалом и изменением API. + + +******************************************************************************** + + ## v0.12.8 (сопровождение и подготовка к релизу) Поддержка стабильной ветки. 
@@ -22,6 +30,7 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic ## v0.12.8 "Владимир Уткин" от 2023-10-17 + Стабилизирующий выпуск с исправлением обнаруженных ошибок и устранением недочетов, в день 100-летия со дня рождения выдающегося советского и российского ученого и конструктора [Влади́мира Фёдоровича У́ткина](https://ru.wikipedia.org/wiki/Уткин,_Владимир_Фёдорович). diff --git a/mdbx.h b/mdbx.h index ece77eda..47d3e12a 100644 --- a/mdbx.h +++ b/mdbx.h @@ -634,9 +634,9 @@ typedef mode_t mdbx_mode_t; extern "C" { #endif -/* MDBX version 0.12.x */ +/* MDBX version 0.13.x */ #define MDBX_VERSION_MAJOR 0 -#define MDBX_VERSION_MINOR 12 +#define MDBX_VERSION_MINOR 13 #ifndef LIBMDBX_API #if defined(LIBMDBX_EXPORTS) diff --git a/src/man1/mdbx_chk.1 b/src/man1/mdbx_chk.1 index aa4e9868..6f26166c 100644 --- a/src/man1/mdbx_chk.1 +++ b/src/man1/mdbx_chk.1 @@ -1,6 +1,6 @@ .\" Copyright 2015-2023 Leonid Yuriev . .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_CHK 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_CHK 1 "2023-04-23" "MDBX 0.13" .SH NAME mdbx_chk \- MDBX checking tool .SH SYNOPSIS diff --git a/src/man1/mdbx_copy.1 b/src/man1/mdbx_copy.1 index 4e67a5b8..18658782 100644 --- a/src/man1/mdbx_copy.1 +++ b/src/man1/mdbx_copy.1 @@ -2,7 +2,7 @@ .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_COPY 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_COPY 1 "2023-04-23" "MDBX 0.13" .SH NAME mdbx_copy \- MDBX environment copy tool .SH SYNOPSIS diff --git a/src/man1/mdbx_drop.1 b/src/man1/mdbx_drop.1 index 425eecd2..634150ac 100644 --- a/src/man1/mdbx_drop.1 +++ b/src/man1/mdbx_drop.1 @@ -1,7 +1,7 @@ .\" Copyright 2021-2023 Leonid Yuriev . .\" Copyright 2014-2021 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. 
-.TH MDBX_DROP 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_DROP 1 "2023-04-23" "MDBX 0.13" .SH NAME mdbx_drop \- MDBX database delete tool .SH SYNOPSIS diff --git a/src/man1/mdbx_dump.1 b/src/man1/mdbx_dump.1 index d236b93c..13a746b7 100644 --- a/src/man1/mdbx_dump.1 +++ b/src/man1/mdbx_dump.1 @@ -2,7 +2,7 @@ .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_DUMP 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_DUMP 1 "2023-04-23" "MDBX 0.13" .SH NAME mdbx_dump \- MDBX environment export tool .SH SYNOPSIS diff --git a/src/man1/mdbx_load.1 b/src/man1/mdbx_load.1 index ae8e7596..fdd2fc24 100644 --- a/src/man1/mdbx_load.1 +++ b/src/man1/mdbx_load.1 @@ -2,7 +2,7 @@ .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2014-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. -.TH MDBX_LOAD 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_LOAD 1 "2023-04-23" "MDBX 0.13" .SH NAME mdbx_load \- MDBX environment import tool .SH SYNOPSIS diff --git a/src/man1/mdbx_stat.1 b/src/man1/mdbx_stat.1 index c330d2e6..0260bb71 100644 --- a/src/man1/mdbx_stat.1 +++ b/src/man1/mdbx_stat.1 @@ -2,7 +2,7 @@ .\" Copyright 2015,2016 Peter-Service R&D LLC . .\" Copyright 2012-2015 Howard Chu, Symas Corp. All Rights Reserved. .\" Copying restrictions apply. See COPYRIGHT/LICENSE. 
-.TH MDBX_STAT 1 "2023-10-17" "MDBX 0.12.8" +.TH MDBX_STAT 1 "2023-04-23" "MDBX 0.13" .SH NAME mdbx_stat \- MDBX environment status tool .SH SYNOPSIS From dd9fc963d2221d4880ee536a0501ad985d9d796b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 28 Mar 2023 21:24:18 +0300 Subject: [PATCH 002/137] =?UTF-8?q?mdbx:=20=D0=B8=D0=B7=D0=BC=D0=B5=D0=BD?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B8=20=D1=80=D0=B0=D1=81=D1=88?= =?UTF-8?q?=D0=B8=D1=80=D0=B5=D0=BD=D0=B8=D0=B5=20API=20=D1=84=D1=83=D0=BD?= =?UTF-8?q?=D0=BA=D1=86=D0=B8=D0=BE=D0=BD=D0=B0=D0=BB=D0=BE=D0=BC=20=D0=BF?= =?UTF-8?q?=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B8=20=D1=86=D0=B5=D0=BB?= =?UTF-8?q?=D0=BE=D1=81=D1=82=D0=BD=D0=BE=D1=81=D1=82=D0=B8=20=D1=81=D1=82?= =?UTF-8?q?=D1=80=D1=83=D0=BA=D1=82=D1=83=D1=80=D1=8B=20=D0=91=D0=94.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 231 +++++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 226 insertions(+), 5 deletions(-) diff --git a/mdbx.h b/mdbx.h index 47d3e12a..768e9b00 100644 --- a/mdbx.h +++ b/mdbx.h @@ -816,7 +816,7 @@ typedef struct iovec MDBX_val; #endif /* ! SunOS */ enum MDBX_constants { - /** The hard limit for DBI handles */ + /** The hard limit for DBI handles. */ MDBX_MAX_DBI = UINT32_C(32765), /** The maximum size of a data item. */ @@ -5519,9 +5519,9 @@ LIBMDBX_API int mdbx_env_set_hsr(MDBX_env *env, MDBX_hsr_func *hsr_callback); MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API MDBX_hsr_func * mdbx_env_get_hsr(const MDBX_env *env); -/** \defgroup btree_traversal B-tree Traversal - * This is internal API for mdbx_chk tool. You should avoid to use it, except - * some extremal special cases. +/** \defgroup chk Checking and Recovery + * Basically this is internal API for `mdbx_chk` tool, etc. + * You should avoid to use it, except some extremal special cases. 
* \ingroup c_extra * @{ */ @@ -5562,6 +5562,16 @@ MDBX_pgvisitor_func(const uint64_t pgno, const unsigned number, void *const ctx, LIBMDBX_API int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, void *ctx, bool dont_check_keys_ordering); +/** \brief Acquires write-transaction lock. + * Provided for custom and/or complex locking scenarios. + * \returns A non-zero error value on failure and 0 on success. */ +LIBMDBX_API int mdbx_txn_lock(MDBX_env *env, bool dont_wait); + +/** \brief Releases write-transaction lock. + * Provided for custom and/or complex locking scenarios. + * \returns A non-zero error value on failure and 0 on success. */ +LIBMDBX_API void mdbx_txn_unlock(MDBX_env *env); + /** \brief Open an environment instance using specific meta-page * for checking and recovery. * @@ -5592,7 +5602,218 @@ LIBMDBX_API int mdbx_env_open_for_recoveryW(MDBX_env *env, * leg(s). */ LIBMDBX_API int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target_meta); -/** end of btree_traversal @} */ +/** \brief Флаги/опции для проверки целостности БД. + * \see mdbx_env_chk() */ +enum MDBX_chk_flags_t { + /** Режим проверки по-умолчанию, в том числе в режиме только-чтения. */ + MDBX_CHK_DEFAULTS = 0, + + /** Проверка в режиме чтения-записи, с захватом блокировки и приостановки + * пишущих транзакций. */ + MDBX_CHK_READWRITE = 1, + + /** Пропустить обход дерева страниц. */ + MDBX_CHK_SKIP_BTREE_TRAVERSAL = 2, + + /** Пропустить просмотр записей ключ-значение. */ + MDBX_CHK_SKIP_KV_TRAVERSAL = 4, + + /** Игнорировать порядок ключей и записей. + * \note Требуется при проверке унаследованных БД созданных с использованием + * нестандартных (пользовательских) функций сравнения ключей или значений. 
*/ + MDBX_CHK_IGNORE_ORDER = 8 +}; +#ifndef __cplusplus +/** \ingroup c_opening */ +typedef enum MDBX_chk_flags_t MDBX_chk_flags_t; +#else +DEFINE_ENUM_FLAG_OPERATORS(MDBX_chk_flags_t) +#endif + +/** \brief Уровни логирование/детализации информации, + * поставляемой через обратные вызовы при проверке целостности БД. + * \see mdbx_env_chk() */ +enum MDBX_chk_severity { + MDBX_chk_severity_prio_shift = 4, + MDBX_chk_severity_kind_mask = 0xF, + MDBX_chk_fatal = 0x00u, + MDBX_chk_error = 0x11u, + MDBX_chk_warning = 0x22u, + MDBX_chk_notice = 0x33u, + MDBX_chk_result = 0x44u, + MDBX_chk_resolution = 0x55u, + MDBX_chk_processing = 0x56u, + MDBX_chk_info = 0x67u, + MDBX_chk_verbose = 0x78u, + MDBX_chk_details = 0x89u, + MDBX_chk_extra = 0x9Au +}; + +/** \brief Стадии проверки, + * сообщаемые через обратные вызовы при проверке целостности БД. + * \see mdbx_env_chk() */ +enum MDBX_chk_stage { + MDBX_chk_none, + MDBX_chk_init, + MDBX_chk_lock, + MDBX_chk_meta, + MDBX_chk_traversal_tree, + MDBX_chk_traversal_freedb, + MDBX_chk_space, + MDBX_chk_traversal_maindb, + MDBX_chk_traversal_subdbs, + MDBX_chk_conclude, + MDBX_chk_unlock, + MDBX_chk_finalize +}; + +/** \brief Виртуальная строка отчета, формируемого при проверке целостности БД. + * \see mdbx_env_chk() */ +typedef struct MDBX_chk_line { + struct MDBX_chk_context *ctx; + uint8_t severity, scope_depth, empty; + char *begin, *end, *out; +} MDBX_chk_line_t; + +/** \brief Проблема обнаруженная при проверке целостности БД. + * \see mdbx_env_chk() */ +typedef struct MDBX_chk_issue { + struct MDBX_chk_issue *next; + size_t count; + const char *caption; +} MDBX_chk_issue_t; + +/** \brief Иерархический контекст при проверке целостности БД. 
+ * \see mdbx_env_chk() */ +typedef struct MDBX_chk_scope { + MDBX_chk_issue_t *issues; + struct MDBX_chk_internal *internal; + const void *object; + enum MDBX_chk_stage stage; + enum MDBX_chk_severity verbosity; + size_t subtotal_issues; + union { + void *ptr; + size_t number; + } usr_z, usr_v, usr_o; +} MDBX_chk_scope_t; + +/** \brief Пользовательский тип для привязки дополнительных данных, + * связанных с некоторой таблицей ключ-значение, при проверке целостности БД. + * \see mdbx_env_chk() */ +typedef struct MDBX_chk_user_subdb_cookie MDBX_chk_user_subdb_cookie_t; + +/** \brief Гистограмма с некоторой статистической информацией, + * собираемой при проверке целостности БД. + * \see mdbx_env_chk() */ +struct MDBX_chk_histogram { + size_t amount, count, ones, pad; + struct { + size_t begin, end, amount, count; + } ranges[9]; +}; + +/** \brief Информация о некоторой таблицей ключ-значение, + * при проверке целостности БД. + * \see mdbx_env_chk() */ +typedef struct MDBX_chk_subdb { + MDBX_chk_user_subdb_cookie_t *cookie; + MDBX_val name; + MDBX_db_flags_t flags; + int id; + + size_t payload_bytes, lost_bytes; + struct { + size_t all, empty, other; + size_t branch, leaf; + size_t nested_branch, nested_leaf, nested_subleaf; + } pages; + struct { + /// Tree deep histogram + struct MDBX_chk_histogram deep; + /// Histogram of large/overflow pages length + struct MDBX_chk_histogram large_pages; + /// Histogram of nested trees height, span length for GC + struct MDBX_chk_histogram nested_tree; + /// Keys length histogram + struct MDBX_chk_histogram key_len; + /// Values length histogram + struct MDBX_chk_histogram val_len; + } histogram; +} MDBX_chk_subdb_t; + +/** \brief Контекст проверки целостности БД. 
+ * \see mdbx_env_chk() */ +typedef struct MDBX_chk_context { + struct MDBX_chk_internal *internal; + MDBX_env *env; + MDBX_txn *txn; + MDBX_chk_scope_t *scope; + unsigned scope_nesting; + struct { + size_t total_payload_bytes; + size_t subdb_total, subdb_processed; + size_t total_unused_bytes, unused_pages; + size_t processed_pages, reclaimable_pages, gc_pages, alloc_pages, + backed_pages; + size_t problems_meta, tree_problems, gc_tree_problems, kv_tree_problems, + problems_gc, problems_kv, total_problems; + uint64_t steady_txnid, recent_txnid; + /** Указатель на массив размером subdb_total с указателями на экземпляры + * структур MDBX_chk_subdb_t с информацией о всех таблицах ключ-значние, + * включая MainDB и GC/FreeDB. */ + const MDBX_chk_subdb_t *const *subdbs; + } result; +} MDBX_chk_context_t; + +/** FIXME */ +typedef struct MDBX_chk_callbacks { + bool (*check_break)(MDBX_chk_context_t *ctx); + int (*scope_push)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, + MDBX_chk_scope_t *inner, const char *fmt, va_list args); + int (*scope_conclude)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, + MDBX_chk_scope_t *inner, int err); + void (*scope_pop)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, + MDBX_chk_scope_t *inner); + void (*issue)(MDBX_chk_context_t *ctx, const char *object, + size_t entry_number, const char *issue, const char *extra_fmt, + va_list extra_args); + MDBX_chk_user_subdb_cookie_t *(*subdb_filter)(MDBX_chk_context_t *ctx, + const MDBX_val *name, + MDBX_db_flags_t flags); + int (*subdb_conclude)(MDBX_chk_context_t *ctx, const MDBX_chk_subdb_t *subdb, + MDBX_cursor *cursor, int err); + void (*subdb_dispose)(MDBX_chk_context_t *ctx, const MDBX_chk_subdb_t *subdb); + + int (*subdb_handle_kv)(MDBX_chk_context_t *ctx, const MDBX_chk_subdb_t *subdb, + size_t entry_number, const MDBX_val *key, + const MDBX_val *value); + + int (*stage_begin)(MDBX_chk_context_t *ctx, enum MDBX_chk_stage); + int (*stage_end)(MDBX_chk_context_t *ctx, enum 
MDBX_chk_stage, int err); + + struct { + MDBX_chk_line_t *(*begin)(MDBX_chk_context_t *ctx, + enum MDBX_chk_severity severity); + void (*flush)(MDBX_chk_line_t *); + void (*done)(MDBX_chk_line_t *); + void (*chars)(MDBX_chk_line_t *, const char *str, size_t len); + void (*format)(MDBX_chk_line_t *, const char *fmt, va_list args); + void (*size)(MDBX_chk_line_t *, const char *prefix, const uint64_t value, + const char *suffix); + } print; +} MDBX_chk_callbacks_t; + +/** FIXME */ +LIBMDBX_API int mdbx_env_chk(MDBX_env *env, const MDBX_chk_callbacks_t *cb, + MDBX_chk_context_t *ctx, + const enum MDBX_chk_flags_t flags, + enum MDBX_chk_severity verbosity, + unsigned timeout_seconds_16dot16); +/** FIXME */ +LIBMDBX_API int mdbx_env_chk_problem(MDBX_chk_context_t *ctx); + +/** end of chk @} */ /** end of c_api @} */ From f0d523c507042cc70eeeb690778c9b2be6a8b33f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 10 Oct 2023 23:14:40 +0300 Subject: [PATCH 003/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20API=20=D1=84=D1=83=D0=BD=D0=BA?= =?UTF-8?q?=D1=86=D0=B8=D1=8F=D0=BC=D0=B8=20lock/unlock/upgrade/downgrade?= =?UTF-8?q?=20=D0=BE=D1=81=D0=BD=D0=BE=D0=B2=D0=BD=D0=BE=D0=B9=20=D0=B1?= =?UTF-8?q?=D0=BB=D0=BE=D0=BA=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 2 +- src/core.c | 78 ++++++++++++++++++++++++++++++++--------------- src/internals.h | 1 - src/lck-posix.c | 61 +++++++++++++++++++++++++++--------- src/lck-windows.c | 33 +++++++++++--------- src/osal.h | 14 ++++----- 6 files changed, 126 insertions(+), 63 deletions(-) diff --git a/mdbx.h b/mdbx.h index 768e9b00..de16ccff 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5570,7 +5570,7 @@ LIBMDBX_API int mdbx_txn_lock(MDBX_env *env, bool dont_wait); /** \brief Releases 
write-transaction lock. * Provided for custom and/or complex locking scenarios. * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API void mdbx_txn_unlock(MDBX_env *env); +LIBMDBX_API int mdbx_txn_unlock(MDBX_env *env); /** \brief Open an environment instance using specific meta-page * for checking and recovery. diff --git a/src/core.c b/src/core.c index 899b65af..d0cb0914 100644 --- a/src/core.c +++ b/src/core.c @@ -8200,7 +8200,7 @@ retry:; rc = MDBX_SUCCESS /* means "some data was synced" */; } - err = mdbx_txn_lock(env, nonblock); + err = osal_txn_lock(env, nonblock); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -8247,7 +8247,7 @@ retry:; bailout: if (locked) - mdbx_txn_unlock(env); + osal_txn_unlock(env); return rc; } @@ -8442,7 +8442,7 @@ static void txn_valgrind(MDBX_env *env, MDBX_txn *txn) { } else if (env->me_flags & MDBX_RDONLY) { /* read-only mode, no write-txn, no wlock mutex */ last = NUM_METAS; - } else if (mdbx_txn_lock(env, true) == MDBX_SUCCESS) { + } else if (osal_txn_lock(env, true) == MDBX_SUCCESS) { /* no write-txn */ last = NUM_METAS; should_unlock = true; @@ -8463,7 +8463,7 @@ static void txn_valgrind(MDBX_env *env, MDBX_txn *txn) { pgno2bytes(env, edge - last)); } if (should_unlock) - mdbx_txn_unlock(env); + osal_txn_unlock(env); } } #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ @@ -8840,6 +8840,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { txn->mt_flags = MDBX_TXN_RDONLY | MDBX_TXN_FINISHED; return MDBX_SUCCESS; } + txn->mt_owner = tid; /* Seek & fetch the last meta */ uint64_t timestamp = 0; @@ -8915,12 +8916,11 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { rc = MDBX_CORRUPTED; goto bailout; } - eASSERT(env, txn->mt_txnid >= env->me_lck->mti_oldest_reader.weak); txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */ + txn->mt_numdbs = env->me_numdbs; ENSURE(env, txn->mt_txnid >= /* paranoia is appropriate here */ env->me_lck ->mti_oldest_reader.weak); - 
txn->mt_numdbs = env->me_numdbs; } else { eASSERT(env, (flags & ~(MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_SPILLS | MDBX_WRITEMAP)) == 0); @@ -8946,16 +8946,16 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { /* Not yet touching txn == env->me_txn0, it may be active */ jitter4testing(false); - rc = mdbx_txn_lock(env, !!(flags & MDBX_TXN_TRY)); + rc = osal_txn_lock(env, !!(flags & MDBX_TXN_TRY)); if (unlikely(rc)) return rc; if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) { - mdbx_txn_unlock(env); + osal_txn_unlock(env); return MDBX_PANIC; } #if defined(_WIN32) || defined(_WIN64) if (unlikely(!env->me_map)) { - mdbx_txn_unlock(env); + osal_txn_unlock(env); return MDBX_EPERM; } #endif /* Windows */ @@ -9129,7 +9129,6 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) txn_valgrind(env, txn); #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ - txn->mt_owner = tid; return MDBX_SUCCESS; } bailout: @@ -9810,9 +9809,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { txn->mt_txnid == slot->mr_txnid.weak && slot->mr_txnid.weak >= env->me_lck->mti_oldest_reader.weak); #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) - atomic_add32(&env->me_ignore_EDEADLK, 1); txn_valgrind(env, nullptr); - atomic_sub32(&env->me_ignore_EDEADLK, 1); #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ atomic_store32(&slot->mr_snapshot_pages_used, 0, mo_Relaxed); safe64_reset(&slot->mr_txnid, false); @@ -9845,7 +9842,6 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ txn->mt_flags = MDBX_TXN_FINISHED; - txn->mt_owner = 0; env->me_txn = txn->mt_parent; pnl_free(txn->tw.spilled.list); txn->tw.spilled.list = nullptr; @@ -9858,7 +9854,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { if (!(env->me_flags & MDBX_WRITEMAP)) dlist_free(txn); /* The writer mutex was locked in mdbx_txn_begin. 
*/ - mdbx_txn_unlock(env); + osal_txn_unlock(env); } else { eASSERT(env, txn->mt_parent != NULL); MDBX_txn *const parent = txn->mt_parent; @@ -9870,6 +9866,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { eASSERT(env, memcmp(&txn->tw.troika, &parent->tw.troika, sizeof(meta_troika_t)) == 0); + txn->mt_owner = 0; if (txn->tw.lifo_reclaimed) { eASSERT(env, MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) >= (uintptr_t)parent->tw.lifo_reclaimed); @@ -13258,7 +13255,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, return MDBX_EACCESS; if (!inside_txn) { - int err = mdbx_txn_lock(env, false); + int err = osal_txn_lock(env, false); if (unlikely(err != MDBX_SUCCESS)) return err; need_unlock = true; @@ -13609,7 +13606,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, bailout: if (need_unlock) - mdbx_txn_unlock(env); + osal_txn_unlock(env); return rc; } @@ -21675,13 +21672,13 @@ __cold static int env_copy_asis(MDBX_env *env, MDBX_txn *read_txn, return rc; /* Temporarily block writers until we snapshot the meta pages */ - rc = mdbx_txn_lock(env, false); + rc = osal_txn_lock(env, false); if (unlikely(rc != MDBX_SUCCESS)) return rc; rc = txn_renew(read_txn, MDBX_TXN_RDONLY); if (unlikely(rc != MDBX_SUCCESS)) { - mdbx_txn_unlock(env); + osal_txn_unlock(env); return rc; } @@ -21693,7 +21690,7 @@ __cold static int env_copy_asis(MDBX_env *env, MDBX_txn *read_txn, memcpy(buffer, env->me_map, meta_bytes); MDBX_meta *const headcopy = /* LY: get pointer to the snapshot copy */ ptr_disp(buffer, ptr_dist(meta_recent(env, &troika).ptr_c, env->me_map)); - mdbx_txn_unlock(env); + osal_txn_unlock(env); if (flags & MDBX_CP_FORCE_DYNAMIC_SIZE) meta_make_sizeable(headcopy); @@ -21953,7 +21950,7 @@ __cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, env->me_txn0->mt_owner != osal_thread_self(); bool should_unlock = false; if (lock_needed) { - rc = mdbx_txn_lock(env, false); + rc = osal_txn_lock(env, false); if (unlikely(rc)) 
return rc; should_unlock = true; @@ -21965,7 +21962,7 @@ __cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, env->me_flags &= ~flags; if (should_unlock) - mdbx_txn_unlock(env); + osal_txn_unlock(env); return MDBX_SUCCESS; } @@ -24828,7 +24825,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, return MDBX_EINVAL; if (env->me_options.dp_reserve_limit != (unsigned)value) { if (lock_needed) { - err = mdbx_txn_lock(env, false); + err = osal_txn_lock(env, false); if (unlikely(err != MDBX_SUCCESS)) return err; should_unlock = true; @@ -24868,7 +24865,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (unlikely(env->me_flags & MDBX_RDONLY)) return MDBX_EACCESS; if (lock_needed) { - err = mdbx_txn_lock(env, false); + err = osal_txn_lock(env, false); if (unlikely(err != MDBX_SUCCESS)) return err; should_unlock = true; @@ -24968,7 +24965,7 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, } if (should_unlock) - mdbx_txn_unlock(env); + osal_txn_unlock(env); return err; } @@ -25490,6 +25487,39 @@ mdbx_key_from_int32(const int32_t i32) { #endif /* LIBMDBX_NO_EXPORTS_LEGACY_API */ +/*------------------------------------------------------------------------------ + * Locking API */ + +int mdbx_txn_lock(MDBX_env *env, bool dont_wait) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(env->me_flags & MDBX_RDONLY)) + return MDBX_EACCESS; + if (unlikely(env->me_txn0->mt_owner || + (env->me_txn0->mt_flags & MDBX_TXN_FINISHED) == 0)) + return MDBX_BUSY; + + return osal_txn_lock(env, dont_wait); +} + +int mdbx_txn_unlock(MDBX_env *env) { + int rc = check_env(env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(env->me_flags & MDBX_RDONLY)) + return MDBX_EACCESS; + if (unlikely(env->me_txn0->mt_owner != osal_thread_self())) + return MDBX_THREAD_MISMATCH; + if (unlikely((env->me_txn0->mt_flags & MDBX_TXN_FINISHED) 
== 0)) + return MDBX_BUSY; + + osal_txn_unlock(env); + return MDBX_SUCCESS; +} + /******************************************************************************/ /* *INDENT-OFF* */ /* clang-format off */ diff --git a/src/internals.h b/src/internals.h index 3d1fd706..1664dcd7 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1482,7 +1482,6 @@ struct MDBX_env { int me_valgrind_handle; #endif #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) - MDBX_atomic_uint32_t me_ignore_EDEADLK; pgno_t me_poison_edge; #endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ diff --git a/src/lck-posix.c b/src/lck-posix.c index 17c50ddd..7f58e9ed 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -120,7 +120,7 @@ mdbx_global_destructor(void) { * - Блокировка таблицы читателей для регистрации, * т.е. функции osal_rdt_lock() и osal_rdt_unlock(). * - Блокировка БД для пишущих транзакций, - * т.е. функции mdbx_txn_lock() и mdbx_txn_unlock(). + * т.е. функции osal_txn_lock() и osal_txn_unlock(). * * Остальной функционал реализуется отдельно посредством файловых блокировок: * - Первоначальный захват БД в режиме exclusive/shared и последующий перевод @@ -527,6 +527,34 @@ MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env) { return rc; } +MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, bool dont_wait) { + assert(env->me_lfd != INVALID_HANDLE_VALUE); + if (unlikely(osal_getpid() != env->me_pid)) + return MDBX_PANIC; + + const int cmd = dont_wait ? op_setlk : op_setlkw; + int rc = lck_op(env->me_lfd, cmd, F_WRLCK, 0, 1); + if (rc == MDBX_SUCCESS && (env->me_flags & MDBX_EXCLUSIVE) == 0) { + rc = (env->me_pid > 1) + ? 
lck_op(env->me_lazy_fd, cmd, F_WRLCK, 0, env->me_pid - 1) + : MDBX_SUCCESS; + if (rc == MDBX_SUCCESS) { + rc = lck_op(env->me_lazy_fd, cmd, F_WRLCK, env->me_pid + 1, + OFF_T_MAX - env->me_pid - 1); + if (rc != MDBX_SUCCESS && env->me_pid > 1 && + lck_op(env->me_lazy_fd, op_setlk, F_UNLCK, 0, env->me_pid - 1)) + rc = MDBX_PANIC; + } + if (rc != MDBX_SUCCESS && lck_op(env->me_lfd, op_setlk, F_RDLCK, 0, 1)) + rc = MDBX_PANIC; + } + if (unlikely(rc != 0)) { + ERROR("%s, err %u", "lck", rc); + assert(MDBX_IS_ERROR(rc)); + } + return rc; +} + __cold MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, MDBX_env *inprocess_neighbor) { if (unlikely(osal_getpid() != env->me_pid)) @@ -822,11 +850,6 @@ __cold static int mdbx_ipclock_failed(MDBX_env *env, osal_ipclock_t *ipc, #error "FIXME" #endif /* MDBX_LOCKING */ -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) - if (rc == EDEADLK && atomic_load32(&env->me_ignore_EDEADLK, mo_Relaxed) > 0) - return rc; -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ - ERROR("mutex (un)lock failed, %s", mdbx_strerror(err)); if (rc != EDEADLK) env->me_flags |= MDBX_FATAL_ERROR; @@ -931,20 +954,28 @@ MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env) { jitter4testing(true); } -int mdbx_txn_lock(MDBX_env *env, bool dont_wait) { +int osal_txn_lock(MDBX_env *env, bool dont_wait) { TRACE("%swait %s", dont_wait ? "dont-" : "", ">>"); + eASSERT(env, !env->me_txn0->mt_owner); jitter4testing(true); - int rc = mdbx_ipclock_lock(env, &env->me_lck->mti_wlock, dont_wait); - TRACE("<< rc %d", rc); - return MDBX_IS_ERROR(rc) ? 
rc : MDBX_SUCCESS; + const int err = mdbx_ipclock_lock(env, &env->me_lck->mti_wlock, dont_wait); + int rc = err; + if (likely(!MDBX_IS_ERROR(err))) { + env->me_txn0->mt_owner = osal_thread_self(); + rc = MDBX_SUCCESS; + } + TRACE("<< rc %d", err); + return rc; } -void mdbx_txn_unlock(MDBX_env *env) { +void osal_txn_unlock(MDBX_env *env) { TRACE("%s", ">>"); - int rc = mdbx_ipclock_unlock(env, &env->me_lck->mti_wlock); - TRACE("<< rc %d", rc); - if (unlikely(rc != MDBX_SUCCESS)) - mdbx_panic("%s() failed: err %d\n", __func__, rc); + eASSERT(env, env->me_txn0->mt_owner == osal_thread_self()); + env->me_txn0->mt_owner = 0; + int err = mdbx_ipclock_unlock(env, &env->me_lck->mti_wlock); + TRACE("<< err %d", err); + if (unlikely(err != MDBX_SUCCESS)) + mdbx_panic("%s() failed: err %d\n", __func__, err); jitter4testing(true); } diff --git a/src/lck-windows.c b/src/lck-windows.c index 8ffccb1b..ed77da30 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -178,7 +178,8 @@ static int funlock(mdbx_filehandle_t fd, size_t offset, size_t bytes) { #define DXB_BODY (env->me_psize * (size_t)NUM_METAS), DXB_MAXLEN #define DXB_WHOLE 0, DXB_MAXLEN -int mdbx_txn_lock(MDBX_env *env, bool dontwait) { +int osal_txn_lock(MDBX_env *env, bool dontwait) { + eASSERT(env, !env->me_txn0->mt_owner); if (dontwait) { if (!TryEnterCriticalSection(&env->me_windowsbug_lock)) return MDBX_BUSY; @@ -194,12 +195,8 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) { } } - if (env->me_flags & MDBX_EXCLUSIVE) { - /* Zap: Failing to release lock 'env->me_windowsbug_lock' - * in function 'mdbx_txn_lock' */ - MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(26115); - return MDBX_SUCCESS; - } + if (env->me_flags & MDBX_EXCLUSIVE) + goto done; const HANDLE fd4data = env->me_overlapped_fd ? 
env->me_overlapped_fd : env->me_lazy_fd; @@ -218,17 +215,20 @@ int mdbx_txn_lock(MDBX_env *env, bool dontwait) { } } if (rc == MDBX_SUCCESS) { + done: /* Zap: Failing to release lock 'env->me_windowsbug_lock' * in function 'mdbx_txn_lock' */ MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(26115); - return rc; + env->me_txn0->mt_owner = osal_thread_self(); + return MDBX_SUCCESS; } LeaveCriticalSection(&env->me_windowsbug_lock); return (!dontwait || rc != ERROR_LOCK_VIOLATION) ? rc : MDBX_BUSY; } -void mdbx_txn_unlock(MDBX_env *env) { +void osal_txn_unlock(MDBX_env *env) { + eASSERT(env, env->me_txn0->mt_owner == osal_thread_self()); if ((env->me_flags & MDBX_EXCLUSIVE) == 0) { const HANDLE fd4data = env->me_overlapped_fd ? env->me_overlapped_fd : env->me_lazy_fd; @@ -236,6 +236,7 @@ void mdbx_txn_unlock(MDBX_env *env) { if (err != MDBX_SUCCESS) mdbx_panic("%s failed: err %u", __func__, err); } + env->me_txn0->mt_owner = 0; LeaveCriticalSection(&env->me_windowsbug_lock); } @@ -442,7 +443,7 @@ osal_resume_threads_after_remap(mdbx_handle_array_t *array) { * The osal_lck_downgrade() moves the locking-FSM from "exclusive write" * state to the "used" (i.e. shared) state. * - * The mdbx_lck_upgrade() moves the locking-FSM from "used" (i.e. shared) + * The osal_lck_upgrade() moves the locking-FSM from "used" (i.e. shared) * state to the "exclusive write" state. */ @@ -615,7 +616,7 @@ MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env) { return MDBX_SUCCESS /* 5) now at S-? (used), done */; } -MDBX_INTERNAL_FUNC int mdbx_lck_upgrade(MDBX_env *env) { +MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, bool dont_wait) { /* Transite from used state (S-?) to exclusive-write (E-E) */ assert(env->me_lfd != INVALID_HANDLE_VALUE); @@ -625,7 +626,9 @@ MDBX_INTERNAL_FUNC int mdbx_lck_upgrade(MDBX_env *env) { /* 1) now on S-? (used), try S-E (locked) */ jitter4testing(false); - int rc = flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_UPPER); + int rc = flock(env->me_lfd, + dont_wait ? 
LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, + LCK_UPPER); if (rc != MDBX_SUCCESS) { /* 2) something went wrong, give up */; VERBOSE("%s, err %u", "S-?(used) >> S-E(locked)", rc); @@ -640,7 +643,9 @@ MDBX_INTERNAL_FUNC int mdbx_lck_upgrade(MDBX_env *env) { /* 4) now on ?-E (middle), try E-E (exclusive-write) */ jitter4testing(false); - rc = flock(env->me_lfd, LCK_EXCLUSIVE | LCK_DONTWAIT, LCK_LOWER); + rc = flock(env->me_lfd, + dont_wait ? LCK_EXCLUSIVE | LCK_DONTWAIT : LCK_EXCLUSIVE, + LCK_LOWER); if (rc != MDBX_SUCCESS) { /* 5) something went wrong, give up */; VERBOSE("%s, err %u", "?-E(middle) >> E-E(exclusive-write)", rc); @@ -686,7 +691,7 @@ MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, const bool synced = env->me_lck_mmap.lck->mti_unsynced_pages.weak == 0; osal_munmap(&env->me_lck_mmap); if (synced && !inprocess_neighbor && env->me_lfd != INVALID_HANDLE_VALUE && - mdbx_lck_upgrade(env) == MDBX_SUCCESS) + osal_lck_upgrade(env, true) == MDBX_SUCCESS) /* this will fail if LCK is used/mmapped by other process(es) */ osal_ftruncate(env->me_lfd, 0); } diff --git a/src/osal.h b/src/osal.h index 3d45da4b..1b5c317f 100644 --- a/src/osal.h +++ b/src/osal.h @@ -718,6 +718,8 @@ MDBX_INTERNAL_FUNC int osal_lck_seize(MDBX_env *env); /// operational lock. /// \return Error code or zero on success MDBX_INTERNAL_FUNC int osal_lck_downgrade(MDBX_env *env); +MDBX_MAYBE_UNUSED MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, + bool dont_wait); /// \brief Locks LCK-file or/and table of readers for (de)registering. /// \return Error code or zero on success @@ -726,16 +728,12 @@ MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env); /// \brief Unlocks LCK-file or/and table of readers after (de)registering. MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env); -/// \brief Acquires lock for DB change (on writing transaction start) -/// Reading transactions will not be blocked. -/// Declared as LIBMDBX_API because it is used in mdbx_chk. 
+/// \brief Acquires write-transaction lock. /// \return Error code or zero on success -LIBMDBX_API int mdbx_txn_lock(MDBX_env *env, bool dont_wait); +MDBX_INTERNAL_FUNC int osal_txn_lock(MDBX_env *env, bool dont_wait); -/// \brief Releases lock once DB changes is made (after writing transaction -/// has finished). -/// Declared as LIBMDBX_API because it is used in mdbx_chk. -LIBMDBX_API void mdbx_txn_unlock(MDBX_env *env); +/// \brief Releases write-transaction lock.. +MDBX_INTERNAL_FUNC void osal_txn_unlock(MDBX_env *env); /// \brief Sets alive-flag of reader presence (indicative lock) for PID of /// the current process. The function does no more than needed for From 253a56206b60fffd8ed7f4575607fb06717cef33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 24 Apr 2023 20:59:18 +0300 Subject: [PATCH 004/137] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D1=80?= =?UTF-8?q?=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20=D0=B8=20=D0=BF=D0=B5?= =?UTF-8?q?=D1=80=D0=B5=D0=BD=D0=BE=D1=81=20=D1=84=D1=83=D0=BD=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D0=BE=D0=BD=D0=B0=D0=BB=D0=B0=20=D1=83=D1=82=D0=B8=D0=BB?= =?UTF-8?q?=D0=B8=D1=82=D1=8B=20`mdbx=5Fchk`=20=D0=B2=D0=BD=D1=83=D1=82?= =?UTF-8?q?=D1=80=D1=8C=20=D0=B1=D0=B8=D0=B1=D0=BB=D0=B8=D0=BE=D1=82=D0=B5?= =?UTF-8?q?=D0=BA=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- TODO.md | 2 +- mdbx.h | 73 +- src/base.h | 1 + src/core.c | 2340 ++++++++++++++++++++++++++++++++++++++++++++--- src/internals.h | 41 +- src/mdbx_chk.c | 1898 ++++++++------------------------------ 6 files changed, 2637 insertions(+), 1718 deletions(-) diff --git a/TODO.md b/TODO.md index 0d9fd46d..d8e2d0b7 100644 --- a/TODO.md +++ b/TODO.md @@ -11,7 +11,6 @@ For the same reason ~~Github~~ is blacklisted forever. So currently most of the links are broken due to noted malicious ~~Github~~ sabotage. 
- - [Move most of `mdbx_chk` functional to the library API](https://libmdbx.dqdkfa.ru/dead-github/issues/204). - [Replace SRW-lock on Windows to allow shrink DB with `MDBX_NOTLS` option](https://libmdbx.dqdkfa.ru/dead-github/issues/210). - [More flexible support of asynchronous runtime/framework(s)](https://libmdbx.dqdkfa.ru/dead-github/issues/200). - [Migration guide from LMDB to MDBX](https://libmdbx.dqdkfa.ru/dead-github/issues/199). @@ -23,6 +22,7 @@ So currently most of the links are broken due to noted malicious ~~Github~~ sabo Done ---- + - [Move most of `mdbx_chk` functional to the library API](https://libmdbx.dqdkfa.ru/dead-github/issues/204). - [Simple careful mode for working with corrupted DB](https://libmdbx.dqdkfa.ru/dead-github/issues/223). - [Engage an "overlapped I/O" on Windows](https://libmdbx.dqdkfa.ru/dead-github/issues/224). - [Large/Overflow pages accounting for dirty-room](https://libmdbx.dqdkfa.ru/dead-github/issues/192). diff --git a/mdbx.h b/mdbx.h index de16ccff..c94bde3f 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2571,9 +2571,7 @@ struct MDBX_envinfo { uint64_t mi_latter_reader_txnid; /**< ID of the last reader transaction */ uint64_t mi_self_latter_reader_txnid; /**< ID of the last reader transaction of caller process */ - uint64_t mi_meta0_txnid, mi_meta0_sign; - uint64_t mi_meta1_txnid, mi_meta1_sign; - uint64_t mi_meta2_txnid, mi_meta2_sign; + uint64_t mi_meta_txnid[3], mi_meta_sign[3]; uint32_t mi_maxreaders; /**< Total reader slots in the environment */ uint32_t mi_numreaders; /**< Max reader slots used in the environment */ uint32_t mi_dxb_pagesize; /**< Database pagesize */ @@ -2590,7 +2588,7 @@ struct MDBX_envinfo { struct { struct { uint64_t x, y; - } current, meta0, meta1, meta2; + } current, meta[3]; } mi_bootid; /** Bytes not explicitly synchronized to disk */ @@ -5525,43 +5523,6 @@ mdbx_env_get_hsr(const MDBX_env *env); * \ingroup c_extra * @{ */ -/** \brief Page types for traverse the b-tree. 
- * \see mdbx_env_pgwalk() \see MDBX_pgvisitor_func */ -enum MDBX_page_type_t { - MDBX_page_broken, - MDBX_page_meta, - MDBX_page_large, - MDBX_page_branch, - MDBX_page_leaf, - MDBX_page_dupfixed_leaf, - MDBX_subpage_leaf, - MDBX_subpage_dupfixed_leaf, - MDBX_subpage_broken, -}; -#ifndef __cplusplus -typedef enum MDBX_page_type_t MDBX_page_type_t; -#endif - -/** \brief Pseudo-name for MainDB */ -#define MDBX_PGWALK_MAIN ((void *)((ptrdiff_t)0)) -/** \brief Pseudo-name for GarbageCollectorDB */ -#define MDBX_PGWALK_GC ((void *)((ptrdiff_t)-1)) -/** \brief Pseudo-name for MetaPages */ -#define MDBX_PGWALK_META ((void *)((ptrdiff_t)-2)) - -/** \brief Callback function for traverse the b-tree. \see mdbx_env_pgwalk() */ -typedef int -MDBX_pgvisitor_func(const uint64_t pgno, const unsigned number, void *const ctx, - const int deep, const MDBX_val *dbi_name, - const size_t page_size, const MDBX_page_type_t type, - const MDBX_error_t err, const size_t nentries, - const size_t payload_bytes, const size_t header_bytes, - const size_t unused_bytes) MDBX_CXX17_NOEXCEPT; - -/** \brief B-tree traversal function. */ -LIBMDBX_API int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, - void *ctx, bool dont_check_keys_ordering); - /** \brief Acquires write-transaction lock. * Provided for custom and/or complex locking scenarios. * \returns A non-zero error value on failure and 0 on success. 
*/ @@ -5718,6 +5679,14 @@ struct MDBX_chk_histogram { * \see mdbx_env_chk() */ typedef struct MDBX_chk_subdb { MDBX_chk_user_subdb_cookie_t *cookie; + +/** \brief Pseudo-name for MainDB */ +#define MDBX_CHK_MAIN ((void *)((ptrdiff_t)0)) +/** \brief Pseudo-name for GarbageCollectorDB */ +#define MDBX_CHK_GC ((void *)((ptrdiff_t)-1)) +/** \brief Pseudo-name for MetaPages */ +#define MDBX_CHK_META ((void *)((ptrdiff_t)-2)) + MDBX_val name; MDBX_db_flags_t flags; int id; @@ -5749,7 +5718,7 @@ typedef struct MDBX_chk_context { MDBX_env *env; MDBX_txn *txn; MDBX_chk_scope_t *scope; - unsigned scope_nesting; + uint8_t scope_nesting; struct { size_t total_payload_bytes; size_t subdb_total, subdb_processed; @@ -5776,7 +5745,7 @@ typedef struct MDBX_chk_callbacks { void (*scope_pop)(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *outer, MDBX_chk_scope_t *inner); void (*issue)(MDBX_chk_context_t *ctx, const char *object, - size_t entry_number, const char *issue, const char *extra_fmt, + uint64_t entry_number, const char *issue, const char *extra_fmt, va_list extra_args); MDBX_chk_user_subdb_cookie_t *(*subdb_filter)(MDBX_chk_context_t *ctx, const MDBX_val *name, @@ -5792,16 +5761,14 @@ typedef struct MDBX_chk_callbacks { int (*stage_begin)(MDBX_chk_context_t *ctx, enum MDBX_chk_stage); int (*stage_end)(MDBX_chk_context_t *ctx, enum MDBX_chk_stage, int err); - struct { - MDBX_chk_line_t *(*begin)(MDBX_chk_context_t *ctx, - enum MDBX_chk_severity severity); - void (*flush)(MDBX_chk_line_t *); - void (*done)(MDBX_chk_line_t *); - void (*chars)(MDBX_chk_line_t *, const char *str, size_t len); - void (*format)(MDBX_chk_line_t *, const char *fmt, va_list args); - void (*size)(MDBX_chk_line_t *, const char *prefix, const uint64_t value, - const char *suffix); - } print; + MDBX_chk_line_t *(*print_begin)(MDBX_chk_context_t *ctx, + enum MDBX_chk_severity severity); + void (*print_flush)(MDBX_chk_line_t *); + void (*print_done)(MDBX_chk_line_t *); + void (*print_chars)(MDBX_chk_line_t *, 
const char *str, size_t len); + void (*print_format)(MDBX_chk_line_t *, const char *fmt, va_list args); + void (*print_size)(MDBX_chk_line_t *, const char *prefix, + const uint64_t value, const char *suffix); } MDBX_chk_callbacks_t; /** FIXME */ diff --git a/src/base.h b/src/base.h index b8a243e8..fd730945 100644 --- a/src/base.h +++ b/src/base.h @@ -48,6 +48,7 @@ #include #include +#include #include #include #include diff --git a/src/core.c b/src/core.c index d0cb0914..fec25bed 100644 --- a/src/core.c +++ b/src/core.c @@ -5572,7 +5572,7 @@ __cold static void meta_troika_dump(const MDBX_env *env, const meta_ptr_t recent = meta_recent(env, troika); const meta_ptr_t prefer_steady = meta_prefer_steady(env, troika); const meta_ptr_t tail = meta_tail(env, troika); - NOTICE("%" PRIaTXN ".%c:%" PRIaTXN ".%c:%" PRIaTXN ".%c, fsm=0x%02x, " + NOTICE("troika: %" PRIaTXN ".%c:%" PRIaTXN ".%c:%" PRIaTXN ".%c, fsm=0x%02x, " "head=%d-%" PRIaTXN ".%c, " "base=%d-%" PRIaTXN ".%c, " "tail=%d-%" PRIaTXN ".%c, " @@ -12143,6 +12143,10 @@ static __always_inline bool eq_fast(const MDBX_val *a, const MDBX_val *b) { eq_fast_slowpath(a->iov_base, b->iov_base, a->iov_len); } +static int cmp_equal_or_greater(const MDBX_val *a, const MDBX_val *b) { + return eq_fast(a, b) ? 0 : 1; +} + static int validate_meta(MDBX_env *env, MDBX_meta *const meta, const MDBX_page *const page, const unsigned meta_number, unsigned *guess_pagesize) { @@ -22247,9 +22251,9 @@ __cold int mdbx_dbi_dupsort_depthmask(MDBX_txn *txn, MDBX_dbi dbi, return (rc == MDBX_NOTFOUND) ? 
MDBX_SUCCESS : rc; } -__cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn, - MDBX_envinfo *arg, const size_t bytes) { - +__cold static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, + MDBX_envinfo *out, const size_t bytes, + meta_troika_t *const troika) { const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); @@ -22259,18 +22263,18 @@ __cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn, /* environment not yet opened */ #if 1 /* default behavior: returns the available info but zeroed the rest */ - memset(arg, 0, bytes); - arg->mi_geo.lower = env->me_dbgeo.lower; - arg->mi_geo.upper = env->me_dbgeo.upper; - arg->mi_geo.shrink = env->me_dbgeo.shrink; - arg->mi_geo.grow = env->me_dbgeo.grow; - arg->mi_geo.current = env->me_dbgeo.now; - arg->mi_maxreaders = env->me_maxreaders; - arg->mi_dxb_pagesize = env->me_psize; - arg->mi_sys_pagesize = env->me_os_psize; + memset(out, 0, bytes); + out->mi_geo.lower = env->me_dbgeo.lower; + out->mi_geo.upper = env->me_dbgeo.upper; + out->mi_geo.shrink = env->me_dbgeo.shrink; + out->mi_geo.grow = env->me_dbgeo.grow; + out->mi_geo.current = env->me_dbgeo.now; + out->mi_maxreaders = env->me_maxreaders; + out->mi_dxb_pagesize = env->me_psize; + out->mi_sys_pagesize = env->me_os_psize; if (likely(bytes > size_before_bootid)) { - arg->mi_bootid.current.x = bootid.x; - arg->mi_bootid.current.y = bootid.y; + out->mi_bootid.current.x = bootid.x; + out->mi_bootid.current.y = bootid.y; } return MDBX_SUCCESS; #else @@ -22285,123 +22289,119 @@ __cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn, if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) return MDBX_PANIC; - meta_troika_t holder; - meta_troika_t const *troika; if (txn && !(txn->mt_flags & MDBX_TXN_RDONLY)) - troika = &txn->tw.troika; - else { - holder = meta_tap(env); - troika = &holder; - } + *troika = txn->tw.troika; + 
else + *troika = meta_tap(env); const meta_ptr_t head = meta_recent(env, troika); - arg->mi_recent_txnid = head.txnid; - arg->mi_meta0_txnid = troika->txnid[0]; - arg->mi_meta0_sign = unaligned_peek_u64(4, meta0->mm_sign); - arg->mi_meta1_txnid = troika->txnid[1]; - arg->mi_meta1_sign = unaligned_peek_u64(4, meta1->mm_sign); - arg->mi_meta2_txnid = troika->txnid[2]; - arg->mi_meta2_sign = unaligned_peek_u64(4, meta2->mm_sign); + out->mi_recent_txnid = head.txnid; + out->mi_meta_txnid[0] = troika->txnid[0]; + out->mi_meta_sign[0] = unaligned_peek_u64(4, meta0->mm_sign); + out->mi_meta_txnid[1] = troika->txnid[1]; + out->mi_meta_sign[1] = unaligned_peek_u64(4, meta1->mm_sign); + out->mi_meta_txnid[2] = troika->txnid[2]; + out->mi_meta_sign[2] = unaligned_peek_u64(4, meta2->mm_sign); if (likely(bytes > size_before_bootid)) { - memcpy(&arg->mi_bootid.meta0, &meta0->mm_bootid, 16); - memcpy(&arg->mi_bootid.meta1, &meta1->mm_bootid, 16); - memcpy(&arg->mi_bootid.meta2, &meta2->mm_bootid, 16); + memcpy(&out->mi_bootid.meta[0], &meta0->mm_bootid, 16); + memcpy(&out->mi_bootid.meta[1], &meta1->mm_bootid, 16); + memcpy(&out->mi_bootid.meta[2], &meta2->mm_bootid, 16); } const volatile MDBX_meta *txn_meta = head.ptr_v; - arg->mi_last_pgno = txn_meta->mm_geo.next - 1; - arg->mi_geo.current = pgno2bytes(env, txn_meta->mm_geo.now); + out->mi_last_pgno = txn_meta->mm_geo.next - 1; + out->mi_geo.current = pgno2bytes(env, txn_meta->mm_geo.now); if (txn) { - arg->mi_last_pgno = txn->mt_next_pgno - 1; - arg->mi_geo.current = pgno2bytes(env, txn->mt_end_pgno); + out->mi_last_pgno = txn->mt_next_pgno - 1; + out->mi_geo.current = pgno2bytes(env, txn->mt_end_pgno); const txnid_t wanna_meta_txnid = (txn->mt_flags & MDBX_TXN_RDONLY) ? txn->mt_txnid : txn->mt_txnid - xMDBX_TXNID_STEP; - txn_meta = (arg->mi_meta0_txnid == wanna_meta_txnid) ? meta0 : txn_meta; - txn_meta = (arg->mi_meta1_txnid == wanna_meta_txnid) ? meta1 : txn_meta; - txn_meta = (arg->mi_meta2_txnid == wanna_meta_txnid) ? 
meta2 : txn_meta; + txn_meta = (out->mi_meta_txnid[0] == wanna_meta_txnid) ? meta0 : txn_meta; + txn_meta = (out->mi_meta_txnid[1] == wanna_meta_txnid) ? meta1 : txn_meta; + txn_meta = (out->mi_meta_txnid[2] == wanna_meta_txnid) ? meta2 : txn_meta; } - arg->mi_geo.lower = pgno2bytes(env, txn_meta->mm_geo.lower); - arg->mi_geo.upper = pgno2bytes(env, txn_meta->mm_geo.upper); - arg->mi_geo.shrink = pgno2bytes(env, pv2pages(txn_meta->mm_geo.shrink_pv)); - arg->mi_geo.grow = pgno2bytes(env, pv2pages(txn_meta->mm_geo.grow_pv)); + out->mi_geo.lower = pgno2bytes(env, txn_meta->mm_geo.lower); + out->mi_geo.upper = pgno2bytes(env, txn_meta->mm_geo.upper); + out->mi_geo.shrink = pgno2bytes(env, pv2pages(txn_meta->mm_geo.shrink_pv)); + out->mi_geo.grow = pgno2bytes(env, pv2pages(txn_meta->mm_geo.grow_pv)); const uint64_t unsynced_pages = atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed) + (atomic_load32(&env->me_lck->mti_meta_sync_txnid, mo_Relaxed) != - (uint32_t)arg->mi_recent_txnid); + (uint32_t)out->mi_recent_txnid); - arg->mi_mapsize = env->me_dxb_mmap.limit; + out->mi_mapsize = env->me_dxb_mmap.limit; const MDBX_lockinfo *const lck = env->me_lck; - arg->mi_maxreaders = env->me_maxreaders; - arg->mi_numreaders = env->me_lck_mmap.lck + out->mi_maxreaders = env->me_maxreaders; + out->mi_numreaders = env->me_lck_mmap.lck ? atomic_load32(&lck->mti_numreaders, mo_Relaxed) : INT32_MAX; - arg->mi_dxb_pagesize = env->me_psize; - arg->mi_sys_pagesize = env->me_os_psize; + out->mi_dxb_pagesize = env->me_psize; + out->mi_sys_pagesize = env->me_os_psize; if (likely(bytes > size_before_bootid)) { - arg->mi_unsync_volume = pgno2bytes(env, (size_t)unsynced_pages); + out->mi_unsync_volume = pgno2bytes(env, (size_t)unsynced_pages); const uint64_t monotime_now = osal_monotime(); uint64_t ts = atomic_load64(&lck->mti_eoos_timestamp, mo_Relaxed); - arg->mi_since_sync_seconds16dot16 = + out->mi_since_sync_seconds16dot16 = ts ? 
osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0; ts = atomic_load64(&lck->mti_reader_check_timestamp, mo_Relaxed); - arg->mi_since_reader_check_seconds16dot16 = + out->mi_since_reader_check_seconds16dot16 = ts ? osal_monotime_to_16dot16_noUnderflow(monotime_now - ts) : 0; - arg->mi_autosync_threshold = pgno2bytes( + out->mi_autosync_threshold = pgno2bytes( env, atomic_load32(&lck->mti_autosync_threshold, mo_Relaxed)); - arg->mi_autosync_period_seconds16dot16 = + out->mi_autosync_period_seconds16dot16 = osal_monotime_to_16dot16_noUnderflow( atomic_load64(&lck->mti_autosync_period, mo_Relaxed)); - arg->mi_bootid.current.x = bootid.x; - arg->mi_bootid.current.y = bootid.y; - arg->mi_mode = env->me_lck_mmap.lck ? lck->mti_envmode.weak : env->me_flags; + out->mi_bootid.current.x = bootid.x; + out->mi_bootid.current.y = bootid.y; + out->mi_mode = env->me_lck_mmap.lck ? lck->mti_envmode.weak : env->me_flags; } if (likely(bytes > size_before_pgop_stat)) { #if MDBX_ENABLE_PGOP_STAT - arg->mi_pgop_stat.newly = + out->mi_pgop_stat.newly = atomic_load64(&lck->mti_pgop_stat.newly, mo_Relaxed); - arg->mi_pgop_stat.cow = atomic_load64(&lck->mti_pgop_stat.cow, mo_Relaxed); - arg->mi_pgop_stat.clone = + out->mi_pgop_stat.cow = atomic_load64(&lck->mti_pgop_stat.cow, mo_Relaxed); + out->mi_pgop_stat.clone = atomic_load64(&lck->mti_pgop_stat.clone, mo_Relaxed); - arg->mi_pgop_stat.split = + out->mi_pgop_stat.split = atomic_load64(&lck->mti_pgop_stat.split, mo_Relaxed); - arg->mi_pgop_stat.merge = + out->mi_pgop_stat.merge = atomic_load64(&lck->mti_pgop_stat.merge, mo_Relaxed); - arg->mi_pgop_stat.spill = + out->mi_pgop_stat.spill = atomic_load64(&lck->mti_pgop_stat.spill, mo_Relaxed); - arg->mi_pgop_stat.unspill = + out->mi_pgop_stat.unspill = atomic_load64(&lck->mti_pgop_stat.unspill, mo_Relaxed); - arg->mi_pgop_stat.wops = + out->mi_pgop_stat.wops = atomic_load64(&lck->mti_pgop_stat.wops, mo_Relaxed); - arg->mi_pgop_stat.prefault = + out->mi_pgop_stat.prefault = 
atomic_load64(&lck->mti_pgop_stat.prefault, mo_Relaxed); - arg->mi_pgop_stat.mincore = + out->mi_pgop_stat.mincore = atomic_load64(&lck->mti_pgop_stat.mincore, mo_Relaxed); - arg->mi_pgop_stat.msync = + out->mi_pgop_stat.msync = atomic_load64(&lck->mti_pgop_stat.msync, mo_Relaxed); - arg->mi_pgop_stat.fsync = + out->mi_pgop_stat.fsync = atomic_load64(&lck->mti_pgop_stat.fsync, mo_Relaxed); #else memset(&arg->mi_pgop_stat, 0, sizeof(arg->mi_pgop_stat)); #endif /* MDBX_ENABLE_PGOP_STAT*/ } - arg->mi_self_latter_reader_txnid = arg->mi_latter_reader_txnid = - arg->mi_recent_txnid; + out->mi_self_latter_reader_txnid = out->mi_latter_reader_txnid = + out->mi_recent_txnid; if (env->me_lck_mmap.lck) { - for (size_t i = 0; i < arg->mi_numreaders; ++i) { + for (size_t i = 0; i < out->mi_numreaders; ++i) { const uint32_t pid = atomic_load32(&lck->mti_readers[i].mr_pid, mo_AcquireRelease); if (pid) { const txnid_t txnid = safe64_read(&lck->mti_readers[i].mr_txnid); - if (arg->mi_latter_reader_txnid > txnid) - arg->mi_latter_reader_txnid = txnid; - if (pid == env->me_pid && arg->mi_self_latter_reader_txnid > txnid) - arg->mi_self_latter_reader_txnid = txnid; + if (out->mi_latter_reader_txnid > txnid) + out->mi_latter_reader_txnid = txnid; + if (pid == env->me_pid && out->mi_self_latter_reader_txnid > txnid) + out->mi_self_latter_reader_txnid = txnid; } } } @@ -22410,6 +22410,26 @@ __cold static int fetch_envinfo_ex(const MDBX_env *env, const MDBX_txn *txn, return MDBX_SUCCESS; } +__cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, + size_t bytes, meta_troika_t *troika) { + MDBX_envinfo snap; + int rc = env_info_snap(env, txn, &snap, sizeof(snap), troika); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + while (1) { + rc = env_info_snap(env, txn, out, bytes, troika); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + snap.mi_since_sync_seconds16dot16 = out->mi_since_sync_seconds16dot16; + snap.mi_since_reader_check_seconds16dot16 = + 
out->mi_since_reader_check_seconds16dot16; + if (likely(memcmp(&snap, out, bytes) == 0)) + return MDBX_SUCCESS; + memcpy(&snap, out, bytes); + } +} + __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *arg, size_t bytes) { if (unlikely((env == NULL && txn == NULL) || arg == NULL)) @@ -22436,22 +22456,8 @@ __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, bytes != size_before_pgop_stat) return MDBX_EINVAL; - MDBX_envinfo snap; - int rc = fetch_envinfo_ex(env, txn, &snap, sizeof(snap)); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - while (1) { - rc = fetch_envinfo_ex(env, txn, arg, bytes); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - snap.mi_since_sync_seconds16dot16 = arg->mi_since_sync_seconds16dot16; - snap.mi_since_reader_check_seconds16dot16 = - arg->mi_since_reader_check_seconds16dot16; - if (likely(memcmp(&snap, arg, bytes) == 0)) - return MDBX_SUCCESS; - memcpy(&snap, arg, bytes); - } + meta_troika_t troika; + return env_info(env, txn, arg, bytes, &troika); } static __inline MDBX_cmp_func *get_default_keycmp(MDBX_db_flags_t flags) { @@ -22572,23 +22578,21 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, } /* main table? 
*/ - if (table_name == MDBX_PGWALK_MAIN || - table_name->iov_base == MDBX_PGWALK_MAIN) { + if (table_name == MDBX_CHK_MAIN || table_name->iov_base == MDBX_CHK_MAIN) { rc = dbi_bind(txn, MAIN_DBI, user_flags, keycmp, datacmp); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; *dbi = MAIN_DBI; return rc; } - if (table_name == MDBX_PGWALK_GC || table_name->iov_base == MDBX_PGWALK_GC) { + if (table_name == MDBX_CHK_GC || table_name->iov_base == MDBX_CHK_GC) { rc = dbi_bind(txn, FREE_DBI, user_flags, keycmp, datacmp); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; *dbi = FREE_DBI; return rc; } - if (table_name == MDBX_PGWALK_META || - table_name->iov_base == MDBX_PGWALK_META) { + if (table_name == MDBX_CHK_META || table_name->iov_base == MDBX_CHK_META) { rc = MDBX_EINVAL; goto bailout; } @@ -22781,8 +22785,8 @@ static int dbi_open_cstr(MDBX_txn *txn, const char *name_cstr, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { MDBX_val thunk, *name; - if (name_cstr == MDBX_PGWALK_MAIN || name_cstr == MDBX_PGWALK_GC || - name_cstr == MDBX_PGWALK_META) + if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || + name_cstr == MDBX_CHK_META) name = (void *)name_cstr; else { thunk.iov_len = strlen(name_cstr); @@ -23457,12 +23461,12 @@ typedef struct mdbx_walk_ctx { bool mw_dont_check_keys_ordering; } mdbx_walk_ctx_t; -__cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const sdb, - const MDBX_val *name, int deep); +__cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_walk_sdb_t *sdb, + int deep); static MDBX_page_type_t walk_page_type(const MDBX_page *mp) { if (mp) - switch (mp->mp_flags) { + switch (mp->mp_flags & ~P_SPILLED) { case P_BRANCH: return MDBX_page_branch; case P_LEAF: @@ -23471,15 +23475,13 @@ static MDBX_page_type_t walk_page_type(const MDBX_page *mp) { return MDBX_page_dupfixed_leaf; case P_OVERFLOW: return MDBX_page_large; - case P_META: - return MDBX_page_meta; } return MDBX_page_broken; } /* Depth-first tree 
traversal. */ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, - const MDBX_val *name, int deep, + MDBX_walk_sdb_t *sdb, int deep, txnid_t parent_txnid) { assert(pgno != P_INVALID); MDBX_page *mp = nullptr; @@ -23536,7 +23538,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, pagesize = pgno2bytes(ctx->mw_txn->mt_env, npages); const size_t over_unused = pagesize - over_payload - over_header; const int rc = ctx->mw_visitor(large_pgno, npages, ctx->mw_user, deep, - name, pagesize, MDBX_page_large, err, 1, + sdb, pagesize, MDBX_page_large, err, 1, over_payload, over_header, over_unused); if (unlikely(rc != MDBX_SUCCESS)) return (rc == MDBX_RESULT_TRUE) ? MDBX_SUCCESS : rc; @@ -23606,7 +23608,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, } const int rc = - ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, name, node_ds(node), + ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, sdb, node_ds(node), subtype, err, nsubkeys, subpayload_size, subheader_size, subunused_size + subalign_bytes); if (unlikely(rc != MDBX_SUCCESS)) @@ -23624,7 +23626,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, } const int rc = ctx->mw_visitor( - pgno, 1, ctx->mw_user, deep, name, ctx->mw_txn->mt_env->me_psize, type, + pgno, 1, ctx->mw_user, deep, sdb, ctx->mw_txn->mt_env->me_psize, type, err, nentries, payload_size, header_size, unused_size + align_bytes); if (unlikely(rc != MDBX_SUCCESS)) return (rc == MDBX_RESULT_TRUE) ? 
MDBX_SUCCESS : rc; @@ -23636,7 +23638,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, MDBX_node *node = page_node(mp, i); if (type == MDBX_page_branch) { assert(err == MDBX_SUCCESS); - err = walk_tree(ctx, node_pgno(node), name, deep + 1, mp->mp_txnid); + err = walk_tree(ctx, node_pgno(node), sdb, deep + 1, mp->mp_txnid); if (unlikely(err != MDBX_SUCCESS)) { if (err == MDBX_RESULT_TRUE) break; @@ -23655,11 +23657,13 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } else { - MDBX_db db; - memcpy(&db, node_data(node), sizeof(db)); - const MDBX_val subdb_name = {node_key(node), node_ks(node)}; + MDBX_db aligned_db; + memcpy(&aligned_db, node_data(node), sizeof(aligned_db)); + MDBX_walk_sdb_t sdb_info = { + {node_key(node), node_ks(node)}, nullptr, nullptr}; + sdb_info.internal = &aligned_db; assert(err == MDBX_SUCCESS); - err = walk_sdb(ctx, &db, &subdb_name, deep + 1); + err = walk_sdb(ctx, &sdb_info, deep + 1); } break; @@ -23669,15 +23673,17 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } else { - MDBX_db db; - memcpy(&db, node_data(node), sizeof(db)); + MDBX_db aligned_db; + memcpy(&aligned_db, node_data(node), sizeof(aligned_db)); assert(ctx->mw_cursor->mc_xcursor == &container_of(ctx->mw_cursor, MDBX_cursor_couple, outer)->inner); assert(err == MDBX_SUCCESS); err = cursor_xinit1(ctx->mw_cursor, node, mp); if (likely(err == MDBX_SUCCESS)) { ctx->mw_cursor = &ctx->mw_cursor->mc_xcursor->mx_cursor; - err = walk_tree(ctx, db.md_root, name, deep + 1, mp->mp_txnid); + sdb->nested = &aligned_db; + err = walk_tree(ctx, aligned_db.md_root, sdb, deep + 1, mp->mp_txnid); + sdb->nested = nullptr; MDBX_xcursor *inner_xcursor = container_of(ctx->mw_cursor, MDBX_xcursor, mx_cursor); MDBX_cursor_couple *couple = @@ -23692,15 +23698,16 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t 
pgno, return MDBX_SUCCESS; } -__cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const sdb, - const MDBX_val *name, int deep) { - if (unlikely(sdb->md_root == P_INVALID)) +__cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_walk_sdb_t *sdb, + int deep) { + struct MDBX_db *const db = sdb->internal; + if (unlikely(db->md_root == P_INVALID)) return MDBX_SUCCESS; /* empty db */ MDBX_cursor_couple couple; MDBX_dbx dbx = {.md_klen_min = INT_MAX}; uint8_t dbistate = DBI_VALID | DBI_AUDITED; - int rc = couple_init(&couple, ~0u, ctx->mw_txn, sdb, &dbx, &dbistate); + int rc = couple_init(&couple, ~0u, ctx->mw_txn, db, &dbx, &dbistate); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -23712,8 +23719,8 @@ __cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_db *const sdb, : CC_PAGECHECK; couple.outer.mc_next = ctx->mw_cursor; ctx->mw_cursor = &couple.outer; - rc = walk_tree(ctx, sdb->md_root, name, deep, - sdb->md_mod_txnid ? sdb->md_mod_txnid : ctx->mw_txn->mt_txnid); + rc = walk_tree(ctx, db->md_root, sdb, deep, + db->md_mod_txnid ? 
db->md_mod_txnid : ctx->mw_txn->mt_txnid); ctx->mw_cursor = couple.outer.mc_next; return rc; } @@ -23731,15 +23738,13 @@ __cold int mdbx_env_pgwalk(MDBX_txn *txn, MDBX_pgvisitor_func *visitor, ctx.mw_visitor = visitor; ctx.mw_dont_check_keys_ordering = dont_check_keys_ordering; - rc = visitor(0, NUM_METAS, user, 0, MDBX_PGWALK_META, - pgno2bytes(txn->mt_env, NUM_METAS), MDBX_page_meta, MDBX_SUCCESS, - NUM_METAS, sizeof(MDBX_meta) * NUM_METAS, PAGEHDRSZ * NUM_METAS, - (txn->mt_env->me_psize - sizeof(MDBX_meta) - PAGEHDRSZ) * - NUM_METAS); - if (!MDBX_IS_ERROR(rc)) - rc = walk_sdb(&ctx, &txn->mt_dbs[FREE_DBI], MDBX_PGWALK_GC, 0); - if (!MDBX_IS_ERROR(rc)) - rc = walk_sdb(&ctx, &txn->mt_dbs[MAIN_DBI], MDBX_PGWALK_MAIN, 0); + MDBX_walk_sdb_t sdb = {{MDBX_CHK_GC, 0}, &txn->mt_dbs[FREE_DBI], nullptr}; + rc = walk_sdb(&ctx, &sdb, 0); + if (!MDBX_IS_ERROR(rc)) { + sdb.name.iov_base = MDBX_CHK_MAIN; + sdb.internal = &txn->mt_dbs[MAIN_DBI]; + rc = walk_sdb(&ctx, &sdb, 0); + } return rc; } @@ -25520,6 +25525,2079 @@ int mdbx_txn_unlock(MDBX_env *env) { return MDBX_SUCCESS; } +/******************************************************************************* + * Checking API */ + +typedef struct MDBX_chk_internal { + MDBX_chk_context_t *usr; + const struct MDBX_chk_callbacks *cb; + uint64_t monotime_timeout; + + size_t *problem_counter; + uint8_t flags; + bool got_break; + bool write_locked; + uint8_t scope_depth; + + MDBX_chk_subdb_t subdb_gc, subdb_main; + int16_t *pagemap; + MDBX_chk_subdb_t *last_lookup; + const void *last_nested; + MDBX_chk_scope_t scope_stack[12]; + MDBX_chk_subdb_t *subdb[MDBX_MAX_DBI + CORE_DBS]; + + MDBX_envinfo envinfo; + meta_troika_t troika; + MDBX_val v2a_buf; +} MDBX_chk_internal_t; + +__cold static int chk_check_break(MDBX_chk_scope_t *const scope) { + MDBX_chk_internal_t *const chk = scope->internal; + return (chk->got_break || (chk->cb->check_break && + (chk->got_break = chk->cb->check_break(chk->usr)))) + ? 
MDBX_RESULT_TRUE + : MDBX_RESULT_FALSE; +} + +__cold static void chk_line_end(MDBX_chk_line_t *line) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (likely(chk->cb->print_done)) + chk->cb->print_done(line); + } +} + +__cold __must_check_result static MDBX_chk_line_t * +chk_line_begin(MDBX_chk_scope_t *const scope, enum MDBX_chk_severity severity) { + MDBX_chk_internal_t *const chk = scope->internal; + if (severity < MDBX_chk_warning) + mdbx_env_chk_problem(chk->usr); + MDBX_chk_line_t *line = nullptr; + if (likely(chk->cb->print_begin)) { + line = chk->cb->print_begin(chk->usr, severity); + if (likely(line)) { + assert(line->ctx == nullptr || (line->ctx == chk->usr && line->empty)); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + line->ctx = chk->usr; + } + } + return line; +} + +__cold static MDBX_chk_line_t *chk_line_feed(MDBX_chk_line_t *line) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + enum MDBX_chk_severity severity = line->severity; + chk_line_end(line); + line = chk_line_begin(chk->usr->scope, severity); + } + return line; +} + +__cold static MDBX_chk_line_t *chk_flush(MDBX_chk_line_t *line) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (likely(chk->cb->print_flush)) { + chk->cb->print_flush(line); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + line->out = line->begin; + } + } + return line; +} + +__cold static size_t chk_print_wanna(MDBX_chk_line_t *line, size_t need) { + if (likely(line && need)) { + size_t have = line->end - line->out; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (need > have) { + line = chk_flush(line); + have = 
line->end - line->out; + } + return (need < have) ? need : have; + } + return 0; +} + +__cold static MDBX_chk_line_t *chk_puts(MDBX_chk_line_t *line, + const char *str) { + if (likely(line && str && *str)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + size_t left = strlen(str); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (chk->cb->print_chars) { + chk->cb->print_chars(line, str, left); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + } else + do { + size_t chunk = chk_print_wanna(line, left); + assert(chunk <= left); + if (unlikely(!chunk)) + break; + memcpy(line->out, str, chunk); + line->out += chunk; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + str += chunk; + left -= chunk; + } while (left); + line->empty = false; + } + return line; +} + +__cold static MDBX_chk_line_t *chk_print_va(MDBX_chk_line_t *line, + const char *fmt, va_list args) { + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + if (chk->cb->print_format) { + chk->cb->print_format(line, fmt, args); + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + } else { + va_list ones; + va_copy(ones, args); + const int needed = vsnprintf(nullptr, 0, fmt, ones); + va_end(ones); + if (likely(needed > 0)) { + const size_t have = chk_print_wanna(line, needed); + if (likely(have > 0)) { + int written = vsnprintf(line->out, have, fmt, args); + if (likely(written > 0)) + line->out += written; + assert(line->begin <= line->end && line->begin <= line->out && + line->out <= line->end); + } + } + } + line->empty = false; + } + return line; +} + +__cold static MDBX_chk_line_t *MDBX_PRINTF_ARGS(2, 3) + chk_print(MDBX_chk_line_t *line, const char *fmt, ...) 
{ + if (likely(line)) { + // MDBX_chk_internal_t *chk = line->ctx->internal; + va_list args; + va_start(args, fmt); + line = chk_print_va(line, fmt, args); + va_end(args); + line->empty = false; + } + return line; +} + +__cold static MDBX_chk_line_t *chk_print_size(MDBX_chk_line_t *line, + const char *prefix, + const uint64_t value, + const char *suffix) { + static const char sf[] = + "KMGTPEZY"; /* LY: Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta! */ + if (likely(line)) { + MDBX_chk_internal_t *chk = line->ctx->internal; + prefix = prefix ? prefix : ""; + suffix = suffix ? suffix : ""; + if (chk->cb->print_size) + chk->cb->print_size(line, prefix, value, suffix); + else + for (unsigned i = 0;; ++i) { + const unsigned scale = 10 + i * 10; + const uint64_t rounded = value + (UINT64_C(5) << (scale - 10)); + const uint64_t integer = rounded >> scale; + const uint64_t fractional = + (rounded - (integer << scale)) * 100u >> scale; + if ((rounded >> scale) <= 1000) + return chk_print(line, "%s%" PRIu64 " (%u.%02u %ciB)%s", prefix, + value, (unsigned)integer, (unsigned)fractional, + sf[i], suffix); + } + line->empty = false; + } + return line; +} + +__cold static int chk_error_rc(MDBX_chk_scope_t *const scope, int err, + const char *subj) { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error); + if (line) + chk_line_end(chk_flush(chk_print(line, "%s() failed, error %s (%d)", subj, + mdbx_strerror(err), err))); + else + debug_log(MDBX_LOG_ERROR, "mdbx_env_chk", 0, "%s() failed, error %s (%d)", + subj, mdbx_strerror(err), err); + return err; +} + +__cold static void MDBX_PRINTF_ARGS(5, 6) + chk_object_issue(MDBX_chk_scope_t *const scope, const char *object, + uint64_t entry_number, const char *caption, + const char *extra_fmt, ...) 
{ + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_issue_t *issue = chk->usr->scope->issues; + while (issue) { + if (issue->caption == caption) { + issue->count += 1; + break; + } else + issue = issue->next; + } + const bool fresh = issue == nullptr; + if (fresh) { + issue = osal_malloc(sizeof(*issue)); + if (likely(issue)) { + issue->caption = caption; + issue->count = 1; + issue->next = chk->usr->scope->issues; + chk->usr->scope->issues = issue; + } else + chk_error_rc(scope, ENOMEM, "adding issue"); + } + + va_list args; + va_start(args, extra_fmt); + if (chk->cb->issue) { + mdbx_env_chk_problem(chk->usr); + chk->cb->issue(chk->usr, object, entry_number, caption, extra_fmt, args); + } else { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_error); + if (entry_number != UINT64_MAX) + chk_print(line, "%s #%" PRIu64 ": %s", object, entry_number, caption); + else + chk_print(line, "%s: %s", object, caption); + if (extra_fmt) + chk_puts(chk_print_va(chk_puts(line, " ("), extra_fmt, args), ")"); + chk_line_end(fresh ? chk_flush(line) : line); + } + va_end(args); +} + +__cold static void MDBX_PRINTF_ARGS(2, 3) + chk_scope_issue(MDBX_chk_scope_t *const scope, const char *fmt, ...) { + MDBX_chk_internal_t *const chk = scope->internal; + va_list args; + va_start(args, fmt); + if (likely(chk->cb->issue)) { + mdbx_env_chk_problem(chk->usr); + chk->cb->issue(chk->usr, nullptr, 0, nullptr, fmt, args); + } else + chk_line_end( + chk_print_va(chk_line_begin(scope, MDBX_chk_error), fmt, args)); + va_end(args); +} + +__cold static int chk_scope_end(MDBX_chk_internal_t *chk, int err) { + assert(chk->scope_depth > 0); + MDBX_chk_scope_t *const inner = chk->scope_stack + chk->scope_depth; + MDBX_chk_scope_t *const outer = chk->scope_depth ? 
inner - 1 : nullptr; + if (!outer || outer->stage != inner->stage) { + if (err == MDBX_SUCCESS && *chk->problem_counter) + err = MDBX_PROBLEM; + else if (*chk->problem_counter == 0 && MDBX_IS_ERROR(err)) + *chk->problem_counter = 1; + if (chk->problem_counter != &chk->usr->result.total_problems) { + chk->usr->result.total_problems += *chk->problem_counter; + chk->problem_counter = &chk->usr->result.total_problems; + } + if (chk->cb->stage_end) + err = chk->cb->stage_end(chk->usr, inner->stage, err); + } + if (chk->cb->scope_conclude) + err = chk->cb->scope_conclude(chk->usr, outer, inner, err); + chk->usr->scope = outer; + chk->usr->scope_nesting = chk->scope_depth -= 1; + if (outer) + outer->subtotal_issues += inner->subtotal_issues; + if (chk->cb->scope_pop) + chk->cb->scope_pop(chk->usr, outer, inner); + + while (inner->issues) { + MDBX_chk_issue_t *next = inner->issues->next; + osal_free(inner->issues); + inner->issues = next; + } + memset(inner, -1, sizeof(*inner)); + return err; +} + +__cold static int chk_scope_begin_args(MDBX_chk_internal_t *chk, + int verbosity_adjustment, + enum MDBX_chk_stage stage, + const void *object, size_t *problems, + const char *fmt, va_list args) { + if (unlikely(chk->scope_depth + 1u >= ARRAY_LENGTH(chk->scope_stack))) + return MDBX_BACKLOG_DEPLETED; + + MDBX_chk_scope_t *const outer = chk->scope_stack + chk->scope_depth; + const int verbosity = + outer->verbosity + + (verbosity_adjustment - 1) * (1 << MDBX_chk_severity_prio_shift); + MDBX_chk_scope_t *const inner = outer + 1; + memset(inner, 0, sizeof(*inner)); + inner->internal = outer->internal; + inner->stage = stage ? stage : (stage = outer->stage); + inner->object = object; + inner->verbosity = (verbosity < MDBX_chk_warning) + ? 
MDBX_chk_warning + : (enum MDBX_chk_severity)verbosity; + if (problems) + chk->problem_counter = problems; + else if (!chk->problem_counter || outer->stage != stage) + chk->problem_counter = &chk->usr->result.total_problems; + + if (chk->cb->scope_push) { + const int err = chk->cb->scope_push(chk->usr, outer, inner, fmt, args); + if (unlikely(err != MDBX_SUCCESS)) + return err; + } + chk->usr->scope = inner; + chk->usr->scope_nesting = chk->scope_depth += 1; + + if (stage != outer->stage && chk->cb->stage_begin) { + int err = chk->cb->stage_begin(chk->usr, stage); + if (unlikely(err != MDBX_SUCCESS)) { + err = chk_scope_end(chk, err); + assert(err != MDBX_SUCCESS); + return err ? err : MDBX_RESULT_TRUE; + } + } + return MDBX_SUCCESS; +} + +__cold static int MDBX_PRINTF_ARGS(6, 7) + chk_scope_begin(MDBX_chk_internal_t *chk, int verbosity_adjustment, + enum MDBX_chk_stage stage, const void *object, + size_t *problems, const char *fmt, ...) { + va_list args; + va_start(args, fmt); + int rc = chk_scope_begin_args(chk, verbosity_adjustment, stage, object, + problems, fmt, args); + va_end(args); + return rc; +} + +__cold static int chk_scope_restore(MDBX_chk_scope_t *const target, int err) { + MDBX_chk_internal_t *const chk = target->internal; + assert(target <= chk->usr->scope); + while (chk->usr->scope > target) + err = chk_scope_end(chk, err); + return err; +} + +__cold void chk_scope_pop(MDBX_chk_scope_t *const inner) { + if (inner && inner > inner->internal->scope_stack) + chk_scope_restore(inner - 1, MDBX_SUCCESS); +} + +__cold static MDBX_chk_scope_t *MDBX_PRINTF_ARGS(3, 4) + chk_scope_push(MDBX_chk_scope_t *const scope, int verbosity_adjustment, + const char *fmt, ...) { + chk_scope_restore(scope, MDBX_SUCCESS); + va_list args; + va_start(args, fmt); + int err = chk_scope_begin_args(scope->internal, verbosity_adjustment, + scope->stage, nullptr, nullptr, fmt, args); + va_end(args); + return err ? 
nullptr : scope + 1; +} + +__cold static const char *chk_v2a(MDBX_chk_internal_t *chk, + const MDBX_val *val) { + if (val == MDBX_CHK_MAIN) + return "@MAIN"; + if (val == MDBX_CHK_GC) + return "@GC"; + if (val == MDBX_CHK_META) + return "@META"; + + const unsigned char *const data = val->iov_base; + const size_t len = val->iov_len; + if (data == MDBX_CHK_MAIN) + return "@MAIN"; + if (data == MDBX_CHK_GC) + return "@GC"; + if (data == MDBX_CHK_META) + return "@META"; + + if (!len) + return ""; + if (!data) + return ""; + if (len > 65536) { + const size_t enough = 42; + if (chk->v2a_buf.iov_len < enough) { + void *ptr = osal_realloc(chk->v2a_buf.iov_base, enough); + if (unlikely(!ptr)) + return ""; + chk->v2a_buf.iov_base = ptr; + chk->v2a_buf.iov_len = enough; + } + snprintf(chk->v2a_buf.iov_base, chk->v2a_buf.iov_len, + "", len); + return chk->v2a_buf.iov_base; + } + + bool printable = true; + bool quoting = false; + size_t xchars = 0; + for (size_t i = 0; i < len && printable; ++i) { + quoting = quoting || !(data[i] == '_' || isalnum(data[i])); + printable = + isprint(data[i]) || (data[i] < ' ' && ++xchars < 4 && len > xchars * 4); + } + + size_t need = len + 1; + if (quoting || !printable) + need += len + /* quotes */ 2 + 2 * /* max xchars */ 4; + if (need > chk->v2a_buf.iov_len) { + void *ptr = osal_realloc(chk->v2a_buf.iov_base, need); + if (unlikely(!ptr)) + return ""; + chk->v2a_buf.iov_base = ptr; + chk->v2a_buf.iov_len = need; + } + + static const char hex[] = "0123456789abcdef"; + char *w = chk->v2a_buf.iov_base; + if (!quoting) { + memcpy(w, data, len); + w += len; + } else if (printable) { + *w++ = '\''; + for (size_t i = 0; i < len; ++i) { + if (data[i] < ' ') { + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 4); + w[0] = '\\'; + w[1] = 'x'; + w[2] = hex[data[i] >> 4]; + w[3] = hex[data[i] & 15]; + w += 4; + } else if (strchr("\"'`\\", data[i])) { + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2); + w[0] = '\\'; 
+ w[1] = data[i]; + w += 2; + } else { + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 1); + *w++ = data[i]; + } + } + *w++ = '\''; + } else { + *w++ = '\\'; + *w++ = 'x'; + for (size_t i = 0; i < len; ++i) { + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w + 2); + w[0] = hex[data[i] >> 4]; + w[1] = hex[data[i] & 15]; + w += 2; + } + } + assert((char *)chk->v2a_buf.iov_base + chk->v2a_buf.iov_len > w); + *w = 0; + return chk->v2a_buf.iov_base; +} + +__cold static void chk_dispose(MDBX_chk_internal_t *chk) { + assert(chk->subdb[FREE_DBI] == &chk->subdb_gc); + assert(chk->subdb[MAIN_DBI] == &chk->subdb_main); + for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb); ++i) { + MDBX_chk_subdb_t *const sdb = chk->subdb[i]; + if (sdb) { + chk->subdb[i] = nullptr; + if (chk->cb->subdb_dispose && sdb->cookie) { + chk->cb->subdb_dispose(chk->usr, sdb); + sdb->cookie = nullptr; + } + if (sdb != &chk->subdb_gc && sdb != &chk->subdb_main) { + osal_free(sdb); + } + } + } + osal_free(chk->v2a_buf.iov_base); + osal_free(chk->pagemap); + chk->usr->internal = nullptr; + chk->usr->scope = nullptr; + chk->pagemap = nullptr; + memset(chk, 0xDD, sizeof(*chk)); + osal_free(chk); +} + +static size_t div_8s(size_t numerator, size_t divider) { + assert(numerator <= (SIZE_MAX >> 8)); + return (numerator << 8) / divider; +} + +static size_t mul_8s(size_t quotient, size_t multiplier) { + size_t hi = multiplier * (quotient >> 8); + size_t lo = multiplier * (quotient & 255) + 128; + return hi + (lo >> 8); +} + +static void histogram_reduce(struct MDBX_chk_histogram *p) { + const size_t size = ARRAY_LENGTH(p->ranges), last = size - 1; + // ищем пару для слияния с минимальной ошибкой + size_t min_err = SIZE_MAX, min_i = last - 1; + for (size_t i = 0; i < last; ++i) { + const size_t b1 = p->ranges[i].begin, e1 = p->ranges[i].end, + s1 = p->ranges[i].amount; + const size_t b2 = p->ranges[i + 1].begin, e2 = p->ranges[i + 1].end, + s2 = p->ranges[i + 1].amount; + const 
size_t l1 = e1 - b1, l2 = e2 - b2, lx = e2 - b1, sx = s1 + s2; + assert(s1 > 0 && b1 > 0 && b1 < e1); + assert(s2 > 0 && b2 > 0 && b2 < e2); + assert(e1 <= b2); + // за ошибку принимаем площадь изменений на гистограмме при слиянии + const size_t h1 = div_8s(s1, l1), h2 = div_8s(s2, l2), hx = div_8s(sx, lx); + const size_t d1 = mul_8s((h1 > hx) ? h1 - hx : hx - h1, l1); + const size_t d2 = mul_8s((h2 > hx) ? h2 - hx : hx - h2, l2); + const size_t dx = mul_8s(hx, b2 - e1); + const size_t err = d1 + d2 + dx; + if (min_err >= err) { + min_i = i; + min_err = err; + } + } + // объединяем + p->ranges[min_i].end = p->ranges[min_i + 1].end; + p->ranges[min_i].amount += p->ranges[min_i + 1].amount; + p->ranges[min_i].count += p->ranges[min_i + 1].count; + if (min_i < last) + // перемещаем хвост + memmove(p->ranges + min_i, p->ranges + min_i + 1, + (last - min_i) * sizeof(p->ranges[0])); + // обнуляем последний элемент и продолжаем + p->ranges[last].count = 0; +} + +static void histogram_acc(const size_t n, struct MDBX_chk_histogram *p) { + STATIC_ASSERT(ARRAY_LENGTH(p->ranges) > 2); + p->amount += n; + p->count += 1; + if (likely(n < 2)) { + p->ones += n; + p->pad += 1; + } else + for (;;) { + const size_t size = ARRAY_LENGTH(p->ranges), last = size - 1; + size_t i = 0; + while (i < size && p->ranges[i].count && n >= p->ranges[i].begin) { + if (n < p->ranges[i].end) { + // значение попадает в существующий интервал + p->ranges[i].amount += n; + p->ranges[i].count += 1; + return; + } + ++i; + } + if (p->ranges[last].count == 0) { + // использованы еще не все слоты, добавляем интервал + assert(i < size); + if (p->ranges[i].count) { + assert(i < last); + // раздвигаем + memmove(p->ranges + i + 1, p->ranges + i, + (last - i) * sizeof(p->ranges[0])); + } + p->ranges[i].begin = n; + p->ranges[i].end = n + 1; + p->ranges[i].amount = n; + p->ranges[i].count = 1; + return; + } + histogram_reduce(p); + } +} + +__cold static MDBX_chk_line_t * +histogram_dist(MDBX_chk_line_t *line, + 
const struct MDBX_chk_histogram *histogram, const char *prefix, + const char *first, bool amount) { + line = chk_print(line, "%s:", prefix); + const char *comma = ""; + const size_t first_val = amount ? histogram->ones : histogram->pad; + if (first_val) { + chk_print(line, " %s=%" PRIuSIZE, first, first_val); + comma = ","; + } + for (size_t n = 0; n < ARRAY_LENGTH(histogram->ranges); ++n) + if (histogram->ranges[n].count) { + chk_print(line, "%s %" PRIuSIZE, comma, histogram->ranges[n].begin); + if (histogram->ranges[n].begin != histogram->ranges[n].end - 1) + chk_print(line, "-%" PRIuSIZE, histogram->ranges[n].end - 1); + line = chk_print(line, "=%" PRIuSIZE, + amount ? histogram->ranges[n].amount + : histogram->ranges[n].count); + comma = ","; + } + return line; +} + +__cold static MDBX_chk_line_t * +histogram_print(MDBX_chk_scope_t *scope, MDBX_chk_line_t *line, + const struct MDBX_chk_histogram *histogram, const char *prefix, + const char *first, bool amount) { + if (histogram->count) { + line = chk_print(line, "%s %" PRIuSIZE, prefix, + amount ? 
histogram->amount : histogram->count); + if (scope->verbosity > MDBX_chk_info) + line = chk_puts( + histogram_dist(line, histogram, " (distribution", first, amount), + ")"); + } + return line; +} + +//----------------------------------------------------------------------------- + +__cold static int chk_get_sdb(MDBX_chk_scope_t *const scope, + const MDBX_walk_sdb_t *in, + MDBX_chk_subdb_t **out) { + MDBX_chk_internal_t *const chk = scope->internal; + if (chk->last_lookup && + chk->last_lookup->name.iov_base == in->name.iov_base) { + *out = chk->last_lookup; + return MDBX_SUCCESS; + } + + for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb); ++i) { + MDBX_chk_subdb_t *sdb = chk->subdb[i]; + if (!sdb) { + sdb = osal_calloc(1, sizeof(MDBX_chk_subdb_t)); + if (unlikely(!sdb)) { + *out = nullptr; + return chk_error_rc(scope, MDBX_ENOMEM, "alloc_subDB"); + } + chk->subdb[i] = sdb; + sdb->flags = in->internal->md_flags; + sdb->id = -1; + sdb->name = in->name; + } + if (sdb->name.iov_base == in->name.iov_base) { + if (sdb->id < 0) { + sdb->id = (int)i; + sdb->cookie = + chk->cb->subdb_filter + ? 
chk->cb->subdb_filter(chk->usr, &sdb->name, sdb->flags) + : (void *)(intptr_t)-1; + } + *out = (chk->last_lookup = sdb); + return MDBX_SUCCESS; + } + } + chk_scope_issue(scope, "too many subDBs > %u", + (unsigned)ARRAY_LENGTH(chk->subdb) - CORE_DBS - /* meta */ 1); + *out = nullptr; + return MDBX_PROBLEM; +} + +//------------------------------------------------------------------------------ + +__cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, + const unsigned num) { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_verbose); + MDBX_chk_internal_t *const chk = scope->internal; + if (line) { + MDBX_env *const env = chk->usr->env; + const bool have_bootid = (chk->envinfo.mi_bootid.current.x | + chk->envinfo.mi_bootid.current.y) != 0; + const bool bootid_match = + have_bootid && memcmp(&chk->envinfo.mi_bootid.meta[num], + &chk->envinfo.mi_bootid.current, + sizeof(chk->envinfo.mi_bootid.current)) == 0; + + line = chk_print(line, "meta-%u: ", num); + switch (chk->envinfo.mi_meta_sign[num]) { + case MDBX_DATASIGN_NONE: + line = chk_puts(line, "no-sync/legacy"); + break; + case MDBX_DATASIGN_WEAK: + line = chk_print(line, "weak-%s", + have_bootid + ? (bootid_match ? 
"intact (same boot-id)" : "dead") + : "unknown (no boot-id)"); + break; + default: + line = chk_puts(line, "steady"); + break; + } + const txnid_t meta_txnid = chk->envinfo.mi_meta_txnid[num]; + line = chk_print(line, " txn#%" PRIaTXN, meta_txnid); + + const char *status = "stay"; + if (num == chk->troika.recent) + status = "head"; + else if (num == TROIKA_TAIL(&chk->troika)) + status = "tail"; + line = chk_print(line, ", %s", status); + + if (env->me_stuck_meta >= 0) { + if (num == (unsigned)env->me_stuck_meta) + line = chk_print(line, ", %s", "forced for checking"); + } else if (meta_txnid > chk->envinfo.mi_recent_txnid && + (env->me_flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == + MDBX_EXCLUSIVE) + line = chk_print(line, + ", rolled-back %" PRIu64 " commit(s) (%" PRIu64 + " >>> %" PRIu64 ")", + meta_txnid - chk->envinfo.mi_recent_txnid, meta_txnid, + chk->envinfo.mi_recent_txnid); + chk_line_end(line); + } +} + +__cold static int +chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, + const int deep, const MDBX_walk_sdb_t *sdb_info, + const size_t page_size, const MDBX_page_type_t pagetype, + const MDBX_error_t page_err, const size_t nentries, + const size_t payload_bytes, const size_t header_bytes, + const size_t unused_bytes) { + MDBX_chk_scope_t *const scope = ctx; + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + MDBX_env *const env = usr->env; + + MDBX_chk_subdb_t *sdb; + int err = chk_get_sdb(scope, sdb_info, &sdb); + if (unlikely(err)) + return err; + + if (deep > 42) { + chk_scope_issue(scope, "too deeply %u", deep); + return MDBX_CORRUPTED /* avoid infinite loop/recursion */; + } + histogram_acc(deep, &sdb->histogram.deep); + usr->result.processed_pages += npages; + const size_t page_bytes = payload_bytes + header_bytes + unused_bytes; + + int height = deep + 1; + if (sdb->id >= CORE_DBS) + height -= usr->txn->mt_dbs[MAIN_DBI].md_depth; + const struct MDBX_db *nested = sdb_info->nested; + 
if (nested) { + if (sdb->flags & MDBX_DUPSORT) + height -= sdb_info->internal->md_depth; + else { + chk_object_issue(scope, "nested tree", pgno, "unexpected", + "subDb %s flags 0x%x, deep %i", chk_v2a(chk, &sdb->name), + sdb->flags, deep); + nested = nullptr; + } + } else + chk->last_nested = nullptr; + + const char *pagetype_caption; + bool branch = false; + switch (pagetype) { + default: + chk_object_issue(scope, "page", pgno, "unknown page-type", + "type %u, deep %i", (unsigned)pagetype, deep); + pagetype_caption = "unknown"; + sdb->pages.other += npages; + break; + case MDBX_page_broken: + assert(page_err != MDBX_SUCCESS); + pagetype_caption = "broken"; + sdb->pages.other += npages; + break; + case MDBX_subpage_broken: + assert(page_err != MDBX_SUCCESS); + pagetype_caption = "broken-subpage"; + sdb->pages.other += npages; + break; + case MDBX_page_large: + pagetype_caption = "large"; + histogram_acc(npages, &sdb->histogram.large_pages); + if (sdb->flags & MDBX_DUPSORT) + chk_object_issue(scope, "page", pgno, "unexpected", + "type %u, subDb %s flags 0x%x, deep %i", + (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, + deep); + break; + case MDBX_page_branch: + branch = true; + if (!nested) { + pagetype_caption = "branch"; + sdb->pages.branch += 1; + } else { + pagetype_caption = "nested-branch"; + sdb->pages.nested_branch += 1; + } + break; + case MDBX_page_dupfixed_leaf: + if (!nested) + chk_object_issue(scope, "page", pgno, "unexpected", + "type %u, subDb %s flags 0x%x, deep %i", + (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, + deep); + /* fall through */ + __fallthrough; + case MDBX_page_leaf: + if (!nested) { + pagetype_caption = "leaf"; + sdb->pages.leaf += 1; + if (height != sdb_info->internal->md_depth) + chk_object_issue(scope, "page", pgno, "wrong tree height", + "actual %i != %i subDb %s", height, + sdb_info->internal->md_depth, + chk_v2a(chk, &sdb->name)); + } else { + pagetype_caption = + (pagetype == MDBX_page_leaf) ? 
"nested-leaf" : "nested-leaf-dupfixed"; + sdb->pages.nested_leaf += 1; + if (chk->last_nested != nested) { + histogram_acc(height, &sdb->histogram.nested_tree); + chk->last_nested = nested; + } + if (height != nested->md_depth) + chk_object_issue(scope, "page", pgno, "wrong nested-tree height", + "actual %i != %i dupsort-node %s", height, + nested->md_depth, chk_v2a(chk, &sdb->name)); + } + break; + case MDBX_subpage_dupfixed_leaf: + case MDBX_subpage_leaf: + pagetype_caption = (pagetype == MDBX_subpage_leaf) ? "subleaf-dupsort" + : "subleaf-dupfixed"; + sdb->pages.nested_subleaf += 1; + if ((sdb->flags & MDBX_DUPSORT) == 0 || nested) + chk_object_issue(scope, "page", pgno, "unexpected", + "type %u, subDb %s flags 0x%x, deep %i", + (unsigned)pagetype, chk_v2a(chk, &sdb->name), sdb->flags, + deep); + break; + } + + if (npages) { + if (sdb->cookie) { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra); + if (npages == 1) + chk_print(line, "%s-page %" PRIuSIZE, pagetype_caption, pgno); + else + chk_print(line, "%s-span %" PRIuSIZE "[%u]", pagetype_caption, pgno, + npages); + chk_line_end( + chk_print(line, + " of %s: header %" PRIiPTR ", %s %" PRIiPTR + ", payload %" PRIiPTR ", unused %" PRIiPTR ", deep %i", + chk_v2a(chk, &sdb->name), header_bytes, + (pagetype == MDBX_page_branch) ? "keys" : "entries", + nentries, payload_bytes, unused_bytes, deep)); + } + + bool already_used = false; + for (unsigned n = 0; n < npages; ++n) { + const size_t spanpgno = pgno + n; + if (spanpgno >= usr->result.alloc_pages) { + chk_object_issue(scope, "page", spanpgno, "wrong page-no", + "%s-page: %" PRIuSIZE " > %" PRIuSIZE ", deep %i", + pagetype_caption, spanpgno, usr->result.alloc_pages, + deep); + sdb->pages.all += 1; + } else if (chk->pagemap[spanpgno]) { + const MDBX_chk_subdb_t *const rival = + chk->subdb[chk->pagemap[spanpgno] - 1]; + chk_object_issue(scope, "page", spanpgno, + (branch && rival == sdb) ? 
"loop" : "already used", + "%s-page: by %s, deep %i", pagetype_caption, + chk_v2a(chk, &rival->name), deep); + already_used = true; + } else { + chk->pagemap[spanpgno] = (int16_t)sdb->id + 1; + sdb->pages.all += 1; + } + } + + if (already_used) + return branch ? MDBX_RESULT_TRUE /* avoid infinite loop/recursion */ + : MDBX_SUCCESS; + } + + if (MDBX_IS_ERROR(page_err)) { + chk_object_issue(scope, "page", pgno, "invalid/corrupted", "%s-page", + pagetype_caption); + } else { + if (unused_bytes > page_size) + chk_object_issue(scope, "page", pgno, "illegal unused-bytes", + "%s-page: %u < %" PRIuSIZE " < %u", pagetype_caption, 0, + unused_bytes, env->me_psize); + + if (header_bytes < (int)sizeof(long) || + (size_t)header_bytes >= env->me_psize - sizeof(long)) { + chk_object_issue(scope, "page", pgno, "illegal header-length", + "%s-page: %" PRIuSIZE " < %" PRIuSIZE " < %" PRIuSIZE, + pagetype_caption, sizeof(long), header_bytes, + env->me_psize - sizeof(long)); + } + if (payload_bytes < 1) { + if (nentries > 1) { + chk_object_issue(scope, "page", pgno, "zero size-of-entry", + "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE + " entries", + pagetype_caption, payload_bytes, nentries); + } else { + chk_object_issue(scope, "page", pgno, "empty", + "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE + " entries, deep %i", + pagetype_caption, payload_bytes, nentries, deep); + sdb->pages.empty += 1; + } + } + + if (npages) { + if (page_bytes != page_size) { + chk_object_issue(scope, "page", pgno, "misused", + "%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR + "h + %" PRIuPTR "p + %" PRIuPTR "u), deep %i", + pagetype_caption, page_size, page_bytes, header_bytes, + payload_bytes, unused_bytes, deep); + if (page_size > page_bytes) + sdb->lost_bytes += page_size - page_bytes; + } else { + sdb->payload_bytes += payload_bytes + header_bytes; + usr->result.total_payload_bytes += payload_bytes + header_bytes; + } + } + } + return chk_check_break(scope); +} + +__cold static int 
chk_tree(MDBX_chk_scope_t *const scope) { + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + MDBX_env *const env = usr->env; + MDBX_txn *const txn = usr->txn; + +#if defined(_WIN32) || defined(_WIN64) + SetLastError(ERROR_SUCCESS); +#else + errno = 0; +#endif /* Windows */ + chk->pagemap = osal_calloc(usr->result.alloc_pages, sizeof(*chk->pagemap)); + if (!chk->pagemap) { + int err = osal_get_errno(); + return chk_error_rc(scope, err ? err : MDBX_ENOMEM, "calloc"); + } + + if (scope->verbosity > MDBX_chk_info) + chk_scope_push(scope, 0, "Walking pages..."); + /* always skip key ordering checking + * to avoid MDBX_CORRUPTED in case custom comparators were used */ + usr->result.processed_pages = NUM_METAS; + int err = mdbx_env_pgwalk(txn, chk_pgvisitor, scope, true); + if (MDBX_IS_ERROR(err) && err != MDBX_EINTR) + chk_error_rc(scope, err, "mdbx_env_pgwalk"); + + for (size_t n = NUM_METAS; n < usr->result.alloc_pages; ++n) + if (!chk->pagemap[n]) + usr->result.unused_pages += 1; + + MDBX_chk_subdb_t total; + memset(&total, 0, sizeof(total)); + total.pages.all = NUM_METAS; + for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb) && chk->subdb[i]; ++i) { + MDBX_chk_subdb_t *const sdb = chk->subdb[i]; + total.payload_bytes += sdb->payload_bytes; + total.lost_bytes += sdb->lost_bytes; + total.pages.all += sdb->pages.all; + total.pages.empty += sdb->pages.empty; + total.pages.other += sdb->pages.other; + total.pages.branch += sdb->pages.branch; + total.pages.leaf += sdb->pages.leaf; + total.pages.nested_branch += sdb->pages.nested_branch; + total.pages.nested_leaf += sdb->pages.nested_leaf; + total.pages.nested_subleaf += sdb->pages.nested_subleaf; + } + assert(total.pages.all == usr->result.processed_pages); + + const size_t total_page_bytes = pgno2bytes(env, total.pages.all); + if (usr->scope->subtotal_issues || usr->scope->verbosity >= MDBX_chk_verbose) + chk_line_end(chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution), + 
"walked %zu pages, left/unused %zu" + ", %" PRIuSIZE " problem(s)", + usr->result.processed_pages, + usr->result.unused_pages, + usr->scope->subtotal_issues)); + + err = chk_scope_restore(scope, err); + if (scope->verbosity > MDBX_chk_info) { + for (size_t i = 0; i < ARRAY_LENGTH(chk->subdb) && chk->subdb[i]; ++i) { + MDBX_chk_subdb_t *const sdb = chk->subdb[i]; + MDBX_chk_scope_t *inner = + chk_scope_push(scope, 0, "tree %s:", chk_v2a(chk, &sdb->name)); + if (sdb->pages.all == 0) + chk_line_end( + chk_print(chk_line_begin(inner, MDBX_chk_resolution), "empty")); + else { + MDBX_chk_line_t *line = chk_line_begin(inner, MDBX_chk_info); + if (line) { + line = chk_print(line, "page usage: subtotal %" PRIuSIZE, + sdb->pages.all); + const size_t branch_pages = + sdb->pages.branch + sdb->pages.nested_branch; + const size_t leaf_pages = sdb->pages.leaf + sdb->pages.nested_leaf + + sdb->pages.nested_subleaf; + if (sdb->pages.other) + line = chk_print(line, ", other %" PRIuSIZE, sdb->pages.other); + if (sdb->pages.other == 0 || + (branch_pages | leaf_pages | sdb->histogram.large_pages.count) != + 0) { + line = chk_print(line, ", branch %" PRIuSIZE ", leaf %" PRIuSIZE, + branch_pages, leaf_pages); + if (sdb->histogram.large_pages.count || + (sdb->flags & MDBX_DUPSORT) == 0) { + line = chk_print(line, ", large %" PRIuSIZE, + sdb->histogram.large_pages.count); + if (sdb->histogram.large_pages.amount | + sdb->histogram.large_pages.count) + line = histogram_print(inner, line, &sdb->histogram.large_pages, + " amount", "single", true); + } + } + line = histogram_dist(chk_line_feed(line), &sdb->histogram.deep, + "tree deep density", "1", false); + if (sdb != &chk->subdb_gc && sdb->histogram.nested_tree.count) { + line = chk_print(chk_line_feed(line), "nested tree(s) %" PRIuSIZE, + sdb->histogram.nested_tree.count); + line = histogram_dist(line, &sdb->histogram.nested_tree, " density", + "1", false); + line = chk_print(chk_line_feed(line), + "nested tree(s) pages %" PRIuSIZE + ": 
branch %" PRIuSIZE ", leaf %" PRIuSIZE + ", subleaf %" PRIuSIZE, + sdb->pages.nested_branch + sdb->pages.nested_leaf, + sdb->pages.nested_branch, sdb->pages.nested_leaf, + sdb->pages.nested_subleaf); + } + + const size_t bytes = pgno2bytes(env, sdb->pages.all); + line = chk_print( + chk_line_feed(line), + "page filling: subtotal %" PRIuSIZE + " bytes (%.1f%%), payload %" PRIuSIZE + " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)", + bytes, bytes * 100.0 / total_page_bytes, sdb->payload_bytes, + sdb->payload_bytes * 100.0 / bytes, bytes - sdb->payload_bytes, + (bytes - sdb->payload_bytes) * 100.0 / bytes); + if (sdb->pages.empty) + line = chk_print(line, ", %" PRIuSIZE " empty pages", + sdb->pages.empty); + if (sdb->lost_bytes) + line = + chk_print(line, ", %" PRIuSIZE " bytes lost", sdb->lost_bytes); + chk_line_end(line); + } + } + chk_scope_restore(scope, 0); + } + } + + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution); + line = chk_print(line, + "summary: total %" PRIuSIZE " bytes, payload %" PRIuSIZE + " (%.1f%%), unused %" PRIuSIZE " (%.1f%%)," + " average fill %.1f%%", + total_page_bytes, usr->result.total_payload_bytes, + usr->result.total_payload_bytes * 100.0 / total_page_bytes, + total_page_bytes - usr->result.total_payload_bytes, + (total_page_bytes - usr->result.total_payload_bytes) * + 100.0 / total_page_bytes, + usr->result.total_payload_bytes * 100.0 / total_page_bytes); + if (total.pages.empty) + line = chk_print(line, ", %" PRIuSIZE " empty pages", total.pages.empty); + if (total.lost_bytes) + line = chk_print(line, ", %" PRIuSIZE " bytes lost", total.lost_bytes); + chk_line_end(line); + return err; +} + +typedef int(chk_kv_visitor)(MDBX_chk_scope_t *const scope, + MDBX_chk_subdb_t *sdb, const size_t record_number, + const MDBX_val *key, const MDBX_val *data); + +__cold static int chk_handle_kv(MDBX_chk_scope_t *const scope, + MDBX_chk_subdb_t *sdb, + const size_t record_number, const MDBX_val *key, + const MDBX_val *data) { + 
MDBX_chk_internal_t *const chk = scope->internal; + int err = MDBX_SUCCESS; + assert(sdb->cookie); + if (chk->cb->subdb_handle_kv) + err = chk->cb->subdb_handle_kv(chk->usr, sdb, record_number, key, data); + return err ? err : chk_check_break(scope); +} + +__cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, + MDBX_chk_subdb_t *sdb, chk_kv_visitor *handler) { + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + MDBX_env *const env = usr->env; + MDBX_txn *const txn = usr->txn; + MDBX_cursor *cursor = nullptr; + size_t record_count = 0, dups = 0, sub_databases = 0; + int err; + + if ((MDBX_TXN_FINISHED | MDBX_TXN_ERROR) & txn->mt_flags) { + chk_line_end( + chk_flush(chk_print(chk_line_begin(scope, MDBX_chk_error), + "abort processing %s due to a previous error", + chk_v2a(chk, &sdb->name)))); + err = MDBX_BAD_TXN; + goto bailout; + } + + if (0 > (int)dbi) { + err = dbi_open( + txn, &sdb->name, MDBX_DB_ACCEDE, &dbi, + (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr, + (chk->flags & MDBX_CHK_IGNORE_ORDER) ? 
cmp_equal_or_greater : nullptr); + if (unlikely(err)) { + chk_error_rc(scope, err, "mdbx_dbi_open"); + goto bailout; + } + } + + const MDBX_db *const db = txn->mt_dbs + dbi; + if (handler) { + const char *key_mode = nullptr; + switch (sdb->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) { + case 0: + key_mode = "usual"; + break; + case MDBX_REVERSEKEY: + key_mode = "reserve"; + break; + case MDBX_INTEGERKEY: + key_mode = "ordinal"; + break; + case MDBX_REVERSEKEY | MDBX_INTEGERKEY: + key_mode = "msgpack"; + break; + default: + key_mode = "inconsistent"; + chk_scope_issue(scope, "wrong key-mode (0x%x)", + sdb->flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)); + } + + const char *value_mode = nullptr; + switch (sdb->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | + MDBX_INTEGERDUP)) { + case 0: + value_mode = "single"; + break; + case MDBX_DUPSORT: + value_mode = "multi"; + break; + case MDBX_DUPSORT | MDBX_REVERSEDUP: + value_mode = "multi-reverse"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED: + value_mode = "multi-samelength"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: + value_mode = "multi-reverse-samelength"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: + value_mode = "multi-ordinal"; + break; + case MDBX_DUPSORT | MDBX_INTEGERDUP | MDBX_REVERSEDUP: + value_mode = "multi-msgpack"; + break; + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: + value_mode = "reserved"; + break; + default: + value_mode = "inconsistent"; + chk_scope_issue(scope, "wrong value-mode (0x%x)", + sdb->flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | + MDBX_DUPFIXED | MDBX_INTEGERDUP)); + } + + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); + line = chk_print(line, "key-value kind: %s-key => %s-value", key_mode, + value_mode); + line = chk_print(line, ", flags:"); + if (!sdb->flags) + line = chk_print(line, " none"); + else { + const uint8_t f[] = {MDBX_DUPSORT, + MDBX_INTEGERKEY, + MDBX_REVERSEKEY, + MDBX_DUPFIXED, + 
MDBX_REVERSEDUP, + MDBX_INTEGERDUP, + 0}; + const char *const t[] = {"dupsort", "integerkey", "reversekey", + "dupfixed", "reversedup", "integerdup"}; + for (size_t i = 0; f[i]; i++) + if (sdb->flags & f[i]) + line = chk_print(line, " %s", t[i]); + } + chk_line_end(chk_print(line, " (0x%02X)", sdb->flags)); + + line = chk_print(chk_line_begin(scope, MDBX_chk_verbose), + "entries %" PRIu64 ", sequence %" PRIu64, db->md_entries, + db->md_seq); + if (db->md_mod_txnid) + line = chk_print(line, ", last modification txn#%" PRIaTXN, + db->md_mod_txnid); + if (db->md_root != P_INVALID) + line = chk_print(line, ", root #%" PRIaPGNO, db->md_root); + chk_line_end(line); + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_verbose), + "b-tree depth %u, pages: branch %" PRIaPGNO + ", leaf %" PRIaPGNO ", large %" PRIaPGNO, + db->md_depth, db->md_branch_pages, db->md_leaf_pages, + db->md_overflow_pages)); + + if ((chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { + const size_t branch_pages = sdb->pages.branch + sdb->pages.nested_branch; + const size_t leaf_pages = sdb->pages.leaf + sdb->pages.nested_leaf; + const size_t subtotal_pages = + db->md_branch_pages + db->md_leaf_pages + db->md_overflow_pages; + if (subtotal_pages != sdb->pages.all) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIuSIZE " != walked %" PRIuSIZE ")", + "subtotal", subtotal_pages, sdb->pages.all); + if (db->md_branch_pages != branch_pages) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", + "branch", db->md_branch_pages, branch_pages); + if (db->md_leaf_pages != leaf_pages) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", + "all-leaf", db->md_leaf_pages, leaf_pages); + if (db->md_overflow_pages != sdb->histogram.large_pages.amount) + chk_scope_issue( + scope, "%s pages mismatch (%" PRIaPGNO " != walked %" PRIuSIZE ")", + "large/overlow", db->md_overflow_pages, + sdb->histogram.large_pages.amount); + } + } + + err = 
mdbx_cursor_open(txn, dbi, &cursor); + if (unlikely(err)) { + chk_error_rc(scope, err, "mdbx_cursor_open"); + goto bailout; + } + if (chk->flags & MDBX_CHK_IGNORE_ORDER) { + cursor->mc_checking |= CC_SKIPORD | CC_PAGECHECK; + if (cursor->mc_xcursor) + cursor->mc_xcursor->mx_cursor.mc_checking |= CC_SKIPORD | CC_PAGECHECK; + } + + const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, sdb->flags); + MDBX_val prev_key = {nullptr, 0}, prev_data = {nullptr, 0}; + MDBX_val key, data; + err = mdbx_cursor_get(cursor, &key, &data, MDBX_FIRST); + while (err == MDBX_SUCCESS) { + err = chk_check_break(scope); + if (unlikely(err)) + goto bailout; + + bool bad_key = false; + if (key.iov_len > maxkeysize) { + chk_object_issue(scope, "entry", record_count, + "key length exceeds max-key-size", + "%" PRIuPTR " > %" PRIuPTR, key.iov_len, maxkeysize); + bad_key = true; + } else if ((sdb->flags & MDBX_INTEGERKEY) && key.iov_len != 8 && + key.iov_len != 4) { + chk_object_issue(scope, "entry", record_count, "wrong key length", + "%" PRIuPTR " != 4or8", key.iov_len); + bad_key = true; + } + + bool bad_data = false; + if ((sdb->flags & MDBX_INTEGERDUP) && data.iov_len != 8 && + data.iov_len != 4) { + chk_object_issue(scope, "entry", record_count, "wrong data length", + "%" PRIuPTR " != 4or8", data.iov_len); + bad_data = true; + } + + if (prev_key.iov_base) { + if (prev_data.iov_base && !bad_data && (sdb->flags & MDBX_DUPFIXED) && + prev_data.iov_len != data.iov_len) { + chk_object_issue(scope, "entry", record_count, "different data length", + "%" PRIuPTR " != %" PRIuPTR, prev_data.iov_len, + data.iov_len); + bad_data = true; + } + + if (!bad_key) { + int cmp = mdbx_cmp(txn, dbi, &key, &prev_key); + if (cmp == 0) { + ++dups; + if ((sdb->flags & MDBX_DUPSORT) == 0) { + chk_object_issue(scope, "entry", record_count, "duplicated entries", + nullptr); + if (prev_data.iov_base && data.iov_len == prev_data.iov_len && + memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == 0) + 
chk_object_issue(scope, "entry", record_count, + "complete duplicate", nullptr); + } else if (!bad_data && prev_data.iov_base) { + cmp = mdbx_dcmp(txn, dbi, &data, &prev_data); + if (cmp == 0) + chk_object_issue(scope, "entry", record_count, + "complete duplicate", nullptr); + else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER)) + chk_object_issue(scope, "entry", record_count, + "wrong order of multi-values", nullptr); + } + } else if (cmp < 0 && !(chk->flags & MDBX_CHK_IGNORE_ORDER)) + chk_object_issue(scope, "entry", record_count, + "wrong order of entries", nullptr); + } + } + + if (!bad_key) { + if (!prev_key.iov_base && (sdb->flags & MDBX_INTEGERKEY)) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), + "fixed key-size %" PRIuSIZE, key.iov_len)); + prev_key = key; + } + if (!bad_data) { + if (!prev_data.iov_base && + (sdb->flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED))) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), + "fixed data-size %" PRIuSIZE, data.iov_len)); + prev_data = data; + } + + record_count++; + histogram_acc(key.iov_len, &sdb->histogram.key_len); + histogram_acc(data.iov_len, &sdb->histogram.val_len); + + const MDBX_node *const node = + page_node(cursor->mc_pg[cursor->mc_top], cursor->mc_ki[cursor->mc_top]); + if (node_flags(node) == F_SUBDATA) { + if (dbi != MAIN_DBI || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | + MDBX_REVERSEDUP | MDBX_INTEGERDUP))) + chk_object_issue(scope, "entry", record_count, + "unexpected sub-database", "node-flags 0x%x", + node_flags(node)); + else if (data.iov_len != sizeof(MDBX_db)) + chk_object_issue(scope, "entry", record_count, + "wrong sub-database node size", + "node-size %" PRIuSIZE " != %" PRIuSIZE, data.iov_len, + sizeof(MDBX_db)); + else if (scope->stage == MDBX_chk_traversal_maindb) + /* подсчитываем subDB при первом проходе */ + sub_databases += 1; + else { + /* обработка subDB при втором проходе */ + MDBX_db aligned_db; + memcpy(&aligned_db, data.iov_base, 
sizeof(aligned_db)); + MDBX_walk_sdb_t sdb_info = {key, nullptr, nullptr}; + sdb_info.internal = &aligned_db; + MDBX_chk_subdb_t *subdb; + err = chk_get_sdb(scope, &sdb_info, &subdb); + if (unlikely(err)) + goto bailout; + if (subdb->cookie) { + err = chk_scope_begin(chk, 0, MDBX_chk_traversal_subdbs, subdb, + &usr->result.problems_kv, + "Processing subDB %s...", + chk_v2a(chk, &subdb->name)); + if (likely(!err)) { + err = chk_db(usr->scope, (MDBX_dbi)-1, subdb, chk_handle_kv); + if (err != MDBX_EINTR && err != MDBX_RESULT_TRUE) + usr->result.subdb_processed += 1; + } + err = chk_scope_restore(scope, err); + if (unlikely(err)) + goto bailout; + } else + chk_line_end(chk_flush( + chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s...", chk_v2a(chk, &subdb->name)))); + } + } else if (handler) { + err = handler(scope, sdb, record_count, &key, &data); + if (unlikely(err)) + goto bailout; + } + + err = mdbx_cursor_get(cursor, &key, &data, MDBX_NEXT); + } + + err = (err != MDBX_NOTFOUND) ? 
chk_error_rc(scope, err, "mdbx_cursor_get") + : MDBX_SUCCESS; + if (err == MDBX_SUCCESS && record_count != db->md_entries) + chk_scope_issue(scope, + "different number of entries %" PRIuSIZE " != %" PRIu64, + record_count, db->md_entries); +bailout: + if (cursor) { + if (handler) { + if (sdb->histogram.key_len.count) { + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_info); + line = histogram_dist(line, &sdb->histogram.key_len, + "key length density", "0/1", false); + chk_line_feed(line); + line = histogram_dist(line, &sdb->histogram.val_len, + "value length density", "0/1", false); + chk_line_end(line); + } + if (scope->stage == MDBX_chk_traversal_maindb) + usr->result.subdb_total = sub_databases; + if (chk->cb->subdb_conclude) + err = chk->cb->subdb_conclude(usr, sdb, cursor, err); + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_resolution); + line = chk_print(line, "summary: %" PRIuSIZE " records,", record_count); + if (dups || (sdb->flags & (MDBX_DUPSORT | MDBX_DUPFIXED | + MDBX_REVERSEDUP | MDBX_INTEGERDUP))) + line = chk_print(line, " %" PRIuSIZE " dups,", dups); + if (sub_databases || dbi == MAIN_DBI) + line = chk_print(line, " %" PRIuSIZE " sub-databases,", sub_databases); + line = chk_print(line, + " %" PRIuSIZE " key's bytes," + " %" PRIuSIZE " data's bytes," + " %" PRIuSIZE " problem(s)", + sdb->histogram.key_len.amount, + sdb->histogram.val_len.amount, scope->subtotal_issues); + chk_line_end(chk_flush(line)); + } + + mdbx_cursor_close(cursor); + if (dbi >= CORE_DBS && !txn->mt_cursors[dbi] && + txn->mt_dbistate[dbi] == (DBI_FRESH | DBI_VALID | DBI_USRVALID)) + mdbx_dbi_close(env, dbi); + } + return err; +} + +/* Per-record handler for the GC/freeDB traversal (passed to chk_db() + * together with FREE_DBI). + * + * The key must hold a txnid_t in the range [1, id of the checking txn]; + * the data must hold a page-number list: a count followed by that many + * page numbers. Every violation is reported via chk_object_issue(). 
+ * The listed pages are accounted in usr->result.gc_pages (and in + * reclaimable_pages when the record is older than the latter reader), + * each page number is range- and order-checked and marked in chk->pagemap, + * and when sdb->cookie is set the page spans are printed. + * + * Returns the result of chk_check_break(). */ +__cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, + MDBX_chk_subdb_t *sdb, + const size_t record_number, const MDBX_val *key, + const MDBX_val *data) { + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + assert(sdb == &chk->subdb_gc); + (void)sdb; + const char *bad = ""; +
pgno_t *iptr = data->iov_base; + + if (key->iov_len != sizeof(txnid_t)) + chk_object_issue(scope, "entry", record_number, "wrong txn-id size", + "key-size %" PRIuSIZE, key->iov_len); + else { + txnid_t txnid; + memcpy(&txnid, key->iov_base, sizeof(txnid)); + if (txnid < 1 || txnid > usr->txn->mt_txnid) + chk_object_issue(scope, "entry", record_number, "wrong txn-id", + "%" PRIaTXN, txnid); + else { + if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t)) + chk_object_issue(scope, "entry", txnid, "wrong idl size", "%" PRIuPTR, + data->iov_len); + /* the first element of the list is the number of page numbers */ + size_t number = (data->iov_len >= sizeof(pgno_t)) ? *iptr++ : 0; + if (number < 1 || number > MDBX_PGL_LIMIT) { + chk_object_issue(scope, "entry", txnid, "wrong idl length", "%" PRIuPTR, + number); + /* The stored length is not trustworthy here: clamp it to the entries + * really present in the record, so the loops below never read past + * the end of data. Division is used instead of multiplication + * to avoid a size_t overflow with a garbage length. */ + const size_t stored = data->iov_len / sizeof(pgno_t); + if (number >= stored) + number = stored ? stored - 1 : 0; + } else if ((number + 1) * sizeof(pgno_t) > data->iov_len) { + chk_object_issue(scope, "entry", txnid, "trimmed idl", + "%" PRIuSIZE " > %" PRIuSIZE " (corruption)", + (number + 1) * sizeof(pgno_t), data->iov_len); + number = data->iov_len / sizeof(pgno_t) - 1; + } else if (data->iov_len - (number + 1) * sizeof(pgno_t) >= + /* LY: allow gap up to one page. it is ok + * and better than shrink-and-retry inside update_gc() */ + usr->env->me_psize) + chk_object_issue(scope, "entry", txnid, "extra idl space", + "%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)", + (number + 1) * sizeof(pgno_t), data->iov_len); + + usr->result.gc_pages += number; + if (chk->envinfo.mi_latter_reader_txnid > txnid) + usr->result.reclaimable_pages += number; + + size_t prev = MDBX_PNL_ASCENDING ? 
NUM_METAS - 1 : usr->txn->mt_next_pgno; + size_t span = 1; + for (size_t i = 0; i < number; ++i) { + const size_t pgno = iptr[i]; + if (pgno < NUM_METAS) + chk_object_issue(scope, "entry", txnid, "wrong idl entry", + "pgno %" PRIuSIZE " < meta-pages %u", pgno, + NUM_METAS); + else if (pgno >= usr->result.backed_pages) + chk_object_issue(scope, "entry", txnid, "wrong idl entry", + "pgno %" PRIuSIZE " > backed-pages %" PRIuSIZE, pgno, + usr->result.backed_pages); + else if (pgno >= usr->result.alloc_pages) + chk_object_issue(scope, "entry", txnid, "wrong idl entry", + "pgno %" PRIuSIZE " > alloc-pages %" PRIuSIZE, pgno, + usr->result.alloc_pages - 1); + else { + if (MDBX_PNL_DISORDERED(prev, pgno)) { + bad = " [bad sequence]"; + chk_object_issue( + scope, "entry", txnid, "bad sequence", + "%" PRIuSIZE " %c [%" PRIuSIZE "].%" PRIuSIZE, prev, + (prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? '>' : '<'), i, + pgno); + } + if (chk->pagemap) { + const intptr_t id = chk->pagemap[pgno]; + if (id == 0) + chk->pagemap[pgno] = -1 /* mark the pgno listed in GC */; + else if (id > 0) { + /* id - 1 is used as an index into chk->subdb[] just below, + * so it must be strictly less than the array length */ + assert(id - 1 < (intptr_t)ARRAY_LENGTH(chk->subdb)); + chk_object_issue(scope, "page", pgno, "already used", "by %s", + chk_v2a(chk, &chk->subdb[id - 1]->name)); + } else + chk_object_issue(scope, "page", pgno, "already listed in GC", + nullptr); + } + } + prev = pgno; + while (i + span < number && + iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) + : pgno_sub(pgno, span))) + ++span; + } + if (sdb->cookie) { + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_details), + "transaction %" PRIaTXN ", %" PRIuSIZE + " pages, maxspan %" PRIuSIZE "%s", + txnid, number, span, bad)); + for (size_t i = 0; i < number; i += span) { + const size_t pgno = iptr[i]; + for (span = 1; + i + span < number && + iptr[i + span] == (MDBX_PNL_ASCENDING ? 
pgno_add(pgno, span) + : pgno_sub(pgno, span)); + ++span) + ; + histogram_acc(span, &sdb->histogram.nested_tree); + MDBX_chk_line_t *line = chk_line_begin(scope, MDBX_chk_extra); + if (line) { + if (span > 1) + line = + chk_print(line, "%9" PRIuSIZE "[%" PRIuSIZE "]", pgno, span); + else + line = chk_print(line, "%9" PRIuSIZE, pgno); + chk_line_end(line); + int err = chk_check_break(scope); + if (err) + return err; + } + } + } + } + } + return chk_check_break(scope); +} + +/* Runs the check stages for the environment within the transaction stored in usr->txn: gathers the env info and the data-file size, peeks the meta-pages, traverses the b-tree and then GC/freeDB (each unless skipped by flags or by earlier tree problems), prints the page-allocation summary, processes MainDB and the sub-databases, and finishes with the MDBX_chk_conclude stage. Returns early with the error reported for env_info() or osal_filesize(), or with MDBX_CORRUPTED when fewer than NUM_METAS pages are backed or present in the file; otherwise returns the result of the concluding chk_scope_end(). */ __cold static int env_chk(MDBX_chk_scope_t *const scope) { + MDBX_chk_internal_t *const chk = scope->internal; + MDBX_chk_context_t *const usr = chk->usr; + MDBX_env *const env = usr->env; + MDBX_txn *const txn = usr->txn; + int err = + env_info(env, txn, &chk->envinfo, sizeof(chk->envinfo), &chk->troika); + if (unlikely(err)) + return chk_error_rc(scope, err, "env_info"); + + MDBX_chk_line_t *line = + chk_puts(chk_line_begin(scope, MDBX_chk_info), "current boot-id "); + if (chk->envinfo.mi_bootid.current.x | chk->envinfo.mi_bootid.current.y) + line = chk_print(line, "%016" PRIx64 "-%016" PRIx64, + chk->envinfo.mi_bootid.current.x, + chk->envinfo.mi_bootid.current.y); + else + line = chk_puts(line, "unavailable"); + chk_line_end(line); + + err = osal_filesize(env->me_lazy_fd, &env->me_dxb_mmap.filesize); + if (unlikely(err)) + return chk_error_rc(scope, err, "osal_filesize"); + + //-------------------------------------------------------------------------- + + err = chk_scope_begin(chk, 1, MDBX_chk_meta, nullptr, + &usr->result.problems_meta, "Peek the meta-pages..."); + if (likely(!err)) { + MDBX_chk_scope_t *const inner = usr->scope; + const uint64_t dxbfile_pages = + env->me_dxb_mmap.filesize >> env->me_psize2log; + usr->result.alloc_pages = txn->mt_next_pgno; + usr->result.backed_pages = bytes2pgno(env, env->me_dxb_mmap.current); + if (unlikely(usr->result.backed_pages > dxbfile_pages)) + chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, + usr->result.backed_pages, 
dxbfile_pages); + if (unlikely(dxbfile_pages < NUM_METAS)) + chk_scope_issue(inner, "file-pages %" PRIu64 " < %u", dxbfile_pages, + NUM_METAS); + if (unlikely(usr->result.backed_pages < NUM_METAS)) + chk_scope_issue(inner, "backed-pages %zu < %u", usr->result.backed_pages, + NUM_METAS); + if (unlikely(usr->result.backed_pages < NUM_METAS || + dxbfile_pages < NUM_METAS)) + return MDBX_CORRUPTED; + if (unlikely(usr->result.backed_pages > (size_t)MAX_PAGENO + 1)) { + chk_scope_issue(inner, "backed-pages %zu > max-pages %zu", + usr->result.backed_pages, (size_t)MAX_PAGENO + 1); + usr->result.backed_pages = MAX_PAGENO + 1; + } + + if ((env->me_flags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) { + if (unlikely(usr->result.backed_pages > dxbfile_pages)) { + chk_scope_issue(inner, "backed-pages %zu > file-pages %" PRIu64, + usr->result.backed_pages, dxbfile_pages); + usr->result.backed_pages = (size_t)dxbfile_pages; + } + if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) { + chk_scope_issue(scope, "alloc-pages %zu > backed-pages %zu", + usr->result.alloc_pages, usr->result.backed_pages); + usr->result.alloc_pages = usr->result.backed_pages; + } + } else { + /* DB may be shrunk by writer down to the allocated (but unused) pages. 
*/ + if (unlikely(usr->result.alloc_pages > usr->result.backed_pages)) { + chk_scope_issue(inner, "alloc-pages %zu > backed-pages %zu", + usr->result.alloc_pages, usr->result.backed_pages); + usr->result.alloc_pages = usr->result.backed_pages; + } + if (unlikely(usr->result.alloc_pages > dxbfile_pages)) { + chk_scope_issue(inner, "alloc-pages %zu > file-pages %" PRIu64, + usr->result.alloc_pages, dxbfile_pages); + usr->result.alloc_pages = (size_t)dxbfile_pages; + } + if (unlikely(usr->result.backed_pages > dxbfile_pages)) + usr->result.backed_pages = (size_t)dxbfile_pages; + } + + line = chk_line_feed(chk_print( + chk_line_begin(inner, MDBX_chk_info), + "pagesize %u (%u system), max keysize %u..%u" + ", max readers %u", + env->me_psize, env->me_os_psize, + mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT), + mdbx_env_get_maxkeysize_ex(env, MDBX_DB_DEFAULTS), env->me_maxreaders)); + line = chk_line_feed( + chk_print_size(line, "mapsize ", env->me_dxb_mmap.current, nullptr)); + if (txn->mt_geo.lower == txn->mt_geo.upper) + line = chk_print_size( + line, "fixed datafile: ", chk->envinfo.mi_geo.current, nullptr); + else { + line = chk_print_size( + line, "dynamic datafile: ", chk->envinfo.mi_geo.lower, nullptr); + line = chk_print_size(line, " .. 
", chk->envinfo.mi_geo.upper, ", "); + line = chk_print_size(line, "+", chk->envinfo.mi_geo.grow, ", "); + + line = chk_line_feed( + chk_print_size(line, "-", chk->envinfo.mi_geo.shrink, nullptr)); + line = chk_print_size( + line, "current datafile: ", chk->envinfo.mi_geo.current, nullptr); + } + tASSERT(txn, txn->mt_geo.now == chk->envinfo.mi_geo.current / + chk->envinfo.mi_dxb_pagesize); + chk_line_end(chk_print(line, ", %u pages", txn->mt_geo.now)); +#if defined(_WIN32) || defined(_WIN64) || MDBX_DEBUG + if (txn->mt_geo.shrink_pv && txn->mt_geo.now != txn->mt_geo.upper && + scope->verbosity >= MDBX_chk_verbose) { + line = chk_line_begin(inner, MDBX_chk_notice); + chk_line_feed(chk_print( + line, " > WARNING: Due Windows system limitations a file couldn't")); + chk_line_feed(chk_print( + line, " > be truncated while the database is opened. So, the size")); + chk_line_feed(chk_print( + line, " > database file of may by large than the database itself,")); + chk_line_end(chk_print( + line, " > until it will be closed or reopened in read-write mode.")); + } +#endif /* Windows || Debug */ + chk_verbose_meta(inner, 0); + chk_verbose_meta(inner, 1); + chk_verbose_meta(inner, 2); + + if (env->me_stuck_meta >= 0) { + chk_line_end(chk_print(chk_line_begin(inner, MDBX_chk_processing), + "skip checking meta-pages since the %u" + " is selected for verification", + env->me_stuck_meta)); + line = chk_line_feed( + chk_print(chk_line_begin(inner, MDBX_chk_resolution), + "transactions: recent %" PRIu64 ", " + "selected for verification %" PRIu64 ", lag %" PRIi64, + chk->envinfo.mi_recent_txnid, + chk->envinfo.mi_meta_txnid[env->me_stuck_meta], + chk->envinfo.mi_recent_txnid - + chk->envinfo.mi_meta_txnid[env->me_stuck_meta])); + chk_line_end(line); + } else { + chk_line_end(chk_puts(chk_line_begin(inner, MDBX_chk_verbose), + "performs check for meta-pages clashes")); + const unsigned meta_clash_mask = meta_eq_mask(&chk->troika); + if (meta_clash_mask & 1) + chk_scope_issue(inner, 
"meta-%d and meta-%d are clashed", 0, 1); + if (meta_clash_mask & 2) + chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 1, 2); + if (meta_clash_mask & 4) + chk_scope_issue(inner, "meta-%d and meta-%d are clashed", 2, 0); + + const unsigned prefer_steady_metanum = chk->troika.prefer_steady; + const uint64_t prefer_steady_txnid = + chk->troika.txnid[prefer_steady_metanum]; + const unsigned recent_metanum = chk->troika.recent; + const uint64_t recent_txnid = chk->troika.txnid[recent_metanum]; + if (env->me_flags & MDBX_EXCLUSIVE) { + chk_line_end( + chk_puts(chk_line_begin(inner, MDBX_chk_verbose), + "performs full check recent-txn-id with meta-pages")); + if (prefer_steady_txnid != chk->envinfo.mi_recent_txnid) { + chk_scope_issue( + inner, + "steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64 + " != %" PRIi64 ")", + prefer_steady_metanum, prefer_steady_txnid, + chk->envinfo.mi_recent_txnid); + } + } else if (chk->write_locked) { + chk_line_end( + chk_puts(chk_line_begin(inner, MDBX_chk_verbose), + "performs lite check recent-txn-id with meta-pages (not a " + "monopolistic mode)")); + if (recent_txnid != chk->envinfo.mi_recent_txnid) { + chk_scope_issue(inner, + "weak meta-%d txn-id mismatch recent-txn-id (%" PRIi64 + " != %" PRIi64 ")", + recent_metanum, recent_txnid, + chk->envinfo.mi_recent_txnid); + } + } else { + chk_line_end(chk_puts( + chk_line_begin(inner, MDBX_chk_verbose), + "skip check recent-txn-id with meta-pages (monopolistic or " + "read-write mode only)")); + } + + chk_line_end(chk_print( + chk_line_begin(inner, MDBX_chk_resolution), + "transactions: recent %" PRIu64 ", latter reader %" PRIu64 + ", lag %" PRIi64, + chk->envinfo.mi_recent_txnid, chk->envinfo.mi_latter_reader_txnid, + chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid)); + } + } + err = chk_scope_restore(scope, err); + + //-------------------------------------------------------------------------- + + eASSERT(env, err == MDBX_SUCCESS); + if (chk->flags & 
MDBX_CHK_SKIP_BTREE_TRAVERSAL) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skipping %s traversal...", "b-tree")); + else { + err = chk_scope_begin( + chk, -1, MDBX_chk_traversal_tree, nullptr, &usr->result.tree_problems, + "Traversal %s by txn#%" PRIaTXN "...", "b-tree", txn->mt_txnid); + if (likely(!err)) + err = chk_tree(usr->scope); + if (usr->result.tree_problems && usr->result.gc_tree_problems == 0) + usr->result.gc_tree_problems = usr->result.tree_problems; + if (usr->result.tree_problems && usr->result.kv_tree_problems == 0) + usr->result.kv_tree_problems = usr->result.tree_problems; + chk_scope_restore(scope, err); + } + + if (usr->result.gc_tree_problems > 0) + chk_line_end(chk_print( + chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", + chk_v2a(chk, MDBX_CHK_GC), "b-tree", + usr->result.problems_gc = usr->result.gc_tree_problems)); + else { + err = chk_scope_begin(chk, -1, MDBX_chk_traversal_freedb, &chk->subdb_gc, + &usr->result.problems_gc, + "Traversal %s by txn#%" PRIaTXN "...", "GC/freeDB", + txn->mt_txnid); + if (likely(!err)) + err = chk_db(usr->scope, FREE_DBI, &chk->subdb_gc, chk_handle_gc); + line = chk_line_begin(scope, MDBX_chk_info); + if (line) { + histogram_print(scope, line, &chk->subdb_gc.histogram.nested_tree, + "span(s)", "single", false); + chk_line_end(line); + } + if (usr->result.problems_gc == 0 && + (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) == 0) { + const size_t used_pages = usr->result.alloc_pages - usr->result.gc_pages; + if (usr->result.processed_pages != used_pages) + chk_scope_issue(usr->scope, + "used pages mismatch (%" PRIuSIZE + "(walked) != %" PRIuSIZE "(allocated - GC))", + usr->result.processed_pages, used_pages); + if (usr->result.unused_pages != usr->result.gc_pages) + chk_scope_issue(usr->scope, + "GC pages mismatch (%" PRIuSIZE + "(expected) != %" PRIuSIZE "(GC))", + usr->result.unused_pages, usr->result.gc_pages); + } 
+ } + chk_scope_restore(scope, err); + + //-------------------------------------------------------------------------- + + err = chk_scope_begin(chk, 1, MDBX_chk_space, nullptr, nullptr, + "Page allocation:"); + const double percent_boundary_reciprocal = 100.0 / txn->mt_geo.upper; + const double percent_backed_reciprocal = 100.0 / usr->result.backed_pages; + const size_t detained = usr->result.gc_pages - usr->result.reclaimable_pages; + const size_t available2boundary = txn->mt_geo.upper - + usr->result.alloc_pages + + usr->result.reclaimable_pages; + const size_t available2backed = usr->result.backed_pages - + usr->result.alloc_pages + + usr->result.reclaimable_pages; + const size_t remained2boundary = txn->mt_geo.upper - usr->result.alloc_pages; + const size_t remained2backed = + usr->result.backed_pages - usr->result.alloc_pages; + + const size_t used = (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) + ? usr->result.alloc_pages - usr->result.gc_pages + : usr->result.processed_pages; + + line = chk_line_begin(usr->scope, MDBX_chk_info); + line = chk_print(line, + "backed by file: %" PRIuSIZE " pages (%.1f%%)" + ", %" PRIuSIZE " left to boundary (%.1f%%)", + usr->result.backed_pages, + usr->result.backed_pages * percent_boundary_reciprocal, + txn->mt_geo.upper - usr->result.backed_pages, + (txn->mt_geo.upper - usr->result.backed_pages) * + percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", + "used", used, used * percent_backed_reciprocal, + used * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, + "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE + " to boundary (%.1f%% of boundary)", + "remained", remained2backed, remained2backed * percent_backed_reciprocal, + remained2boundary, remained2boundary * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, + "reclaimable: %" PRIuSIZE 
" (%.1f%% of backed, %.1f%% of boundary)" + ", GC %" PRIuSIZE " (%.1f%% of backed, %.1f%% of boundary)", + usr->result.reclaimable_pages, + usr->result.reclaimable_pages * percent_backed_reciprocal, + usr->result.reclaimable_pages * percent_boundary_reciprocal, + usr->result.gc_pages, usr->result.gc_pages * percent_backed_reciprocal, + usr->result.gc_pages * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print( + line, + "detained by reader(s): %" PRIuSIZE + " (%.1f%% of backed, %.1f%% of boundary)" + ", %u reader(s), lag %" PRIi64, + detained, detained * percent_backed_reciprocal, + detained * percent_boundary_reciprocal, chk->envinfo.mi_numreaders, + chk->envinfo.mi_recent_txnid - chk->envinfo.mi_latter_reader_txnid); + line = chk_line_feed(line); + + line = chk_print( + line, "%s: %" PRIuSIZE " page(s), %.1f%% of backed, %.1f%% of boundary", + "allocated", usr->result.alloc_pages, + usr->result.alloc_pages * percent_backed_reciprocal, + usr->result.alloc_pages * percent_boundary_reciprocal); + line = chk_line_feed(line); + + line = chk_print(line, + "%s: %" PRIuSIZE " page(s) (%.1f%%) of backed, %" PRIuSIZE + " to boundary (%.1f%% of boundary)", + "available", available2backed, + available2backed * percent_backed_reciprocal, + available2boundary, + available2boundary * percent_boundary_reciprocal); + chk_line_end(line); + + line = chk_line_begin(usr->scope, MDBX_chk_resolution); + line = chk_print(line, "%s %" PRIaPGNO " pages", + (txn->mt_geo.upper == txn->mt_geo.now) ? 
"total" : "upto", + txn->mt_geo.upper); + line = chk_print(line, ", backed %" PRIuSIZE " (%.1f%%)", + usr->result.backed_pages, + usr->result.backed_pages * percent_boundary_reciprocal); + line = chk_print(line, ", allocated %" PRIuSIZE " (%.1f%%)", + usr->result.alloc_pages, + usr->result.alloc_pages * percent_boundary_reciprocal); + line = + chk_print(line, ", available %" PRIuSIZE " (%.1f%%)", available2boundary, + available2boundary * percent_boundary_reciprocal); + chk_line_end(line); + chk_scope_restore(scope, err); + + //-------------------------------------------------------------------------- + + if (chk->flags & MDBX_CHK_SKIP_KV_TRAVERSAL) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skipping %s traversal...", "key-value")); + else if ((usr->result.problems_kv = usr->result.kv_tree_problems) > 0) + chk_line_end(chk_print( + chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s since %s is corrupted (%" PRIuSIZE " problem(s))", + chk_v2a(chk, MDBX_CHK_MAIN), "key-value", + usr->result.problems_kv = usr->result.kv_tree_problems)); + else { + err = + chk_scope_begin(chk, 0, MDBX_chk_traversal_maindb, &chk->subdb_main, + &usr->result.problems_kv, "Processing %s...", "MainDB"); + if (likely(!err)) + err = chk_db(usr->scope, MAIN_DBI, &chk->subdb_main, chk_handle_kv); + chk_scope_restore(scope, err); + + if (usr->result.problems_kv && usr->result.subdb_total) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), + "Skip processing %s", "sub-database(s)")); + else if (usr->result.problems_kv == 0 && usr->result.subdb_total == 0) + chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_info), "No %s", + "sub-database(s)")); + else if (usr->result.problems_kv == 0 && usr->result.subdb_total) { + err = chk_scope_begin(chk, 1, MDBX_chk_traversal_subdbs, nullptr, + &usr->result.problems_kv, + "Traversal %s by txn#%" PRIaTXN "...", + "sub-database(s)", txn->mt_txnid); + if (!err) + err = chk_db(usr->scope, MAIN_DBI, 
&chk->subdb_main, nullptr); + if (usr->scope->subtotal_issues) + chk_line_end( + chk_print(chk_line_begin(usr->scope, MDBX_chk_resolution), + "processed %" PRIuSIZE " of %" PRIuSIZE " subDb(s)" + ", %" PRIuSIZE " problems(s)", + usr->result.subdb_processed, usr->result.subdb_total, + usr->scope->subtotal_issues)); + } + chk_scope_restore(scope, err); + } + + return chk_scope_end(chk, chk_scope_begin(chk, 0, MDBX_chk_conclude, nullptr, + nullptr, nullptr)); +} + +/* Accounts one more problem for the check in progress: increments the counter referenced by ctx->internal->problem_counter and ctx->scope->subtotal_issues. Returns MDBX_SUCCESS, or MDBX_EINVAL when ctx is null, is not the context bound to its internal state, or has no problem counter or no current scope. */ __cold int mdbx_env_chk_problem(MDBX_chk_context_t *ctx) { + if (likely(ctx && ctx->internal && ctx->internal->usr == ctx && + ctx->internal->problem_counter && ctx->scope)) { + *ctx->internal->problem_counter += 1; + ctx->scope->subtotal_issues += 1; + return MDBX_SUCCESS; + } + return MDBX_EINVAL; +} + +/* Checks the environment: validates the arguments, allocates the internal state and binds it to ctx, then runs the init, lock, check (env_chk), unlock and finalize stages and disposes the state. Returns the check_env() error, MDBX_EINVAL when cb or ctx is null or ctx->internal is already set, MDBX_ENOMEM when the internal state cannot be allocated, otherwise the resulting code of the stages. A zero timeout_seconds_16dot16 leaves the deadline (monotime_timeout) at 0 - presumably meaning no timeout, confirm against the deadline check. */ __cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks *cb, + MDBX_chk_context_t *ctx, + const enum MDBX_chk_flags_t flags, + enum MDBX_chk_severity verbosity, + unsigned timeout_seconds_16dot16) { + int err, rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + if (unlikely(!cb || !ctx || ctx->internal)) + return MDBX_EINVAL; + + MDBX_chk_internal_t *const chk = osal_calloc(1, sizeof(MDBX_chk_internal_t)); + if (unlikely(!chk)) + return MDBX_ENOMEM; + + chk->cb = cb; + chk->usr = ctx; + chk->usr->internal = chk; + chk->usr->env = env; + chk->flags = flags; + + chk->subdb_gc.id = -1; + chk->subdb_gc.name.iov_base = MDBX_CHK_GC; + chk->subdb[FREE_DBI] = &chk->subdb_gc; + + chk->subdb_main.id = -1; + chk->subdb_main.name.iov_base = MDBX_CHK_MAIN; + chk->subdb[MAIN_DBI] = &chk->subdb_main; + + chk->monotime_timeout = + timeout_seconds_16dot16 + ? 
osal_16dot16_to_monotime(timeout_seconds_16dot16) + osal_monotime() + : 0; + chk->usr->scope_nesting = 0; + chk->usr->result.subdbs = (const void *)&chk->subdb; + + MDBX_chk_scope_t *const top = chk->scope_stack; + top->verbosity = verbosity; + top->internal = chk; + + // init + rc = chk_scope_end( + chk, chk_scope_begin(chk, 0, MDBX_chk_init, nullptr, nullptr, nullptr)); + + // lock + if (likely(!rc)) + rc = chk_scope_begin( + chk, 0, MDBX_chk_lock, nullptr, nullptr, "Taking %slock...", + (env->me_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) ? "" : "read "); + if (likely(!rc) && (env->me_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0) { + rc = mdbx_txn_lock(env, false); + if (unlikely(rc)) + chk_error_rc(ctx->scope, rc, "mdbx_txn_lock"); + else + chk->write_locked = true; + } + if (likely(!rc)) { + rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &ctx->txn); + if (unlikely(rc)) + chk_error_rc(ctx->scope, rc, "mdbx_txn_begin"); + } + chk_scope_end(chk, rc); + + // doit + if (likely(!rc)) { + chk->subdb_gc.flags = ctx->txn->mt_dbs[FREE_DBI].md_flags; + chk->subdb_main.flags = ctx->txn->mt_dbs[MAIN_DBI].md_flags; + rc = env_chk(top); + } + + // unlock + if (ctx->txn || chk->write_locked) { + chk_scope_begin(chk, 0, MDBX_chk_unlock, nullptr, nullptr, nullptr); + if (ctx->txn) { + err = mdbx_txn_abort(ctx->txn); + if (err && !rc) + rc = err; + ctx->txn = nullptr; + } + if (chk->write_locked) + mdbx_txn_unlock(env); + rc = chk_scope_end(chk, rc); + } + + // finalize + err = chk_scope_begin(chk, 0, MDBX_chk_finalize, nullptr, nullptr, nullptr); + rc = chk_scope_end(chk, err ? 
err : rc); + chk_dispose(chk); + return rc; +} + /******************************************************************************/ /* *INDENT-OFF* */ /* clang-format off */ diff --git a/src/internals.h b/src/internals.h index 1664dcd7..c871b3df 100644 --- a/src/internals.h +++ b/src/internals.h @@ -703,7 +703,8 @@ typedef struct MDBX_page { #define PAGETYPE_WHOLE(p) ((uint8_t)(p)->mp_flags) -/* Drop legacy P_DIRTY flag for sub-pages for compatilibity */ +/* Drop legacy P_DIRTY flag for sub-pages for compatibility, + * for assertions only. */ #define PAGETYPE_COMPAT(p) \ (unlikely(PAGETYPE_WHOLE(p) & P_SUBP) \ ? PAGETYPE_WHOLE(p) & ~(P_SUBP | P_LEGACY_DIRTY) \ @@ -1136,10 +1137,10 @@ typedef struct troika { #if MDBX_WORDBITS > 32 /* Workaround for false-positives from Valgrind */ uint32_t unused_pad; #endif -#define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7) -#define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64) -#define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128) -#define TROIKA_TAIL(troika) ((troika)->tail_and_flags & 3) +#define TROIKA_HAVE_STEADY(troika) ((troika)->fsm & 7u) +#define TROIKA_STRICT_VALID(troika) ((troika)->tail_and_flags & 64u) +#define TROIKA_VALID(troika) ((troika)->tail_and_flags & 128u) +#define TROIKA_TAIL(troika) ((troika)->tail_and_flags & 3u) txnid_t txnid[NUM_METAS]; } meta_troika_t; @@ -1787,3 +1788,33 @@ MDBX_MAYBE_UNUSED static void static_checks(void) { (size_t)(size), __LINE__); \ ASAN_UNPOISON_MEMORY_REGION(addr, size); \ } while (0) + +/******************************************************************************/ + +/** \brief Page types for traversing the b-tree. 
+ * \see mdbx_env_pgwalk() \see MDBX_pgvisitor_func */ +enum MDBX_page_type_t { + MDBX_page_broken, + MDBX_page_large, + MDBX_page_branch, + MDBX_page_leaf, + MDBX_page_dupfixed_leaf, + MDBX_subpage_leaf, + MDBX_subpage_dupfixed_leaf, + MDBX_subpage_broken, +}; +typedef enum MDBX_page_type_t MDBX_page_type_t; + +typedef struct MDBX_walk_sdb { + MDBX_val name; + struct MDBX_db *internal, *nested; +} MDBX_walk_sdb_t; + +/** \brief Callback function for traverse the b-tree. \see mdbx_env_pgwalk() */ +typedef int +MDBX_pgvisitor_func(const size_t pgno, const unsigned number, void *const ctx, + const int deep, const MDBX_walk_sdb_t *subdb, + const size_t page_size, const MDBX_page_type_t page_type, + const MDBX_error_t err, const size_t nentries, + const size_t payload_bytes, const size_t header_bytes, + const size_t unused_bytes); diff --git a/src/mdbx_chk.c b/src/mdbx_chk.c index a8c97372..c590253d 100644 --- a/src/mdbx_chk.c +++ b/src/mdbx_chk.c @@ -25,19 +25,6 @@ #include -typedef struct flagbit { - int bit; - const char *name; -} flagbit; - -const flagbit dbflags[] = {{MDBX_DUPSORT, "dupsort"}, - {MDBX_INTEGERKEY, "integerkey"}, - {MDBX_REVERSEKEY, "reversekey"}, - {MDBX_DUPFIXED, "dupfixed"}, - {MDBX_REVERSEDUP, "reversedup"}, - {MDBX_INTEGERDUP, "integerdup"}, - {0, nullptr}}; - #if defined(_WIN32) || defined(_WIN64) #include "wingetopt.h" @@ -72,181 +59,171 @@ static void signal_handler(int sig) { #define EXIT_FAILURE_CHECK_MAJOR (EXIT_FAILURE + 1) #define EXIT_FAILURE_CHECK_MINOR EXIT_FAILURE -typedef struct { - MDBX_val name; - struct { - uint64_t branch, large_count, large_volume, leaf; - uint64_t subleaf_dupsort, leaf_dupfixed, subleaf_dupfixed; - uint64_t total, empty, other; - } pages; - uint64_t payload_bytes; - uint64_t lost_bytes; -} walk_dbi_t; - -struct { - short *pagemap; - uint64_t total_payload_bytes; - uint64_t pgcount; - walk_dbi_t - dbi[MDBX_MAX_DBI + CORE_DBS + /* account pseudo-entry for meta */ 1]; -} walk; - -#define dbi_free 
walk.dbi[FREE_DBI] -#define dbi_main walk.dbi[MAIN_DBI] -#define dbi_meta walk.dbi[CORE_DBS] - -int envflags = MDBX_RDONLY | MDBX_EXCLUSIVE | MDBX_VALIDATION; +enum MDBX_env_flags_t env_flags = + MDBX_RDONLY | MDBX_EXCLUSIVE | MDBX_VALIDATION; MDBX_env *env; MDBX_txn *txn; -MDBX_envinfo envinfo; -size_t userdb_count, skipped_subdb; -uint64_t total_unused_bytes, reclaimable_pages, gc_pages, alloc_pages, - unused_pages, backed_pages; -unsigned verbose; -bool ignore_wrong_order, quiet, dont_traversal; +unsigned verbose = 0; +bool quiet; MDBX_val only_subdb; int stuck_meta = -1; +MDBX_chk_context_t chk; +bool turn_meta = false; +bool force_turn_meta = false; +enum MDBX_chk_flags_t chk_flags = MDBX_CHK_DEFAULTS; +enum MDBX_chk_stage chk_stage = MDBX_chk_none; -struct problem { - struct problem *pr_next; - size_t count; - const char *caption; -}; +static MDBX_chk_line_t line_struct; +static size_t anchor_lineno; +static size_t line_count; +static FILE *line_output; -struct problem *problems_list; -unsigned total_problems, data_tree_problems, gc_tree_problems; - -static void MDBX_PRINTF_ARGS(1, 2) print(const char *msg, ...) 
{ - if (!quiet) { - va_list args; - - fflush(stderr); - va_start(args, msg); - vfprintf(stdout, msg, args); - va_end(args); - } -} - -static MDBX_val printable_buf; -static void free_printable_buf(void) { osal_free(printable_buf.iov_base); } - -static const char *sdb_name(const MDBX_val *val) { - if (val == MDBX_PGWALK_MAIN) - return "@MAIN"; - if (val == MDBX_PGWALK_GC) - return "@GC"; - if (val == MDBX_PGWALK_META) - return "@META"; - - const unsigned char *const data = val->iov_base; - const size_t len = val->iov_len; - if (data == MDBX_PGWALK_MAIN) - return "@MAIN"; - if (data == MDBX_PGWALK_GC) - return "@GC"; - if (data == MDBX_PGWALK_META) - return "@META"; - - if (!len) - return ""; - if (!data) - return ""; - if (len > 65536) { - static char buf[64]; - /* NOTE: There is MSYS2 MinGW bug if you here got - * the "unknown conversion type character ‘z’ in format [-Werror=format=]" - * https://stackoverflow.com/questions/74504432/whats-the-proper-way-to-tell-mingw-based-gcc-to-use-ansi-stdio-output-on-windo - */ - snprintf(buf, sizeof(buf), "", len); - return buf; - } - - bool printable = true; - bool quoting = false; - size_t xchars = 0; - for (size_t i = 0; i < val->iov_len && printable; ++i) { - quoting |= data[i] != '_' && isalnum(data[i]) == 0; - printable = isprint(data[i]) != 0 || - (data[i] < ' ' && ++xchars < 4 && len > xchars * 4); - } - - size_t need = len + 1; - if (quoting || !printable) - need += len + /* quotes */ 2 + 2 * /* max xchars */ 4; - if (need > printable_buf.iov_len) { - void *ptr = osal_realloc(printable_buf.iov_base, need); - if (!ptr) - return ""; - if (!printable_buf.iov_base) - atexit(free_printable_buf); - printable_buf.iov_base = ptr; - printable_buf.iov_len = need; - } - - char *out = printable_buf.iov_base; - if (!quoting) { - memcpy(out, data, len); - out += len; - } else if (printable) { - *out++ = '\''; - for (size_t i = 0; i < len; ++i) { - if (data[i] < ' ') { - assert((char *)printable_buf.iov_base + printable_buf.iov_len 
> - out + 4); - static const char hex[] = "0123456789abcdef"; - out[0] = '\\'; - out[1] = 'x'; - out[2] = hex[data[i] >> 4]; - out[3] = hex[data[i] & 15]; - out += 4; - } else if (strchr("\"'`\\", data[i])) { - assert((char *)printable_buf.iov_base + printable_buf.iov_len > - out + 2); - out[0] = '\\'; - out[1] = data[i]; - out += 2; - } else { - assert((char *)printable_buf.iov_base + printable_buf.iov_len > - out + 1); - *out++ = data[i]; - } +#define LINE_SEVERITY_NONE 255 +static bool lf(void) { + if (!line_struct.empty) { + line_count += 1; + line_struct.empty = true; + line_struct.severity = LINE_SEVERITY_NONE; + line_struct.scope_depth = 0; + if (line_output) { + fputc('\n', line_output); + return true; } - *out++ = '\''; } - assert((char *)printable_buf.iov_base + printable_buf.iov_len > out); - *out = 0; - return printable_buf.iov_base; + return false; } -static void va_log(MDBX_log_level_t level, const char *function, int line, - const char *msg, va_list args) { - static const char *const prefixes[] = { - "!!!fatal: ", " ! " /* error */, " ~ " /* warning */, - " " /* notice */, " // " /* verbose */, " //// " /* debug */, - " ////// " /* trace */ +static void flush(void) { fflush(nullptr); } + +static void lf_flush(void) { + if (lf()) + flush(); +} + +static bool silently(enum MDBX_chk_severity severity) { + int cutoff = + chk.scope ? chk.scope->verbosity >> MDBX_chk_severity_prio_shift + : verbose + (MDBX_chk_result >> MDBX_chk_severity_prio_shift); + int prio = (severity >> MDBX_chk_severity_prio_shift); + if (chk.scope && chk.scope->stage == MDBX_chk_traversal_subdbs && verbose < 2) + prio += 1; + return quiet || cutoff < ((prio > 0) ? prio : 0); +} + +static FILE *prefix(enum MDBX_chk_severity severity) { + if (silently(severity)) + return nullptr; + + static const char *const prefixes[16] = { + "!!!fatal: ", // 0 fatal + " ! 
", // 1 error + " ~ ", // 2 warning + " ", // 3 notice + "", // 4 result + " = ", // 5 resolution + " - ", // 6 processing + " ", // 7 info + " ", // 8 verbose + " ", // 9 details + " // ", // A lib-verbose + " //// ", // B lib-debug + " ////// ", // C lib-trace + " ////// ", // D lib-extra + " ////// ", // E +1 + " ////// " // F +2 }; - FILE *out = stdout; - if (level <= MDBX_LOG_ERROR) { - total_problems++; - out = stderr; + const bool nl = + line_struct.scope_depth != chk.scope_nesting || + (line_struct.severity != severity && + (line_struct.severity != MDBX_chk_processing || + severity < MDBX_chk_result || severity > MDBX_chk_resolution)); + if (nl) + lf(); + if (severity < MDBX_chk_warning) + flush(); + FILE *out = (severity > MDBX_chk_error) ? stdout : stderr; + if (nl || line_struct.empty) { + line_struct.severity = severity; + line_struct.scope_depth = chk.scope_nesting; + unsigned kind = line_struct.severity & MDBX_chk_severity_kind_mask; + if (line_struct.scope_depth || *prefixes[kind]) { + line_struct.empty = false; + for (size_t i = 0; i < line_struct.scope_depth; ++i) + fputs(" ", out); + fputs(prefixes[kind], out); + } } + return line_output = out; +} - if (!quiet && verbose + 1 >= (unsigned)level && - (unsigned)level < ARRAY_LENGTH(prefixes)) { - fflush(nullptr); - fputs(prefixes[level], out); +static void suffix(size_t cookie, const char *str) { + if (cookie == line_count && !line_struct.empty) { + fprintf(line_output, " %s", str); + line_struct.empty = false; + lf(); + } +} + +static size_t MDBX_PRINTF_ARGS(2, 3) + print(enum MDBX_chk_severity severity, const char *msg, ...) { + FILE *out = prefix(severity); + if (out) { + va_list args; + va_start(args, msg); vfprintf(out, msg, args); - - const bool have_lf = msg[strlen(msg) - 1] == '\n'; - if (level == MDBX_LOG_FATAL && function && line) - fprintf(out, have_lf ? " %s(), %u\n" : " (%s:%u)\n", - function + (strncmp(function, "mdbx_", 5) ? 
5 : 0), line); - else if (!have_lf) - fputc('\n', out); - fflush(nullptr); + va_end(args); + line_struct.empty = false; + return line_count; } + return 0; +} +static FILE *MDBX_PRINTF_ARGS(2, 3) + print_ln(enum MDBX_chk_severity severity, const char *msg, ...) { + FILE *out = prefix(severity); + if (out) { + va_list args; + va_start(args, msg); + vfprintf(out, msg, args); + va_end(args); + line_struct.empty = false; + lf(); + } + return out; +} + +static void logger(MDBX_log_level_t level, const char *function, int line, + const char *fmt, va_list args) { + if (level <= MDBX_LOG_ERROR) + mdbx_env_chk_problem(&chk); + + const unsigned kind = (level > MDBX_LOG_NOTICE) + ? level - MDBX_LOG_NOTICE + + (MDBX_chk_extra & MDBX_chk_severity_kind_mask) + : level; + const unsigned prio = kind << MDBX_chk_severity_prio_shift; + enum MDBX_chk_severity severity = prio + kind; + FILE *out = prefix(severity); + if (out) { + vfprintf(out, fmt, args); + const bool have_lf = fmt[strlen(fmt) - 1] == '\n'; + if (level == MDBX_LOG_FATAL && function && line) { + if (have_lf) + for (size_t i = 0; i < line_struct.scope_depth; ++i) + fputs(" ", out); + fprintf(out, have_lf ? " %s(), %u" : " (%s:%u)", + function + (strncmp(function, "mdbx_", 5) ? 0 : 5), line); + lf(); + } else if (have_lf) { + line_struct.empty = true; + line_struct.severity = LINE_SEVERITY_NONE; + line_count += 1; + } else + lf(); + } + if (level < MDBX_LOG_VERBOSE) + flush(); if (level == MDBX_LOG_FATAL) { #if !MDBX_DEBUG && !MDBX_FORCE_ASSERTIONS exit(EXIT_FAILURE_MDBX); @@ -255,767 +232,144 @@ static void va_log(MDBX_log_level_t level, const char *function, int line, } } -static void MDBX_PRINTF_ARGS(1, 2) error(const char *msg, ...) { +static void MDBX_PRINTF_ARGS(1, 2) error_fmt(const char *msg, ...) 
{ va_list args; va_start(args, msg); - va_log(MDBX_LOG_ERROR, nullptr, 0, msg, args); + logger(MDBX_LOG_ERROR, nullptr, 0, msg, args); va_end(args); } -static void logger(MDBX_log_level_t level, const char *function, int line, - const char *msg, va_list args) { - (void)line; - (void)function; - if (level < MDBX_LOG_EXTRA) - va_log(level, function, line, msg, args); +static int error_fn(const char *fn, int err) { + if (err) + error_fmt("%s() failed, error %d, %s", fn, err, mdbx_strerror(err)); + return err; } -static int check_user_break(void) { - switch (user_break) { - case 0: - return MDBX_SUCCESS; - case 1: - print(" - interrupted by signal\n"); - fflush(nullptr); +static bool check_break(MDBX_chk_context_t *ctx) { + (void)ctx; + if (!user_break) + return false; + if (user_break == 1) { + print(MDBX_chk_resolution, "interrupted by signal"); + lf_flush(); user_break = 2; } - return MDBX_EINTR; + return true; } -static void pagemap_cleanup(void) { - osal_free(walk.pagemap); - walk.pagemap = nullptr; -} - -static bool eq(const MDBX_val a, const MDBX_val b) { - return a.iov_len == b.iov_len && - (a.iov_base == b.iov_base || a.iov_len == 0 || - !memcmp(a.iov_base, b.iov_base, a.iov_len)); -} - -static walk_dbi_t *pagemap_lookup_dbi(const MDBX_val *dbi_name, bool silent) { - static walk_dbi_t *last; - - if (dbi_name == MDBX_PGWALK_MAIN) - return &dbi_main; - if (dbi_name == MDBX_PGWALK_GC) - return &dbi_free; - if (dbi_name == MDBX_PGWALK_META) - return &dbi_meta; - - if (last && eq(last->name, *dbi_name)) - return last; - - walk_dbi_t *dbi = walk.dbi + CORE_DBS + /* account pseudo-entry for meta */ 1; - for (; dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi) { - if (eq(dbi->name, *dbi_name)) - return last = dbi; - } - - if (verbose > 0 && !silent) { - print(" - found %s area\n", sdb_name(dbi_name)); - fflush(nullptr); - } - - if (dbi == ARRAY_END(walk.dbi)) - return nullptr; - - dbi->name = *dbi_name; - return last = dbi; -} - -static void MDBX_PRINTF_ARGS(4, 
5) - problem_add(const char *object, uint64_t entry_number, const char *msg, - const char *extra, ...) { - total_problems++; - - if (!quiet) { - int need_fflush = 0; - struct problem *p; - - for (p = problems_list; p; p = p->pr_next) - if (p->caption == msg) - break; - - if (!p) { - p = osal_calloc(1, sizeof(*p)); - if (unlikely(!p)) - return; - p->caption = msg; - p->pr_next = problems_list; - problems_list = p; - need_fflush = 1; - } - - p->count++; - if (verbose > 1) { - print(" %s #%" PRIu64 ": %s", object, entry_number, msg); - if (extra) { - va_list args; - printf(" ("); - va_start(args, extra); - vfprintf(stdout, extra, args); - va_end(args); - printf(")"); - } - printf("\n"); - if (need_fflush) - fflush(nullptr); +static int scope_push(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *scope, + MDBX_chk_scope_t *inner, const char *fmt, va_list args) { + (void)scope; + if (fmt && *fmt) { + FILE *out = prefix(MDBX_chk_processing); + if (out) { + vfprintf(out, fmt, args); + inner->usr_o.number = line_count; + line_struct.ctx = ctx; + flush(); } } + return MDBX_SUCCESS; } -static struct problem *problems_push(void) { - struct problem *p = problems_list; - problems_list = nullptr; - return p; -} - -static size_t problems_pop(struct problem *list) { - size_t count = 0; - - if (problems_list) { - int i; - - print(" - problems: "); - for (i = 0; problems_list; ++i) { - struct problem *p = problems_list->pr_next; - count += problems_list->count; - print("%s%s (%" PRIuPTR ")", i ? 
", " : "", problems_list->caption, - problems_list->count); - osal_free(problems_list); - problems_list = p; - } - print("\n"); - fflush(nullptr); - } - - problems_list = list; - return count; -} - -static int pgvisitor(const uint64_t pgno, const unsigned pgnumber, - void *const ctx, const int deep, const MDBX_val *dbi_name, - const size_t page_size, const MDBX_page_type_t pagetype, - const MDBX_error_t err, const size_t nentries, - const size_t payload_bytes, const size_t header_bytes, - const size_t unused_bytes) { +static void scope_pop(MDBX_chk_context_t *ctx, MDBX_chk_scope_t *scope, + MDBX_chk_scope_t *inner) { (void)ctx; - const bool is_gc_tree = dbi_name == MDBX_PGWALK_GC; - if (deep > 42) { - problem_add("deep", deep, "too large", nullptr); - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - return MDBX_CORRUPTED /* avoid infinite loop/recursion */; - } - - walk_dbi_t *dbi = pagemap_lookup_dbi(dbi_name, false); - if (!dbi) { - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - return MDBX_ENOMEM; - } - - const size_t page_bytes = payload_bytes + header_bytes + unused_bytes; - walk.pgcount += pgnumber; - - const char *pagetype_caption; - bool branch = false; - switch (pagetype) { - default: - problem_add("page", pgno, "unknown page-type", "type %u, deep %i", - (unsigned)pagetype, deep); - pagetype_caption = "unknown"; - dbi->pages.other += pgnumber; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - break; - case MDBX_page_broken: - pagetype_caption = "broken"; - dbi->pages.other += pgnumber; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - break; - case MDBX_subpage_broken: - pagetype_caption = "broken-subpage"; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - break; - case MDBX_page_meta: - pagetype_caption = "meta"; - dbi->pages.other += pgnumber; - break; - case MDBX_page_large: - pagetype_caption = "large"; - dbi->pages.large_volume += 
pgnumber; - dbi->pages.large_count += 1; - break; - case MDBX_page_branch: - pagetype_caption = "branch"; - dbi->pages.branch += pgnumber; - branch = true; - break; - case MDBX_page_leaf: - pagetype_caption = "leaf"; - dbi->pages.leaf += pgnumber; - break; - case MDBX_page_dupfixed_leaf: - pagetype_caption = "leaf-dupfixed"; - dbi->pages.leaf_dupfixed += pgnumber; - break; - case MDBX_subpage_leaf: - pagetype_caption = "subleaf-dupsort"; - dbi->pages.subleaf_dupsort += 1; - break; - case MDBX_subpage_dupfixed_leaf: - pagetype_caption = "subleaf-dupfixed"; - dbi->pages.subleaf_dupfixed += 1; - break; - } - - if (pgnumber) { - if (verbose > 3 && (!only_subdb.iov_base || eq(only_subdb, dbi->name))) { - if (pgnumber == 1) - print(" %s-page %" PRIu64, pagetype_caption, pgno); - else - print(" %s-span %" PRIu64 "[%u]", pagetype_caption, pgno, pgnumber); - print(" of %s: header %" PRIiPTR ", %s %" PRIiPTR ", payload %" PRIiPTR - ", unused %" PRIiPTR ", deep %i\n", - sdb_name(&dbi->name), header_bytes, - (pagetype == MDBX_page_branch) ? "keys" : "entries", nentries, - payload_bytes, unused_bytes, deep); - } - - bool already_used = false; - for (unsigned n = 0; n < pgnumber; ++n) { - uint64_t spanpgno = pgno + n; - if (spanpgno >= alloc_pages) { - problem_add("page", spanpgno, "wrong page-no", - "%s-page: %" PRIu64 " > %" PRIu64 ", deep %i", - pagetype_caption, spanpgno, alloc_pages, deep); - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } else if (walk.pagemap[spanpgno]) { - walk_dbi_t *coll_dbi = &walk.dbi[walk.pagemap[spanpgno] - 1]; - problem_add("page", spanpgno, - (branch && coll_dbi == dbi) ? "loop" : "already used", - "%s-page: by %s, deep %i", pagetype_caption, - sdb_name(&coll_dbi->name), deep); - already_used = true; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } else { - walk.pagemap[spanpgno] = (short)(dbi - walk.dbi + 1); - dbi->pages.total += 1; - } - } - - if (already_used) - return branch ? 
MDBX_RESULT_TRUE /* avoid infinite loop/recursion */ - : MDBX_SUCCESS; - } - - if (MDBX_IS_ERROR(err)) { - problem_add("page", pgno, "invalid/corrupted", "%s-page", pagetype_caption); - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } else { - if (unused_bytes > page_size) { - problem_add("page", pgno, "illegal unused-bytes", - "%s-page: %u < %" PRIuPTR " < %u", pagetype_caption, 0, - unused_bytes, envinfo.mi_dxb_pagesize); - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } - - if (header_bytes < (int)sizeof(long) || - (size_t)header_bytes >= envinfo.mi_dxb_pagesize - sizeof(long)) { - problem_add("page", pgno, "illegal header-length", - "%s-page: %" PRIuPTR " < %" PRIuPTR " < %" PRIuPTR, - pagetype_caption, sizeof(long), header_bytes, - envinfo.mi_dxb_pagesize - sizeof(long)); - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } - if (payload_bytes < 1) { - if (nentries > 1) { - problem_add("page", pgno, "zero size-of-entry", - "%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR " entries", - pagetype_caption, payload_bytes, nentries); - /* if ((size_t)header_bytes + unused_bytes < page_size) { - // LY: hush a misuse error - page_bytes = page_size; - } */ - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } else { - problem_add("page", pgno, "empty", - "%s-page: payload %" PRIuPTR " bytes, %" PRIuPTR - " entries, deep %i", - pagetype_caption, payload_bytes, nentries, deep); - dbi->pages.empty += 1; - data_tree_problems += !is_gc_tree; - gc_tree_problems += is_gc_tree; - } - } - - if (pgnumber) { - if (page_bytes != page_size) { - problem_add("page", pgno, "misused", - "%s-page: %" PRIuPTR " != %" PRIuPTR " (%" PRIuPTR - "h + %" PRIuPTR "p + %" PRIuPTR "u), deep %i", - pagetype_caption, page_size, page_bytes, header_bytes, - payload_bytes, unused_bytes, deep); - if (page_size > page_bytes) - dbi->lost_bytes += page_size - page_bytes; - data_tree_problems += !is_gc_tree; - 
gc_tree_problems += is_gc_tree; - } else { - dbi->payload_bytes += (uint64_t)payload_bytes + header_bytes; - walk.total_payload_bytes += (uint64_t)payload_bytes + header_bytes; - } - } - } - - return check_user_break(); + (void)scope; + suffix(inner->usr_o.number, inner->subtotal_issues ? "error(s)" : "done"); + flush(); } -typedef int(visitor)(const uint64_t record_number, const MDBX_val *key, - const MDBX_val *data); -static int process_db(MDBX_dbi dbi_handle, const MDBX_val *dbi_name, - visitor *handler); - -static int handle_userdb(const uint64_t record_number, const MDBX_val *key, - const MDBX_val *data) { - (void)record_number; - (void)key; - (void)data; - return check_user_break(); +static MDBX_chk_user_subdb_cookie_t *subdb_filter(MDBX_chk_context_t *ctx, + const MDBX_val *name, + MDBX_db_flags_t flags) { + (void)ctx; + (void)flags; + return (!only_subdb.iov_base || + (only_subdb.iov_len == name->iov_len && + memcmp(only_subdb.iov_base, name->iov_base, name->iov_len) == 0)) + ? (void *)(intptr_t)-1 + : nullptr; } -static int handle_freedb(const uint64_t record_number, const MDBX_val *key, - const MDBX_val *data) { - char *bad = ""; - pgno_t *iptr = data->iov_base; - - if (key->iov_len != sizeof(txnid_t)) - problem_add("entry", record_number, "wrong txn-id size", - "key-size %" PRIiPTR, key->iov_len); - else { - txnid_t txnid; - memcpy(&txnid, key->iov_base, sizeof(txnid)); - if (txnid < 1 || txnid > envinfo.mi_recent_txnid) - problem_add("entry", record_number, "wrong txn-id", "%" PRIaTXN, txnid); - else { - if (data->iov_len < sizeof(pgno_t) || data->iov_len % sizeof(pgno_t)) - problem_add("entry", txnid, "wrong idl size", "%" PRIuPTR, - data->iov_len); - size_t number = (data->iov_len >= sizeof(pgno_t)) ? 
*iptr++ : 0; - if (number < 1 || number > MDBX_PGL_LIMIT) - problem_add("entry", txnid, "wrong idl length", "%" PRIuPTR, number); - else if ((number + 1) * sizeof(pgno_t) > data->iov_len) { - problem_add("entry", txnid, "trimmed idl", - "%" PRIuSIZE " > %" PRIuSIZE " (corruption)", - (number + 1) * sizeof(pgno_t), data->iov_len); - number = data->iov_len / sizeof(pgno_t) - 1; - } else if (data->iov_len - (number + 1) * sizeof(pgno_t) >= - /* LY: allow gap up to one page. it is ok - * and better than shink-and-retry inside update_gc() */ - envinfo.mi_dxb_pagesize) - problem_add("entry", txnid, "extra idl space", - "%" PRIuSIZE " < %" PRIuSIZE " (minor, not a trouble)", - (number + 1) * sizeof(pgno_t), data->iov_len); - - gc_pages += number; - if (envinfo.mi_latter_reader_txnid > txnid) - reclaimable_pages += number; - - pgno_t prev = MDBX_PNL_ASCENDING ? NUM_METAS - 1 : txn->mt_next_pgno; - pgno_t span = 1; - for (size_t i = 0; i < number; ++i) { - if (check_user_break()) - return MDBX_EINTR; - const pgno_t pgno = iptr[i]; - if (pgno < NUM_METAS) - problem_add("entry", txnid, "wrong idl entry", - "pgno %" PRIaPGNO " < meta-pages %u", pgno, NUM_METAS); - else if (pgno >= backed_pages) - problem_add("entry", txnid, "wrong idl entry", - "pgno %" PRIaPGNO " > backed-pages %" PRIu64, pgno, - backed_pages); - else if (pgno >= alloc_pages) - problem_add("entry", txnid, "wrong idl entry", - "pgno %" PRIaPGNO " > alloc-pages %" PRIu64, pgno, - alloc_pages - 1); - else { - if (MDBX_PNL_DISORDERED(prev, pgno)) { - bad = " [bad sequence]"; - problem_add("entry", txnid, "bad sequence", - "%" PRIaPGNO " %c [%zu].%" PRIaPGNO, prev, - (prev == pgno) ? '=' : (MDBX_PNL_ASCENDING ? 
'>' : '<'), - i, pgno); - } - if (walk.pagemap) { - int idx = walk.pagemap[pgno]; - if (idx == 0) - walk.pagemap[pgno] = -1; - else if (idx > 0) - problem_add("page", pgno, "already used", "by %s", - sdb_name(&walk.dbi[idx - 1].name)); - else - problem_add("page", pgno, "already listed in GC", nullptr); - } - } - prev = pgno; - while (i + span < number && - iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) - : pgno_sub(pgno, span))) - ++span; - } - if (verbose > 3 && !only_subdb.iov_base) { - print(" transaction %" PRIaTXN ", %" PRIuPTR - " pages, maxspan %" PRIaPGNO "%s\n", - txnid, number, span, bad); - if (verbose > 4) { - for (size_t i = 0; i < number; i += span) { - const pgno_t pgno = iptr[i]; - for (span = 1; - i + span < number && - iptr[i + span] == (MDBX_PNL_ASCENDING ? pgno_add(pgno, span) - : pgno_sub(pgno, span)); - ++span) - ; - if (span > 1) { - print(" %9" PRIaPGNO "[%" PRIaPGNO "]\n", pgno, span); - } else - print(" %9" PRIaPGNO "\n", pgno); - } - } - } - } - } - - return check_user_break(); +static int stage_begin(MDBX_chk_context_t *ctx, enum MDBX_chk_stage stage) { + (void)ctx; + chk_stage = stage; + anchor_lineno = line_count; + flush(); + return MDBX_SUCCESS; } -static int equal_or_greater(const MDBX_val *a, const MDBX_val *b) { - return eq(*a, *b) ? 0 : 1; +static int conclude(MDBX_chk_context_t *ctx); +static int stage_end(MDBX_chk_context_t *ctx, enum MDBX_chk_stage stage, + int err) { + if (stage == MDBX_chk_conclude && !err) + err = conclude(ctx); + suffix(anchor_lineno, err ? 
"error(s)" : "done"); + flush(); + chk_stage = MDBX_chk_none; + return err; } -static int handle_maindb(const uint64_t record_number, const MDBX_val *key, - const MDBX_val *data) { - if (data->iov_len == sizeof(MDBX_db)) { - int rc = process_db(~0u, key, handle_userdb); - if (rc != MDBX_INCOMPATIBLE) { - userdb_count++; - return rc; - } +static MDBX_chk_line_t *print_begin(MDBX_chk_context_t *ctx, + enum MDBX_chk_severity severity) { + (void)ctx; + if (silently(severity)) + return nullptr; + if (line_struct.ctx) { + if (line_struct.severity == MDBX_chk_processing && + severity >= MDBX_chk_result && severity <= MDBX_chk_resolution && + line_output) + fputc(' ', line_output); + else + lf(); + line_struct.ctx = nullptr; } - return handle_userdb(record_number, key, data); + line_struct.severity = severity; + return &line_struct; } -static const char *db_flags2keymode(unsigned flags) { - flags &= (MDBX_REVERSEKEY | MDBX_INTEGERKEY); - switch (flags) { - case 0: - return "usual"; - case MDBX_REVERSEKEY: - return "reserve"; - case MDBX_INTEGERKEY: - return "ordinal"; - case MDBX_REVERSEKEY | MDBX_INTEGERKEY: - return "msgpack"; - default: - assert(false); - __unreachable(); - } +static void print_flush(MDBX_chk_line_t *line) { + (void)line; + flush(); } -static const char *db_flags2valuemode(unsigned flags) { - flags &= (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | MDBX_INTEGERDUP); - switch (flags) { - case 0: - return "single"; - case MDBX_DUPSORT: - return "multi"; - case MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_REVERSEDUP: - return "multi-reverse"; - case MDBX_DUPFIXED: - case MDBX_DUPSORT | MDBX_DUPFIXED: - return "multi-samelength"; - case MDBX_DUPFIXED | MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: - return "multi-reverse-samelength"; - case MDBX_INTEGERDUP: - case MDBX_DUPSORT | MDBX_INTEGERDUP: - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: - case MDBX_DUPFIXED | MDBX_INTEGERDUP: - return "multi-ordinal"; - case 
MDBX_INTEGERDUP | MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_INTEGERDUP | MDBX_REVERSEDUP: - return "multi-msgpack"; - case MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: - case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: - return "reserved"; - default: - assert(false); - __unreachable(); - } +static void print_done(MDBX_chk_line_t *line) { + lf(); + line->ctx = nullptr; } -static int process_db(MDBX_dbi dbi_handle, const MDBX_val *dbi_name, - visitor *handler) { - MDBX_cursor *mc; - MDBX_stat ms; - MDBX_val key, data; - MDBX_val prev_key, prev_data; - unsigned flags; - int rc, i; - struct problem *saved_list; - uint64_t problems_count; - const bool second_pass = dbi_handle == MAIN_DBI; - - uint64_t record_count = 0, dups = 0; - uint64_t key_bytes = 0, data_bytes = 0; - - if ((MDBX_TXN_FINISHED | MDBX_TXN_ERROR) & mdbx_txn_flags(txn)) { - print(" ! abort processing %s due to a previous error\n", - sdb_name(dbi_name)); - return MDBX_BAD_TXN; - } - - if (dbi_handle == ~0u) { - rc = mdbx_dbi_open_ex2( - txn, dbi_name, MDBX_DB_ACCEDE, &dbi_handle, - (dbi_name && ignore_wrong_order) ? equal_or_greater : nullptr, - (dbi_name && ignore_wrong_order) ? equal_or_greater : nullptr); - if (rc) { - if (!dbi_name || - rc != - MDBX_INCOMPATIBLE) /* LY: mainDB's record is not a user's DB. 
*/ { - error("mdbx_dbi_open(%s) failed, error %d %s\n", sdb_name(dbi_name), rc, - mdbx_strerror(rc)); - } - return rc; - } - } - - if (dbi_handle >= CORE_DBS && dbi_name && only_subdb.iov_base && - !eq(only_subdb, *dbi_name)) { - if (verbose) { - print("Skip processing %s...\n", sdb_name(dbi_name)); - fflush(nullptr); - } - skipped_subdb++; - return MDBX_SUCCESS; - } - - if (!second_pass && verbose) - print("Processing %s...\n", sdb_name(dbi_name)); - fflush(nullptr); - - rc = mdbx_dbi_flags(txn, dbi_handle, &flags); - if (rc) { - error("mdbx_dbi_flags() failed, error %d %s\n", rc, mdbx_strerror(rc)); - return rc; - } - - rc = mdbx_dbi_stat(txn, dbi_handle, &ms, sizeof(ms)); - if (rc) { - error("mdbx_dbi_stat() failed, error %d %s\n", rc, mdbx_strerror(rc)); - return rc; - } - - if (!second_pass && verbose) { - print(" - key-value kind: %s-key => %s-value", db_flags2keymode(flags), - db_flags2valuemode(flags)); - if (verbose > 1) { - print(", flags:"); - if (!flags) - print(" none"); - else { - for (i = 0; dbflags[i].bit; i++) - if (flags & dbflags[i].bit) - print(" %s", dbflags[i].name); - } - if (verbose > 2) - print(" (0x%02X), dbi-id %d", flags, dbi_handle); - } - print("\n"); - if (ms.ms_mod_txnid) - print(" - last modification txn#%" PRIu64 "\n", ms.ms_mod_txnid); - if (verbose > 1) { - print(" - page size %u, entries %" PRIu64 "\n", ms.ms_psize, - ms.ms_entries); - print(" - b-tree depth %u, pages: branch %" PRIu64 ", leaf %" PRIu64 - ", overflow %" PRIu64 "\n", - ms.ms_depth, ms.ms_branch_pages, ms.ms_leaf_pages, - ms.ms_overflow_pages); - } - } - - walk_dbi_t *dbi = (dbi_handle < CORE_DBS) - ? 
&walk.dbi[dbi_handle] - : pagemap_lookup_dbi(dbi_name, true); - if (!dbi) { - error("too many DBIs or out of memory\n"); - return MDBX_ENOMEM; - } - if (!dont_traversal) { - const uint64_t subtotal_pages = - ms.ms_branch_pages + ms.ms_leaf_pages + ms.ms_overflow_pages; - if (subtotal_pages != dbi->pages.total) - error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", - "subtotal", subtotal_pages, dbi->pages.total); - if (ms.ms_branch_pages != dbi->pages.branch) - error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", "branch", - ms.ms_branch_pages, dbi->pages.branch); - const uint64_t allleaf_pages = dbi->pages.leaf + dbi->pages.leaf_dupfixed; - if (ms.ms_leaf_pages != allleaf_pages) - error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", - "all-leaf", ms.ms_leaf_pages, allleaf_pages); - if (ms.ms_overflow_pages != dbi->pages.large_volume) - error("%s pages mismatch (%" PRIu64 " != walked %" PRIu64 ")\n", - "large/overlow", ms.ms_overflow_pages, dbi->pages.large_volume); - } - rc = mdbx_cursor_open(txn, dbi_handle, &mc); - if (rc) { - error("mdbx_cursor_open() failed, error %d %s\n", rc, mdbx_strerror(rc)); - return rc; - } - - if (ignore_wrong_order) { /* for debugging with enabled assertions */ - mc->mc_checking |= CC_SKIPORD; - if (mc->mc_xcursor) - mc->mc_xcursor->mx_cursor.mc_checking |= CC_SKIPORD; - } - - const size_t maxkeysize = mdbx_env_get_maxkeysize_ex(env, flags); - saved_list = problems_push(); - prev_key.iov_base = nullptr; - prev_key.iov_len = 0; - prev_data.iov_base = nullptr; - prev_data.iov_len = 0; - rc = mdbx_cursor_get(mc, &key, &data, MDBX_FIRST); - while (rc == MDBX_SUCCESS) { - rc = check_user_break(); - if (rc) - goto bailout; - - if (!second_pass) { - bool bad_key = false; - if (key.iov_len > maxkeysize) { - problem_add("entry", record_count, "key length exceeds max-key-size", - "%" PRIuPTR " > %" PRIuPTR, key.iov_len, maxkeysize); - bad_key = true; - } else if ((flags & MDBX_INTEGERKEY) && key.iov_len != 
sizeof(uint64_t) && - key.iov_len != sizeof(uint32_t)) { - problem_add("entry", record_count, "wrong key length", - "%" PRIuPTR " != 4or8", key.iov_len); - bad_key = true; - } - - bool bad_data = false; - if ((flags & MDBX_INTEGERDUP) && data.iov_len != sizeof(uint64_t) && - data.iov_len != sizeof(uint32_t)) { - problem_add("entry", record_count, "wrong data length", - "%" PRIuPTR " != 4or8", data.iov_len); - bad_data = true; - } - - if (prev_key.iov_base) { - if (prev_data.iov_base && !bad_data && (flags & MDBX_DUPFIXED) && - prev_data.iov_len != data.iov_len) { - problem_add("entry", record_count, "different data length", - "%" PRIuPTR " != %" PRIuPTR, prev_data.iov_len, - data.iov_len); - bad_data = true; - } - - if (!bad_key) { - int cmp = mdbx_cmp(txn, dbi_handle, &key, &prev_key); - if (cmp == 0) { - ++dups; - if ((flags & MDBX_DUPSORT) == 0) { - problem_add("entry", record_count, "duplicated entries", nullptr); - if (prev_data.iov_base && data.iov_len == prev_data.iov_len && - memcmp(data.iov_base, prev_data.iov_base, data.iov_len) == - 0) { - problem_add("entry", record_count, "complete duplicate", - nullptr); - } - } else if (!bad_data && prev_data.iov_base) { - cmp = mdbx_dcmp(txn, dbi_handle, &data, &prev_data); - if (cmp == 0) { - problem_add("entry", record_count, "complete duplicate", - nullptr); - } else if (cmp < 0 && !ignore_wrong_order) { - problem_add("entry", record_count, - "wrong order of multi-values", nullptr); - } - } - } else if (cmp < 0 && !ignore_wrong_order) { - problem_add("entry", record_count, "wrong order of entries", - nullptr); - } - } - } - - if (!bad_key) { - if (verbose && (flags & MDBX_INTEGERKEY) && !prev_key.iov_base) - print(" - fixed key-size %" PRIuPTR "\n", key.iov_len); - prev_key = key; - } - if (!bad_data) { - if (verbose && (flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) && - !prev_data.iov_base) - print(" - fixed data-size %" PRIuPTR "\n", data.iov_len); - prev_data = data; - } - } - - if (handler) { - rc = 
handler(record_count, &key, &data); - if (MDBX_IS_ERROR(rc)) - goto bailout; - } - - record_count++; - key_bytes += key.iov_len; - data_bytes += data.iov_len; - - rc = mdbx_cursor_get(mc, &key, &data, MDBX_NEXT); - } - if (rc != MDBX_NOTFOUND) - error("mdbx_cursor_get() failed, error %d %s\n", rc, mdbx_strerror(rc)); - else - rc = 0; - - if (record_count != ms.ms_entries) - problem_add("entry", record_count, "different number of entries", - "%" PRIu64 " != %" PRIu64, record_count, ms.ms_entries); -bailout: - problems_count = problems_pop(saved_list); - if (!second_pass && verbose) { - print(" - summary: %" PRIu64 " records, %" PRIu64 " dups, %" PRIu64 - " key's bytes, %" PRIu64 " data's " - "bytes, %" PRIu64 " problems\n", - record_count, dups, key_bytes, data_bytes, problems_count); - fflush(nullptr); - } - - mdbx_cursor_close(mc); - return (rc || problems_count) ? MDBX_RESULT_TRUE : MDBX_SUCCESS; +static void print_chars(MDBX_chk_line_t *line, const char *str, size_t len) { + if (line->empty) + prefix(line->severity); + fwrite(str, 1, len, line_output); } +static void print_format(MDBX_chk_line_t *line, const char *fmt, va_list args) { + if (line->empty) + prefix(line->severity); + vfprintf(line_output, fmt, args); +} + +static const MDBX_chk_callbacks_t cb = {.check_break = check_break, + .scope_push = scope_push, + .scope_pop = scope_pop, + .subdb_filter = subdb_filter, + .stage_begin = stage_begin, + .stage_end = stage_end, + .print_begin = print_begin, + .print_flush = print_flush, + .print_done = print_done, + .print_chars = print_chars, + .print_format = print_format}; + static void usage(char *prog) { fprintf( stderr, "usage: %s " "[-V] [-v] [-q] [-c] [-0|1|2] [-w] [-d] [-i] [-s subdb] [-u|U] dbpath\n" " -V\t\tprint version and exit\n" - " -v\t\tmore verbose, could be used multiple times\n" + " -v\t\tmore verbose, could be repeated upto 9 times\n" " -q\t\tbe quiet\n" " -c\t\tforce cooperative mode (don't try exclusive)\n" " -w\t\twrite-mode checking\n" @@ 
-1031,144 +385,68 @@ static void usage(char *prog) { exit(EXIT_INTERRUPTED); } -static bool meta_ot(txnid_t txn_a, uint64_t sign_a, txnid_t txn_b, - uint64_t sign_b, const bool wanna_steady) { - if (txn_a == txn_b) - return SIGN_IS_STEADY(sign_b); - - if (wanna_steady && SIGN_IS_STEADY(sign_a) != SIGN_IS_STEADY(sign_b)) - return SIGN_IS_STEADY(sign_b); - - return txn_a < txn_b; -} - -static bool meta_eq(txnid_t txn_a, uint64_t sign_a, txnid_t txn_b, - uint64_t sign_b) { - if (!txn_a || txn_a != txn_b) - return false; - - if (SIGN_IS_STEADY(sign_a) != SIGN_IS_STEADY(sign_b)) - return false; - - return true; -} - -static int meta_recent(const bool wanna_steady) { - if (meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, wanna_steady)) - return meta_ot(envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, - envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, wanna_steady) - ? 1 - : 2; - else - return meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, wanna_steady) - ? 2 - : 0; -} - -static int meta_tail(int head) { - switch (head) { - case 0: - return meta_ot(envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, - envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, true) - ? 1 - : 2; - case 1: - return meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, true) - ? 0 - : 2; - case 2: - return meta_ot(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, true) - ? 
0 - : 1; - default: - assert(false); - return -1; +static int conclude(MDBX_chk_context_t *ctx) { + int err = MDBX_SUCCESS; + if (ctx->result.total_problems == 1 && ctx->result.problems_meta == 1 && + (chk_flags & + (MDBX_CHK_SKIP_BTREE_TRAVERSAL | MDBX_CHK_SKIP_KV_TRAVERSAL)) == 0 && + (env_flags & MDBX_RDONLY) == 0 && !only_subdb.iov_base && + stuck_meta < 0 && ctx->result.steady_txnid < ctx->result.recent_txnid) { + const size_t step_lineno = + print(MDBX_chk_resolution, + "Perform sync-to-disk for make steady checkpoint" + " at txn-id #%" PRIi64 "...", + ctx->result.recent_txnid); + flush(); + err = error_fn("mdbx_env_pgwalk", mdbx_env_sync_ex(ctx->env, true, false)); + if (err == MDBX_SUCCESS) { + ctx->result.problems_meta -= 1; + ctx->result.total_problems -= 1; + suffix(step_lineno, "done"); + } } -} -static int meta_head(void) { return meta_recent(false); } - -void verbose_meta(int num, txnid_t txnid, uint64_t sign, uint64_t bootid_x, - uint64_t bootid_y) { - const bool have_bootid = (bootid_x | bootid_y) != 0; - const bool bootid_match = bootid_x == envinfo.mi_bootid.current.x && - bootid_y == envinfo.mi_bootid.current.y; - - print(" - meta-%d: ", num); - switch (sign) { - case MDBX_DATASIGN_NONE: - print("no-sync/legacy"); - break; - case MDBX_DATASIGN_WEAK: - print("weak-%s", bootid_match ? (have_bootid ? "intact (same boot-id)" - : "unknown (no boot-id") - : "dead"); - break; - default: - print("steady"); - break; + if (turn_meta && stuck_meta >= 0 && + (chk_flags & + (MDBX_CHK_SKIP_BTREE_TRAVERSAL | MDBX_CHK_SKIP_KV_TRAVERSAL)) == 0 && + !only_subdb.iov_base && + (env_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == MDBX_EXCLUSIVE) { + const bool successful_check = + (err | ctx->result.total_problems | ctx->result.problems_meta) == 0; + if (successful_check || force_turn_meta) { + const size_t step_lineno = print( + MDBX_chk_resolution, + "Performing turn to the specified meta-page (%d) due to %s!", + stuck_meta, + successful_check ? 
"successful check" : "the -T option was given"); + flush(); + err = mdbx_env_turn_for_recovery(ctx->env, stuck_meta); + if (err != MDBX_SUCCESS) + error_fn("mdbx_env_turn_for_recovery", err); + else + suffix(step_lineno, "done"); + } else { + print(MDBX_chk_resolution, + "Skipping turn to the specified meta-page (%d) due to " + "unsuccessful check!", + stuck_meta); + lf_flush(); + } } - print(" txn#%" PRIu64, txnid); - const int head = meta_head(); - if (num == head) - print(", head"); - else if (num == meta_tail(head)) - print(", tail"); - else - print(", stay"); - - if (stuck_meta >= 0) { - if (num == stuck_meta) - print(", forced for checking"); - } else if (txnid > envinfo.mi_recent_txnid && - (envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) == MDBX_EXCLUSIVE) - print(", rolled-back %" PRIu64 " (%" PRIu64 " >>> %" PRIu64 ")", - txnid - envinfo.mi_recent_txnid, txnid, envinfo.mi_recent_txnid); - print("\n"); -} - -static uint64_t get_meta_txnid(const unsigned meta_id) { - switch (meta_id) { - default: - assert(false); - error("unexpected meta_id %u\n", meta_id); - return 0; - case 0: - return envinfo.mi_meta0_txnid; - case 1: - return envinfo.mi_meta1_txnid; - case 2: - return envinfo.mi_meta2_txnid; - } -} - -static void print_size(const char *prefix, const uint64_t value, - const char *suffix) { - const char sf[] = - "KMGTPEZY"; /* LY: Kilo, Mega, Giga, Tera, Peta, Exa, Zetta, Yotta! 
*/ - double k = 1024.0; - size_t i; - for (i = 0; sf[i + 1] && value / k > 1000.0; ++i) - k *= 1024; - print("%s%" PRIu64 " (%.2f %cb)%s", prefix, value, value / k, sf[i], suffix); + return err; } int main(int argc, char *argv[]) { int rc; char *prog = argv[0]; char *envname; - unsigned problems_maindb = 0, problems_freedb = 0, problems_meta = 0; - bool write_locked = false; - bool turn_meta = false; - bool force_turn_meta = false; bool warmup = false; MDBX_warmup_flags_t warmup_flags = MDBX_warmup_default; + if (argc < 2) + usage(prog); + double elapsed; #if defined(_WIN32) || defined(_WIN64) uint64_t timestamp_start, timestamp_finish; @@ -1176,20 +454,11 @@ int main(int argc, char *argv[]) { #else struct timespec timestamp_start, timestamp_finish; if (clock_gettime(CLOCK_MONOTONIC, &timestamp_start)) { - rc = errno; - error("clock_gettime() failed, error %d %s\n", rc, mdbx_strerror(rc)); + error_fn("clock_gettime", errno); return EXIT_FAILURE_SYS; } #endif - dbi_meta.name.iov_base = MDBX_PGWALK_META; - dbi_free.name.iov_base = MDBX_PGWALK_GC; - dbi_main.name.iov_base = MDBX_PGWALK_MAIN; - atexit(pagemap_cleanup); - - if (argc < 2) - usage(prog); - for (int i; (i = getopt(argc, argv, "uU" "0" @@ -1222,7 +491,10 @@ int main(int argc, char *argv[]) { mdbx_build.options); return EXIT_SUCCESS; case 'v': - verbose++; + if (verbose >= 9 && 0) + usage(prog); + else + verbose += 1; break; case '0': stuck_meta = 0; @@ -1239,8 +511,6 @@ int main(int argc, char *argv[]) { case 'T': turn_meta = force_turn_meta = true; quiet = false; - if (verbose < 2) - verbose = 2; break; case 'q': quiet = true; @@ -1248,27 +518,30 @@ int main(int argc, char *argv[]) { case 'n': break; case 'w': - envflags &= ~MDBX_RDONLY; + env_flags &= ~MDBX_RDONLY; + chk_flags |= MDBX_CHK_READWRITE; #if MDBX_MMAP_INCOHERENT_FILE_WRITE /* Temporary `workaround` for OpenBSD kernel's flaw.
* See https://libmdbx.dqdkfa.ru/dead-github/issues/67 */ - envflags |= MDBX_WRITEMAP; + env_flags |= MDBX_WRITEMAP; #endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ break; case 'c': - envflags = (envflags & ~MDBX_EXCLUSIVE) | MDBX_ACCEDE; + env_flags = (env_flags & ~MDBX_EXCLUSIVE) | MDBX_ACCEDE; break; case 'd': - dont_traversal = true; + chk_flags |= MDBX_CHK_SKIP_BTREE_TRAVERSAL; break; case 's': if (only_subdb.iov_base && strcmp(only_subdb.iov_base, optarg)) usage(prog); - only_subdb.iov_base = optarg; - only_subdb.iov_len = strlen(optarg); + else { + only_subdb.iov_base = optarg; + only_subdb.iov_len = strlen(optarg); + } break; case 'i': - ignore_wrong_order = true; + chk_flags |= MDBX_CHK_IGNORE_ORDER; break; case 'u': warmup = true; @@ -1287,26 +560,29 @@ int main(int argc, char *argv[]) { usage(prog); rc = MDBX_SUCCESS; - if (stuck_meta >= 0 && (envflags & MDBX_EXCLUSIVE) == 0) { - error("exclusive mode is required to using specific meta-page(%d) for " - "checking.\n", - stuck_meta); + if (stuck_meta >= 0 && (env_flags & MDBX_EXCLUSIVE) == 0) { + error_fmt("exclusive mode is required to using specific meta-page(%d) for " + "checking.", + stuck_meta); rc = EXIT_INTERRUPTED; } if (turn_meta) { if (stuck_meta < 0) { - error("meta-page must be specified (by -0, -1 or -2 options) to turn to " - "it.\n"); + error_fmt( + "meta-page must be specified (by -0, -1 or -2 options) to turn to " + "it."); rc = EXIT_INTERRUPTED; } - if (envflags & MDBX_RDONLY) { - error("write-mode must be enabled to turn to the specified meta-page.\n"); + if (env_flags & MDBX_RDONLY) { + error_fmt( + "write-mode must be enabled to turn to the specified meta-page."); rc = EXIT_INTERRUPTED; } - if (only_subdb.iov_base || dont_traversal) { - error( + if (only_subdb.iov_base || (chk_flags & (MDBX_CHK_SKIP_BTREE_TRAVERSAL | + MDBX_CHK_SKIP_KV_TRAVERSAL))) { + error_fmt( "whole database checking with b-tree traversal are required to turn " - "to the specified meta-page.\n"); + "to the specified 
meta-page."); rc = EXIT_INTERRUPTED; } } @@ -1327,13 +603,14 @@ int main(int argc, char *argv[]) { #endif /* !WINDOWS */ envname = argv[optind]; - print("mdbx_chk %s (%s, T-%s)\nRunning for %s in 'read-%s' mode...\n", + print(MDBX_chk_result, + "mdbx_chk %s (%s, T-%s)\nRunning for %s in 'read-%s' mode...", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname, - (envflags & MDBX_RDONLY) ? "only" : "write"); - fflush(nullptr); - mdbx_setup_debug((verbose < MDBX_LOG_TRACE - 1) - ? (MDBX_log_level_t)(verbose + 1) + (env_flags & MDBX_RDONLY) ? "only" : "write"); + lf_flush(); + mdbx_setup_debug((verbose + MDBX_LOG_WARN < MDBX_LOG_TRACE) + ? (MDBX_log_level_t)(verbose + MDBX_LOG_WARN) : MDBX_LOG_TRACE, MDBX_DBG_DUMP | MDBX_DBG_ASSERT | MDBX_DBG_AUDIT | MDBX_DBG_LEGACY_OVERLAP | MDBX_DBG_DONT_UPGRADE, @@ -1341,22 +618,22 @@ int main(int argc, char *argv[]) { rc = mdbx_env_create(&env); if (rc) { - error("mdbx_env_create() failed, error %d %s\n", rc, mdbx_strerror(rc)); + error_fn("mdbx_env_create", rc); return rc < 0 ? EXIT_FAILURE_MDBX : EXIT_FAILURE_SYS; } - rc = mdbx_env_set_maxdbs(env, MDBX_MAX_DBI); + rc = mdbx_env_set_maxdbs(env, CORE_DBS); if (rc) { - error("mdbx_env_set_maxdbs() failed, error %d %s\n", rc, mdbx_strerror(rc)); + error_fn("mdbx_env_set_maxdbs", rc); goto bailout; } if (stuck_meta >= 0) { rc = mdbx_env_open_for_recovery(env, envname, stuck_meta, - (envflags & MDBX_RDONLY) ? false : true); + (env_flags & MDBX_RDONLY) ? 
false : true); } else { - rc = mdbx_env_open(env, envname, envflags, 0); - if ((envflags & MDBX_EXCLUSIVE) && + rc = mdbx_env_open(env, envname, env_flags, 0); + if ((env_flags & MDBX_EXCLUSIVE) && (rc == MDBX_BUSY || #if defined(_WIN32) || defined(_WIN64) rc == ERROR_LOCK_VIOLATION || rc == ERROR_SHARING_VIOLATION @@ -1364,489 +641,51 @@ int main(int argc, char *argv[]) { rc == EBUSY || rc == EAGAIN #endif )) { - envflags &= ~MDBX_EXCLUSIVE; - rc = mdbx_env_open(env, envname, envflags | MDBX_ACCEDE, 0); + env_flags &= ~MDBX_EXCLUSIVE; + rc = mdbx_env_open(env, envname, env_flags | MDBX_ACCEDE, 0); } } if (rc) { - error("mdbx_env_open() failed, error %d %s\n", rc, mdbx_strerror(rc)); - if (rc == MDBX_WANNA_RECOVERY && (envflags & MDBX_RDONLY)) - print("Please run %s in the read-write mode (with '-w' option).\n", prog); + error_fn("mdbx_env_open", rc); + if (rc == MDBX_WANNA_RECOVERY && (env_flags & MDBX_RDONLY)) + print_ln(MDBX_chk_result, + "Please run %s in the read-write mode (with '-w' option).", + prog); goto bailout; } - if (verbose) - print(" - %s mode\n", - (envflags & MDBX_EXCLUSIVE) ? "monopolistic" : "cooperative"); - - if ((envflags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0) { - if (verbose) { - print(" - taking write lock..."); - fflush(nullptr); - } - rc = mdbx_txn_lock(env, false); - if (rc != MDBX_SUCCESS) { - error("mdbx_txn_lock() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - if (verbose) - print(" done\n"); - write_locked = true; - } + print_ln(MDBX_chk_verbose, "%s mode", + (env_flags & MDBX_EXCLUSIVE) ? 
"monopolistic" : "cooperative"); if (warmup) { - if (verbose) { - print(" - warming up..."); - fflush(nullptr); - } + anchor_lineno = print(MDBX_chk_verbose, "warming up..."); + flush(); rc = mdbx_env_warmup(env, nullptr, warmup_flags, 3600 * 65536); if (MDBX_IS_ERROR(rc)) { - error("mdbx_env_warmup(flags %u) failed, error %d %s\n", warmup_flags, rc, - mdbx_strerror(rc)); + error_fn("mdbx_env_warmup", rc); goto bailout; } - if (verbose) - print(" %s\n", rc ? "timeout" : "done"); + suffix(anchor_lineno, rc ? "timeout" : "done"); } - rc = mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &txn); + rc = mdbx_env_chk(env, &cb, &chk, chk_flags, + MDBX_chk_result + (verbose << MDBX_chk_severity_prio_shift), + 0); if (rc) { - error("mdbx_txn_begin() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - - rc = mdbx_env_info_ex(env, txn, &envinfo, sizeof(envinfo)); - if (rc) { - error("mdbx_env_info_ex() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - if (verbose) { - print(" - current boot-id "); - if (envinfo.mi_bootid.current.x | envinfo.mi_bootid.current.y) - print("%016" PRIx64 "-%016" PRIx64 "\n", envinfo.mi_bootid.current.x, - envinfo.mi_bootid.current.y); - else - print("unavailable\n"); - } - - mdbx_filehandle_t dxb_fd; - rc = mdbx_env_get_fd(env, &dxb_fd); - if (rc) { - error("mdbx_env_get_fd() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - - uint64_t dxb_filesize = 0; -#if defined(_WIN32) || defined(_WIN64) - { - BY_HANDLE_FILE_INFORMATION info; - if (!GetFileInformationByHandle(dxb_fd, &info)) - rc = GetLastError(); - else - dxb_filesize = info.nFileSizeLow | (uint64_t)info.nFileSizeHigh << 32; - } -#else - { - struct stat st; - STATIC_ASSERT_MSG(sizeof(off_t) <= sizeof(uint64_t), - "libmdbx requires 64-bit file I/O on 64-bit systems"); - if (fstat(dxb_fd, &st)) - rc = errno; - else - dxb_filesize = st.st_size; - } -#endif - if (rc) { - error("osal_filesize() failed, error %d %s\n", rc, mdbx_strerror(rc)); 
- goto bailout; - } - - errno = 0; - const uint64_t dxbfile_pages = dxb_filesize / envinfo.mi_dxb_pagesize; - alloc_pages = txn->mt_next_pgno; - backed_pages = envinfo.mi_geo.current / envinfo.mi_dxb_pagesize; - if (backed_pages > dxbfile_pages) { - print(" ! backed-pages %" PRIu64 " > file-pages %" PRIu64 "\n", - backed_pages, dxbfile_pages); - ++problems_meta; - } - if (dxbfile_pages < NUM_METAS) - print(" ! file-pages %" PRIu64 " < %u\n", dxbfile_pages, NUM_METAS); - if (backed_pages < NUM_METAS) - print(" ! backed-pages %" PRIu64 " < %u\n", backed_pages, NUM_METAS); - if (backed_pages < NUM_METAS || dxbfile_pages < NUM_METAS) - goto bailout; - if (backed_pages > MAX_PAGENO + 1) { - print(" ! backed-pages %" PRIu64 " > max-pages %" PRIaPGNO "\n", - backed_pages, MAX_PAGENO + 1); - ++problems_meta; - backed_pages = MAX_PAGENO + 1; - } - - if ((envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) { - if (backed_pages > dxbfile_pages) { - print(" ! backed-pages %" PRIu64 " > file-pages %" PRIu64 "\n", - backed_pages, dxbfile_pages); - ++problems_meta; - backed_pages = dxbfile_pages; - } - if (alloc_pages > backed_pages) { - print(" ! alloc-pages %" PRIu64 " > backed-pages %" PRIu64 "\n", - alloc_pages, backed_pages); - ++problems_meta; - alloc_pages = backed_pages; - } - } else { - /* LY: DB may be shrunk by writer down to the allocated pages. */ - if (alloc_pages > backed_pages) { - print(" ! alloc-pages %" PRIu64 " > backed-pages %" PRIu64 "\n", - alloc_pages, backed_pages); - ++problems_meta; - alloc_pages = backed_pages; - } - if (alloc_pages > dxbfile_pages) { - print(" ! 
alloc-pages %" PRIu64 " > file-pages %" PRIu64 "\n", - alloc_pages, dxbfile_pages); - ++problems_meta; - alloc_pages = dxbfile_pages; - } - if (backed_pages > dxbfile_pages) - backed_pages = dxbfile_pages; - } - - if (verbose) { - print(" - pagesize %u (%u system), max keysize %d..%d" - ", max readers %u\n", - envinfo.mi_dxb_pagesize, envinfo.mi_sys_pagesize, - mdbx_env_get_maxkeysize_ex(env, MDBX_DUPSORT), - mdbx_env_get_maxkeysize_ex(env, 0), envinfo.mi_maxreaders); - print_size(" - mapsize ", envinfo.mi_mapsize, "\n"); - if (envinfo.mi_geo.lower == envinfo.mi_geo.upper) - print_size(" - fixed datafile: ", envinfo.mi_geo.current, ""); - else { - print_size(" - dynamic datafile: ", envinfo.mi_geo.lower, ""); - print_size(" .. ", envinfo.mi_geo.upper, ", "); - print_size("+", envinfo.mi_geo.grow, ", "); - print_size("-", envinfo.mi_geo.shrink, "\n"); - print_size(" - current datafile: ", envinfo.mi_geo.current, ""); - } - printf(", %" PRIu64 " pages\n", - envinfo.mi_geo.current / envinfo.mi_dxb_pagesize); -#if defined(_WIN32) || defined(_WIN64) - if (envinfo.mi_geo.shrink && envinfo.mi_geo.current != envinfo.mi_geo.upper) - print( - " WARNING: Due Windows system limitations a " - "file couldn't\n be truncated while the database " - "is opened. 
So, the size\n database file " - "of may by large than the database itself,\n " - "until it will be closed or reopened in read-write mode.\n"); -#endif - verbose_meta(0, envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_bootid.meta0.x, envinfo.mi_bootid.meta0.y); - verbose_meta(1, envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, - envinfo.mi_bootid.meta1.x, envinfo.mi_bootid.meta1.y); - verbose_meta(2, envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, - envinfo.mi_bootid.meta2.x, envinfo.mi_bootid.meta2.y); - } - - if (stuck_meta >= 0) { - if (verbose) { - print(" - skip checking meta-pages since the %u" - " is selected for verification\n", - stuck_meta); - print(" - transactions: recent %" PRIu64 - ", selected for verification %" PRIu64 ", lag %" PRIi64 "\n", - envinfo.mi_recent_txnid, get_meta_txnid(stuck_meta), - envinfo.mi_recent_txnid - get_meta_txnid(stuck_meta)); - } - } else { - if (verbose > 1) - print(" - performs check for meta-pages clashes\n"); - if (meta_eq(envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign, - envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign)) { - print(" ! meta-%d and meta-%d are clashed\n", 0, 1); - ++problems_meta; - } - if (meta_eq(envinfo.mi_meta1_txnid, envinfo.mi_meta1_sign, - envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign)) { - print(" ! meta-%d and meta-%d are clashed\n", 1, 2); - ++problems_meta; - } - if (meta_eq(envinfo.mi_meta2_txnid, envinfo.mi_meta2_sign, - envinfo.mi_meta0_txnid, envinfo.mi_meta0_sign)) { - print(" ! meta-%d and meta-%d are clashed\n", 2, 0); - ++problems_meta; - } - - const unsigned steady_meta_id = meta_recent(true); - const uint64_t steady_meta_txnid = get_meta_txnid(steady_meta_id); - const unsigned weak_meta_id = meta_recent(false); - const uint64_t weak_meta_txnid = get_meta_txnid(weak_meta_id); - if (envflags & MDBX_EXCLUSIVE) { - if (verbose > 1) - print(" - performs full check recent-txn-id with meta-pages\n"); - if (steady_meta_txnid != envinfo.mi_recent_txnid) { - print(" ! 
steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64 - " != %" PRIi64 ")\n", - steady_meta_id, steady_meta_txnid, envinfo.mi_recent_txnid); - ++problems_meta; - } - } else if (write_locked) { - if (verbose > 1) - print(" - performs lite check recent-txn-id with meta-pages (not a " - "monopolistic mode)\n"); - if (weak_meta_txnid != envinfo.mi_recent_txnid) { - print(" ! weak meta-%d txn-id mismatch recent-txn-id (%" PRIi64 - " != %" PRIi64 ")\n", - weak_meta_id, weak_meta_txnid, envinfo.mi_recent_txnid); - ++problems_meta; - } - } else if (verbose) { - print(" - skip check recent-txn-id with meta-pages (monopolistic or " - "read-write mode only)\n"); - } - total_problems += problems_meta; - - if (verbose) - print(" - transactions: recent %" PRIu64 ", latter reader %" PRIu64 - ", lag %" PRIi64 "\n", - envinfo.mi_recent_txnid, envinfo.mi_latter_reader_txnid, - envinfo.mi_recent_txnid - envinfo.mi_latter_reader_txnid); - } - - if (!dont_traversal) { - struct problem *saved_list; - size_t traversal_problems; - uint64_t empty_pages, lost_bytes; - - print("Traversal b-tree by txn#%" PRIaTXN "...\n", txn->mt_txnid); - fflush(nullptr); - walk.pagemap = osal_calloc((size_t)backed_pages, sizeof(*walk.pagemap)); - if (!walk.pagemap) { - rc = errno ? 
errno : MDBX_ENOMEM; - error("calloc() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - - saved_list = problems_push(); - rc = mdbx_env_pgwalk(txn, pgvisitor, nullptr, - true /* always skip key ordering checking to avoid - MDBX_CORRUPTED when using custom comparators */); - traversal_problems = problems_pop(saved_list); - - if (rc) { - if (rc != MDBX_EINTR || !check_user_break()) - error("mdbx_env_pgwalk() failed, error %d %s\n", rc, mdbx_strerror(rc)); - goto bailout; - } - - for (uint64_t n = 0; n < alloc_pages; ++n) - if (!walk.pagemap[n]) - unused_pages += 1; - - empty_pages = lost_bytes = 0; - for (walk_dbi_t *dbi = &dbi_main; - dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi) { - empty_pages += dbi->pages.empty; - lost_bytes += dbi->lost_bytes; - } - - if (verbose) { - uint64_t total_page_bytes = walk.pgcount * envinfo.mi_dxb_pagesize; - print(" - pages: walked %" PRIu64 ", left/unused %" PRIu64 "\n", - walk.pgcount, unused_pages); - if (verbose > 1) { - for (walk_dbi_t *dbi = walk.dbi; - dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi) { - print(" %s: subtotal %" PRIu64, sdb_name(&dbi->name), - dbi->pages.total); - if (dbi->pages.other && dbi->pages.other != dbi->pages.total) - print(", other %" PRIu64, dbi->pages.other); - if (dbi->pages.branch) - print(", branch %" PRIu64, dbi->pages.branch); - if (dbi->pages.large_count) - print(", large %" PRIu64, dbi->pages.large_count); - uint64_t all_leaf = dbi->pages.leaf + dbi->pages.leaf_dupfixed; - if (all_leaf) { - print(", leaf %" PRIu64, all_leaf); - if (verbose > 2 && - (dbi->pages.subleaf_dupsort | dbi->pages.leaf_dupfixed | - dbi->pages.subleaf_dupfixed)) - print(" (usual %" PRIu64 ", sub-dupsort %" PRIu64 - ", dupfixed %" PRIu64 ", sub-dupfixed %" PRIu64 ")", - dbi->pages.leaf, dbi->pages.subleaf_dupsort, - dbi->pages.leaf_dupfixed, dbi->pages.subleaf_dupfixed); - } - print("\n"); - } - } - - if (verbose > 1) - print(" - usage: total %" PRIu64 " bytes, payload %" PRIu64 - 
" (%.1f%%), unused " - "%" PRIu64 " (%.1f%%)\n", - total_page_bytes, walk.total_payload_bytes, - walk.total_payload_bytes * 100.0 / total_page_bytes, - total_page_bytes - walk.total_payload_bytes, - (total_page_bytes - walk.total_payload_bytes) * 100.0 / - total_page_bytes); - if (verbose > 2) { - for (walk_dbi_t *dbi = walk.dbi; - dbi < ARRAY_END(walk.dbi) && dbi->name.iov_base; ++dbi) - if (dbi->pages.total) { - uint64_t dbi_bytes = dbi->pages.total * envinfo.mi_dxb_pagesize; - print(" %s: subtotal %" PRIu64 " bytes (%.1f%%)," - " payload %" PRIu64 " (%.1f%%), unused %" PRIu64 " (%.1f%%)", - sdb_name(&dbi->name), dbi_bytes, - dbi_bytes * 100.0 / total_page_bytes, dbi->payload_bytes, - dbi->payload_bytes * 100.0 / dbi_bytes, - dbi_bytes - dbi->payload_bytes, - (dbi_bytes - dbi->payload_bytes) * 100.0 / dbi_bytes); - if (dbi->pages.empty) - print(", %" PRIu64 " empty pages", dbi->pages.empty); - if (dbi->lost_bytes) - print(", %" PRIu64 " bytes lost", dbi->lost_bytes); - print("\n"); - } else - print(" %s: empty\n", sdb_name(&dbi->name)); - } - print(" - summary: average fill %.1f%%", - walk.total_payload_bytes * 100.0 / total_page_bytes); - if (empty_pages) - print(", %" PRIu64 " empty pages", empty_pages); - if (lost_bytes) - print(", %" PRIu64 " bytes lost", lost_bytes); - print(", %" PRIuPTR " problems\n", traversal_problems); - } - } else if (verbose) { - print("Skipping b-tree walk...\n"); - fflush(nullptr); - } - - if (gc_tree_problems) { - print("Skip processing %s since %s is corrupted (%u problems)\n", "@GC", - "b-tree", gc_tree_problems); - problems_freedb = gc_tree_problems; - } else - problems_freedb = process_db(FREE_DBI, MDBX_PGWALK_GC, handle_freedb); - - if (verbose) { - uint64_t value = envinfo.mi_mapsize / envinfo.mi_dxb_pagesize; - double percent = value / 100.0; - print(" - space: %" PRIu64 " total pages", value); - print(", backed %" PRIu64 " (%.1f%%)", backed_pages, - backed_pages / percent); - print(", allocated %" PRIu64 " (%.1f%%)", 
alloc_pages, - alloc_pages / percent); - - if (verbose > 1) { - value = envinfo.mi_mapsize / envinfo.mi_dxb_pagesize - alloc_pages; - print(", remained %" PRIu64 " (%.1f%%)", value, value / percent); - - value = dont_traversal ? alloc_pages - gc_pages : walk.pgcount; - print(", used %" PRIu64 " (%.1f%%)", value, value / percent); - - print(", gc %" PRIu64 " (%.1f%%)", gc_pages, gc_pages / percent); - - value = gc_pages - reclaimable_pages; - print(", detained %" PRIu64 " (%.1f%%)", value, value / percent); - - print(", reclaimable %" PRIu64 " (%.1f%%)", reclaimable_pages, - reclaimable_pages / percent); - } - - value = envinfo.mi_mapsize / envinfo.mi_dxb_pagesize - alloc_pages + - reclaimable_pages; - print(", available %" PRIu64 " (%.1f%%)\n", value, value / percent); - } - - if ((problems_maindb = data_tree_problems) == 0 && problems_freedb == 0) { - if (!dont_traversal && - (envflags & (MDBX_EXCLUSIVE | MDBX_RDONLY)) != MDBX_RDONLY) { - if (walk.pgcount != alloc_pages - gc_pages) { - error("used pages mismatch (%" PRIu64 "(walked) != %" PRIu64 - "(allocated - GC))\n", - walk.pgcount, alloc_pages - gc_pages); - } - if (unused_pages != gc_pages) { - error("GC pages mismatch (%" PRIu64 "(expected) != %" PRIu64 "(GC))\n", - unused_pages, gc_pages); - } - } else if (verbose) { - print(" - skip check used and GC pages (btree-traversal with " - "monopolistic or read-write mode only)\n"); - } - - problems_maindb = process_db(~0u, /* MAIN_DBI */ nullptr, nullptr); - if (problems_maindb == 0) { - print("Scanning %s for %s...\n", "@MAIN", "sub-database(s)"); - if (!process_db(MAIN_DBI, nullptr, handle_maindb)) { - if (!userdb_count && verbose) - print(" - does not contain multiple databases\n"); - } - } else { - print("Skip processing %s since %s is corrupted (%u problems)\n", - "sub-database(s)", "@MAIN", problems_maindb); - } - } else { - print("Skip processing %s since %s is corrupted (%u problems)\n", "@MAIN", - "b-tree", data_tree_problems); - } - - if (rc == 0 && 
total_problems == 1 && problems_meta == 1 && !dont_traversal && - (envflags & MDBX_RDONLY) == 0 && !only_subdb.iov_base && stuck_meta < 0 && - get_meta_txnid(meta_recent(true)) < envinfo.mi_recent_txnid) { - print("Perform sync-to-disk for make steady checkpoint at txn-id #%" PRIi64 - "\n", - envinfo.mi_recent_txnid); - fflush(nullptr); - if (write_locked) { - mdbx_txn_unlock(env); - write_locked = false; - } - rc = mdbx_env_sync_ex(env, true, false); - if (rc != MDBX_SUCCESS) - error("mdbx_env_pgwalk() failed, error %d %s\n", rc, mdbx_strerror(rc)); - else { - total_problems -= 1; - problems_meta -= 1; - } - } - - if (turn_meta && stuck_meta >= 0 && !dont_traversal && !only_subdb.iov_base && - (envflags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == MDBX_EXCLUSIVE) { - const bool successful_check = (rc | total_problems | problems_meta) == 0; - if (successful_check || force_turn_meta) { - fflush(nullptr); - print(" = Performing turn to the specified meta-page (%d) due to %s!\n", - stuck_meta, - successful_check ? "successful check" : "the -T option was given"); - fflush(nullptr); - rc = mdbx_env_turn_for_recovery(env, stuck_meta); - if (rc != MDBX_SUCCESS) - error("mdbx_env_turn_for_recovery() failed, error %d %s\n", rc, - mdbx_strerror(rc)); - } else { - print(" = Skipping turn to the specified meta-page (%d) due to " - "unsuccessful check!\n", - stuck_meta); - } + if (chk.result.total_problems == 0) + error_fn("mdbx_env_chk", rc); + else if (rc != MDBX_EINTR && rc != MDBX_RESULT_TRUE && !user_break) + rc = 0; } bailout: - if (txn) - mdbx_txn_abort(txn); - if (write_locked) { - mdbx_txn_unlock(env); - write_locked = false; - } if (env) { - const bool dont_sync = rc != 0 || total_problems; + const bool dont_sync = rc != 0 || chk.result.total_problems; mdbx_env_close_ex(env, dont_sync); } - fflush(nullptr); + flush(); if (rc) { - if (rc < 0) + if (rc > 0) return user_break ? 
EXIT_INTERRUPTED : EXIT_FAILURE_SYS; return EXIT_FAILURE_MDBX; } @@ -1856,21 +695,24 @@ bailout: elapsed = (timestamp_finish - timestamp_start) * 1e-3; #else if (clock_gettime(CLOCK_MONOTONIC, &timestamp_finish)) { - rc = errno; - error("clock_gettime() failed, error %d %s\n", rc, mdbx_strerror(rc)); + error_fn("clock_gettime", errno); return EXIT_FAILURE_SYS; } elapsed = timestamp_finish.tv_sec - timestamp_start.tv_sec + (timestamp_finish.tv_nsec - timestamp_start.tv_nsec) * 1e-9; #endif /* !WINDOWS */ - if (total_problems) { - print("Total %u error%s detected, elapsed %.3f seconds.\n", total_problems, - (total_problems > 1) ? "s are" : " is", elapsed); - if (problems_meta || problems_maindb || problems_freedb) + if (chk.result.total_problems) { + print_ln(MDBX_chk_result, + "Total %" PRIuSIZE " error%s detected, elapsed %.3f seconds.", + chk.result.total_problems, + (chk.result.total_problems > 1) ? "s are" : " is", elapsed); + if (chk.result.problems_meta || chk.result.problems_kv || + chk.result.problems_gc) return EXIT_FAILURE_CHECK_MAJOR; return EXIT_FAILURE_CHECK_MINOR; } - print("No error is detected, elapsed %.3f seconds\n", elapsed); + print_ln(MDBX_chk_result, "No error is detected, elapsed %.3f seconds.", + elapsed); return EXIT_SUCCESS; } From 786da2b089ceabaa567d3170a051eab45a8d37df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 11 Oct 2023 09:07:52 +0300 Subject: [PATCH 005/137] =?UTF-8?q?mdbx-tools:=20=D0=B2=D1=8B=D0=B2=D0=BE?= =?UTF-8?q?=D0=B4=20=D0=B8=D0=BD=D1=84=D0=BE=D1=80=D0=BC=D0=B0=D1=86=D0=B8?= =?UTF-8?q?=D0=B8=20=D0=BE=D0=B1=20=D1=83=D1=80=D0=BE=D0=B2=D0=BD=D0=B5=20?= =?UTF-8?q?=D0=B4=D0=B5=D1=82=D0=B0=D0=BB=D0=B8=D0=B7=D0=B0=D1=86=D0=B8?= =?UTF-8?q?=D0=B8/verbosity.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mdbx_chk.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+),
4 deletions(-) diff --git a/src/mdbx_chk.c b/src/mdbx_chk.c index c590253d..55e6f98d 100644 --- a/src/mdbx_chk.c +++ b/src/mdbx_chk.c @@ -369,7 +369,7 @@ static void usage(char *prog) { "usage: %s " "[-V] [-v] [-q] [-c] [-0|1|2] [-w] [-d] [-i] [-s subdb] [-u|U] dbpath\n" " -V\t\tprint version and exit\n" - " -v\t\tmore verbose, could be repeated upto 9 times\n" + " -v\t\tmore verbose, could be repeated upto 9 times for extra details\n" " -q\t\tbe quiet\n" " -c\t\tforce cooperative mode (don't try exclusive)\n" " -w\t\twrite-mode checking\n" @@ -493,8 +493,14 @@ int main(int argc, char *argv[]) { case 'v': if (verbose >= 9 && 0) usage(prog); - else + else { verbose += 1; + if (verbose == 0 && !MDBX_DEBUG) + printf("Verbosity level %u exposures only to" + " a debug/extra-logging-enabled builds (with NDEBUG undefined" + " or MDBX_DEBUG > 0)\n", + verbose); + } break; case '0': stuck_meta = 0; @@ -604,10 +610,15 @@ int main(int argc, char *argv[]) { envname = argv[optind]; print(MDBX_chk_result, - "mdbx_chk %s (%s, T-%s)\nRunning for %s in 'read-%s' mode...", + "mdbx_chk %s (%s, T-%s)\nRunning for %s in 'read-%s' mode with " + "verbosity level %u (%s)...", mdbx_version.git.describe, mdbx_version.git.datetime, mdbx_version.git.tree, envname, - (env_flags & MDBX_RDONLY) ? "only" : "write"); + (env_flags & MDBX_RDONLY) ? "only" : "write", verbose, + (verbose > 8) + ? (MDBX_DEBUG ? "extra details for debugging" + : "same as 8 for non-debug builds with MDBX_DEBUG=0") + : "of 0..9"); lf_flush(); mdbx_setup_debug((verbose + MDBX_LOG_WARN < MDBX_LOG_TRACE) ? 
(MDBX_log_level_t)(verbose + MDBX_LOG_WARN) From cdbcf54af1a050d596d8203095daa6651d719189 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 11 Oct 2023 13:40:41 +0300 Subject: [PATCH 006/137] =?UTF-8?q?mdbx-tests:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`--read-var-info=3Dyes`?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20Valgrind.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 +- GNUmakefile | 2 +- test/long_stochastic.sh | 2 +- test/stochastic_small.sh | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 33e6233d..50bd1b4b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -305,7 +305,7 @@ else() "${CMAKE_CURRENT_SOURCE_DIR}/test/valgrind_suppress.txt" CACHE FILEPATH "Suppressions file for Valgrind" FORCE) set(MEMORYCHECK_COMMAND_OPTIONS - "--trace-children=yes --leak-check=full --track-origins=yes --error-exitcode=42 --error-markers=@ --errors-for-leak-kinds=definite --fair-sched=yes --suppressions=${MEMORYCHECK_SUPPRESSIONS_FILE}" + "--trace-children=yes --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --error-markers=@ --errors-for-leak-kinds=definite --fair-sched=yes --suppressions=${MEMORYCHECK_SUPPRESSIONS_FILE}" CACHE STRING "Valgrind options" FORCE) set(VALGRIND_COMMAND_OPTIONS "${MEMORYCHECK_COMMAND_OPTIONS}" CACHE STRING "Valgrind options" FORCE) endif() diff --git a/GNUmakefile b/GNUmakefile index 566feee1..c8d79a95 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -433,7 +433,7 @@ test-valgrind: build-test @echo ' RUNNING `test/long_stochastic.sh --with-valgrind --loops 2`...' 
$(QUIET)test/long_stochastic.sh --with-valgrind --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) -memcheck: VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt +memcheck: VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt memcheck: CFLAGS_EXTRA=-Ofast -DMDBX_USE_VALGRIND memcheck: build-test @echo " SMOKE \`mdbx_test basic\` under Valgrind's memcheck..." diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index 900c1319..491ec695 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -62,7 +62,7 @@ do echo " For instance, when the process 'A' explicitly marks a memory" echo " region as 'undefined', the process 'B' fill it," echo " and after this process 'A' read such region, etc." - MONITOR="valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt" + MONITOR="valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt" rm -f valgrind-*.log ;; --skip-make) diff --git a/test/stochastic_small.sh b/test/stochastic_small.sh index 50497f85..20785a22 100755 --- a/test/stochastic_small.sh +++ b/test/stochastic_small.sh @@ -60,7 +60,7 @@ do echo " For instance, when the process 'A' explicitly marks a memory" echo " region as 'undefined', the process 'B' fill it," echo " and after this process 'A' read such region, etc." 
- MONITOR="valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt" + MONITOR="valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt" rm -f valgrind-*.log ;; --skip-make) From fc1685a178044ca2a2ad3b629f77281b133a0545 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 11 Oct 2023 12:26:09 +0300 Subject: [PATCH 007/137] =?UTF-8?q?mdbx:=20`STATIC=5FASSERT()`=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20`MDBX=5FTXN=5FRDONLY=5FPREPARE`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core.c b/src/core.c index fec25bed..380ab461 100644 --- a/src/core.c +++ b/src/core.c @@ -8828,6 +8828,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { r = brs.rslot; } txn->to.reader = r; + STATIC_ASSERT(MDBX_TXN_RDONLY_PREPARE > MDBX_TXN_RDONLY); if (flags & (MDBX_TXN_RDONLY_PREPARE - MDBX_TXN_RDONLY)) { eASSERT(env, txn->mt_txnid == 0); eASSERT(env, txn->mt_owner == 0); From 224f26813e60e371c3779ec1e8d0d987d0adbfb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 11 Oct 2023 12:27:31 +0300 Subject: [PATCH 008/137] =?UTF-8?q?mdbx:=20=D0=B2=D0=BE=D0=B7=D0=B2=D1=80?= =?UTF-8?q?=D0=B0=D1=89=D0=B5=D0=BD=D0=B8=D0=B5=20`MDBX=5FTXN=5FINVALID`?= =?UTF-8?q?=20(`INT32=5FMIN`)=20=D0=B8=D0=B7=20`mdbx=5Ftxn=5Fflags()`=20?= =?UTF-8?q?=D0=BF=D1=80=D0=B8=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B4=D0=B0=D1=87?= =?UTF-8?q?=D0=B5=20=D0=BD=D0=B5=D0=B2=D0=B0=D0=BB=D0=B8=D0=B4=D0=BD=D0=BE?= =?UTF-8?q?=D0=B9=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8?= 
=?UTF-8?q?=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 2 +- src/core.c | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/mdbx.h b/mdbx.h index c94bde3f..84355922 100644 --- a/mdbx.h +++ b/mdbx.h @@ -3764,7 +3764,7 @@ mdbx_txn_env(const MDBX_txn *txn); * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). * * \returns A transaction flags, valid if input is an valid transaction, - * otherwise -1. */ + * otherwise \ref MDBX_TXN_INVALID. */ MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_txn_flags(const MDBX_txn *txn); /** \brief Return the transaction's ID. diff --git a/src/core.c b/src/core.c index 380ab461..911f92ea 100644 --- a/src/core.c +++ b/src/core.c @@ -9570,10 +9570,13 @@ uint64_t mdbx_txn_id(const MDBX_txn *txn) { } int mdbx_txn_flags(const MDBX_txn *txn) { - if (unlikely(!txn || txn->mt_signature != MDBX_MT_SIGNATURE)) { - assert((-1 & (int)MDBX_TXN_INVALID) != 0); - return -1; - } + STATIC_ASSERT( + (MDBX_TXN_INVALID & + (MDBX_TXN_FINISHED | MDBX_TXN_ERROR | MDBX_TXN_DIRTY | MDBX_TXN_SPILLS | + MDBX_TXN_HAS_CHILD | MDBX_TXN_DRAINED_GC | MDBX_SHRINK_ALLOWED | + MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_RO_BEGIN_FLAGS)) == 0); + if (unlikely(!txn || txn->mt_signature != MDBX_MT_SIGNATURE)) + return MDBX_TXN_INVALID; assert(0 == (int)(txn->mt_flags & MDBX_TXN_INVALID)); return txn->mt_flags; } From a67b9b972989548dfb52e436d0423adc2527cc6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 11 Oct 2023 23:05:50 +0300 Subject: [PATCH 009/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20`env=5Finfo=5Fsnap()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 52 +++++++++++++++++++++++++--------------------------- 1 file changed, 25 
insertions(+), 27 deletions(-) diff --git a/src/core.c b/src/core.c index 911f92ea..dab0d468 100644 --- a/src/core.c +++ b/src/core.c @@ -22260,6 +22260,8 @@ __cold static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, meta_troika_t *const troika) { const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); + if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) + return MDBX_PANIC; /* is the environment open? * (https://libmdbx.dqdkfa.ru/dead-github/issues/171) */ @@ -22287,18 +22289,12 @@ __cold static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, #endif } + *troika = (txn && !(txn->mt_flags & MDBX_TXN_RDONLY)) ? txn->tw.troika + : meta_tap(env); + const meta_ptr_t head = meta_recent(env, troika); const MDBX_meta *const meta0 = METAPAGE(env, 0); const MDBX_meta *const meta1 = METAPAGE(env, 1); const MDBX_meta *const meta2 = METAPAGE(env, 2); - if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) - return MDBX_PANIC; - - if (txn && !(txn->mt_flags & MDBX_TXN_RDONLY)) - *troika = txn->tw.troika; - else - *troika = meta_tap(env); - - const meta_ptr_t head = meta_recent(env, troika); out->mi_recent_txnid = head.txnid; out->mi_meta_txnid[0] = troika->txnid[0]; out->mi_meta_sign[0] = unaligned_peek_u64(4, meta0->mm_sign); @@ -22330,11 +22326,6 @@ __cold static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, out->mi_geo.upper = pgno2bytes(env, txn_meta->mm_geo.upper); out->mi_geo.shrink = pgno2bytes(env, pv2pages(txn_meta->mm_geo.shrink_pv)); out->mi_geo.grow = pgno2bytes(env, pv2pages(txn_meta->mm_geo.grow_pv)); - const uint64_t unsynced_pages = - atomic_load64(&env->me_lck->mti_unsynced_pages, mo_Relaxed) + - (atomic_load32(&env->me_lck->mti_meta_sync_txnid, mo_Relaxed) != - (uint32_t)out->mi_recent_txnid); - out->mi_mapsize = env->me_dxb_mmap.limit; const MDBX_lockinfo *const lck = env->me_lck; @@ -22346,6 +22337,10 @@ __cold static int env_info_snap(const 
MDBX_env *env, const MDBX_txn *txn, out->mi_sys_pagesize = env->me_os_psize; if (likely(bytes > size_before_bootid)) { + const uint64_t unsynced_pages = + atomic_load64(&lck->mti_unsynced_pages, mo_Relaxed) + + ((uint32_t)out->mi_recent_txnid != + atomic_load32(&lck->mti_meta_sync_txnid, mo_Relaxed)); out->mi_unsync_volume = pgno2bytes(env, (size_t)unsynced_pages); const uint64_t monotime_now = osal_monotime(); uint64_t ts = atomic_load64(&lck->mti_eoos_timestamp, mo_Relaxed); @@ -22390,25 +22385,27 @@ __cold static int env_info_snap(const MDBX_env *env, const MDBX_txn *txn, out->mi_pgop_stat.fsync = atomic_load64(&lck->mti_pgop_stat.fsync, mo_Relaxed); #else - memset(&arg->mi_pgop_stat, 0, sizeof(arg->mi_pgop_stat)); + memset(&out->mi_pgop_stat, 0, sizeof(out->mi_pgop_stat)); #endif /* MDBX_ENABLE_PGOP_STAT*/ } - out->mi_self_latter_reader_txnid = out->mi_latter_reader_txnid = - out->mi_recent_txnid; + txnid_t overall_latter_reader_txnid = out->mi_recent_txnid; + txnid_t self_latter_reader_txnid = overall_latter_reader_txnid; if (env->me_lck_mmap.lck) { for (size_t i = 0; i < out->mi_numreaders; ++i) { const uint32_t pid = atomic_load32(&lck->mti_readers[i].mr_pid, mo_AcquireRelease); if (pid) { const txnid_t txnid = safe64_read(&lck->mti_readers[i].mr_txnid); - if (out->mi_latter_reader_txnid > txnid) - out->mi_latter_reader_txnid = txnid; - if (pid == env->me_pid && out->mi_self_latter_reader_txnid > txnid) - out->mi_self_latter_reader_txnid = txnid; + if (overall_latter_reader_txnid > txnid) + overall_latter_reader_txnid = txnid; + if (pid == env->me_pid && self_latter_reader_txnid > txnid) + self_latter_reader_txnid = txnid; } } } + out->mi_self_latter_reader_txnid = self_latter_reader_txnid; + out->mi_latter_reader_txnid = overall_latter_reader_txnid; osal_compiler_barrier(); return MDBX_SUCCESS; @@ -22421,6 +22418,7 @@ __cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, if (unlikely(rc != MDBX_SUCCESS)) return rc; + eASSERT(env, 
sizeof(snap) >= bytes); while (1) { rc = env_info_snap(env, txn, out, bytes, troika); if (unlikely(rc != MDBX_SUCCESS)) @@ -22439,6 +22437,12 @@ __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, if (unlikely((env == NULL && txn == NULL) || arg == NULL)) return MDBX_EINVAL; + const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); + const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); + if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && + bytes != size_before_pgop_stat) + return MDBX_EINVAL; + if (txn) { int err = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); if (unlikely(err != MDBX_SUCCESS)) @@ -22454,12 +22458,6 @@ __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, env = txn->mt_env; } - const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); - const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); - if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && - bytes != size_before_pgop_stat) - return MDBX_EINVAL; - meta_troika_t troika; return env_info(env, txn, arg, bytes, &troika); } From 5f274eb4c61a418759e16df41aca78ac6b8cefcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 12 Oct 2023 10:16:31 +0300 Subject: [PATCH 010/137] =?UTF-8?q?mdbx:=20=D0=B2=D1=8B=D0=B2=D0=BE=D0=B4?= =?UTF-8?q?=20=D0=B8=D0=BD=D1=84=D0=BE=D1=80=D0=BC=D0=B0=D1=86=D0=B8=D0=B8?= =?UTF-8?q?=20=D0=B8=D0=B7=20`mdbx=5Fenv=5Fchk()`=20=D0=BE=20boot-id=20?= =?UTF-8?q?=D0=B2=20=D0=BA=D0=B0=D0=B6=D0=B4=D0=BE=D0=B9=20=D0=BC=D0=B5?= =?UTF-8?q?=D1=82=D0=B0-=D1=81=D1=82=D1=80=D0=B0=D0=BD=D0=B8=D1=86=D0=B5.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/src/core.c b/src/core.c index 
dab0d468..a84dbbc5 100644 --- a/src/core.c +++ b/src/core.c @@ -26219,7 +26219,13 @@ __cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, &chk->envinfo.mi_bootid.current, sizeof(chk->envinfo.mi_bootid.current)) == 0; - line = chk_print(line, "meta-%u: ", num); + const char *status = "stay"; + if (num == chk->troika.recent) + status = "head"; + else if (num == TROIKA_TAIL(&chk->troika)) + status = "tail"; + line = chk_print(line, "meta-%u: %s, ", num, status); + switch (chk->envinfo.mi_meta_sign[num]) { case MDBX_DATASIGN_NONE: line = chk_puts(line, "no-sync/legacy"); @@ -26235,14 +26241,14 @@ __cold static void chk_verbose_meta(MDBX_chk_scope_t *const scope, break; } const txnid_t meta_txnid = chk->envinfo.mi_meta_txnid[num]; - line = chk_print(line, " txn#%" PRIaTXN, meta_txnid); - - const char *status = "stay"; - if (num == chk->troika.recent) - status = "head"; - else if (num == TROIKA_TAIL(&chk->troika)) - status = "tail"; - line = chk_print(line, ", %s", status); + line = chk_print(line, " txn#%" PRIaTXN ", ", meta_txnid); + if (chk->envinfo.mi_bootid.meta[num].x | chk->envinfo.mi_bootid.meta[num].y) + line = chk_print(line, "boot-id %" PRIx64 "-%" PRIx64 " (%s)", + chk->envinfo.mi_bootid.meta[num].x, + chk->envinfo.mi_bootid.meta[num].y, + bootid_match ? 
"live" : "not match"); + else + line = chk_puts(line, "no boot-id"); if (env->me_stuck_meta >= 0) { if (num == (unsigned)env->me_stuck_meta) From d28a397b2d927213eedb11d4b98100c98f28d360 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 13 Oct 2023 17:36:21 +0300 Subject: [PATCH 011/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fcursor=5Funbind()`=20?= =?UTF-8?q?=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 22 +++++++++++++++++++ mdbx.h++ | 7 ++++++ src/core.c | 64 +++++++++++++++++++++++++++++++++--------------------- 3 files changed, 68 insertions(+), 25 deletions(-) diff --git a/mdbx.h b/mdbx.h index 84355922..2c062316 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4732,6 +4732,28 @@ mdbx_cursor_get_userctx(const MDBX_cursor *cursor); LIBMDBX_API int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *cursor, MDBX_dbi dbi); +/** \brief Unbind cursor from a transaction. + * \ingroup c_cursors + * + * Unbinded cursor is disassociated with any transactions but still holds + * the original DBI-handle internally. Thus it could be renewed with any running + * transaction or closed. + * + * \see mdbx_cursor_renew() + * \see mdbx_cursor_bind() + * \see mdbx_cursor_close() + * + * \note In contrast to LMDB, the MDBX required that any opened cursors can be + * reused and must be freed explicitly, regardless ones was opened in a + * read-only or write transaction. The REASON for this is eliminates ambiguity + * which helps to avoid errors such as: use-after-free, double-free, i.e. + * memory corruption and segfaults. + * + * \param [in] cursor A cursor handle returned by \ref mdbx_cursor_open(). + * + * \returns A non-zero error value on failure and 0 on success. 
*/ +LIBMDBX_API int mdbx_cursor_unbind(MDBX_cursor *cursor); + /** \brief Create a cursor handle for the specified transaction and DBI handle. * \ingroup c_cursors * diff --git a/mdbx.h++ b/mdbx.h++ index d4cd7077..e3607b61 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4223,6 +4223,9 @@ public: /// map handle. inline void bind(::mdbx::txn &txn, ::mdbx::map_handle map_handle); + /// \brief Unbind cursor from a transaction. + inline void unbind(); + /// \brief Returns the cursor's transaction. inline ::mdbx::txn txn() const; inline map_handle map() const; @@ -6110,6 +6113,10 @@ inline void cursor::bind(::mdbx::txn &txn, ::mdbx::map_handle map_handle) { error::success_or_throw(::mdbx_cursor_bind(txn, handle_, map_handle.dbi)); } +inline void cursor::unbind() { + error::success_or_throw(::mdbx_cursor_unbind(handle_)); +} + inline txn cursor::txn() const { MDBX_txn *txn = ::mdbx_cursor_txn(handle_); error::throw_on_nullptr(txn, MDBX_EINVAL); diff --git a/src/core.c b/src/core.c index a84dbbc5..8e4b364f 100644 --- a/src/core.c +++ b/src/core.c @@ -18846,6 +18846,38 @@ void *mdbx_cursor_get_userctx(const MDBX_cursor *mc) { return couple->mc_userctx; } +int mdbx_cursor_unbind(MDBX_cursor *mc) { + if (unlikely(!mc)) + return MDBX_EINVAL; + + if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_SUCCESS + : MDBX_EBADSIGN; + + if (unlikely(mc->mc_backup)) /* Cursor from parent transaction */ + return MDBX_EINVAL; + + eASSERT(nullptr, mc->mc_txn && mc->mc_txn->mt_signature == MDBX_MT_SIGNATURE); + cASSERT(mc, mc->mc_signature == MDBX_MC_LIVE); + cASSERT(mc, !mc->mc_backup); + if (unlikely(!mc->mc_txn || mc->mc_txn->mt_signature != MDBX_MT_SIGNATURE)) { + ERROR("Wrong cursor's transaction %p 0x%x", + __Wpedantic_format_voidptr(mc->mc_txn), + mc->mc_txn ? 
mc->mc_txn->mt_signature : 0); + return MDBX_PROBLEM; + } + if (mc->mc_flags & C_UNTRACK) { + MDBX_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi]; + while (*prev && *prev != mc) + prev = &(*prev)->mc_next; + cASSERT(mc, *prev == mc); + *prev = mc->mc_next; + } + mc->mc_signature = MDBX_MC_READY4CLOSE; + mc->mc_flags = 0; + return MDBX_SUCCESS; +} + int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { if (unlikely(!mc)) return MDBX_EINVAL; @@ -18871,10 +18903,10 @@ int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { mc->mc_txn != txn)) return MDBX_EINVAL; - assert(mc->mc_db == &txn->mt_dbs[dbi]); - assert(mc->mc_dbx == &txn->mt_dbxs[dbi]); - assert(mc->mc_dbi == dbi); - assert(mc->mc_dbistate == &txn->mt_dbistate[dbi]); + cASSERT(mc, mc->mc_db == &txn->mt_dbs[dbi]); + cASSERT(mc, mc->mc_dbx == &txn->mt_dbxs[dbi]); + cASSERT(mc, mc->mc_dbi == dbi); + cASSERT(mc, mc->mc_dbistate == &txn->mt_dbistate[dbi]); return likely(mc->mc_dbi == dbi && /* paranoia */ mc->mc_signature == MDBX_MC_LIVE && mc->mc_txn == txn) @@ -18883,27 +18915,9 @@ int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { } if (mc->mc_signature == MDBX_MC_LIVE) { - if (unlikely(!mc->mc_txn || - mc->mc_txn->mt_signature != MDBX_MT_SIGNATURE)) { - ERROR("Wrong cursor's transaction %p 0x%x", - __Wpedantic_format_voidptr(mc->mc_txn), - mc->mc_txn ? 
mc->mc_txn->mt_signature : 0); - return MDBX_PROBLEM; - } - if (mc->mc_flags & C_UNTRACK) { - MDBX_cursor **prev = &mc->mc_txn->mt_cursors[mc->mc_dbi]; - while (*prev && *prev != mc) - prev = &(*prev)->mc_next; - cASSERT(mc, *prev == mc); - *prev = mc->mc_next; - } - mc->mc_signature = MDBX_MC_READY4CLOSE; - mc->mc_flags = 0; - mc->mc_dbi = UINT_MAX; - mc->mc_next = NULL; - mc->mc_db = NULL; - mc->mc_dbx = NULL; - mc->mc_dbistate = NULL; + rc = mdbx_cursor_unbind(mc); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; } cASSERT(mc, !(mc->mc_flags & C_UNTRACK)); From 4d3f7e1edc1f3bacc474df6f56677a781eb86a45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 13 Oct 2023 22:38:51 +0300 Subject: [PATCH 012/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Ftxn=5Frelease=5Fall=5F?= =?UTF-8?q?cursors()`=20=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 21 +++++++++++++++++++++ mdbx.h++ | 16 ++++++++++++++++ src/core.c | 26 ++++++++++++++++++++++++++ 3 files changed, 63 insertions(+) diff --git a/mdbx.h b/mdbx.h index 2c062316..68493003 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4803,6 +4803,27 @@ LIBMDBX_API int mdbx_cursor_open(MDBX_txn *txn, MDBX_dbi dbi, * or \ref mdbx_cursor_create(). */ LIBMDBX_API void mdbx_cursor_close(MDBX_cursor *cursor); +/** \brief Unbind or closes all cursors of a given transaction. + * \ingroup c_cursors + * + * Unbinds either closes all cursors associated (opened or renewed) with + * a given transaction in a bulk with minimal overhead. + * + * \see mdbx_cursor_unbind() + * \see mdbx_cursor_close() + * + * \param [in] txn A transaction handle returned by \ref mdbx_txn_begin(). + * \param [in] unbind If non-zero, unbinds cursors and leaves ones reusable. + * Otherwise close and dispose cursors. 
+ * + * \returns A negative error value on failure or the number of closed cursors + * on success, some possible errors are: + * \retval MDBX_THREAD_MISMATCH Given transaction is not owned + * by current thread. + * \retval MDBX_BAD_TXN Given transaction is invalid or has + * a child/nested transaction transaction. */ +LIBMDBX_API int mdbx_txn_release_all_cursors(const MDBX_txn *txn, bool unbind); + /** \brief Renew a cursor handle for use within the given transaction. * \ingroup c_cursors * diff --git a/mdbx.h++ b/mdbx.h++ index e3607b61..216c0631 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3841,6 +3841,15 @@ public: /// \brief Opens cursor for specified key-value map handle. inline cursor_managed open_cursor(map_handle map); + /// \brief Unbind or close all cursors. + inline size_t release_all_cursors(bool unbind) const; + + /// \brief Close all cursors. + inline size_t close_all_cursors() const { return release_all_cursors(false); } + + /// \brief Unbind all cursors. + inline size_t unbind_all_cursors() const { return release_all_cursors(true); } + /// \brief Open existing key-value map. 
inline map_handle open_map( const char *name, @@ -5466,6 +5475,13 @@ inline cursor_managed txn::open_cursor(map_handle map) { return cursor_managed(ptr); } +inline size_t txn::release_all_cursors(bool unbind) const { + int err = ::mdbx_txn_release_all_cursors(handle_, unbind); + if (MDBX_UNLIKELY(err < 0)) + MDBX_CXX20_UNLIKELY error::throw_exception(err); + return size_t(err); +} + inline ::mdbx::map_handle txn::open_map(const char *name, const ::mdbx::key_mode key_mode, const ::mdbx::value_mode value_mode) const { diff --git a/src/core.c b/src/core.c index 8e4b364f..293b4aed 100644 --- a/src/core.c +++ b/src/core.c @@ -19019,6 +19019,32 @@ void mdbx_cursor_close(MDBX_cursor *mc) { } } +int mdbx_txn_release_all_cursors(const MDBX_txn *txn, bool unbind) { + int rc = check_txn(txn, MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD); + if (likely(rc == MDBX_SUCCESS)) { + for (size_t i = FREE_DBI; i < txn->mt_numdbs; ++i) { + while (txn->mt_cursors[i]) { + MDBX_cursor *mc = txn->mt_cursors[i]; + ENSURE(NULL, mc->mc_signature == MDBX_MC_LIVE && + (mc->mc_flags & C_UNTRACK) && !mc->mc_backup); + rc = likely(rc < INT_MAX) ? 
rc + 1 : rc; + txn->mt_cursors[i] = mc->mc_next; + if (unbind) { + mc->mc_signature = MDBX_MC_READY4CLOSE; + mc->mc_flags = 0; + } else { + mc->mc_signature = 0; + mc->mc_next = mc; + osal_free(mc); + } + } + } + } else { + eASSERT(nullptr, rc < 0); + } + return rc; +} + MDBX_txn *mdbx_cursor_txn(const MDBX_cursor *mc) { if (unlikely(!mc || mc->mc_signature != MDBX_MC_LIVE)) return NULL; From 0e4c6d61a4a5455ce4d8f989974bf884a4fab8a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 14 Oct 2023 07:26:19 +0300 Subject: [PATCH 013/137] =?UTF-8?q?mdbx-tools:=20=D0=BD=D0=B5=D1=81=D1=83?= =?UTF-8?q?=D1=89=D0=B5=D1=81=D1=82=D0=B2=D0=B5=D0=BD=D0=BD=D1=8B=D0=B9=20?= =?UTF-8?q?=D1=80=D0=B5=D1=84=D0=B0=D0=BA=D1=82=D0=BE=D1=80=D0=B8=D0=BD?= =?UTF-8?q?=D0=B3=20`mdbx=5Fload`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mdbx_load.c | 235 +++++++++++++++++++++++++----------------------- 1 file changed, 121 insertions(+), 114 deletions(-) diff --git a/src/mdbx_load.c b/src/mdbx_load.c index 552fedc8..d4ff1db9 100644 --- a/src/mdbx_load.c +++ b/src/mdbx_load.c @@ -505,7 +505,7 @@ static int equal_or_greater(const MDBX_val *a, const MDBX_val *b) { } int main(int argc, char *argv[]) { - int i, rc; + int i, err; MDBX_env *env = nullptr; MDBX_txn *txn = nullptr; MDBX_cursor *mc = nullptr; @@ -608,40 +608,45 @@ int main(int argc, char *argv[]) { dbuf.iov_len = 4096; dbuf.iov_base = osal_malloc(dbuf.iov_len); if (!dbuf.iov_base) { - rc = MDBX_ENOMEM; - error("value-buffer", rc); - goto env_close; + err = MDBX_ENOMEM; + error("value-buffer", err); + goto bailout; } /* read first header for mapsize= */ if (!(mode & NOHDR)) { - rc = readhdr(); - if (unlikely(rc != MDBX_SUCCESS)) { - if (rc == EOF) - rc = MDBX_ENODATA; - error("readheader", rc); - goto env_close; + err = readhdr(); + if (unlikely(err != MDBX_SUCCESS)) { + if 
(err == EOF) + err = MDBX_ENODATA; + error("readheader", err); + goto bailout; } } - rc = mdbx_env_create(&env); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_env_create", rc); - return EXIT_FAILURE; + err = mdbx_env_create(&env); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_env_create", err); + goto bailout; + } + + err = mdbx_env_set_maxdbs(env, 2); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_env_set_maxdbs", err); + goto bailout; } - mdbx_env_set_maxdbs(env, 2); if (envinfo.mi_maxreaders) { - rc = mdbx_env_set_maxreaders(env, envinfo.mi_maxreaders); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_env_set_maxreaders", rc); - goto env_close; + err = mdbx_env_set_maxreaders(env, envinfo.mi_maxreaders); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_env_set_maxreaders", err); + goto bailout; } } if (envinfo.mi_geo.current | envinfo.mi_mapsize) { if (envinfo.mi_geo.current) { - rc = mdbx_env_set_geometry( + err = mdbx_env_set_geometry( env, (intptr_t)envinfo.mi_geo.lower, (intptr_t)envinfo.mi_geo.current, (intptr_t)envinfo.mi_geo.upper, (intptr_t)envinfo.mi_geo.shrink, (intptr_t)envinfo.mi_geo.grow, @@ -654,23 +659,23 @@ int main(int argc, char *argv[]) { "Database size is too large for current system (mapsize=%" PRIu64 " is great than system-limit %zu)\n", envinfo.mi_mapsize, (size_t)MAX_MAPSIZE); - goto env_close; + goto bailout; } - rc = mdbx_env_set_geometry( + err = mdbx_env_set_geometry( env, (intptr_t)envinfo.mi_mapsize, (intptr_t)envinfo.mi_mapsize, (intptr_t)envinfo.mi_mapsize, 0, 0, envinfo.mi_dxb_pagesize ? 
(intptr_t)envinfo.mi_dxb_pagesize : -1); } - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_env_set_geometry", rc); - goto env_close; + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_env_set_geometry", err); + goto bailout; } } - rc = mdbx_env_open(env, envname, envflags, 0664); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_env_open", rc); - goto env_close; + err = mdbx_env_open(env, envname, envflags, 0664); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_env_open", err); + goto bailout; } kbuf.iov_len = mdbx_env_get_maxvalsize_ex(env, 0) + (size_t)1; @@ -678,54 +683,54 @@ int main(int argc, char *argv[]) { if (!quiet) fprintf(stderr, "mdbx_env_get_maxkeysize() failed, returns %zu\n", kbuf.iov_len); - goto env_close; + goto bailout; } kbuf.iov_base = malloc(kbuf.iov_len); if (!kbuf.iov_base) { - rc = MDBX_ENOMEM; - error("key-buffer", rc); - goto env_close; + err = MDBX_ENOMEM; + error("key-buffer", err); + goto bailout; } - while (rc == MDBX_SUCCESS) { + while (err == MDBX_SUCCESS) { if (user_break) { - rc = MDBX_EINTR; + err = MDBX_EINTR; break; } - rc = mdbx_txn_begin(env, nullptr, 0, &txn); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_txn_begin", rc); - goto env_close; + err = mdbx_txn_begin(env, nullptr, 0, &txn); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_txn_begin", err); + goto bailout; } if (mode & GLOBAL) { mode -= GLOBAL; if (canary.v | canary.x | canary.y | canary.z) { - rc = mdbx_canary_put(txn, &canary); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_canary_put", rc); - goto txn_abort; + err = mdbx_canary_put(txn, &canary); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_canary_put", err); + goto bailout; } } } const char *const dbi_name = subname ? subname : "@MAIN"; - rc = + err = mdbx_dbi_open_ex(txn, subname, dbi_flags | MDBX_CREATE, &dbi, (putflags & MDBX_APPEND) ? equal_or_greater : nullptr, (putflags & MDBX_APPEND) ? 
equal_or_greater : nullptr); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_dbi_open_ex", rc); - goto txn_abort; + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_dbi_open_ex", err); + goto bailout; } uint64_t present_sequence; - rc = mdbx_dbi_sequence(txn, dbi, &present_sequence, 0); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_dbi_sequence", rc); - goto txn_abort; + err = mdbx_dbi_sequence(txn, dbi, &present_sequence, 0); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_dbi_sequence", err); + goto bailout; } if (present_sequence > sequence) { if (!quiet) @@ -733,22 +738,22 @@ int main(int argc, char *argv[]) { "present sequence for '%s' value (%" PRIu64 ") is greater than loaded (%" PRIu64 ")\n", dbi_name, present_sequence, sequence); - rc = MDBX_RESULT_TRUE; - goto txn_abort; + err = MDBX_RESULT_TRUE; + goto bailout; } if (present_sequence < sequence) { - rc = mdbx_dbi_sequence(txn, dbi, nullptr, sequence - present_sequence); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_dbi_sequence", rc); - goto txn_abort; + err = mdbx_dbi_sequence(txn, dbi, nullptr, sequence - present_sequence); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_dbi_sequence", err); + goto bailout; } } if (purge) { - rc = mdbx_drop(txn, dbi, false); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_drop", rc); - goto txn_abort; + err = mdbx_drop(txn, dbi, false); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_drop", err); + goto bailout; } } @@ -756,85 +761,85 @@ int main(int argc, char *argv[]) { putflags = (dbi_flags & MDBX_DUPSORT) ? 
putflags | MDBX_APPENDDUP : putflags & ~MDBX_APPENDDUP; - rc = mdbx_cursor_open(txn, dbi, &mc); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_cursor_open", rc); - goto txn_abort; + err = mdbx_cursor_open(txn, dbi, &mc); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_cursor_open", err); + goto bailout; } int batch = 0; - while (rc == MDBX_SUCCESS) { + while (err == MDBX_SUCCESS) { MDBX_val key, data; - rc = readline(&key, &kbuf); - if (rc == EOF) + err = readline(&key, &kbuf); + if (err == EOF) break; - if (rc == MDBX_SUCCESS) - rc = readline(&data, &dbuf); - if (rc) { + if (err == MDBX_SUCCESS) + err = readline(&data, &dbuf); + if (err) { if (!quiet) fprintf(stderr, "%s: line %" PRIiSIZE ": failed to read key value\n", prog, lineno); - goto txn_abort; + goto bailout; } - rc = mdbx_cursor_put(mc, &key, &data, putflags); - if (rc == MDBX_KEYEXIST && putflags) + err = mdbx_cursor_put(mc, &key, &data, putflags); + if (err == MDBX_KEYEXIST && putflags) continue; - if (rc == MDBX_BAD_VALSIZE && rescue) { + if (err == MDBX_BAD_VALSIZE && rescue) { if (!quiet) fprintf(stderr, "%s: skip line %" PRIiSIZE ": due %s\n", prog, lineno, - mdbx_strerror(rc)); + mdbx_strerror(err)); continue; } - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_cursor_put", rc); - goto txn_abort; + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_cursor_put", err); + goto bailout; } batch++; MDBX_txn_info txn_info; - rc = mdbx_txn_info(txn, &txn_info, false); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_txn_info", rc); - goto txn_abort; + err = mdbx_txn_info(txn, &txn_info, false); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_txn_info", err); + goto bailout; } if (batch == 10000 || txn_info.txn_space_dirty > MEGABYTE * 256) { - rc = mdbx_txn_commit(txn); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_txn_commit", rc); - goto env_close; + err = mdbx_txn_commit(txn); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_txn_commit", err); + goto bailout; } batch = 0; 
- rc = mdbx_txn_begin(env, nullptr, 0, &txn); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_txn_begin", rc); - goto env_close; + err = mdbx_txn_begin(env, nullptr, 0, &txn); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_txn_begin", err); + goto bailout; } - rc = mdbx_cursor_bind(txn, mc, dbi); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_cursor_bind", rc); - goto txn_abort; + err = mdbx_cursor_bind(txn, mc, dbi); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_cursor_bind", err); + goto bailout; } } } mdbx_cursor_close(mc); mc = nullptr; - rc = mdbx_txn_commit(txn); + err = mdbx_txn_commit(txn); txn = nullptr; - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_txn_commit", rc); - goto env_close; + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_txn_commit", err); + goto bailout; } if (subname) { assert(dbi != MAIN_DBI); - rc = mdbx_dbi_close(env, dbi); - if (unlikely(rc != MDBX_SUCCESS)) { - error("mdbx_dbi_close", rc); - goto env_close; + err = mdbx_dbi_close(env, dbi); + if (unlikely(err != MDBX_SUCCESS)) { + error("mdbx_dbi_close", err); + goto bailout; } } else { assert(dbi == MAIN_DBI); @@ -842,14 +847,14 @@ int main(int argc, char *argv[]) { /* try read next header */ if (!(mode & NOHDR)) - rc = readhdr(); + err = readhdr(); else if (ferror(stdin) || feof(stdin)) break; } - switch (rc) { + switch (err) { case EOF: - rc = MDBX_SUCCESS; + err = MDBX_SUCCESS; case MDBX_SUCCESS: break; case MDBX_EINTR: @@ -857,17 +862,19 @@ int main(int argc, char *argv[]) { fprintf(stderr, "Interrupted by signal/user\n"); break; default: - if (unlikely(rc != MDBX_SUCCESS)) - error("readline", rc); + if (unlikely(err != MDBX_SUCCESS)) + error("readline", err); } -txn_abort: - mdbx_cursor_close(mc); - mdbx_txn_abort(txn); -env_close: - mdbx_env_close(env); +bailout: + if (mc) + mdbx_cursor_close(mc); + if (txn) + mdbx_txn_abort(txn); + if (env) + mdbx_env_close(env); free(kbuf.iov_base); free(dbuf.iov_base); - return rc ? 
EXIT_FAILURE : EXIT_SUCCESS; + return err ? EXIT_FAILURE : EXIT_SUCCESS; } From 04511a7a99050659ac657112e5571c6fd7b78dd4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 14 Oct 2023 09:04:06 +0300 Subject: [PATCH 014/137] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20`const=20MDB?= =?UTF-8?q?X=5Ftxn`=20=D0=B3=D0=B4=D0=B5=20=D1=8D=D1=82=D0=BE=20=D0=B2?= =?UTF-8?q?=D0=BE=D0=B7=D0=BC=D0=BE=D0=B6=D0=BD=D0=BE=20=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 26 +++++++++++++------------- mdbx.h++ | 13 +++++++------ src/core.c | 42 ++++++++++++++++++++++-------------------- 3 files changed, 42 insertions(+), 39 deletions(-) diff --git a/mdbx.h b/mdbx.h index 68493003..5c43ab89 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4291,8 +4291,8 @@ mdbx_int64_from_key(const MDBX_val); * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_dbi_stat(MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *stat, - size_t bytes); +LIBMDBX_API int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, + MDBX_stat *stat, size_t bytes); /** \brief Retrieve depth (bitmask) information of nested dupsort (multi-value) * B+trees for given database. @@ -4309,7 +4309,7 @@ LIBMDBX_API int mdbx_dbi_stat(MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *stat, * by current thread. * \retval MDBX_EINVAL An invalid parameter was specified. * \retval MDBX_RESULT_TRUE The dbi isn't a dupsort (multi-value) database. 
*/ -LIBMDBX_API int mdbx_dbi_dupsort_depthmask(MDBX_txn *txn, MDBX_dbi dbi, +LIBMDBX_API int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_t *mask); /** \brief DBI state bits returted by \ref mdbx_dbi_flags_ex() @@ -4341,13 +4341,13 @@ DEFINE_ENUM_FLAG_OPERATORS(MDBX_dbi_state_t) * \param [out] state Address where the state will be returned. * * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_dbi_flags_ex(MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, - unsigned *state); +LIBMDBX_API int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, + unsigned *flags, unsigned *state); /** \brief The shortcut to calling \ref mdbx_dbi_flags_ex() with `state=NULL` * for discarding it result. * \ingroup c_statinfo */ LIBMDBX_INLINE_API(int, mdbx_dbi_flags, - (MDBX_txn * txn, MDBX_dbi dbi, unsigned *flags)) { + (const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags)) { unsigned state; return mdbx_dbi_flags_ex(txn, dbi, flags, &state); } @@ -4423,7 +4423,7 @@ LIBMDBX_API int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del); * by current thread. * \retval MDBX_NOTFOUND The key was not in the database. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_get(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, +LIBMDBX_API int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data); /** \brief Get items from a database @@ -4456,7 +4456,7 @@ LIBMDBX_API int mdbx_get(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, * by current thread. * \retval MDBX_NOTFOUND The key was not in the database. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_get_ex(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, +LIBMDBX_API int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, size_t *values_count); /** \brief Get equal or great item from a database. 
@@ -4487,7 +4487,7 @@ LIBMDBX_API int mdbx_get_ex(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, * by current thread. * \retval MDBX_NOTFOUND The key was not in the database. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_get_equal_or_great(MDBX_txn *txn, MDBX_dbi dbi, +LIBMDBX_API int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data); /** \brief Store items into a database. @@ -4729,7 +4729,7 @@ mdbx_cursor_get_userctx(const MDBX_cursor *cursor); * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *cursor, +LIBMDBX_API int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *cursor, MDBX_dbi dbi); /** \brief Unbind cursor from a transaction. @@ -4784,7 +4784,7 @@ LIBMDBX_API int mdbx_cursor_unbind(MDBX_cursor *cursor); * \retval MDBX_THREAD_MISMATCH Given transaction is not owned * by current thread. * \retval MDBX_EINVAL An invalid parameter was specified. */ -LIBMDBX_API int mdbx_cursor_open(MDBX_txn *txn, MDBX_dbi dbi, +LIBMDBX_API int mdbx_cursor_open(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **cursor); /** \brief Close a cursor handle. @@ -4848,7 +4848,7 @@ LIBMDBX_API int mdbx_txn_release_all_cursors(const MDBX_txn *txn, bool unbind); * \retval MDBX_EINVAL An invalid parameter was specified. * \retval MDBX_BAD_DBI The cursor was not bound to a DBI-handle * or such a handle became invalid. */ -LIBMDBX_API int mdbx_cursor_renew(MDBX_txn *txn, MDBX_cursor *cursor); +LIBMDBX_API int mdbx_cursor_renew(const MDBX_txn *txn, MDBX_cursor *cursor); /** \brief Return the cursor's transaction handle. * \ingroup c_cursors @@ -5227,7 +5227,7 @@ LIBMDBX_API int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, * \param [out] distance_items A pointer to store range estimation result. 
* * \returns A non-zero error value on failure and 0 on success. */ -LIBMDBX_API int mdbx_estimate_range(MDBX_txn *txn, MDBX_dbi dbi, +LIBMDBX_API int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, MDBX_val *begin_data, MDBX_val *end_key, MDBX_val *end_data, ptrdiff_t *distance_items); diff --git a/mdbx.h++ b/mdbx.h++ index 216c0631..ea0131be 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3839,7 +3839,7 @@ public: txn_managed start_nested(); /// \brief Opens cursor for specified key-value map handle. - inline cursor_managed open_cursor(map_handle map); + inline cursor_managed open_cursor(map_handle map) const; /// \brief Unbind or close all cursors. inline size_t release_all_cursors(bool unbind) const; @@ -4226,11 +4226,11 @@ public: /// \brief Renew/bind a cursor with a new transaction and previously used /// key-value map handle. - inline void renew(::mdbx::txn &txn); + inline void renew(const ::mdbx::txn &txn); /// \brief Bind/renew a cursor with a new transaction and specified key-value /// map handle. - inline void bind(::mdbx::txn &txn, ::mdbx::map_handle map_handle); + inline void bind(const ::mdbx::txn &txn, ::mdbx::map_handle map_handle); /// \brief Unbind cursor from a transaction. 
inline void unbind(); @@ -5469,7 +5469,7 @@ inline txn::info txn::get_info(bool scan_reader_lock_table) const { return r; } -inline cursor_managed txn::open_cursor(map_handle map) { +inline cursor_managed txn::open_cursor(map_handle map) const { MDBX_cursor *ptr; error::success_or_throw(::mdbx_cursor_open(handle_, map.dbi, &ptr)); return cursor_managed(ptr); @@ -6121,11 +6121,12 @@ inline ptrdiff_t cursor::estimate(move_operation operation) const { return estimate(operation, &unused_key, nullptr); } -inline void cursor::renew(::mdbx::txn &txn) { +inline void cursor::renew(const ::mdbx::txn &txn) { error::success_or_throw(::mdbx_cursor_renew(txn, handle_)); } -inline void cursor::bind(::mdbx::txn &txn, ::mdbx::map_handle map_handle) { +inline void cursor::bind(const ::mdbx::txn &txn, + ::mdbx::map_handle map_handle) { error::success_or_throw(::mdbx_cursor_bind(txn, handle_, map_handle.dbi)); } diff --git a/src/core.c b/src/core.c index 293b4aed..3745aaf5 100644 --- a/src/core.c +++ b/src/core.c @@ -3386,7 +3386,7 @@ static int __must_check_result cursor_first(MDBX_cursor *mc, MDBX_val *key, static int __must_check_result cursor_last(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data); -static int __must_check_result cursor_init(MDBX_cursor *mc, MDBX_txn *txn, +static int __must_check_result cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi); static int __must_check_result cursor_xinit0(MDBX_cursor *mc); static int __must_check_result cursor_xinit1(MDBX_cursor *mc, MDBX_node *node, @@ -9582,7 +9582,7 @@ int mdbx_txn_flags(const MDBX_txn *txn) { } /* Check for misused dbi handles */ -static __inline bool dbi_changed(MDBX_txn *txn, size_t dbi) { +static __inline bool dbi_changed(const MDBX_txn *txn, size_t dbi) { if (txn->mt_dbiseqs == txn->mt_env->me_dbiseqs) return false; if (likely( @@ -11171,7 +11171,7 @@ static int txn_write(MDBX_txn *txn, iov_ctx_t *ctx) { } /* Check txn and dbi arguments to a function */ -static __always_inline bool check_dbi(MDBX_txn 
*txn, MDBX_dbi dbi, +static __always_inline bool check_dbi(const MDBX_txn *txn, MDBX_dbi dbi, unsigned validity) { if (likely(dbi < txn->mt_numdbs)) { if (likely(!dbi_changed(txn, dbi))) { @@ -11182,7 +11182,7 @@ static __always_inline bool check_dbi(MDBX_txn *txn, MDBX_dbi dbi, return false; } } - return dbi_import(txn, dbi); + return dbi_import((MDBX_txn *)txn, dbi); } /* Merge child txn into parent */ @@ -16083,7 +16083,8 @@ static __always_inline int node_read(MDBX_cursor *mc, const MDBX_node *node, return node_read_bigdata(mc, node, data, mp); } -int mdbx_get(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data) { +int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, + MDBX_val *data) { DKBUF_DEBUG; DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key)); @@ -16105,7 +16106,7 @@ int mdbx_get(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data) { return cursor_set(&cx.outer, (MDBX_val *)key, data, MDBX_SET).err; } -int mdbx_get_equal_or_great(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, +int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -16128,8 +16129,8 @@ int mdbx_get_equal_or_great(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, return cursor_get(&cx.outer, key, data, MDBX_SET_LOWERBOUND); } -int mdbx_get_ex(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, MDBX_val *data, - size_t *values_count) { +int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, + MDBX_val *data, size_t *values_count) { DKBUF_DEBUG; DEBUG("===> get db %u key [%s]", dbi, DKEY_DEBUG(key)); @@ -18759,13 +18760,13 @@ static int cursor_xinit2(MDBX_cursor *mc, MDBX_xcursor *src_mx, } static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, - MDBX_txn *const txn, MDBX_db *const db, + const MDBX_txn *const txn, MDBX_db *const db, MDBX_dbx *const dbx, uint8_t *const dbstate) { couple->outer.mc_signature = 
MDBX_MC_LIVE; couple->outer.mc_next = NULL; couple->outer.mc_backup = NULL; couple->outer.mc_dbi = (MDBX_dbi)dbi; - couple->outer.mc_txn = txn; + couple->outer.mc_txn = (MDBX_txn *)txn; couple->outer.mc_db = db; couple->outer.mc_dbx = dbx; couple->outer.mc_dbistate = dbstate; @@ -18803,7 +18804,7 @@ static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, } /* Initialize a cursor for a given transaction and database. */ -static int cursor_init(MDBX_cursor *mc, MDBX_txn *txn, size_t dbi) { +static int cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi) { STATIC_ASSERT(offsetof(MDBX_cursor_couple, outer) == 0); return couple_init(container_of(mc, MDBX_cursor_couple, outer), dbi, txn, &txn->mt_dbs[dbi], &txn->mt_dbxs[dbi], @@ -18878,7 +18879,7 @@ int mdbx_cursor_unbind(MDBX_cursor *mc) { return MDBX_SUCCESS; } -int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { +int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { if (unlikely(!mc)) return MDBX_EINVAL; @@ -18932,7 +18933,7 @@ int mdbx_cursor_bind(MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { return MDBX_SUCCESS; } -int mdbx_cursor_open(MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) { +int mdbx_cursor_open(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) { if (unlikely(!ret)) return MDBX_EINVAL; *ret = NULL; @@ -18951,7 +18952,7 @@ int mdbx_cursor_open(MDBX_txn *txn, MDBX_dbi dbi, MDBX_cursor **ret) { return MDBX_SUCCESS; } -int mdbx_cursor_renew(MDBX_txn *txn, MDBX_cursor *mc) { +int mdbx_cursor_renew(const MDBX_txn *txn, MDBX_cursor *mc) { return likely(mc) ? 
mdbx_cursor_bind(txn, mc, mc->mc_dbi) : MDBX_EINVAL; } @@ -22244,7 +22245,7 @@ __cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, return rc; } -__cold int mdbx_dbi_dupsort_depthmask(MDBX_txn *txn, MDBX_dbi dbi, +__cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, uint32_t *mask) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -22860,7 +22861,7 @@ int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, return dbi_open(txn, name, flags, dbi, keycmp, datacmp); } -__cold int mdbx_dbi_stat(MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, +__cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, size_t bytes) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) @@ -22880,7 +22881,7 @@ __cold int mdbx_dbi_stat(MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, return MDBX_BAD_TXN; if (unlikely(txn->mt_dbistate[dbi] & DBI_STALE)) { - rc = fetch_sdb(txn, dbi); + rc = fetch_sdb((MDBX_txn *)txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; } @@ -22941,7 +22942,7 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { return rc; } -int mdbx_dbi_flags_ex(MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, +int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, unsigned *state) { int rc = check_txn(txn, MDBX_TXN_BLOCKED - MDBX_TXN_ERROR); if (unlikely(rc != MDBX_SUCCESS)) @@ -24113,7 +24114,7 @@ int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, return mdbx_estimate_distance(cursor, &next.outer, distance_items); } -int mdbx_estimate_range(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, +int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, MDBX_val *begin_data, MDBX_val *end_key, MDBX_val *end_data, ptrdiff_t *size_items) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); @@ -25455,7 +25456,8 @@ LIBMDBX_API __cold int mdbx_env_info(const MDBX_env *env, MDBX_envinfo *info, return 
__inline_mdbx_env_info(env, info, bytes); } -LIBMDBX_API int mdbx_dbi_flags(MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags) { +LIBMDBX_API int mdbx_dbi_flags(const MDBX_txn *txn, MDBX_dbi dbi, + unsigned *flags) { return __inline_mdbx_dbi_flags(txn, dbi, flags); } From c254c728d2f0dbd8989cbe1b7fe7824aa0ef11cd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 14 Oct 2023 11:10:32 +0300 Subject: [PATCH 015/137] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=81=D1=82=D1=8B?= =?UTF-8?q?=D0=BB=D1=8C=20=D0=B4=D0=BB=D1=8F=20=D0=BB=D0=BE=D0=B6=D0=BD?= =?UTF-8?q?=D0=BE-=D0=BF=D0=BE=D0=BB=D0=BE=D0=B6=D0=B8=D1=82=D0=B5=D0=BB?= =?UTF-8?q?=D1=8C=D0=BD=D0=BE=D0=B3=D0=BE=20=D0=BF=D1=80=D0=B5=D0=B4=D1=83?= =?UTF-8?q?=D0=BF=D1=80=D0=B5=D0=B6=D0=B4=D0=B5=D0=BD=D0=B8=D1=8F=20Coveri?= =?UTF-8?q?ty.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index 3745aaf5..4a741bf3 100644 --- a/src/core.c +++ b/src/core.c @@ -26151,8 +26151,11 @@ static void histogram_acc(const size_t n, struct MDBX_chk_histogram *p) { if (p->ranges[i].count) { assert(i < last); // раздвигаем - memmove(p->ranges + i + 1, p->ranges + i, - (last - i) * sizeof(p->ranges[0])); +#ifdef __COVERITY__ + if (i < last) /* avoid Coverity false-positive issue */ +#endif /* __COVERITY__ */ + memmove(p->ranges + i + 1, p->ranges + i, + (last - i) * sizeof(p->ranges[0])); } p->ranges[i].begin = n; p->ranges[i].end = n + 1; From 24f08aed286ce6b9ceac4b5942f3a34b5afb7077 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 14 Oct 2023 14:07:51 +0300 Subject: [PATCH 016/137] =?UTF-8?q?mdbx-doc:=20=D0=BE=D0=B1=D0=BD=D0=BE?= 
=?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BA=D0=BE=D0=BD=D1=84?= =?UTF-8?q?=D0=B8=D0=B3=D1=83=D1=80=D0=B0=D1=86=D0=B8=D0=B8=20Doxygen.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Doxyfile.in | 557 +++++++++++++++++++++++++++++------------------ 1 file changed, 350 insertions(+), 207 deletions(-) diff --git a/docs/Doxyfile.in b/docs/Doxyfile.in index 9aef3329..ca91f8b8 100644 --- a/docs/Doxyfile.in +++ b/docs/Doxyfile.in @@ -1,4 +1,4 @@ -# Doxyfile 1.9.1 +# Doxyfile 1.9.6 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project. @@ -12,6 +12,16 @@ # For lists, items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (\" \"). +# +# Note: +# +# Use doxygen to compare the used configuration file with the template +# configuration file: +# doxygen -x [configFile] +# Use doxygen to compare the used configuration file with the template +# configuration file without replacing the environment variables or CMake type +# replacement variables: +# doxygen -x_noenv [configFile] #--------------------------------------------------------------------------- # Project related configuration options @@ -60,16 +70,28 @@ PROJECT_LOGO = OUTPUT_DIRECTORY = . -# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub- -# directories (in 2 levels) under the output directory of each output format and -# will distribute the generated files over these directories. Enabling this +# If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096 +# sub-directories (in 2 levels) under the output directory of each output format +# and will distribute the generated files over these directories. 
Enabling this # option can be useful when feeding doxygen a huge amount of source files, where # putting all generated files in the same directory would otherwise causes -# performance problems for the file system. +# performance problems for the file system. Adapt CREATE_SUBDIRS_LEVEL to +# control the number of sub-directories. # The default value is: NO. CREATE_SUBDIRS = NO +# Controls the number of sub-directories that will be created when +# CREATE_SUBDIRS tag is set to YES. Level 0 represents 16 directories, and every +# level increment doubles the number of directories, resulting in 4096 +# directories at level 8 which is the default and also the maximum value. The +# sub-directories are organized in 2 levels, the first level always has a fixed +# number of 16 directories. +# Minimum value: 0, maximum value: 8, default value: 8. +# This tag requires that the tag CREATE_SUBDIRS is set to YES. + +CREATE_SUBDIRS_LEVEL = 8 + # If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII # characters to appear in the names of generated files. If set to NO, non-ASCII # characters will be escaped, for example _xE3_x81_x84 will be used for Unicode @@ -81,26 +103,18 @@ ALLOW_UNICODE_NAMES = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. 
-# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese, -# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States), -# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian, -# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages), -# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian, -# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian, -# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish, -# Ukrainian and Vietnamese. +# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Bulgarian, +# Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, English +# (United States), Esperanto, Farsi (Persian), Finnish, French, German, Greek, +# Hindi, Hungarian, Indonesian, Italian, Japanese, Japanese-en (Japanese with +# English messages), Korean, Korean-en (Korean with English messages), Latvian, +# Lithuanian, Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, +# Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, +# Swedish, Turkish, Ukrainian and Vietnamese. # The default value is: English. OUTPUT_LANGUAGE = English -# The OUTPUT_TEXT_DIRECTION tag is used to specify the direction in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all generated output in the proper direction. -# Possible values are: None, LTR, RTL and Context. -# The default value is: None. - -OUTPUT_TEXT_DIRECTION = None - # If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member # descriptions after the members that are listed in the file and class # documentation (similar to Javadoc). Set to NO to disable this. @@ -258,16 +272,16 @@ TAB_SIZE = 4 # the documentation. 
An alias has the form: # name=value # For example adding -# "sideeffect=@par Side Effects:\n" +# "sideeffect=@par Side Effects:^^" # will allow you to put the command \sideeffect (or @sideeffect) in the # documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". You can put \n's in the value part of an alias to insert -# newlines (in the resulting output). You can put ^^ in the value part of an -# alias to insert a newline as if a physical newline was in the original file. -# When you need a literal { or } or , in the value part of an alias you have to -# escape them by means of a backslash (\), this can lead to conflicts with the -# commands \{ and \} for these it is advised to use the version @{ and @} or use -# a double escape (\\{ and \\}) +# "Side Effects:". Note that you cannot put \n's in the value part of an alias +# to insert newlines (in the resulting output). You can put ^^ in the value part +# of an alias to insert a newline as if a physical newline was in the original +# file. When you need a literal { or } or , in the value part of an alias you +# have to escape them by means of a backslash (\), this can lead to conflicts +# with the commands \{ and \} for these it is advised to use the version @{ and +# @} or use a double escape (\\{ and \\}) ALIASES = @@ -312,8 +326,8 @@ OPTIMIZE_OUTPUT_SLICE = NO # extension. Doxygen has a built-in mapping, but you can override or extend it # using this tag. The format is ext=language, where ext is a file extension, and # language is one of the parsers supported by doxygen: IDL, Java, JavaScript, -# Csharp (C#), C, C++, D, PHP, md (Markdown), Objective-C, Python, Slice, VHDL, -# Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: +# Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice, +# VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: # FortranFree, unknown formatted Fortran: Fortran. 
In the later case the parser # tries to guess whether the code is fixed or free formatted code, this is the # default for Fortran type files). For instance to make doxygen treat .inc files @@ -460,13 +474,13 @@ TYPEDEF_HIDES_STRUCT = YES LOOKUP_CACHE_SIZE = 0 -# The NUM_PROC_THREADS specifies the number threads doxygen is allowed to use +# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use # during processing. When set to 0 doxygen will based this on the number of # cores available in the system. You can set it explicitly to a value larger # than 0 to get more control over the balance between CPU load and processing # speed. At this moment only the input processing can be done using multiple # threads. Since this is still an experimental feature the default is set to 1, -# which efficively disables parallel processing. Please report any issues you +# which effectively disables parallel processing. Please report any issues you # encounter. Generating dot graphs in parallel is controlled by the # DOT_NUM_THREADS setting. # Minimum value: 0, maximum value: 32, default value: 1. @@ -554,7 +568,8 @@ HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. If set # to NO, these classes will be included in the various overviews. This option -# has no effect if EXTRACT_ALL is enabled. +# will also hide undocumented C++ concepts if enabled. This option has no effect +# if EXTRACT_ALL is enabled. # The default value is: NO. HIDE_UNDOC_CLASSES = NO @@ -585,14 +600,15 @@ INTERNAL_DOCS = NO # filesystem is case sensitive (i.e. it supports files in the same directory # whose names only differ in casing), the option must be set to YES to properly # deal with such files in case they appear in the input. 
For filesystems that -# are not case sensitive the option should be be set to NO to properly deal with +# are not case sensitive the option should be set to NO to properly deal with # output files written for symbols that only differ in casing, such as for two # classes, one named CLASS and the other named Class, and to also support # references to files without having to specify the exact matching casing. On # Windows (including Cygwin) and MacOS, users should typically set this option # to NO, whereas on Linux or other Unix flavors it should typically be set to # YES. -# The default value is: system dependent. +# Possible values are: SYSTEM, NO and YES. +# The default value is: SYSTEM. CASE_SENSE_NAMES = NO @@ -610,6 +626,12 @@ HIDE_SCOPE_NAMES = NO HIDE_COMPOUND_REFERENCE= NO +# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class +# will show which file needs to be included to use the class. +# The default value is: YES. + +SHOW_HEADERFILE = YES + # If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of # the files that are included by a file in the documentation of that file. # The default value is: YES. @@ -767,7 +789,8 @@ FILE_VERSION_FILTER = # output files in an output format independent way. To create the layout file # that represents doxygen's defaults, run doxygen with the -l option. You can # optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. +# will be used as the name of the layout file. See also section "Changing the +# layout of pages" for information. 
# # Note that if you run doxygen from a directory containing a file called # DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE @@ -813,22 +836,38 @@ WARNINGS = YES WARN_IF_UNDOCUMENTED = YES # If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some parameters -# in a documented function, or documenting parameters that don't exist or using -# markup commands wrongly. +# potential errors in the documentation, such as documenting some parameters in +# a documented function twice, or documenting parameters that don't exist or +# using markup commands wrongly. # The default value is: YES. WARN_IF_DOC_ERROR = YES +# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete +# function parameter documentation. If set to NO, doxygen will accept that some +# parameters have no documentation without warning. +# The default value is: YES. + +WARN_IF_INCOMPLETE_DOC = YES + # This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that # are documented, but have no documentation for their parameters or return -# value. If set to NO, doxygen will only warn about wrong or incomplete -# parameter documentation, but not about the absence of documentation. If -# EXTRACT_ALL is set to YES then this flag will automatically be disabled. +# value. If set to NO, doxygen will only warn about wrong parameter +# documentation, but not about the absence of documentation. If EXTRACT_ALL is +# set to YES then this flag will automatically be disabled. See also +# WARN_IF_INCOMPLETE_DOC # The default value is: NO. WARN_NO_PARAMDOC = NO +# If WARN_IF_UNDOC_ENUM_VAL option is set to YES, doxygen will warn about +# undocumented enumeration values. If set to NO, doxygen will accept +# undocumented enumeration values. If EXTRACT_ALL is set to YES then this flag +# will automatically be disabled. +# The default value is: NO. 
+ +WARN_IF_UNDOC_ENUM_VAL = NO + # If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when # a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS # then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but @@ -844,13 +883,27 @@ WARN_AS_ERROR = NO # and the warning text. Optionally the format may contain $version, which will # be replaced by the version of the file (if it could be obtained via # FILE_VERSION_FILTER) +# See also: WARN_LINE_FORMAT # The default value is: $file:$line: $text. WARN_FORMAT = "$file:$line: $text" +# In the $text part of the WARN_FORMAT command it is possible that a reference +# to a more specific place is given. To make it easier to jump to this place +# (outside of doxygen) the user can define a custom "cut" / "paste" string. +# Example: +# WARN_LINE_FORMAT = "'vi $file +$line'" +# See also: WARN_FORMAT +# The default value is: at line $line of file $file. + +WARN_LINE_FORMAT = "at line $line of file $file" + # The WARN_LOGFILE tag can be used to specify a file to which warning and error # messages should be written. If left blank the output is written to standard -# error (stderr). +# error (stderr). In case the file specified cannot be opened for writing the +# warning and error messages are written to standard error. When as file - is +# specified the warning and error messages are written to standard output +# (stdout). WARN_LOGFILE = @@ -877,10 +930,21 @@ INPUT = overall.md \ # libiconv (or the iconv built into libc) for the transcoding. See the libiconv # documentation (see: # https://www.gnu.org/software/libiconv/) for the list of possible encodings. +# See also: INPUT_FILE_ENCODING # The default value is: UTF-8. INPUT_ENCODING = UTF-8 +# This tag can be used to specify the character encoding of the source files +# that doxygen parses The INPUT_FILE_ENCODING tag can be used to specify +# character encoding on a per file pattern basis. 
Doxygen will compare the file +# name with each pattern and apply the encoding (instead of the default +# INPUT_ENCODING) if there is a match. The character encodings are a list of the +# form: pattern=encoding (like *.php=ISO-8859-1). See cfg_input_encoding +# "INPUT_ENCODING" for further information on supported encodings. + +INPUT_FILE_ENCODING = + # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and # *.h) to filter out the source-files in the directories. @@ -894,10 +958,10 @@ INPUT_ENCODING = UTF-8 # # If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, # *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, -# *.hh, *.hxx, *.hpp, *.h++, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, *.inc, -# *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C comment), -# *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, *.vhdl, -# *.ucf, *.qsf and *.ice. +# *.hh, *.hxx, *.hpp, *.h++, *.l, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, +# *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C +# comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, +# *.vhdl, *.ucf, *.qsf and *.ice. FILE_PATTERNS = *.h @@ -936,20 +1000,40 @@ EXCLUDE_PATTERNS = # (namespaces, classes, functions, etc.) that should be excluded from the # output. The symbol name can be a fully qualified name, a word, or if the # wildcard * is used, a substring.
Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test +# ANamespace::AClass, ANamespace::*Test # # Note that the wildcards are matched against the file with absolute path, so to # exclude all test directories use the pattern */test/* -EXCLUDE_SYMBOLS = NOMINMAX __ORDER_BIG_ENDIAN__ __ORDER_LITTLE_ENDIAN__ \ - __has_include __has_attribute __has_builtin __has_cpp_attribute __has_extension __has_feature \ - HAVE_STRUCT_IOVEC MDBX_STRINGIFY_HELPER MDBX_STRINGIFY \ - MDBX_NOSANITIZE_ENUM MDBX_PRINTF_ARGS \ - MDBX_HAVE_CXX20_CONCEPTS \ - CONSTEXPR_ENUM_FLAGS_OPERATIONS DEFINE_ENUM_FLAG_OPERATORS \ - bool false true __dll_export __dll_import \ - MDBX_64BIT_ATOMIC_CONFIG MDBX_64BIT_CAS_CONFIG MDBX_ENV_CHECKPID_CONFIG MDBX_LOCKING_CONFIG \ - MDBX_TRUST_RTC_CONFIG MDBX_TXN_CHECKOWNER_CONFIG MDBX_USE_OFDLOCKS_CONFIG +EXCLUDE_SYMBOLS = NOMINMAX \ + __ORDER_BIG_ENDIAN__ \ + __ORDER_LITTLE_ENDIAN__ \ + __has_include \ + __has_attribute \ + __has_builtin \ + __has_cpp_attribute \ + __has_extension \ + __has_feature \ + HAVE_STRUCT_IOVEC \ + MDBX_STRINGIFY_HELPER \ + MDBX_STRINGIFY \ + MDBX_NOSANITIZE_ENUM \ + MDBX_PRINTF_ARGS \ + MDBX_HAVE_CXX20_CONCEPTS \ + CONSTEXPR_ENUM_FLAGS_OPERATIONS \ + DEFINE_ENUM_FLAG_OPERATORS \ + bool \ + false \ + true \ + __dll_export \ + __dll_import \ + MDBX_64BIT_ATOMIC_CONFIG \ + MDBX_64BIT_CAS_CONFIG \ + MDBX_ENV_CHECKPID_CONFIG \ + MDBX_LOCKING_CONFIG \ + MDBX_TRUST_RTC_CONFIG \ + MDBX_TXN_CHECKOWNER_CONFIG \ + MDBX_USE_OFDLOCKS_CONFIG # The EXAMPLE_PATH tag can be used to specify one or more files or directories # that contain example code fragments that are included (see the \include @@ -992,6 +1076,11 @@ IMAGE_PATH = # code is scanned, but not when the output code is generated. If lines are added # or removed, the anchors will not be placed correctly. 
# +# Note that doxygen will use the data processed and written to standard output +# for further processing, therefore nothing else, like debug statements or used +# commands (so in case of a Windows batch file always use @echo OFF), should be +# written to standard output. +# # Note that for custom extensions or not directly supported extensions you also # need to set EXTENSION_MAPPING for the extension otherwise the files are not # properly processed by doxygen. @@ -1033,6 +1122,15 @@ FILTER_SOURCE_PATTERNS = USE_MDFILE_AS_MAINPAGE = +# The Fortran standard specifies that for fixed formatted Fortran code all +# characters from position 72 are to be considered as comment. A common +# extension is to allow longer lines before the automatic comment starts. The +# setting FORTRAN_COMMENT_AFTER will also make it possible that longer lines can +# be processed before the automatic comment starts. +# Minimum value: 7, maximum value: 10000, default value: 72. + +FORTRAN_COMMENT_AFTER = 72 + #--------------------------------------------------------------------------- # Configuration options related to source browsing #--------------------------------------------------------------------------- @@ -1130,9 +1228,11 @@ VERBATIM_HEADERS = YES CLANG_ASSISTED_PARSING = NO -# If clang assisted parsing is enabled and the CLANG_ADD_INC_PATHS tag is set to -# YES then doxygen will add the directory of each input to the include path. +# If the CLANG_ASSISTED_PARSING tag is set to YES and the CLANG_ADD_INC_PATHS +# tag is set to YES then doxygen will add the directory of each input to the +# include path. # The default value is: YES. +# This tag requires that the tag CLANG_ASSISTED_PARSING is set to YES. CLANG_ADD_INC_PATHS = YES @@ -1168,10 +1268,11 @@ CLANG_DATABASE_PATH = ALPHABETICAL_INDEX = YES -# In case all classes in a project start with a common prefix, all classes will -# be put under the same header in the alphabetical index. 
The IGNORE_PREFIX tag -# can be used to specify a prefix (or a list of prefixes) that should be ignored -# while generating the index headers. +# The IGNORE_PREFIX tag can be used to specify a prefix (or a list of prefixes) +# that should be ignored while generating the index headers. The IGNORE_PREFIX +# tag works for classes, function and member names. The entity will be placed in +# the alphabetical list under the first letter of the entity name that remains +# after removing the prefix. # This tag requires that the tag ALPHABETICAL_INDEX is set to YES. IGNORE_PREFIX = @@ -1250,7 +1351,12 @@ HTML_STYLESHEET = # Doxygen will copy the style sheet files to the output directory. # Note: The order of the extra style sheet files is of importance (e.g. the last # style sheet in the list overrules the setting of the previous ones in the -# list). For an example see the documentation. +# list). +# Note: Since the styling of scrollbars can currently not be overruled in +# Webkit/Chromium, the styling will be left out of the default doxygen.css if +# one or more extra stylesheets have been specified. So if scrollbar +# customization is desired it has to be added explicitly. For an example see the +# documentation. # This tag requires that the tag GENERATE_HTML is set to YES. HTML_EXTRA_STYLESHEET = @@ -1265,9 +1371,22 @@ HTML_EXTRA_STYLESHEET = HTML_EXTRA_FILES = +# The HTML_COLORSTYLE tag can be used to specify if the generated HTML output +# should be rendered with a dark or light theme. +# Possible values are: LIGHT always generate light mode output, DARK always +# generate dark mode output, AUTO_LIGHT automatically set the mode according to +# the user preference, use light mode if no preference is set (the default), +# AUTO_DARK automatically set the mode according to the user preference, use +# dark mode if no preference is set and TOGGLE allow to user to switch between +# light and dark mode via a button. +# The default value is: AUTO_LIGHT. 
+# This tag requires that the tag GENERATE_HTML is set to YES. + +HTML_COLORSTYLE = AUTO_LIGHT + # The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen # will adjust the colors in the style sheet and background images according to -# this color. Hue is specified as an angle on a colorwheel, see +# this color. Hue is specified as an angle on a color-wheel, see # https://en.wikipedia.org/wiki/Hue for more information. For instance the value # 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 # purple, and 360 is red again. @@ -1277,7 +1396,7 @@ HTML_EXTRA_FILES = HTML_COLORSTYLE_HUE = 220 # The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use grayscales only. A +# in the HTML output. For a value of 0 the output will use gray-scales only. A # value of 255 will produce the most vivid colors. # Minimum value: 0, maximum value: 255, default value: 100. # This tag requires that the tag GENERATE_HTML is set to YES. @@ -1359,6 +1478,13 @@ GENERATE_DOCSET = NO DOCSET_FEEDNAME = "Doxygen generated docs" +# This tag determines the URL of the docset feed. A documentation feed provides +# an umbrella under which multiple documentation sets from a single provider +# (such as a company or product suite) can be grouped. +# This tag requires that the tag GENERATE_DOCSET is set to YES. + +DOCSET_FEEDURL = + # This tag specifies a string that should uniquely identify the documentation # set bundle. This should be a reverse domain-name style string, e.g. # com.mycompany.MyDocSet. Doxygen will append .docset to the name. @@ -1384,8 +1510,12 @@ DOCSET_PUBLISHER_NAME = Publisher # If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three # additional HTML index files: index.hhp, index.hhc, and index.hhk. 
The # index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# (see: -# https://www.microsoft.com/en-us/download/details.aspx?id=21138) on Windows. +# on Windows. In the beginning of 2021 Microsoft took the original page, with +# a.o. the download links, offline (the HTML help workshop was already many years +# in maintenance mode). You can download the HTML help workshop from the web +# archives at Installation executable (see: +# http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo +# ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe). # # The HTML Help Workshop contains a compiler that can convert all HTML output # generated by doxygen into a single compiled HTML file (.chm). Compiled HTML @@ -1544,16 +1674,28 @@ DISABLE_INDEX = YES # to work a browser that supports JavaScript, DHTML, CSS and frames is required # (i.e. any modern browser). Windows users are probably better off using the # HTML help feature. Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can -# further fine-tune the look of the index. As an example, the default style -# sheet generated by doxygen has an example that shows how to put an image at -# the root of the tree instead of the PROJECT_NAME. Since the tree basically has -# the same information as the tab index, you could consider setting -# DISABLE_INDEX to YES when enabling this option. +# further fine tune the look of the index (see "Fine-tuning the output"). As an +# example, the default style sheet generated by doxygen has an example that +# shows how to put an image at the root of the tree instead of the PROJECT_NAME. +# Since the tree basically has the same information as the tab index, you could +# consider setting DISABLE_INDEX to YES when enabling this option. # The default value is: NO. # This tag requires that the tag GENERATE_HTML is set to YES.
GENERATE_TREEVIEW = YES +# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the +# FULL_SIDEBAR option determines if the side bar is limited to only the treeview +# area (value NO) or if it should extend to the full height of the window (value +# YES). Setting this to YES gives a layout similar to +# https://docs.readthedocs.io with more room for contents, but less room for the +# project logo, title, and description. If either GENERATE_TREEVIEW or +# DISABLE_INDEX is set to NO, this option has no effect. +# The default value is: NO. +# This tag requires that the tag GENERATE_HTML is set to YES. + +FULL_SIDEBAR = NO + # The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that # doxygen will group on one line in the generated HTML documentation. # @@ -1578,6 +1720,13 @@ TREEVIEW_WIDTH = 250 EXT_LINKS_IN_WINDOW = NO +# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email +# addresses. +# The default value is: YES. +# This tag requires that the tag GENERATE_HTML is set to YES. + +OBFUSCATE_EMAILS = YES + # If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg # tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see # https://inkscape.org) to generate formulas as SVG images instead of PNGs for @@ -1598,17 +1747,6 @@ HTML_FORMULA_FORMAT = png FORMULA_FONTSIZE = 10 -# Use the FORMULA_TRANSPARENT tag to determine whether or not the images -# generated for formulas are transparent PNGs. Transparent PNGs are not -# supported properly for IE 6.0, but are supported on all modern browsers. -# -# Note that when changing this option you need to delete any form_*.png files in -# the HTML output directory before the changes have effect. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. 
- -FORMULA_TRANSPARENT = YES - # The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands # to create new LaTeX commands to be used in formulas as building blocks. See # the section "Including formulas" for details. @@ -1626,11 +1764,29 @@ FORMULA_MACROFILE = USE_MATHJAX = YES +# With MATHJAX_VERSION it is possible to specify the MathJax version to be used. +# Note that the different versions of MathJax have different requirements with +# regards to the different settings, so it is possible that also other MathJax +# settings have to be changed when switching between the different MathJax +# versions. +# Possible values are: MathJax_2 and MathJax_3. +# The default value is: MathJax_2. +# This tag requires that the tag USE_MATHJAX is set to YES. + +MATHJAX_VERSION = MathJax_2 + # When MathJax is enabled you can set the default output format to be used for -# the MathJax output. See the MathJax site (see: -# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. +# the MathJax output. For more details about the output format see MathJax +# version 2 (see: +# http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3 +# (see: +# http://docs.mathjax.org/en/latest/web/components/output.html). # Possible values are: HTML-CSS (which is slower, but has the best -# compatibility), NativeMML (i.e. MathML) and SVG. +# compatibility. This is the name for MathJax version 2, for MathJax version 3 +# this will be translated into chtml), NativeMML (i.e. MathML. Only supported +# for MathJax 2. For MathJax version 3 chtml will be used instead.), chtml (This +# is the name for MathJax version 3, for MathJax version 2 this will be +# translated into HTML-CSS) and SVG. # The default value is: HTML-CSS. # This tag requires that the tag USE_MATHJAX is set to YES. @@ -1643,15 +1799,21 @@ MATHJAX_FORMAT = HTML-CSS # MATHJAX_RELPATH should be ../mathjax.
The default value points to the MathJax # Content Delivery Network so you can quickly see the result without installing # MathJax. However, it is strongly recommended to install a local copy of -# MathJax from https://www.mathjax.org before deployment. -# The default value is: https://cdn.jsdelivr.net/npm/mathjax@2. +# MathJax from https://www.mathjax.org before deployment. The default value is: +# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2 +# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3 # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_RELPATH = https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.5/ # The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax # extension names that should be enabled during MathJax rendering. For example +# for MathJax version 2 (see +# https://docs.mathjax.org/en/v2.7-latest/tex.html#tex-and-latex-extensions): # MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols +# For example for MathJax version 3 (see +# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html): +# MATHJAX_EXTENSIONS = ams # This tag requires that the tag USE_MATHJAX is set to YES. MATHJAX_EXTENSIONS = @@ -1831,29 +1993,31 @@ PAPER_TYPE = a4 EXTRA_PACKAGES = -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for the -# generated LaTeX document. The header should contain everything until the first -# chapter. If it is left blank doxygen will generate a standard header. See -# section "Doxygen usage" for information on how to let doxygen write the -# default header to a separate file. +# The LATEX_HEADER tag can be used to specify a user-defined LaTeX header for +# the generated LaTeX document. The header should contain everything until the +# first chapter. If it is left blank doxygen will generate a standard header. 
It +# is highly recommended to start with a default header using +# doxygen -w latex new_header.tex new_footer.tex new_stylesheet.sty +# and then modify the file new_header.tex. See also section "Doxygen usage" for +# information on how to generate the default header that doxygen normally uses. # -# Note: Only use a user-defined header if you know what you are doing! The -# following commands have a special meaning inside the header: $title, -# $datetime, $date, $doxygenversion, $projectname, $projectnumber, -# $projectbrief, $projectlogo. Doxygen will replace $title with the empty -# string, for the replacement values of the other commands the user is referred -# to HTML_HEADER. +# Note: Only use a user-defined header if you know what you are doing! +# Note: The header is subject to change so you typically have to regenerate the +# default header when upgrading to a newer version of doxygen. The following +# commands have a special meaning inside the header (and footer): For a +# description of the possible markers and block names see the documentation. # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_HEADER = -# The LATEX_FOOTER tag can be used to specify a personal LaTeX footer for the -# generated LaTeX document. The footer should contain everything after the last -# chapter. If it is left blank doxygen will generate a standard footer. See +# The LATEX_FOOTER tag can be used to specify a user-defined LaTeX footer for +# the generated LaTeX document. The footer should contain everything after the +# last chapter. If it is left blank doxygen will generate a standard footer. See # LATEX_HEADER for more information on how to generate a default footer and what -# special commands can be used inside the footer. -# -# Note: Only use a user-defined footer if you know what you are doing! +# special commands can be used inside the footer. 
See also section "Doxygen +# usage" for information on how to generate the default footer that doxygen +# normally uses. Note: Only use a user-defined footer if you know what you are +# doing! # This tag requires that the tag GENERATE_LATEX is set to YES. LATEX_FOOTER = @@ -1898,8 +2062,7 @@ USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \batchmode # command to the generated LaTeX files. This will instruct LaTeX to keep running -# if errors occur, instead of asking the user for help. This option is also used -# when generating formulas in HTML. +# if errors occur, instead of asking the user for help. # The default value is: NO. # This tag requires that the tag GENERATE_LATEX is set to YES. @@ -1912,16 +2075,6 @@ LATEX_BATCHMODE = NO LATEX_HIDE_INDICES = NO -# If the LATEX_SOURCE_CODE tag is set to YES then doxygen will include source -# code with syntax highlighting in the LaTeX output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_LATEX is set to YES. - -LATEX_SOURCE_CODE = NO - # The LATEX_BIB_STYLE tag can be used to specify the style to use for the # bibliography, e.g. plainnat, or ieeetr. See # https://en.wikipedia.org/wiki/BibTeX and \cite for more info. @@ -2002,16 +2155,6 @@ RTF_STYLESHEET_FILE = RTF_EXTENSIONS_FILE = -# If the RTF_SOURCE_CODE tag is set to YES then doxygen will include source code -# with syntax highlighting in the RTF output. -# -# Note that which sources are shown also depends on other settings such as -# SOURCE_BROWSER. -# The default value is: NO. -# This tag requires that the tag GENERATE_RTF is set to YES. 
- -RTF_SOURCE_CODE = NO - #--------------------------------------------------------------------------- # Configuration options related to the man page output #--------------------------------------------------------------------------- @@ -2108,15 +2251,6 @@ GENERATE_DOCBOOK = NO DOCBOOK_OUTPUT = docbook -# If the DOCBOOK_PROGRAMLISTING tag is set to YES, doxygen will include the -# program listings (including syntax highlighting and cross-referencing -# information) to the DOCBOOK output. Note that enabling this will significantly -# increase the size of the DOCBOOK output. -# The default value is: NO. -# This tag requires that the tag GENERATE_DOCBOOK is set to YES. - -DOCBOOK_PROGRAMLISTING = NO - #--------------------------------------------------------------------------- # Configuration options for the AutoGen Definitions output #--------------------------------------------------------------------------- @@ -2203,7 +2337,8 @@ SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by the -# preprocessor. +# preprocessor. Note that the INCLUDE_PATH is not recursive, so the setting of +# RECURSIVE has no effect here. # This tag requires that the tag SEARCH_INCLUDES is set to YES. INCLUDE_PATH = @@ -2224,24 +2359,30 @@ INCLUDE_FILE_PATTERNS = # recursively expanded use the := operator instead of the = operator. # This tag requires that the tag ENABLE_PREPROCESSING is set to YES. 
- PREDEFINED = DOXYGEN \ - MDBX_CXX20_CONCEPT(CONCEPT,NAME)="CONCEPT NAME" \ - MDBX_STD_FILESYSTEM_PATH=::mdbx::filesystem::path \ - MDBX_U128_TYPE=uint128_t MDBX_I128_TYPE=int128_t \ - MDBX_DECLARE_EXCEPTION(NAME)="struct LIBMDBX_API_TYPE NAME : public exception{NAME(const ::mdbx::error &); virtual ~NAME() noexcept; }" \ - MDBX_PURE_FUNCTION=[[gnu::pure]] \ - MDBX_NOTHROW_PURE_FUNCTION="[[gnu::pure, gnu::nothrow]]" \ - MDBX_CONST_FUNCTION=[[gnu::const]] \ - MDBX_NOTHROW_CONST_FUNCTION="[[gnu::const, gnu::nothrow]]" \ - MDBX_CXX01_CONSTEXPR=constexpr MDBX_CXX01_CONSTEXPR_VAR=constexpr \ - MDBX_CXX11_CONSTEXPR=constexpr MDBX_CXX11_CONSTEXPR_VAR=constexpr \ - MDBX_CXX14_CONSTEXPR=constexpr MDBX_CXX14_CONSTEXPR_VAR=constexpr \ - MDBX_CXX17_CONSTEXPR=constexpr MDBX_CXX20_CONSTEXPR=constexpr \ - MDBX_CXX17_NOEXCEPT=noexcept MDBX_IF_CONSTEXPR=constexpr \ - MDBX_CXX20_LIKELY=[[likely]] MDBX_CXX20_UNLIKELY=[[unlikely]] \ - MDBX_MAYBE_UNUSED=[[maybe_unused]] \ - MDBX_DEPRECATED=[[deprecated]] + "MDBX_CXX20_CONCEPT(CONCEPT,NAME)=CONCEPT NAME" \ + MDBX_STD_FILESYSTEM_PATH=::mdbx::filesystem::path \ + MDBX_U128_TYPE=uint128_t \ + MDBX_I128_TYPE=int128_t \ + "MDBX_DECLARE_EXCEPTION(NAME)=struct LIBMDBX_API_TYPE NAME : public exception{NAME(const ::mdbx::error &); virtual ~NAME() noexcept; }" \ + MDBX_PURE_FUNCTION=[[gnu::pure]] \ + "MDBX_NOTHROW_PURE_FUNCTION=[[gnu::pure, gnu::nothrow]]" \ + MDBX_CONST_FUNCTION=[[gnu::const]] \ + "MDBX_NOTHROW_CONST_FUNCTION=[[gnu::const, gnu::nothrow]]" \ + MDBX_CXX01_CONSTEXPR=constexpr \ + MDBX_CXX01_CONSTEXPR_VAR=constexpr \ + MDBX_CXX11_CONSTEXPR=constexpr \ + MDBX_CXX11_CONSTEXPR_VAR=constexpr \ + MDBX_CXX14_CONSTEXPR=constexpr \ + MDBX_CXX14_CONSTEXPR_VAR=constexpr \ + MDBX_CXX17_CONSTEXPR=constexpr \ + MDBX_CXX20_CONSTEXPR=constexpr \ + MDBX_CXX17_NOEXCEPT=noexcept \ + MDBX_IF_CONSTEXPR=constexpr \ + MDBX_CXX20_LIKELY=[[likely]] \ + MDBX_CXX20_UNLIKELY=[[unlikely]] \ + MDBX_MAYBE_UNUSED=[[maybe_unused]] \ + 
MDBX_DEPRECATED=[[deprecated]] # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then this # tag can be used to specify a list of macro names that should be expanded. The @@ -2312,15 +2453,6 @@ EXTERNAL_PAGES = NO # Configuration options related to the dot tool #--------------------------------------------------------------------------- -# If the CLASS_DIAGRAMS tag is set to YES, doxygen will generate a class diagram -# (in HTML and LaTeX) for classes with base or super classes. Setting the tag to -# NO turns the diagrams off. Note that this option also works with HAVE_DOT -# disabled, but it is recommended to install and use dot, since it yields more -# powerful graphs. -# The default value is: YES. - -CLASS_DIAGRAMS = NO - # You can include diagrams made with dia in doxygen documentation. Doxygen will # then run dia to produce the diagram and insert it in the documentation. The # DIA_PATH tag allows you to specify the directory where the dia binary resides. @@ -2339,7 +2471,7 @@ HIDE_UNDOC_RELATIONS = YES # http://www.graphviz.org/), a graph visualization toolkit from AT&T and Lucent # Bell Labs. The other options in this section have no effect if this option is # set to NO -# The default value is: YES. +# The default value is: NO. HAVE_DOT = NO @@ -2353,37 +2485,52 @@ HAVE_DOT = NO DOT_NUM_THREADS = 0 -# When you want a differently looking font in the dot files that doxygen -# generates you can specify the font name using DOT_FONTNAME. You need to make -# sure dot is able to find the font, which can be done by putting it in a -# standard location or by setting the DOTFONTPATH environment variable or by -# setting DOT_FONTPATH to the directory containing the font. -# The default value is: Helvetica. +# DOT_COMMON_ATTR is common attributes for nodes, edges and labels of +# subgraphs. When you want a differently looking font in the dot files that +# doxygen generates you can specify fontname, fontcolor and fontsize attributes. 
+# For details please see Node, +# Edge and Graph Attributes specification You need to make sure dot is able +# to find the font, which can be done by putting it in a standard location or by +# setting the DOTFONTPATH environment variable or by setting DOT_FONTPATH to the +# directory containing the font. Default graphviz fontsize is 14. +# The default value is: fontname=Helvetica,fontsize=10. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_FONTNAME = Helvetica +DOT_COMMON_ATTR = "fontname=Helvetica,fontsize=10" -# The DOT_FONTSIZE tag can be used to set the size (in points) of the font of -# dot graphs. -# Minimum value: 4, maximum value: 24, default value: 10. +# DOT_EDGE_ATTR is concatenated with DOT_COMMON_ATTR. For elegant style you can +# add 'arrowhead=open, arrowtail=open, arrowsize=0.5'. Complete documentation about +# arrows shapes. +# The default value is: labelfontname=Helvetica,labelfontsize=10. # This tag requires that the tag HAVE_DOT is set to YES. -DOT_FONTSIZE = 10 +DOT_EDGE_ATTR = "labelfontname=Helvetica,labelfontsize=10" -# By default doxygen will tell dot to use the default font as specified with -# DOT_FONTNAME. If you specify a different font using DOT_FONTNAME you can set -# the path where dot can find it using this tag. +# DOT_NODE_ATTR is concatenated with DOT_COMMON_ATTR. For view without boxes +# around nodes set 'shape=plain' or 'shape=plaintext' Shapes specification +# The default value is: shape=box,height=0.2,width=0.4. +# This tag requires that the tag HAVE_DOT is set to YES. + +DOT_NODE_ATTR = "shape=box,height=0.2,width=0.4" + +# You can set the path where dot can find font specified with fontname in +# DOT_COMMON_ATTR and others dot attributes. # This tag requires that the tag HAVE_DOT is set to YES. DOT_FONTPATH = -# If the CLASS_GRAPH tag is set to YES then doxygen will generate a graph for -# each documented class showing the direct and indirect inheritance relations. 
-# Setting this tag to YES will force the CLASS_DIAGRAMS tag to NO. +# If the CLASS_GRAPH tag is set to YES (or GRAPH) then doxygen will generate a +# graph for each documented class showing the direct and indirect inheritance +# relations. In case HAVE_DOT is set as well dot will be used to draw the graph, +# otherwise the built-in generator will be used. If the CLASS_GRAPH tag is set +# to TEXT the direct and indirect inheritance relations will be shown as texts / +# links. +# Possible values are: NO, YES, TEXT and GRAPH. # The default value is: YES. -# This tag requires that the tag HAVE_DOT is set to YES. -CLASS_GRAPH = YES +CLASS_GRAPH = TEXT # If the COLLABORATION_GRAPH tag is set to YES then doxygen will generate a # graph for each documented class showing the direct and indirect implementation @@ -2395,7 +2542,8 @@ CLASS_GRAPH = YES COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS tag is set to YES then doxygen will generate a graph for -# groups, showing the direct groups dependencies. +# groups, showing the direct groups dependencies. See also the chapter Grouping +# in the manual. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2510,6 +2658,13 @@ GRAPHICAL_HIERARCHY = YES DIRECTORY_GRAPH = YES +# The DIR_GRAPH_MAX_DEPTH tag can be used to limit the maximum number of levels +# of child directories generated in directory dependency graphs by dot. +# Minimum value: 1, maximum value: 25, default value: 1. +# This tag requires that the tag DIRECTORY_GRAPH is set to YES. + +DIR_GRAPH_MAX_DEPTH = 1 + # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. 
For an explanation of the image formats see the section # output formats in the documentation of the dot tool (Graphviz (see: @@ -2517,9 +2672,7 @@ DIRECTORY_GRAPH = YES # Note: If you choose svg you need to set HTML_FILE_EXTENSION to xhtml in order # to make the SVG files visible in IE 9+ (other browsers do not have this # requirement). -# Possible values are: png, png:cairo, png:cairo:cairo, png:cairo:gd, png:gd, -# png:gd:gd, jpg, jpg:cairo, jpg:cairo:gd, jpg:gd, jpg:gd:gd, gif, gif:cairo, -# gif:cairo:gd, gif:gd, gif:gd:gd, svg, png:gd, png:gd:gd, png:cairo, +# Possible values are: png, jpg, gif, svg, png:gd, png:gd:gd, png:cairo, # png:cairo:gd, png:cairo:cairo, png:cairo:gdiplus, png:gdiplus and # png:gdiplus:gdiplus. # The default value is: png. @@ -2565,10 +2718,10 @@ MSCFILE_DIRS = DIAFILE_DIRS = # When using plantuml, the PLANTUML_JAR_PATH tag should be used to specify the -# path where java can find the plantuml.jar file. If left blank, it is assumed -# PlantUML is not used or called during a preprocessing step. Doxygen will -# generate a warning when it encounters a \startuml command in this case and -# will not generate output for the diagram. +# path where java can find the plantuml.jar file or to the filename of jar file +# to be used. If left blank, it is assumed PlantUML is not used or called during +# a preprocessing step. Doxygen will generate a warning when it encounters a +# \startuml command in this case and will not generate output for the diagram. PLANTUML_JAR_PATH = @@ -2606,18 +2759,6 @@ DOT_GRAPH_MAX_NODES = 50 MAX_DOT_GRAPH_DEPTH = 0 -# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent -# background. This is disabled by default, because dot on Windows does not seem -# to support this out of the box. -# -# Warning: Depending on the platform used, enabling this option may lead to -# badly anti-aliased labels on the edges of a graph (i.e. they become hard to -# read). -# The default value is: NO. 
-# This tag requires that the tag HAVE_DOT is set to YES. - -DOT_TRANSPARENT = NO - # Set the DOT_MULTI_TARGETS tag to YES to allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) support @@ -2630,6 +2771,8 @@ DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES doxygen will generate a legend page # explaining the meaning of the various boxes and arrows in the dot generated # graphs. +# Note: This tag requires that UML_LOOK isn't set, i.e. the doxygen internal +# graphical representation for inheritance and collaboration diagrams is used. # The default value is: YES. # This tag requires that the tag HAVE_DOT is set to YES. @@ -2638,8 +2781,8 @@ GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES, doxygen will remove the intermediate # files that are used to generate the various graphs. # -# Note: This setting is not only used for dot files but also for msc and -# plantuml temporary files. +# Note: This setting is not only used for dot files but also for msc temporary +# files. # The default value is: YES. 
DOT_CLEANUP = YES From 5ebc2c523da7775402c75266e66f74885f04fa27 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 23 Oct 2023 20:35:55 +0300 Subject: [PATCH 017/137] =?UTF-8?q?mdbx:=20=D0=BE=D0=B1=D0=BD=D0=BE=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/ChangeLog.md b/ChangeLog.md index 53ca0059..78cbe391 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -9,6 +9,24 @@ and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic Не выпуск, а начало ветки `0.13` с новым функционалом и изменением API. +Новое: + + - Расширение API функционалом проверки целостности структуры БД, с + переработкой и переносом функционала утилиты `mdbx_chk` внутрь библиотеки. + + - Расширение API функциями lock/unlock/upgrade/downgrade основной блокировки. + + - Добавление в API функций `mdbx_cursor_unbind()` и `mdbx_txn_release_all_cursors()`. + + - Возвращение `MDBX_TXN_INVALID` (`INT32_MIN`) вместо `-1` + из `mdbx_txn_flags()` при передаче невалидной транзакции. + +Мелочи: + + - Обновление конфигурации Doxygen до 1.9.6. + - Добавление `--read-var-info=yes` для Valgrind. + - Вывод из `mdbx_chk` информации об уровне детализации/verbosity.
+ ******************************************************************************** From ad4d00677b72e58022bb06cb92733216e5ca2635 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 29 Oct 2023 16:39:35 +0300 Subject: [PATCH 018/137] =?UTF-8?q?mdbx:=20PTHREAD=5FMUTEX=5FERRORCHECK=20?= =?UTF-8?q?=D0=BF=D1=80=D0=B8=20MDBX=5FDEBUG=20>=200.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/osal.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/osal.c b/src/osal.c index b07565b4..adffbabf 100644 --- a/src/osal.c +++ b/src/osal.c @@ -503,8 +503,18 @@ MDBX_INTERNAL_FUNC int osal_fastmutex_init(osal_fastmutex_t *fastmutex) { #if defined(_WIN32) || defined(_WIN64) InitializeCriticalSection(fastmutex); return MDBX_SUCCESS; +#elif MDBX_DEBUG + pthread_mutexattr_t ma; + int rc = pthread_mutexattr_init(&ma); + if (likely(!rc)) { + rc = pthread_mutexattr_settype(&ma, PTHREAD_MUTEX_ERRORCHECK); + if (likely(!rc) || rc == ENOTSUP) + rc = pthread_mutex_init(fastmutex, &ma); + pthread_mutexattr_destroy(&ma); + } + return rc; #else - return pthread_mutex_init(fastmutex, NULL); + return pthread_mutex_init(fastmutex, nullptr); #endif } From 07fc7b9227b699397f8c838c36f2ab719bfddcd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 30 Oct 2023 12:25:05 +0300 Subject: [PATCH 019/137] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D1=86=D0=B8?= =?UTF-8?q?=D0=B8=20`--taillog`=20=D0=B2=20=D1=81=D1=82=D0=BE=D1=85=D0=B0?= =?UTF-8?q?=D1=81=D1=82=D0=B8=D1=87=D0=B5=D1=81=D0=BA=D0=B8=D0=B9=20=D1=81?= =?UTF-8?q?=D0=BA=D1=80=D0=B8=D0=BF=D1=82.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 
GNUmakefile | 6 +++--- test/long_stochastic.sh | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 3 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index c8d79a95..104ae372 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -418,15 +418,15 @@ smoke-fault: build-test test: build-test @echo ' RUNNING `test/long_stochastic.sh --loops 2`...' - $(QUIET)test/long_stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) + $(QUIET)test/long_stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) long-test: build-test @echo ' RUNNING `test/long_stochastic.sh --loops 42`...' - $(QUIET)test/long_stochastic.sh --loops 42 --db-upto-mb 1024 --skip-make + $(QUIET)test/long_stochastic.sh --loops 42 --db-upto-mb 1024 --skip-make --taillog test-singleprocess: build-test @echo ' RUNNING `test/long_stochastic.sh --single --loops 2`...' - $(QUIET)test/long_stochastic.sh --dont-check-ram-size --single --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) + $(QUIET)test/long_stochastic.sh --dont-check-ram-size --single --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) test-valgrind: CFLAGS_EXTRA=-Ofast -DMDBX_USE_VALGRIND test-valgrind: build-test diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index 491ec695..c03c83da 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -13,6 +13,7 @@ DB_UPTO_MB=17408 PAGESIZE=min DONT_CHECK_RAM=no EXTRA=no +TAILLOG=0 while [ -n "$1" ] do @@ -35,9 +36,13 @@ do echo "--pagesize NN Use specified page size (256 is minimal and used by default)" echo "--dont-check-ram-size Don't check available RAM" echo "--extra Iterate extra modes/flags" + echo "--taillog Dump tail of test log on failure" echo "--help Print this usage help and exit" exit -2 ;; + --taillog) + TAILLOG=999 + ;; --multi) LIST=basic ;; @@ 
-345,14 +350,38 @@ if which lz4 >/dev/null; then function logger { lz4 > ${TESTDB_DIR}/long.log.lz4 } + function taillog { + if [ -s ${TESTDB_DIR}/long.log.lz4 ]; then + echo "=============================================== last ${TAILLOG} lines" + lz4 -d -c ${TESTDB_DIR}/long.log.lz4 | tail -n ${TAILLOG} + else + echo "=============================================== no test log" + fi + } elif which gzip >/dev/null; then function logger { gzip > ${TESTDB_DIR}/long.log.gz } + function taillog { + if [ -s ${TESTDB_DIR}/long.log.gz ]; then + echo "=============================================== last ${TAILLOG} lines" + gzip -d -c ${TESTDB_DIR}/long.log.gz | tail -n ${TAILLOG} + else + echo "=============================================== no test log" + fi + } else function logger { cat > ${TESTDB_DIR}/long.log } + function taillog { + if [ -s ${TESTDB_DIR}/long.log ]; then + echo "=============================================== last ${TAILLOG} lines" + tail -n ${TAILLOG} ${TESTDB_DIR}/long.log + else + echo "=============================================== no test log" + fi + } fi if [ "$EXTRA" != "no" ]; then @@ -375,6 +404,9 @@ function bits2options { function failed { echo "FAILED" >&2 + if [ ${TAILLOG} -gt 0 ]; then + taillog + fi exit 1 } From 7a413406bef54de8baed8ad38203d99acafba26d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 30 Oct 2023 20:32:10 +0300 Subject: [PATCH 020/137] =?UTF-8?q?mdbx-test:=20=D0=BE=D0=B1=D0=BD=D0=BE?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B8=D1=81=D0=BA=D0=BB?= =?UTF-8?q?=D1=8E=D1=87=D0=B5=D0=BD=D0=B8=D0=B9=20=D0=B4=D0=BB=D1=8F=20Val?= =?UTF-8?q?grind.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/valgrind_suppress.txt | 75 +------------------------------------- 1 file changed, 1 insertion(+), 74 deletions(-) diff --git a/test/valgrind_suppress.txt 
b/test/valgrind_suppress.txt index 5bc50077..c01054ac 100644 --- a/test/valgrind_suppress.txt +++ b/test/valgrind_suppress.txt @@ -2,7 +2,6 @@ msync-whole-mmap-1 Memcheck:Param msync(start) - fun:msync ... fun:sync_locked* } @@ -10,7 +9,6 @@ msync-whole-mmap-2 Memcheck:Param msync(start) - fun:msync ... fun:env_sync* } @@ -18,7 +16,6 @@ msync-whole-mmap-3 Memcheck:Param msync(start) - fun:msync ... fun:map_resize* } @@ -26,7 +23,6 @@ msync-wipe-steady Memcheck:Param msync(start) - fun:msync ... fun:wipe_steady* } @@ -34,7 +30,6 @@ msync-meta Memcheck:Param msync(start) - fun:msync ... fun:meta_sync* } @@ -42,7 +37,6 @@ msync-spill Memcheck:Param msync(start) - fun:msync ... fun:txn_spill* } @@ -72,7 +66,6 @@ pwrite-page-flush Memcheck:Param pwrite(buf) - fun:pwrite ... fun:iov_write* } @@ -80,7 +73,6 @@ pwrite64-page-flush Memcheck:Param pwrite64(buf) - fun:pwrite ... fun:iov_write* } @@ -90,16 +82,14 @@ # pwritev-page-flush # Memcheck:Param # pwritev(vector[...]) -# fun:pwritev # ... # fun:iov_write* #} -# for((i=0;i<64;++i)); do echo -e "{\n pwritev-page-flush-$i\n Memcheck:Param\n pwritev(vector[$i])\n fun:pwritev\n ...\n fun:iov_write*\n}"; done >> valgrind_suppress.txt +# for((i=0;i<64;++i)); do echo -e "{\n pwritev-page-flush-$i\n Memcheck:Param\n pwritev(vector[$i])\n ...\n fun:iov_write*\n}"; done >> valgrind_suppress.txt { pwritev-page-flush-0 Memcheck:Param pwritev(vector[0]) - fun:pwritev ... fun:iov_write* } @@ -107,7 +97,6 @@ pwritev-page-flush-1 Memcheck:Param pwritev(vector[1]) - fun:pwritev ... fun:iov_write* } @@ -115,7 +104,6 @@ pwritev-page-flush-2 Memcheck:Param pwritev(vector[2]) - fun:pwritev ... fun:iov_write* } @@ -123,7 +111,6 @@ pwritev-page-flush-3 Memcheck:Param pwritev(vector[3]) - fun:pwritev ... fun:iov_write* } @@ -131,7 +118,6 @@ pwritev-page-flush-4 Memcheck:Param pwritev(vector[4]) - fun:pwritev ... fun:iov_write* } @@ -139,7 +125,6 @@ pwritev-page-flush-5 Memcheck:Param pwritev(vector[5]) - fun:pwritev ... 
fun:iov_write* } @@ -147,7 +132,6 @@ pwritev-page-flush-6 Memcheck:Param pwritev(vector[6]) - fun:pwritev ... fun:iov_write* } @@ -155,7 +139,6 @@ pwritev-page-flush-7 Memcheck:Param pwritev(vector[7]) - fun:pwritev ... fun:iov_write* } @@ -163,7 +146,6 @@ pwritev-page-flush-8 Memcheck:Param pwritev(vector[8]) - fun:pwritev ... fun:iov_write* } @@ -171,7 +153,6 @@ pwritev-page-flush-9 Memcheck:Param pwritev(vector[9]) - fun:pwritev ... fun:iov_write* } @@ -179,7 +160,6 @@ pwritev-page-flush-10 Memcheck:Param pwritev(vector[10]) - fun:pwritev ... fun:iov_write* } @@ -187,7 +167,6 @@ pwritev-page-flush-11 Memcheck:Param pwritev(vector[11]) - fun:pwritev ... fun:iov_write* } @@ -195,7 +174,6 @@ pwritev-page-flush-12 Memcheck:Param pwritev(vector[12]) - fun:pwritev ... fun:iov_write* } @@ -203,7 +181,6 @@ pwritev-page-flush-13 Memcheck:Param pwritev(vector[13]) - fun:pwritev ... fun:iov_write* } @@ -211,7 +188,6 @@ pwritev-page-flush-14 Memcheck:Param pwritev(vector[14]) - fun:pwritev ... fun:iov_write* } @@ -219,7 +195,6 @@ pwritev-page-flush-15 Memcheck:Param pwritev(vector[15]) - fun:pwritev ... fun:iov_write* } @@ -227,7 +202,6 @@ pwritev-page-flush-16 Memcheck:Param pwritev(vector[16]) - fun:pwritev ... fun:iov_write* } @@ -235,7 +209,6 @@ pwritev-page-flush-17 Memcheck:Param pwritev(vector[17]) - fun:pwritev ... fun:iov_write* } @@ -243,7 +216,6 @@ pwritev-page-flush-18 Memcheck:Param pwritev(vector[18]) - fun:pwritev ... fun:iov_write* } @@ -251,7 +223,6 @@ pwritev-page-flush-19 Memcheck:Param pwritev(vector[19]) - fun:pwritev ... fun:iov_write* } @@ -259,7 +230,6 @@ pwritev-page-flush-20 Memcheck:Param pwritev(vector[20]) - fun:pwritev ... fun:iov_write* } @@ -267,7 +237,6 @@ pwritev-page-flush-21 Memcheck:Param pwritev(vector[21]) - fun:pwritev ... fun:iov_write* } @@ -275,7 +244,6 @@ pwritev-page-flush-22 Memcheck:Param pwritev(vector[22]) - fun:pwritev ... 
fun:iov_write* } @@ -283,7 +251,6 @@ pwritev-page-flush-23 Memcheck:Param pwritev(vector[23]) - fun:pwritev ... fun:iov_write* } @@ -291,7 +258,6 @@ pwritev-page-flush-24 Memcheck:Param pwritev(vector[24]) - fun:pwritev ... fun:iov_write* } @@ -299,7 +265,6 @@ pwritev-page-flush-25 Memcheck:Param pwritev(vector[25]) - fun:pwritev ... fun:iov_write* } @@ -307,7 +272,6 @@ pwritev-page-flush-26 Memcheck:Param pwritev(vector[26]) - fun:pwritev ... fun:iov_write* } @@ -315,7 +279,6 @@ pwritev-page-flush-27 Memcheck:Param pwritev(vector[27]) - fun:pwritev ... fun:iov_write* } @@ -323,7 +286,6 @@ pwritev-page-flush-28 Memcheck:Param pwritev(vector[28]) - fun:pwritev ... fun:iov_write* } @@ -331,7 +293,6 @@ pwritev-page-flush-29 Memcheck:Param pwritev(vector[29]) - fun:pwritev ... fun:iov_write* } @@ -339,7 +300,6 @@ pwritev-page-flush-30 Memcheck:Param pwritev(vector[30]) - fun:pwritev ... fun:iov_write* } @@ -347,7 +307,6 @@ pwritev-page-flush-31 Memcheck:Param pwritev(vector[31]) - fun:pwritev ... fun:iov_write* } @@ -355,7 +314,6 @@ pwritev-page-flush-32 Memcheck:Param pwritev(vector[32]) - fun:pwritev ... fun:iov_write* } @@ -363,7 +321,6 @@ pwritev-page-flush-33 Memcheck:Param pwritev(vector[33]) - fun:pwritev ... fun:iov_write* } @@ -371,7 +328,6 @@ pwritev-page-flush-34 Memcheck:Param pwritev(vector[34]) - fun:pwritev ... fun:iov_write* } @@ -379,7 +335,6 @@ pwritev-page-flush-35 Memcheck:Param pwritev(vector[35]) - fun:pwritev ... fun:iov_write* } @@ -387,7 +342,6 @@ pwritev-page-flush-36 Memcheck:Param pwritev(vector[36]) - fun:pwritev ... fun:iov_write* } @@ -395,7 +349,6 @@ pwritev-page-flush-37 Memcheck:Param pwritev(vector[37]) - fun:pwritev ... fun:iov_write* } @@ -403,7 +356,6 @@ pwritev-page-flush-38 Memcheck:Param pwritev(vector[38]) - fun:pwritev ... fun:iov_write* } @@ -411,7 +363,6 @@ pwritev-page-flush-39 Memcheck:Param pwritev(vector[39]) - fun:pwritev ... 
fun:iov_write* } @@ -419,7 +370,6 @@ pwritev-page-flush-40 Memcheck:Param pwritev(vector[40]) - fun:pwritev ... fun:iov_write* } @@ -427,7 +377,6 @@ pwritev-page-flush-41 Memcheck:Param pwritev(vector[41]) - fun:pwritev ... fun:iov_write* } @@ -435,7 +384,6 @@ pwritev-page-flush-42 Memcheck:Param pwritev(vector[42]) - fun:pwritev ... fun:iov_write* } @@ -443,7 +391,6 @@ pwritev-page-flush-43 Memcheck:Param pwritev(vector[43]) - fun:pwritev ... fun:iov_write* } @@ -451,7 +398,6 @@ pwritev-page-flush-44 Memcheck:Param pwritev(vector[44]) - fun:pwritev ... fun:iov_write* } @@ -459,7 +405,6 @@ pwritev-page-flush-45 Memcheck:Param pwritev(vector[45]) - fun:pwritev ... fun:iov_write* } @@ -467,7 +412,6 @@ pwritev-page-flush-46 Memcheck:Param pwritev(vector[46]) - fun:pwritev ... fun:iov_write* } @@ -475,7 +419,6 @@ pwritev-page-flush-47 Memcheck:Param pwritev(vector[47]) - fun:pwritev ... fun:iov_write* } @@ -483,7 +426,6 @@ pwritev-page-flush-48 Memcheck:Param pwritev(vector[48]) - fun:pwritev ... fun:iov_write* } @@ -491,7 +433,6 @@ pwritev-page-flush-49 Memcheck:Param pwritev(vector[49]) - fun:pwritev ... fun:iov_write* } @@ -499,7 +440,6 @@ pwritev-page-flush-50 Memcheck:Param pwritev(vector[50]) - fun:pwritev ... fun:iov_write* } @@ -507,7 +447,6 @@ pwritev-page-flush-51 Memcheck:Param pwritev(vector[51]) - fun:pwritev ... fun:iov_write* } @@ -515,7 +454,6 @@ pwritev-page-flush-52 Memcheck:Param pwritev(vector[52]) - fun:pwritev ... fun:iov_write* } @@ -523,7 +461,6 @@ pwritev-page-flush-53 Memcheck:Param pwritev(vector[53]) - fun:pwritev ... fun:iov_write* } @@ -531,7 +468,6 @@ pwritev-page-flush-54 Memcheck:Param pwritev(vector[54]) - fun:pwritev ... fun:iov_write* } @@ -539,7 +475,6 @@ pwritev-page-flush-55 Memcheck:Param pwritev(vector[55]) - fun:pwritev ... fun:iov_write* } @@ -547,7 +482,6 @@ pwritev-page-flush-56 Memcheck:Param pwritev(vector[56]) - fun:pwritev ... 
fun:iov_write* } @@ -555,7 +489,6 @@ pwritev-page-flush-57 Memcheck:Param pwritev(vector[57]) - fun:pwritev ... fun:iov_write* } @@ -563,7 +496,6 @@ pwritev-page-flush-58 Memcheck:Param pwritev(vector[58]) - fun:pwritev ... fun:iov_write* } @@ -571,7 +503,6 @@ pwritev-page-flush-59 Memcheck:Param pwritev(vector[59]) - fun:pwritev ... fun:iov_write* } @@ -579,7 +510,6 @@ pwritev-page-flush-60 Memcheck:Param pwritev(vector[60]) - fun:pwritev ... fun:iov_write* } @@ -587,7 +517,6 @@ pwritev-page-flush-61 Memcheck:Param pwritev(vector[61]) - fun:pwritev ... fun:iov_write* } @@ -595,7 +524,6 @@ pwritev-page-flush-62 Memcheck:Param pwritev(vector[62]) - fun:pwritev ... fun:iov_write* } @@ -603,7 +531,6 @@ pwritev-page-flush-63 Memcheck:Param pwritev(vector[63]) - fun:pwritev ... fun:iov_write* } From 54920cd07bc42395ebf6ef8db0a727ae1a2ff893 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 31 Oct 2023 12:11:59 +0300 Subject: [PATCH 021/137] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20assert-=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D0=B2=D0=B5=D1=80=D0=BE=D0=BA=20=D0=B2=D0=BD=D1=83=D1=82?= =?UTF-8?q?=D1=80=D0=B8=20`osal=5Ftxn=5Flock()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lck-posix.c | 5 ++++- src/lck-windows.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/lck-posix.c b/src/lck-posix.c index 7f58e9ed..d55a9395 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -956,11 +956,14 @@ MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env) { int osal_txn_lock(MDBX_env *env, bool dont_wait) { TRACE("%swait %s", dont_wait ? 
"dont-" : "", ">>"); - eASSERT(env, !env->me_txn0->mt_owner); jitter4testing(true); const int err = mdbx_ipclock_lock(env, &env->me_lck->mti_wlock, dont_wait); int rc = err; if (likely(!MDBX_IS_ERROR(err))) { + eASSERT(env, !env->me_txn0->mt_owner || + err == /* если другой поток в этом-же процессе завершился + не освободив блокировку */ + MDBX_RESULT_TRUE); env->me_txn0->mt_owner = osal_thread_self(); rc = MDBX_SUCCESS; } diff --git a/src/lck-windows.c b/src/lck-windows.c index ed77da30..d2354285 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -179,7 +179,6 @@ static int funlock(mdbx_filehandle_t fd, size_t offset, size_t bytes) { #define DXB_WHOLE 0, DXB_MAXLEN int osal_txn_lock(MDBX_env *env, bool dontwait) { - eASSERT(env, !env->me_txn0->mt_owner); if (dontwait) { if (!TryEnterCriticalSection(&env->me_windowsbug_lock)) return MDBX_BUSY; @@ -195,6 +194,7 @@ int osal_txn_lock(MDBX_env *env, bool dontwait) { } } + eASSERT(env, !env->me_txn0->mt_owner); if (env->me_flags & MDBX_EXCLUSIVE) goto done; From 81f386f83123f7db742054cf1693408e8fc82ea1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 3 Nov 2023 11:28:13 +0300 Subject: [PATCH 022/137] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D0=BC?= =?UTF-8?q?=D0=B5=D1=89=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=BE=D0=BB=D0=B5?= =?UTF-8?q?=D0=B9=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B8=20`MDBX=5Ftxn`=20?= =?UTF-8?q?=D0=B8=20`MDBX=5Fenv`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit В текущем понимании так префетчер ЦПУ может быть чуть более эффективным и чуть меньше зазоров для выравнивания. 
--- src/internals.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/src/internals.h b/src/internals.h index c871b3df..8fdb37a8 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1170,6 +1170,8 @@ struct MDBX_txn { #error "Oops, some txn flags overlapped or wrong" #endif uint32_t mt_flags; + unsigned mt_numdbs; + size_t mt_owner; /* thread ID that owns this transaction */ MDBX_txn *mt_parent; /* parent of a nested txn */ /* Nested txn under this txn, set together with flag MDBX_TXN_HAS_CHILD */ @@ -1191,8 +1193,6 @@ struct MDBX_txn { MDBX_dbx *mt_dbxs; /* Array of MDBX_db records for each known DB */ MDBX_db *mt_dbs; - /* Array of sequence numbers for each DB handle */ - MDBX_atomic_uint32_t *mt_dbiseqs; /* Transaction DBI Flags */ #define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */ @@ -1202,16 +1202,15 @@ struct MDBX_txn { #define DBI_VALID 0x10 /* DB handle is valid, see also DB_VALID */ #define DBI_USRVALID 0x20 /* As DB_VALID, but not set for FREE_DBI */ #define DBI_AUDITED 0x40 /* Internal flag for accounting during audit */ - /* Array of flags for each DB */ + /* Array of non-shared txn's flags of DBI */ uint8_t *mt_dbistate; - /* Number of DB records in use, or 0 when the txn is finished. - * This number only ever increments until the txn finishes; we - * don't decrement it when individual DB handles are closed. */ - MDBX_dbi mt_numdbs; - size_t mt_owner; /* thread ID that owns this transaction */ + + /* Array of sequence numbers for each DB handle. 
*/ + MDBX_atomic_uint32_t *mt_dbiseqs; + MDBX_cursor **mt_cursors; + MDBX_canary mt_canary; void *mt_userctx; /* User-settable context */ - MDBX_cursor **mt_cursors; union { struct { @@ -1364,6 +1363,7 @@ struct MDBX_env { #define MDBX_DEPRECATED_COALESCE UINT32_C(0x2000000) #define ENV_INTERNAL_FLAGS (MDBX_FATAL_ERROR | MDBX_ENV_ACTIVE | MDBX_ENV_TXKEY) uint32_t me_flags; + unsigned me_psize; /* DB page size, initialized from me_os_psize */ osal_mmap_t me_dxb_mmap; /* The main data file */ #define me_map me_dxb_mmap.base #define me_lazy_fd me_dxb_mmap.fd @@ -1376,7 +1376,6 @@ struct MDBX_env { #define me_lfd me_lck_mmap.fd struct MDBX_lockinfo *me_lck; - unsigned me_psize; /* DB page size, initialized from me_os_psize */ unsigned me_leaf_nodemax; /* max size of a leaf-node */ unsigned me_branch_nodemax; /* max size of a branch-node */ atomic_pgno_t me_mlocked_pgno; @@ -1448,6 +1447,7 @@ struct MDBX_env { } me_sysv_ipc; #endif /* MDBX_LOCKING == MDBX_LOCKING_SYSV */ bool me_incore; + bool me_prefault_write; MDBX_env *me_lcklist_next; @@ -1455,11 +1455,11 @@ struct MDBX_env { MDBX_txn *me_txn; /* current write transaction */ osal_fastmutex_t me_dbi_lock; - MDBX_dbi me_numdbs; /* number of DBs opened */ - bool me_prefault_write; + unsigned me_numdbs; /* number of DBs opened */ - MDBX_page *me_dp_reserve; /* list of malloc'ed blocks for re-use */ unsigned me_dp_reserve_len; + MDBX_page *me_dp_reserve; /* list of malloc'ed blocks for re-use */ + /* PNL of pages that became unused in a write txn */ MDBX_PNL me_retired_pages; osal_ioring_t me_ioring; From f3171707066b8cf83ff0e386d4b7cfbf1d3719c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 3 Nov 2023 11:30:54 +0300 Subject: [PATCH 023/137] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B8?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D0=B2?= 
=?UTF-8?q?=D0=BD=D1=83=D1=82=D1=80=D0=B5=D0=BD=D0=BD=D0=B8=D1=85=20=D0=BF?= =?UTF-8?q?=D0=BE=D0=BB=D0=B5=D0=B9=20=D0=B8=20=D0=BC=D0=B0=D0=BA=D1=80?= =?UTF-8?q?=D0=BE=D1=81=D0=BE=D0=B2=20=D0=B4=D0=BB=D1=8F=20=D1=83=D0=BB?= =?UTF-8?q?=D1=83=D1=87=D1=88=D0=B5=D0=BD=D0=B8=D1=8F=20=D1=87=D0=B8=D1=82?= =?UTF-8?q?=D0=B0=D0=B5=D0=BC=D0=BE=D1=81=D1=82=D0=B8=20=D0=BA=D0=BE=D0=B4?= =?UTF-8?q?=D0=B0.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 314 ++++++++++++++++++++++++------------------------ src/internals.h | 32 ++--- 2 files changed, 174 insertions(+), 172 deletions(-) diff --git a/src/core.c b/src/core.c index 4a741bf3..2db1e56f 100644 --- a/src/core.c +++ b/src/core.c @@ -3232,26 +3232,26 @@ static int page_touch(MDBX_cursor *mc); static int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, const MDBX_val *data); -#define MDBX_END_NAMES \ +#define TXN_END_NAMES \ { \ "committed", "empty-commit", "abort", "reset", "reset-tmp", "fail-begin", \ "fail-beginchild" \ } enum { /* txn_end operation number, for logging */ - MDBX_END_COMMITTED, - MDBX_END_PURE_COMMIT, - MDBX_END_ABORT, - MDBX_END_RESET, - MDBX_END_RESET_TMP, - MDBX_END_FAIL_BEGIN, - MDBX_END_FAIL_BEGINCHILD + TXN_END_COMMITTED, + TXN_END_PURE_COMMIT, + TXN_END_ABORT, + TXN_END_RESET, + TXN_END_RESET_TMP, + TXN_END_FAIL_BEGIN, + TXN_END_FAIL_BEGINCHILD }; -#define MDBX_END_OPMASK 0x0F /* mask for txn_end() operation number */ -#define MDBX_END_UPDATE 0x10 /* update env state (DBIs) */ -#define MDBX_END_FREE 0x20 /* free txn unless it is MDBX_env.me_txn0 */ -#define MDBX_END_EOTDONE 0x40 /* txn's cursors already closed */ -#define MDBX_END_SLOT 0x80 /* release any reader slot if MDBX_NOTLS */ +#define TXN_END_OPMASK 0x0F /* mask for txn_end() operation number */ +#define TXN_END_UPDATE 0x10 /* update env state (DBIs) */ +#define TXN_END_FREE 0x20 /* free txn unless it is MDBX_env.me_txn0 */ +#define TXN_END_EOTDONE 0x40 /* txn's cursors already 
closed */ +#define TXN_END_SLOT 0x80 /* release any reader slot if MDBX_NOTLS */ static int txn_end(MDBX_txn *txn, const unsigned mode); static __always_inline pgr_t page_get_inline(const uint16_t ILL, @@ -4830,7 +4830,7 @@ static size_t txn_keep(MDBX_txn *txn, MDBX_cursor *m0) { txn_lru_turn(txn); size_t keep = m0 ? cursor_keep(txn, m0) : 0; for (size_t i = FREE_DBI; i < txn->mt_numdbs; ++i) - if (F_ISSET(txn->mt_dbistate[i], DBI_DIRTY | DBI_VALID) && + if (F_ISSET(txn->mt_dbi_state[i], DBI_DIRTY | DBI_VALID) && txn->mt_dbs[i].md_root != P_INVALID) for (MDBX_cursor *mc = txn->mt_cursors[i]; mc; mc = mc->mc_next) if (mc != m0) @@ -7761,7 +7761,7 @@ done: __hot static pgr_t page_alloc(const MDBX_cursor *const mc) { MDBX_txn *const txn = mc->mc_txn; tASSERT(txn, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); - tASSERT(txn, F_ISSET(txn->mt_dbistate[mc->mc_dbi], DBI_DIRTY | DBI_VALID)); + tASSERT(txn, F_ISSET(txn->mt_dbi_state[mc->mc_dbi], DBI_DIRTY | DBI_VALID)); /* If there are any loose pages, just use them */ while (likely(txn->tw.loose_pages)) { @@ -7901,7 +7901,7 @@ __hot static int page_touch(MDBX_cursor *mc) { int rc; tASSERT(txn, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); - tASSERT(txn, F_ISSET(*mc->mc_dbistate, DBI_DIRTY | DBI_VALID)); + tASSERT(txn, F_ISSET(*mc->mc_dbi_state, DBI_DIRTY | DBI_VALID)); tASSERT(txn, !IS_OVERFLOW(mp)); if (ASSERT_ENABLED()) { if (mc->mc_flags & C_SUB) { @@ -7909,7 +7909,7 @@ __hot static int page_touch(MDBX_cursor *mc) { MDBX_cursor_couple *couple = container_of(mx, MDBX_cursor_couple, inner); tASSERT(txn, mc->mc_db == &couple->outer.mc_xcursor->mx_db); tASSERT(txn, mc->mc_dbx == &couple->outer.mc_xcursor->mx_dbx); - tASSERT(txn, *couple->outer.mc_dbistate & DBI_DIRTY); + tASSERT(txn, *couple->outer.mc_dbi_state & DBI_DIRTY); } tASSERT(txn, dirtylist_check(txn)); } @@ -8313,7 +8313,7 @@ static int cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) { * txn pointer here for cursor fixups to keep working. 
*/ mc->mc_txn = nested; mc->mc_db = &nested->mt_dbs[i]; - mc->mc_dbistate = &nested->mt_dbistate[i]; + mc->mc_dbi_state = &nested->mt_dbi_state[i]; MDBX_xcursor *mx = mc->mc_xcursor; if (mx != NULL) { *(MDBX_xcursor *)(bk + 1) = *mx; @@ -8362,7 +8362,7 @@ static void cursors_eot(MDBX_txn *txn, const bool merge) { mc->mc_backup = bk->mc_backup; mc->mc_txn = bk->mc_txn; mc->mc_db = bk->mc_db; - mc->mc_dbistate = bk->mc_dbistate; + mc->mc_dbi_state = bk->mc_dbi_state; if (mx) { if (mx != bk->mc_xcursor) { *bk->mc_xcursor = *mx; @@ -8994,7 +8994,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { MDBX_PNL_SETSIZE(txn->tw.lifo_reclaimed, 0); env->me_txn = txn; txn->mt_numdbs = env->me_numdbs; - memcpy(txn->mt_dbiseqs, env->me_dbiseqs, txn->mt_numdbs * sizeof(unsigned)); + memcpy(txn->mt_dbi_seqs, env->me_dbi_seqs, + txn->mt_numdbs * sizeof(unsigned)); if ((txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) { rc = dpl_alloc(txn); @@ -9016,17 +9017,17 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { osal_compiler_barrier(); memset(txn->mt_cursors, 0, sizeof(MDBX_cursor *) * txn->mt_numdbs); for (size_t i = CORE_DBS; i < txn->mt_numdbs; i++) { - const unsigned db_flags = env->me_dbflags[i]; + const unsigned db_flags = env->me_db_flags[i]; txn->mt_dbs[i].md_flags = db_flags & DB_PERSISTENT_FLAGS; - txn->mt_dbistate[i] = + txn->mt_dbi_state[i] = (db_flags & DB_VALID) ? 
DBI_VALID | DBI_USRVALID | DBI_STALE : 0; } - txn->mt_dbistate[MAIN_DBI] = DBI_VALID | DBI_USRVALID; + txn->mt_dbi_state[MAIN_DBI] = DBI_VALID | DBI_USRVALID; rc = setup_dbx(&txn->mt_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - txn->mt_dbistate[FREE_DBI] = DBI_VALID; + txn->mt_dbi_state[FREE_DBI] = DBI_VALID; txn->mt_front = txn->mt_txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0); @@ -9134,7 +9135,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { } bailout: tASSERT(txn, rc != MDBX_SUCCESS); - txn_end(txn, MDBX_END_SLOT | MDBX_END_FAIL_BEGIN); + txn_end(txn, TXN_END_SLOT | TXN_END_FAIL_BEGIN); return rc; } @@ -9292,14 +9293,14 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, #if MDBX_DEBUG txn->mt_cursors[FREE_DBI] = nullptr; /* avoid SIGSEGV in an assertion later */ #endif /* MDBX_DEBUG */ - txn->mt_dbistate = ptr_disp(txn, size - env->me_maxdbs); + txn->mt_dbi_state = ptr_disp(txn, size - env->me_maxdbs); txn->mt_dbxs = env->me_dbxs; /* static */ txn->mt_flags = flags; txn->mt_env = env; if (parent) { tASSERT(parent, dirtylist_check(parent)); - txn->mt_dbiseqs = parent->mt_dbiseqs; + txn->mt_dbi_seqs = parent->mt_dbi_seqs; txn->mt_geo = parent->mt_geo; rc = dpl_alloc(txn); if (likely(rc == MDBX_SUCCESS)) { @@ -9380,10 +9381,10 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, txn->mt_owner = parent->mt_owner; memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDBX_db)); txn->tw.troika = parent->tw.troika; - /* Copy parent's mt_dbistate, but clear DB_NEW */ + /* Copy parent's mt_dbi_state, but clear DB_NEW */ for (size_t i = 0; i < txn->mt_numdbs; i++) - txn->mt_dbistate[i] = - parent->mt_dbistate[i] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + txn->mt_dbi_state[i] = + parent->mt_dbi_state[i] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length == 
(parent->mt_parent ? parent->mt_parent->tw.dirtyroom @@ -9398,9 +9399,9 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, tASSERT(txn, audit_ex(txn, 0, false) == 0); } if (unlikely(rc != MDBX_SUCCESS)) - txn_end(txn, MDBX_END_FAIL_BEGINCHILD); + txn_end(txn, TXN_END_FAIL_BEGINCHILD); } else { /* MDBX_TXN_RDONLY */ - txn->mt_dbiseqs = env->me_dbiseqs; + txn->mt_dbi_seqs = env->me_dbi_seqs; renew: rc = txn_renew(txn, flags); } @@ -9583,18 +9584,18 @@ int mdbx_txn_flags(const MDBX_txn *txn) { /* Check for misused dbi handles */ static __inline bool dbi_changed(const MDBX_txn *txn, size_t dbi) { - if (txn->mt_dbiseqs == txn->mt_env->me_dbiseqs) + if (txn->mt_dbi_seqs == txn->mt_env->me_dbi_seqs) return false; if (likely( - txn->mt_dbiseqs[dbi].weak == - atomic_load32((MDBX_atomic_uint32_t *)&txn->mt_env->me_dbiseqs[dbi], + txn->mt_dbi_seqs[dbi].weak == + atomic_load32((MDBX_atomic_uint32_t *)&txn->mt_env->me_dbi_seqs[dbi], mo_AcquireRelease))) return false; return true; } static __inline unsigned dbi_seq(const MDBX_env *const env, size_t slot) { - unsigned v = env->me_dbiseqs[slot].weak + 1; + unsigned v = env->me_dbi_seqs[slot].weak + 1; return v + (v == 0); } @@ -9604,21 +9605,21 @@ static void dbi_import_locked(MDBX_txn *txn) { for (size_t i = CORE_DBS; i < n; ++i) { if (i >= txn->mt_numdbs) { txn->mt_cursors[i] = NULL; - if (txn->mt_dbiseqs != env->me_dbiseqs) - txn->mt_dbiseqs[i].weak = 0; - txn->mt_dbistate[i] = 0; + if (txn->mt_dbi_seqs != env->me_dbi_seqs) + txn->mt_dbi_seqs[i].weak = 0; + txn->mt_dbi_state[i] = 0; } if ((dbi_changed(txn, i) && - (txn->mt_dbistate[i] & (DBI_CREAT | DBI_DIRTY | DBI_FRESH)) == 0) || - ((env->me_dbflags[i] & DB_VALID) && - !(txn->mt_dbistate[i] & DBI_VALID))) { - tASSERT(txn, - (txn->mt_dbistate[i] & (DBI_CREAT | DBI_DIRTY | DBI_FRESH)) == 0); - txn->mt_dbiseqs[i] = env->me_dbiseqs[i]; - txn->mt_dbs[i].md_flags = env->me_dbflags[i] & DB_PERSISTENT_FLAGS; - txn->mt_dbistate[i] = 0; - if 
(env->me_dbflags[i] & DB_VALID) { - txn->mt_dbistate[i] = DBI_VALID | DBI_USRVALID | DBI_STALE; + (txn->mt_dbi_state[i] & (DBI_CREAT | DBI_DIRTY | DBI_FRESH)) == 0) || + ((env->me_db_flags[i] & DB_VALID) && + !(txn->mt_dbi_state[i] & DBI_VALID))) { + tASSERT(txn, (txn->mt_dbi_state[i] & + (DBI_CREAT | DBI_DIRTY | DBI_FRESH)) == 0); + txn->mt_dbi_seqs[i] = env->me_dbi_seqs[i]; + txn->mt_dbs[i].md_flags = env->me_db_flags[i] & DB_PERSISTENT_FLAGS; + txn->mt_dbi_state[i] = 0; + if (env->me_db_flags[i] & DB_VALID) { + txn->mt_dbi_state[i] = DBI_VALID | DBI_USRVALID | DBI_STALE; tASSERT(txn, txn->mt_dbxs[i].md_cmp != NULL); tASSERT(txn, txn->mt_dbxs[i].md_name.iov_base != NULL); } @@ -9626,13 +9627,13 @@ static void dbi_import_locked(MDBX_txn *txn) { } while (unlikely(n < txn->mt_numdbs)) if (txn->mt_cursors[txn->mt_numdbs - 1] == NULL && - (txn->mt_dbistate[txn->mt_numdbs - 1] & DBI_USRVALID) == 0) + (txn->mt_dbi_state[txn->mt_numdbs - 1] & DBI_USRVALID) == 0) txn->mt_numdbs -= 1; else { - if ((txn->mt_dbistate[n] & DBI_USRVALID) == 0) { - if (txn->mt_dbiseqs != env->me_dbiseqs) - txn->mt_dbiseqs[n].weak = 0; - txn->mt_dbistate[n] = 0; + if ((txn->mt_dbi_state[n] & DBI_USRVALID) == 0) { + if (txn->mt_dbi_seqs != env->me_dbi_seqs) + txn->mt_dbi_seqs[n].weak = 0; + txn->mt_dbi_state[n] = 0; } ++n; } @@ -9650,7 +9651,7 @@ __cold static bool dbi_import(MDBX_txn *txn, MDBX_dbi dbi) { dbi_import_locked(txn); ENSURE(txn->mt_env, osal_fastmutex_release(&txn->mt_env->me_dbi_lock) == MDBX_SUCCESS); - return txn->mt_dbistate[dbi] & DBI_USRVALID; + return txn->mt_dbi_state[dbi] & DBI_USRVALID; } /* Export or close DBI handles opened in this txn. 
*/ @@ -9662,43 +9663,43 @@ static void dbi_update(MDBX_txn *txn, int keep) { MDBX_env *const env = txn->mt_env; for (size_t i = n; --i >= CORE_DBS;) { - if (likely((txn->mt_dbistate[i] & DBI_CREAT) == 0)) + if (likely((txn->mt_dbi_state[i] & DBI_CREAT) == 0)) continue; if (!locked) { ENSURE(env, osal_fastmutex_acquire(&env->me_dbi_lock) == MDBX_SUCCESS); locked = true; } if (env->me_numdbs <= i || - txn->mt_dbiseqs[i].weak != env->me_dbiseqs[i].weak) + txn->mt_dbi_seqs[i].weak != env->me_dbi_seqs[i].weak) continue /* dbi explicitly closed and/or then re-opened by other txn */; if (keep) { - env->me_dbflags[i] = txn->mt_dbs[i].md_flags | DB_VALID; + env->me_db_flags[i] = txn->mt_dbs[i].md_flags | DB_VALID; } else { const MDBX_val name = env->me_dbxs[i].md_name; if (name.iov_base) { env->me_dbxs[i].md_name.iov_base = nullptr; - eASSERT(env, env->me_dbflags[i] == 0); - atomic_store32(&env->me_dbiseqs[i], dbi_seq(env, i), + eASSERT(env, env->me_db_flags[i] == 0); + atomic_store32(&env->me_dbi_seqs[i], dbi_seq(env, i), mo_AcquireRelease); env->me_dbxs[i].md_name.iov_len = 0; if (name.iov_len) osal_free(name.iov_base); } else { eASSERT(env, name.iov_len == 0); - eASSERT(env, env->me_dbflags[i] == 0); + eASSERT(env, env->me_db_flags[i] == 0); } } } n = env->me_numdbs; - if (n > CORE_DBS && unlikely(!(env->me_dbflags[n - 1] & DB_VALID))) { + if (n > CORE_DBS && unlikely(!(env->me_db_flags[n - 1] & DB_VALID))) { if (!locked) { ENSURE(env, osal_fastmutex_acquire(&env->me_dbi_lock) == MDBX_SUCCESS); locked = true; } n = env->me_numdbs; - while (n > CORE_DBS && !(env->me_dbflags[n - 1] & DB_VALID)) + while (n > CORE_DBS && !(env->me_db_flags[n - 1] & DB_VALID)) --n; env->me_numdbs = n; } @@ -9782,7 +9783,7 @@ static void dpl_sift(MDBX_txn *const txn, MDBX_PNL pl, const bool spilled) { * [in] mode why and how to end the transaction */ static int txn_end(MDBX_txn *txn, const unsigned mode) { MDBX_env *env = txn->mt_env; - static const char *const names[] = MDBX_END_NAMES; + 
static const char *const names[] = TXN_END_NAMES; #if MDBX_ENV_CHECKPID if (unlikely(txn->mt_env->me_pid != osal_getpid())) { @@ -9793,11 +9794,11 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { DEBUG("%s txn %" PRIaTXN "%c %p on mdbenv %p, root page %" PRIaPGNO "/%" PRIaPGNO, - names[mode & MDBX_END_OPMASK], txn->mt_txnid, + names[mode & TXN_END_OPMASK], txn->mt_txnid, (txn->mt_flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root, txn->mt_dbs[FREE_DBI].md_root); - if (!(mode & MDBX_END_EOTDONE)) /* !(already closed cursors) */ + if (!(mode & TXN_END_EOTDONE)) /* !(already closed cursors) */ cursors_eot(txn, false); int rc = MDBX_SUCCESS; @@ -9823,7 +9824,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { eASSERT(env, slot->mr_pid.weak == env->me_pid); eASSERT(env, slot->mr_txnid.weak >= SAFE64_INVALID_THRESHOLD); } - if (mode & MDBX_END_SLOT) { + if (mode & TXN_END_SLOT) { if ((env->me_flags & MDBX_ENV_TXKEY) == 0) atomic_store32(&slot->mr_pid, 0, mo_Relaxed); txn->to.reader = NULL; @@ -9852,7 +9853,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { if (txn == env->me_txn0) { eASSERT(env, txn->mt_parent == NULL); /* Export or close DBI handles created in this txn */ - dbi_update(txn, mode & MDBX_END_UPDATE); + dbi_update(txn, mode & TXN_END_UPDATE); pnl_shrink(&txn->tw.retired_pages); pnl_shrink(&txn->tw.relist); if (!(env->me_flags & MDBX_WRITEMAP)) @@ -9925,7 +9926,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { } eASSERT(env, txn == env->me_txn0 || txn->mt_owner == 0); - if ((mode & MDBX_END_FREE) != 0 && txn != env->me_txn0) { + if ((mode & TXN_END_FREE) != 0 && txn != env->me_txn0) { txn->mt_signature = 0; osal_free(txn); } @@ -9943,7 +9944,7 @@ int mdbx_txn_reset(MDBX_txn *txn) { return MDBX_EINVAL; /* LY: don't close DBI-handles */ - rc = txn_end(txn, MDBX_END_RESET | MDBX_END_UPDATE); + rc = txn_end(txn, TXN_END_RESET | TXN_END_UPDATE); if (rc == MDBX_SUCCESS) { 
tASSERT(txn, txn->mt_signature == MDBX_MT_SIGNATURE); tASSERT(txn, txn->mt_owner == 0); @@ -9971,8 +9972,8 @@ int mdbx_txn_abort(MDBX_txn *txn) { if (txn->mt_flags & MDBX_TXN_RDONLY) /* LY: don't close DBI-handles */ - return txn_end(txn, MDBX_END_ABORT | MDBX_END_UPDATE | MDBX_END_SLOT | - MDBX_END_FREE); + return txn_end(txn, TXN_END_ABORT | TXN_END_UPDATE | TXN_END_SLOT | + TXN_END_FREE); if (unlikely(txn->mt_flags & MDBX_TXN_FINISHED)) return MDBX_BAD_TXN; @@ -9981,7 +9982,7 @@ int mdbx_txn_abort(MDBX_txn *txn) { mdbx_txn_abort(txn->mt_child); tASSERT(txn, (txn->mt_flags & MDBX_TXN_ERROR) || dirtylist_check(txn)); - return txn_end(txn, MDBX_END_ABORT | MDBX_END_SLOT | MDBX_END_FREE); + return txn_end(txn, TXN_END_ABORT | TXN_END_SLOT | TXN_END_FREE); } /* Count all the pages in each DB and in the GC and make sure @@ -10019,16 +10020,16 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, tASSERT(txn, rc == MDBX_NOTFOUND); for (size_t i = FREE_DBI; i < txn->mt_numdbs; i++) - txn->mt_dbistate[i] &= ~DBI_AUDITED; + txn->mt_dbi_state[i] &= ~DBI_AUDIT; size_t used = NUM_METAS; for (size_t i = FREE_DBI; i <= MAIN_DBI; i++) { - if (!(txn->mt_dbistate[i] & DBI_VALID)) + if (!(txn->mt_dbi_state[i] & DBI_VALID)) continue; rc = cursor_init(&cx.outer, txn, i); if (unlikely(rc != MDBX_SUCCESS)) return rc; - txn->mt_dbistate[i] |= DBI_AUDITED; + txn->mt_dbi_state[i] |= DBI_AUDIT; if (txn->mt_dbs[i].md_root == P_INVALID) continue; used += (size_t)txn->mt_dbs[i].md_branch_pages + @@ -10049,13 +10050,13 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, memcpy(db = &db_copy, node_data(node), sizeof(db_copy)); if ((txn->mt_flags & MDBX_TXN_RDONLY) == 0) { for (MDBX_dbi k = txn->mt_numdbs; --k > MAIN_DBI;) { - if ((txn->mt_dbistate[k] & DBI_VALID) && + if ((txn->mt_dbi_state[k] & DBI_VALID) && /* txn->mt_dbxs[k].md_name.iov_base && */ node_ks(node) == txn->mt_dbxs[k].md_name.iov_len && memcmp(node_key(node), txn->mt_dbxs[k].md_name.iov_base, 
node_ks(node)) == 0) { - txn->mt_dbistate[k] |= DBI_AUDITED; - if (!(txn->mt_dbistate[k] & MDBX_DBI_STALE)) + txn->mt_dbi_state[k] |= DBI_AUDIT; + if (!(txn->mt_dbi_state[k] & MDBX_DBI_STALE)) db = txn->mt_dbs + k; break; } @@ -10071,25 +10072,25 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, } for (size_t i = FREE_DBI; i < txn->mt_numdbs; i++) { - if ((txn->mt_dbistate[i] & (DBI_VALID | DBI_AUDITED | DBI_STALE)) != + if ((txn->mt_dbi_state[i] & (DBI_VALID | DBI_AUDIT | DBI_STALE)) != DBI_VALID) continue; for (MDBX_txn *t = txn; t; t = t->mt_parent) - if (F_ISSET(t->mt_dbistate[i], DBI_DIRTY | DBI_CREAT)) { + if (F_ISSET(t->mt_dbi_state[i], DBI_DIRTY | DBI_CREAT)) { used += (size_t)t->mt_dbs[i].md_branch_pages + (size_t)t->mt_dbs[i].md_leaf_pages + (size_t)t->mt_dbs[i].md_overflow_pages; - txn->mt_dbistate[i] |= DBI_AUDITED; + txn->mt_dbi_state[i] |= DBI_AUDIT; break; } MDBX_ANALYSIS_ASSUME(txn != nullptr); - if (!(txn->mt_dbistate[i] & DBI_AUDITED)) { + if (!(txn->mt_dbi_state[i] & DBI_AUDIT)) { WARNING("audit %s@%" PRIaTXN ": unable account dbi %zd / \"%*s\", state 0x%02x", txn->mt_parent ? 
"nested-" : "", txn->mt_txnid, i, (int)txn->mt_dbxs[i].md_name.iov_len, (const char *)txn->mt_dbxs[i].md_name.iov_base, - txn->mt_dbistate[i]); + txn->mt_dbi_state[i]); } } @@ -11175,10 +11176,10 @@ static __always_inline bool check_dbi(const MDBX_txn *txn, MDBX_dbi dbi, unsigned validity) { if (likely(dbi < txn->mt_numdbs)) { if (likely(!dbi_changed(txn, dbi))) { - if (likely(txn->mt_dbistate[dbi] & validity)) + if (likely(txn->mt_dbi_state[dbi] & validity)) return true; if (likely(dbi < CORE_DBS || - (txn->mt_env->me_dbflags[dbi] & DB_VALID) == 0)) + (txn->mt_env->me_db_flags[dbi] & DB_VALID) == 0)) return false; } } @@ -11601,7 +11602,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { /* txn_end() mode for a commit which writes nothing */ unsigned end_mode = - MDBX_END_PURE_COMMIT | MDBX_END_UPDATE | MDBX_END_SLOT | MDBX_END_FREE; + TXN_END_PURE_COMMIT | TXN_END_UPDATE | TXN_END_SLOT | TXN_END_FREE; if (unlikely(txn->mt_flags & MDBX_TXN_RDONLY)) goto done; @@ -11630,9 +11631,9 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { if (txn->tw.dirtylist->length == 0 && !(txn->mt_flags & MDBX_TXN_DIRTY) && parent->mt_numdbs == txn->mt_numdbs) { for (int i = txn->mt_numdbs; --i >= 0;) { - tASSERT(txn, (txn->mt_dbistate[i] & DBI_DIRTY) == 0); - if ((txn->mt_dbistate[i] & DBI_STALE) && - !(parent->mt_dbistate[i] & DBI_STALE)) + tASSERT(txn, (txn->mt_dbi_state[i] & DBI_DIRTY) == 0); + if ((txn->mt_dbi_state[i] & DBI_STALE) && + !(parent->mt_dbi_state[i] & DBI_STALE)) tASSERT(txn, memcmp(&parent->mt_dbs[i], &txn->mt_dbs[i], sizeof(MDBX_db)) == 0); } @@ -11646,7 +11647,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { tASSERT(txn, txn->tw.loose_count == 0); /* fast completion of pure nested transaction */ - end_mode = MDBX_END_PURE_COMMIT | MDBX_END_SLOT | MDBX_END_FREE; + end_mode = TXN_END_PURE_COMMIT | TXN_END_SLOT | TXN_END_FREE; goto done; } @@ -11706,7 +11707,7 @@ int mdbx_txn_commit_ex(MDBX_txn 
*txn, MDBX_commit_latency *latency) { /* Merge our cursors into parent's and close them */ cursors_eot(txn, true); - end_mode |= MDBX_END_EOTDONE; + end_mode |= TXN_END_EOTDONE; /* Update parent's DBs array */ memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDBX_db)); @@ -11714,12 +11715,12 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { for (size_t i = 0; i < txn->mt_numdbs; i++) { /* preserve parent's status */ const uint8_t state = - txn->mt_dbistate[i] | - (parent->mt_dbistate[i] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)); + txn->mt_dbi_state[i] | + (parent->mt_dbi_state[i] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)); DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", i, - (parent->mt_dbistate[i] != state) ? "update" : "still", - parent->mt_dbistate[i], state); - parent->mt_dbistate[i] = state; + (parent->mt_dbi_state[i] != state) ? "update" : "still", + parent->mt_dbi_state[i], state); + parent->mt_dbi_state[i] = state; } if (latency) { @@ -11767,12 +11768,12 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { : txn->mt_env->me_options.dp_limit)); } cursors_eot(txn, false); - end_mode |= MDBX_END_EOTDONE; + end_mode |= TXN_END_EOTDONE; if ((!txn->tw.dirtylist || txn->tw.dirtylist->length == 0) && (txn->mt_flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS)) == 0) { for (intptr_t i = txn->mt_numdbs; --i >= 0;) - tASSERT(txn, (txn->mt_dbistate[i] & DBI_DIRTY) == 0); + tASSERT(txn, (txn->mt_dbi_state[i] & DBI_DIRTY) == 0); #if defined(MDBX_NOSUCCESS_EMPTY_COMMIT) && MDBX_NOSUCCESS_EMPTY_COMMIT rc = txn_end(txn, end_mode); if (unlikely(rc != MDBX_SUCCESS)) @@ -11799,7 +11800,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { if (unlikely(rc != MDBX_SUCCESS)) goto fail; for (MDBX_dbi i = CORE_DBS; i < txn->mt_numdbs; i++) { - if (txn->mt_dbistate[i] & DBI_DIRTY) { + if (txn->mt_dbi_state[i] & DBI_DIRTY) { MDBX_db *db = &txn->mt_dbs[i]; DEBUG("update main's entry for sub-db %u, mod_txnid %" PRIaTXN " -> %" 
PRIaTXN, @@ -11830,11 +11831,11 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { goto fail; tASSERT(txn, txn->tw.loose_count == 0); - txn->mt_dbs[FREE_DBI].md_mod_txnid = (txn->mt_dbistate[FREE_DBI] & DBI_DIRTY) + txn->mt_dbs[FREE_DBI].md_mod_txnid = (txn->mt_dbi_state[FREE_DBI] & DBI_DIRTY) ? txn->mt_txnid : txn->mt_dbs[FREE_DBI].md_mod_txnid; - txn->mt_dbs[MAIN_DBI].md_mod_txnid = (txn->mt_dbistate[MAIN_DBI] & DBI_DIRTY) + txn->mt_dbs[MAIN_DBI].md_mod_txnid = (txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY) ? txn->mt_txnid : txn->mt_dbs[MAIN_DBI].md_mod_txnid; @@ -11969,7 +11970,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { goto fail; } - end_mode = MDBX_END_COMMITTED | MDBX_END_UPDATE | MDBX_END_EOTDONE; + end_mode = TXN_END_COMMITTED | TXN_END_UPDATE | TXN_END_EOTDONE; done: if (latency) @@ -14910,10 +14911,10 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, env->me_flags = (flags & ~MDBX_FATAL_ERROR) | MDBX_ENV_ACTIVE; env->me_pathname = osal_calloc(env_pathname.ent_len + 1, sizeof(pathchar_t)); env->me_dbxs = osal_calloc(env->me_maxdbs, sizeof(MDBX_dbx)); - env->me_dbflags = osal_calloc(env->me_maxdbs, sizeof(env->me_dbflags[0])); - env->me_dbiseqs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbiseqs[0])); - if (!(env->me_dbxs && env->me_pathname && env->me_dbflags && - env->me_dbiseqs)) { + env->me_db_flags = osal_calloc(env->me_maxdbs, sizeof(env->me_db_flags[0])); + env->me_dbi_seqs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbi_seqs[0])); + if (!(env->me_dbxs && env->me_pathname && env->me_db_flags && + env->me_dbi_seqs)) { rc = MDBX_ENOMEM; goto bailout; } @@ -15268,10 +15269,10 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, txn->mt_dbs = ptr_disp(txn, tsize); txn->mt_cursors = ptr_disp(txn->mt_dbs, sizeof(MDBX_db) * env->me_maxdbs); - txn->mt_dbiseqs = + txn->mt_dbi_seqs = ptr_disp(txn->mt_cursors, sizeof(MDBX_cursor *) * env->me_maxdbs); - txn->mt_dbistate = 
ptr_disp( - txn->mt_dbiseqs, sizeof(MDBX_atomic_uint32_t) * env->me_maxdbs); + txn->mt_dbi_state = ptr_disp( + txn->mt_dbi_seqs, sizeof(MDBX_atomic_uint32_t) * env->me_maxdbs); txn->mt_env = env; txn->mt_dbxs = env->me_dbxs; txn->mt_flags = MDBX_TXN_FINISHED; @@ -15399,13 +15400,13 @@ __cold static int env_close(MDBX_env *env) { osal_memalign_free(env->me_pbuf); env->me_pbuf = nullptr; } - if (env->me_dbiseqs) { - osal_free(env->me_dbiseqs); - env->me_dbiseqs = nullptr; + if (env->me_dbi_seqs) { + osal_free(env->me_dbi_seqs); + env->me_dbi_seqs = nullptr; } - if (env->me_dbflags) { - osal_free(env->me_dbflags); - env->me_dbflags = nullptr; + if (env->me_db_flags) { + osal_free(env->me_db_flags); + env->me_db_flags = nullptr; } if (env->me_pathname) { osal_free(env->me_pathname); @@ -15945,7 +15946,7 @@ static int fetch_sdb(MDBX_txn *txn, size_t dbi) { if (unlikely(rc != MDBX_SUCCESS)) return rc; - txn->mt_dbistate[dbi] &= ~DBI_STALE; + txn->mt_dbi_state[dbi] &= ~DBI_STALE; return MDBX_SUCCESS; } @@ -15995,7 +15996,7 @@ __hot static int page_search(MDBX_cursor *mc, const MDBX_val *key, int flags) { } /* Make sure we're using an up-to-date root */ - if (unlikely(*mc->mc_dbistate & DBI_STALE)) { + if (unlikely(*mc->mc_dbi_state & DBI_STALE)) { rc = fetch_sdb(mc->mc_txn, mc->mc_dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -16019,7 +16020,7 @@ __hot static int page_search(MDBX_cursor *mc, const MDBX_val *key, int flags) { do if ((scan->mt_flags & MDBX_TXN_DIRTY) && (mc->mc_dbi == MAIN_DBI || - (scan->mt_dbistate[mc->mc_dbi] & DBI_DIRTY))) { + (scan->mt_dbi_state[mc->mc_dbi] & DBI_DIRTY))) { /* После коммита вложенных тразакций может быть mod_txnid > front */ pp_txnid = scan->mt_front; break; @@ -17171,8 +17172,8 @@ int mdbx_cursor_get_batch(MDBX_cursor *mc, size_t *count, MDBX_val *pairs, } static int touch_dbi(MDBX_cursor *mc) { - cASSERT(mc, (*mc->mc_dbistate & DBI_DIRTY) == 0); - *mc->mc_dbistate |= DBI_DIRTY; + cASSERT(mc, (*mc->mc_dbi_state & DBI_DIRTY) == 
0); + *mc->mc_dbi_state |= DBI_DIRTY; mc->mc_txn->mt_flags |= MDBX_TXN_DIRTY; if (mc->mc_dbi >= CORE_DBS) { /* Touch DB record of named DB */ @@ -17180,7 +17181,7 @@ static int touch_dbi(MDBX_cursor *mc) { int rc = cursor_init(&cx.outer, mc->mc_txn, MAIN_DBI); if (unlikely(rc != MDBX_SUCCESS)) return rc; - mc->mc_txn->mt_dbistate[MAIN_DBI] |= DBI_DIRTY; + mc->mc_txn->mt_dbi_state[MAIN_DBI] |= DBI_DIRTY; rc = page_search(&cx.outer, &mc->mc_dbx->md_name, MDBX_PS_MODIFY); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -17198,7 +17199,7 @@ static __hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, MDBX_txn *const txn = mc->mc_txn; txn_lru_turn(txn); - if (unlikely((*mc->mc_dbistate & DBI_DIRTY) == 0)) { + if (unlikely((*mc->mc_dbi_state & DBI_DIRTY) == 0)) { int err = touch_dbi(mc); if (unlikely(err != MDBX_SUCCESS)) return err; @@ -18261,7 +18262,7 @@ static pgr_t page_new(MDBX_cursor *mc, const unsigned flags) { DEBUG("db %u allocated new page %" PRIaPGNO, mc->mc_dbi, ret.page->mp_pgno); ret.page->mp_flags = (uint16_t)flags; - cASSERT(mc, *mc->mc_dbistate & DBI_DIRTY); + cASSERT(mc, *mc->mc_dbi_state & DBI_DIRTY); cASSERT(mc, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); #if MDBX_ENABLE_PGOP_STAT mc->mc_txn->mt_env->me_lck->mti_pgop_stat.newly.weak += 1; @@ -18292,7 +18293,7 @@ static pgr_t page_new_large(MDBX_cursor *mc, const size_t npages) { DEBUG("db %u allocated new large-page %" PRIaPGNO ", num %zu", mc->mc_dbi, ret.page->mp_pgno, npages); ret.page->mp_flags = P_OVERFLOW; - cASSERT(mc, *mc->mc_dbistate & DBI_DIRTY); + cASSERT(mc, *mc->mc_dbi_state & DBI_DIRTY); cASSERT(mc, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); #if MDBX_ENABLE_PGOP_STAT mc->mc_txn->mt_env->me_lck->mti_pgop_stat.newly.weak += npages; @@ -18615,7 +18616,7 @@ static int cursor_xinit0(MDBX_cursor *mc) { mx->mx_cursor.mc_db = &mx->mx_db; mx->mx_cursor.mc_dbx = &mx->mx_dbx; mx->mx_cursor.mc_dbi = mc->mc_dbi; - mx->mx_cursor.mc_dbistate = mc->mc_dbistate; + mx->mx_cursor.mc_dbi_state = 
mc->mc_dbi_state; mx->mx_cursor.mc_snum = 0; mx->mx_cursor.mc_top = 0; mx->mx_cursor.mc_flags = C_SUB; @@ -18769,7 +18770,7 @@ static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, couple->outer.mc_txn = (MDBX_txn *)txn; couple->outer.mc_db = db; couple->outer.mc_dbx = dbx; - couple->outer.mc_dbistate = dbstate; + couple->outer.mc_dbi_state = dbstate; couple->outer.mc_snum = 0; couple->outer.mc_top = 0; couple->outer.mc_pg[0] = 0; @@ -18784,7 +18785,7 @@ static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, couple->outer.mc_xcursor = NULL; int rc = MDBX_SUCCESS; - if (unlikely(*couple->outer.mc_dbistate & DBI_STALE)) { + if (unlikely(*couple->outer.mc_dbi_state & DBI_STALE)) { rc = page_search(&couple->outer, NULL, MDBX_PS_ROOTONLY); rc = (rc != MDBX_NOTFOUND) ? rc : MDBX_SUCCESS; } else if (unlikely(dbx->md_klen_max == 0)) { @@ -18808,7 +18809,7 @@ static int cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi) { STATIC_ASSERT(offsetof(MDBX_cursor_couple, outer) == 0); return couple_init(container_of(mc, MDBX_cursor_couple, outer), dbi, txn, &txn->mt_dbs[dbi], &txn->mt_dbxs[dbi], - &txn->mt_dbistate[dbi]); + &txn->mt_dbi_state[dbi]); } MDBX_cursor *mdbx_cursor_create(void *context) { @@ -18907,7 +18908,7 @@ int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { cASSERT(mc, mc->mc_db == &txn->mt_dbs[dbi]); cASSERT(mc, mc->mc_dbx == &txn->mt_dbxs[dbi]); cASSERT(mc, mc->mc_dbi == dbi); - cASSERT(mc, mc->mc_dbistate == &txn->mt_dbistate[dbi]); + cASSERT(mc, mc->mc_dbi_state == &txn->mt_dbi_state[dbi]); return likely(mc->mc_dbi == dbi && /* paranoia */ mc->mc_signature == MDBX_MC_LIVE && mc->mc_txn == txn) @@ -18970,7 +18971,7 @@ int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest) { assert(dest->mc_db == src->mc_db); assert(dest->mc_dbi == src->mc_dbi); assert(dest->mc_dbx == src->mc_dbx); - assert(dest->mc_dbistate == src->mc_dbistate); + assert(dest->mc_dbi_state == 
src->mc_dbi_state); again: assert(dest->mc_txn == src->mc_txn); dest->mc_flags ^= (dest->mc_flags ^ src->mc_flags) & ~C_UNTRACK; @@ -19728,7 +19729,7 @@ static void cursor_restore(const MDBX_cursor *csrc, MDBX_cursor *cdst) { cASSERT(cdst, cdst->mc_txn == csrc->mc_txn); cASSERT(cdst, cdst->mc_db == csrc->mc_db); cASSERT(cdst, cdst->mc_dbx == csrc->mc_dbx); - cASSERT(cdst, cdst->mc_dbistate == csrc->mc_dbistate); + cASSERT(cdst, cdst->mc_dbi_state == csrc->mc_dbi_state); cdst->mc_snum = csrc->mc_snum; cdst->mc_top = csrc->mc_top; cdst->mc_flags = csrc->mc_flags; @@ -19753,7 +19754,7 @@ static void cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst) { cdst->mc_txn = csrc->mc_txn; cdst->mc_db = csrc->mc_db; cdst->mc_dbx = csrc->mc_dbx; - cdst->mc_dbistate = csrc->mc_dbistate; + cdst->mc_dbi_state = csrc->mc_dbi_state; cursor_restore(csrc, cdst); } @@ -19811,7 +19812,7 @@ static int rebalance(MDBX_cursor *mc) { if (nkeys == 0) { cASSERT(mc, IS_LEAF(mp)); DEBUG("%s", "tree is completely empty"); - cASSERT(mc, (*mc->mc_dbistate & DBI_DIRTY) != 0); + cASSERT(mc, (*mc->mc_dbi_state & DBI_DIRTY) != 0); mc->mc_db->md_root = P_INVALID; mc->mc_db->md_depth = 0; cASSERT(mc, mc->mc_db->md_branch_pages == 0 && @@ -21508,7 +21509,7 @@ __cold static int compacting_walk_sdb(mdbx_compacting_ctx *ctx, MDBX_db *sdb) { memset(&couple, 0, sizeof(couple)); couple.inner.mx_cursor.mc_signature = ~MDBX_MC_LIVE; MDBX_dbx dbx = {.md_klen_min = INT_MAX}; - uint8_t dbistate = DBI_VALID | DBI_AUDITED; + uint8_t dbistate = DBI_VALID | DBI_AUDIT; int rc = couple_init(&couple, ~0u, ctx->mc_txn, sdb, &dbx, &dbistate); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -21716,7 +21717,7 @@ __cold static int env_copy_asis(MDBX_env *env, MDBX_txn *read_txn, const bool dest_is_pipe, const MDBX_copy_flags_t flags) { /* We must start the actual read txn after blocking writers */ - int rc = txn_end(read_txn, MDBX_END_RESET_TMP); + int rc = txn_end(read_txn, TXN_END_RESET_TMP); if (unlikely(rc != 
MDBX_SUCCESS)) return rc; @@ -22165,7 +22166,7 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { /* account opened named subDBs */ for (MDBX_dbi dbi = CORE_DBS; dbi < txn->mt_numdbs; dbi++) - if ((txn->mt_dbistate[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID) + if ((txn->mt_dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID) stat_add(txn->mt_dbs + dbi, st, bytes); if (!(txn->mt_dbs[MAIN_DBI].md_flags & (MDBX_DUPSORT | MDBX_INTEGERKEY)) && @@ -22188,7 +22189,7 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { /* skip opened and already accounted */ for (MDBX_dbi dbi = CORE_DBS; dbi < txn->mt_numdbs; dbi++) - if ((txn->mt_dbistate[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID && + if ((txn->mt_dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID && node_ks(node) == txn->mt_dbxs[dbi].md_name.iov_len && memcmp(node_key(node), txn->mt_dbxs[dbi].md_name.iov_base, node_ks(node)) == 0) { @@ -22657,13 +22658,13 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, goto bailout; } /* Пересоздаём MAIN_DBI если там пусто. 
*/ - atomic_store32(&txn->mt_dbiseqs[MAIN_DBI], dbi_seq(env, MAIN_DBI), + atomic_store32(&txn->mt_dbi_seqs[MAIN_DBI], dbi_seq(env, MAIN_DBI), mo_AcquireRelease); tASSERT(txn, txn->mt_dbs[MAIN_DBI].md_depth == 0 && txn->mt_dbs[MAIN_DBI].md_entries == 0 && txn->mt_dbs[MAIN_DBI].md_root == P_INVALID); txn->mt_dbs[MAIN_DBI].md_flags &= MDBX_REVERSEKEY | MDBX_INTEGERKEY; - txn->mt_dbistate[MAIN_DBI] |= DBI_DIRTY; + txn->mt_dbi_state[MAIN_DBI] |= DBI_DIRTY; txn->mt_flags |= MDBX_TXN_DIRTY; txn->mt_dbxs[MAIN_DBI].md_cmp = get_default_keycmp(txn->mt_dbs[MAIN_DBI].md_flags); @@ -22790,24 +22791,25 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, dbiflags |= DBI_DIRTY | DBI_CREAT; txn->mt_flags |= MDBX_TXN_DIRTY; - tASSERT(txn, (txn->mt_dbistate[MAIN_DBI] & DBI_DIRTY) != 0); + tASSERT(txn, (txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY) != 0); } /* Got info, register DBI in this txn */ memset(txn->mt_dbxs + slot, 0, sizeof(MDBX_dbx)); memcpy(&txn->mt_dbs[slot], data.iov_base, sizeof(MDBX_db)); - env->me_dbflags[slot] = 0; + env->me_db_flags[slot] = 0; rc = dbi_bind(txn, slot, user_flags, keycmp, datacmp); if (unlikely(rc != MDBX_SUCCESS)) { tASSERT(txn, (dbiflags & DBI_CREAT) == 0); goto bailout; } - txn->mt_dbistate[slot] = (uint8_t)dbiflags; + txn->mt_dbi_state[slot] = (uint8_t)dbiflags; txn->mt_dbxs[slot].md_name = key; - txn->mt_dbiseqs[slot].weak = env->me_dbiseqs[slot].weak = dbi_seq(env, slot); + txn->mt_dbi_seqs[slot].weak = env->me_dbi_seqs[slot].weak = + dbi_seq(env, slot); if (!(dbiflags & DBI_CREAT)) - env->me_dbflags[slot] = txn->mt_dbs[slot].md_flags | DB_VALID; + env->me_db_flags[slot] = txn->mt_dbs[slot].md_flags | DB_VALID; if (txn->mt_numdbs == slot) { txn->mt_cursors[slot] = NULL; osal_compiler_barrier(); @@ -22880,7 +22882,7 @@ __cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, if (unlikely(txn->mt_flags & MDBX_TXN_BLOCKED)) return MDBX_BAD_TXN; - if (unlikely(txn->mt_dbistate[dbi] & DBI_STALE)) { + if 
(unlikely(txn->mt_dbi_state[dbi] & DBI_STALE)) { rc = fetch_sdb((MDBX_txn *)txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -22901,7 +22903,7 @@ static int dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) { if (unlikely(!ptr)) return MDBX_BAD_DBI; - env->me_dbflags[dbi] = 0; + env->me_db_flags[dbi] = 0; env->me_dbxs[dbi].md_name.iov_len = 0; osal_memory_fence(mo_AcquireRelease, true); env->me_dbxs[dbi].md_name.iov_base = NULL; @@ -22934,7 +22936,7 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { rc = osal_fastmutex_acquire(&env->me_dbi_lock); if (likely(rc == MDBX_SUCCESS)) { - rc = (dbi < env->me_maxdbs && (env->me_dbflags[dbi] & DB_VALID)) + rc = (dbi < env->me_maxdbs && (env->me_db_flags[dbi] & DB_VALID)) ? dbi_close_locked(env, dbi) : MDBX_BAD_DBI; ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); @@ -22956,7 +22958,7 @@ int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, *flags = txn->mt_dbs[dbi].md_flags & DB_PERSISTENT_FLAGS; *state = - txn->mt_dbistate[dbi] & (DBI_FRESH | DBI_CREAT | DBI_DIRTY | DBI_STALE); + txn->mt_dbi_state[dbi] & (DBI_FRESH | DBI_CREAT | DBI_DIRTY | DBI_STALE); return MDBX_SUCCESS; } @@ -23078,9 +23080,9 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { if (del && dbi >= CORE_DBS) { rc = delete (txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, F_SUBDATA); if (likely(rc == MDBX_SUCCESS)) { - tASSERT(txn, txn->mt_dbistate[MAIN_DBI] & DBI_DIRTY); + tASSERT(txn, txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY); tASSERT(txn, txn->mt_flags & MDBX_TXN_DIRTY); - txn->mt_dbistate[dbi] = DBI_STALE; + txn->mt_dbi_state[dbi] = DBI_STALE; MDBX_env *env = txn->mt_env; rc = osal_fastmutex_acquire(&env->me_dbi_lock); if (unlikely(rc != MDBX_SUCCESS)) { @@ -23094,7 +23096,7 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { } } else { /* reset the DB record, mark it dirty */ - txn->mt_dbistate[dbi] |= DBI_DIRTY; + txn->mt_dbi_state[dbi] |= DBI_DIRTY; txn->mt_dbs[dbi].md_depth = 0; 
txn->mt_dbs[dbi].md_branch_pages = 0; txn->mt_dbs[dbi].md_leaf_pages = 0; @@ -23749,7 +23751,7 @@ __cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_walk_sdb_t *sdb, MDBX_cursor_couple couple; MDBX_dbx dbx = {.md_klen_min = INT_MAX}; - uint8_t dbistate = DBI_VALID | DBI_AUDITED; + uint8_t dbistate = DBI_VALID | DBI_AUDIT; int rc = couple_init(&couple, ~0u, ctx->mw_txn, db, &dbx, &dbistate); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -24492,7 +24494,7 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) return MDBX_BAD_DBI; - if (unlikely(txn->mt_dbistate[dbi] & DBI_STALE)) { + if (unlikely(txn->mt_dbi_state[dbi] & DBI_STALE)) { rc = fetch_sdb(txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -24513,7 +24515,7 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, tASSERT(txn, new > dbs->md_seq); dbs->md_seq = new; txn->mt_flags |= MDBX_TXN_DIRTY; - txn->mt_dbistate[dbi] |= DBI_DIRTY; + txn->mt_dbi_state[dbi] |= DBI_DIRTY; } return MDBX_SUCCESS; @@ -27027,7 +27029,7 @@ bailout: mdbx_cursor_close(cursor); if (dbi >= CORE_DBS && !txn->mt_cursors[dbi] && - txn->mt_dbistate[dbi] == (DBI_FRESH | DBI_VALID | DBI_USRVALID)) + txn->mt_dbi_state[dbi] == (DBI_FRESH | DBI_VALID | DBI_USRVALID)) mdbx_dbi_close(env, dbi); } return err; diff --git a/src/internals.h b/src/internals.h index 8fdb37a8..d4ac2215 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1194,19 +1194,19 @@ struct MDBX_txn { /* Array of MDBX_db records for each known DB */ MDBX_db *mt_dbs; - /* Transaction DBI Flags */ -#define DBI_DIRTY MDBX_DBI_DIRTY /* DB was written in this txn */ -#define DBI_STALE MDBX_DBI_STALE /* Named-DB record is older than txnID */ -#define DBI_FRESH MDBX_DBI_FRESH /* Named-DB handle opened in this txn */ -#define DBI_CREAT MDBX_DBI_CREAT /* Named-DB handle created in this txn */ -#define DBI_VALID 0x10 /* DB handle is valid, see also DB_VALID */ -#define DBI_USRVALID 0x20 /* 
As DB_VALID, but not set for FREE_DBI */ -#define DBI_AUDITED 0x40 /* Internal flag for accounting during audit */ + /* Non-shared DBI state flags inside transaction */ +#define DBI_DIRTY 0x01 /* DB was written in this txn */ +#define DBI_STALE 0x02 /* Named-DB record is older than txnID */ +#define DBI_FRESH 0x04 /* Named-DB handle opened in this txn */ +#define DBI_CREAT 0x08 /* Named-DB handle created in this txn */ +#define DBI_VALID 0x10 /* Handle is valid, see also DB_VALID */ +#define DBI_USRVALID 0x20 /* As DB_VALID, but not set for FREE_DBI */ +#define DBI_AUDIT 0x40 /* Internal flag for accounting during audit */ /* Array of non-shared txn's flags of DBI */ - uint8_t *mt_dbistate; + uint8_t *mt_dbi_state; /* Array of sequence numbers for each DB handle. */ - MDBX_atomic_uint32_t *mt_dbiseqs; + MDBX_atomic_uint32_t *mt_dbi_seqs; MDBX_cursor **mt_cursors; MDBX_canary mt_canary; @@ -1292,8 +1292,8 @@ struct MDBX_cursor { MDBX_db *mc_db; /* The database auxiliary record for this cursor */ MDBX_dbx *mc_dbx; - /* The mt_dbistate for this database */ - uint8_t *mc_dbistate; + /* The mt_dbi_state[] for this DBI */ + uint8_t *mc_dbi_state; uint8_t mc_snum; /* number of pushed pages */ uint8_t mc_top; /* index of top page, normally mc_snum-1 */ @@ -1393,9 +1393,9 @@ struct MDBX_env { void *me_pbuf; /* scratch area for DUPSORT put() */ MDBX_txn *me_txn0; /* preallocated write transaction */ - MDBX_dbx *me_dbxs; /* array of static DB info */ - uint16_t *me_dbflags; /* array of flags from MDBX_db.md_flags */ - MDBX_atomic_uint32_t *me_dbiseqs; /* array of dbi sequence numbers */ + MDBX_dbx *me_dbxs; /* array of static DB info */ + uint16_t *me_db_flags; /* array of flags from MDBX_db.md_flags */ + MDBX_atomic_uint32_t *me_dbi_seqs; /* array of dbi sequence numbers */ unsigned me_maxgc_ov1page; /* Number of pgno_t fit in a single overflow page */ unsigned me_maxgc_per_branch; @@ -1662,7 +1662,7 @@ typedef struct MDBX_node { /* mdbx_dbi_open() flags */ #define 
DB_USABLE_FLAGS (DB_PERSISTENT_FLAGS | MDBX_CREATE | MDBX_DB_ACCEDE) -#define DB_VALID 0x8000 /* DB handle is valid, for me_dbflags */ +#define DB_VALID 0x8000 /* DB handle is valid, for me_db_flags */ #define DB_INTERNAL_FLAGS DB_VALID #if DB_INTERNAL_FLAGS & DB_USABLE_FLAGS From 4b79d46d38cb6ca5f109fac1071b7f2879203047 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 3 Nov 2023 11:36:59 +0300 Subject: [PATCH 024/137] =?UTF-8?q?mdbx:=20=D1=83=D0=B4=D0=B0=D0=BB=D0=B5?= =?UTF-8?q?=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=BE=D0=BB=D1=8F=20`mt=5Fdbxs`=20?= =?UTF-8?q?=D0=B8=D0=B7=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Явного выигрыша или проигрыша в производительности тут нет. Но теперь меньше алиасинга указателей и чуть меньше полей в транзакциях. --- src/core.c | 110 ++++++++++++++++++++++++------------------------ src/internals.h | 2 - 2 files changed, 56 insertions(+), 56 deletions(-) diff --git a/src/core.c b/src/core.c index 2db1e56f..6e264f84 100644 --- a/src/core.c +++ b/src/core.c @@ -3781,13 +3781,13 @@ MDBX_MAYBE_UNUSED static bool cursor_is_tracked(const MDBX_cursor *mc) { int mdbx_cmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, const MDBX_val *b) { eASSERT(NULL, txn->mt_signature == MDBX_MT_SIGNATURE); - return txn->mt_dbxs[dbi].md_cmp(a, b); + return txn->mt_env->me_dbxs[dbi].md_cmp(a, b); } int mdbx_dcmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, const MDBX_val *b) { eASSERT(NULL, txn->mt_signature == MDBX_MT_SIGNATURE); - return txn->mt_dbxs[dbi].md_dcmp(a, b); + return txn->mt_env->me_dbxs[dbi].md_dcmp(a, b); } /* Allocate memory for a page. 
@@ -8917,7 +8917,6 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { rc = MDBX_CORRUPTED; goto bailout; } - txn->mt_dbxs = env->me_dbxs; /* mostly static anyway */ txn->mt_numdbs = env->me_numdbs; ENSURE(env, txn->mt_txnid >= /* paranoia is appropriate here */ env->me_lck @@ -9024,7 +9023,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { } txn->mt_dbi_state[MAIN_DBI] = DBI_VALID | DBI_USRVALID; rc = - setup_dbx(&txn->mt_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize); + setup_dbx(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; txn->mt_dbi_state[FREE_DBI] = DBI_VALID; @@ -9294,7 +9293,6 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, txn->mt_cursors[FREE_DBI] = nullptr; /* avoid SIGSEGV in an assertion later */ #endif /* MDBX_DEBUG */ txn->mt_dbi_state = ptr_disp(txn, size - env->me_maxdbs); - txn->mt_dbxs = env->me_dbxs; /* static */ txn->mt_flags = flags; txn->mt_env = env; @@ -9620,8 +9618,8 @@ static void dbi_import_locked(MDBX_txn *txn) { txn->mt_dbi_state[i] = 0; if (env->me_db_flags[i] & DB_VALID) { txn->mt_dbi_state[i] = DBI_VALID | DBI_USRVALID | DBI_STALE; - tASSERT(txn, txn->mt_dbxs[i].md_cmp != NULL); - tASSERT(txn, txn->mt_dbxs[i].md_name.iov_base != NULL); + tASSERT(txn, env->me_dbxs[i].md_cmp != NULL); + tASSERT(txn, env->me_dbxs[i].md_name.iov_base != NULL); } } } @@ -10039,29 +10037,32 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, if (i != MAIN_DBI) continue; rc = page_search(&cx.outer, NULL, MDBX_PS_FIRST); + const MDBX_env *const env = txn->mt_env; while (rc == MDBX_SUCCESS) { MDBX_page *mp = cx.outer.mc_pg[cx.outer.mc_top]; for (size_t j = 0; j < page_numkeys(mp); j++) { - MDBX_node *node = page_node(mp, j); + const MDBX_node *node = page_node(mp, j); if (node_flags(node) == F_SUBDATA) { if (unlikely(node_ds(node) != sizeof(MDBX_db))) return MDBX_CORRUPTED; - MDBX_db db_copy, *db; - memcpy(db 
= &db_copy, node_data(node), sizeof(db_copy)); + const MDBX_val name = {node_key(node), node_ks(node)}; + const MDBX_db *db = nullptr; if ((txn->mt_flags & MDBX_TXN_RDONLY) == 0) { - for (MDBX_dbi k = txn->mt_numdbs; --k > MAIN_DBI;) { - if ((txn->mt_dbi_state[k] & DBI_VALID) && - /* txn->mt_dbxs[k].md_name.iov_base && */ - node_ks(node) == txn->mt_dbxs[k].md_name.iov_len && - memcmp(node_key(node), txn->mt_dbxs[k].md_name.iov_base, - node_ks(node)) == 0) { - txn->mt_dbi_state[k] |= DBI_AUDIT; - if (!(txn->mt_dbi_state[k] & MDBX_DBI_STALE)) - db = txn->mt_dbs + k; + for (MDBX_dbi dbi = txn->mt_numdbs; --dbi > MAIN_DBI;) { + if ((txn->mt_dbi_state[dbi] & DBI_VALID) && + /* env->me_dbxs[k].md_name.iov_base && */ + env->me_dbxs[MAIN_DBI].md_cmp( + &name, &env->me_dbxs[dbi].md_name) == 0) { + txn->mt_dbi_state[dbi] |= DBI_AUDIT; + if (!(txn->mt_dbi_state[dbi] & MDBX_DBI_STALE)) + db = txn->mt_dbs + dbi; break; } } } + MDBX_db aligned; + if (!db) + db = memcpy(&aligned, node_data(node), sizeof(MDBX_db)); used += (size_t)db->md_branch_pages + (size_t)db->md_leaf_pages + (size_t)db->md_overflow_pages; } @@ -10088,8 +10089,8 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, WARNING("audit %s@%" PRIaTXN ": unable account dbi %zd / \"%*s\", state 0x%02x", txn->mt_parent ? 
"nested-" : "", txn->mt_txnid, i, - (int)txn->mt_dbxs[i].md_name.iov_len, - (const char *)txn->mt_dbxs[i].md_name.iov_base, + (int)txn->mt_env->me_dbxs[i].md_name.iov_len, + (const char *)txn->mt_env->me_dbxs[i].md_name.iov_base, txn->mt_dbi_state[i]); } } @@ -11810,7 +11811,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { data.iov_base = db; WITH_CURSOR_TRACKING( couple.outer, - rc = cursor_put_nochecklen(&couple.outer, &txn->mt_dbxs[i].md_name, + rc = cursor_put_nochecklen(&couple.outer, &env->me_dbxs[i].md_name, &data, F_SUBDATA)); if (unlikely(rc != MDBX_SUCCESS)) goto fail; @@ -15274,7 +15275,6 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, txn->mt_dbi_state = ptr_disp( txn->mt_dbi_seqs, sizeof(MDBX_atomic_uint32_t) * env->me_maxdbs); txn->mt_env = env; - txn->mt_dbxs = env->me_dbxs; txn->mt_flags = MDBX_TXN_FINISHED; env->me_txn0 = txn; txn->tw.retired_pages = pnl_alloc(MDBX_PNL_INITIAL); @@ -15884,7 +15884,7 @@ static int fetch_sdb(MDBX_txn *txn, size_t dbi) { if (unlikely(rc != MDBX_SUCCESS)) return rc; - MDBX_dbx *const dbx = &txn->mt_dbxs[dbi]; + MDBX_dbx *const dbx = &txn->mt_env->me_dbxs[dbi]; rc = page_search(&couple.outer, &dbx->md_name, 0); if (unlikely(rc != MDBX_SUCCESS)) { notfound: @@ -18808,7 +18808,7 @@ static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, static int cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi) { STATIC_ASSERT(offsetof(MDBX_cursor_couple, outer) == 0); return couple_init(container_of(mc, MDBX_cursor_couple, outer), dbi, txn, - &txn->mt_dbs[dbi], &txn->mt_dbxs[dbi], + &txn->mt_dbs[dbi], &txn->mt_env->me_dbxs[dbi], &txn->mt_dbi_state[dbi]); } @@ -18906,7 +18906,7 @@ int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { return MDBX_EINVAL; cASSERT(mc, mc->mc_db == &txn->mt_dbs[dbi]); - cASSERT(mc, mc->mc_dbx == &txn->mt_dbxs[dbi]); + cASSERT(mc, mc->mc_dbx == &txn->mt_env->me_dbxs[dbi]); cASSERT(mc, mc->mc_dbi == dbi); 
cASSERT(mc, mc->mc_dbi_state == &txn->mt_dbi_state[dbi]); return likely(mc->mc_dbi == dbi && @@ -22155,7 +22155,8 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { if (unlikely(err != MDBX_SUCCESS)) return err; - st->ms_psize = txn->mt_env->me_psize; + const MDBX_env *const env = txn->mt_env; + st->ms_psize = env->me_psize; #if 1 /* assuming GC is internal and not subject for accounting */ stat_get(&txn->mt_dbs[MAIN_DBI], st, bytes); @@ -22188,11 +22189,11 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { return MDBX_CORRUPTED; /* skip opened and already accounted */ + const MDBX_val name = {node_key(node), node_ks(node)}; for (MDBX_dbi dbi = CORE_DBS; dbi < txn->mt_numdbs; dbi++) if ((txn->mt_dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID && - node_ks(node) == txn->mt_dbxs[dbi].md_name.iov_len && - memcmp(node_key(node), txn->mt_dbxs[dbi].md_name.iov_base, - node_ks(node)) == 0) { + env->me_dbxs[MAIN_DBI].md_cmp(&name, + &env->me_dbxs[dbi].md_name) == 0) { node = NULL; break; } @@ -22528,10 +22529,11 @@ static int dbi_bind(MDBX_txn *txn, const MDBX_dbi dbi, unsigned user_flags, * 3) user_flags differs, but table is empty and MDBX_CREATE is provided * = assume that a properly create request with custom flags; */ + const MDBX_env *const env = txn->mt_env; if ((user_flags ^ txn->mt_dbs[dbi].md_flags) & DB_PERSISTENT_FLAGS) { /* flags are differs, check other conditions */ - if ((!user_flags && (!keycmp || keycmp == txn->mt_dbxs[dbi].md_cmp) && - (!datacmp || datacmp == txn->mt_dbxs[dbi].md_dcmp)) || + if ((!user_flags && (!keycmp || keycmp == env->me_dbxs[dbi].md_cmp) && + (!datacmp || datacmp == env->me_dbxs[dbi].md_dcmp)) || user_flags == MDBX_ACCEDE) { /* no comparators were provided and flags are zero, * seems that is case #1 above */ @@ -22544,29 +22546,29 @@ static int dbi_bind(MDBX_txn *txn, const MDBX_dbi dbi, unsigned user_flags, txn->mt_flags |= MDBX_TXN_DIRTY; /* обнуляем компараторы 
для установки в соответствии с флагами, * либо заданных пользователем */ - txn->mt_dbxs[dbi].md_cmp = nullptr; - txn->mt_dbxs[dbi].md_dcmp = nullptr; + env->me_dbxs[dbi].md_cmp = nullptr; + env->me_dbxs[dbi].md_dcmp = nullptr; } else { return /* FIXME: return extended info */ MDBX_INCOMPATIBLE; } } if (!keycmp) - keycmp = txn->mt_dbxs[dbi].md_cmp ? txn->mt_dbxs[dbi].md_cmp + keycmp = env->me_dbxs[dbi].md_cmp ? env->me_dbxs[dbi].md_cmp : get_default_keycmp(user_flags); - if (txn->mt_dbxs[dbi].md_cmp != keycmp) { - if (txn->mt_dbxs[dbi].md_cmp) + if (env->me_dbxs[dbi].md_cmp != keycmp) { + if (env->me_dbxs[dbi].md_cmp) return MDBX_EINVAL; - txn->mt_dbxs[dbi].md_cmp = keycmp; + env->me_dbxs[dbi].md_cmp = keycmp; } if (!datacmp) - datacmp = txn->mt_dbxs[dbi].md_dcmp ? txn->mt_dbxs[dbi].md_dcmp + datacmp = env->me_dbxs[dbi].md_dcmp ? env->me_dbxs[dbi].md_dcmp : get_default_datacmp(user_flags); - if (txn->mt_dbxs[dbi].md_dcmp != datacmp) { - if (txn->mt_dbxs[dbi].md_dcmp) + if (env->me_dbxs[dbi].md_dcmp != datacmp) { + if (env->me_dbxs[dbi].md_dcmp) return MDBX_EINVAL; - txn->mt_dbxs[dbi].md_dcmp = datacmp; + env->me_dbxs[dbi].md_dcmp = datacmp; } return MDBX_SUCCESS; @@ -22652,7 +22654,7 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, rc = MDBX_NOTFOUND; goto bailout; } - if (txn->mt_dbs[MAIN_DBI].md_leaf_pages || txn->mt_dbxs[MAIN_DBI].md_cmp) { + if (txn->mt_dbs[MAIN_DBI].md_leaf_pages || env->me_dbxs[MAIN_DBI].md_cmp) { /* В MAIN_DBI есть записи либо она уже использовалась. 
*/ rc = MDBX_INCOMPATIBLE; goto bailout; @@ -22666,24 +22668,24 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, txn->mt_dbs[MAIN_DBI].md_flags &= MDBX_REVERSEKEY | MDBX_INTEGERKEY; txn->mt_dbi_state[MAIN_DBI] |= DBI_DIRTY; txn->mt_flags |= MDBX_TXN_DIRTY; - txn->mt_dbxs[MAIN_DBI].md_cmp = + env->me_dbxs[MAIN_DBI].md_cmp = get_default_keycmp(txn->mt_dbs[MAIN_DBI].md_flags); - txn->mt_dbxs[MAIN_DBI].md_dcmp = + env->me_dbxs[MAIN_DBI].md_dcmp = get_default_datacmp(txn->mt_dbs[MAIN_DBI].md_flags); } - tASSERT(txn, txn->mt_dbxs[MAIN_DBI].md_cmp); + tASSERT(txn, env->me_dbxs[MAIN_DBI].md_cmp); /* Is the DB already open? */ MDBX_dbi scan, slot; for (slot = scan = txn->mt_numdbs; --scan >= CORE_DBS;) { - if (!txn->mt_dbxs[scan].md_name.iov_base) { + if (!env->me_dbxs[scan].md_name.iov_base) { /* Remember this free slot */ slot = scan; continue; } - if (key.iov_len == txn->mt_dbxs[scan].md_name.iov_len && - !memcmp(key.iov_base, txn->mt_dbxs[scan].md_name.iov_base, + if (key.iov_len == env->me_dbxs[scan].md_name.iov_len && + !memcmp(key.iov_base, env->me_dbxs[scan].md_name.iov_base, key.iov_len)) { rc = dbi_bind(txn, scan, user_flags, keycmp, datacmp); if (unlikely(rc != MDBX_SUCCESS)) @@ -22751,13 +22753,13 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, /* Rescan after mutex acquisition & import handles */ for (slot = scan = txn->mt_numdbs; --scan >= CORE_DBS;) { - if (!txn->mt_dbxs[scan].md_name.iov_base) { + if (!env->me_dbxs[scan].md_name.iov_base) { /* Remember this free slot */ slot = scan; continue; } - if (key.iov_len == txn->mt_dbxs[scan].md_name.iov_len && - !memcmp(key.iov_base, txn->mt_dbxs[scan].md_name.iov_base, + if (key.iov_len == env->me_dbxs[scan].md_name.iov_len && + !memcmp(key.iov_base, env->me_dbxs[scan].md_name.iov_base, key.iov_len)) { rc = dbi_bind(txn, scan, user_flags, keycmp, datacmp); if (unlikely(rc != MDBX_SUCCESS)) @@ -22795,7 +22797,7 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const 
table_name, } /* Got info, register DBI in this txn */ - memset(txn->mt_dbxs + slot, 0, sizeof(MDBX_dbx)); + memset(env->me_dbxs + slot, 0, sizeof(MDBX_dbx)); memcpy(&txn->mt_dbs[slot], data.iov_base, sizeof(MDBX_db)); env->me_db_flags[slot] = 0; rc = dbi_bind(txn, slot, user_flags, keycmp, datacmp); @@ -22805,7 +22807,7 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, } txn->mt_dbi_state[slot] = (uint8_t)dbiflags; - txn->mt_dbxs[slot].md_name = key; + env->me_dbxs[slot].md_name = key; txn->mt_dbi_seqs[slot].weak = env->me_dbi_seqs[slot].weak = dbi_seq(env, slot); if (!(dbiflags & DBI_CREAT)) diff --git a/src/internals.h b/src/internals.h index d4ac2215..d8dafc2d 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1189,8 +1189,6 @@ struct MDBX_txn { txnid_t mt_front; MDBX_env *mt_env; /* the DB environment */ - /* Array of records for each DB known in the environment. */ - MDBX_dbx *mt_dbxs; /* Array of MDBX_db records for each known DB */ MDBX_db *mt_dbs; From 796e56b9b9aa2f11987b95a848b1f7f29ff9924c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 3 Nov 2023 14:11:58 +0300 Subject: [PATCH 025/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BA=D0=BE=D0=B4=D0=B0=20?= =?UTF-8?q?=D1=81=D0=B8=D1=81=D1=82=D0=B5=D0=BC=D0=BD=D0=BE=D0=B9=20=D0=BE?= =?UTF-8?q?=D1=88=D0=B8=D0=B1=D0=BA=D0=B8=20`MDBX=5FEDEADLK`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 6 ++++-- src/lck-windows.c | 2 +- src/osal.c | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/mdbx.h b/mdbx.h index 5c43ab89..ee6e21c7 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1950,7 +1950,8 @@ enum MDBX_error_t { MDBX_EPERM = ERROR_INVALID_FUNCTION, MDBX_EINTR = ERROR_CANCELLED, MDBX_ENOFILE = ERROR_FILE_NOT_FOUND, - MDBX_EREMOTE = 
ERROR_REMOTE_STORAGE_MEDIA_ERROR + MDBX_EREMOTE = ERROR_REMOTE_STORAGE_MEDIA_ERROR, + MDBX_EDEADLK = ERROR_POSSIBLE_DEADLOCK #else /* Windows */ #ifdef ENODATA MDBX_ENODATA = ENODATA, @@ -1966,7 +1967,8 @@ enum MDBX_error_t { MDBX_EPERM = EPERM, MDBX_EINTR = EINTR, MDBX_ENOFILE = ENOENT, - MDBX_EREMOTE = ENOTBLK + MDBX_EREMOTE = ENOTBLK, + MDBX_EDEADLK = EDEADLK #endif /* !Windows */ }; #ifndef __cplusplus diff --git a/src/lck-windows.c b/src/lck-windows.c index d2354285..bc77150d 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -190,7 +190,7 @@ int osal_txn_lock(MDBX_env *env, bool dontwait) { 0xC0000194 /* STATUS_POSSIBLE_DEADLOCK / EXCEPTION_POSSIBLE_DEADLOCK */) ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) { - return ERROR_POSSIBLE_DEADLOCK; + return MDBX_EDEADLK; } } diff --git a/src/osal.c b/src/osal.c index adffbabf..5559b204 100644 --- a/src/osal.c +++ b/src/osal.c @@ -536,7 +536,7 @@ MDBX_INTERNAL_FUNC int osal_fastmutex_acquire(osal_fastmutex_t *fastmutex) { 0xC0000194 /* STATUS_POSSIBLE_DEADLOCK / EXCEPTION_POSSIBLE_DEADLOCK */) ? 
EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH) { - return ERROR_POSSIBLE_DEADLOCK; + return MDBX_EDEADLK; } return MDBX_SUCCESS; #else From e6af7d7c53428ca2892bcbf7eec1c2acee06fd44 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 5 Nov 2023 22:10:29 +0300 Subject: [PATCH 026/137] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D1=80?= =?UTF-8?q?=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20=D0=B8=D0=BD=D0=B8=D1=86?= =?UTF-8?q?=D0=B8=D0=B0=D0=BB=D0=B8=D0=B7=D0=B0=D1=86=D0=B8=D0=B8,=20?= =?UTF-8?q?=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B8=20=D0=B8=20?= =?UTF-8?q?=D0=B8=D0=BC=D0=BF=D0=BE=D1=80=D1=82=D0=B0=20dbi-=D1=85=D0=B5?= =?UTF-8?q?=D0=BD=D0=B4=D0=BB=D0=BE=D0=B2=20=D0=B2=20=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D1=8F=D1=85.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ранее инициализация в транзакциях структур данных, связанных с dbi-хендлами и subDb, выполнялась непосредственно при запуске транзакций. Что в сценариях с большим кол-вом dbi-дескрипторов (например libfpta) порождало заметные накладные расходы, которые росли линейно от общего кол-ва открытых subDb, а не от реально используемых в транзакции. При использовании одной-двух сотен хендлов, при старте каждой транзакции могли копироваться и/или обнуляться десятки килобайт. Теперь этот недостаток устранен. Изменена схема инициализации, валидации и импорта хендлов открытых после старта транзакции: 1) Инициализация теперь выполняется отложенно, а при старте транзакции обнуляется только массив с однобайтовыми статусами dbi-хендлов. При этом доступна опция сборки `MDBX_ENABLE_DBI_SPARSE`, при активации которой используется битовая карта, что снижает объем инициализации при старте транзакции в 8 раз (CHAR_BIT).
2) Переработана валидация dbi-хендлов на входах API, с уменьшением кол-ва проверок и ветвлений до теоретического минимума. 3) Переработан импорт dbi-хендлов открытых после старта транзакции, теперь при этом не захватывается мьютекс. --- CMakeLists.txt | 1 + mdbx.h | 8 +- mdbx.h++ | 1 + src/bits.md | 6 +- src/config.h.in | 1 + src/core.c | 1803 +++++++++++++++++++++++++++-------------------- src/internals.h | 23 +- src/mdbx.c++ | 3 + src/options.h | 7 + 9 files changed, 1068 insertions(+), 785 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 50bd1b4b..89eee769 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -531,6 +531,7 @@ add_mdbx_option(MDBX_ENABLE_BIGFOOT "Chunking long list of retired pages during add_mdbx_option(MDBX_ENABLE_PGOP_STAT "Gathering statistics for page operations" ON) add_mdbx_option(MDBX_ENABLE_PROFGC "Profiling of GC search and updates" OFF) mark_as_advanced(MDBX_ENABLE_PROFGC) +add_mdbx_option(MDBX_ENABLE_DBI_SPARSE "FIXME" ON) if(NOT MDBX_AMALGAMATED_SOURCE) if(CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE_UPPERCASE STREQUAL "DEBUG") diff --git a/mdbx.h b/mdbx.h index ee6e21c7..d9cc392e 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1921,7 +1921,7 @@ enum MDBX_error_t { MDBX_TOO_LARGE = -30417, /** A thread has attempted to use a not owned object, - * e.g. a transaction that started by another thread. */ + * e.g.
a transaction that started by another thread */ MDBX_THREAD_MISMATCH = -30416, /** Overlapping read and write transactions for the current thread */ @@ -1936,8 +1936,12 @@ enum MDBX_error_t { /** Alternative/Duplicate LCK-file is exists and should be removed manually */ MDBX_DUPLICATED_CLK = -30413, + /** Some cursors and/or other resources should be closed before subDb or + * corresponding DBI-handle could be (re)used */ + MDBX_DANGLING_DBI = -30412, + /* The last of MDBX-added error codes */ - MDBX_LAST_ADDED_ERRCODE = MDBX_DUPLICATED_CLK, + MDBX_LAST_ADDED_ERRCODE = MDBX_DANGLING_DBI, #if defined(_WIN32) || defined(_WIN64) MDBX_ENODATA = ERROR_HANDLE_EOF, diff --git a/mdbx.h++ b/mdbx.h++ index ea0131be..6c33a0b3 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -559,6 +559,7 @@ MDBX_DECLARE_EXCEPTION(thread_mismatch); MDBX_DECLARE_EXCEPTION(transaction_full); MDBX_DECLARE_EXCEPTION(transaction_overlapping); MDBX_DECLARE_EXCEPTION(duplicated_lck_file); +MDBX_DECLARE_EXCEPTION(dangling_map_id); #undef MDBX_DECLARE_EXCEPTION [[noreturn]] LIBMDBX_API void throw_too_small_target_buffer(); diff --git a/src/bits.md b/src/bits.md index e8708f02..d8166d16 100644 --- a/src/bits.md +++ b/src/bits.md @@ -5,9 +5,9 @@ N | MASK | ENV | TXN | DB | PUT | DBI | NOD 2 |0000 0004|ALLOC_COLSC|TXN_DIRTY |DUPSORT | |DBI_FRESH |F_DUPDATA|P_OVERFLOW| | 3 |0000 0008|ALLOC_SSCAN|TXN_SPILLS |INTEGERKEY| |DBI_CREAT | |P_META | | 4 |0000 0010|ALLOC_FIFO |TXN_HAS_CHILD |DUPFIXED |NOOVERWRITE|DBI_VALID | |P_BAD | | -5 |0000 0020| |TXN_DRAINED_GC|INTEGERDUP|NODUPDATA |DBI_USRVALID| |P_LEAF2 | | -6 |0000 0040| | |REVERSEDUP|CURRENT |DBI_DUPDATA | |P_SUBP | | -7 |0000 0080| | | |ALLDUPS |DBI_AUDITED | | | | +5 |0000 0020| |TXN_DRAINED_GC|INTEGERDUP|NODUPDATA | | |P_LEAF2 | | +6 |0000 0040| | |REVERSEDUP|CURRENT |DBI_OLDEN | |P_SUBP | | +7 |0000 0080| | | |ALLDUPS |DBI_LINDO | | | | 8 |0000 0100| _MAY_MOVE | | | | | | | <= | 9 |0000 0200| _MAY_UNMAP| | | | | | | <= | 10|0000 0400| | | | | | | | | diff --git 
a/src/config.h.in b/src/config.h.in index 05c561b1..2ffb9ecf 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -33,6 +33,7 @@ #cmakedefine01 MDBX_ENABLE_BIGFOOT #cmakedefine01 MDBX_ENABLE_PGOP_STAT #cmakedefine01 MDBX_ENABLE_PROFGC +#cmakedefine01 MDBX_ENABLE_DBI_SPARSE /* Windows */ #cmakedefine01 MDBX_WITHOUT_MSVC_CRT diff --git a/src/core.c b/src/core.c index 6e264f84..af6ff541 100644 --- a/src/core.c +++ b/src/core.c @@ -3474,8 +3474,11 @@ __cold const char *mdbx_liberr2str(int errnum) { return "MDBX_TXN_OVERLAPPING: Overlapping read and write transactions for" " the current thread"; case MDBX_DUPLICATED_CLK: - return "MDBX_DUPLICATED_CLK: Alternative/Duplicate LCK-file is exists, " - "please keep one and remove unused other"; + return "MDBX_DUPLICATED_CLK: Alternative/Duplicate LCK-file is exists," + " please keep one and remove unused other"; + case MDBX_DANGLING_DBI: + return "MDBX_DANGLING_DBI: Some cursors and/or other resources should be" + " closed before subDb or corresponding DBI-handle could be (re)used"; default: return NULL; } @@ -3778,15 +3781,409 @@ MDBX_MAYBE_UNUSED static bool cursor_is_tracked(const MDBX_cursor *mc) { *tracking_head = tracked->mc_next; \ } while (0) +#if MDBX_ENABLE_DBI_SPARSE + +static __inline size_t dbi_bitmap_ctz(const MDBX_txn *txn, intptr_t bmi) { + tASSERT(txn, bmi > 0); + STATIC_ASSERT(sizeof(bmi) >= sizeof(txn->mt_dbi_sparse[0])); +#if __GNUC_PREREQ(4, 1) || __has_builtin(__builtin_ctzl) + if (sizeof(txn->mt_dbi_sparse[0]) <= sizeof(int)) + return __builtin_ctz((int)bmi); + if (sizeof(txn->mt_dbi_sparse[0]) == sizeof(long)) + return __builtin_ctzl((long)bmi); +#if (defined(__SIZEOF_LONG_LONG__) && __SIZEOF_LONG_LONG__ == 8) || \ + __has_builtin(__builtin_ctzll) + return __builtin_ctzll(bmi); +#endif /* have(long long) && long long == uint64_t */ +#endif /* GNU C */ + +#if defined(_MSC_VER) + unsigned long index; + if (sizeof(txn->mt_dbi_sparse[0]) > 4) { +#if defined(_M_AMD64) || defined(_M_ARM64) || 
defined(_M_X64) + _BitScanForward64(&index, bmi); + return index; +#else + if (bmi > UINT32_MAX) { + _BitScanForward(&index, (uint32_t)((uint64_t)bmi >> 32)); + return index; + } +#endif + } + _BitScanForward(&index, (uint32_t)bmi); + return index; +#endif /* MSVC */ + + bmi &= -bmi; + if (sizeof(txn->mt_dbi_sparse[0]) > 4) { + static const uint8_t debruijn_ctz64[64] = { + 0, 1, 2, 53, 3, 7, 54, 27, 4, 38, 41, 8, 34, 55, 48, 28, + 62, 5, 39, 46, 44, 42, 22, 9, 24, 35, 59, 56, 49, 18, 29, 11, + 63, 52, 6, 26, 37, 40, 33, 47, 61, 45, 43, 21, 23, 58, 17, 10, + 51, 25, 36, 32, 60, 20, 57, 16, 50, 31, 19, 15, 30, 14, 13, 12}; + return debruijn_ctz64[(UINT64_C(0x022FDD63CC95386D) * (uint64_t)bmi) >> 58]; + } else { + static const uint8_t debruijn_ctz32[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9}; + return debruijn_ctz32[(UINT32_C(0x077CB531) * (uint32_t)bmi) >> 27]; + } +} + +/* LY: Макрос целенаправленно сделан с одним циклом, чтобы сохранить возможность + * использования оператора break */ +#define TXN_FOREACH_DBI_FROM(TXN, I, FROM) \ + for (size_t bitmap_chunk = CHAR_BIT * sizeof(TXN->mt_dbi_sparse[0]), \ + bitmap_item = TXN->mt_dbi_sparse[0] >> FROM, I = FROM; \ + I < TXN->mt_numdbs; ++I) \ + if (bitmap_item == 0) { \ + I |= bitmap_chunk - 1; \ + bitmap_item = TXN->mt_dbi_sparse[(1 + I) / bitmap_chunk]; \ + continue; \ + } else if ((bitmap_item & 1) == 0) { \ + size_t bitmap_skip = dbi_bitmap_ctz(txn, bitmap_item); \ + bitmap_item >>= bitmap_skip; \ + I += bitmap_skip - 1; \ + continue; \ + } else if (bitmap_item >>= 1, TXN->mt_dbi_state[I]) +#else +#define TXN_FOREACH_DBI_FROM(TXN, I, SKIP) \ + for (size_t I = SKIP; I < TXN->mt_numdbs; ++I) \ + if (TXN->mt_dbi_state[I]) +#endif /* MDBX_ENABLE_DBI_SPARSE */ + +#define TXN_FOREACH_DBI_ALL(TXN, I) TXN_FOREACH_DBI_FROM(TXN, I, 0) +#define TXN_FOREACH_DBI_USER(TXN, I) TXN_FOREACH_DBI_FROM(TXN, I, CORE_DBS) + +/* Back up parent txn's 
cursor, then grab the original for tracking */ +static int cursor_shadow(MDBX_cursor *parent_cursor, MDBX_txn *nested_txn, + const size_t dbi) { + + tASSERT(nested_txn, dbi > FREE_DBI && dbi < nested_txn->mt_numdbs); + const size_t size = parent_cursor->mc_xcursor + ? sizeof(MDBX_cursor) + sizeof(MDBX_xcursor) + : sizeof(MDBX_cursor); + for (MDBX_cursor *bk; parent_cursor; parent_cursor = bk->mc_next) { + bk = parent_cursor; + if (parent_cursor->mc_signature != MDBX_MC_LIVE) + continue; + bk = osal_malloc(size); + if (unlikely(!bk)) + return MDBX_ENOMEM; +#if MDBX_DEBUG + memset(bk, 0xCD, size); + VALGRIND_MAKE_MEM_UNDEFINED(bk, size); +#endif /* MDBX_DEBUG */ + *bk = *parent_cursor; + parent_cursor->mc_backup = bk; + /* Kill pointers into src to reduce abuse: The + * user may not use mc until dst ends. But we need a valid + * txn pointer here for cursor fixups to keep working. */ + parent_cursor->mc_txn = nested_txn; + parent_cursor->mc_db = &nested_txn->mt_dbs[dbi]; + parent_cursor->mc_dbi_state = &nested_txn->mt_dbi_state[dbi]; + MDBX_xcursor *mx = parent_cursor->mc_xcursor; + if (mx != NULL) { + *(MDBX_xcursor *)(bk + 1) = *mx; + mx->mx_cursor.mc_txn = nested_txn; + } + parent_cursor->mc_next = nested_txn->mt_cursors[dbi]; + nested_txn->mt_cursors[dbi] = parent_cursor; + } + return MDBX_SUCCESS; +} + +/* Close this txn's cursors, give parent txn's cursors back to parent. + * + * [in] txn the transaction handle. + * [in] merge true to keep changes to parent cursors, false to revert. + * + * Returns 0 on success, non-zero on failure. 
*/ +static void cursors_eot(MDBX_txn *txn, const bool merge) { + tASSERT(txn, txn->mt_cursors[FREE_DBI] == nullptr); + TXN_FOREACH_DBI_FROM(txn, i, /* skip FREE_DBI */ 1) { + MDBX_cursor *mc = txn->mt_cursors[i]; + if (!mc) + continue; + txn->mt_cursors[i] = nullptr; + do { + const unsigned stage = mc->mc_signature; + MDBX_cursor *const next = mc->mc_next; + MDBX_cursor *const bk = mc->mc_backup; + ENSURE(txn->mt_env, + stage == MDBX_MC_LIVE || (stage == MDBX_MC_WAIT4EOT && bk)); + cASSERT(mc, mc->mc_dbi == (MDBX_dbi)i); + if (bk) { + MDBX_xcursor *mx = mc->mc_xcursor; + tASSERT(txn, txn->mt_parent != NULL); + /* Zap: Using uninitialized memory '*mc->mc_backup'. */ + MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(6001); + ENSURE(txn->mt_env, bk->mc_signature == MDBX_MC_LIVE); + tASSERT(txn, mx == bk->mc_xcursor); + if (stage == MDBX_MC_WAIT4EOT /* Cursor was closed by user */) + mc->mc_signature = stage /* Promote closed state to parent txn */; + else if (merge) { + /* Restore pointers to parent txn */ + mc->mc_next = bk->mc_next; + mc->mc_backup = bk->mc_backup; + mc->mc_txn = bk->mc_txn; + mc->mc_db = bk->mc_db; + mc->mc_dbi_state = bk->mc_dbi_state; + if (mx) { + if (mx != bk->mc_xcursor) { + *bk->mc_xcursor = *mx; + mx = bk->mc_xcursor; + } + mx->mx_cursor.mc_txn = bk->mc_txn; + } + } else { + /* Restore from backup, i.e. 
rollback/abort nested txn */ + *mc = *bk; + if (mx) + *mx = *(MDBX_xcursor *)(bk + 1); + } + bk->mc_signature = 0; + osal_free(bk); + } else { + ENSURE(txn->mt_env, stage == MDBX_MC_LIVE); + mc->mc_signature = MDBX_MC_READY4CLOSE /* Cursor may be reused */; + mc->mc_flags = 0 /* reset C_UNTRACK */; + } + mc = next; + } while (mc); + } +} + +static __noinline int dbi_import(MDBX_txn *txn, const size_t dbi); + +static __inline uint8_t dbi_state(const MDBX_txn *txn, const size_t dbi) { + STATIC_ASSERT(DBI_DIRTY == MDBX_DBI_DIRTY && DBI_STALE == MDBX_DBI_STALE && + DBI_FRESH == MDBX_DBI_FRESH && DBI_CREAT == MDBX_DBI_CREAT); + +#if MDBX_ENABLE_DBI_SPARSE + const size_t bitmap_chunk = CHAR_BIT * sizeof(txn->mt_dbi_sparse[0]); + const size_t bitmap_indx = dbi / bitmap_chunk; + const size_t bitmap_mask = (size_t)1 << dbi % bitmap_chunk; + return likely(dbi < txn->mt_numdbs && + (txn->mt_dbi_sparse[bitmap_indx] & bitmap_mask) != 0) + ? txn->mt_dbi_state[dbi] + : 0; +#else + return likely(dbi < txn->mt_numdbs) ? txn->mt_dbi_state[dbi] : 0; +#endif /* MDBX_ENABLE_DBI_SPARSE */ +} + +static __inline bool dbi_changed(const MDBX_txn *txn, const size_t dbi) { + const MDBX_env *const env = txn->mt_env; + eASSERT(env, dbi_state(txn, dbi) & DBI_LINDO); + const uint32_t snap_seq = + atomic_load32(&env->me_dbi_seqs[dbi], mo_AcquireRelease); + return snap_seq != txn->mt_dbi_seqs[dbi]; +} + +static __always_inline int dbi_check(const MDBX_txn *txn, const size_t dbi) { + const uint8_t state = dbi_state(txn, dbi); + if (likely((state & DBI_LINDO) != 0 && !dbi_changed(txn, dbi))) + return (state & DBI_VALID) ? MDBX_SUCCESS : MDBX_BAD_DBI; + + /* Медленный путь: ленивая до-инициализацяи и импорт */ + return dbi_import((MDBX_txn *)txn, dbi); +} + +static __inline uint32_t dbi_seq_next(const MDBX_env *const env, size_t dbi) { + uint32_t v = atomic_load32(&env->me_dbi_seqs[dbi], mo_AcquireRelease) + 1; + return v ? 
v : 1; +} + +struct dbi_snap_result { + uint32_t sequence; + unsigned flags; +}; + +static struct dbi_snap_result dbi_snap(const MDBX_env *env, const size_t dbi) { + eASSERT(env, dbi < env->me_numdbs); + struct dbi_snap_result r; + uint32_t snap = atomic_load32(&env->me_dbi_seqs[dbi], mo_AcquireRelease); + do { + r.sequence = snap; + r.flags = env->me_db_flags[dbi]; + snap = atomic_load32(&env->me_dbi_seqs[dbi], mo_AcquireRelease); + } while (unlikely(snap != r.sequence)); + return r; +} + +static __noinline int dbi_import(MDBX_txn *txn, const size_t dbi) { + const MDBX_env *const env = txn->mt_env; + if (dbi >= env->me_numdbs || !env->me_db_flags[dbi]) + return MDBX_BAD_DBI; + +#if MDBX_ENABLE_DBI_SPARSE + const size_t bitmap_chunk = CHAR_BIT * sizeof(txn->mt_dbi_sparse[0]); + const size_t bitmap_indx = dbi / bitmap_chunk; + const size_t bitmap_mask = (size_t)1 << dbi % bitmap_chunk; + if (dbi >= txn->mt_numdbs) { + for (size_t i = (txn->mt_numdbs + bitmap_chunk - 1) / bitmap_chunk; + bitmap_indx >= i; ++i) + txn->mt_dbi_sparse[i] = 0; + eASSERT(env, (txn->mt_dbi_sparse[bitmap_indx] & bitmap_mask) == 0); + MDBX_txn *scan = txn; + do { + eASSERT(env, scan->mt_dbi_sparse == txn->mt_dbi_sparse); + eASSERT(env, scan->mt_numdbs < dbi + 1); + scan->mt_numdbs = (unsigned)dbi + 1; + scan->mt_dbi_state[dbi] = 0; + scan = scan->mt_parent; + } while (scan /* && scan->mt_dbi_sparse == txn->mt_dbi_sparse */); + txn->mt_dbi_sparse[bitmap_indx] |= bitmap_mask; + goto lindo; + } + if ((txn->mt_dbi_sparse[bitmap_indx] & bitmap_mask) == 0) { + MDBX_txn *scan = txn; + do { + eASSERT(env, scan->mt_dbi_sparse == txn->mt_dbi_sparse); + eASSERT(env, scan->mt_numdbs == txn->mt_numdbs); + scan->mt_dbi_state[dbi] = 0; + scan = scan->mt_parent; + } while (scan /* && scan->mt_dbi_sparse == txn->mt_dbi_sparse */); + txn->mt_dbi_sparse[bitmap_indx] |= bitmap_mask; + goto lindo; + } +#else + if (dbi >= txn->mt_numdbs) { + size_t i = txn->mt_numdbs; + do + txn->mt_dbi_state[i] = 0; + while (dbi 
>= ++i); + txn->mt_numdbs = i; + goto lindo; + } +#endif /* MDBX_ENABLE_DBI_SPARSE */ + + if (!txn->mt_dbi_state[dbi]) { + lindo: + /* dbi-слот еще не инициализирован в транзакции, а хендл не использовался */ + txn->mt_cursors[dbi] = nullptr; + MDBX_txn *const parent = txn->mt_parent; + if (parent) { + /* вложенная пишущая транзакция */ + int rc = dbi_check(parent, dbi); + /* копируем состояние subDB очищая new-флаги. */ + eASSERT(env, txn->mt_dbi_seqs == parent->mt_dbi_seqs); + txn->mt_dbi_state[dbi] = + parent->mt_dbi_state[dbi] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + if (likely(rc == MDBX_SUCCESS)) { + txn->mt_dbs[dbi] = parent->mt_dbs[dbi]; + if (parent->mt_cursors[dbi]) { + rc = cursor_shadow(parent->mt_cursors[dbi], txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) { + /* не получилось забекапить курсоры */ + txn->mt_dbi_state[dbi] = DBI_OLDEN | DBI_LINDO | DBI_STALE; + txn->mt_flags |= MDBX_TXN_ERROR; + } + } + } + return rc; + } + txn->mt_dbi_seqs[dbi] = 0; + txn->mt_dbi_state[dbi] = DBI_LINDO; + } else { + eASSERT(env, txn->mt_dbi_seqs[dbi] != env->me_dbi_seqs[dbi].weak); + if (unlikely((txn->mt_dbi_state[dbi] & (DBI_VALID | DBI_OLDEN)) || + txn->mt_cursors[dbi])) { + /* хендл уже использовался в транзакции, но был закрыт или переоткрыт, + * либо при явном пере-открытии хендла есть висячие курсоры */ + eASSERT(env, (txn->mt_dbi_state[dbi] & DBI_STALE) == 0); + txn->mt_dbi_seqs[dbi] = env->me_dbi_seqs[dbi].weak; + txn->mt_dbi_state[dbi] = DBI_OLDEN | DBI_LINDO; + return txn->mt_cursors[dbi] ? 
MDBX_DANGLING_DBI : MDBX_BAD_DBI; + } + } + + /* хендл не использовался в транзакции, либо явно пере-отрывается при + * отсутствии висячих курсоров */ + eASSERT(env, (txn->mt_dbi_state[dbi] & DBI_LINDO) && !txn->mt_cursors[dbi]); + + /* читаем актуальные флаги и sequence */ + struct dbi_snap_result snap = dbi_snap(env, dbi); + txn->mt_dbi_seqs[dbi] = snap.sequence; + if (snap.flags & DB_VALID) { + txn->mt_dbs[dbi].md_flags = snap.flags & DB_PERSISTENT_FLAGS; + txn->mt_dbi_state[dbi] = DBI_LINDO | DBI_VALID | DBI_STALE; + return MDBX_SUCCESS; + } + return MDBX_BAD_DBI; +} + +/* Export or close DBI handles opened in this txn. */ +static int dbi_update(MDBX_txn *txn, int keep) { + MDBX_env *const env = txn->mt_env; + tASSERT(txn, !txn->mt_parent && txn == env->me_txn0); + bool locked = false; + void *defer_free = nullptr; + TXN_FOREACH_DBI_USER(txn, dbi) { + if (likely((txn->mt_dbi_state[dbi] & DBI_CREAT) == 0)) + continue; + if (!locked) { + int err = osal_fastmutex_acquire(&env->me_dbi_lock); + if (unlikely(err != MDBX_SUCCESS)) + return err; + locked = true; + if (dbi >= env->me_numdbs) + /* хендл был закрыт из другого потока пока захватывали блокировку */ + continue; + } + tASSERT(txn, dbi < env->me_numdbs); + if (keep) { + env->me_db_flags[dbi] = txn->mt_dbs[dbi].md_flags | DB_VALID; + } else { + uint32_t seq = dbi_seq_next(env, dbi); + void *ptr = env->me_dbxs[dbi].md_name.iov_base; + if (ptr) { + env->me_db_flags[dbi] = 0; + env->me_dbxs[dbi].md_name.iov_len = 0; + env->me_dbxs[dbi].md_name.iov_base = nullptr; + atomic_store32(&env->me_dbi_seqs[dbi], seq, mo_AcquireRelease); + osal_flush_incoherent_cpu_writeback(); + osal_free(defer_free); + defer_free = ptr; + } else { + eASSERT(env, env->me_dbxs[dbi].md_name.iov_len == 0); + eASSERT(env, env->me_db_flags[dbi] == 0); + } + } + } + + if (locked) { + size_t i = env->me_numdbs; + while ((env->me_db_flags[i - 1] & DB_VALID) == 0) { + --i; + eASSERT(env, i >= CORE_DBS); + eASSERT(env, !env->me_db_flags[i] && 
!env->me_dbxs[i].md_name.iov_len && + !env->me_dbxs[i].md_name.iov_base); + } + env->me_numdbs = (unsigned)i; + ENSURE(txn->mt_env, + osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); + osal_free(defer_free); + } + return MDBX_SUCCESS; +} + int mdbx_cmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, const MDBX_val *b) { eASSERT(NULL, txn->mt_signature == MDBX_MT_SIGNATURE); + tASSERT(txn, (dbi_state(txn, dbi) & DBI_VALID) && !dbi_changed(txn, dbi)); + tASSERT(txn, dbi < txn->mt_env->me_numdbs && + (txn->mt_env->me_db_flags[dbi] & DB_VALID) != 0); return txn->mt_env->me_dbxs[dbi].md_cmp(a, b); } int mdbx_dcmp(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *a, const MDBX_val *b) { eASSERT(NULL, txn->mt_signature == MDBX_MT_SIGNATURE); + tASSERT(txn, (dbi_state(txn, dbi) & DBI_VALID) && !dbi_changed(txn, dbi)); + tASSERT(txn, dbi < txn->mt_env->me_numdbs && + (txn->mt_env->me_db_flags[dbi] & DB_VALID)); return txn->mt_env->me_dbxs[dbi].md_dcmp(a, b); } @@ -4829,12 +5226,15 @@ static size_t txn_keep(MDBX_txn *txn, MDBX_cursor *m0) { tASSERT(txn, (txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_WRITEMAP)) == 0); txn_lru_turn(txn); size_t keep = m0 ? cursor_keep(txn, m0) : 0; - for (size_t i = FREE_DBI; i < txn->mt_numdbs; ++i) - if (F_ISSET(txn->mt_dbi_state[i], DBI_DIRTY | DBI_VALID) && - txn->mt_dbs[i].md_root != P_INVALID) - for (MDBX_cursor *mc = txn->mt_cursors[i]; mc; mc = mc->mc_next) + + TXN_FOREACH_DBI_ALL(txn, dbi) { + if (F_ISSET(txn->mt_dbi_state[dbi], DBI_DIRTY | DBI_VALID) && + txn->mt_dbs[dbi].md_root != P_INVALID) + for (MDBX_cursor *mc = txn->mt_cursors[dbi]; mc; mc = mc->mc_next) if (mc != m0) keep += cursor_keep(txn, mc); + } + return keep; } @@ -4891,33 +5291,6 @@ spill_prio(const MDBX_txn *txn, const size_t i, const uint32_t reciprocal) { return prio = (unsigned)factor; } -/* Spill pages from the dirty list back to disk. 
- * This is intended to prevent running into MDBX_TXN_FULL situations, - * but note that they may still occur in a few cases: - * - * 1) our estimate of the txn size could be too small. Currently this - * seems unlikely, except with a large number of MDBX_MULTIPLE items. - * - * 2) child txns may run out of space if their parents dirtied a - * lot of pages and never spilled them. TODO: we probably should do - * a preemptive spill during mdbx_txn_begin() of a child txn, if - * the parent's dirtyroom is below a given threshold. - * - * Otherwise, if not using nested txns, it is expected that apps will - * not run into MDBX_TXN_FULL any more. The pages are flushed to disk - * the same way as for a txn commit, e.g. their dirty status is cleared. - * If the txn never references them again, they can be left alone. - * If the txn only reads them, they can be used without any fuss. - * If the txn writes them again, they can be dirtied immediately without - * going thru all of the work of page_touch(). Such references are - * handled by page_unspill(). - * - * Also note, we never spill DB root pages, nor pages of active cursors, - * because we'll need these back again soon anyway. And in nested txns, - * we can't spill a page in a child txn if it was already spilled in a - * parent txn. That would alter the parent txns' data even though - * the child hasn't committed yet, and we'd have no way to undo it if - * the child aborted. */ __cold static int txn_spill_slowpath(MDBX_txn *const txn, MDBX_cursor *const m0, const intptr_t wanna_spill_entries, const intptr_t wanna_spill_npages, @@ -6854,22 +7227,6 @@ static pgno_t *scan4seq_resolver(pgno_t *range, const size_t len, //------------------------------------------------------------------------------ -/* Allocate page numbers and memory for writing. Maintain mt_last_reclaimed, - * mt_relist and mt_next_pgno. Set MDBX_TXN_ERROR on failure. 
- * - * If there are free pages available from older transactions, they - * are re-used first. Otherwise allocate a new page at mt_next_pgno. - * Do not modify the GC, just merge GC records into mt_relist - * and move mt_last_reclaimed to say which records were consumed. Only this - * function can create mt_relist and move - * mt_last_reclaimed/mt_next_pgno. - * - * [in] mc cursor A cursor handle identifying the transaction and - * database for which we are allocating. - * [in] num the number of pages to allocate. - * - * Returns 0 on success, non-zero on failure.*/ - #define MDBX_ALLOC_DEFAULT 0 #define MDBX_ALLOC_RESERVE 1 #define MDBX_ALLOC_UNIMPORTANT 2 @@ -7761,7 +8118,8 @@ done: __hot static pgr_t page_alloc(const MDBX_cursor *const mc) { MDBX_txn *const txn = mc->mc_txn; tASSERT(txn, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); - tASSERT(txn, F_ISSET(txn->mt_dbi_state[mc->mc_dbi], DBI_DIRTY | DBI_VALID)); + tASSERT(txn, F_ISSET(dbi_state(txn, mc->mc_dbi), + DBI_LINDO | DBI_VALID | DBI_DIRTY)); /* If there are any loose pages, just use them */ while (likely(txn->tw.loose_pages)) { @@ -7901,7 +8259,7 @@ __hot static int page_touch(MDBX_cursor *mc) { int rc; tASSERT(txn, mc->mc_txn->mt_flags & MDBX_TXN_DIRTY); - tASSERT(txn, F_ISSET(*mc->mc_dbi_state, DBI_DIRTY | DBI_VALID)); + tASSERT(txn, F_ISSET(*mc->mc_dbi_state, DBI_LINDO | DBI_VALID | DBI_DIRTY)); tASSERT(txn, !IS_OVERFLOW(mp)); if (ASSERT_ENABLED()) { if (mc->mc_flags & C_SUB) { @@ -8285,109 +8643,6 @@ __cold int mdbx_env_sync_ex(MDBX_env *env, bool force, bool nonblock) { return env_sync(env, force, nonblock); } -/* Back up parent txn's cursors, then grab the originals for tracking */ -static int cursor_shadow(MDBX_txn *parent, MDBX_txn *nested) { - tASSERT(parent, parent->mt_cursors[FREE_DBI] == nullptr); - nested->mt_cursors[FREE_DBI] = nullptr; - for (int i = parent->mt_numdbs; --i > FREE_DBI;) { - nested->mt_cursors[i] = NULL; - MDBX_cursor *mc = parent->mt_cursors[i]; - if (mc != NULL) { - size_t size = 
mc->mc_xcursor ? sizeof(MDBX_cursor) + sizeof(MDBX_xcursor) - : sizeof(MDBX_cursor); - for (MDBX_cursor *bk; mc; mc = bk->mc_next) { - bk = mc; - if (mc->mc_signature != MDBX_MC_LIVE) - continue; - bk = osal_malloc(size); - if (unlikely(!bk)) - return MDBX_ENOMEM; -#if MDBX_DEBUG - memset(bk, 0xCD, size); - VALGRIND_MAKE_MEM_UNDEFINED(bk, size); -#endif /* MDBX_DEBUG */ - *bk = *mc; - mc->mc_backup = bk; - /* Kill pointers into src to reduce abuse: The - * user may not use mc until dst ends. But we need a valid - * txn pointer here for cursor fixups to keep working. */ - mc->mc_txn = nested; - mc->mc_db = &nested->mt_dbs[i]; - mc->mc_dbi_state = &nested->mt_dbi_state[i]; - MDBX_xcursor *mx = mc->mc_xcursor; - if (mx != NULL) { - *(MDBX_xcursor *)(bk + 1) = *mx; - mx->mx_cursor.mc_txn = nested; - } - mc->mc_next = nested->mt_cursors[i]; - nested->mt_cursors[i] = mc; - } - } - } - return MDBX_SUCCESS; -} - -/* Close this txn's cursors, give parent txn's cursors back to parent. - * - * [in] txn the transaction handle. - * [in] merge true to keep changes to parent cursors, false to revert. - * - * Returns 0 on success, non-zero on failure. */ -static void cursors_eot(MDBX_txn *txn, const bool merge) { - tASSERT(txn, txn->mt_cursors[FREE_DBI] == nullptr); - for (intptr_t i = txn->mt_numdbs; --i > FREE_DBI;) { - MDBX_cursor *mc = txn->mt_cursors[i]; - if (!mc) - continue; - txn->mt_cursors[i] = nullptr; - do { - const unsigned stage = mc->mc_signature; - MDBX_cursor *const next = mc->mc_next; - MDBX_cursor *const bk = mc->mc_backup; - ENSURE(txn->mt_env, - stage == MDBX_MC_LIVE || (stage == MDBX_MC_WAIT4EOT && bk)); - cASSERT(mc, mc->mc_dbi == (MDBX_dbi)i); - if (bk) { - MDBX_xcursor *mx = mc->mc_xcursor; - tASSERT(txn, txn->mt_parent != NULL); - /* Zap: Using uninitialized memory '*mc->mc_backup'. 
*/ - MDBX_SUPPRESS_GOOFY_MSVC_ANALYZER(6001); - ENSURE(txn->mt_env, bk->mc_signature == MDBX_MC_LIVE); - tASSERT(txn, mx == bk->mc_xcursor); - if (stage == MDBX_MC_WAIT4EOT /* Cursor was closed by user */) - mc->mc_signature = stage /* Promote closed state to parent txn */; - else if (merge) { - /* Restore pointers to parent txn */ - mc->mc_next = bk->mc_next; - mc->mc_backup = bk->mc_backup; - mc->mc_txn = bk->mc_txn; - mc->mc_db = bk->mc_db; - mc->mc_dbi_state = bk->mc_dbi_state; - if (mx) { - if (mx != bk->mc_xcursor) { - *bk->mc_xcursor = *mx; - mx = bk->mc_xcursor; - } - mx->mx_cursor.mc_txn = bk->mc_txn; - } - } else { - /* Restore from backup, i.e. rollback/abort nested txn */ - *mc = *bk; - if (mx) - *mx = *(MDBX_xcursor *)(bk + 1); - } - bk->mc_signature = 0; - osal_free(bk); - } else { - ENSURE(txn->mt_env, stage == MDBX_MC_LIVE); - mc->mc_signature = MDBX_MC_READY4CLOSE /* Cursor may be reused */; - mc->mc_flags = 0 /* reset C_UNTRACK */; - } - mc = next; - } while (mc); - } -} - #if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) /* Find largest mvcc-snapshot still referenced by this process. 
*/ static pgno_t find_largest_this(MDBX_env *env, pgno_t largest) { @@ -8730,6 +8985,8 @@ __hot static int coherency_check_head(MDBX_txn *txn, const meta_ptr_t head, /* Copy the DB info and flags */ txn->mt_geo = head.ptr_v->mm_geo; memcpy(txn->mt_dbs, head.ptr_c->mm_dbs, CORE_DBS * sizeof(MDBX_db)); + VALGRIND_MAKE_MEM_UNDEFINED(txn->mt_dbs + CORE_DBS, + txn->mt_env->me_maxdbs - CORE_DBS); txn->mt_canary = head.ptr_v->mm_canary; if (unlikely(!coherency_check(txn->mt_env, head.txnid, txn->mt_dbs, @@ -8917,7 +9174,6 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { rc = MDBX_CORRUPTED; goto bailout; } - txn->mt_numdbs = env->me_numdbs; ENSURE(env, txn->mt_txnid >= /* paranoia is appropriate here */ env->me_lck ->mti_oldest_reader.weak); @@ -8992,9 +9248,6 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { if (txn->tw.lifo_reclaimed) MDBX_PNL_SETSIZE(txn->tw.lifo_reclaimed, 0); env->me_txn = txn; - txn->mt_numdbs = env->me_numdbs; - memcpy(txn->mt_dbi_seqs, env->me_dbi_seqs, - txn->mt_numdbs * sizeof(unsigned)); if ((txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC) { rc = dpl_alloc(txn); @@ -9012,23 +9265,46 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { eASSERT(env, txn->tw.writemap_spilled_npages == 0); } + txn->mt_front = + txn->mt_txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0); + /* Setup db info */ - osal_compiler_barrier(); - memset(txn->mt_cursors, 0, sizeof(MDBX_cursor *) * txn->mt_numdbs); - for (size_t i = CORE_DBS; i < txn->mt_numdbs; i++) { - const unsigned db_flags = env->me_db_flags[i]; - txn->mt_dbs[i].md_flags = db_flags & DB_PERSISTENT_FLAGS; - txn->mt_dbi_state[i] = - (db_flags & DB_VALID) ? 
DBI_VALID | DBI_USRVALID | DBI_STALE : 0; + VALGRIND_MAKE_MEM_UNDEFINED(txn->mt_dbi_state, env->me_maxdbs); +#if MDBX_ENABLE_DBI_SPARSE + txn->mt_numdbs = CORE_DBS; + VALGRIND_MAKE_MEM_UNDEFINED( + txn->mt_dbi_sparse, + ceil_powerof2(env->me_maxdbs, CHAR_BIT * sizeof(txn->mt_dbi_sparse[0])) / + CHAR_BIT); + txn->mt_dbi_sparse[0] = (1 << CORE_DBS) - 1; +#else + txn->mt_numdbs = (env->me_numdbs < 8) ? env->me_numdbs : 8; + if (txn->mt_numdbs > CORE_DBS) + memset(txn->mt_dbi_state + CORE_DBS, 0, txn->mt_numdbs - CORE_DBS); +#endif /* MDBX_ENABLE_DBI_SPARSE */ + txn->mt_dbi_state[FREE_DBI] = DBI_LINDO | DBI_VALID; + txn->mt_dbi_state[MAIN_DBI] = DBI_LINDO | DBI_VALID; + txn->mt_cursors[FREE_DBI] = nullptr; + txn->mt_cursors[MAIN_DBI] = nullptr; + txn->mt_dbi_seqs[FREE_DBI] = 0; + struct dbi_snap_result main_snap = dbi_snap(env, MAIN_DBI); + if (unlikely(main_snap.flags != + (DB_VALID | txn->mt_dbs[MAIN_DBI].md_flags))) { + if (main_snap.flags & DB_VALID) { + rc = MDBX_INCOMPATIBLE; + goto bailout; + } + env->me_db_flags[MAIN_DBI] = DB_VALID | txn->mt_dbs[MAIN_DBI].md_flags; + main_snap.sequence = + atomic_store32(&env->me_dbi_seqs[MAIN_DBI], dbi_seq_next(env, MAIN_DBI), + mo_AcquireRelease); } - txn->mt_dbi_state[MAIN_DBI] = DBI_VALID | DBI_USRVALID; + txn->mt_dbi_seqs[MAIN_DBI] = main_snap.sequence; + rc = setup_dbx(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - txn->mt_dbi_state[FREE_DBI] = DBI_VALID; - txn->mt_front = - txn->mt_txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0); if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) { WARNING("%s", "environment had fatal error, must shutdown!"); @@ -9238,7 +9514,6 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, return MDBX_EACCESS; flags |= env->me_flags & MDBX_WRITEMAP; - MDBX_txn *txn = nullptr; if (parent) { /* Nested transactions: Max 1 child, write txns only, no writemap */ @@ -9270,11 +9545,24 @@ int 
mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, goto renew; } + const intptr_t bitmap_bytes = +#if MDBX_ENABLE_DBI_SPARSE + ceil_powerof2(env->me_maxdbs, CHAR_BIT * sizeof(txn->mt_dbi_sparse[0])) / + CHAR_BIT; +#else + 0; +#endif /* MDBX_ENABLE_DBI_SPARSE */ + STATIC_ASSERT(sizeof(txn->tw) > sizeof(txn->to)); const size_t base = (flags & MDBX_TXN_RDONLY) ? sizeof(MDBX_txn) - sizeof(txn->tw) + sizeof(txn->to) : sizeof(MDBX_txn); const size_t size = - base + env->me_maxdbs * (sizeof(MDBX_db) + sizeof(MDBX_cursor *) + 1); + base + + ((flags & MDBX_TXN_RDONLY) + ? (size_t)bitmap_bytes + env->me_maxdbs * sizeof(txn->mt_dbi_seqs[0]) + : 0) + + env->me_maxdbs * (sizeof(txn->mt_dbs[0]) + sizeof(txn->mt_cursors[0]) + + sizeof(txn->mt_dbi_state[0])); txn = osal_malloc(size); if (unlikely(txn == nullptr)) { DEBUG("calloc: %s", "failed"); @@ -9288,16 +9576,21 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, memset(txn, 0, (MDBX_GOOFY_MSVC_STATIC_ANALYZER && base > size) ? 
size : base); txn->mt_dbs = ptr_disp(txn, base); - txn->mt_cursors = ptr_disp(txn->mt_dbs, sizeof(MDBX_db) * env->me_maxdbs); + txn->mt_cursors = + ptr_disp(txn->mt_dbs, env->me_maxdbs * sizeof(txn->mt_dbs[0])); #if MDBX_DEBUG txn->mt_cursors[FREE_DBI] = nullptr; /* avoid SIGSEGV in an assertion later */ -#endif /* MDBX_DEBUG */ - txn->mt_dbi_state = ptr_disp(txn, size - env->me_maxdbs); +#endif + txn->mt_dbi_state = + ptr_disp(txn, size - env->me_maxdbs * sizeof(txn->mt_dbi_state[0])); txn->mt_flags = flags; txn->mt_env = env; if (parent) { tASSERT(parent, dirtylist_check(parent)); +#if MDBX_ENABLE_DBI_SPARSE + txn->mt_dbi_sparse = parent->mt_dbi_sparse; +#endif /* MDBX_ENABLE_DBI_SPARSE */ txn->mt_dbi_seqs = parent->mt_dbi_seqs; txn->mt_geo = parent->mt_geo; rc = dpl_alloc(txn); @@ -9375,14 +9668,19 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, parent->mt_flags |= MDBX_TXN_HAS_CHILD; parent->mt_child = txn; txn->mt_parent = parent; - txn->mt_numdbs = parent->mt_numdbs; txn->mt_owner = parent->mt_owner; - memcpy(txn->mt_dbs, parent->mt_dbs, txn->mt_numdbs * sizeof(MDBX_db)); txn->tw.troika = parent->tw.troika; - /* Copy parent's mt_dbi_state, but clear DB_NEW */ - for (size_t i = 0; i < txn->mt_numdbs; i++) - txn->mt_dbi_state[i] = - parent->mt_dbi_state[i] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + + txn->mt_cursors[FREE_DBI] = nullptr; + txn->mt_cursors[MAIN_DBI] = nullptr; + txn->mt_dbi_state[FREE_DBI] = + parent->mt_dbi_state[FREE_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + txn->mt_dbi_state[MAIN_DBI] = + parent->mt_dbi_state[MAIN_DBI] & ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY); + memset(txn->mt_dbi_state + CORE_DBS, 0, + (txn->mt_numdbs = parent->mt_numdbs) - CORE_DBS); + memcpy(txn->mt_dbs, parent->mt_dbs, sizeof(txn->mt_dbs[0]) * CORE_DBS); + tASSERT(parent, parent->tw.dirtyroom + parent->tw.dirtylist->length == (parent->mt_parent ? 
parent->mt_parent->tw.dirtyroom @@ -9391,7 +9689,10 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, (txn->mt_parent ? txn->mt_parent->tw.dirtyroom : txn->mt_env->me_options.dp_limit)); env->me_txn = txn; - rc = cursor_shadow(parent, txn); + tASSERT(parent, parent->mt_cursors[FREE_DBI] == nullptr); + rc = parent->mt_cursors[MAIN_DBI] + ? cursor_shadow(parent->mt_cursors[MAIN_DBI], txn, MAIN_DBI) + : MDBX_SUCCESS; if (AUDIT_ENABLED() && ASSERT_ENABLED()) { txn->mt_signature = MDBX_MT_SIGNATURE; tASSERT(txn, audit_ex(txn, 0, false) == 0); @@ -9399,7 +9700,11 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, if (unlikely(rc != MDBX_SUCCESS)) txn_end(txn, TXN_END_FAIL_BEGINCHILD); } else { /* MDBX_TXN_RDONLY */ - txn->mt_dbi_seqs = env->me_dbi_seqs; + txn->mt_dbi_seqs = + ptr_disp(txn->mt_cursors, env->me_maxdbs * sizeof(txn->mt_cursors[0])); +#if MDBX_ENABLE_DBI_SPARSE + txn->mt_dbi_sparse = ptr_disp(txn->mt_dbi_state, -bitmap_bytes); +#endif /* MDBX_ENABLE_DBI_SPARSE */ renew: rc = txn_renew(txn, flags); } @@ -9580,133 +9885,6 @@ int mdbx_txn_flags(const MDBX_txn *txn) { return txn->mt_flags; } -/* Check for misused dbi handles */ -static __inline bool dbi_changed(const MDBX_txn *txn, size_t dbi) { - if (txn->mt_dbi_seqs == txn->mt_env->me_dbi_seqs) - return false; - if (likely( - txn->mt_dbi_seqs[dbi].weak == - atomic_load32((MDBX_atomic_uint32_t *)&txn->mt_env->me_dbi_seqs[dbi], - mo_AcquireRelease))) - return false; - return true; -} - -static __inline unsigned dbi_seq(const MDBX_env *const env, size_t slot) { - unsigned v = env->me_dbi_seqs[slot].weak + 1; - return v + (v == 0); -} - -static void dbi_import_locked(MDBX_txn *txn) { - const MDBX_env *const env = txn->mt_env; - size_t n = env->me_numdbs; - for (size_t i = CORE_DBS; i < n; ++i) { - if (i >= txn->mt_numdbs) { - txn->mt_cursors[i] = NULL; - if (txn->mt_dbi_seqs != env->me_dbi_seqs) - txn->mt_dbi_seqs[i].weak = 0; - txn->mt_dbi_state[i] = 
0; - } - if ((dbi_changed(txn, i) && - (txn->mt_dbi_state[i] & (DBI_CREAT | DBI_DIRTY | DBI_FRESH)) == 0) || - ((env->me_db_flags[i] & DB_VALID) && - !(txn->mt_dbi_state[i] & DBI_VALID))) { - tASSERT(txn, (txn->mt_dbi_state[i] & - (DBI_CREAT | DBI_DIRTY | DBI_FRESH)) == 0); - txn->mt_dbi_seqs[i] = env->me_dbi_seqs[i]; - txn->mt_dbs[i].md_flags = env->me_db_flags[i] & DB_PERSISTENT_FLAGS; - txn->mt_dbi_state[i] = 0; - if (env->me_db_flags[i] & DB_VALID) { - txn->mt_dbi_state[i] = DBI_VALID | DBI_USRVALID | DBI_STALE; - tASSERT(txn, env->me_dbxs[i].md_cmp != NULL); - tASSERT(txn, env->me_dbxs[i].md_name.iov_base != NULL); - } - } - } - while (unlikely(n < txn->mt_numdbs)) - if (txn->mt_cursors[txn->mt_numdbs - 1] == NULL && - (txn->mt_dbi_state[txn->mt_numdbs - 1] & DBI_USRVALID) == 0) - txn->mt_numdbs -= 1; - else { - if ((txn->mt_dbi_state[n] & DBI_USRVALID) == 0) { - if (txn->mt_dbi_seqs != env->me_dbi_seqs) - txn->mt_dbi_seqs[n].weak = 0; - txn->mt_dbi_state[n] = 0; - } - ++n; - } - txn->mt_numdbs = (MDBX_dbi)n; -} - -/* Import DBI which opened after txn started into context */ -__cold static bool dbi_import(MDBX_txn *txn, MDBX_dbi dbi) { - if (dbi < CORE_DBS || - (dbi >= txn->mt_numdbs && dbi >= txn->mt_env->me_numdbs)) - return false; - - ENSURE(txn->mt_env, - osal_fastmutex_acquire(&txn->mt_env->me_dbi_lock) == MDBX_SUCCESS); - dbi_import_locked(txn); - ENSURE(txn->mt_env, - osal_fastmutex_release(&txn->mt_env->me_dbi_lock) == MDBX_SUCCESS); - return txn->mt_dbi_state[dbi] & DBI_USRVALID; -} - -/* Export or close DBI handles opened in this txn. 
*/ -static void dbi_update(MDBX_txn *txn, int keep) { - tASSERT(txn, !txn->mt_parent && txn == txn->mt_env->me_txn0); - MDBX_dbi n = txn->mt_numdbs; - if (n) { - bool locked = false; - MDBX_env *const env = txn->mt_env; - - for (size_t i = n; --i >= CORE_DBS;) { - if (likely((txn->mt_dbi_state[i] & DBI_CREAT) == 0)) - continue; - if (!locked) { - ENSURE(env, osal_fastmutex_acquire(&env->me_dbi_lock) == MDBX_SUCCESS); - locked = true; - } - if (env->me_numdbs <= i || - txn->mt_dbi_seqs[i].weak != env->me_dbi_seqs[i].weak) - continue /* dbi explicitly closed and/or then re-opened by other txn */; - if (keep) { - env->me_db_flags[i] = txn->mt_dbs[i].md_flags | DB_VALID; - } else { - const MDBX_val name = env->me_dbxs[i].md_name; - if (name.iov_base) { - env->me_dbxs[i].md_name.iov_base = nullptr; - eASSERT(env, env->me_db_flags[i] == 0); - atomic_store32(&env->me_dbi_seqs[i], dbi_seq(env, i), - mo_AcquireRelease); - env->me_dbxs[i].md_name.iov_len = 0; - if (name.iov_len) - osal_free(name.iov_base); - } else { - eASSERT(env, name.iov_len == 0); - eASSERT(env, env->me_db_flags[i] == 0); - } - } - } - - n = env->me_numdbs; - if (n > CORE_DBS && unlikely(!(env->me_db_flags[n - 1] & DB_VALID))) { - if (!locked) { - ENSURE(env, osal_fastmutex_acquire(&env->me_dbi_lock) == MDBX_SUCCESS); - locked = true; - } - - n = env->me_numdbs; - while (n > CORE_DBS && !(env->me_db_flags[n - 1] & DB_VALID)) - --n; - env->me_numdbs = n; - } - - if (unlikely(locked)) - ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); - } -} - /* Filter-out pgno list from transaction's dirty-page list */ static void dpl_sift(MDBX_txn *const txn, MDBX_PNL pl, const bool spilled) { tASSERT(txn, (txn->mt_flags & MDBX_TXN_RDONLY) == 0); @@ -9790,7 +9968,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { } #endif /* MDBX_ENV_CHECKPID */ - DEBUG("%s txn %" PRIaTXN "%c %p on mdbenv %p, root page %" PRIaPGNO + DEBUG("%s txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" 
PRIaPGNO, names[mode & TXN_END_OPMASK], txn->mt_txnid, (txn->mt_flags & MDBX_TXN_RDONLY) ? 'r' : 'w', (void *)txn, (void *)env, @@ -9851,7 +10029,7 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { if (txn == env->me_txn0) { eASSERT(env, txn->mt_parent == NULL); /* Export or close DBI handles created in this txn */ - dbi_update(txn, mode & TXN_END_UPDATE); + rc = dbi_update(txn, mode & TXN_END_UPDATE); pnl_shrink(&txn->tw.retired_pages); pnl_shrink(&txn->tw.relist); if (!(env->me_flags & MDBX_WRITEMAP)) @@ -9983,10 +10161,41 @@ int mdbx_txn_abort(MDBX_txn *txn) { return txn_end(txn, TXN_END_ABORT | TXN_END_SLOT | TXN_END_FREE); } +__cold static MDBX_db *audit_db_dig(const MDBX_txn *txn, const size_t dbi, + MDBX_db *fallback) { + const MDBX_txn *dig = txn; + do { + tASSERT(txn, txn->mt_numdbs == dig->mt_numdbs); + const uint8_t state = dbi_state(dig, dbi); + if (state & DBI_LINDO) + switch (state & (DBI_VALID | DBI_STALE | DBI_OLDEN)) { + case DBI_VALID: + case DBI_OLDEN: + return dig->mt_dbs + dbi; + case 0: + return nullptr; + case DBI_VALID | DBI_STALE: + case DBI_OLDEN | DBI_STALE: + break; + default: + tASSERT(txn, !!"unexpected dig->mt_dbi_state[dbi]"); + } + dig = dig->mt_parent; + } while (dig); + return fallback; +} + +static size_t audit_db_used(const MDBX_db *db) { + return db ? (size_t)db->md_branch_pages + (size_t)db->md_leaf_pages + + (size_t)db->md_overflow_pages + : 0; +} + /* Count all the pages in each DB and in the GC and make sure * it matches the actual number of pages being used. 
*/ -__cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, - bool dont_filter_gc) { +__cold static int audit_ex_locked(MDBX_txn *txn, size_t retired_stored, + bool dont_filter_gc) { + const MDBX_env *const env = txn->mt_env; size_t pending = 0; if ((txn->mt_flags & MDBX_TXN_RDONLY) == 0) pending = txn->tw.loose_count + MDBX_PNL_GETSIZE(txn->tw.relist) + @@ -10017,82 +10226,66 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, } tASSERT(txn, rc == MDBX_NOTFOUND); - for (size_t i = FREE_DBI; i < txn->mt_numdbs; i++) - txn->mt_dbi_state[i] &= ~DBI_AUDIT; - - size_t used = NUM_METAS; - for (size_t i = FREE_DBI; i <= MAIN_DBI; i++) { - if (!(txn->mt_dbi_state[i] & DBI_VALID)) - continue; - rc = cursor_init(&cx.outer, txn, i); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - txn->mt_dbi_state[i] |= DBI_AUDIT; - if (txn->mt_dbs[i].md_root == P_INVALID) - continue; - used += (size_t)txn->mt_dbs[i].md_branch_pages + - (size_t)txn->mt_dbs[i].md_leaf_pages + - (size_t)txn->mt_dbs[i].md_overflow_pages; - - if (i != MAIN_DBI) - continue; - rc = page_search(&cx.outer, NULL, MDBX_PS_FIRST); - const MDBX_env *const env = txn->mt_env; - while (rc == MDBX_SUCCESS) { - MDBX_page *mp = cx.outer.mc_pg[cx.outer.mc_top]; - for (size_t j = 0; j < page_numkeys(mp); j++) { - const MDBX_node *node = page_node(mp, j); - if (node_flags(node) == F_SUBDATA) { - if (unlikely(node_ds(node) != sizeof(MDBX_db))) - return MDBX_CORRUPTED; - const MDBX_val name = {node_key(node), node_ks(node)}; - const MDBX_db *db = nullptr; - if ((txn->mt_flags & MDBX_TXN_RDONLY) == 0) { - for (MDBX_dbi dbi = txn->mt_numdbs; --dbi > MAIN_DBI;) { - if ((txn->mt_dbi_state[dbi] & DBI_VALID) && - /* env->me_dbxs[k].md_name.iov_base && */ - env->me_dbxs[MAIN_DBI].md_cmp( - &name, &env->me_dbxs[dbi].md_name) == 0) { - txn->mt_dbi_state[dbi] |= DBI_AUDIT; - if (!(txn->mt_dbi_state[dbi] & MDBX_DBI_STALE)) - db = txn->mt_dbs + dbi; - break; - } - } - } - MDBX_db aligned; - if (!db) - db = 
memcpy(&aligned, node_data(node), sizeof(MDBX_db)); - used += (size_t)db->md_branch_pages + (size_t)db->md_leaf_pages + - (size_t)db->md_overflow_pages; - } - } - rc = cursor_sibling(&cx.outer, SIBLING_RIGHT); - } - tASSERT(txn, rc == MDBX_NOTFOUND); + const size_t done_bitmap_size = (txn->mt_numdbs + CHAR_BIT - 1) / CHAR_BIT; + uint8_t *const done_bitmap = alloca(done_bitmap_size); + memset(done_bitmap, 0, done_bitmap_size); + if (txn->mt_parent) { + tASSERT(txn, txn->mt_numdbs == txn->mt_parent->mt_numdbs && + txn->mt_numdbs == txn->mt_env->me_txn->mt_numdbs); +#if MDBX_ENABLE_DBI_SPARSE + tASSERT(txn, txn->mt_dbi_sparse == txn->mt_parent->mt_dbi_sparse && + txn->mt_dbi_sparse == txn->mt_env->me_txn->mt_dbi_sparse); +#endif /* MDBX_ENABLE_DBI_SPARSE */ } - for (size_t i = FREE_DBI; i < txn->mt_numdbs; i++) { - if ((txn->mt_dbi_state[i] & (DBI_VALID | DBI_AUDIT | DBI_STALE)) != - DBI_VALID) - continue; - for (MDBX_txn *t = txn; t; t = t->mt_parent) - if (F_ISSET(t->mt_dbi_state[i], DBI_DIRTY | DBI_CREAT)) { - used += (size_t)t->mt_dbs[i].md_branch_pages + - (size_t)t->mt_dbs[i].md_leaf_pages + - (size_t)t->mt_dbs[i].md_overflow_pages; - txn->mt_dbi_state[i] |= DBI_AUDIT; + size_t used = NUM_METAS + + audit_db_used(audit_db_dig(txn, FREE_DBI, nullptr)) + + audit_db_used(audit_db_dig(txn, MAIN_DBI, nullptr)); + rc = cursor_init(&cx.outer, txn, MAIN_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + for (rc = page_search(&cx.outer, NULL, MDBX_PS_FIRST); rc == MDBX_SUCCESS; + rc = cursor_sibling(&cx.outer, SIBLING_RIGHT)) { + MDBX_page *mp = cx.outer.mc_pg[cx.outer.mc_top]; + for (size_t k = 0; k < page_numkeys(mp); k++) { + MDBX_node *node = page_node(mp, k); + if (node_flags(node) != F_SUBDATA) + continue; + if (unlikely(node_ds(node) != sizeof(MDBX_db))) + return MDBX_CORRUPTED; + + MDBX_db reside; + const MDBX_db *db = memcpy(&reside, node_data(node), sizeof(reside)); + const MDBX_val name = {node_key(node), node_ks(node)}; + for (size_t dbi = CORE_DBS; 
dbi < env->me_numdbs; ++dbi) { + if (dbi >= txn->mt_numdbs || !(env->me_db_flags[dbi] & DB_VALID)) + continue; + if (env->me_dbxs[MAIN_DBI].md_cmp(&name, &env->me_dbxs[dbi].md_name)) + continue; + + done_bitmap[dbi / CHAR_BIT] |= 1 << dbi % CHAR_BIT; + db = audit_db_dig(txn, dbi, &reside); break; } - MDBX_ANALYSIS_ASSUME(txn != nullptr); - if (!(txn->mt_dbi_state[i] & DBI_AUDIT)) { + used += audit_db_used(db); + } + } + tASSERT(txn, rc == MDBX_NOTFOUND); + + for (size_t dbi = CORE_DBS; dbi < txn->mt_numdbs; ++dbi) { + if (done_bitmap[dbi / CHAR_BIT] & (1 << dbi % CHAR_BIT)) + continue; + const MDBX_db *db = audit_db_dig(txn, dbi, nullptr); + if (db) + used += audit_db_used(db); + else if (dbi_state(txn, dbi)) WARNING("audit %s@%" PRIaTXN ": unable account dbi %zd / \"%*s\", state 0x%02x", - txn->mt_parent ? "nested-" : "", txn->mt_txnid, i, - (int)txn->mt_env->me_dbxs[i].md_name.iov_len, - (const char *)txn->mt_env->me_dbxs[i].md_name.iov_base, - txn->mt_dbi_state[i]); - } + txn->mt_parent ? 
"nested-" : "", txn->mt_txnid, dbi, + (int)env->me_dbxs[dbi].md_name.iov_len, + (const char *)env->me_dbxs[dbi].md_name.iov_base, + dbi_state(txn, dbi)); } if (pending + gc + used == txn->mt_next_pgno) @@ -10113,6 +10306,18 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, return MDBX_PROBLEM; } +__cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, + bool dont_filter_gc) { + MDBX_env *const env = txn->mt_env; + int rc = osal_fastmutex_acquire(&env->me_dbi_lock); + if (likely(rc == MDBX_SUCCESS)) { + rc = audit_ex_locked(txn, retired_stored, dont_filter_gc); + ENSURE(txn->mt_env, + osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); + } + return rc; +} + typedef struct gc_update_context { size_t retired_stored, loop; size_t settled, cleaned_slot, reused_slot, filled_slot; @@ -11172,21 +11377,6 @@ static int txn_write(MDBX_txn *txn, iov_ctx_t *ctx) { return rc; } -/* Check txn and dbi arguments to a function */ -static __always_inline bool check_dbi(const MDBX_txn *txn, MDBX_dbi dbi, - unsigned validity) { - if (likely(dbi < txn->mt_numdbs)) { - if (likely(!dbi_changed(txn, dbi))) { - if (likely(txn->mt_dbi_state[dbi] & validity)) - return true; - if (likely(dbi < CORE_DBS || - (txn->mt_env->me_db_flags[dbi] & DB_VALID) == 0)) - return false; - } - } - return dbi_import((MDBX_txn *)txn, dbi); -} - /* Merge child txn into parent */ static __inline void txn_merge(MDBX_txn *const parent, MDBX_txn *const txn, const size_t parent_retired_len) { @@ -11631,7 +11821,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { if (txn->tw.dirtylist->length == 0 && !(txn->mt_flags & MDBX_TXN_DIRTY) && parent->mt_numdbs == txn->mt_numdbs) { - for (int i = txn->mt_numdbs; --i >= 0;) { + TXN_FOREACH_DBI_ALL(txn, i) { tASSERT(txn, (txn->mt_dbi_state[i] & DBI_DIRTY) == 0); if ((txn->mt_dbi_state[i] & DBI_STALE) && !(parent->mt_dbi_state[i] & DBI_STALE)) @@ -11648,6 +11838,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, 
MDBX_commit_latency *latency) { tASSERT(txn, txn->tw.loose_count == 0); /* fast completion of pure nested transaction */ + VERBOSE("fast-complete pure nested txn %" PRIaTXN, txn->mt_txnid); end_mode = TXN_END_PURE_COMMIT | TXN_END_SLOT | TXN_END_FREE; goto done; } @@ -11711,17 +11902,23 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { end_mode |= TXN_END_EOTDONE; /* Update parent's DBs array */ - memcpy(parent->mt_dbs, txn->mt_dbs, txn->mt_numdbs * sizeof(MDBX_db)); - parent->mt_numdbs = txn->mt_numdbs; - for (size_t i = 0; i < txn->mt_numdbs; i++) { - /* preserve parent's status */ - const uint8_t state = - txn->mt_dbi_state[i] | - (parent->mt_dbi_state[i] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)); - DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", i, - (parent->mt_dbi_state[i] != state) ? "update" : "still", - parent->mt_dbi_state[i], state); - parent->mt_dbi_state[i] = state; + eASSERT(env, parent->mt_numdbs == txn->mt_numdbs); + TXN_FOREACH_DBI_ALL(txn, dbi) { + if (txn->mt_dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)) { + parent->mt_dbs[dbi] = txn->mt_dbs[dbi]; + /* preserve parent's status */ + const uint8_t state = + txn->mt_dbi_state[dbi] | + (parent->mt_dbi_state[dbi] & (DBI_CREAT | DBI_FRESH | DBI_DIRTY)); + DEBUG("dbi %zu dbi-state %s 0x%02x -> 0x%02x", dbi, + (parent->mt_dbi_state[dbi] != state) ? "update" : "still", + parent->mt_dbi_state[dbi], state); + parent->mt_dbi_state[dbi] = state; + } else { + eASSERT(env, txn->mt_dbi_state[dbi] == + (parent->mt_dbi_state[dbi] & + ~(DBI_FRESH | DBI_CREAT | DBI_DIRTY))); + } } if (latency) { @@ -11766,15 +11963,16 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { tASSERT(txn, (txn->mt_flags & MDBX_WRITEMAP) == 0 || MDBX_AVOID_MSYNC); tASSERT(txn, txn->tw.dirtyroom + txn->tw.dirtylist->length == (txn->mt_parent ? 
txn->mt_parent->tw.dirtyroom - : txn->mt_env->me_options.dp_limit)); + : env->me_options.dp_limit)); } cursors_eot(txn, false); end_mode |= TXN_END_EOTDONE; if ((!txn->tw.dirtylist || txn->tw.dirtylist->length == 0) && (txn->mt_flags & (MDBX_TXN_DIRTY | MDBX_TXN_SPILLS)) == 0) { - for (intptr_t i = txn->mt_numdbs; --i >= 0;) - tASSERT(txn, (txn->mt_dbi_state[i] & DBI_DIRTY) == 0); + TXN_FOREACH_DBI_ALL(txn, i) { + tASSERT(txn, !(txn->mt_dbi_state[i] & DBI_DIRTY)); + } #if defined(MDBX_NOSUCCESS_EMPTY_COMMIT) && MDBX_NOSUCCESS_EMPTY_COMMIT rc = txn_end(txn, end_mode); if (unlikely(rc != MDBX_SUCCESS)) @@ -11786,37 +11984,37 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { #endif /* MDBX_NOSUCCESS_EMPTY_COMMIT */ } - DEBUG("committing txn %" PRIaTXN " %p on mdbenv %p, root page %" PRIaPGNO + DEBUG("committing txn %" PRIaTXN " %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, txn->mt_txnid, (void *)txn, (void *)env, txn->mt_dbs[MAIN_DBI].md_root, txn->mt_dbs[FREE_DBI].md_root); - /* Update DB root pointers */ if (txn->mt_numdbs > CORE_DBS) { - MDBX_cursor_couple couple; - MDBX_val data; - data.iov_len = sizeof(MDBX_db); - - rc = cursor_init(&couple.outer, txn, MAIN_DBI); + /* Update subDB root pointers */ + MDBX_cursor_couple cx; + rc = cursor_init(&cx.outer, txn, MAIN_DBI); if (unlikely(rc != MDBX_SUCCESS)) goto fail; - for (MDBX_dbi i = CORE_DBS; i < txn->mt_numdbs; i++) { - if (txn->mt_dbi_state[i] & DBI_DIRTY) { - MDBX_db *db = &txn->mt_dbs[i]; - DEBUG("update main's entry for sub-db %u, mod_txnid %" PRIaTXN - " -> %" PRIaTXN, - i, db->md_mod_txnid, txn->mt_txnid); - /* Может быть mod_txnid > front после коммита вложенных тразакций */ - db->md_mod_txnid = txn->mt_txnid; - data.iov_base = db; - WITH_CURSOR_TRACKING( - couple.outer, - rc = cursor_put_nochecklen(&couple.outer, &env->me_dbxs[i].md_name, - &data, F_SUBDATA)); - if (unlikely(rc != MDBX_SUCCESS)) - goto fail; + cx.outer.mc_next = txn->mt_cursors[MAIN_DBI]; + 
txn->mt_cursors[MAIN_DBI] = &cx.outer; + TXN_FOREACH_DBI_USER(txn, i) { + if ((txn->mt_dbi_state[i] & DBI_DIRTY) == 0) + continue; + MDBX_db *const db = &txn->mt_dbs[i]; + DEBUG("update main's entry for sub-db %zu, mod_txnid %" PRIaTXN + " -> %" PRIaTXN, + i, db->md_mod_txnid, txn->mt_txnid); + /* Может быть mod_txnid > front после коммита вложенных тразакций */ + db->md_mod_txnid = txn->mt_txnid; + MDBX_val data = {db, sizeof(MDBX_db)}; + rc = cursor_put_nochecklen(&cx.outer, &env->me_dbxs[i].md_name, &data, + F_SUBDATA); + if (unlikely(rc != MDBX_SUCCESS)) { + txn->mt_cursors[MAIN_DBI] = cx.outer.mc_next; + goto fail; } } + txn->mt_cursors[MAIN_DBI] = cx.outer.mc_next; } ts_1 = latency ? osal_monotime() : 0; @@ -14911,7 +15109,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, env->me_flags = (flags & ~MDBX_FATAL_ERROR) | MDBX_ENV_ACTIVE; env->me_pathname = osal_calloc(env_pathname.ent_len + 1, sizeof(pathchar_t)); - env->me_dbxs = osal_calloc(env->me_maxdbs, sizeof(MDBX_dbx)); + env->me_dbxs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbxs[0])); env->me_db_flags = osal_calloc(env->me_maxdbs, sizeof(env->me_db_flags[0])); env->me_dbi_seqs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbi_seqs[0])); if (!(env->me_dbxs && env->me_pathname && env->me_db_flags && @@ -14921,6 +15119,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, } memcpy(env->me_pathname, env_pathname.dxb, env_pathname.ent_len * sizeof(pathchar_t)); + env->me_db_flags[FREE_DBI] = DB_VALID | MDBX_INTEGERKEY; env->me_dbxs[FREE_DBI].md_cmp = cmp_int_align4; /* aligned MDBX_INTEGERKEY */ env->me_dbxs[FREE_DBI].md_dcmp = cmp_lenfast; env->me_dbxs[FREE_DBI].md_klen_max = env->me_dbxs[FREE_DBI].md_klen_min = 8; @@ -15256,24 +15455,38 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, } if ((flags & MDBX_RDONLY) == 0) { - const size_t tsize = sizeof(MDBX_txn) + sizeof(MDBX_cursor), - size = tsize + env->me_maxdbs * - (sizeof(MDBX_db) + 
sizeof(MDBX_cursor *) + - sizeof(MDBX_atomic_uint32_t) + 1); + MDBX_txn *txn = nullptr; + const intptr_t bitmap_bytes = +#if MDBX_ENABLE_DBI_SPARSE + ceil_powerof2(env->me_maxdbs, + CHAR_BIT * sizeof(txn->mt_dbi_sparse[0])) / + CHAR_BIT; +#else + 0; +#endif /* MDBX_ENABLE_DBI_SPARSE */ + const size_t base = sizeof(MDBX_txn) + sizeof(MDBX_cursor); + const size_t size = + base + bitmap_bytes + + env->me_maxdbs * + (sizeof(txn->mt_dbs[0]) + sizeof(txn->mt_cursors[0]) + + sizeof(txn->mt_dbi_seqs[0]) + sizeof(txn->mt_dbi_state[0])); rc = alloc_page_buf(env); if (rc == MDBX_SUCCESS) { memset(env->me_pbuf, -1, env->me_psize * (size_t)2); memset(ptr_disp(env->me_pbuf, env->me_psize * (size_t)2), 0, env->me_psize); - MDBX_txn *txn = osal_calloc(1, size); + txn = osal_calloc(1, size); if (txn) { - txn->mt_dbs = ptr_disp(txn, tsize); + txn->mt_dbs = ptr_disp(txn, base); txn->mt_cursors = - ptr_disp(txn->mt_dbs, sizeof(MDBX_db) * env->me_maxdbs); - txn->mt_dbi_seqs = - ptr_disp(txn->mt_cursors, sizeof(MDBX_cursor *) * env->me_maxdbs); - txn->mt_dbi_state = ptr_disp( - txn->mt_dbi_seqs, sizeof(MDBX_atomic_uint32_t) * env->me_maxdbs); + ptr_disp(txn->mt_dbs, env->me_maxdbs * sizeof(txn->mt_dbs[0])); + txn->mt_dbi_seqs = ptr_disp( + txn->mt_cursors, env->me_maxdbs * sizeof(txn->mt_cursors[0])); + txn->mt_dbi_state = + ptr_disp(txn, size - env->me_maxdbs * sizeof(txn->mt_dbi_state[0])); +#if MDBX_ENABLE_DBI_SPARSE + txn->mt_dbi_sparse = ptr_disp(txn->mt_dbi_state, -bitmap_bytes); +#endif /* MDBX_ENABLE_DBI_SPARSE */ txn->mt_env = env; txn->mt_flags = MDBX_TXN_FINISHED; env->me_txn0 = txn; @@ -15876,10 +16089,6 @@ static int setup_dbx(MDBX_dbx *const dbx, const MDBX_db *const db, static int fetch_sdb(MDBX_txn *txn, size_t dbi) { MDBX_cursor_couple couple; - if (unlikely(dbi_changed(txn, dbi))) { - NOTICE("dbi %zu was changed for txn %" PRIaTXN, dbi, txn->mt_txnid); - return MDBX_BAD_DBI; - } int rc = cursor_init(&couple.outer, txn, MAIN_DBI); if (unlikely(rc != MDBX_SUCCESS)) 
return rc; @@ -15887,7 +16096,7 @@ static int fetch_sdb(MDBX_txn *txn, size_t dbi) { MDBX_dbx *const dbx = &txn->mt_env->me_dbxs[dbi]; rc = page_search(&couple.outer, &dbx->md_name, 0); if (unlikely(rc != MDBX_SUCCESS)) { - notfound: + bailout: NOTICE("dbi %zu refs to inaccessible subDB `%*s` for txn %" PRIaTXN " (err %d)", dbi, (int)dbx->md_name.iov_len, (const char *)dbx->md_name.iov_base, @@ -15899,7 +16108,7 @@ static int fetch_sdb(MDBX_txn *txn, size_t dbi) { struct node_result nsr = node_search(&couple.outer, &dbx->md_name); if (unlikely(!nsr.exact)) { rc = MDBX_NOTFOUND; - goto notfound; + goto bailout; } if (unlikely((node_flags(nsr.node) & (F_DUPDATA | F_SUBDATA)) != F_SUBDATA)) { NOTICE("dbi %zu refs to not a named subDB `%*s` for txn %" PRIaTXN " (%s)", @@ -15977,8 +16186,8 @@ __hot static int page_search_lowest(MDBX_cursor *mc) { * [in] key the key to search for, or NULL for first/last page. * [in] flags If MDBX_PS_MODIFY is set, visited pages in the DB * are touched (updated with new page numbers). - * If MDBX_PS_FIRST or MDBX_PS_LAST is set, find first or last - * leaf. + * If MDBX_PS_FIRST or MDBX_PS_LAST is set, + * find first or last leaf. * This is used by mdbx_cursor_first() and mdbx_cursor_last(). * If MDBX_PS_ROOTONLY set, just fetch root node, no further * lookups. 
@@ -16096,9 +16305,6 @@ int mdbx_get(const MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, if (unlikely(!key || !data)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) - return MDBX_BAD_DBI; - MDBX_cursor_couple cx; rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) @@ -16116,9 +16322,6 @@ int mdbx_get_equal_or_great(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, if (unlikely(!key || !data)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) - return MDBX_BAD_DBI; - if (unlikely(txn->mt_flags & MDBX_TXN_BLOCKED)) return MDBX_BAD_TXN; @@ -16142,9 +16345,6 @@ int mdbx_get_ex(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *key, if (unlikely(!key || !data)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) - return MDBX_BAD_DBI; - MDBX_cursor_couple cx; rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) @@ -17178,7 +17378,10 @@ static int touch_dbi(MDBX_cursor *mc) { if (mc->mc_dbi >= CORE_DBS) { /* Touch DB record of named DB */ MDBX_cursor_couple cx; - int rc = cursor_init(&cx.outer, mc->mc_txn, MAIN_DBI); + int rc = dbi_check(mc->mc_txn, MAIN_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + rc = cursor_init(&cx.outer, mc->mc_txn, MAIN_DBI); if (unlikely(rc != MDBX_SUCCESS)) return rc; mc->mc_txn->mt_dbi_state[MAIN_DBI] |= DBI_DIRTY; @@ -17195,6 +17398,8 @@ static __hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, cASSERT(mc, (mc->mc_flags & C_INITIALIZED) || mc->mc_snum == 0); cASSERT(mc, cursor_is_tracked(mc)); + cASSERT(mc, F_ISSET(dbi_state(mc->mc_txn, FREE_DBI), DBI_LINDO | DBI_VALID)); + cASSERT(mc, F_ISSET(dbi_state(mc->mc_txn, MAIN_DBI), DBI_LINDO | DBI_VALID)); if ((mc->mc_flags & C_SUB) == 0) { MDBX_txn *const txn = mc->mc_txn; txn_lru_turn(txn); @@ -18655,7 +18860,8 @@ static int cursor_xinit1(MDBX_cursor *mc, MDBX_node *node, case F_DUPDATA | F_SUBDATA: if (!MDBX_DISABLE_VALIDATION && unlikely(node_ds(node) != 
sizeof(MDBX_db))) { - ERROR("invalid nested-db record size %zu", node_ds(node)); + ERROR("invalid nested-db record size (%zu, expect %zu)", node_ds(node), + sizeof(MDBX_db)); return MDBX_CORRUPTED; } memcpy(&mx->mx_db, node_data(node), sizeof(MDBX_db)); @@ -18762,7 +18968,8 @@ static int cursor_xinit2(MDBX_cursor *mc, MDBX_xcursor *src_mx, static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, const MDBX_txn *const txn, MDBX_db *const db, - MDBX_dbx *const dbx, uint8_t *const dbstate) { + MDBX_dbx *const dbx, uint8_t *const dbi_state) { + tASSERT(txn, F_ISSET(*dbi_state, DBI_VALID | DBI_LINDO)); couple->outer.mc_signature = MDBX_MC_LIVE; couple->outer.mc_next = NULL; couple->outer.mc_backup = NULL; @@ -18770,7 +18977,7 @@ static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, couple->outer.mc_txn = (MDBX_txn *)txn; couple->outer.mc_db = db; couple->outer.mc_dbx = dbx; - couple->outer.mc_dbi_state = dbstate; + couple->outer.mc_dbi_state = dbi_state; couple->outer.mc_snum = 0; couple->outer.mc_top = 0; couple->outer.mc_pg[0] = 0; @@ -18807,9 +19014,12 @@ static __inline int couple_init(MDBX_cursor_couple *couple, const size_t dbi, /* Initialize a cursor for a given transaction and database. 
*/ static int cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi) { STATIC_ASSERT(offsetof(MDBX_cursor_couple, outer) == 0); - return couple_init(container_of(mc, MDBX_cursor_couple, outer), dbi, txn, + int rc = dbi_check(txn, dbi); + if (likely(rc == MDBX_SUCCESS)) + rc = couple_init(container_of(mc, MDBX_cursor_couple, outer), dbi, txn, &txn->mt_dbs[dbi], &txn->mt_env->me_dbxs[dbi], &txn->mt_dbi_state[dbi]); + return rc; } MDBX_cursor *mdbx_cursor_create(void *context) { @@ -18892,8 +19102,9 @@ int mdbx_cursor_bind(const MDBX_txn *txn, MDBX_cursor *mc, MDBX_dbi dbi) { if (unlikely(rc != MDBX_SUCCESS)) return rc; - if (unlikely(!check_dbi(txn, dbi, DBI_VALID))) - return MDBX_BAD_DBI; + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; if (unlikely(dbi == FREE_DBI && !(txn->mt_flags & MDBX_TXN_RDONLY))) return MDBX_EACCESS; @@ -19024,7 +19235,7 @@ void mdbx_cursor_close(MDBX_cursor *mc) { int mdbx_txn_release_all_cursors(const MDBX_txn *txn, bool unbind) { int rc = check_txn(txn, MDBX_TXN_FINISHED | MDBX_TXN_HAS_CHILD); if (likely(rc == MDBX_SUCCESS)) { - for (size_t i = FREE_DBI; i < txn->mt_numdbs; ++i) { + TXN_FOREACH_DBI_FROM(txn, i, MAIN_DBI) { while (txn->mt_cursors[i]) { MDBX_cursor *mc = txn->mt_cursors[i]; ENSURE(NULL, mc->mc_signature == MDBX_MC_LIVE && @@ -20281,7 +20492,8 @@ __cold static int page_check(const MDBX_cursor *const mc, break; case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */: if (unlikely(dsize != sizeof(MDBX_db))) { - rc = bad_page(mp, "invalid nested-db record size (%zu)\n", dsize); + rc = bad_page(mp, "invalid nested-db record size (%zu, expect %zu)\n", + dsize, sizeof(MDBX_db)); continue; } break; @@ -20486,7 +20698,7 @@ int mdbx_del(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, if (unlikely(!key)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) + if (unlikely(dbi <= FREE_DBI)) return MDBX_BAD_DBI; if (unlikely(txn->mt_flags & (MDBX_TXN_RDONLY | MDBX_TXN_BLOCKED))) @@ 
-21155,7 +21367,7 @@ int mdbx_put(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, MDBX_val *data, if (unlikely(!key || !data)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) + if (unlikely(dbi <= FREE_DBI)) return MDBX_BAD_DBI; if (unlikely(flags & ~(MDBX_NOOVERWRITE | MDBX_NODUPDATA | MDBX_ALLDUPS | @@ -21509,8 +21721,8 @@ __cold static int compacting_walk_sdb(mdbx_compacting_ctx *ctx, MDBX_db *sdb) { memset(&couple, 0, sizeof(couple)); couple.inner.mx_cursor.mc_signature = ~MDBX_MC_LIVE; MDBX_dbx dbx = {.md_klen_min = INT_MAX}; - uint8_t dbistate = DBI_VALID | DBI_AUDIT; - int rc = couple_init(&couple, ~0u, ctx->mc_txn, sdb, &dbx, &dbistate); + uint8_t dbi_state = DBI_LINDO | DBI_VALID; + int rc = couple_init(&couple, ~0u, ctx->mc_txn, sdb, &dbx, &dbi_state); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -22155,27 +22367,22 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { if (unlikely(err != MDBX_SUCCESS)) return err; + MDBX_cursor_couple cx; + err = cursor_init(&cx.outer, (MDBX_txn *)txn, MAIN_DBI); + if (unlikely(err != MDBX_SUCCESS)) + return err; + const MDBX_env *const env = txn->mt_env; st->ms_psize = env->me_psize; -#if 1 - /* assuming GC is internal and not subject for accounting */ - stat_get(&txn->mt_dbs[MAIN_DBI], st, bytes); -#else - stat_get(&txn->mt_dbs[FREE_DBI], st, bytes); - stat_add(&txn->mt_dbs[MAIN_DBI], st, bytes); -#endif - - /* account opened named subDBs */ - for (MDBX_dbi dbi = CORE_DBS; dbi < txn->mt_numdbs; dbi++) + TXN_FOREACH_DBI_FROM( + txn, dbi, + /* assuming GC is internal and not subject for accounting */ MAIN_DBI) { if ((txn->mt_dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID) stat_add(txn->mt_dbs + dbi, st, bytes); + } - if (!(txn->mt_dbs[MAIN_DBI].md_flags & (MDBX_DUPSORT | MDBX_INTEGERKEY)) && + if (!(txn->mt_dbs[MAIN_DBI].md_flags & MDBX_DUPSORT) && txn->mt_dbs[MAIN_DBI].md_entries /* TODO: use `md_subs` field */) { - MDBX_cursor_couple cx; - err = 
cursor_init(&cx.outer, (MDBX_txn *)txn, MAIN_DBI); - if (unlikely(err != MDBX_SUCCESS)) - return err; /* scan and account not opened named subDBs */ err = page_search(&cx.outer, NULL, MDBX_PS_FIRST); @@ -22190,13 +22397,14 @@ __cold static int stat_acc(const MDBX_txn *txn, MDBX_stat *st, size_t bytes) { /* skip opened and already accounted */ const MDBX_val name = {node_key(node), node_ks(node)}; - for (MDBX_dbi dbi = CORE_DBS; dbi < txn->mt_numdbs; dbi++) + TXN_FOREACH_DBI_USER(txn, dbi) { if ((txn->mt_dbi_state[dbi] & (DBI_VALID | DBI_STALE)) == DBI_VALID && env->me_dbxs[MAIN_DBI].md_cmp(&name, &env->me_dbxs[dbi].md_name) == 0) { node = NULL; break; } + } if (node) { MDBX_db db; @@ -22256,9 +22464,6 @@ __cold int mdbx_dbi_dupsort_depthmask(const MDBX_txn *txn, MDBX_dbi dbi, if (unlikely(!mask)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_VALID))) - return MDBX_BAD_DBI; - MDBX_cursor_couple cx; rc = cursor_init(&cx.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) @@ -22519,54 +22724,110 @@ static __inline MDBX_cmp_func *get_default_datacmp(MDBX_db_flags_t flags) { : ((flags & MDBX_REVERSEDUP) ? 
cmp_reverse : cmp_lexical)); } -static int dbi_bind(MDBX_txn *txn, const MDBX_dbi dbi, unsigned user_flags, +static int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp) { - /* Accepting only three cases: - * 1) user_flags and both comparators are zero - * = assume that a by-default mode/flags is requested for reading; - * 2) user_flags exactly the same - * = assume that the target mode/flags are requested properly; - * 3) user_flags differs, but table is empty and MDBX_CREATE is provided - * = assume that a properly create request with custom flags; - */ const MDBX_env *const env = txn->mt_env; - if ((user_flags ^ txn->mt_dbs[dbi].md_flags) & DB_PERSISTENT_FLAGS) { + eASSERT(env, dbi < txn->mt_numdbs && dbi < env->me_numdbs); + eASSERT(env, dbi_state(txn, dbi) & DBI_LINDO); + eASSERT(env, env->me_db_flags[dbi] != DB_POISON); + if ((env->me_db_flags[dbi] & DB_VALID) == 0) { + eASSERT(env, !env->me_dbxs[dbi].md_cmp && !env->me_dbxs[dbi].md_dcmp && + !env->me_dbxs[dbi].md_name.iov_len && + !env->me_dbxs[dbi].md_name.iov_base && + !env->me_dbxs[dbi].md_klen_max && + !env->me_dbxs[dbi].md_klen_min && + !env->me_dbxs[dbi].md_vlen_max && + !env->me_dbxs[dbi].md_vlen_min); + } else { + eASSERT(env, !(txn->mt_dbi_state[dbi] & DBI_VALID) || + (txn->mt_dbs[dbi].md_flags | DB_VALID) == + env->me_db_flags[dbi]); + eASSERT(env, env->me_dbxs[dbi].md_name.iov_base); + } + + /* Если dbi уже использовался, то корректными считаем четыре варианта: + * 1) user_flags равны MDBX_DB_ACCEDE + * = предполагаем что пользователь открывает существующую subDb, + * при этом код проверки не позволит установить другие компараторы. + * 2) user_flags нулевые, а оба компаратора пустые/нулевые или равны текущим + * = предполагаем что пользователь открывает существующую subDb + * старым способом с нулевыми с флагами по-умолчанию. 
+ * 3) user_flags совпадают, а компараторы не заданы или те же + * = предполагаем что пользователь открывает subDb указывая все параметры; + * 4) user_flags отличаются, но subDb пустая и задан флаг MDBX_CREATE + * = предполагаем что пользователь пересоздает subDb; + */ + if ((user_flags & ~MDBX_CREATE) != + (unsigned)(env->me_db_flags[dbi] & DB_PERSISTENT_FLAGS)) { /* flags are differs, check other conditions */ if ((!user_flags && (!keycmp || keycmp == env->me_dbxs[dbi].md_cmp) && (!datacmp || datacmp == env->me_dbxs[dbi].md_dcmp)) || - user_flags == MDBX_ACCEDE) { - /* no comparators were provided and flags are zero, - * seems that is case #1 above */ - user_flags = txn->mt_dbs[dbi].md_flags; - } else if ((user_flags & MDBX_CREATE) && txn->mt_dbs[dbi].md_entries == 0) { - if (txn->mt_flags & MDBX_TXN_RDONLY) - return /* FIXME: return extended info */ MDBX_EACCESS; - /* make sure flags changes get committed */ - txn->mt_dbs[dbi].md_flags = user_flags & DB_PERSISTENT_FLAGS; - txn->mt_flags |= MDBX_TXN_DIRTY; - /* обнуляем компараторы для установки в соответствии с флагами, - * либо заданных пользователем */ - env->me_dbxs[dbi].md_cmp = nullptr; - env->me_dbxs[dbi].md_dcmp = nullptr; - } else { + user_flags == MDBX_DB_ACCEDE) { + user_flags = env->me_db_flags[dbi] & DB_PERSISTENT_FLAGS; + } else if ((user_flags & MDBX_CREATE) == 0) return /* FIXME: return extended info */ MDBX_INCOMPATIBLE; + else { + eASSERT(env, env->me_db_flags[dbi] & DB_VALID); + if (txn->mt_dbi_state[dbi] & DBI_STALE) { + int err = fetch_sdb(txn, dbi); + if (unlikely(err == MDBX_SUCCESS)) + return err; + } + eASSERT(env, + (txn->mt_dbi_state[dbi] & (DBI_LINDO | DBI_VALID | DBI_STALE)) == + (DBI_LINDO | DBI_VALID)); + if (unlikely(txn->mt_dbs[dbi].md_leaf_pages)) + return /* FIXME: return extended info */ MDBX_INCOMPATIBLE; + + /* Пересоздаём subDB если там пусто */ + if (unlikely(txn->mt_cursors[dbi])) + return MDBX_DANGLING_DBI; + env->me_db_flags[dbi] = DB_POISON; + 
atomic_store32(&env->me_dbi_seqs[dbi], dbi_seq_next(env, MAIN_DBI), + mo_AcquireRelease); + + const uint32_t seq = dbi_seq_next(env, dbi); + const uint16_t db_flags = user_flags & DB_PERSISTENT_FLAGS; + eASSERT(env, txn->mt_dbs[dbi].md_depth == 0 && + txn->mt_dbs[dbi].md_entries == 0 && + txn->mt_dbs[dbi].md_root == P_INVALID); + env->me_dbxs[dbi].md_cmp = + keycmp ? keycmp : get_default_keycmp(user_flags); + env->me_dbxs[dbi].md_dcmp = + datacmp ? datacmp : get_default_datacmp(user_flags); + txn->mt_dbs[dbi].md_flags = db_flags; + txn->mt_dbs[dbi].md_xsize = 0; + if (unlikely(setup_dbx(&env->me_dbxs[dbi], &txn->mt_dbs[dbi], + env->me_psize))) { + txn->mt_dbi_state[dbi] = DBI_LINDO; + txn->mt_flags |= MDBX_TXN_ERROR; + return MDBX_PROBLEM; + } + + env->me_db_flags[dbi] = db_flags | DB_VALID; + atomic_store32(&env->me_dbi_seqs[dbi], seq, mo_AcquireRelease); + txn->mt_dbi_seqs[dbi] = seq; + txn->mt_dbi_state[dbi] = DBI_LINDO | DBI_VALID | DBI_CREAT | DBI_DIRTY; + txn->mt_flags |= MDBX_TXN_DIRTY; } } if (!keycmp) - keycmp = env->me_dbxs[dbi].md_cmp ? env->me_dbxs[dbi].md_cmp - : get_default_keycmp(user_flags); + keycmp = (env->me_db_flags[dbi] & DB_VALID) + ? env->me_dbxs[dbi].md_cmp + : get_default_keycmp(user_flags); if (env->me_dbxs[dbi].md_cmp != keycmp) { - if (env->me_dbxs[dbi].md_cmp) + if (env->me_db_flags[dbi] & DB_VALID) return MDBX_EINVAL; env->me_dbxs[dbi].md_cmp = keycmp; } if (!datacmp) - datacmp = env->me_dbxs[dbi].md_dcmp ? env->me_dbxs[dbi].md_dcmp - : get_default_datacmp(user_flags); + datacmp = (env->me_db_flags[dbi] & DB_VALID) + ? 
env->me_dbxs[dbi].md_dcmp + : get_default_datacmp(user_flags); if (env->me_dbxs[dbi].md_dcmp != datacmp) { - if (env->me_dbxs[dbi].md_dcmp) + if (env->me_db_flags[dbi] & DB_VALID) return MDBX_EINVAL; env->me_dbxs[dbi].md_dcmp = datacmp; } @@ -22574,34 +22835,207 @@ static int dbi_bind(MDBX_txn *txn, const MDBX_dbi dbi, unsigned user_flags, return MDBX_SUCCESS; } -static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, - unsigned user_flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, - MDBX_cmp_func *datacmp) { - int rc = MDBX_EINVAL; - if (unlikely(!dbi)) - return rc; +static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, + MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp, + MDBX_val name) { + MDBX_env *const env = txn->mt_env; - void *clone = nullptr; - bool locked = false; - if (unlikely((user_flags & ~DB_USABLE_FLAGS) != 0)) { - bailout: - tASSERT(txn, MDBX_IS_ERROR(rc)); - *dbi = 0; - if (locked) - ENSURE(txn->mt_env, - osal_fastmutex_release(&txn->mt_env->me_dbi_lock) == MDBX_SUCCESS); - osal_free(clone); - return rc; + /* Cannot mix named table(s) with DUPSORT flags */ + tASSERT(txn, + (txn->mt_dbi_state[MAIN_DBI] & (DBI_LINDO | DBI_VALID | DBI_STALE)) == + (DBI_LINDO | DBI_VALID)); + if (unlikely(txn->mt_dbs[MAIN_DBI].md_flags & MDBX_DUPSORT)) { + if (unlikely((user_flags & MDBX_CREATE) == 0)) + return MDBX_NOTFOUND; + if (unlikely(txn->mt_dbs[MAIN_DBI].md_leaf_pages)) + /* В MainDB есть записи, либо она уже использовалась. */ + return MDBX_INCOMPATIBLE; + + /* Пересоздаём MainDB когда там пусто. 
*/ + tASSERT(txn, txn->mt_dbs[MAIN_DBI].md_depth == 0 && + txn->mt_dbs[MAIN_DBI].md_entries == 0 && + txn->mt_dbs[MAIN_DBI].md_root == P_INVALID); + if (unlikely(txn->mt_cursors[MAIN_DBI])) + return MDBX_DANGLING_DBI; + env->me_db_flags[MAIN_DBI] = DB_POISON; + atomic_store32(&env->me_dbi_seqs[MAIN_DBI], dbi_seq_next(env, MAIN_DBI), + mo_AcquireRelease); + + const uint32_t seq = dbi_seq_next(env, MAIN_DBI); + const uint16_t main_flags = + txn->mt_dbs[MAIN_DBI].md_flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY); + env->me_dbxs[MAIN_DBI].md_cmp = get_default_keycmp(main_flags); + env->me_dbxs[MAIN_DBI].md_dcmp = get_default_datacmp(main_flags); + txn->mt_dbs[MAIN_DBI].md_flags = main_flags; + txn->mt_dbs[MAIN_DBI].md_xsize = 0; + if (unlikely(setup_dbx(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], + env->me_psize) != MDBX_SUCCESS)) { + txn->mt_dbi_state[MAIN_DBI] = DBI_LINDO; + txn->mt_flags |= MDBX_TXN_ERROR; + env->me_flags |= MDBX_FATAL_ERROR; + return MDBX_FATAL_ERROR; + } + env->me_db_flags[MAIN_DBI] = main_flags | DB_VALID; + txn->mt_dbi_seqs[MAIN_DBI] = + atomic_store32(&env->me_dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); + txn->mt_dbi_state[MAIN_DBI] |= DBI_DIRTY; + txn->mt_flags |= MDBX_TXN_DIRTY; } - rc = check_txn(txn, MDBX_TXN_BLOCKED); + tASSERT(txn, env->me_dbxs[MAIN_DBI].md_cmp); + + /* Is the DB already open? */ + size_t slot = env->me_numdbs; + for (size_t scan = CORE_DBS; scan < env->me_numdbs; ++scan) { + if ((env->me_db_flags[scan] & DB_VALID) == 0) { + /* Remember this free slot */ + slot = (slot < scan) ? 
slot : scan; + continue; + } + if (!env->me_dbxs[MAIN_DBI].md_cmp(&name, &env->me_dbxs[scan].md_name)) { + slot = scan; + int err = dbi_check(txn, slot); + if (err == MDBX_BAD_DBI && + txn->mt_dbi_state[slot] == (DBI_OLDEN | DBI_LINDO)) { + /* хендл использовался, стал невалидным, + * но теперь явно пере-открывается в этой транзакци */ + eASSERT(env, !txn->mt_cursors[slot]); + txn->mt_dbi_state[slot] = DBI_LINDO; + err = dbi_check(txn, slot); + } + if (err == MDBX_SUCCESS) { + err = dbi_bind(txn, slot, user_flags, keycmp, datacmp); + if (likely(err == MDBX_SUCCESS)) { + goto done; + } + } + return err; + } + } + + /* Fail, if no free slot and max hit */ + if (unlikely(slot >= env->me_maxdbs)) + return MDBX_DBS_FULL; + + if (env->me_numdbs == slot) + eASSERT(env, !env->me_db_flags[slot] && + !env->me_dbxs[slot].md_name.iov_len && + !env->me_dbxs[slot].md_name.iov_base); + + env->me_db_flags[slot] = DB_POISON; + atomic_store32(&env->me_dbi_seqs[slot], dbi_seq_next(env, slot), + mo_AcquireRelease); + memset(&env->me_dbxs[slot], 0, sizeof(env->me_dbxs[slot])); + if (env->me_numdbs == slot) + env->me_numdbs = (unsigned)slot + 1; + eASSERT(env, slot < env->me_numdbs); + + int err = dbi_check(txn, slot); + eASSERT(env, err == MDBX_BAD_DBI); + if (err != MDBX_BAD_DBI) + return MDBX_PROBLEM; + + /* Find the DB info */ + MDBX_val body; + MDBX_cursor_couple cx; + int rc = cursor_init(&cx.outer, txn, MAIN_DBI); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + rc = cursor_set(&cx.outer, &name, &body, MDBX_SET).err; + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc != MDBX_NOTFOUND || !(user_flags & MDBX_CREATE)) + return rc; + } else { + /* make sure this is actually a table */ + MDBX_node *node = page_node(cx.outer.mc_pg[cx.outer.mc_top], + cx.outer.mc_ki[cx.outer.mc_top]); + if (unlikely((node_flags(node) & (F_DUPDATA | F_SUBDATA)) != F_SUBDATA)) + return MDBX_INCOMPATIBLE; + if (!MDBX_DISABLE_VALIDATION && unlikely(body.iov_len != sizeof(MDBX_db))) + return MDBX_CORRUPTED; + 
memcpy(&txn->mt_dbs[slot], body.iov_base, sizeof(MDBX_db)); + } + + /* Done here so we cannot fail after creating a new DB */ + void *clone = nullptr; + if (name.iov_len) { + clone = osal_malloc(name.iov_len); + if (unlikely(!clone)) + return MDBX_ENOMEM; + name.iov_base = memcpy(clone, name.iov_base, name.iov_len); + } else + name.iov_base = ""; + + uint8_t dbi_state = DBI_LINDO | DBI_VALID | DBI_FRESH; + if (unlikely(rc)) { + /* MDBX_NOTFOUND and MDBX_CREATE: Create new DB */ + tASSERT(txn, rc == MDBX_NOTFOUND); + body.iov_base = + memset(&txn->mt_dbs[slot], 0, body.iov_len = sizeof(MDBX_db)); + txn->mt_dbs[slot].md_root = P_INVALID; + txn->mt_dbs[slot].md_mod_txnid = txn->mt_txnid; + txn->mt_dbs[slot].md_flags = user_flags & DB_PERSISTENT_FLAGS; + WITH_CURSOR_TRACKING( + cx.outer, rc = cursor_put_checklen(&cx.outer, &name, &body, + F_SUBDATA | MDBX_NOOVERWRITE)); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + dbi_state |= DBI_DIRTY | DBI_CREAT; + txn->mt_flags |= MDBX_TXN_DIRTY; + tASSERT(txn, (txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY) != 0); + } + + /* Got info, register DBI in this txn */ + const uint32_t seq = dbi_seq_next(env, slot); + eASSERT(env, + env->me_db_flags[slot] == DB_POISON && !txn->mt_cursors[slot] && + (txn->mt_dbi_state[slot] & (DBI_LINDO | DBI_VALID)) == DBI_LINDO); + txn->mt_dbi_state[slot] = dbi_state; + memcpy(&txn->mt_dbs[slot], body.iov_base, sizeof(txn->mt_dbs[slot])); + env->me_db_flags[slot] = txn->mt_dbs[slot].md_flags; + rc = dbi_bind(txn, slot, user_flags, keycmp, datacmp); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; - if ((user_flags & MDBX_CREATE) && unlikely(txn->mt_flags & MDBX_TXN_RDONLY)) { - rc = MDBX_EACCESS; - goto bailout; - } + env->me_dbxs[slot].md_name = name; + env->me_db_flags[slot] = txn->mt_dbs[slot].md_flags | DB_VALID; + txn->mt_dbi_seqs[slot] = + atomic_store32(&env->me_dbi_seqs[slot], seq, mo_AcquireRelease); + +done: + *dbi = (MDBX_dbi)slot; + tASSERT(txn, + slot < txn->mt_numdbs && 
(env->me_db_flags[slot] & DB_VALID) != 0); + eASSERT(env, dbi_check(txn, slot) == MDBX_SUCCESS); + return MDBX_SUCCESS; + +bailout: + eASSERT(env, !txn->mt_cursors[slot] && !env->me_dbxs[slot].md_name.iov_len && + !env->me_dbxs[slot].md_name.iov_base); + txn->mt_dbi_state[slot] &= DBI_LINDO | DBI_OLDEN; + env->me_db_flags[slot] = 0; + osal_free(clone); + if (slot + 1 == env->me_numdbs) + txn->mt_numdbs = env->me_numdbs = (unsigned)slot; + return rc; +} + +static int dbi_open(MDBX_txn *txn, const MDBX_val *const name, + unsigned user_flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, + MDBX_cmp_func *datacmp) { + if (unlikely(!dbi)) + return MDBX_EINVAL; + *dbi = 0; + if (unlikely((user_flags & ~DB_USABLE_FLAGS) != 0)) + return MDBX_EINVAL; + + int rc = check_txn(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if ((user_flags & MDBX_CREATE) && unlikely(txn->mt_flags & MDBX_TXN_RDONLY)) + return MDBX_EACCESS; switch (user_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED | MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_ACCEDE)) { @@ -22610,8 +23044,7 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, break; __fallthrough /* fall through */; default: - rc = MDBX_EINVAL; - goto bailout; + return MDBX_EINVAL; case MDBX_DUPSORT: case MDBX_DUPSORT | MDBX_REVERSEDUP: @@ -22619,213 +23052,36 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const table_name, case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: - case 0: + case MDBX_DB_DEFAULTS: break; } /* main table? 
*/ - if (table_name == MDBX_CHK_MAIN || table_name->iov_base == MDBX_CHK_MAIN) { + if (unlikely(name == MDBX_CHK_MAIN || name->iov_base == MDBX_CHK_MAIN)) { rc = dbi_bind(txn, MAIN_DBI, user_flags, keycmp, datacmp); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - *dbi = MAIN_DBI; + if (likely(rc == MDBX_SUCCESS)) + *dbi = MAIN_DBI; return rc; } - if (table_name == MDBX_CHK_GC || table_name->iov_base == MDBX_CHK_GC) { + if (unlikely(name == MDBX_CHK_GC || name->iov_base == MDBX_CHK_GC)) { rc = dbi_bind(txn, FREE_DBI, user_flags, keycmp, datacmp); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - *dbi = FREE_DBI; + if (likely(rc == MDBX_SUCCESS)) + *dbi = FREE_DBI; return rc; } - if (table_name == MDBX_CHK_META || table_name->iov_base == MDBX_CHK_META) { - rc = MDBX_EINVAL; - goto bailout; - } - - MDBX_val key = *table_name; - MDBX_env *const env = txn->mt_env; - if (key.iov_len > env->me_leaf_nodemax - NODESIZE - sizeof(MDBX_db)) + if (unlikely(name == MDBX_CHK_META || name->iov_base == MDBX_CHK_META)) + return MDBX_EINVAL; + if (unlikely(name->iov_len > + txn->mt_env->me_leaf_nodemax - NODESIZE - sizeof(MDBX_db))) return MDBX_EINVAL; - /* Cannot mix named table(s) with DUPSORT flags */ - if (unlikely(txn->mt_dbs[MAIN_DBI].md_flags & MDBX_DUPSORT)) { - if ((user_flags & MDBX_CREATE) == 0) { - rc = MDBX_NOTFOUND; - goto bailout; - } - if (txn->mt_dbs[MAIN_DBI].md_leaf_pages || env->me_dbxs[MAIN_DBI].md_cmp) { - /* В MAIN_DBI есть записи либо она уже использовалась. */ - rc = MDBX_INCOMPATIBLE; - goto bailout; - } - /* Пересоздаём MAIN_DBI если там пусто. 
*/ - atomic_store32(&txn->mt_dbi_seqs[MAIN_DBI], dbi_seq(env, MAIN_DBI), - mo_AcquireRelease); - tASSERT(txn, txn->mt_dbs[MAIN_DBI].md_depth == 0 && - txn->mt_dbs[MAIN_DBI].md_entries == 0 && - txn->mt_dbs[MAIN_DBI].md_root == P_INVALID); - txn->mt_dbs[MAIN_DBI].md_flags &= MDBX_REVERSEKEY | MDBX_INTEGERKEY; - txn->mt_dbi_state[MAIN_DBI] |= DBI_DIRTY; - txn->mt_flags |= MDBX_TXN_DIRTY; - env->me_dbxs[MAIN_DBI].md_cmp = - get_default_keycmp(txn->mt_dbs[MAIN_DBI].md_flags); - env->me_dbxs[MAIN_DBI].md_dcmp = - get_default_datacmp(txn->mt_dbs[MAIN_DBI].md_flags); + rc = osal_fastmutex_acquire(&txn->mt_env->me_dbi_lock); + if (likely(rc == MDBX_SUCCESS)) { + rc = dbi_open_locked(txn, user_flags, dbi, keycmp, datacmp, *name); + ENSURE(txn->mt_env, + osal_fastmutex_release(&txn->mt_env->me_dbi_lock) == MDBX_SUCCESS); } - - tASSERT(txn, env->me_dbxs[MAIN_DBI].md_cmp); - - /* Is the DB already open? */ - MDBX_dbi scan, slot; - for (slot = scan = txn->mt_numdbs; --scan >= CORE_DBS;) { - if (!env->me_dbxs[scan].md_name.iov_base) { - /* Remember this free slot */ - slot = scan; - continue; - } - if (key.iov_len == env->me_dbxs[scan].md_name.iov_len && - !memcmp(key.iov_base, env->me_dbxs[scan].md_name.iov_base, - key.iov_len)) { - rc = dbi_bind(txn, scan, user_flags, keycmp, datacmp); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - *dbi = scan; - return rc; - } - } - - /* Fail, if no free slot and max hit */ - if (unlikely(slot >= env->me_maxdbs)) { - rc = MDBX_DBS_FULL; - goto bailout; - } - - /* Find the DB info */ - MDBX_val data; - MDBX_cursor_couple couple; - rc = cursor_init(&couple.outer, txn, MAIN_DBI); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - rc = cursor_set(&couple.outer, &key, &data, MDBX_SET).err; - if (unlikely(rc != MDBX_SUCCESS)) { - if (rc != MDBX_NOTFOUND || !(user_flags & MDBX_CREATE)) - goto bailout; - } else { - /* make sure this is actually a table */ - MDBX_node *node = page_node(couple.outer.mc_pg[couple.outer.mc_top], - 
couple.outer.mc_ki[couple.outer.mc_top]); - if (unlikely((node_flags(node) & (F_DUPDATA | F_SUBDATA)) != F_SUBDATA)) { - rc = MDBX_INCOMPATIBLE; - goto bailout; - } - if (!MDBX_DISABLE_VALIDATION && unlikely(data.iov_len != sizeof(MDBX_db))) { - rc = MDBX_CORRUPTED; - goto bailout; - } - } - - if (rc != MDBX_SUCCESS && unlikely(txn->mt_flags & MDBX_TXN_RDONLY)) { - rc = MDBX_EACCESS; - goto bailout; - } - - /* Done here so we cannot fail after creating a new DB */ - if (key.iov_len) { - clone = osal_malloc(key.iov_len); - if (unlikely(!clone)) { - rc = MDBX_ENOMEM; - goto bailout; - } - key.iov_base = memcpy(clone, key.iov_base, key.iov_len); - } else - key.iov_base = ""; - - int err = osal_fastmutex_acquire(&env->me_dbi_lock); - if (unlikely(err != MDBX_SUCCESS)) { - rc = err; - goto bailout; - } - locked = true; - - /* Import handles from env */ - dbi_import_locked(txn); - - /* Rescan after mutex acquisition & import handles */ - for (slot = scan = txn->mt_numdbs; --scan >= CORE_DBS;) { - if (!env->me_dbxs[scan].md_name.iov_base) { - /* Remember this free slot */ - slot = scan; - continue; - } - if (key.iov_len == env->me_dbxs[scan].md_name.iov_len && - !memcmp(key.iov_base, env->me_dbxs[scan].md_name.iov_base, - key.iov_len)) { - rc = dbi_bind(txn, scan, user_flags, keycmp, datacmp); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - slot = scan; - goto done; - } - } - - if (unlikely(slot >= env->me_maxdbs)) { - rc = MDBX_DBS_FULL; - goto bailout; - } - - unsigned dbiflags = DBI_FRESH | DBI_VALID | DBI_USRVALID; - MDBX_db db_dummy; - if (unlikely(rc)) { - /* MDBX_NOTFOUND and MDBX_CREATE: Create new DB */ - tASSERT(txn, rc == MDBX_NOTFOUND); - memset(&db_dummy, 0, sizeof(db_dummy)); - db_dummy.md_root = P_INVALID; - db_dummy.md_mod_txnid = txn->mt_txnid; - db_dummy.md_flags = user_flags & DB_PERSISTENT_FLAGS; - data.iov_len = sizeof(db_dummy); - data.iov_base = &db_dummy; - WITH_CURSOR_TRACKING( - couple.outer, rc = cursor_put_checklen(&couple.outer, &key, 
&data, - F_SUBDATA | MDBX_NOOVERWRITE)); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - dbiflags |= DBI_DIRTY | DBI_CREAT; - txn->mt_flags |= MDBX_TXN_DIRTY; - tASSERT(txn, (txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY) != 0); - } - - /* Got info, register DBI in this txn */ - memset(env->me_dbxs + slot, 0, sizeof(MDBX_dbx)); - memcpy(&txn->mt_dbs[slot], data.iov_base, sizeof(MDBX_db)); - env->me_db_flags[slot] = 0; - rc = dbi_bind(txn, slot, user_flags, keycmp, datacmp); - if (unlikely(rc != MDBX_SUCCESS)) { - tASSERT(txn, (dbiflags & DBI_CREAT) == 0); - goto bailout; - } - - txn->mt_dbi_state[slot] = (uint8_t)dbiflags; - env->me_dbxs[slot].md_name = key; - txn->mt_dbi_seqs[slot].weak = env->me_dbi_seqs[slot].weak = - dbi_seq(env, slot); - if (!(dbiflags & DBI_CREAT)) - env->me_db_flags[slot] = txn->mt_dbs[slot].md_flags | DB_VALID; - if (txn->mt_numdbs == slot) { - txn->mt_cursors[slot] = NULL; - osal_compiler_barrier(); - txn->mt_numdbs = slot + 1; - } - if (env->me_numdbs <= slot) { - osal_memory_fence(mo_AcquireRelease, true); - env->me_numdbs = slot + 1; - } - -done: - *dbi = slot; - ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); - return MDBX_SUCCESS; + return rc; } static int dbi_open_cstr(MDBX_txn *txn, const char *name_cstr, @@ -22874,8 +23130,9 @@ __cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, if (unlikely(!dest)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_VALID))) - return MDBX_BAD_DBI; + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; const size_t size_before_modtxnid = offsetof(MDBX_stat, ms_mod_txnid); if (unlikely(bytes != sizeof(MDBX_stat)) && bytes != size_before_modtxnid) @@ -22909,14 +23166,18 @@ static int dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) { env->me_dbxs[dbi].md_name.iov_len = 0; osal_memory_fence(mo_AcquireRelease, true); env->me_dbxs[dbi].md_name.iov_base = NULL; + osal_flush_incoherent_cpu_writeback(); osal_free(ptr); if 
(env->me_numdbs == dbi + 1) { size_t i = env->me_numdbs; - do + do { --i; - while (i > CORE_DBS && !env->me_dbxs[i - 1].md_name.iov_base); - env->me_numdbs = (MDBX_dbi)i; + eASSERT(env, i >= CORE_DBS); + eASSERT(env, !env->me_db_flags[i] && !env->me_dbxs[i].md_name.iov_len && + !env->me_dbxs[i].md_name.iov_base); + } while ((env->me_db_flags[i - 1] & DB_VALID) == 0); + env->me_numdbs = (unsigned)i; } return MDBX_SUCCESS; @@ -22955,8 +23216,9 @@ int mdbx_dbi_flags_ex(const MDBX_txn *txn, MDBX_dbi dbi, unsigned *flags, if (unlikely(!flags || !state)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_VALID))) - return MDBX_BAD_DBI; + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; *flags = txn->mt_dbs[dbi].md_flags & DB_PERSISTENT_FLAGS; *state = @@ -23080,22 +23342,20 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { /* Can't delete the main DB */ if (del && dbi >= CORE_DBS) { - rc = delete (txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, F_SUBDATA); + rc = delete(txn, MAIN_DBI, &mc->mc_dbx->md_name, NULL, F_SUBDATA); if (likely(rc == MDBX_SUCCESS)) { tASSERT(txn, txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY); tASSERT(txn, txn->mt_flags & MDBX_TXN_DIRTY); - txn->mt_dbi_state[dbi] = DBI_STALE; + txn->mt_dbi_state[dbi] = DBI_LINDO | DBI_OLDEN; MDBX_env *env = txn->mt_env; rc = osal_fastmutex_acquire(&env->me_dbi_lock); - if (unlikely(rc != MDBX_SUCCESS)) { - txn->mt_flags |= MDBX_TXN_ERROR; + if (likely(rc == MDBX_SUCCESS)) { + dbi_close_locked(env, dbi); + ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); goto bailout; } - dbi_close_locked(env, dbi); - ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); - } else { - txn->mt_flags |= MDBX_TXN_ERROR; } + txn->mt_flags |= MDBX_TXN_ERROR; } else { /* reset the DB record, mark it dirty */ txn->mt_dbi_state[dbi] |= DBI_DIRTY; @@ -23753,8 +24013,8 @@ __cold static int walk_sdb(mdbx_walk_ctx_t *ctx, MDBX_walk_sdb_t *sdb, MDBX_cursor_couple couple; 
MDBX_dbx dbx = {.md_klen_min = INT_MAX}; - uint8_t dbistate = DBI_VALID | DBI_AUDIT; - int rc = couple_init(&couple, ~0u, ctx->mw_txn, db, &dbx, &dbistate); + uint8_t dbi_state = DBI_LINDO | DBI_VALID; + int rc = couple_init(&couple, ~0u, ctx->mw_txn, db, &dbx, &dbi_state); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -24137,9 +24397,6 @@ int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, if (unlikely(begin_key == MDBX_EPSILON && end_key == MDBX_EPSILON)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) - return MDBX_BAD_DBI; - MDBX_cursor_couple begin; /* LY: first, initialize cursor to refresh a DB in case it have DB_STALE */ rc = cursor_init(&begin.outer, txn, dbi); @@ -24313,7 +24570,7 @@ int mdbx_replace_ex(MDBX_txn *txn, MDBX_dbi dbi, const MDBX_val *key, (flags & (MDBX_CURRENT | MDBX_RESERVE)) != MDBX_CURRENT)) return MDBX_EINVAL; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) + if (unlikely(dbi <= FREE_DBI)) return MDBX_BAD_DBI; if (unlikely(flags & @@ -24493,8 +24750,9 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, if (unlikely(rc != MDBX_SUCCESS)) return rc; - if (unlikely(!check_dbi(txn, dbi, DBI_USRVALID))) - return MDBX_BAD_DBI; + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; if (unlikely(txn->mt_dbi_state[dbi] & DBI_STALE)) { rc = fetch_sdb(txn, dbi); @@ -24507,7 +24765,7 @@ int mdbx_dbi_sequence(MDBX_txn *txn, MDBX_dbi dbi, uint64_t *result, *result = dbs->md_seq; if (likely(increment > 0)) { - if (unlikely(txn->mt_flags & MDBX_TXN_RDONLY)) + if (unlikely(dbi == FREE_DBI || (txn->mt_flags & MDBX_TXN_RDONLY) != 0)) return MDBX_EACCESS; uint64_t new = dbs->md_seq + increment; @@ -26721,9 +26979,13 @@ __cold static int chk_db(MDBX_chk_scope_t *const scope, MDBX_dbi dbi, (chk->flags & MDBX_CHK_IGNORE_ORDER) ? cmp_equal_or_greater : nullptr, (chk->flags & MDBX_CHK_IGNORE_ORDER) ? 
cmp_equal_or_greater : nullptr); if (unlikely(err)) { + tASSERT(txn, dbi >= txn->mt_env->me_numdbs || + (txn->mt_env->me_db_flags[dbi] & DB_VALID) == 0); chk_error_rc(scope, err, "mdbx_dbi_open"); goto bailout; } + tASSERT(txn, dbi < txn->mt_env->me_numdbs && + (txn->mt_env->me_db_flags[dbi] & DB_VALID) != 0); } const MDBX_db *const db = txn->mt_dbs + dbi; @@ -27030,8 +27292,7 @@ bailout: } mdbx_cursor_close(cursor); - if (dbi >= CORE_DBS && !txn->mt_cursors[dbi] && - txn->mt_dbi_state[dbi] == (DBI_FRESH | DBI_VALID | DBI_USRVALID)) + if (!txn->mt_cursors[dbi] && (txn->mt_dbi_state[dbi] & DBI_FRESH)) mdbx_dbi_close(env, dbi); } return err; diff --git a/src/internals.h b/src/internals.h index d8dafc2d..a3e8e5cc 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1192,19 +1192,23 @@ struct MDBX_txn { /* Array of MDBX_db records for each known DB */ MDBX_db *mt_dbs; +#if MDBX_ENABLE_DBI_SPARSE + unsigned *mt_dbi_sparse; +#endif /* MDBX_ENABLE_DBI_SPARSE */ + /* Non-shared DBI state flags inside transaction */ -#define DBI_DIRTY 0x01 /* DB was written in this txn */ -#define DBI_STALE 0x02 /* Named-DB record is older than txnID */ -#define DBI_FRESH 0x04 /* Named-DB handle opened in this txn */ -#define DBI_CREAT 0x08 /* Named-DB handle created in this txn */ -#define DBI_VALID 0x10 /* Handle is valid, see also DB_VALID */ -#define DBI_USRVALID 0x20 /* As DB_VALID, but not set for FREE_DBI */ -#define DBI_AUDIT 0x40 /* Internal flag for accounting during audit */ +#define DBI_DIRTY 0x01 /* DB was written in this txn */ +#define DBI_STALE 0x02 /* Named-DB record is older than txnID */ +#define DBI_FRESH 0x04 /* Named-DB handle opened in this txn */ +#define DBI_CREAT 0x08 /* Named-DB handle created in this txn */ +#define DBI_VALID 0x10 /* Handle is valid, see also DB_VALID */ +#define DBI_OLDEN 0x40 /* Handle was closed/reopened outside txn */ +#define DBI_LINDO 0x80 /* Lazy initialization done for DBI-slot */ /* Array of non-shared txn's flags of DBI */ uint8_t 
*mt_dbi_state; /* Array of sequence numbers for each DB handle. */ - MDBX_atomic_uint32_t *mt_dbi_seqs; + uint32_t *mt_dbi_seqs; MDBX_cursor **mt_cursors; MDBX_canary mt_canary; @@ -1660,7 +1664,8 @@ typedef struct MDBX_node { /* mdbx_dbi_open() flags */ #define DB_USABLE_FLAGS (DB_PERSISTENT_FLAGS | MDBX_CREATE | MDBX_DB_ACCEDE) -#define DB_VALID 0x8000 /* DB handle is valid, for me_db_flags */ +#define DB_VALID 0x8000u /* DB handle is valid, for me_db_flags */ +#define DB_POISON 0x7fffu /* update pending */ #define DB_INTERNAL_FLAGS DB_VALID #if DB_INTERNAL_FLAGS & DB_USABLE_FLAGS diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 9ac6cd71..1b52739a 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -286,6 +286,8 @@ DEFINE_EXCEPTION(thread_mismatch) DEFINE_EXCEPTION(transaction_full) DEFINE_EXCEPTION(transaction_overlapping) DEFINE_EXCEPTION(duplicated_lck_file) +DEFINE_EXCEPTION(dangling_map_id) + #undef DEFINE_EXCEPTION __cold const char *error::what() const noexcept { @@ -372,6 +374,7 @@ __cold void error::throw_exception() const { CASE_EXCEPTION(transaction_full, MDBX_TXN_FULL); CASE_EXCEPTION(transaction_overlapping, MDBX_TXN_OVERLAPPING); CASE_EXCEPTION(duplicated_lck_file, MDBX_DUPLICATED_CLK); + CASE_EXCEPTION(dangling_map_id, MDBX_DANGLING_DBI); #undef CASE_EXCEPTION default: if (is_mdbx_error()) diff --git a/src/options.h b/src/options.h index 6e542153..65bb797c 100644 --- a/src/options.h +++ b/src/options.h @@ -163,6 +163,13 @@ #error MDBX_AVOID_MSYNC must be defined as 0 or 1 #endif /* MDBX_AVOID_MSYNC */ +/** FIXME */ +#ifndef MDBX_ENABLE_DBI_SPARSE +#define MDBX_ENABLE_DBI_SPARSE 1 +#elif !(MDBX_ENABLE_DBI_SPARSE == 0 || MDBX_ENABLE_DBI_SPARSE == 1) +#error MDBX_ENABLE_DBI_SPARSE must be defined as 0 or 1 +#endif /* MDBX_ENABLE_DBI_SPARSE */ + /** Controls sort order of internal page number lists. * This mostly experimental/advanced option with not for regular MDBX users. 
* \warning The database format depend on this option and libmdbx built with From 3622669a9fbc46c29421f0dc751c56ada8c14104 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 5 Nov 2023 22:14:33 +0300 Subject: [PATCH 027/137] =?UTF-8?q?mdbx:=20=D0=BF=D1=80=D0=BE=D0=B2=D0=B5?= =?UTF-8?q?=D1=80=D0=BA=D0=B0=20db-=D1=84=D0=BB=D0=B0=D0=B3=D0=BE=D0=B2=20?= =?UTF-8?q?=D0=B8=20=D1=80=D0=B5-=D0=B8=D0=BD=D0=B8=D1=86=D0=B8=D0=B0?= =?UTF-8?q?=D0=BB=D0=B8=D0=B7=D0=B0=D1=86=D0=B8=D1=8F=20MainDB=20=D0=BF?= =?UTF-8?q?=D1=80=D0=B8=20=D0=B8=D0=B7=D0=BC=D0=B5=D0=BD=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D0=B8=20=D1=84=D0=BB=D0=B0=D0=B3=D0=BE=D0=B2=20=D0=B4=D1=80?= =?UTF-8?q?=D1=83=D0=B3=D0=B8=D0=BC=20=D0=BF=D1=80=D0=BE=D1=86=D0=B5=D1=81?= =?UTF-8?q?=D1=81=D0=BE=D0=BC.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 159 +++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 129 insertions(+), 30 deletions(-) diff --git a/src/core.c b/src/core.c index af6ff541..cdb25b1b 100644 --- a/src/core.c +++ b/src/core.c @@ -3956,6 +3956,23 @@ static void cursors_eot(MDBX_txn *txn, const bool merge) { static __noinline int dbi_import(MDBX_txn *txn, const size_t dbi); +static __inline bool db_check_flags(uint16_t db_flags) { + switch (db_flags & ~(DB_VALID | MDBX_REVERSEKEY | MDBX_INTEGERKEY)) { + default: + NOTICE("invalid db-flags 0x%x", db_flags); + return false; + case MDBX_DUPSORT: + case MDBX_DUPSORT | MDBX_REVERSEDUP: + case MDBX_DUPSORT | MDBX_DUPFIXED: + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_REVERSEDUP: + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP: + case MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP: + case MDBX_DB_DEFAULTS: + return (db_flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) != + (MDBX_REVERSEKEY | MDBX_INTEGERKEY); + } +} + static __inline uint8_t dbi_state(const MDBX_txn 
*txn, const size_t dbi) { STATIC_ASSERT(DBI_DIRTY == MDBX_DBI_DIRTY && DBI_STALE == MDBX_DBI_STALE && DBI_FRESH == MDBX_DBI_FRESH && DBI_CREAT == MDBX_DBI_CREAT); @@ -8992,6 +9009,9 @@ __hot static int coherency_check_head(MDBX_txn *txn, const meta_ptr_t head, if (unlikely(!coherency_check(txn->mt_env, head.txnid, txn->mt_dbs, head.ptr_v, *timestamp == 0))) return coherency_timeout(timestamp, -1, txn->mt_env); + + tASSERT(txn, txn->mt_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); return MDBX_SUCCESS; } @@ -9015,6 +9035,9 @@ static int coherency_check_written(const MDBX_env *env, const txnid_t txnid, } if (unlikely(!coherency_check(env, head_txnid, meta->mm_dbs, meta, report))) return coherency_timeout(timestamp, pgno, env); + + eASSERT(env, meta->mm_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + eASSERT(env, db_check_flags(meta->mm_dbs[MAIN_DBI].md_flags)); return MDBX_SUCCESS; } @@ -9177,6 +9200,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { ENSURE(env, txn->mt_txnid >= /* paranoia is appropriate here */ env->me_lck ->mti_oldest_reader.weak); + tASSERT(txn, txn->mt_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); } else { eASSERT(env, (flags & ~(MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_SPILLS | MDBX_WRITEMAP)) == 0); @@ -9234,6 +9259,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { goto bailout; } + tASSERT(txn, txn->mt_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); txn->mt_flags = flags; txn->mt_child = NULL; txn->tw.loose_pages = NULL; @@ -9269,6 +9296,8 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { txn->mt_txnid + ((flags & (MDBX_WRITEMAP | MDBX_RDONLY)) == 0); /* Setup db info */ + tASSERT(txn, txn->mt_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); 
VALGRIND_MAKE_MEM_UNDEFINED(txn->mt_dbi_state, env->me_maxdbs); #if MDBX_ENABLE_DBI_SPARSE txn->mt_numdbs = CORE_DBS; @@ -9287,25 +9316,74 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { txn->mt_cursors[FREE_DBI] = nullptr; txn->mt_cursors[MAIN_DBI] = nullptr; txn->mt_dbi_seqs[FREE_DBI] = 0; - struct dbi_snap_result main_snap = dbi_snap(env, MAIN_DBI); - if (unlikely(main_snap.flags != + txn->mt_dbi_seqs[MAIN_DBI] = + atomic_load32(&env->me_dbi_seqs[MAIN_DBI], mo_AcquireRelease); + + if (unlikely(env->me_db_flags[MAIN_DBI] != (DB_VALID | txn->mt_dbs[MAIN_DBI].md_flags))) { - if (main_snap.flags & DB_VALID) { - rc = MDBX_INCOMPATIBLE; - goto bailout; + const bool need_txn_lock = env->me_txn0 && env->me_txn0->mt_owner != tid; + bool should_unlock = false; + if (need_txn_lock) { + rc = osal_txn_lock(env, true); + if (rc == MDBX_SUCCESS) + should_unlock = true; + else if (rc != MDBX_BUSY && rc != MDBX_EDEADLK) + goto bailout; } - env->me_db_flags[MAIN_DBI] = DB_VALID | txn->mt_dbs[MAIN_DBI].md_flags; - main_snap.sequence = - atomic_store32(&env->me_dbi_seqs[MAIN_DBI], dbi_seq_next(env, MAIN_DBI), - mo_AcquireRelease); + rc = osal_fastmutex_acquire(&env->me_dbi_lock); + if (likely(rc == MDBX_SUCCESS)) { + uint32_t seq = dbi_seq_next(env, MAIN_DBI); + /* проверяем повторно после захвата блокировки */ + if (env->me_db_flags[MAIN_DBI] != + (DB_VALID | txn->mt_dbs[MAIN_DBI].md_flags)) { + if (!need_txn_lock || should_unlock || + /* если нет активной пишущей транзакции, + * то следующая будет ждать на me_dbi_lock */ + !env->me_txn) { + if (env->me_db_flags[MAIN_DBI] != 0 || MDBX_DEBUG) + NOTICE("renew MainDB for %s-txn %" PRIaTXN + " since db-flags changes 0x%x -> 0x%x", + (txn->mt_flags & MDBX_TXN_RDONLY) ? 
"ro" : "rw", + txn->mt_txnid, env->me_db_flags[MAIN_DBI] & ~DB_VALID, + txn->mt_dbs[MAIN_DBI].md_flags); + env->me_db_flags[MAIN_DBI] = DB_POISON; + atomic_store32(&env->me_dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); + rc = setup_dbx(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], + env->me_psize); + if (likely(rc == MDBX_SUCCESS)) { + seq = dbi_seq_next(env, MAIN_DBI); + env->me_db_flags[MAIN_DBI] = + DB_VALID | txn->mt_dbs[MAIN_DBI].md_flags; + txn->mt_dbi_seqs[MAIN_DBI] = atomic_store32( + &env->me_dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); + } + } else { + ERROR("MainDB db-flags changes 0x%x -> 0x%x ahead of read-txn " + "%" PRIaTXN, + txn->mt_dbs[MAIN_DBI].md_flags, + env->me_db_flags[MAIN_DBI] & ~DB_VALID, txn->mt_txnid); + rc = MDBX_INCOMPATIBLE; + } + } + ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); + } else { + DEBUG("me_dbi_lock failed, err %d", rc); + } + if (should_unlock) + osal_txn_unlock(env); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; } - txn->mt_dbi_seqs[MAIN_DBI] = main_snap.sequence; - rc = - setup_dbx(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize); - if (unlikely(rc != MDBX_SUCCESS)) + if (unlikely(txn->mt_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) { + ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB", + txn->mt_dbs[FREE_DBI].md_flags); + rc = MDBX_INCOMPATIBLE; goto bailout; + } + tASSERT(txn, txn->mt_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + tASSERT(txn, db_check_flags(txn->mt_dbs[MAIN_DBI].md_flags)); if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) { WARNING("%s", "environment had fatal error, must shutdown!"); rc = MDBX_PANIC; @@ -9390,13 +9468,6 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { } #endif /* Windows */ } else { - if (unlikely(txn->mt_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) { - ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB", - txn->mt_dbs[FREE_DBI].md_flags); - rc = MDBX_INCOMPATIBLE; - goto bailout; - } - tASSERT(txn, txn == 
env->me_txn0); MDBX_cursor *const gc = ptr_disp(txn, sizeof(MDBX_txn)); rc = cursor_init(gc, txn, FREE_DBI); @@ -12404,6 +12475,17 @@ static int validate_meta(MDBX_env *env, MDBX_meta *const meta, return MDBX_RESULT_TRUE; } + if (unlikely(meta->mm_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) { + WARNING("meta[%u] has invalid %s flags 0x%u, skip it", meta_number, + "GC/FreeDB", meta->mm_dbs[FREE_DBI].md_flags); + return MDBX_INCOMPATIBLE; + } + if (unlikely(!db_check_flags(meta->mm_dbs[MAIN_DBI].md_flags))) { + WARNING("meta[%u] has invalid %s flags 0x%u, skip it", meta_number, + "MainDB", meta->mm_dbs[MAIN_DBI].md_flags); + return MDBX_INCOMPATIBLE; + } + DEBUG("checking meta%" PRIaPGNO " = root %" PRIaPGNO "/%" PRIaPGNO ", geo %" PRIaPGNO "/%" PRIaPGNO "-%" PRIaPGNO "/%" PRIaPGNO " +%u -%u, txn_id %" PRIaTXN ", %s", @@ -12788,6 +12870,8 @@ __cold static MDBX_meta *init_metas(const MDBX_env *env, void *buffer) { static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending, meta_troika_t *const troika) { eASSERT(env, ((env->me_flags ^ flags) & MDBX_WRITEMAP) == 0); + eASSERT(env, pending->mm_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + eASSERT(env, db_check_flags(pending->mm_dbs[MAIN_DBI].md_flags)); const MDBX_meta *const meta0 = METAPAGE(env, 0); const MDBX_meta *const meta1 = METAPAGE(env, 1); const MDBX_meta *const meta2 = METAPAGE(env, 2); @@ -13086,6 +13170,8 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending, target->mm_geo = pending->mm_geo; target->mm_dbs[FREE_DBI] = pending->mm_dbs[FREE_DBI]; target->mm_dbs[MAIN_DBI] = pending->mm_dbs[MAIN_DBI]; + eASSERT(env, target->mm_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + eASSERT(env, db_check_flags(target->mm_dbs[MAIN_DBI].md_flags)); target->mm_canary = pending->mm_canary; memcpy(target->mm_pages_retired, pending->mm_pages_retired, 8); jitter4testing(true); @@ -13140,6 +13226,8 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending, 
env->me_lck->mti_pgop_stat.wops.weak += 1; #endif /* MDBX_ENABLE_PGOP_STAT */ const MDBX_meta undo_meta = *target; + eASSERT(env, pending->mm_dbs[FREE_DBI].md_flags == MDBX_INTEGERKEY); + eASSERT(env, db_check_flags(pending->mm_dbs[MAIN_DBI].md_flags)); rc = osal_pwrite(env->me_fd4meta, pending, sizeof(MDBX_meta), ptr_dist(target, env->me_map)); if (unlikely(rc != MDBX_SUCCESS)) { @@ -13879,6 +13967,19 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, pv2pages(header.mm_geo.shrink_pv), unaligned_peek_u64(4, header.mm_txnid_a), durable_caption(&header)); + if (unlikely(header.mm_dbs[FREE_DBI].md_flags != MDBX_INTEGERKEY)) { + ERROR("unexpected/invalid db-flags 0x%u for GC/FreeDB", + header.mm_dbs[FREE_DBI].md_flags); + return MDBX_INCOMPATIBLE; + } + env->me_db_flags[FREE_DBI] = DB_VALID | MDBX_INTEGERKEY; + env->me_dbxs[FREE_DBI].md_cmp = cmp_int_align4; /* aligned MDBX_INTEGERKEY */ + env->me_dbxs[FREE_DBI].md_dcmp = cmp_lenfast; + env->me_dbxs[FREE_DBI].md_klen_max = env->me_dbxs[FREE_DBI].md_klen_min = 8; + env->me_dbxs[FREE_DBI].md_vlen_min = 4; + env->me_dbxs[FREE_DBI].md_vlen_max = + mdbx_env_get_maxvalsize_ex(env, MDBX_INTEGERKEY); + if (env->me_psize != header.mm_psize) setup_pagesize(env, header.mm_psize); const size_t used_bytes = pgno2bytes(env, header.mm_geo.next); @@ -14631,7 +14732,7 @@ __cold static int __must_check_result override_meta(MDBX_env *env, if (shape) { if (txnid && unlikely(!check_meta_coherency(env, shape, false))) { ERROR("bailout overriding meta-%zu since model failed " - "freedb/maindb %s-check for txnid #%" PRIaTXN, + "FreeDB/MainDB %s-check for txnid #%" PRIaTXN, target, "pre", constmeta_txnid(shape)); return MDBX_PROBLEM; } @@ -14655,7 +14756,7 @@ __cold static int __must_check_result override_meta(MDBX_env *env, sizeof(model->mm_magic_and_version)); if (unlikely(!check_meta_coherency(env, model, false))) { ERROR("bailout overriding meta-%zu since model failed " - "freedb/maindb %s-check for txnid #%" PRIaTXN, + 
"FreeDB/MainDB %s-check for txnid #%" PRIaTXN, target, "post", txnid); return MDBX_PROBLEM; } @@ -15119,13 +15220,6 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, } memcpy(env->me_pathname, env_pathname.dxb, env_pathname.ent_len * sizeof(pathchar_t)); - env->me_db_flags[FREE_DBI] = DB_VALID | MDBX_INTEGERKEY; - env->me_dbxs[FREE_DBI].md_cmp = cmp_int_align4; /* aligned MDBX_INTEGERKEY */ - env->me_dbxs[FREE_DBI].md_dcmp = cmp_lenfast; - env->me_dbxs[FREE_DBI].md_klen_max = env->me_dbxs[FREE_DBI].md_klen_min = 8; - env->me_dbxs[FREE_DBI].md_vlen_min = 4; - env->me_dbxs[FREE_DBI].md_vlen_max = - mdbx_env_get_maxvalsize_ex(env, MDBX_INTEGERKEY); /* Использование O_DSYNC или FILE_FLAG_WRITE_THROUGH: * @@ -16059,6 +16153,10 @@ __hot __noinline static int page_search_root(MDBX_cursor *mc, static int setup_dbx(MDBX_dbx *const dbx, const MDBX_db *const db, const unsigned pagesize) { + if (unlikely(!db_check_flags(db->md_flags))) { + ERROR("incompatible or invalid db.md_flags (%u) ", db->md_flags); + return MDBX_INCOMPATIBLE; + } if (unlikely(!dbx->md_cmp)) { dbx->md_cmp = get_default_keycmp(db->md_flags); dbx->md_dcmp = get_default_datacmp(db->md_flags); @@ -22742,7 +22840,7 @@ static int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, eASSERT(env, !(txn->mt_dbi_state[dbi] & DBI_VALID) || (txn->mt_dbs[dbi].md_flags | DB_VALID) == env->me_db_flags[dbi]); - eASSERT(env, env->me_dbxs[dbi].md_name.iov_base); + eASSERT(env, env->me_dbxs[dbi].md_name.iov_base || dbi < CORE_DBS); } /* Если dbi уже использовался, то корректными считаем четыре варианта: @@ -23055,6 +23153,7 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const name, case MDBX_DB_DEFAULTS: break; } + tASSERT(txn, db_check_flags((uint16_t)user_flags)); /* main table? 
*/ if (unlikely(name == MDBX_CHK_MAIN || name->iov_base == MDBX_CHK_MAIN)) { From 96504bf338fda2f739419bdfef2aad077e8acc9e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 4 Nov 2023 23:45:29 +0300 Subject: [PATCH 028/137] =?UTF-8?q?mdbx:=20=D0=BE=D1=82=D0=BB=D0=BE=D0=B6?= =?UTF-8?q?=D0=B5=D0=BD=D0=BD=D0=BE=D0=B5=20=D0=BE=D1=81=D0=B2=D0=BE=D0=B1?= =?UTF-8?q?=D0=BE=D0=B6=D0=B4=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B8=D0=BC=D0=B5?= =?UTF-8?q?=D0=BD=20=D1=81=D0=B2=D1=8F=D0=B7=D0=B0=D0=BD=D0=BD=D1=8B=D1=85?= =?UTF-8?q?=20c=20dbi-=D1=85=D0=B5=D0=BD=D0=B4=D0=BB=D0=B0=D0=BC=D0=B8=20?= =?UTF-8?q?=D0=B8=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D0=B5=20=D0=BE=D0=BF=D1=86=D0=B8=D0=B8=20`MDBX=5FENABLE=5FDBI?= =?UTF-8?q?=5FLOCKFREE`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Отложенное освобождение позволяет реализовать безопасное выполнение fastpath/lockfree при повторном открытии из других потоков/транзакций уже открытых subDB, что и происходит при активации добавленной опции сборки `MDBX_ENABLE_DBI_LOCKFREE`.
--- CMakeLists.txt | 1 + src/config.h.in | 1 + src/core.c | 196 ++++++++++++++++++++++++++++++++++++++---------- src/internals.h | 8 ++ src/options.h | 7 ++ 5 files changed, 173 insertions(+), 40 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 89eee769..90c2d766 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -532,6 +532,7 @@ add_mdbx_option(MDBX_ENABLE_PGOP_STAT "Gathering statistics for page operations" add_mdbx_option(MDBX_ENABLE_PROFGC "Profiling of GC search and updates" OFF) mark_as_advanced(MDBX_ENABLE_PROFGC) add_mdbx_option(MDBX_ENABLE_DBI_SPARSE "FIXME" ON) +add_mdbx_option(MDBX_ENABLE_DBI_LOCKFREE "FIXME" ON) if(NOT MDBX_AMALGAMATED_SOURCE) if(CMAKE_CONFIGURATION_TYPES OR CMAKE_BUILD_TYPE_UPPERCASE STREQUAL "DEBUG") diff --git a/src/config.h.in b/src/config.h.in index 2ffb9ecf..0304db03 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -34,6 +34,7 @@ #cmakedefine01 MDBX_ENABLE_PGOP_STAT #cmakedefine01 MDBX_ENABLE_PROFGC #cmakedefine01 MDBX_ENABLE_DBI_SPARSE +#cmakedefine01 MDBX_ENABLE_DBI_LOCKFREE /* Windows */ #cmakedefine01 MDBX_WITHOUT_MSVC_CRT diff --git a/src/core.c b/src/core.c index cdb25b1b..1b1d4b58 100644 --- a/src/core.c +++ b/src/core.c @@ -3781,6 +3781,58 @@ MDBX_MAYBE_UNUSED static bool cursor_is_tracked(const MDBX_cursor *mc) { *tracking_head = tracked->mc_next; \ } while (0) +static int +env_defer_free_and_release(MDBX_env *const env, + struct mdbx_defer_free_item *const chain) { + size_t length = 0; + struct mdbx_defer_free_item *obsolete_chain = nullptr; +#if MDBX_ENABLE_DBI_LOCKFREE + const uint64_t now = osal_monotime(); + struct mdbx_defer_free_item **scan = &env->me_defer_free; + if (env->me_defer_free) { + const uint64_t threshold_1second = osal_16dot16_to_monotime(1 * 65536); + do { + struct mdbx_defer_free_item *item = *scan; + if (now - item->timestamp < threshold_1second) { + scan = &item->next; + length += 1; + } else { + *scan = item->next; + item->next = obsolete_chain; + obsolete_chain = item; + } + 
} while (*scan); + } + + eASSERT(env, *scan == nullptr); + if (chain) { + struct mdbx_defer_free_item *item = chain; + do { + item->timestamp = now; + item = item->next; + } while (item); + *scan = chain; + } +#else /* MDBX_ENABLE_DBI_LOCKFREE */ + obsolete_chain = chain; +#endif /* MDBX_ENABLE_DBI_LOCKFREE */ + + ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); + if (length > 42) { +#if defined(_WIN32) || defined(_WIN64) + SwitchToThread(); +#else + sched_yield(); +#endif /* Windows */ + } + while (obsolete_chain) { + struct mdbx_defer_free_item *item = obsolete_chain; + obsolete_chain = obsolete_chain->next; + osal_free(item); + } + return chain ? MDBX_SUCCESS : MDBX_BAD_DBI; +} + #if MDBX_ENABLE_DBI_SPARSE static __inline size_t dbi_bitmap_ctz(const MDBX_txn *txn, intptr_t bmi) { @@ -4136,7 +4188,7 @@ static int dbi_update(MDBX_txn *txn, int keep) { MDBX_env *const env = txn->mt_env; tASSERT(txn, !txn->mt_parent && txn == env->me_txn0); bool locked = false; - void *defer_free = nullptr; + struct mdbx_defer_free_item *defer_chain = nullptr; TXN_FOREACH_DBI_USER(txn, dbi) { if (likely((txn->mt_dbi_state[dbi] & DBI_CREAT) == 0)) continue; @@ -4154,15 +4206,15 @@ static int dbi_update(MDBX_txn *txn, int keep) { env->me_db_flags[dbi] = txn->mt_dbs[dbi].md_flags | DB_VALID; } else { uint32_t seq = dbi_seq_next(env, dbi); - void *ptr = env->me_dbxs[dbi].md_name.iov_base; - if (ptr) { + struct mdbx_defer_free_item *item = env->me_dbxs[dbi].md_name.iov_base; + if (item) { env->me_db_flags[dbi] = 0; env->me_dbxs[dbi].md_name.iov_len = 0; env->me_dbxs[dbi].md_name.iov_base = nullptr; atomic_store32(&env->me_dbi_seqs[dbi], seq, mo_AcquireRelease); osal_flush_incoherent_cpu_writeback(); - osal_free(defer_free); - defer_free = ptr; + item->next = defer_chain; + defer_chain = item; } else { eASSERT(env, env->me_dbxs[dbi].md_name.iov_len == 0); eASSERT(env, env->me_db_flags[dbi] == 0); @@ -4179,9 +4231,7 @@ static int dbi_update(MDBX_txn *txn, int keep) { 
!env->me_dbxs[i].md_name.iov_base); } env->me_numdbs = (unsigned)i; - ENSURE(txn->mt_env, - osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); - osal_free(defer_free); + env_defer_free_and_release(env, defer_chain); } return MDBX_SUCCESS; } @@ -15651,6 +15701,14 @@ __cold static int env_close(MDBX_env *env) { env->me_txkey = (osal_thread_key_t)0; } +#if MDBX_ENABLE_DBI_LOCKFREE + for (struct mdbx_defer_free_item *next, *ptr = env->me_defer_free; ptr; + ptr = next) { + next = ptr->next; + osal_free(ptr); + } +#endif /* MDBX_ENABLE_DBI_LOCKFREE */ + munlock_all(env); if (!(env->me_flags & MDBX_RDONLY)) osal_ioring_destroy(&env->me_ioring); @@ -23057,7 +23115,9 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, /* Done here so we cannot fail after creating a new DB */ void *clone = nullptr; if (name.iov_len) { - clone = osal_malloc(name.iov_len); + clone = osal_malloc((name.iov_len > sizeof(struct mdbx_defer_free_item)) + ? name.iov_len + : sizeof(struct mdbx_defer_free_item)); if (unlikely(!clone)) return MDBX_ENOMEM; name.iov_base = memcpy(clone, name.iov_base, name.iov_len); @@ -23174,6 +23234,67 @@ static int dbi_open(MDBX_txn *txn, const MDBX_val *const name, txn->mt_env->me_leaf_nodemax - NODESIZE - sizeof(MDBX_db))) return MDBX_EINVAL; +#if MDBX_ENABLE_DBI_LOCKFREE + /* Is the DB already open? 
*/ + const MDBX_env *const env = txn->mt_env; + size_t free_slot = env->me_numdbs; + for (size_t i = CORE_DBS; i < env->me_numdbs; ++i) { + retry: + if ((env->me_db_flags[i] & DB_VALID) == 0) { + free_slot = i; + continue; + } + + const uint32_t snap_seq = + atomic_load32(&env->me_dbi_seqs[i], mo_AcquireRelease); + const uint16_t snap_flags = env->me_db_flags[i]; + const MDBX_val snap_name = env->me_dbxs[i].md_name; + if (user_flags != MDBX_ACCEDE && + (((user_flags ^ snap_flags) & DB_PERSISTENT_FLAGS) || + (keycmp && keycmp != env->me_dbxs[i].md_cmp) || + (datacmp && datacmp != env->me_dbxs[i].md_dcmp))) + continue; + const uint32_t main_seq = + atomic_load32(&env->me_dbi_seqs[MAIN_DBI], mo_AcquireRelease); + MDBX_cmp_func *const snap_cmp = env->me_dbxs[MAIN_DBI].md_cmp; + if (unlikely(!(snap_flags & DB_VALID) || !snap_name.iov_base || + !snap_name.iov_len || !snap_cmp)) + continue; + + const bool name_match = snap_cmp(&snap_name, name) == 0; + osal_flush_incoherent_cpu_writeback(); + if (unlikely(snap_seq != + atomic_load32(&env->me_dbi_seqs[i], mo_AcquireRelease) || + main_seq != atomic_load32(&env->me_dbi_seqs[MAIN_DBI], + mo_AcquireRelease) || + snap_flags != env->me_db_flags[i] || + snap_name.iov_base != env->me_dbxs[i].md_name.iov_base || + snap_name.iov_len != env->me_dbxs[i].md_name.iov_len)) + goto retry; + if (name_match) { + rc = dbi_check(txn, i); + if (rc == MDBX_BAD_DBI && + txn->mt_dbi_state[i] == (DBI_OLDEN | DBI_LINDO)) { + /* хендл использовался, стал невалидным, + * но теперь явно пере-открывается в этой транзакци */ + eASSERT(env, !txn->mt_cursors[i]); + txn->mt_dbi_state[i] = DBI_LINDO; + rc = dbi_check(txn, i); + } + if (likely(rc == MDBX_SUCCESS)) { + rc = dbi_bind(txn, i, user_flags, keycmp, datacmp); + if (likely(rc == MDBX_SUCCESS)) + *dbi = (MDBX_dbi)i; + } + return rc; + } + } + + /* Fail, if no free slot and max hit */ + if (unlikely(free_slot >= env->me_maxdbs)) + return MDBX_DBS_FULL; +#endif /* MDBX_ENABLE_DBI_LOCKFREE */ + rc = 
osal_fastmutex_acquire(&txn->mt_env->me_dbi_lock); if (likely(rc == MDBX_SUCCESS)) { rc = dbi_open_locked(txn, user_flags, dbi, keycmp, datacmp, *name); @@ -23251,35 +23372,35 @@ __cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, return MDBX_SUCCESS; } -static int dbi_close_locked(MDBX_env *env, MDBX_dbi dbi) { +static struct mdbx_defer_free_item *dbi_close_locked(MDBX_env *env, + MDBX_dbi dbi) { eASSERT(env, dbi >= CORE_DBS); if (unlikely(dbi >= env->me_numdbs)) - return MDBX_BAD_DBI; + return nullptr; - char *const ptr = env->me_dbxs[dbi].md_name.iov_base; - /* If there was no name, this was already closed */ - if (unlikely(!ptr)) - return MDBX_BAD_DBI; + const uint32_t seq = dbi_seq_next(env, dbi); + struct mdbx_defer_free_item *defer_item = env->me_dbxs[dbi].md_name.iov_base; + if (likely(defer_item)) { + env->me_db_flags[dbi] = 0; + env->me_dbxs[dbi].md_name.iov_len = 0; + env->me_dbxs[dbi].md_name.iov_base = nullptr; + atomic_store32(&env->me_dbi_seqs[dbi], seq, mo_AcquireRelease); + osal_flush_incoherent_cpu_writeback(); + defer_item->next = nullptr; - env->me_db_flags[dbi] = 0; - env->me_dbxs[dbi].md_name.iov_len = 0; - osal_memory_fence(mo_AcquireRelease, true); - env->me_dbxs[dbi].md_name.iov_base = NULL; - osal_flush_incoherent_cpu_writeback(); - osal_free(ptr); - - if (env->me_numdbs == dbi + 1) { - size_t i = env->me_numdbs; - do { - --i; - eASSERT(env, i >= CORE_DBS); - eASSERT(env, !env->me_db_flags[i] && !env->me_dbxs[i].md_name.iov_len && - !env->me_dbxs[i].md_name.iov_base); - } while ((env->me_db_flags[i - 1] & DB_VALID) == 0); - env->me_numdbs = (unsigned)i; + if (env->me_numdbs == dbi + 1) { + size_t i = env->me_numdbs; + do { + --i; + eASSERT(env, i >= CORE_DBS); + eASSERT(env, !env->me_db_flags[i] && !env->me_dbxs[i].md_name.iov_len && + !env->me_dbxs[i].md_name.iov_base); + } while (i > CORE_DBS && !env->me_dbxs[i - 1].md_name.iov_base); + env->me_numdbs = (unsigned)i; + } } - return MDBX_SUCCESS; + return 
defer_item; } int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { @@ -23297,12 +23418,8 @@ int mdbx_dbi_close(MDBX_env *env, MDBX_dbi dbi) { return MDBX_BAD_DBI; rc = osal_fastmutex_acquire(&env->me_dbi_lock); - if (likely(rc == MDBX_SUCCESS)) { - rc = (dbi < env->me_maxdbs && (env->me_db_flags[dbi] & DB_VALID)) - ? dbi_close_locked(env, dbi) - : MDBX_BAD_DBI; - ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); - } + if (likely(rc == MDBX_SUCCESS)) + rc = env_defer_free_and_release(env, dbi_close_locked(env, dbi)); return rc; } @@ -23449,8 +23566,7 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { MDBX_env *env = txn->mt_env; rc = osal_fastmutex_acquire(&env->me_dbi_lock); if (likely(rc == MDBX_SUCCESS)) { - dbi_close_locked(env, dbi); - ENSURE(env, osal_fastmutex_release(&env->me_dbi_lock) == MDBX_SUCCESS); + rc = env_defer_free_and_release(env, dbi_close_locked(env, dbi)); goto bailout; } } diff --git a/src/internals.h b/src/internals.h index a3e8e5cc..856ad1db 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1348,6 +1348,11 @@ typedef struct MDBX_cursor_couple { MDBX_xcursor inner; } MDBX_cursor_couple; +struct mdbx_defer_free_item { + struct mdbx_defer_free_item *next; + uint64_t timestamp; +}; + /* The database environment. 
*/ struct MDBX_env { /* ----------------------------------------------------- mostly static part */ @@ -1452,6 +1457,9 @@ struct MDBX_env { bool me_prefault_write; MDBX_env *me_lcklist_next; +#if MDBX_ENABLE_DBI_LOCKFREE + struct mdbx_defer_free_item *me_defer_free; +#endif /* MDBX_ENABLE_DBI_LOCKFREE */ /* --------------------------------------------------- mostly volatile part */ diff --git a/src/options.h b/src/options.h index 65bb797c..9aff6755 100644 --- a/src/options.h +++ b/src/options.h @@ -170,6 +170,13 @@ #error MDBX_ENABLE_DBI_SPARSE must be defined as 0 or 1 #endif /* MDBX_ENABLE_DBI_SPARSE */ +/** FIXME */ +#ifndef MDBX_ENABLE_DBI_LOCKFREE +#define MDBX_ENABLE_DBI_LOCKFREE 1 +#elif !(MDBX_ENABLE_DBI_LOCKFREE == 0 || MDBX_ENABLE_DBI_LOCKFREE == 1) +#error MDBX_ENABLE_DBI_LOCKFREE must be defined as 0 or 1 +#endif /* MDBX_ENABLE_DBI_LOCKFREE */ + /** Controls sort order of internal page number lists. * This mostly experimental/advanced option with not for regular MDBX users. * \warning The database format depend on this option and libmdbx built with From 903d964f4d1dcd15c08eed2eee263f00c170e468 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 2 Nov 2023 23:15:38 +0300 Subject: [PATCH 029/137] =?UTF-8?q?mdbx:=20=D0=B8=D0=BD=D1=82=D0=B5=D0=BD?= =?UTF-8?q?=D1=81=D0=B8=D0=B2=D0=BD=D0=BE=D0=B5=20=D0=B8=D1=81=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20`=5F?= =?UTF-8?q?=5Frestrict`,=20=D0=B2=20=D1=82=D0=BE=D0=BC=20=D1=87=D0=B8?= =?UTF-8?q?=D1=81=D0=BB=D0=B5=20=D0=BF=D1=80=D0=B8=20=D0=BE=D0=BF=D1=80?= =?UTF-8?q?=D0=B5=D0=B4=D0=B5=D0=BB=D0=B5=D0=BD=D0=B8=D0=B8=20=D1=8D=D0=BB?= =?UTF-8?q?=D0=B5=D0=BC=D0=B5=D0=BD=D1=82=D0=BE=D0=B2=20=D1=81=D1=82=D1=80?= =?UTF-8?q?=D1=83=D0=BA=D1=82=D1=83=D1=80.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 34 
++++++++++++++++++---------------- src/internals.h | 32 ++++++++++++++++++-------------- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/src/core.c b/src/core.c index 1b1d4b58..5034c45e 100644 --- a/src/core.c +++ b/src/core.c @@ -2265,7 +2265,7 @@ static void pnl_free(MDBX_PNL pl) { } /* Shrink the PNL to the default size if it has grown larger */ -static void pnl_shrink(MDBX_PNL *ppl) { +static void pnl_shrink(MDBX_PNL __restrict *__restrict ppl) { assert(pnl_bytes2size(pnl_size2bytes(MDBX_PNL_INITIAL)) >= MDBX_PNL_INITIAL && pnl_bytes2size(pnl_size2bytes(MDBX_PNL_INITIAL)) < MDBX_PNL_INITIAL * 3 / 2); @@ -2288,7 +2288,8 @@ static void pnl_shrink(MDBX_PNL *ppl) { } /* Grow the PNL to the size growed to at least given size */ -static int pnl_reserve(MDBX_PNL *ppl, const size_t wanna) { +static int pnl_reserve(MDBX_PNL __restrict *__restrict ppl, + const size_t wanna) { const size_t allocated = MDBX_PNL_ALLOCLEN(*ppl); assert(MDBX_PNL_GETSIZE(*ppl) <= MDBX_PGL_LIMIT && MDBX_PNL_ALLOCLEN(*ppl) >= MDBX_PNL_GETSIZE(*ppl)); @@ -2318,8 +2319,8 @@ static int pnl_reserve(MDBX_PNL *ppl, const size_t wanna) { } /* Make room for num additional elements in an PNL */ -static __always_inline int __must_check_result pnl_need(MDBX_PNL *ppl, - size_t num) { +static __always_inline int __must_check_result +pnl_need(MDBX_PNL __restrict *__restrict ppl, size_t num) { assert(MDBX_PNL_GETSIZE(*ppl) <= MDBX_PGL_LIMIT && MDBX_PNL_ALLOCLEN(*ppl) >= MDBX_PNL_GETSIZE(*ppl)); assert(num <= MDBX_PGL_LIMIT); @@ -2328,7 +2329,7 @@ static __always_inline int __must_check_result pnl_need(MDBX_PNL *ppl, : pnl_reserve(ppl, wanna); } -static __always_inline void pnl_xappend(MDBX_PNL pl, pgno_t pgno) { +static __always_inline void pnl_xappend(__restrict MDBX_PNL pl, pgno_t pgno) { assert(MDBX_PNL_GETSIZE(pl) < MDBX_PNL_ALLOCLEN(pl)); if (AUDIT_ENABLED()) { for (size_t i = MDBX_PNL_GETSIZE(pl); i > 0; --i) @@ -2339,10 +2340,8 @@ static __always_inline void pnl_xappend(MDBX_PNL pl, 
pgno_t pgno) { } /* Append an pgno range onto an unsorted PNL */ -__always_inline static int __must_check_result pnl_append_range(bool spilled, - MDBX_PNL *ppl, - pgno_t pgno, - size_t n) { +__always_inline static int __must_check_result pnl_append_range( + bool spilled, __restrict MDBX_PNL *ppl, pgno_t pgno, size_t n) { assert(n > 0); int rc = pnl_need(ppl, n); if (unlikely(rc != MDBX_SUCCESS)) @@ -2369,7 +2368,7 @@ __always_inline static int __must_check_result pnl_append_range(bool spilled, } /* Append an pgno range into the sorted PNL */ -__hot static int __must_check_result pnl_insert_range(MDBX_PNL *ppl, +__hot static int __must_check_result pnl_insert_range(__restrict MDBX_PNL *ppl, pgno_t pgno, size_t n) { assert(n > 0); int rc = pnl_need(ppl, n); @@ -2673,7 +2672,8 @@ static void txl_free(MDBX_TXL tl) { osal_free(tl - 1); } -static int txl_reserve(MDBX_TXL *ptl, const size_t wanna) { +static int txl_reserve(MDBX_TXL __restrict *__restrict ptl, + const size_t wanna) { const size_t allocated = (size_t)MDBX_PNL_ALLOCLEN(*ptl); assert(MDBX_PNL_GETSIZE(*ptl) <= MDBX_TXL_MAX && MDBX_PNL_ALLOCLEN(*ptl) >= MDBX_PNL_GETSIZE(*ptl)); @@ -2702,8 +2702,8 @@ static int txl_reserve(MDBX_TXL *ptl, const size_t wanna) { return MDBX_ENOMEM; } -static __always_inline int __must_check_result txl_need(MDBX_TXL *ptl, - size_t num) { +static __always_inline int __must_check_result +txl_need(MDBX_TXL __restrict *__restrict ptl, size_t num) { assert(MDBX_PNL_GETSIZE(*ptl) <= MDBX_TXL_MAX && MDBX_PNL_ALLOCLEN(*ptl) >= MDBX_PNL_GETSIZE(*ptl)); assert(num <= MDBX_PGL_LIMIT); @@ -2712,7 +2712,7 @@ static __always_inline int __must_check_result txl_need(MDBX_TXL *ptl, : txl_reserve(ptl, wanna); } -static __always_inline void txl_xappend(MDBX_TXL tl, txnid_t id) { +static __always_inline void txl_xappend(MDBX_TXL __restrict tl, txnid_t id) { assert(MDBX_PNL_GETSIZE(tl) < MDBX_PNL_ALLOCLEN(tl)); tl[0] += 1; MDBX_PNL_LAST(tl) = id; @@ -2724,7 +2724,8 @@ static void txl_sort(MDBX_TXL tl) { 
txnid_sort(MDBX_PNL_BEGIN(tl), MDBX_PNL_END(tl)); } -static int __must_check_result txl_append(MDBX_TXL *ptl, txnid_t id) { +static int __must_check_result txl_append(MDBX_TXL __restrict *ptl, + txnid_t id) { if (unlikely(MDBX_PNL_GETSIZE(*ptl) == MDBX_PNL_ALLOCLEN(*ptl))) { int rc = txl_need(ptl, MDBX_TXL_GRANULATE); if (unlikely(rc != MDBX_SUCCESS)) @@ -4582,7 +4583,8 @@ static void refund_loose(MDBX_txn *txn) { /* Filter-out loose chain & dispose refunded pages. */ unlink_loose: - for (MDBX_page **link = &txn->tw.loose_pages; *link;) { + for (MDBX_page *__restrict *__restrict link = &txn->tw.loose_pages; + *link;) { MDBX_page *dp = *link; tASSERT(txn, dp->mp_flags == P_LOOSE); MDBX_ASAN_UNPOISON_MEMORY_REGION(&mp_next(dp), sizeof(MDBX_page *)); diff --git a/src/internals.h b/src/internals.h index 856ad1db..f38c7f7a 100644 --- a/src/internals.h +++ b/src/internals.h @@ -93,6 +93,10 @@ disable : 5105) /* winbase.h(9531): warning C5105: macro expansion \ producing 'defined' has undefined behavior */ #endif +#if _MSC_VER < 1920 +/* avoid "error C2219: syntax error: type qualifier must be after '*'" */ +#define __restrict +#endif #if _MSC_VER > 1930 #pragma warning(disable : 6235) /* is always a constant */ #pragma warning(disable : 6237) /* is never evaluated and might \ @@ -1193,7 +1197,7 @@ struct MDBX_txn { MDBX_db *mt_dbs; #if MDBX_ENABLE_DBI_SPARSE - unsigned *mt_dbi_sparse; + unsigned *__restrict mt_dbi_sparse; #endif /* MDBX_ENABLE_DBI_SPARSE */ /* Non-shared DBI state flags inside transaction */ @@ -1205,10 +1209,10 @@ struct MDBX_txn { #define DBI_OLDEN 0x40 /* Handle was closed/reopened outside txn */ #define DBI_LINDO 0x80 /* Lazy initialization done for DBI-slot */ /* Array of non-shared txn's flags of DBI */ - uint8_t *mt_dbi_state; + uint8_t *__restrict mt_dbi_state; /* Array of sequence numbers for each DB handle. 
*/ - uint32_t *mt_dbi_seqs; + uint32_t *__restrict mt_dbi_seqs; MDBX_cursor **mt_cursors; MDBX_canary mt_canary; @@ -1222,8 +1226,8 @@ struct MDBX_txn { struct { meta_troika_t troika; /* In write txns, array of cursors for each DB */ - MDBX_PNL relist; /* Reclaimed GC pages */ - txnid_t last_reclaimed; /* ID of last used record */ + MDBX_PNL __restrict relist; /* Reclaimed GC pages */ + txnid_t last_reclaimed; /* ID of last used record */ #if MDBX_ENABLE_REFUND pgno_t loose_refund_wl /* FIXME: describe */; #endif /* MDBX_ENABLE_REFUND */ @@ -1235,14 +1239,14 @@ struct MDBX_txn { * dirtylist into mt_parent after freeing hidden mt_parent pages. */ size_t dirtyroom; /* For write txns: Modified pages. Sorted when not MDBX_WRITEMAP. */ - MDBX_dpl *dirtylist; + MDBX_dpl *__restrict dirtylist; /* The list of reclaimed txns from GC */ - MDBX_TXL lifo_reclaimed; + MDBX_TXL __restrict lifo_reclaimed; /* The list of pages that became unused during this transaction. */ - MDBX_PNL retired_pages; + MDBX_PNL __restrict retired_pages; /* The list of loose pages that became unused and may be reused * in this transaction, linked through `mp_next`. */ - MDBX_page *loose_pages; + MDBX_page *__restrict loose_pages; /* Number of loose pages (tw.loose_pages) */ size_t loose_count; union { @@ -1251,7 +1255,7 @@ struct MDBX_txn { /* The sorted list of dirty pages we temporarily wrote to disk * because the dirty list was full. page numbers in here are * shifted left by 1, deleted slots have the LSB set. 
*/ - MDBX_PNL list; + MDBX_PNL __restrict list; } spilled; size_t writemap_dirty_npages; size_t writemap_spilled_npages; @@ -1295,7 +1299,7 @@ struct MDBX_cursor { /* The database auxiliary record for this cursor */ MDBX_dbx *mc_dbx; /* The mt_dbi_state[] for this DBI */ - uint8_t *mc_dbi_state; + uint8_t *__restrict mc_dbi_state; uint8_t mc_snum; /* number of pushed pages */ uint8_t mc_top; /* index of top page, normally mc_snum-1 */ @@ -1401,7 +1405,7 @@ struct MDBX_env { MDBX_txn *me_txn0; /* preallocated write transaction */ MDBX_dbx *me_dbxs; /* array of static DB info */ - uint16_t *me_db_flags; /* array of flags from MDBX_db.md_flags */ + uint16_t *__restrict me_db_flags; /* array of flags from MDBX_db.md_flags */ MDBX_atomic_uint32_t *me_dbi_seqs; /* array of dbi sequence numbers */ unsigned me_maxgc_ov1page; /* Number of pgno_t fit in a single overflow page */ @@ -1468,10 +1472,10 @@ struct MDBX_env { unsigned me_numdbs; /* number of DBs opened */ unsigned me_dp_reserve_len; - MDBX_page *me_dp_reserve; /* list of malloc'ed blocks for re-use */ + MDBX_page *__restrict me_dp_reserve; /* list of malloc'ed blocks for re-use */ /* PNL of pages that became unused in a write txn */ - MDBX_PNL me_retired_pages; + MDBX_PNL __restrict me_retired_pages; osal_ioring_t me_ioring; #if defined(_WIN32) || defined(_WIN64) From c9c02dddfb3aea039a64be65f6994a24407bae06 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 1 Nov 2023 01:07:01 +0300 Subject: [PATCH 030/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fdbi=5Frename()`=20?= =?UTF-8?q?=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 5 +++ src/core.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 113 insertions(+), 5 deletions(-) diff --git a/mdbx.h 
b/mdbx.h index d9cc392e..e2da78b3 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4225,6 +4225,11 @@ MDBX_DEPRECATED LIBMDBX_API int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, MDBX_db_flags_t flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp); +/** FIXME */ +LIBMDBX_API int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name); +LIBMDBX_API int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, + const MDBX_val *name); + /** \defgroup value2key Value-to-Key functions * \brief Value-to-Key functions to * \ref avoid_custom_comparators "avoid using custom comparators" diff --git a/src/core.c b/src/core.c index 5034c45e..56fb4f6b 100644 --- a/src/core.c +++ b/src/core.c @@ -22993,6 +22993,12 @@ static int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, return MDBX_SUCCESS; } +static __inline size_t dbi_namelen(const MDBX_val name) { + return (name.iov_len > sizeof(struct mdbx_defer_free_item)) + ? name.iov_len + : sizeof(struct mdbx_defer_free_item); +} + static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, MDBX_cmp_func *keycmp, MDBX_cmp_func *datacmp, MDBX_val name) { @@ -23117,9 +23123,7 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, /* Done here so we cannot fail after creating a new DB */ void *clone = nullptr; if (name.iov_len) { - clone = osal_malloc((name.iov_len > sizeof(struct mdbx_defer_free_item)) - ? 
name.iov_len - : sizeof(struct mdbx_defer_free_item)); + clone = osal_malloc(dbi_namelen(name)); if (unlikely(!clone)) return MDBX_ENOMEM; name.iov_base = memcpy(clone, name.iov_base, name.iov_len); @@ -23343,6 +23347,105 @@ int mdbx_dbi_open_ex2(MDBX_txn *txn, const MDBX_val *name, return dbi_open(txn, name, flags, dbi, keycmp, datacmp); } +__cold int mdbx_dbi_rename(MDBX_txn *txn, MDBX_dbi dbi, const char *name_cstr) { + MDBX_val thunk, *name; + if (name_cstr == MDBX_CHK_MAIN || name_cstr == MDBX_CHK_GC || + name_cstr == MDBX_CHK_META) + name = (void *)name_cstr; + else { + thunk.iov_len = strlen(name_cstr); + thunk.iov_base = (void *)name_cstr; + name = &thunk; + } + return mdbx_dbi_rename2(txn, dbi, name); +} + +struct dbi_rename_result { + struct mdbx_defer_free_item *defer; + int err; +}; + +__cold static struct dbi_rename_result +dbi_rename_locked(MDBX_txn *txn, MDBX_dbi dbi, MDBX_val new_name) { + struct dbi_rename_result pair; + pair.defer = nullptr; + pair.err = dbi_check(txn, dbi); + if (unlikely(pair.err != MDBX_SUCCESS)) + return pair; + + MDBX_env *const env = txn->mt_env; + MDBX_val old_name = env->me_dbxs[dbi].md_name; + if (env->me_dbxs[MAIN_DBI].md_cmp(&new_name, &old_name) == 0 && + MDBX_DEBUG == 0) + return pair; + + MDBX_cursor_couple cx; + pair.err = cursor_init(&cx.outer, txn, MAIN_DBI); + if (unlikely(pair.err != MDBX_SUCCESS)) + return pair; + pair.err = cursor_set(&cx.outer, &new_name, nullptr, MDBX_SET).err; + if (unlikely(pair.err != MDBX_NOTFOUND)) { + pair.err = (pair.err == MDBX_SUCCESS) ? 
MDBX_KEYEXIST : pair.err; + return pair; + } + + pair.defer = osal_malloc(dbi_namelen(new_name)); + if (unlikely(!pair.defer)) { + pair.err = MDBX_ENOMEM; + return pair; + } + new_name.iov_base = memcpy(pair.defer, new_name.iov_base, new_name.iov_len); + + cx.outer.mc_next = txn->mt_cursors[MAIN_DBI]; + txn->mt_cursors[MAIN_DBI] = &cx.outer; + + MDBX_val data = {&txn->mt_dbs[dbi], sizeof(MDBX_db)}; + pair.err = cursor_put_checklen(&cx.outer, &new_name, &data, + F_SUBDATA | MDBX_NOOVERWRITE); + if (likely(pair.err == MDBX_SUCCESS)) { + pair.err = cursor_set(&cx.outer, &old_name, nullptr, MDBX_SET).err; + if (likely(pair.err == MDBX_SUCCESS)) + pair.err = cursor_del(&cx.outer, F_SUBDATA); + if (likely(pair.err == MDBX_SUCCESS)) { + pair.defer = env->me_dbxs[dbi].md_name.iov_base; + env->me_dbxs[dbi].md_name = new_name; + } else + txn->mt_flags |= MDBX_TXN_ERROR; + } + + txn->mt_cursors[MAIN_DBI] = cx.outer.mc_next; + return pair; +} + +__cold int mdbx_dbi_rename2(MDBX_txn *txn, MDBX_dbi dbi, + const MDBX_val *new_name) { + int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(new_name == MDBX_CHK_MAIN || + new_name->iov_base == MDBX_CHK_MAIN || new_name == MDBX_CHK_GC || + new_name->iov_base == MDBX_CHK_GC || new_name == MDBX_CHK_META || + new_name->iov_base == MDBX_CHK_META)) + return MDBX_EINVAL; + + if (unlikely(dbi < CORE_DBS)) + return MDBX_EINVAL; + rc = dbi_check(txn, dbi); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = osal_fastmutex_acquire(&txn->mt_env->me_dbi_lock); + if (likely(rc == MDBX_SUCCESS)) { + struct dbi_rename_result pair = dbi_rename_locked(txn, dbi, *new_name); + if (pair.defer) + pair.defer->next = nullptr; + env_defer_free_and_release(txn->mt_env, pair.defer); + rc = pair.err; + } + return rc; +} + __cold int mdbx_dbi_stat(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_stat *dest, size_t bytes) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); @@ -23540,7 +23643,7 @@ static int 
drop_tree(MDBX_cursor *mc, const bool may_have_subDBs) { return rc; } -int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { +__cold int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { int rc = check_txn_rw(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -23565,7 +23668,7 @@ int mdbx_drop(MDBX_txn *txn, MDBX_dbi dbi, bool del) { tASSERT(txn, txn->mt_dbi_state[MAIN_DBI] & DBI_DIRTY); tASSERT(txn, txn->mt_flags & MDBX_TXN_DIRTY); txn->mt_dbi_state[dbi] = DBI_LINDO | DBI_OLDEN; - MDBX_env *env = txn->mt_env; + MDBX_env *const env = txn->mt_env; rc = osal_fastmutex_acquire(&env->me_dbi_lock); if (likely(rc == MDBX_SUCCESS)) { rc = env_defer_free_and_release(env, dbi_close_locked(env, dbi)); From c216e1afb7b9bbe76bcee2d51d573b92ccbe5fff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 1 Nov 2023 11:04:00 +0300 Subject: [PATCH 031/137] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D1=80=D0=BE=D0=B2?= =?UTF-8?q?=D0=B5=D1=80=D0=BA=D0=B8=20`mdbx=5Fdbi=5Frename()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/jitter.c++ | 27 ++++++++++++++++++++++++++- test/test.c++ | 42 ++++++++++++++++++++++++++++-------------- test/test.h++ | 5 ++++- 3 files changed, 58 insertions(+), 16 deletions(-) diff --git a/test/jitter.c++ b/test/jitter.c++ index b25599b0..993631e8 100644 --- a/test/jitter.c++ +++ b/test/jitter.c++ @@ -39,6 +39,12 @@ bool testcase_jitter::run() { if (upper_limit < 1) upper_limit = config.params.size_now * 2; + tablename_buf buffer; + const char *const tablename = db_tablename(buffer); + tablename_buf buffer_renamed; + const char *const tablename_renamed = + db_tablename(buffer_renamed, ".renamed"); + while (should_continue()) { jitter_delay(); db_open(); @@ -48,6 +54,15 @@ bool testcase_jitter::run() { 
txn_begin(false); dbi = db_table_open(true); check_dbi_error(MDBX_SUCCESS, "created-uncommitted"); + + bool renamed = false; + if (flipcoin()) { + err = mdbx_dbi_rename(txn_guard.get(), dbi, tablename_renamed); + if (err != MDBX_SUCCESS) + failure_perror("jitter.rename-1", err); + renamed = true; + } + // note: here and below the 4-byte length keys and value are used // to be compatible with any Db-flags given from command line. MDBX_val k = {(void *)"k000", 4}, v = {(void *)"v001", 4}; @@ -75,7 +90,17 @@ bool testcase_jitter::run() { failure_perror("jitter.put-2", err); check_dbi_error(MDBX_BAD_DBI, "dropped-recreated-aborted"); // restore DBI - dbi = db_table_open(false); + dbi = db_table_open(false, renamed); + if (renamed) { + err = mdbx_dbi_open( + txn_guard.get(), tablename_renamed, + flipcoin() ? MDBX_DB_ACCEDE : config.params.table_flags, &dbi); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("open-renamed", err); + err = mdbx_dbi_rename(txn_guard.get(), dbi, tablename); + if (err != MDBX_SUCCESS) + failure_perror("jitter.rename-2", err); + } check_dbi_error(MDBX_SUCCESS, "dropped-recreated-aborted+reopened"); v = {(void *)"v003", 4}; err = mdbx_put(txn_guard.get(), dbi, &k, &v, MDBX_UPSERT); diff --git a/test/test.c++ b/test/test.c++ index 77c90c0a..e590d3ce 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -537,29 +537,43 @@ int testcase::db_open__begin__table_create_open_clean(MDBX_dbi &handle) { return err; } -MDBX_dbi testcase::db_table_open(bool create) { - log_trace(">> testcase::db_table_create"); - - char tablename_buf[16]; +const char *testcase::db_tablename(tablename_buf &buffer, + const char *suffix) const { const char *tablename = nullptr; if (config.space_id) { - int rc = snprintf(tablename_buf, sizeof(tablename_buf), "TBL%04u", - config.space_id); + int rc = + snprintf(buffer, sizeof(buffer), "TBL%04u%s", config.space_id, suffix); if (rc < 4 || rc >= (int)sizeof(tablename_buf) - 1) failure("snprintf(tablename): %d", rc); - tablename = 
tablename_buf; + tablename = buffer; } log_debug("use %s table", tablename ? tablename : "MAINDB"); + return tablename; +} + +MDBX_dbi testcase::db_table_open(bool create, bool expect_failure) { + log_trace(">> testcase::db_table_%s%s", create ? "create" : "open", + expect_failure ? "(expect_failure)" : ""); + + tablename_buf buffer; + const char *tablename = db_tablename(buffer); MDBX_dbi handle = 0; - int rc = mdbx_dbi_open(txn_guard.get(), tablename, - (create ? MDBX_CREATE : MDBX_DB_DEFAULTS) | - config.params.table_flags, - &handle); - if (unlikely(rc != MDBX_SUCCESS)) - failure_perror("mdbx_dbi_open()", rc); + int rc = mdbx_dbi_open( + txn_guard.get(), tablename, + create ? (MDBX_CREATE | config.params.table_flags) + : (flipcoin() ? MDBX_DB_ACCEDE + : MDBX_DB_DEFAULTS | config.params.table_flags), + &handle); + if (unlikely(expect_failure != (rc != MDBX_SUCCESS))) { + char act[64]; + snprintf(act, sizeof(act), "mdbx_dbi_open(create=%s,expect_failure=%s)", + create ? "true" : "false", expect_failure ? "true" : "false"); + failure_perror(act, rc); + } - log_trace("<< testcase::db_table_create, handle %u", handle); + log_trace("<< testcase::db_table_%s%s, handle %u", create ? "create" : "open", + expect_failure ? 
"(expect_failure)" : "", handle); return handle; } diff --git a/test/test.h++ b/test/test.h++ index 6158ba66..96d93a7c 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -265,7 +265,10 @@ protected: MDBX_val expected_valued); unsigned txn_underutilization_x256(MDBX_txn *txn) const; - MDBX_dbi db_table_open(bool create); + using tablename_buf = char[32]; + const char *db_tablename(tablename_buf &buffer, + const char *suffix = "") const; + MDBX_dbi db_table_open(bool create, bool expect_failure = false); void db_table_drop(MDBX_dbi handle); void db_table_clear(MDBX_dbi handle, MDBX_txn *txn = nullptr); void db_table_close(MDBX_dbi handle); From 0916d2432151c943d6c29bd9bef5f03e6f6076e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 2 Nov 2023 16:54:32 +0300 Subject: [PATCH 032/137] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF?= =?UTF-8?q?=D1=86=D0=B8=D0=B8=20`ENABLE=5FMEMCHECK`=20=D0=B2=D0=BC=D0=B5?= =?UTF-8?q?=D1=81=D1=82=D0=BE=20`ENABLE=5FVALGRIND`=20=D0=B8=20`MDBX=5FUSE?= =?UTF-8?q?=5FVALGRIND`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- CMakeLists.txt | 2 +- GNUmakefile | 4 +- cmake/profile.cmake | 45 +++++++++++++++------ src/base.h | 4 +- src/config.h.in | 2 +- src/core.c | 96 ++++++++++++++++++++++----------------------- src/internals.h | 6 +-- src/options.h | 4 +- 8 files changed, 91 insertions(+), 72 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 90c2d766..26e08261 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -467,7 +467,7 @@ endif() # #### # # # #### # # #### # -set(MDBX_BUILD_OPTIONS ENABLE_UBSAN ENABLE_ASAN MDBX_USE_VALGRIND ENABLE_GPROF ENABLE_GCOV) +set(MDBX_BUILD_OPTIONS ENABLE_UBSAN ENABLE_ASAN ENABLE_MEMCHECK ENABLE_GPROF ENABLE_GCOV) macro(add_mdbx_option NAME DESCRIPTION DEFAULT) 
list(APPEND MDBX_BUILD_OPTIONS ${NAME}) if(NOT ${DEFAULT} STREQUAL "AUTO") diff --git a/GNUmakefile b/GNUmakefile index 104ae372..d6e222b5 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -428,13 +428,13 @@ test-singleprocess: build-test @echo ' RUNNING `test/long_stochastic.sh --single --loops 2`...' $(QUIET)test/long_stochastic.sh --dont-check-ram-size --single --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) -test-valgrind: CFLAGS_EXTRA=-Ofast -DMDBX_USE_VALGRIND +test-valgrind: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK test-valgrind: build-test @echo ' RUNNING `test/long_stochastic.sh --with-valgrind --loops 2`...' $(QUIET)test/long_stochastic.sh --with-valgrind --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) memcheck: VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt -memcheck: CFLAGS_EXTRA=-Ofast -DMDBX_USE_VALGRIND +memcheck: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK memcheck: build-test @echo " SMOKE \`mdbx_test basic\` under Valgrind's memcheck..." 
$(QUIET)rm -f valgrind-*.log $(TEST_DB) $(TEST_LOG).gz && (set -o pipefail; ( \ diff --git a/cmake/profile.cmake b/cmake/profile.cmake index f13b6976..e4973f51 100644 --- a/cmake/profile.cmake +++ b/cmake/profile.cmake @@ -24,6 +24,25 @@ endif() cmake_policy(PUSH) cmake_policy(VERSION ${CMAKE_MINIMUM_REQUIRED_VERSION}) +unset(MEMCHECK_OPTION_NAME) +if(NOT DEFINED ENABLE_MEMCHECK) + if (DEFINED MDBX_USE_VALGRIND) + set(MEMCHECK_OPTION_NAME "MDBX_USE_VALGRIND") + elseif(DEFINED ENABLE_VALGRIND) + set(MEMCHECK_OPTION_NAME "ENABLE_VALGRIND") + else() + set(MEMCHECK_OPTION_NAME "ENABLE_MEMCHECK") + endif() + if(MEMCHECK_OPTION_NAME STREQUAL "ENABLE_MEMCHECK") + option(ENABLE_MEMCHECK + "Enable integration with valgrind, a memory analyzing tool" OFF) + elseif(${MEMCHECK_OPTION_NAME}) + set(ENABLE_MEMCHECK ON) + else() + set(ENABLE_MEMCHECK OFF) + endif() +endif() + include(CheckLibraryExists) check_library_exists(gcov __gcov_flush "" HAVE_GCOV) @@ -33,23 +52,23 @@ option(ENABLE_GCOV option(ENABLE_GPROF "Enable integration with gprof, a performance analyzing tool" OFF) -if(CMAKE_CXX_COMPILER_LOADED) - include(CheckIncludeFileCXX) - check_include_file_cxx(valgrind/memcheck.h HAVE_VALGRIND_MEMCHECK_H) -else() - include(CheckIncludeFile) - check_include_file(valgrind/memcheck.h HAVE_VALGRIND_MEMCHECK_H) -endif() - -option(MDBX_USE_VALGRIND "Enable integration with valgrind, a memory analyzing tool" OFF) -if(MDBX_USE_VALGRIND AND NOT HAVE_VALGRIND_MEMCHECK_H) - message(FATAL_ERROR "MDBX_USE_VALGRIND option is set but valgrind/memcheck.h is not found") -endif() - option(ENABLE_ASAN "Enable AddressSanitizer, a fast memory error detector based on compiler instrumentation" OFF) option(ENABLE_UBSAN "Enable UndefinedBehaviorSanitizer, a fast undefined behavior detector based on compiler instrumentation" OFF) +if(ENABLE_MEMCHECK) + if(CMAKE_CXX_COMPILER_LOADED) + include(CheckIncludeFileCXX) + check_include_file_cxx(valgrind/memcheck.h HAVE_VALGRIND_MEMCHECK_H) + else() + 
include(CheckIncludeFile) + check_include_file(valgrind/memcheck.h HAVE_VALGRIND_MEMCHECK_H) + endif() + if(NOT HAVE_VALGRIND_MEMCHECK_H) + message(FATAL_ERROR "${MEMCHECK_OPTION_NAME} option is set but valgrind/memcheck.h is not found") + endif() +endif() + cmake_policy(POP) diff --git a/src/base.h b/src/base.h index fd730945..8f1d533f 100644 --- a/src/base.h +++ b/src/base.h @@ -686,7 +686,7 @@ __extern_C key_t ftok(const char *, int); /*----------------------------------------------------------------------------*/ -#if defined(MDBX_USE_VALGRIND) +#if defined(ENABLE_MEMCHECK) #include #ifndef VALGRIND_DISABLE_ADDR_ERROR_REPORTING_IN_RANGE /* LY: available since Valgrind 3.10 */ @@ -708,7 +708,7 @@ __extern_C key_t ftok(const char *, int); #define VALGRIND_CHECK_MEM_IS_ADDRESSABLE(a, s) (0) #define VALGRIND_CHECK_MEM_IS_DEFINED(a, s) (0) #define RUNNING_ON_VALGRIND (0) -#endif /* MDBX_USE_VALGRIND */ +#endif /* ENABLE_MEMCHECK */ #ifdef __SANITIZE_ADDRESS__ #include diff --git a/src/config.h.in b/src/config.h.in index 0304db03..88a282c8 100644 --- a/src/config.h.in +++ b/src/config.h.in @@ -5,7 +5,7 @@ /* clang-format off */ #cmakedefine LTO_ENABLED -#cmakedefine MDBX_USE_VALGRIND +#cmakedefine ENABLE_MEMCHECK #cmakedefine ENABLE_GPROF #cmakedefine ENABLE_GCOV #cmakedefine ENABLE_ASAN diff --git a/src/core.c b/src/core.c index 56fb4f6b..e94f4eba 100644 --- a/src/core.c +++ b/src/core.c @@ -4935,7 +4935,7 @@ status_done: return MDBX_SUCCESS; } -#if !MDBX_DEBUG && !defined(MDBX_USE_VALGRIND) && !defined(__SANITIZE_ADDRESS__) +#if !MDBX_DEBUG && !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) if (unlikely(txn->mt_env->me_flags & MDBX_PAGEPERTURB)) #endif { @@ -4952,7 +4952,7 @@ status_done: goto skip_invalidate; } -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) if (MDBX_DEBUG != 0 || unlikely(txn->mt_env->me_flags & MDBX_PAGEPERTURB)) #endif kill_page(txn, mp, pgno, npages); 
@@ -6485,9 +6485,9 @@ __cold static int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, } const size_t limit_bytes = pgno_align2os_bytes(env, limit_pgno); const size_t size_bytes = pgno_align2os_bytes(env, size_pgno); -#if MDBX_ENABLE_MADVISE || defined(MDBX_USE_VALGRIND) +#if MDBX_ENABLE_MADVISE || defined(ENABLE_MEMCHECK) const void *const prev_map = env->me_dxb_mmap.base; -#endif /* MDBX_ENABLE_MADVISE || MDBX_USE_VALGRIND */ +#endif /* MDBX_ENABLE_MADVISE || ENABLE_MEMCHECK */ VERBOSE("resize/%d datafile/mapping: " "present %" PRIuPTR " -> %" PRIuPTR ", " @@ -6672,7 +6672,7 @@ bailout: env->me_dbgeo.now = env->me_dxb_mmap.current; env->me_dbgeo.upper = env->me_dxb_mmap.limit; adjust_defaults(env); -#ifdef MDBX_USE_VALGRIND +#ifdef ENABLE_MEMCHECK if (prev_limit != env->me_dxb_mmap.limit || prev_map != env->me_map) { VALGRIND_DISCARD(env->me_valgrind_handle); env->me_valgrind_handle = 0; @@ -6680,7 +6680,7 @@ bailout: env->me_valgrind_handle = VALGRIND_CREATE_BLOCK(env->me_map, env->me_dxb_mmap.limit, "mdbx"); } -#endif /* MDBX_USE_VALGRIND */ +#endif /* ENABLE_MEMCHECK */ } else { if (rc != MDBX_UNABLE_EXTEND_MAPSIZE && rc != MDBX_EPERM) { ERROR("failed resize datafile/mapping: " @@ -6979,9 +6979,9 @@ scan4seq_sse2(pgno_t *range, const size_t len, const size_t seq) { do { mask = (uint8_t)diffcmp2mask_sse2(range - 3, offset, pattern); if (mask) { -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) found: -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ return range + 28 - __builtin_clz(mask); } range -= 4; @@ -6994,7 +6994,7 @@ scan4seq_sse2(pgno_t *range, const size_t len, const size_t seq) { * только за пределами региона выделенного под PNL, но и пересекать границу * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. 
*/ -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) const unsigned on_page_safe_mask = 0xff0 /* enough for '-15' bytes offset */; if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) { @@ -7006,7 +7006,7 @@ scan4seq_sse2(pgno_t *range, const size_t len, const size_t seq) { goto found; return nullptr; } -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ do if (*range - range[offset] == target) return range; @@ -7050,9 +7050,9 @@ scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) { do { mask = (uint8_t)diffcmp2mask_avx2(range - 7, offset, pattern); if (mask) { -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) found: -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ return range + 24 - __builtin_clz(mask); } range -= 8; @@ -7065,7 +7065,7 @@ scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) { * только за пределами региона выделенного под PNL, но и пересекать границу * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. 
*/ -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) const unsigned on_page_safe_mask = 0xfe0 /* enough for '-31' bytes offset */; if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) { @@ -7077,7 +7077,7 @@ scan4seq_avx2(pgno_t *range, const size_t len, const size_t seq) { goto found; return nullptr; } -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ if (range - 3 > detent) { mask = diffcmp2mask_sse2avx(range - 3, offset, *(const __m128i *)&pattern); if (mask) @@ -7118,9 +7118,9 @@ scan4seq_avx512bw(pgno_t *range, const size_t len, const size_t seq) { do { mask = diffcmp2mask_avx512bw(range - 15, offset, pattern); if (mask) { -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) found: -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ return range + 16 - __builtin_clz(mask); } range -= 16; @@ -7133,7 +7133,7 @@ scan4seq_avx512bw(pgno_t *range, const size_t len, const size_t seq) { * только за пределами региона выделенного под PNL, но и пересекать границу * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. 
*/ -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) const unsigned on_page_safe_mask = 0xfc0 /* enough for '-63' bytes offset */; if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) { @@ -7145,7 +7145,7 @@ scan4seq_avx512bw(pgno_t *range, const size_t len, const size_t seq) { goto found; return nullptr; } -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ if (range - 7 > detent) { mask = diffcmp2mask_avx2(range - 7, offset, *(const __m256i *)&pattern); if (mask) @@ -7198,9 +7198,9 @@ __hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len, do { mask = diffcmp2mask_neon(range - 3, offset, pattern); if (mask) { -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) found: -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ return ptr_disp(range, -(__builtin_clzl(mask) >> sizeof(size_t) / 4)); } range -= 4; @@ -7213,7 +7213,7 @@ __hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len, * только за пределами региона выделенного под PNL, но и пересекать границу * страницы памяти. Что может приводить как к ошибкам ASAN, так и к падению. * Поэтому проверяем смещение на странице, а с ASAN всегда страхуемся. 
*/ -#ifndef __SANITIZE_ADDRESS__ +#if !defined(ENABLE_MEMCHECK) && !defined(__SANITIZE_ADDRESS__) const unsigned on_page_safe_mask = 0xff0 /* enough for '-15' bytes offset */; if (likely(on_page_safe_mask & (uintptr_t)(range + offset)) && !RUNNING_ON_VALGRIND) { @@ -7225,7 +7225,7 @@ __hot static pgno_t *scan4seq_neon(pgno_t *range, const size_t len, goto found; return nullptr; } -#endif /* __SANITIZE_ADDRESS__ */ +#endif /* !ENABLE_MEMCHECK && !__SANITIZE_ADDRESS__ */ do if (*range - range[offset] == target) return range; @@ -8712,7 +8712,7 @@ __cold int mdbx_env_sync_ex(MDBX_env *env, bool force, bool nonblock) { return env_sync(env, force, nonblock); } -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) /* Find largest mvcc-snapshot still referenced by this process. */ static pgno_t find_largest_this(MDBX_env *env, pgno_t largest) { MDBX_lockinfo *const lck = env->me_lck_mmap.lck; @@ -8790,7 +8790,7 @@ static void txn_valgrind(MDBX_env *env, MDBX_txn *txn) { osal_txn_unlock(env); } } -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ typedef struct { int err; @@ -9526,9 +9526,9 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { if (rc != MDBX_SUCCESS) goto bailout; } -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) txn_valgrind(env, txn); -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ return MDBX_SUCCESS; } bailout: @@ -10112,9 +10112,9 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { eASSERT(env, txn->mt_txnid == slot->mr_txnid.weak && slot->mr_txnid.weak >= env->me_lck->mti_oldest_reader.weak); -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) txn_valgrind(env, nullptr); -#endif /* 
MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ atomic_store32(&slot->mr_snapshot_pages_used, 0, mo_Relaxed); safe64_reset(&slot->mr_txnid, false); atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, @@ -10140,10 +10140,10 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { ENSURE(env, txn->mt_txnid >= /* paranoia is appropriate here */ env->me_lck ->mti_oldest_reader.weak); -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) if (txn == env->me_txn0) txn_valgrind(env, nullptr); -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ txn->mt_flags = MDBX_TXN_FINISHED; env->me_txn = txn->mt_parent; @@ -10588,13 +10588,13 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx) { } static __inline void gcu_clean_reserved(MDBX_env *env, MDBX_val pnl) { -#if MDBX_DEBUG && (defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)) +#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() * вызванное через макрос DVAL_DEBUG() на выходе * из cursor_set(MDBX_SET_KEY), которая вызывается ниже внутри update_gc() в * цикле очистки и цикле заполнения зарезервированных элементов. 
*/ memset(pnl.iov_base, 0xBB, pnl.iov_len); -#endif /* MDBX_DEBUG && (MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__) */ +#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ /* PNL is initially empty, zero out at least the length */ memset(pnl.iov_base, 0, sizeof(pgno_t)); @@ -10911,14 +10911,14 @@ retry: if (unlikely(rc != MDBX_SUCCESS)) goto bailout; -#if MDBX_DEBUG && (defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)) +#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() * вызванное через макрос DVAL_DEBUG() на выходе * из cursor_set(MDBX_SET_KEY), которая вызывается как выше в цикле * очистки, так и ниже в цикле заполнения зарезервированных элементов. */ memset(data.iov_base, 0xBB, data.iov_len); -#endif /* MDBX_DEBUG && (MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__) */ +#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ if (retired_pages_before == MDBX_PNL_GETSIZE(txn->tw.retired_pages)) { const size_t at = (ctx->lifo == MDBX_PNL_ASCENDING) @@ -10958,14 +10958,14 @@ retry: if (unlikely(rc != MDBX_SUCCESS)) goto bailout; -#if MDBX_DEBUG && (defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__)) +#if MDBX_DEBUG && (defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__)) /* Для предотвращения предупреждения Valgrind из mdbx_dump_val() * вызванное через макрос DVAL_DEBUG() на выходе * из cursor_set(MDBX_SET_KEY), которая вызывается как выше в цикле * очистки, так и ниже в цикле заполнения зарезервированных элементов. 
*/ memset(data.iov_base, 0xBB, data.iov_len); -#endif /* MDBX_DEBUG && (MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__) */ +#endif /* MDBX_DEBUG && (ENABLE_MEMCHECK || __SANITIZE_ADDRESS__) */ /* Retry if tw.retired_pages[] grew during the Put() */ } while (data.iov_len < MDBX_PNL_SIZEOF(txn->tw.retired_pages)); @@ -12967,7 +12967,7 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending, : pending->mm_geo.next); eASSERT(env, largest_pgno >= NUM_METAS); -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) const pgno_t edge = env->me_poison_edge; if (edge > largest_pgno) { env->me_poison_edge = largest_pgno; @@ -12978,7 +12978,7 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending, ptr_disp(env->me_map, pgno2bytes(env, largest_pgno)), pgno2bytes(env, edge - largest_pgno)); } -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ #if MDBX_ENABLE_MADVISE && \ (defined(MADV_DONTNEED) || defined(POSIX_MADV_DONTNEED)) @@ -14188,14 +14188,14 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, #endif /* MADV_DODUMP */ #endif /* MDBX_ENABLE_MADVISE */ -#ifdef MDBX_USE_VALGRIND +#ifdef ENABLE_MEMCHECK env->me_valgrind_handle = VALGRIND_CREATE_BLOCK(env->me_map, env->me_dxb_mmap.limit, "mdbx"); -#endif /* MDBX_USE_VALGRIND */ +#endif /* ENABLE_MEMCHECK */ eASSERT(env, used_bytes >= pgno2bytes(env, NUM_METAS) && used_bytes <= env->me_dxb_mmap.limit); -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) if (env->me_dxb_mmap.filesize > used_bytes && env->me_dxb_mmap.filesize < env->me_dxb_mmap.limit) { VALGRIND_MAKE_MEM_NOACCESS(ptr_disp(env->me_map, used_bytes), @@ -14207,7 +14207,7 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, bytes2pgno(env, (env->me_dxb_mmap.filesize < env->me_dxb_mmap.limit) ? 
env->me_dxb_mmap.filesize : env->me_dxb_mmap.limit); -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ meta_troika_t troika = meta_tap(env); #if MDBX_DEBUG @@ -15681,9 +15681,9 @@ bailout: env->me_flags = saved_me_flags | ((rc != MDBX_PANIC) ? 0 : MDBX_FATAL_ERROR); } else { -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) txn_valgrind(env, nullptr); -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ } osal_free(env_pathname.buffer_for_free); return rc; @@ -15725,7 +15725,7 @@ __cold static int env_close(MDBX_env *env) { if (env->me_map) { osal_munmap(&env->me_dxb_mmap); -#ifdef MDBX_USE_VALGRIND +#ifdef ENABLE_MEMCHECK VALGRIND_DISCARD(env->me_valgrind_handle); env->me_valgrind_handle = -1; #endif @@ -28392,9 +28392,9 @@ __dll_export #ifdef __SANITIZE_ADDRESS__ " SANITIZE_ADDRESS=YES" #endif /* __SANITIZE_ADDRESS__ */ -#ifdef MDBX_USE_VALGRIND - " MDBX_USE_VALGRIND=YES" -#endif /* MDBX_USE_VALGRIND */ +#ifdef ENABLE_MEMCHECK + " ENABLE_MEMCHECK=YES" +#endif /* ENABLE_MEMCHECK */ #if MDBX_FORCE_ASSERTIONS " MDBX_FORCE_ASSERTIONS=YES" #endif /* MDBX_FORCE_ASSERTIONS */ diff --git a/src/internals.h b/src/internals.h index f38c7f7a..6a5d8018 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1493,12 +1493,12 @@ struct MDBX_env { #if MDBX_DEBUG MDBX_assert_func *me_assert_func; /* Callback for assertion failures */ #endif -#ifdef MDBX_USE_VALGRIND +#ifdef ENABLE_MEMCHECK int me_valgrind_handle; #endif -#if defined(MDBX_USE_VALGRIND) || defined(__SANITIZE_ADDRESS__) +#if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) pgno_t me_poison_edge; -#endif /* MDBX_USE_VALGRIND || __SANITIZE_ADDRESS__ */ +#endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ #ifndef xMDBX_DEBUG_SPILLING #define xMDBX_DEBUG_SPILLING 0 diff --git a/src/options.h b/src/options.h index 
9aff6755..fe47904e 100644 --- a/src/options.h +++ b/src/options.h @@ -224,8 +224,8 @@ /** If defined then enables integration with Valgrind, * a memory analyzing tool. */ -#ifndef MDBX_USE_VALGRIND -#endif /* MDBX_USE_VALGRIND */ +#ifndef ENABLE_MEMCHECK +#endif /* ENABLE_MEMCHECK */ /** If defined then enables use C11 atomics, * otherwise detects ones availability automatically. */ From 24d5b26bc50b333466d8e17b0c45b3b80ccfb13a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 2 Nov 2023 20:10:59 +0300 Subject: [PATCH 033/137] =?UTF-8?q?mdbx-make:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=B8=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20?= =?UTF-8?q?=D1=86=D0=B5=D0=BB=D0=B5=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GNUmakefile | 47 ++++++++++++++++++++++++++++------------------- Makefile | 7 ++++--- 2 files changed, 32 insertions(+), 22 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index d6e222b5..0ddce68e 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -172,22 +172,22 @@ help: @echo " make bench-clean - remove temp database(s) after benchmark" #> dist-cutoff-begin @echo "" - @echo " make smoke - fast smoke test" - @echo " make test - basic test" @echo " make check - smoke test with amalgamation and installation checking" - @echo " make long-test - execute long test which runs for several weeks, or until you interrupt it" - @echo " make memcheck - build with Valgrind's and smoke test with memcheck tool" - @echo " make test-valgrind - build with Valgrind's and basic test with memcheck tool" - @echo " make test-asan - build with AddressSanitizer and basic test" - @echo " make test-leak - build with LeakSanitizer and basic test" - @echo " make test-ubsan - build with UndefinedBehaviourSanitizer and basic test" + @echo " make smoke - fast smoke test" + @echo " make smoke-memcheck - 
build with Valgrind support and run smoke test under memcheck tool" + @echo " make smoke-fault - execute transaction owner failure smoke testcase" + @echo " make smoke-singleprocess - execute single-process smoke test" + @echo " make test - basic test" + @echo " make test-memcheck - build with Valgrind support and run basic test under memcheck tool" + @echo " make test-long - execute long test which runs for several weeks, or until interruption" + @echo " make test-asan - build with AddressSanitizer and run basic test" + @echo " make test-leak - build with LeakSanitizer and run basic test" + @echo " make test-ubsan - build with UndefinedBehaviourSanitizer and run basic test" + @echo " make test-singleprocess - execute single-process basic test (also used by make cross-qemu)" @echo " make cross-gcc - check cross-compilation without test execution" @echo " make cross-qemu - run cross-compilation and execution basic test with QEMU" @echo " make gcc-analyzer - run gcc-analyzer (mostly useless for now)" @echo " make build-test - build test executable(s)" - @echo " make smoke-fault - execute transaction owner failure smoke testcase" - @echo " make smoke-singleprocess - execute single-process smoke test" - @echo " make test-singleprocess - execute single-process basic test (also used by make cross-qemu)" @echo "" @echo " make dist - build amalgamated source code" @echo " make doxygen - build HTML documentation" @@ -328,8 +328,14 @@ else .PHONY: build-test build-test-with-valgrind check cross-gcc cross-qemu dist doxygen gcc-analyzer long-test .PHONY: reformat release-assets tags smoke test test-asan smoke-fault test-leak -.PHONY: smoke-singleprocess test-singleprocess test-ubsan test-valgrind memcheck -.PHONY: smoke-assertion test-assertion long-test-assertion +.PHONY: smoke-singleprocess test-singleprocess test-ubsan test-valgrind test-memcheck memcheck smoke-memcheck +.PHONY: smoke-assertion test-assertion long-test-assertion test-ci test-ci-extra + +test-ci-extra: 
test-ci cross-gcc cross-qemu + +test-ci: check \ + smoke-singleprocess smoke-fault smoke-memcheck smoke \ + test-leak test-asan test-ubsan test-singleprocess test test-memcheck define uname2osal case "$(UNAME)" in @@ -420,7 +426,8 @@ test: build-test @echo ' RUNNING `test/long_stochastic.sh --loops 2`...' $(QUIET)test/long_stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) -long-test: build-test +long-test: test-long +test-long: build-test @echo ' RUNNING `test/long_stochastic.sh --loops 42`...' $(QUIET)test/long_stochastic.sh --loops 42 --db-upto-mb 1024 --skip-make --taillog @@ -428,14 +435,16 @@ test-singleprocess: build-test @echo ' RUNNING `test/long_stochastic.sh --single --loops 2`...' $(QUIET)test/long_stochastic.sh --dont-check-ram-size --single --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) -test-valgrind: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK -test-valgrind: build-test +test-valgrind: test-memcheck +test-memcheck: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK +test-memcheck: build-test @echo ' RUNNING `test/long_stochastic.sh --with-valgrind --loops 2`...' $(QUIET)test/long_stochastic.sh --with-valgrind --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) -memcheck: VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt -memcheck: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK -memcheck: build-test +memcheck: smoke-memcheck +smoke-memcheck: VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt +smoke-memcheck: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK +smoke-memcheck: build-test @echo " SMOKE \`mdbx_test basic\` under Valgrind's memcheck..." 
$(QUIET)rm -f valgrind-*.log $(TEST_DB) $(TEST_LOG).gz && (set -o pipefail; ( \ $(VALGRIND) ./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-after $(MDBX_SMOKE_EXTRA) basic && \ diff --git a/Makefile b/Makefile index 599e4787..78ba3483 100644 --- a/Makefile +++ b/Makefile @@ -6,9 +6,10 @@ bench bench-clean bench-couple bench-quartet bench-triplet re-bench \ lib libs lib-static lib-shared tools-static \ libmdbx mdbx mdbx_chk mdbx_copy mdbx_drop mdbx_dump mdbx_load mdbx_stat \ check dist memcheck cross-gcc cross-qemu doxygen gcc-analyzer reformat \ -release-assets tags test build-test mdbx_test smoke smoke-fault smoke-singleprocess \ -smoke-assertion test-assertion long-test-assertion \ -test-asan test-leak test-singleprocess test-ubsan test-valgrind: +release-assets tags build-test mdbx_test \ +smoke smoke-fault smoke-singleprocess smoke-assertion smoke-memcheck \ +test test-assertion test-long test-long-assertion test-ci test-ci-extra \ +test-asan test-leak test-singleprocess test-ubsan test-memcheck: @CC=$(CC) \ CXX=`if test -n "$(CXX)" && which "$(CXX)" > /dev/null; then echo "$(CXX)"; elif test -n "$(CCC)" && which "$(CCC)" > /dev/null; then echo "$(CCC)"; else echo "c++"; fi` \ `which gmake || which gnumake || echo 'echo "GNU Make 3.80 or above is required"; exit 2;'` \ From 9a6f8a1bf86de125d81714c3b121394740befbc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 10 Nov 2023 12:33:21 +0300 Subject: [PATCH 034/137] =?UTF-8?q?mdbx-test:=20=D1=83=D0=B2=D0=B5=D0=BB?= =?UTF-8?q?=D0=B8=D1=87=D0=B5=D0=BD=D0=B8=D0=B5=20tail-log=20=D0=B4=D0=BE?= =?UTF-8?q?=203333=20=D0=B4=D0=BB=D1=8F=20CI.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/long_stochastic.sh | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index c03c83da..ff73726c 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -41,7 +41,7 @@ do exit -2 ;; --taillog) - TAILLOG=999 + TAILLOG=3333 ;; --multi) LIST=basic From dea6570fc192814a06ae9d6bd4f1501e5cab3bc9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 10 Nov 2023 13:48:06 +0300 Subject: [PATCH 035/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20`coherency=5Fcheck()`=20=D0=B4=D0=BB?= =?UTF-8?q?=D1=8F=20=D1=81=D0=BB=D1=83=D1=87=D0=B0=D1=8F=20=D0=BF=D0=BB?= =?UTF-8?q?=D0=BE=D1=85=D0=B8=D1=85=20=D0=BD=D0=BE=D0=BC=D0=B5=D1=80=D0=BE?= =?UTF-8?q?=D0=B2=20=D0=BA=D0=BE=D1=80=D0=BD=D0=B5=D0=B2=D1=8B=D1=85=20?= =?UTF-8?q?=D1=81=D1=82=D1=80=D0=B0=D0=BD=D0=B8=D1=86.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index e94f4eba..6d62234e 100644 --- a/src/core.c +++ b/src/core.c @@ -8939,20 +8939,45 @@ static bool coherency_check(const MDBX_env *env, const txnid_t txnid, const volatile MDBX_meta *meta, bool report) { const txnid_t freedb_mod_txnid = dbs[FREE_DBI].md_mod_txnid; const txnid_t maindb_mod_txnid = dbs[MAIN_DBI].md_mod_txnid; + const pgno_t last_pgno = meta->mm_geo.now; const pgno_t freedb_root_pgno = dbs[FREE_DBI].md_root; - const MDBX_page *freedb_root = (env->me_map && freedb_root_pgno != P_INVALID) + const MDBX_page *freedb_root = (env->me_map && freedb_root_pgno < last_pgno) ? 
pgno2page(env, freedb_root_pgno) : nullptr; const pgno_t maindb_root_pgno = dbs[MAIN_DBI].md_root; - const MDBX_page *maindb_root = (env->me_map && maindb_root_pgno != P_INVALID) + const MDBX_page *maindb_root = (env->me_map && maindb_root_pgno < last_pgno) ? pgno2page(env, maindb_root_pgno) : nullptr; const uint64_t magic_and_version = unaligned_peek_u64_volatile(4, &meta->mm_magic_and_version); bool ok = true; + if (freedb_root_pgno != P_INVALID && + unlikely(freedb_root_pgno >= last_pgno)) { + if (report) + WARNING( + "catch invalid %sdb root %" PRIaPGNO " for meta_txnid %" PRIaTXN + " %s", + "free", freedb_root_pgno, txnid, + (env->me_stuck_meta < 0) + ? "(workaround for incoherent flaw of unified page/buffer cache)" + : "(wagering meta)"); + ok = false; + } + if (maindb_root_pgno != P_INVALID && + unlikely(maindb_root_pgno >= last_pgno)) { + if (report) + WARNING( + "catch invalid %sdb root %" PRIaPGNO " for meta_txnid %" PRIaTXN + " %s", + "main", maindb_root_pgno, txnid, + (env->me_stuck_meta < 0) + ? 
"(workaround for incoherent flaw of unified page/buffer cache)" + : "(wagering meta)"); + ok = false; + } if (unlikely(txnid < freedb_mod_txnid || (!freedb_mod_txnid && freedb_root && likely(magic_and_version == MDBX_DATA_MAGIC)))) { From 6477e6c5de3afb3ac47f960bbe1cfe049e11f794 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 00:00:33 +0300 Subject: [PATCH 036/137] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B0=D0=B2=D1=82=D0=BE?= =?UTF-8?q?-=D1=83=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BA=D0=B8=20`MDBX?= =?UTF-8?q?=5FENV=5FCHECKPID`=20=D0=B4=D0=BB=D1=8F=20=D1=81=D0=BB=D1=83?= =?UTF-8?q?=D1=87=D0=B0=D1=8F=20`MDBX=5FENABLE=5FMADVISE=3D0`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/options.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/options.h b/src/options.h index fe47904e..21dd57bc 100644 --- a/src/options.h +++ b/src/options.h @@ -28,9 +28,17 @@ #define MDBX_OSX_SPEED_INSTEADOF_DURABILITY MDBX_OSX_WANNA_DURABILITY #endif /* MDBX_OSX_SPEED_INSTEADOF_DURABILITY */ +/** Controls using of POSIX' madvise() and/or similar hints. */ +#ifndef MDBX_ENABLE_MADVISE +#define MDBX_ENABLE_MADVISE 1 +#elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1) +#error MDBX_ENABLE_MADVISE must be defined as 0 or 1 +#endif /* MDBX_ENABLE_MADVISE */ + /** Controls checking PID against reuse DB environment after the fork() */ #ifndef MDBX_ENV_CHECKPID -#if defined(MADV_DONTFORK) || defined(_WIN32) || defined(_WIN64) +#if (defined(MADV_DONTFORK) && MDBX_ENABLE_MADVISE) || defined(_WIN32) || \ + defined(_WIN64) /* PID check could be omitted: * - on Linux when madvise(MADV_DONTFORK) is available, i.e. after the fork() * mapped pages will not be available for child process. 
@@ -117,13 +125,6 @@ #error MDBX_ENABLE_BIGFOOT must be defined as 0 or 1 #endif /* MDBX_ENABLE_BIGFOOT */ -/** Controls using of POSIX' madvise() and/or similar hints. */ -#ifndef MDBX_ENABLE_MADVISE -#define MDBX_ENABLE_MADVISE 1 -#elif !(MDBX_ENABLE_MADVISE == 0 || MDBX_ENABLE_MADVISE == 1) -#error MDBX_ENABLE_MADVISE must be defined as 0 or 1 -#endif /* MDBX_ENABLE_MADVISE */ - /** Disable some checks to reduce an overhead and detection probability of * database corruption to a values closer to the LMDB. */ #ifndef MDBX_DISABLE_VALIDATION From 2fe01eee89eb205512127f5c6440419ecc41c15c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 20:07:21 +0300 Subject: [PATCH 037/137] =?UTF-8?q?mdbx:=20=D0=BF=D1=80=D0=BE=D0=B2=D0=B5?= =?UTF-8?q?=D1=80=D0=BA=D0=B0=20pid=20=D1=82=D0=BE=D0=BB=D1=8C=D0=BA=D0=BE?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20=D0=B7=D0=B0=D0=BF=D1=80=D0=BE=D1=81?= =?UTF-8?q?=D0=B0=20=D0=B0=D0=BA=D1=82=D0=B8=D0=B2=D0=BD=D0=BE=D0=B9=20env?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/src/core.c b/src/core.c index 6d62234e..3d3f22ac 100644 --- a/src/core.c +++ b/src/core.c @@ -8685,17 +8685,16 @@ static __inline int check_env(const MDBX_env *env, const bool wanna_active) { if (unlikely(env->me_signature.weak != MDBX_ME_SIGNATURE)) return MDBX_EBADSIGN; -#if MDBX_ENV_CHECKPID - if (unlikely(env->me_pid != osal_getpid())) { - ((MDBX_env *)env)->me_flags |= MDBX_FATAL_ERROR; - return MDBX_PANIC; - } -#endif /* MDBX_ENV_CHECKPID */ - if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) return MDBX_PANIC; if (wanna_active) { +#if MDBX_ENV_CHECKPID + if (unlikely(env->me_pid != osal_getpid())) { + ((MDBX_env *)env)->me_flags |= MDBX_FATAL_ERROR; + return MDBX_PANIC; + } +#endif /* MDBX_ENV_CHECKPID 
*/ if (unlikely((env->me_flags & MDBX_ENV_ACTIVE) == 0)) return MDBX_EPERM; eASSERT(env, env->me_map != nullptr); From 1b2f5f25d4e6cd66578b96c586ffd18acab9404e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 20:10:44 +0300 Subject: [PATCH 038/137] =?UTF-8?q?mdbx:=20=D1=80=D0=B5=D1=84=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20=D0=B8=20=D0=B2=D1=8B?= =?UTF-8?q?=D0=BD=D0=BE=D1=81=20`txn=5Fabort()`=20=D0=B1=D0=B5=D0=B7=20?= =?UTF-8?q?=D0=BA=D0=BE=D0=B4=D0=B0=20=D0=B2=D1=85=D0=BE=D0=B4=D1=8F=D1=89?= =?UTF-8?q?=D0=B8=D1=85=20=D0=BF=D0=B5=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BE?= =?UTF-8?q?=D0=BA.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) diff --git a/src/core.c b/src/core.c index 3d3f22ac..bc96a044 100644 --- a/src/core.c +++ b/src/core.c @@ -10108,13 +10108,6 @@ static int txn_end(MDBX_txn *txn, const unsigned mode) { MDBX_env *env = txn->mt_env; static const char *const names[] = TXN_END_NAMES; -#if MDBX_ENV_CHECKPID - if (unlikely(txn->mt_env->me_pid != osal_getpid())) { - env->me_flags |= MDBX_FATAL_ERROR; - return MDBX_PANIC; - } -#endif /* MDBX_ENV_CHECKPID */ - DEBUG("%s txn %" PRIaTXN "%c %p on env %p, root page %" PRIaPGNO "/%" PRIaPGNO, names[mode & TXN_END_OPMASK], txn->mt_txnid, @@ -10288,11 +10281,7 @@ int mdbx_txn_break(MDBX_txn *txn) { return MDBX_SUCCESS; } -int mdbx_txn_abort(MDBX_txn *txn) { - int rc = check_txn(txn, 0); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - +static int txn_abort(MDBX_txn *txn) { if (txn->mt_flags & MDBX_TXN_RDONLY) /* LY: don't close DBI-handles */ return txn_end(txn, TXN_END_ABORT | TXN_END_UPDATE | TXN_END_SLOT | @@ -10302,12 +10291,24 @@ int mdbx_txn_abort(MDBX_txn *txn) { return MDBX_BAD_TXN; if (txn->mt_child) - 
mdbx_txn_abort(txn->mt_child); + txn_abort(txn->mt_child); tASSERT(txn, (txn->mt_flags & MDBX_TXN_ERROR) || dirtylist_check(txn)); return txn_end(txn, TXN_END_ABORT | TXN_END_SLOT | TXN_END_FREE); } +int mdbx_txn_abort(MDBX_txn *txn) { + int rc = check_txn(txn, 0); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + rc = check_env(txn->mt_env, true); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + return txn_abort(txn); +} + __cold static MDBX_db *audit_db_dig(const MDBX_txn *txn, const size_t dbi, MDBX_db *fallback) { const MDBX_txn *dig = txn; From 97418d5c9c3d55bf25e1c196601cdfb1e541740f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 20:16:06 +0300 Subject: [PATCH 039/137] =?UTF-8?q?mdbx:=20=D1=80=D0=B5=D1=84=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20`env=5Fhandle=5Fpathname?= =?UTF-8?q?()`=20=D0=B4=D0=BB=D1=8F=20=D0=BE=D0=B4=D0=BD=D0=BE=D0=B9=20?= =?UTF-8?q?=D1=82=D0=BE=D1=87=D0=BA=D0=B8=20=D0=B2=D1=8B=D0=B4=D0=B5=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D1=8F/=D0=BE=D1=81=D0=B2=D0=BE=D0=B1=D0=BE?= =?UTF-8?q?=D0=B6=D0=B4=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=BF=D0=B0=D0=BC=D1=8F?= =?UTF-8?q?=D1=82=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 203 ++++++++++++++++++++++-------------------------- src/internals.h | 10 ++- 2 files changed, 101 insertions(+), 112 deletions(-) diff --git a/src/core.c b/src/core.c index bc96a044..03bd80ed 100644 --- a/src/core.c +++ b/src/core.c @@ -14535,12 +14535,12 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, /******************************************************************************/ /* Open and/or initialize the lock region for the environment. 
*/ -__cold static int setup_lck(MDBX_env *env, pathchar_t *lck_pathname, - mdbx_mode_t mode) { +__cold static int setup_lck(MDBX_env *env, mdbx_mode_t mode) { eASSERT(env, env->me_lazy_fd != INVALID_HANDLE_VALUE); eASSERT(env, env->me_lfd == INVALID_HANDLE_VALUE); - int err = osal_openfile(MDBX_OPEN_LCK, env, lck_pathname, &env->me_lfd, mode); + int err = osal_openfile(MDBX_OPEN_LCK, env, env->me_pathname.lck, + &env->me_lfd, mode); if (err != MDBX_SUCCESS) { switch (err) { default: @@ -14559,7 +14559,7 @@ __cold static int setup_lck(MDBX_env *env, pathchar_t *lck_pathname, if (err != MDBX_ENOFILE) { /* ENSURE the file system is read-only */ - err = osal_check_fs_rdonly(env->me_lazy_fd, lck_pathname, err); + err = osal_check_fs_rdonly(env->me_lazy_fd, env->me_pathname.lck, err); if (err != MDBX_SUCCESS && /* ignore ERROR_NOT_SUPPORTED for exclusive mode */ !(err == MDBX_ENOSYS && (env->me_flags & MDBX_EXCLUSIVE))) @@ -14965,12 +14965,6 @@ __cold int mdbx_env_open_for_recoveryW(MDBX_env *env, const wchar_t *pathname, 0); } -typedef struct { - void *buffer_for_free; - pathchar_t *lck, *dxb; - size_t ent_len; -} MDBX_handle_env_pathname; - __cold static int check_alternative_lck_absent(const pathchar_t *lck_pathname) { int err = osal_fileexists(lck_pathname); if (unlikely(err != MDBX_RESULT_FALSE)) { @@ -14982,11 +14976,9 @@ __cold static int check_alternative_lck_absent(const pathchar_t *lck_pathname) { return err; } -__cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx, - const pathchar_t *pathname, - MDBX_env_flags_t *flags, +__cold static int env_handle_pathname(MDBX_env *env, const pathchar_t *pathname, const mdbx_mode_t mode) { - memset(ctx, 0, sizeof(*ctx)); + memset(&env->me_pathname, 0, sizeof(env->me_pathname)); if (unlikely(!pathname || !*pathname)) return MDBX_EINVAL; @@ -14997,21 +14989,22 @@ __cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx, rc = GetLastError(); if (rc != MDBX_ENOFILE) return rc; - if (mode == 0 || 
(*flags & MDBX_RDONLY) != 0) + if (mode == 0 || (env->me_flags & MDBX_RDONLY) != 0) /* can't open existing */ return rc; /* auto-create directory if requested */ - if ((*flags & MDBX_NOSUBDIR) == 0 && !CreateDirectoryW(pathname, nullptr)) { + if ((env->me_flags & MDBX_NOSUBDIR) == 0 && + !CreateDirectoryW(pathname, nullptr)) { rc = GetLastError(); if (rc != ERROR_ALREADY_EXISTS) return rc; } } else { /* ignore passed MDBX_NOSUBDIR flag and set it automatically */ - *flags |= MDBX_NOSUBDIR; + env->me_flags |= MDBX_NOSUBDIR; if (dwAttrib & FILE_ATTRIBUTE_DIRECTORY) - *flags -= MDBX_NOSUBDIR; + env->me_flags -= MDBX_NOSUBDIR; } #else struct stat st; @@ -15019,7 +15012,7 @@ __cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx, rc = errno; if (rc != MDBX_ENOFILE) return rc; - if (mode == 0 || (*flags & MDBX_RDONLY) != 0) + if (mode == 0 || (env->me_flags & MDBX_RDONLY) != 0) /* can't open non-existing */ return rc /* MDBX_ENOFILE */; @@ -15030,16 +15023,16 @@ __cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx, /* always add read/write/search for owner */ S_IRWXU | ((mode & S_IRGRP) ? /* +search if readable by group */ S_IXGRP : 0) | ((mode & S_IROTH) ? 
/* +search if readable by others */ S_IXOTH : 0); - if ((*flags & MDBX_NOSUBDIR) == 0 && mkdir(pathname, dir_mode)) { + if ((env->me_flags & MDBX_NOSUBDIR) == 0 && mkdir(pathname, dir_mode)) { rc = errno; if (rc != EEXIST) return rc; } } else { /* ignore passed MDBX_NOSUBDIR flag and set it automatically */ - *flags |= MDBX_NOSUBDIR; + env->me_flags |= MDBX_NOSUBDIR; if (S_ISDIR(st.st_mode)) - *flags -= MDBX_NOSUBDIR; + env->me_flags -= MDBX_NOSUBDIR; } #endif @@ -15055,41 +15048,42 @@ __cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx, const size_t pathname_len = strlen(pathname); #endif assert(!osal_isdirsep(lock_suffix[0])); - ctx->ent_len = pathname_len; + size_t base_len = pathname_len; static const size_t dxb_name_len = ARRAY_LENGTH(dxb_name) - 1; - if (*flags & MDBX_NOSUBDIR) { - if (ctx->ent_len > dxb_name_len && - osal_pathequal(pathname + ctx->ent_len - dxb_name_len, dxb_name, + if (env->me_flags & MDBX_NOSUBDIR) { + if (base_len > dxb_name_len && + osal_pathequal(pathname + base_len - dxb_name_len, dxb_name, dxb_name_len)) { - *flags -= MDBX_NOSUBDIR; - ctx->ent_len -= dxb_name_len; - } else if (ctx->ent_len == dxb_name_len - 1 && osal_isdirsep(dxb_name[0]) && + env->me_flags -= MDBX_NOSUBDIR; + base_len -= dxb_name_len; + } else if (base_len == dxb_name_len - 1 && osal_isdirsep(dxb_name[0]) && osal_isdirsep(lck_name[0]) && - osal_pathequal(pathname + ctx->ent_len - dxb_name_len + 1, + osal_pathequal(pathname + base_len - dxb_name_len + 1, dxb_name + 1, dxb_name_len - 1)) { - *flags -= MDBX_NOSUBDIR; - ctx->ent_len -= dxb_name_len - 1; + env->me_flags -= MDBX_NOSUBDIR; + base_len -= dxb_name_len - 1; } } const size_t suflen_with_NOSUBDIR = sizeof(lock_suffix) + sizeof(pathchar_t); const size_t suflen_without_NOSUBDIR = sizeof(lck_name) + sizeof(dxb_name); - const size_t enogh4any = (suflen_with_NOSUBDIR > suflen_without_NOSUBDIR) - ? 
suflen_with_NOSUBDIR - : suflen_without_NOSUBDIR; - const size_t bytes_needed = sizeof(pathchar_t) * ctx->ent_len * 2 + enogh4any; - ctx->buffer_for_free = osal_malloc(bytes_needed); - if (!ctx->buffer_for_free) + const size_t enough4any = (suflen_with_NOSUBDIR > suflen_without_NOSUBDIR) + ? suflen_with_NOSUBDIR + : suflen_without_NOSUBDIR; + const size_t bytes_needed = + sizeof(pathchar_t) * (base_len * 2 + pathname_len + 1) + enough4any; + env->me_pathname.buffer = osal_malloc(bytes_needed); + if (!env->me_pathname.buffer) return MDBX_ENOMEM; - ctx->dxb = ctx->buffer_for_free; - ctx->lck = ctx->dxb + ctx->ent_len + dxb_name_len + 1; - pathchar_t *const buf = ctx->buffer_for_free; + env->me_pathname.specified = env->me_pathname.buffer; + env->me_pathname.dxb = env->me_pathname.specified + pathname_len + 1; + env->me_pathname.lck = env->me_pathname.dxb + base_len + dxb_name_len + 1; rc = MDBX_SUCCESS; - if (ctx->ent_len) { - memcpy(buf + /* shutting up goofy MSVC static analyzer */ 0, pathname, - sizeof(pathchar_t) * pathname_len); - if (*flags & MDBX_NOSUBDIR) { + pathchar_t *const buf = env->me_pathname.buffer; + if (base_len) { + memcpy(buf, pathname, sizeof(pathchar_t) * pathname_len); + if (env->me_flags & MDBX_NOSUBDIR) { const pathchar_t *const lck_ext = osal_fileext(lck_name, ARRAY_LENGTH(lck_name)); if (lck_ext) { @@ -15099,32 +15093,33 @@ __cold static int handle_env_pathname(MDBX_handle_env_pathname *ctx, rc = check_alternative_lck_absent(buf); } } else { - memcpy(buf + ctx->ent_len, dxb_name, sizeof(dxb_name)); - memcpy(buf + ctx->ent_len + dxb_name_len, lock_suffix, - sizeof(lock_suffix)); + memcpy(buf + base_len, dxb_name, sizeof(dxb_name)); + memcpy(buf + base_len + dxb_name_len, lock_suffix, sizeof(lock_suffix)); rc = check_alternative_lck_absent(buf); } - memcpy(ctx->dxb + /* shutting up goofy MSVC static analyzer */ 0, pathname, - sizeof(pathchar_t) * (ctx->ent_len + 1)); - memcpy(ctx->lck, pathname, sizeof(pathchar_t) * ctx->ent_len); - if 
(*flags & MDBX_NOSUBDIR) { - memcpy(ctx->lck + ctx->ent_len, lock_suffix, sizeof(lock_suffix)); + memcpy(env->me_pathname.dxb, pathname, sizeof(pathchar_t) * (base_len + 1)); + memcpy(env->me_pathname.lck, pathname, sizeof(pathchar_t) * base_len); + if (env->me_flags & MDBX_NOSUBDIR) { + memcpy(env->me_pathname.lck + base_len, lock_suffix, sizeof(lock_suffix)); } else { - memcpy(ctx->dxb + ctx->ent_len, dxb_name, sizeof(dxb_name)); - memcpy(ctx->lck + ctx->ent_len, lck_name, sizeof(lck_name)); + memcpy(env->me_pathname.dxb + base_len, dxb_name, sizeof(dxb_name)); + memcpy(env->me_pathname.lck + base_len, lck_name, sizeof(lck_name)); } } else { - assert(!(*flags & MDBX_NOSUBDIR)); - memcpy(buf + /* shutting up goofy MSVC static analyzer */ 0, dxb_name + 1, - sizeof(dxb_name) - sizeof(pathchar_t)); + assert(!(env->me_flags & MDBX_NOSUBDIR)); + memcpy(buf, dxb_name + 1, sizeof(dxb_name) - sizeof(pathchar_t)); memcpy(buf + dxb_name_len - 1, lock_suffix, sizeof(lock_suffix)); rc = check_alternative_lck_absent(buf); - memcpy(ctx->dxb + /* shutting up goofy MSVC static analyzer */ 0, - dxb_name + 1, sizeof(dxb_name) - sizeof(pathchar_t)); - memcpy(ctx->lck, lck_name + 1, sizeof(lck_name) - sizeof(pathchar_t)); + memcpy(env->me_pathname.dxb, dxb_name + 1, + sizeof(dxb_name) - sizeof(pathchar_t)); + memcpy(env->me_pathname.lck, lck_name + 1, + sizeof(lck_name) - sizeof(pathchar_t)); } + + memcpy(env->me_pathname.specified, pathname, + sizeof(pathchar_t) * (pathname_len + 1)); return rc; } @@ -15162,23 +15157,19 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, (mode == MDBX_ENV_ENSURE_UNUSED) ? 
MDBX_EXCLUSIVE : MDBX_ENV_DEFAULTS; dummy_env->me_os_psize = (unsigned)osal_syspagesize(); dummy_env->me_psize = (unsigned)mdbx_default_pagesize(); - dummy_env->me_pathname = (pathchar_t *)pathname; - MDBX_handle_env_pathname env_pathname; STATIC_ASSERT(sizeof(dummy_env->me_flags) == sizeof(MDBX_env_flags_t)); - int rc = MDBX_RESULT_TRUE, - err = handle_env_pathname(&env_pathname, pathname, - (MDBX_env_flags_t *)&dummy_env->me_flags, 0); + int rc = MDBX_RESULT_TRUE, err = env_handle_pathname(dummy_env, pathname, 0); if (likely(err == MDBX_SUCCESS)) { mdbx_filehandle_t clk_handle = INVALID_HANDLE_VALUE, dxb_handle = INVALID_HANDLE_VALUE; if (mode > MDBX_ENV_JUST_DELETE) { - err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, env_pathname.dxb, - &dxb_handle, 0); + err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, + dummy_env->me_pathname.dxb, &dxb_handle, 0); err = (err == MDBX_ENOFILE) ? MDBX_SUCCESS : err; if (err == MDBX_SUCCESS) { - err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, env_pathname.lck, - &clk_handle, 0); + err = osal_openfile(MDBX_OPEN_DELETE, dummy_env, + dummy_env->me_pathname.lck, &clk_handle, 0); err = (err == MDBX_ENOFILE) ? 
MDBX_SUCCESS : err; } if (err == MDBX_SUCCESS && clk_handle != INVALID_HANDLE_VALUE) @@ -15188,7 +15179,7 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, } if (err == MDBX_SUCCESS) { - err = osal_removefile(env_pathname.dxb); + err = osal_removefile(dummy_env->me_pathname.dxb); if (err == MDBX_SUCCESS) rc = MDBX_SUCCESS; else if (err == MDBX_ENOFILE) @@ -15196,7 +15187,7 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, } if (err == MDBX_SUCCESS) { - err = osal_removefile(env_pathname.lck); + err = osal_removefile(dummy_env->me_pathname.lck); if (err == MDBX_SUCCESS) rc = MDBX_SUCCESS; else if (err == MDBX_ENOFILE) @@ -15218,7 +15209,7 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, } else if (err == MDBX_ENOFILE) err = MDBX_SUCCESS; - osal_free(env_pathname.buffer_for_free); + osal_free(dummy_env->me_pathname.buffer); return (err == MDBX_SUCCESS) ? rc : err; } @@ -15280,23 +15271,19 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, #endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ } - MDBX_handle_env_pathname env_pathname; - rc = handle_env_pathname(&env_pathname, pathname, &flags, mode); + env->me_flags = (flags & ~MDBX_FATAL_ERROR); + rc = env_handle_pathname(env, pathname, mode); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; env->me_flags = (flags & ~MDBX_FATAL_ERROR) | MDBX_ENV_ACTIVE; - env->me_pathname = osal_calloc(env_pathname.ent_len + 1, sizeof(pathchar_t)); env->me_dbxs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbxs[0])); env->me_db_flags = osal_calloc(env->me_maxdbs, sizeof(env->me_db_flags[0])); env->me_dbi_seqs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbi_seqs[0])); - if (!(env->me_dbxs && env->me_pathname && env->me_db_flags && - env->me_dbi_seqs)) { + if (!(env->me_dbxs && env->me_db_flags && env->me_dbi_seqs)) { rc = MDBX_ENOMEM; goto bailout; } - memcpy(env->me_pathname, env_pathname.dxb, - env_pathname.ent_len * sizeof(pathchar_t)); /* Использование O_DSYNC или FILE_FLAG_WRITE_THROUGH: * @@ -15385,14 
+15372,15 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, * при этом для записи мета требуется отдельный не-overlapped дескриптор. */ - rc = osal_openfile((flags & MDBX_RDONLY) ? MDBX_OPEN_DXB_READ - : MDBX_OPEN_DXB_LAZY, - env, env_pathname.dxb, &env->me_lazy_fd, mode); - if (rc != MDBX_SUCCESS) - goto bailout; + env->me_pid = osal_getpid(); + rc = osal_openfile((env->me_flags & MDBX_RDONLY) ? MDBX_OPEN_DXB_READ + : MDBX_OPEN_DXB_LAZY, + env, env->me_pathname.dxb, &env->me_lazy_fd, mode); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; #if MDBX_LOCKING == MDBX_LOCKING_SYSV - env->me_sysv_ipc.key = ftok(env_pathname.dxb, 42); + env->me_sysv_ipc.key = ftok(env->me_pathname.dxb, 42); if (env->me_sysv_ipc.key == -1) { rc = errno; goto bailout; @@ -15447,7 +15435,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, rc = osal_openfile(ior_direct ? MDBX_OPEN_DXB_OVERLAPPED_DIRECT : MDBX_OPEN_DXB_OVERLAPPED, - env, env_pathname.dxb, &env->me_overlapped_fd, 0); + env, env->me_pathname.dxb, &env->me_overlapped_fd, 0); if (rc != MDBX_SUCCESS) goto bailout; env->me_data_lock_event = CreateEventW(nullptr, true, false, nullptr); @@ -15473,7 +15461,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, ((mode & S_IRGRP) ? /* +write if readable by group */ S_IWGRP : 0) | ((mode & S_IROTH) ? 
/* +write if readable by others */ S_IWOTH : 0); #endif /* !Windows */ - const int lck_rc = setup_lck(env, env_pathname.lck, mode); + const int lck_rc = setup_lck(env, mode); if (MDBX_IS_ERROR(lck_rc)) { rc = lck_rc; goto bailout; @@ -15486,7 +15474,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, | MDBX_EXCLUSIVE #endif /* !Windows */ ))) { - rc = osal_openfile(MDBX_OPEN_DXB_DSYNC, env, env_pathname.dxb, + rc = osal_openfile(MDBX_OPEN_DXB_DSYNC, env, env->me_pathname.dxb, &env->me_dsync_fd, 0); if (MDBX_IS_ERROR(rc)) goto bailout; @@ -15710,7 +15698,6 @@ bailout: txn_valgrind(env, nullptr); #endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ } - osal_free(env_pathname.buffer_for_free); return rc; } @@ -15763,6 +15750,10 @@ __cold static int env_close(MDBX_env *env) { CloseHandle(env->me_data_lock_event); env->me_data_lock_event = INVALID_HANDLE_VALUE; } + if (env->me_pathname_char) { + osal_free(env->me_pathname_char); + env->me_pathname_char = nullptr; + } #endif /* Windows */ if (env->me_dsync_fd != INVALID_HANDLE_VALUE) { @@ -15800,16 +15791,10 @@ __cold static int env_close(MDBX_env *env) { osal_free(env->me_db_flags); env->me_db_flags = nullptr; } - if (env->me_pathname) { - osal_free(env->me_pathname); - env->me_pathname = nullptr; + if (env->me_pathname.buffer) { + osal_free(env->me_pathname.buffer); + env->me_pathname.buffer = nullptr; } -#if defined(_WIN32) || defined(_WIN64) - if (env->me_pathname_char) { - osal_free(env->me_pathname_char); - env->me_pathname_char = nullptr; - } -#endif /* Windows */ if (env->me_txn0) { dpl_free(env->me_txn0); txl_free(env->me_txn0->tw.lifo_reclaimed); @@ -22459,7 +22444,7 @@ __cold int mdbx_env_get_pathW(const MDBX_env *env, const wchar_t **arg) { if (unlikely(!arg)) return MDBX_EINVAL; - *arg = env->me_pathname; + *arg = env->me_pathname.specified; return MDBX_SUCCESS; } #endif /* Windows */ @@ -22476,12 +22461,14 @@ __cold int mdbx_env_get_path(const MDBX_env *env, const char **arg) { if 
(!env->me_pathname_char) { *arg = nullptr; DWORD flags = /* WC_ERR_INVALID_CHARS */ 0x80; - size_t mb_len = WideCharToMultiByte(CP_THREAD_ACP, flags, env->me_pathname, - -1, nullptr, 0, nullptr, nullptr); + size_t mb_len = + WideCharToMultiByte(CP_THREAD_ACP, flags, env->me_pathname.specified, + -1, nullptr, 0, nullptr, nullptr); rc = mb_len ? MDBX_SUCCESS : (int)GetLastError(); if (rc == ERROR_INVALID_FLAGS) { - mb_len = WideCharToMultiByte(CP_THREAD_ACP, flags = 0, env->me_pathname, - -1, nullptr, 0, nullptr, nullptr); + mb_len = WideCharToMultiByte(CP_THREAD_ACP, flags = 0, + env->me_pathname.specified, -1, nullptr, 0, + nullptr, nullptr); rc = mb_len ? MDBX_SUCCESS : (int)GetLastError(); } if (unlikely(rc != MDBX_SUCCESS)) @@ -22490,9 +22477,9 @@ __cold int mdbx_env_get_path(const MDBX_env *env, const char **arg) { char *const mb_pathname = osal_malloc(mb_len); if (!mb_pathname) return MDBX_ENOMEM; - if (mb_len != (size_t)WideCharToMultiByte(CP_THREAD_ACP, flags, - env->me_pathname, -1, mb_pathname, - (int)mb_len, nullptr, nullptr)) { + if (mb_len != (size_t)WideCharToMultiByte( + CP_THREAD_ACP, flags, env->me_pathname.specified, -1, + mb_pathname, (int)mb_len, nullptr, nullptr)) { rc = (int)GetLastError(); osal_free(mb_pathname); return rc; @@ -22504,7 +22491,7 @@ __cold int mdbx_env_get_path(const MDBX_env *env, const char **arg) { } *arg = env->me_pathname_char; #else - *arg = env->me_pathname; + *arg = env->me_pathname.specified; #endif /* Windows */ return MDBX_SUCCESS; } diff --git a/src/internals.h b/src/internals.h index 6a5d8018..0dd35629 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1400,10 +1400,12 @@ struct MDBX_env { MDBX_dbi me_maxdbs; /* size of the DB table */ uint32_t me_pid; /* process ID of this env */ osal_thread_key_t me_txkey; /* thread-key for readers */ - pathchar_t *me_pathname; /* path to the DB files */ - void *me_pbuf; /* scratch area for DUPSORT put() */ - MDBX_txn *me_txn0; /* preallocated write transaction */ - + struct { 
/* path to the DB files */ + pathchar_t *lck, *dxb, *specified; + void *buffer; + } me_pathname; + void *me_pbuf; /* scratch area for DUPSORT put() */ + MDBX_txn *me_txn0; /* preallocated write transaction */ MDBX_dbx *me_dbxs; /* array of static DB info */ uint16_t *__restrict me_db_flags; /* array of flags from MDBX_db.md_flags */ MDBX_atomic_uint32_t *me_dbi_seqs; /* array of dbi sequence numbers */ From eddade7b9945963b971a219f2e59ce5288a7ad75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Nov 2023 00:23:26 +0300 Subject: [PATCH 040/137] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20=D0=BF=D1=80?= =?UTF-8?q?=D0=B5=D1=84=D0=B8=D0=BA=D1=81=D0=BE=D0=B2=20=D0=B8=D0=BC=D1=91?= =?UTF-8?q?=D0=BD=20osal-ipc=20=D1=84=D1=83=D0=BD=D0=BA=D1=86=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/lck-posix.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lck-posix.c b/src/lck-posix.c index d55a9395..d8f1fdc6 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -875,7 +875,7 @@ MDBX_INTERNAL_FUNC int osal_check_tid4bionic(void) { } #endif /* __ANDROID_API__ || ANDROID) || BIONIC */ -static int mdbx_ipclock_lock(MDBX_env *env, osal_ipclock_t *ipc, +static int osal_ipclock_lock(MDBX_env *env, osal_ipclock_t *ipc, const bool dont_wait) { #if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ MDBX_LOCKING == MDBX_LOCKING_POSIX2008 @@ -915,7 +915,7 @@ static int mdbx_ipclock_lock(MDBX_env *env, osal_ipclock_t *ipc, return rc; } -static int mdbx_ipclock_unlock(MDBX_env *env, osal_ipclock_t *ipc) { +int osal_ipclock_unlock(MDBX_env *env, osal_ipclock_t *ipc) { #if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ MDBX_LOCKING == MDBX_LOCKING_POSIX2008 int rc = pthread_mutex_unlock(ipc); @@ -940,14 +940,14 @@ static int 
mdbx_ipclock_unlock(MDBX_env *env, osal_ipclock_t *ipc) { MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env) { TRACE("%s", ">>"); jitter4testing(true); - int rc = mdbx_ipclock_lock(env, &env->me_lck->mti_rlock, false); + int rc = osal_ipclock_lock(env, &env->me_lck->mti_rlock, false); TRACE("<< rc %d", rc); return rc; } MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env) { TRACE("%s", ">>"); - int rc = mdbx_ipclock_unlock(env, &env->me_lck->mti_rlock); + int rc = osal_ipclock_unlock(env, &env->me_lck->mti_rlock); TRACE("<< rc %d", rc); if (unlikely(rc != MDBX_SUCCESS)) mdbx_panic("%s() failed: err %d\n", __func__, rc); @@ -957,7 +957,7 @@ MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env) { int osal_txn_lock(MDBX_env *env, bool dont_wait) { TRACE("%swait %s", dont_wait ? "dont-" : "", ">>"); jitter4testing(true); - const int err = mdbx_ipclock_lock(env, &env->me_lck->mti_wlock, dont_wait); + const int err = osal_ipclock_lock(env, &env->me_lck->mti_wlock, dont_wait); int rc = err; if (likely(!MDBX_IS_ERROR(err))) { eASSERT(env, !env->me_txn0->mt_owner || @@ -975,7 +975,7 @@ void osal_txn_unlock(MDBX_env *env) { TRACE("%s", ">>"); eASSERT(env, env->me_txn0->mt_owner == osal_thread_self()); env->me_txn0->mt_owner = 0; - int err = mdbx_ipclock_unlock(env, &env->me_lck->mti_wlock); + int err = osal_ipclock_unlock(env, &env->me_lck->mti_wlock); TRACE("<< err %d", err); if (unlikely(err != MDBX_SUCCESS)) mdbx_panic("%s() failed: err %d\n", __func__, err); From 7ad54f54b4a7258353d6c3b98bcb01091dadfd50 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 23:54:21 +0300 Subject: [PATCH 041/137] =?UTF-8?q?mdbx:=20=D0=BE=D0=B1=D1=8A=D0=B5=D0=B4?= =?UTF-8?q?=D0=B8=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20lck-=D1=81=D0=BF=D0=B8?= =?UTF-8?q?=D1=81=D0=BA=D0=B0=20=D0=B8=20rthc-=D1=82=D0=B0=D0=B1=D0=BB?= 
=?UTF-8?q?=D0=B8=D1=86=D1=8B=20=D0=B4=D0=BB=D1=8F=20=D1=83=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D1=89=D0=B5=D0=BD=D0=B8=D1=8F=20(=D0=B4=D0=B5)=D1=80?= =?UTF-8?q?=D0=B5=D0=B3=D0=B8=D1=81=D1=82=D1=80=D0=B0=D1=86=D0=B8=D0=B8=20?= =?UTF-8?q?TLS-=D0=B4=D0=B5=D1=81=D1=82=D1=80=D1=83=D0=BA=D1=82=D0=BE?= =?UTF-8?q?=D1=80=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 406 ++++++++++++++++++++++-------------------------- src/internals.h | 5 - 2 files changed, 188 insertions(+), 223 deletions(-) diff --git a/src/core.c b/src/core.c index 03bd80ed..92eb7709 100644 --- a/src/core.c +++ b/src/core.c @@ -1128,10 +1128,12 @@ MDBX_MAYBE_UNUSED static /*----------------------------------------------------------------------------*/ /* rthc (tls keys and destructors) */ +static int rthc_register(MDBX_env *const env); +static int rthc_remove(MDBX_env *const env); +static int rthc_uniq_check(const osal_mmap_t *pending, MDBX_env **found); + typedef struct rthc_entry_t { - MDBX_reader *begin; - MDBX_reader *end; - osal_thread_key_t thr_tls_key; + MDBX_env *env; } rthc_entry_t; #if MDBX_DEBUG @@ -1144,10 +1146,8 @@ static bin128_t bootid; #if defined(_WIN32) || defined(_WIN64) static CRITICAL_SECTION rthc_critical_section; -static CRITICAL_SECTION lcklist_critical_section; #else -static pthread_mutex_t lcklist_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_mutex_t rthc_mutex = PTHREAD_MUTEX_INITIALIZER; static pthread_cond_t rthc_cond = PTHREAD_COND_INITIALIZER; static osal_thread_key_t rthc_key; @@ -1346,17 +1346,24 @@ static void thread_rthc_set(osal_thread_key_t key, const void *value) { /* dtor called for thread, i.e. 
for all mdbx's environment objects */ __cold void thread_dtor(void *rthc) { rthc_lock(); - TRACE(">> pid %d, thread 0x%" PRIxPTR ", rthc %p", osal_getpid(), + const uint32_t self_pid = osal_getpid(); + TRACE(">> pid %d, thread 0x%" PRIxPTR ", rthc %p", self_pid, osal_thread_self(), rthc); - const uint32_t self_pid = osal_getpid(); for (size_t i = 0; i < rthc_count; ++i) { - const osal_thread_key_t key = rthc_table[i].thr_tls_key; - MDBX_reader *const reader = thread_rthc_get(key); - if (reader < rthc_table[i].begin || reader >= rthc_table[i].end) + MDBX_env *const env = rthc_table[i].env; + if (env->me_pid != self_pid) + continue; + if (!(env->me_flags & MDBX_ENV_TXKEY)) + continue; + MDBX_reader *const reader = thread_rthc_get(env->me_txkey); + MDBX_reader *const begin = &env->me_lck_mmap.lck->mti_readers[0]; + MDBX_reader *const end = + &env->me_lck_mmap.lck->mti_readers[env->me_maxreaders]; + if (reader < begin || reader >= end) continue; #if !defined(_WIN32) && !defined(_WIN64) - if (pthread_setspecific(key, nullptr) != 0) { + if (pthread_setspecific(env->me_txkey, nullptr) != 0) { TRACE("== thread 0x%" PRIxPTR ", rthc %p: ignore race with tsd-key deletion", osal_thread_self(), __Wpedantic_format_voidptr(reader)); @@ -1368,13 +1375,13 @@ __cold void thread_dtor(void *rthc) { ", rthc %p, [%zi], %p ... 
%p (%+i), rtch-pid %i, " "current-pid %i", osal_thread_self(), __Wpedantic_format_voidptr(reader), i, - __Wpedantic_format_voidptr(rthc_table[i].begin), - __Wpedantic_format_voidptr(rthc_table[i].end), - (int)(reader - rthc_table[i].begin), reader->mr_pid.weak, self_pid); + __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), + (int)(reader - begin), reader->mr_pid.weak, self_pid); if (atomic_load32(&reader->mr_pid, mo_Relaxed) == self_pid) { TRACE("==== thread 0x%" PRIxPTR ", rthc %p, cleanup", osal_thread_self(), __Wpedantic_format_voidptr(reader)); (void)atomic_cas32(&reader->mr_pid, self_pid, 0); + atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, mo_Relaxed); } } @@ -1419,14 +1426,15 @@ __cold void thread_dtor(void *rthc) { MDBX_EXCLUDE_FOR_GPROF __cold void global_dtor(void) { - TRACE(">> pid %d", osal_getpid()); + const uint32_t self_pid = osal_getpid(); + TRACE(">> pid %d", self_pid); rthc_lock(); #if !defined(_WIN32) && !defined(_WIN64) uint64_t *rthc = pthread_getspecific(rthc_key); TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status 0x%08" PRIx64 ", left %d", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), osal_getpid(), + osal_thread_self(), __Wpedantic_format_voidptr(rthc), self_pid, rthc ? 
rthc_read(rthc) : ~UINT64_C(0), atomic_load32(&rthc_pending, mo_Relaxed)); if (rthc) { @@ -1437,20 +1445,20 @@ __cold void global_dtor(void) { rthc_compare_and_clean(rthc, sign_registered)) { TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), osal_getpid(), + osal_thread_self(), __Wpedantic_format_voidptr(rthc), self_pid, "registered", state); } else if (state == sign_counted && rthc_compare_and_clean(rthc, sign_counted)) { TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), osal_getpid(), + osal_thread_self(), __Wpedantic_format_voidptr(rthc), self_pid, "counted", state); ENSURE(nullptr, atomic_sub32(&rthc_pending, 1) > 0); } else { WARNING("thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), - osal_getpid(), "wrong", state); + osal_thread_self(), __Wpedantic_format_voidptr(rthc), self_pid, + "wrong", state); } } @@ -1467,7 +1475,7 @@ __cold void global_dtor(void) { for (unsigned left; (left = atomic_load32(&rthc_pending, mo_AcquireRelease)) > 0;) { - NOTICE("tls-cleanup: pid %d, pending %u, wait for...", osal_getpid(), left); + NOTICE("tls-cleanup: pid %d, pending %u, wait for...", self_pid, left); const int rc = pthread_cond_timedwait(&rthc_cond, &rthc_mutex, &abstime); if (rc && rc != EINTR) break; @@ -1475,23 +1483,31 @@ __cold void global_dtor(void) { thread_key_delete(rthc_key); #endif - const uint32_t self_pid = osal_getpid(); for (size_t i = 0; i < rthc_count; ++i) { - const osal_thread_key_t key = rthc_table[i].thr_tls_key; - thread_key_delete(key); - for (MDBX_reader *rthc = rthc_table[i].begin; rthc < rthc_table[i].end; - ++rthc) { + MDBX_env *const env = rthc_table[i].env; + if (env->me_pid != self_pid) + continue; + if (!(env->me_flags & MDBX_ENV_TXKEY)) + continue; + MDBX_reader *const begin = 
&env->me_lck_mmap.lck->mti_readers[0]; + MDBX_reader *const end = + &env->me_lck_mmap.lck->mti_readers[env->me_maxreaders]; + thread_key_delete(env->me_txkey); + bool cleaned = false; + for (MDBX_reader *reader = begin; reader < end; ++reader) { TRACE("== [%zi] = key %" PRIuPTR ", %p ... %p, rthc %p (%+i), " "rthc-pid %i, current-pid %i", - i, (uintptr_t)key, __Wpedantic_format_voidptr(rthc_table[i].begin), - __Wpedantic_format_voidptr(rthc_table[i].end), - __Wpedantic_format_voidptr(rthc), (int)(rthc - rthc_table[i].begin), - rthc->mr_pid.weak, self_pid); - if (atomic_load32(&rthc->mr_pid, mo_Relaxed) == self_pid) { - atomic_store32(&rthc->mr_pid, 0, mo_AcquireRelease); - TRACE("== cleanup %p", __Wpedantic_format_voidptr(rthc)); + i, (uintptr_t)env->me_txkey, __Wpedantic_format_voidptr(begin), + __Wpedantic_format_voidptr(end), __Wpedantic_format_voidptr(reader), + (int)(reader - begin), reader->mr_pid.weak, self_pid); + if (atomic_load32(&reader->mr_pid, mo_Relaxed) == self_pid) { + (void)atomic_cas32(&reader->mr_pid, self_pid, 0); + TRACE("== cleanup %p", __Wpedantic_format_voidptr(reader)); + cleaned = true; } } + if (cleaned) + atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, mo_Relaxed); } rthc_limit = rthc_count = 0; @@ -1501,7 +1517,6 @@ __cold void global_dtor(void) { rthc_unlock(); #if defined(_WIN32) || defined(_WIN64) - DeleteCriticalSection(&lcklist_critical_section); DeleteCriticalSection(&rthc_critical_section); #else /* LY: yielding a few timeslices to give a more chance @@ -1510,24 +1525,26 @@ __cold void global_dtor(void) { #endif osal_dtor(); - TRACE("<< pid %d\n", osal_getpid()); + TRACE("<< pid %d\n", self_pid); } -__cold int rthc_alloc(osal_thread_key_t *pkey, MDBX_reader *begin, - MDBX_reader *end) { - assert(pkey != NULL); -#ifndef NDEBUG - *pkey = (osal_thread_key_t)0xBADBADBAD; -#endif /* NDEBUG */ +__cold int rthc_register(MDBX_env *const env) { + TRACE(">> env %p, rthc_count %u, rthc_limit %u", + 
__Wpedantic_format_voidptr(env), rthc_count, rthc_limit); - rthc_lock(); - TRACE(">> rthc_count %u, rthc_limit %u", rthc_count, rthc_limit); - int rc; - if (rthc_count == rthc_limit) { + int rc = MDBX_SUCCESS; + for (size_t i = 0; i < rthc_count; ++i) + if (unlikely(rthc_table[i].env == env)) { + rc = MDBX_PANIC; + goto bailout; + } + + env->me_txkey = 0; + if (unlikely(rthc_count == rthc_limit)) { rthc_entry_t *new_table = osal_realloc((rthc_table == rthc_table_static) ? nullptr : rthc_table, sizeof(rthc_entry_t) * rthc_limit * 2); - if (new_table == nullptr) { + if (unlikely(new_table == nullptr)) { rc = MDBX_ENOMEM; goto bailout; } @@ -1537,84 +1554,92 @@ __cold int rthc_alloc(osal_thread_key_t *pkey, MDBX_reader *begin, rthc_limit *= 2; } - rc = thread_key_create(&rthc_table[rthc_count].thr_tls_key); - if (rc != MDBX_SUCCESS) - goto bailout; + if ((env->me_flags & MDBX_NOTLS) == 0) { + rc = thread_key_create(&env->me_txkey); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + env->me_flags |= MDBX_ENV_TXKEY; + } - *pkey = rthc_table[rthc_count].thr_tls_key; - TRACE("== [%i] = key %" PRIuPTR ", %p ... 
%p", rthc_count, (uintptr_t)*pkey, - __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end)); - - rthc_table[rthc_count].begin = begin; - rthc_table[rthc_count].end = end; + rthc_table[rthc_count].env = env; + TRACE("== [%i] = env %p, key %" PRIuPTR, rthc_count, + __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey); ++rthc_count; - TRACE("<< key %" PRIuPTR ", rthc_count %u, rthc_limit %u", (uintptr_t)*pkey, - rthc_count, rthc_limit); - rthc_unlock(); - return MDBX_SUCCESS; bailout: - rthc_unlock(); + TRACE("<< env %p, key %" PRIuPTR ", rthc_count %u, rthc_limit %u, rc %d", + __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, rthc_count, + rthc_limit, rc); return rc; } +__cold static int rthc_drown(MDBX_env *const env) { + const uint32_t self_pid = osal_getpid(); + int rc = MDBX_SUCCESS; + MDBX_env *inprocess_neighbor = nullptr; + if (likely(env->me_lck_mmap.lck && self_pid == env->me_pid)) { + MDBX_reader *const begin = &env->me_lck_mmap.lck->mti_readers[0]; + MDBX_reader *const end = + &env->me_lck_mmap.lck->mti_readers[env->me_maxreaders]; + TRACE("== %s env %p pid %d, readers %p ...%p, current-pid %d", + (self_pid == env->me_pid) ? "cleanup" : "skip", + __Wpedantic_format_voidptr(env), env->me_pid, + __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), + self_pid); + bool cleaned = false; + for (MDBX_reader *r = begin; r < end; ++r) { + if (atomic_load32(&r->mr_pid, mo_Relaxed) == self_pid) { + atomic_store32(&r->mr_pid, 0, mo_AcquireRelease); + TRACE("== cleanup %p", __Wpedantic_format_voidptr(r)); + cleaned = true; + } + } + if (cleaned) + atomic_store32(&env->me_lck_mmap.lck->mti_readers_refresh_flag, true, + mo_Relaxed); + rc = rthc_uniq_check(&env->me_lck_mmap, &inprocess_neighbor); + if (!inprocess_neighbor && env->me_live_reader && + env->me_lfd != INVALID_HANDLE_VALUE) { + int err = osal_rpid_clear(env); + rc = rc ? 
rc : err; + } + } + int err = osal_lck_destroy(env, inprocess_neighbor); + env->me_pid = 0; + return rc ? rc : err; +} -__cold void rthc_remove(const osal_thread_key_t key) { - thread_key_delete(key); - rthc_lock(); - TRACE(">> key %zu, rthc_count %u, rthc_limit %u", (uintptr_t)key, rthc_count, +__cold static int rthc_remove(MDBX_env *const env) { + TRACE(">>> env %p, key %zu, rthc_count %u, rthc_limit %u", + __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, rthc_count, rthc_limit); - for (size_t i = 0; i < rthc_count; ++i) { - if (key == rthc_table[i].thr_tls_key) { - const uint32_t self_pid = osal_getpid(); - TRACE("== [%zi], %p ...%p, current-pid %d", i, - __Wpedantic_format_voidptr(rthc_table[i].begin), - __Wpedantic_format_voidptr(rthc_table[i].end), self_pid); + int rc = MDBX_SUCCESS; + if (likely(env->me_pid)) + rc = rthc_drown(env); - for (MDBX_reader *rthc = rthc_table[i].begin; rthc < rthc_table[i].end; - ++rthc) { - if (atomic_load32(&rthc->mr_pid, mo_Relaxed) == self_pid) { - atomic_store32(&rthc->mr_pid, 0, mo_AcquireRelease); - TRACE("== cleanup %p", __Wpedantic_format_voidptr(rthc)); - } - } + for (size_t i = 0; i < rthc_count; ++i) { + if (rthc_table[i].env == env) { if (--rthc_count > 0) rthc_table[i] = rthc_table[rthc_count]; else if (rthc_table != rthc_table_static) { - osal_free(rthc_table); + void *tmp = rthc_table; rthc_table = rthc_table_static; rthc_limit = RTHC_INITIAL_LIMIT; + osal_memory_barrier(); + osal_free(tmp); } break; } } - TRACE("<< key %zu, rthc_count %u, rthc_limit %u", (size_t)key, rthc_count, + TRACE("<<< %p, key %zu, rthc_count %u, rthc_limit %u", + __Wpedantic_format_voidptr(env), (uintptr_t)env->me_txkey, rthc_count, rthc_limit); - rthc_unlock(); + return rc; } //------------------------------------------------------------------------------ -#define RTHC_ENVLIST_END ((MDBX_env *)((uintptr_t)50459)) -static MDBX_env *inprocess_lcklist_head = RTHC_ENVLIST_END; - -static __inline void lcklist_lock(void) { -#if 
defined(_WIN32) || defined(_WIN64) - EnterCriticalSection(&lcklist_critical_section); -#else - ENSURE(nullptr, osal_pthread_mutex_lock(&lcklist_mutex) == 0); -#endif -} - -static __inline void lcklist_unlock(void) { -#if defined(_WIN32) || defined(_WIN64) - LeaveCriticalSection(&lcklist_critical_section); -#else - ENSURE(nullptr, pthread_mutex_unlock(&lcklist_mutex) == 0); -#endif -} - MDBX_NOTHROW_CONST_FUNCTION static uint64_t rrxmrrxmsx_0(uint64_t v) { /* Pelle Evensen's mixer, https://bit.ly/2HOfynt */ v ^= (v << 39 | v >> 25) ^ (v << 14 | v >> 50); @@ -1667,13 +1692,16 @@ static int uniq_poke(const osal_mmap_t *pending, osal_mmap_t *scan, return uniq_peek(pending, scan); } -__cold static int uniq_check(const osal_mmap_t *pending, MDBX_env **found) { +__cold static int rthc_uniq_check(const osal_mmap_t *pending, + MDBX_env **found) { *found = nullptr; uint64_t salt = 0; - for (MDBX_env *scan = inprocess_lcklist_head; scan != RTHC_ENVLIST_END; - scan = scan->me_lcklist_next) { - MDBX_lockinfo *const scan_lck = scan->me_lck_mmap.lck; - int err = atomic_load64(&scan_lck->mti_bait_uniqueness, mo_AcquireRelease) + for (size_t i = 0; i < rthc_count; ++i) { + MDBX_env *const scan = rthc_table[i].env; + if (!scan->me_lck_mmap.lck || &scan->me_lck_mmap == pending) + continue; + int err = atomic_load64(&scan->me_lck_mmap.lck->mti_bait_uniqueness, + mo_AcquireRelease) ? uniq_peek(pending, &scan->me_lck_mmap) : uniq_poke(pending, &scan->me_lck_mmap, &salt); if (err == MDBX_ENODATA) { @@ -1681,8 +1709,8 @@ __cold static int uniq_check(const osal_mmap_t *pending, MDBX_env **found) { if (likely(osal_filesize(pending->fd, &length) == MDBX_SUCCESS && length == 0)) { /* LY: skip checking since LCK-file is empty, i.e. just created. 
*/ - DEBUG("uniq-probe: %s", "unique (new/empty lck)"); - return MDBX_RESULT_TRUE; + DEBUG("%s", "unique (new/empty lck)"); + return MDBX_SUCCESS; } } if (err == MDBX_RESULT_TRUE) @@ -1695,44 +1723,17 @@ __cold static int uniq_check(const osal_mmap_t *pending, MDBX_env **found) { if (err == MDBX_RESULT_TRUE) { err = uniq_poke(pending, &scan->me_lck_mmap, &salt); *found = scan; - DEBUG("uniq-probe: found %p", __Wpedantic_format_voidptr(*found)); - return MDBX_RESULT_FALSE; + DEBUG("found %p", __Wpedantic_format_voidptr(*found)); + return MDBX_SUCCESS; } if (unlikely(err != MDBX_SUCCESS)) { - DEBUG("uniq-probe: failed rc %d", err); + DEBUG("failed rc %d", err); return err; } } - DEBUG("uniq-probe: %s", "unique"); - return MDBX_RESULT_TRUE; -} - -static int lcklist_detach_locked(MDBX_env *env) { - MDBX_env *inprocess_neighbor = nullptr; - int rc = MDBX_SUCCESS; - if (env->me_lcklist_next != nullptr) { - ENSURE(env, env->me_lcklist_next != nullptr); - ENSURE(env, inprocess_lcklist_head != RTHC_ENVLIST_END); - for (MDBX_env **ptr = &inprocess_lcklist_head; *ptr != RTHC_ENVLIST_END; - ptr = &(*ptr)->me_lcklist_next) { - if (*ptr == env) { - *ptr = env->me_lcklist_next; - env->me_lcklist_next = nullptr; - break; - } - } - ENSURE(env, env->me_lcklist_next == nullptr); - } - - rc = likely(osal_getpid() == env->me_pid) - ? uniq_check(&env->me_lck_mmap, &inprocess_neighbor) - : MDBX_PANIC; - if (!inprocess_neighbor && env->me_live_reader) - (void)osal_rpid_clear(env); - if (!MDBX_IS_ERROR(rc)) - rc = osal_lck_destroy(env, inprocess_neighbor); - return rc; + DEBUG("%s", "unique"); + return MDBX_SUCCESS; } /*------------------------------------------------------------------------------ @@ -14567,59 +14568,28 @@ __cold static int setup_lck(MDBX_env *env, mdbx_mode_t mode) { } /* LY: without-lck mode (e.g. 
exclusive or on read-only filesystem) */ - /* beginning of a locked section ---------------------------------------- */ - lcklist_lock(); - eASSERT(env, env->me_lcklist_next == nullptr); env->me_lfd = INVALID_HANDLE_VALUE; - const int rc = osal_lck_seize(env); - if (MDBX_IS_ERROR(rc)) { - /* Calling lcklist_detach_locked() is required to restore POSIX-filelock - * and this job will be done by env_close(). */ - lcklist_unlock(); - return rc; - } - /* insert into inprocess lck-list */ - env->me_lcklist_next = inprocess_lcklist_head; - inprocess_lcklist_head = env; - lcklist_unlock(); - /* end of a locked section ---------------------------------------------- */ - - env->me_lck = lckless_stub(env); - env->me_maxreaders = UINT_MAX; - DEBUG("lck-setup:%s%s%s", " lck-less", - (env->me_flags & MDBX_RDONLY) ? " readonly" : "", - (rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); - return rc; } /* beginning of a locked section ------------------------------------------ */ - lcklist_lock(); - eASSERT(env, env->me_lcklist_next == nullptr); + rthc_lock(); + err = rthc_register(env); + if (likely(err == MDBX_SUCCESS)) + err = osal_lck_seize(env); - /* Try to get exclusive lock. If we succeed, then - * nobody is using the lock region and we should initialize it. */ - err = osal_lck_seize(env); - if (MDBX_IS_ERROR(err)) { - bailout: - /* Calling lcklist_detach_locked() is required to restore POSIX-filelock - * and this job will be done by env_close(). 
*/ - lcklist_unlock(); - return err; - } - - MDBX_env *inprocess_neighbor = nullptr; - if (err == MDBX_RESULT_TRUE) { - err = uniq_check(&env->me_lck_mmap, &inprocess_neighbor); - if (MDBX_IS_ERROR(err)) - goto bailout; - if (inprocess_neighbor && - ((runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 || - (inprocess_neighbor->me_flags & MDBX_EXCLUSIVE) != 0)) { - err = MDBX_BUSY; - goto bailout; - } - } const int lck_seize_rc = err; + if (MDBX_IS_ERROR(err)) + goto bailout; + + struct MDBX_lockinfo *lck = nullptr; + if (env->me_lfd == INVALID_HANDLE_VALUE) { + lck = lckless_stub(env); + env->me_maxreaders = UINT_MAX; + DEBUG("lck-setup:%s%s%s", " lck-less", + (env->me_flags & MDBX_RDONLY) ? " readonly" : "", + (lck_seize_rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); + goto done; + } DEBUG("lck-setup:%s%s%s", " with-lck", (env->me_flags & MDBX_RDONLY) ? " readonly" : "", @@ -14688,9 +14658,10 @@ __cold static int setup_lck(MDBX_env *env, mdbx_mode_t mode) { #endif /* MADV_WILLNEED */ #endif /* MDBX_ENABLE_MADVISE */ - struct MDBX_lockinfo *const lck = env->me_lck_mmap.lck; + lck = env->me_lck_mmap.lck; if (lck_seize_rc == MDBX_RESULT_TRUE) { - /* LY: exclusive mode, check and reset lck content */ + /* If we succeed got exclusive lock, then nobody is using the lock region + * and we should initialize it. 
*/ memset(lck, 0, (size_t)size); jitter4testing(false); lck->mti_magic_and_version = MDBX_LOCK_MAGIC; @@ -14724,19 +14695,32 @@ __cold static int setup_lck(MDBX_env *env, mdbx_mode_t mode) { } } + MDBX_env *inprocess_neighbor = nullptr; + if (lck_seize_rc == MDBX_RESULT_TRUE) { + err = rthc_uniq_check(&env->me_lck_mmap, &inprocess_neighbor); + if (MDBX_IS_ERROR(err)) + goto bailout; + if (inprocess_neighbor && + ((runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 || + (inprocess_neighbor->me_flags & MDBX_EXCLUSIVE) != 0)) { + err = MDBX_BUSY; + goto bailout; + } + } + err = osal_lck_init(env, inprocess_neighbor, lck_seize_rc); if (MDBX_IS_ERROR(err)) goto bailout; - ENSURE(env, env->me_lcklist_next == nullptr); - /* insert into inprocess lck-list */ - env->me_lcklist_next = inprocess_lcklist_head; - inprocess_lcklist_head = env; - lcklist_unlock(); - /* end of a locked section ------------------------------------------------ */ - - eASSERT(env, !MDBX_IS_ERROR(lck_seize_rc)); +done: env->me_lck = lck; + eASSERT(env, !MDBX_IS_ERROR(lck_seize_rc)); + +bailout: + /* Calling osal_lck_destroy() is required to restore POSIX-filelock + * and this job will be done by env_close(). 
*/ + rthc_unlock(); + /* end of a locked section ------------------------------------------------ */ return lck_seize_rc; } @@ -15603,14 +15587,6 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, if (MDBX_IS_ERROR(rc)) goto bailout; } - - if ((env->me_flags & MDBX_NOTLS) == 0) { - rc = rthc_alloc(&env->me_txkey, &lck->mti_readers[0], - &lck->mti_readers[env->me_maxreaders]); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - env->me_flags |= MDBX_ENV_TXKEY; - } } if ((flags & MDBX_RDONLY) == 0) { @@ -15704,17 +15680,19 @@ bailout: /* Destroy resources from mdbx_env_open(), clear our readers & DBIs */ __cold static int env_close(MDBX_env *env) { const unsigned flags = env->me_flags; - if (!(flags & MDBX_ENV_ACTIVE)) { - ENSURE(env, env->me_lcklist_next == nullptr); - return MDBX_SUCCESS; - } - env->me_flags &= ~ENV_INTERNAL_FLAGS; if (flags & MDBX_ENV_TXKEY) { - rthc_remove(env->me_txkey); - env->me_txkey = (osal_thread_key_t)0; + thread_key_delete(env->me_txkey); + env->me_txkey = 0; } + if (env->me_lck) + munlock_all(env); + + rthc_lock(); + int rc = rthc_remove(env); + rthc_unlock(); + #if MDBX_ENABLE_DBI_LOCKFREE for (struct mdbx_defer_free_item *next, *ptr = env->me_defer_free; ptr; ptr = next) { @@ -15723,14 +15701,9 @@ __cold static int env_close(MDBX_env *env) { } #endif /* MDBX_ENABLE_DBI_LOCKFREE */ - munlock_all(env); if (!(env->me_flags & MDBX_RDONLY)) osal_ioring_destroy(&env->me_ioring); - lcklist_lock(); - const int rc = lcklist_detach_locked(env); - lcklist_unlock(); - env->me_lck = nullptr; if (env->me_lck_mmap.lck) osal_munmap(&env->me_lck_mmap); @@ -15882,8 +15855,6 @@ __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { osal_free(ptr); } VALGRIND_DESTROY_MEMPOOL(env); - ENSURE(env, env->me_lcklist_next == nullptr); - env->me_pid = 0; osal_free(env); return rc; @@ -25943,7 +25914,6 @@ __cold void global_ctor(void) { rthc_table = rthc_table_static; #if defined(_WIN32) || defined(_WIN64) 
InitializeCriticalSection(&rthc_critical_section); - InitializeCriticalSection(&lcklist_critical_section); #else ENSURE(nullptr, pthread_key_create(&rthc_key, thread_dtor) == 0); TRACE("pid %d, &mdbx_rthc_key = %p, value 0x%x", osal_getpid(), diff --git a/src/internals.h b/src/internals.h index 0dd35629..02d8ab49 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1462,7 +1462,6 @@ struct MDBX_env { bool me_incore; bool me_prefault_write; - MDBX_env *me_lcklist_next; #if MDBX_ENABLE_DBI_LOCKFREE struct mdbx_defer_free_item *me_defer_free; #endif /* MDBX_ENABLE_DBI_LOCKFREE */ @@ -1560,10 +1559,6 @@ osal_flush_incoherent_mmap(const void *addr, size_t nbytes, MDBX_INTERNAL_FUNC int cleanup_dead_readers(MDBX_env *env, int rlocked, int *dead); -MDBX_INTERNAL_FUNC int rthc_alloc(osal_thread_key_t *key, MDBX_reader *begin, - MDBX_reader *end); -MDBX_INTERNAL_FUNC void rthc_remove(const osal_thread_key_t key); - MDBX_INTERNAL_FUNC void global_ctor(void); MDBX_INTERNAL_FUNC void osal_ctor(void); MDBX_INTERNAL_FUNC void global_dtor(void); From a3e2300f583f593291332e4d20647cf5a68fcb2b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Nov 2023 00:03:43 +0300 Subject: [PATCH 042/137] =?UTF-8?q?mdbx:=20=D0=B2=D0=BE=D0=B7=D0=BC=D0=BE?= =?UTF-8?q?=D0=B6=D0=BD=D0=BE=D1=81=D1=82=D1=8C=20=D0=B2=D1=8B=D0=B7=D0=BE?= =?UTF-8?q?=D0=B2=D0=B0=20`osal=5Flck=5Fdestroy()`=20=D0=B2=20=D0=B4=D0=BE?= =?UTF-8?q?=D1=87=D0=B5=D1=80=D0=BD=D0=B5=D0=BC=20=D0=BF=D1=80=D0=BE=D1=86?= =?UTF-8?q?=D0=B5=D1=81=D1=81=D0=B5=20=D0=BF=D0=BE=D1=81=D0=BB=D0=B5=20`fo?= =?UTF-8?q?rk()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 51 ++++++++++++++++++++++++----------------------- src/lck-posix.c | 23 +++++++++++++-------- src/lck-windows.c | 4 +++- src/osal.h | 3 ++- 4 files changed, 46 insertions(+), 35 deletions(-) diff --git 
a/src/core.c b/src/core.c index 92eb7709..b6587417 100644 --- a/src/core.c +++ b/src/core.c @@ -1346,13 +1346,13 @@ static void thread_rthc_set(osal_thread_key_t key, const void *value) { /* dtor called for thread, i.e. for all mdbx's environment objects */ __cold void thread_dtor(void *rthc) { rthc_lock(); - const uint32_t self_pid = osal_getpid(); - TRACE(">> pid %d, thread 0x%" PRIxPTR ", rthc %p", self_pid, + const uint32_t current_pid = osal_getpid(); + TRACE(">> pid %d, thread 0x%" PRIxPTR ", rthc %p", current_pid, osal_thread_self(), rthc); for (size_t i = 0; i < rthc_count; ++i) { MDBX_env *const env = rthc_table[i].env; - if (env->me_pid != self_pid) + if (env->me_pid != current_pid) continue; if (!(env->me_flags & MDBX_ENV_TXKEY)) continue; @@ -1376,11 +1376,11 @@ __cold void thread_dtor(void *rthc) { "current-pid %i", osal_thread_self(), __Wpedantic_format_voidptr(reader), i, __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), - (int)(reader - begin), reader->mr_pid.weak, self_pid); - if (atomic_load32(&reader->mr_pid, mo_Relaxed) == self_pid) { + (int)(reader - begin), reader->mr_pid.weak, current_pid); + if (atomic_load32(&reader->mr_pid, mo_Relaxed) == current_pid) { TRACE("==== thread 0x%" PRIxPTR ", rthc %p, cleanup", osal_thread_self(), __Wpedantic_format_voidptr(reader)); - (void)atomic_cas32(&reader->mr_pid, self_pid, 0); + (void)atomic_cas32(&reader->mr_pid, current_pid, 0); atomic_store32(&env->me_lck->mti_readers_refresh_flag, true, mo_Relaxed); } } @@ -1426,15 +1426,15 @@ __cold void thread_dtor(void *rthc) { MDBX_EXCLUDE_FOR_GPROF __cold void global_dtor(void) { - const uint32_t self_pid = osal_getpid(); - TRACE(">> pid %d", self_pid); + const uint32_t current_pid = osal_getpid(); + TRACE(">> pid %d", current_pid); rthc_lock(); #if !defined(_WIN32) && !defined(_WIN64) uint64_t *rthc = pthread_getspecific(rthc_key); TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status 0x%08" PRIx64 ", left %d", - osal_thread_self(), 
__Wpedantic_format_voidptr(rthc), self_pid, + osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, rthc ? rthc_read(rthc) : ~UINT64_C(0), atomic_load32(&rthc_pending, mo_Relaxed)); if (rthc) { @@ -1445,19 +1445,19 @@ __cold void global_dtor(void) { rthc_compare_and_clean(rthc, sign_registered)) { TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), self_pid, + osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, "registered", state); } else if (state == sign_counted && rthc_compare_and_clean(rthc, sign_counted)) { TRACE("== thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), self_pid, + osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, "counted", state); ENSURE(nullptr, atomic_sub32(&rthc_pending, 1) > 0); } else { WARNING("thread 0x%" PRIxPTR ", rthc %p, pid %d, self-status %s (0x%08" PRIx64 ")", - osal_thread_self(), __Wpedantic_format_voidptr(rthc), self_pid, + osal_thread_self(), __Wpedantic_format_voidptr(rthc), current_pid, "wrong", state); } } @@ -1475,7 +1475,7 @@ __cold void global_dtor(void) { for (unsigned left; (left = atomic_load32(&rthc_pending, mo_AcquireRelease)) > 0;) { - NOTICE("tls-cleanup: pid %d, pending %u, wait for...", self_pid, left); + NOTICE("tls-cleanup: pid %d, pending %u, wait for...", current_pid, left); const int rc = pthread_cond_timedwait(&rthc_cond, &rthc_mutex, &abstime); if (rc && rc != EINTR) break; @@ -1485,7 +1485,7 @@ __cold void global_dtor(void) { for (size_t i = 0; i < rthc_count; ++i) { MDBX_env *const env = rthc_table[i].env; - if (env->me_pid != self_pid) + if (env->me_pid != current_pid) continue; if (!(env->me_flags & MDBX_ENV_TXKEY)) continue; @@ -1499,9 +1499,9 @@ __cold void global_dtor(void) { "rthc-pid %i, current-pid %i", i, (uintptr_t)env->me_txkey, __Wpedantic_format_voidptr(begin), 
__Wpedantic_format_voidptr(end), __Wpedantic_format_voidptr(reader), - (int)(reader - begin), reader->mr_pid.weak, self_pid); - if (atomic_load32(&reader->mr_pid, mo_Relaxed) == self_pid) { - (void)atomic_cas32(&reader->mr_pid, self_pid, 0); + (int)(reader - begin), reader->mr_pid.weak, current_pid); + if (atomic_load32(&reader->mr_pid, mo_Relaxed) == current_pid) { + (void)atomic_cas32(&reader->mr_pid, current_pid, 0); TRACE("== cleanup %p", __Wpedantic_format_voidptr(reader)); cleaned = true; } @@ -1525,7 +1525,7 @@ __cold void global_dtor(void) { #endif osal_dtor(); - TRACE("<< pid %d\n", self_pid); + TRACE("<< pid %d\n", current_pid); } __cold int rthc_register(MDBX_env *const env) { @@ -1573,21 +1573,21 @@ bailout: return rc; } __cold static int rthc_drown(MDBX_env *const env) { - const uint32_t self_pid = osal_getpid(); + const uint32_t current_pid = osal_getpid(); int rc = MDBX_SUCCESS; MDBX_env *inprocess_neighbor = nullptr; - if (likely(env->me_lck_mmap.lck && self_pid == env->me_pid)) { + if (likely(env->me_lck_mmap.lck && current_pid == env->me_pid)) { MDBX_reader *const begin = &env->me_lck_mmap.lck->mti_readers[0]; MDBX_reader *const end = &env->me_lck_mmap.lck->mti_readers[env->me_maxreaders]; TRACE("== %s env %p pid %d, readers %p ...%p, current-pid %d", - (self_pid == env->me_pid) ? "cleanup" : "skip", + (current_pid == env->me_pid) ? "cleanup" : "skip", __Wpedantic_format_voidptr(env), env->me_pid, __Wpedantic_format_voidptr(begin), __Wpedantic_format_voidptr(end), - self_pid); + current_pid); bool cleaned = false; for (MDBX_reader *r = begin; r < end; ++r) { - if (atomic_load32(&r->mr_pid, mo_Relaxed) == self_pid) { + if (atomic_load32(&r->mr_pid, mo_Relaxed) == current_pid) { atomic_store32(&r->mr_pid, 0, mo_AcquireRelease); TRACE("== cleanup %p", __Wpedantic_format_voidptr(r)); cleaned = true; @@ -1603,7 +1603,7 @@ __cold static int rthc_drown(MDBX_env *const env) { rc = rc ? 
rc : err; } } - int err = osal_lck_destroy(env, inprocess_neighbor); + int err = osal_lck_destroy(env, inprocess_neighbor, current_pid); env->me_pid = 0; return rc ? rc : err; } @@ -15844,7 +15844,8 @@ __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { #if MDBX_LOCKING > MDBX_LOCKING_SYSV MDBX_lockinfo *const stub = lckless_stub(env); - ENSURE(env, osal_ipclock_destroy(&stub->mti_wlock) == 0); + /* может вернуть ошибку в дочернем процессе после fork() */ + osal_ipclock_destroy(&stub->mti_wlock); #endif /* MDBX_LOCKING */ while ((dp = env->me_dp_reserve) != NULL) { diff --git a/src/lck-posix.c b/src/lck-posix.c index d8f1fdc6..af16be1a 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -556,14 +556,13 @@ MDBX_INTERNAL_FUNC int osal_lck_upgrade(MDBX_env *env, bool dont_wait) { } __cold MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, - MDBX_env *inprocess_neighbor) { - if (unlikely(osal_getpid() != env->me_pid)) - return MDBX_PANIC; - + MDBX_env *inprocess_neighbor, + const uint32_t current_pid) { + eASSERT(env, osal_getpid() == current_pid); int rc = MDBX_SUCCESS; struct stat lck_info; - MDBX_lockinfo *lck = env->me_lck_mmap.lck; - if (env->me_lfd != INVALID_HANDLE_VALUE && !inprocess_neighbor && lck && + MDBX_lockinfo *lck = env->me_lck; + if (lck && lck == env->me_lck_mmap.lck && !inprocess_neighbor && /* try get exclusive access */ lck_op(env->me_lfd, op_setlk, F_WRLCK, 0, OFF_T_MAX) == 0 && /* if LCK was not removed */ @@ -572,7 +571,8 @@ __cold MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, (env->me_flags & MDBX_RDONLY) ? F_RDLCK : F_WRLCK, 0, OFF_T_MAX) == 0) { - VERBOSE("%p got exclusive, drown locks", (void *)env); + VERBOSE("%p got exclusive, drown ipc-locks", (void *)env); + eASSERT(env, current_pid == env->me_pid); #if MDBX_LOCKING == MDBX_LOCKING_SYSV if (env->me_sysv_ipc.semid != -1) rc = semctl(env->me_sysv_ipc.semid, 2, IPC_RMID) ? 
errno : 0; @@ -586,13 +586,20 @@ __cold MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, if (rc == 0) { const bool synced = lck->mti_unsynced_pages.weak == 0; osal_munmap(&env->me_lck_mmap); - if (synced) + if (synced && env->me_lfd != INVALID_HANDLE_VALUE) rc = ftruncate(env->me_lfd, 0) ? errno : 0; } jitter4testing(false); } + if (current_pid != env->me_pid) { + eASSERT(env, !inprocess_neighbor); + NOTICE("drown env %p after-fork pid %d -> %d", + __Wpedantic_format_voidptr(env), env->me_pid, current_pid); + inprocess_neighbor = nullptr; + } + /* 1) POSIX's fcntl() locks (i.e. when op_setlk == F_SETLK) should be restored * after file was closed. * diff --git a/src/lck-windows.c b/src/lck-windows.c index bc77150d..e7a4b03a 100644 --- a/src/lck-windows.c +++ b/src/lck-windows.c @@ -682,7 +682,9 @@ MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env, } MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, - MDBX_env *inprocess_neighbor) { + MDBX_env *inprocess_neighbor, + const uint32_t current_pid) { + (void)current_pid; /* LY: should unmap before releasing the locks to avoid race condition and * STATUS_USER_MAPPED_FILE/ERROR_USER_MAPPED_FILE */ if (env->me_map) diff --git a/src/osal.h b/src/osal.h index 1b5c317f..3e74cdb0 100644 --- a/src/osal.h +++ b/src/osal.h @@ -690,7 +690,8 @@ MDBX_INTERNAL_FUNC int osal_lck_init(MDBX_env *env, /// restore POSIX-fcntl locks after the closing of file descriptors. /// \return Error code (MDBX_PANIC) or zero on success. 
MDBX_INTERNAL_FUNC int osal_lck_destroy(MDBX_env *env, - MDBX_env *inprocess_neighbor); + MDBX_env *inprocess_neighbor, + const uint32_t current_pid); /// \brief Connects to shared interprocess locking objects and tries to acquire /// the maximum lock level (shared if exclusive is not available) From 54efb8bd81b61da9735f18bbfae185d6062dd649 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 10 Nov 2023 15:30:40 +0300 Subject: [PATCH 043/137] =?UTF-8?q?mdbx:=20=D0=BD=D0=B5=20=D1=81=D1=87?= =?UTF-8?q?=D0=B8=D1=82=D0=B0=D0=B5=D0=BC=20=D0=BE=D1=88=D0=B8=D0=B1=D0=BA?= =?UTF-8?q?=D0=B8=20ipc-unlock=20=D0=BA=D1=80=D0=B8=D1=82=D0=B8=D1=87?= =?UTF-8?q?=D0=BD=D1=8B=D0=BC=D0=B8=20=D0=B2=20=D1=81=D0=BB=D1=83=D1=87?= =?UTF-8?q?=D0=B0=D0=B5=20=D1=81=D0=BC=D0=B5=D0=BD=D1=8B=20pid.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 2 +- src/internals.h | 2 +- src/lck-posix.c | 49 +++++++++++++++++++++++++++++++------------------ 3 files changed, 33 insertions(+), 20 deletions(-) diff --git a/src/core.c b/src/core.c index b6587417..d6959a8c 100644 --- a/src/core.c +++ b/src/core.c @@ -13552,7 +13552,7 @@ __cold int mdbx_env_create(MDBX_env **penv) { #if MDBX_LOCKING > MDBX_LOCKING_SYSV MDBX_lockinfo *const stub = lckless_stub(env); - rc = osal_ipclock_stub(&stub->mti_wlock); + rc = osal_ipclock_stubinit(&stub->mti_wlock); #endif /* MDBX_LOCKING */ if (unlikely(rc != MDBX_SUCCESS)) { osal_fastmutex_destroy(&env->me_remap_guard); diff --git a/src/internals.h b/src/internals.h index 02d8ab49..7f9aedd0 100644 --- a/src/internals.h +++ b/src/internals.h @@ -817,7 +817,7 @@ typedef sem_t osal_ipclock_t; #endif /* MDBX_LOCKING */ #if MDBX_LOCKING > MDBX_LOCKING_SYSV && !defined(__cplusplus) -MDBX_INTERNAL_FUNC int osal_ipclock_stub(osal_ipclock_t *ipc); +MDBX_INTERNAL_FUNC int osal_ipclock_stubinit(osal_ipclock_t *ipc); 
MDBX_INTERNAL_FUNC int osal_ipclock_destroy(osal_ipclock_t *ipc); #endif /* MDBX_LOCKING */ diff --git a/src/lck-posix.c b/src/lck-posix.c index af16be1a..09e62f8d 100644 --- a/src/lck-posix.c +++ b/src/lck-posix.c @@ -294,7 +294,7 @@ MDBX_INTERNAL_FUNC int osal_rpid_check(MDBX_env *env, uint32_t pid) { /*---------------------------------------------------------------------------*/ #if MDBX_LOCKING > MDBX_LOCKING_SYSV -MDBX_INTERNAL_FUNC int osal_ipclock_stub(osal_ipclock_t *ipc) { +MDBX_INTERNAL_FUNC int osal_ipclock_stubinit(osal_ipclock_t *ipc) { #if MDBX_LOCKING == MDBX_LOCKING_POSIX1988 return sem_init(ipc, false, 1) ? errno : 0; #elif MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ @@ -796,7 +796,7 @@ bailout: #endif /* MDBX_LOCKING > 0 */ } -__cold static int mdbx_ipclock_failed(MDBX_env *env, osal_ipclock_t *ipc, +__cold static int osal_ipclock_failed(MDBX_env *env, osal_ipclock_t *ipc, const int err) { int rc = err; #if MDBX_LOCKING == MDBX_LOCKING_POSIX2008 || MDBX_LOCKING == MDBX_LOCKING_SYSV @@ -918,29 +918,42 @@ static int osal_ipclock_lock(MDBX_env *env, osal_ipclock_t *ipc, #endif /* MDBX_LOCKING */ if (unlikely(rc != MDBX_SUCCESS && rc != MDBX_BUSY)) - rc = mdbx_ipclock_failed(env, ipc, rc); + rc = osal_ipclock_failed(env, ipc, rc); return rc; } int osal_ipclock_unlock(MDBX_env *env, osal_ipclock_t *ipc) { + int err = MDBX_ENOSYS; #if MDBX_LOCKING == MDBX_LOCKING_POSIX2001 || \ MDBX_LOCKING == MDBX_LOCKING_POSIX2008 - int rc = pthread_mutex_unlock(ipc); - (void)env; + err = pthread_mutex_unlock(ipc); #elif MDBX_LOCKING == MDBX_LOCKING_POSIX1988 - int rc = sem_post(ipc) ? errno : MDBX_SUCCESS; - (void)env; + err = sem_post(ipc) ? errno : MDBX_SUCCESS; #elif MDBX_LOCKING == MDBX_LOCKING_SYSV if (unlikely(*ipc != (pid_t)env->me_pid)) - return EPERM; - *ipc = 0; - struct sembuf op = {.sem_num = (ipc != &env->me_lck->mti_wlock), - .sem_op = 1, - .sem_flg = SEM_UNDO}; - int rc = semop(env->me_sysv_ipc.semid, &op, 1) ? 
errno : MDBX_SUCCESS; + err = EPERM; + else { + *ipc = 0; + struct sembuf op = {.sem_num = (ipc != &env->me_lck->mti_wlock), + .sem_op = 1, + .sem_flg = SEM_UNDO}; + err = semop(env->me_sysv_ipc.semid, &op, 1) ? errno : MDBX_SUCCESS; + } #else #error "FIXME" #endif /* MDBX_LOCKING */ + int rc = err; + if (unlikely(rc != MDBX_SUCCESS)) { + const uint32_t current_pid = osal_getpid(); + if (current_pid == env->me_pid || LOG_ENABLED(MDBX_LOG_NOTICE)) + debug_log((current_pid == env->me_pid) + ? MDBX_LOG_FATAL + : (rc = MDBX_SUCCESS, MDBX_LOG_NOTICE), + "ipc-unlock()", __LINE__, "failed: env %p, lck-%s %p, err %d\n", + __Wpedantic_format_voidptr(env), + (env->me_lck == env->me_lck_mmap.lck) ? "mmap" : "stub", + __Wpedantic_format_voidptr(env->me_lck), err); + } return rc; } @@ -954,10 +967,10 @@ MDBX_INTERNAL_FUNC int osal_rdt_lock(MDBX_env *env) { MDBX_INTERNAL_FUNC void osal_rdt_unlock(MDBX_env *env) { TRACE("%s", ">>"); - int rc = osal_ipclock_unlock(env, &env->me_lck->mti_rlock); - TRACE("<< rc %d", rc); - if (unlikely(rc != MDBX_SUCCESS)) - mdbx_panic("%s() failed: err %d\n", __func__, rc); + int err = osal_ipclock_unlock(env, &env->me_lck->mti_rlock); + TRACE("<< err %d", err); + if (unlikely(err != MDBX_SUCCESS)) + mdbx_panic("%s() failed: err %d\n", __func__, err); jitter4testing(true); } @@ -974,7 +987,7 @@ int osal_txn_lock(MDBX_env *env, bool dont_wait) { env->me_txn0->mt_owner = osal_thread_self(); rc = MDBX_SUCCESS; } - TRACE("<< rc %d", err); + TRACE("<< err %d, rc %d", err, rc); return rc; } From ce74fae036ea1e0b48f5587e1bac774857ff6d85 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Nov 2023 00:23:09 +0300 Subject: [PATCH 044/137] =?UTF-8?q?mdbx:=20=D1=80=D0=B5=D1=84=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20=D0=B8=20=D0=B2=D1=8B?= =?UTF-8?q?=D0=B4=D0=B5=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`env=5Fopen()`.?= MIME-Version: 
1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 353 +++++++++++++++++++++++++++-------------------------- 1 file changed, 177 insertions(+), 176 deletions(-) diff --git a/src/core.c b/src/core.c index d6959a8c..39ba89b1 100644 --- a/src/core.c +++ b/src/core.c @@ -13512,7 +13512,6 @@ __cold int mdbx_env_create(MDBX_env **penv) { env->me_maxdbs = env->me_numdbs = CORE_DBS; env->me_lazy_fd = env->me_dsync_fd = env->me_fd4meta = env->me_lfd = INVALID_HANDLE_VALUE; - env->me_pid = osal_getpid(); env->me_stuck_meta = -1; env->me_options.rp_augment_limit = MDBX_PNL_INITIAL; @@ -13946,7 +13945,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } -#endif +#endif /* Windows */ if (new_geo.now != current_geo->now || new_geo.upper != current_geo->upper) { @@ -13995,6 +13994,7 @@ __cold static int alloc_page_buf(MDBX_env *env) { __cold static int setup_dxb(MDBX_env *env, const int lck_rc, const mdbx_mode_t mode_bits) { MDBX_meta header; + eASSERT(env, !(env->me_flags & MDBX_ENV_ACTIVE)); int rc = MDBX_RESULT_FALSE; int err = read_header(env, &header, lck_rc, mode_bits); if (unlikely(err != MDBX_SUCCESS)) { @@ -14239,7 +14239,6 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, #if MDBX_DEBUG meta_troika_dump(env, &troika); #endif - eASSERT(env, !env->me_txn && !env->me_txn0); //-------------------------------- validate/rollback head & steady meta-pages if (unlikely(env->me_stuck_meta >= 0)) { /* recovery mode */ @@ -15197,78 +15196,7 @@ __cold int mdbx_env_deleteW(const wchar_t *pathname, return (err == MDBX_SUCCESS) ? 
rc : err; } -__cold int mdbx_env_open(MDBX_env *env, const char *pathname, - MDBX_env_flags_t flags, mdbx_mode_t mode) { -#if defined(_WIN32) || defined(_WIN64) - wchar_t *pathnameW = nullptr; - int rc = osal_mb2w(pathname, &pathnameW); - if (likely(rc == MDBX_SUCCESS)) { - rc = mdbx_env_openW(env, pathnameW, flags, mode); - osal_free(pathnameW); - if (rc == MDBX_SUCCESS) - /* force to make cache of the multi-byte pathname representation */ - mdbx_env_get_path(env, &pathname); - } - return rc; -} - -__cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, - MDBX_env_flags_t flags, mdbx_mode_t mode) { -#endif /* Windows */ - - int rc = check_env(env, false); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - - if (unlikely(flags & ~ENV_USABLE_FLAGS)) - return MDBX_EINVAL; - - if (unlikely(env->me_lazy_fd != INVALID_HANDLE_VALUE || - (env->me_flags & MDBX_ENV_ACTIVE) != 0 || env->me_map)) - return MDBX_EPERM; - - /* Pickup previously mdbx_env_set_flags(), - * but avoid MDBX_UTTERLY_NOSYNC by disjunction */ - const uint32_t saved_me_flags = env->me_flags; - flags = merge_sync_flags(flags | MDBX_DEPRECATED_COALESCE, env->me_flags); - - if (flags & MDBX_RDONLY) { - /* Silently ignore irrelevant flags when we're only getting read access */ - flags &= ~(MDBX_WRITEMAP | MDBX_DEPRECATED_MAPASYNC | MDBX_SAFE_NOSYNC | - MDBX_NOMETASYNC | MDBX_DEPRECATED_COALESCE | MDBX_LIFORECLAIM | - MDBX_NOMEMINIT | MDBX_ACCEDE); - mode = 0; - } else { -#if MDBX_MMAP_INCOHERENT_FILE_WRITE - /* Temporary `workaround` for OpenBSD kernel's flaw. - * See https://libmdbx.dqdkfa.ru/dead-github/issues/67 */ - if ((flags & MDBX_WRITEMAP) == 0) { - if (flags & MDBX_ACCEDE) - flags |= MDBX_WRITEMAP; - else { - debug_log(MDBX_LOG_ERROR, __func__, __LINE__, - "System (i.e. 
OpenBSD) requires MDBX_WRITEMAP because " - "of an internal flaw(s) in a file/buffer/page cache.\n"); - return 42 /* ENOPROTOOPT */; - } - } -#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ - } - - env->me_flags = (flags & ~MDBX_FATAL_ERROR); - rc = env_handle_pathname(env, pathname, mode); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; - - env->me_flags = (flags & ~MDBX_FATAL_ERROR) | MDBX_ENV_ACTIVE; - env->me_dbxs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbxs[0])); - env->me_db_flags = osal_calloc(env->me_maxdbs, sizeof(env->me_db_flags[0])); - env->me_dbi_seqs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbi_seqs[0])); - if (!(env->me_dbxs && env->me_db_flags && env->me_dbi_seqs)) { - rc = MDBX_ENOMEM; - goto bailout; - } - +__cold static int env_open(MDBX_env *env, mdbx_mode_t mode) { /* Использование O_DSYNC или FILE_FLAG_WRITE_THROUGH: * * 0) Если размер страниц БД меньше системной страницы ОЗУ, то ядру ОС @@ -15357,18 +15285,16 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, */ env->me_pid = osal_getpid(); - rc = osal_openfile((env->me_flags & MDBX_RDONLY) ? MDBX_OPEN_DXB_READ - : MDBX_OPEN_DXB_LAZY, - env, env->me_pathname.dxb, &env->me_lazy_fd, mode); + int rc = osal_openfile((env->me_flags & MDBX_RDONLY) ? 
MDBX_OPEN_DXB_READ + : MDBX_OPEN_DXB_LAZY, + env, env->me_pathname.dxb, &env->me_lazy_fd, mode); if (unlikely(rc != MDBX_SUCCESS)) return rc; #if MDBX_LOCKING == MDBX_LOCKING_SYSV env->me_sysv_ipc.key = ftok(env->me_pathname.dxb, 42); - if (env->me_sysv_ipc.key == -1) { - rc = errno; - goto bailout; - } + if (unlikely(env->me_sysv_ipc.key == -1)) + return errno; #endif /* MDBX_LOCKING */ /* Set the position in files outside of the data to avoid corruption @@ -15380,9 +15306,9 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, #if defined(_WIN32) || defined(_WIN64) eASSERT(env, env->me_overlapped_fd == 0); bool ior_direct = false; - if (!(flags & + if (!(env->me_flags & (MDBX_RDONLY | MDBX_SAFE_NOSYNC | MDBX_NOMETASYNC | MDBX_EXCLUSIVE))) { - if (MDBX_AVOID_MSYNC && (flags & MDBX_WRITEMAP)) { + if (MDBX_AVOID_MSYNC && (env->me_flags & MDBX_WRITEMAP)) { /* Запрошен режим MDBX_SYNC_DURABLE | MDBX_WRITEMAP при активной опции * MDBX_AVOID_MSYNC. * @@ -15420,23 +15346,19 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, rc = osal_openfile(ior_direct ? MDBX_OPEN_DXB_OVERLAPPED_DIRECT : MDBX_OPEN_DXB_OVERLAPPED, env, env->me_pathname.dxb, &env->me_overlapped_fd, 0); - if (rc != MDBX_SUCCESS) - goto bailout; + if (unlikely(rc != MDBX_SUCCESS)) + return rc; env->me_data_lock_event = CreateEventW(nullptr, true, false, nullptr); - if (!env->me_data_lock_event) { - rc = (int)GetLastError(); - goto bailout; - } + if (unlikely(!env->me_data_lock_event)) + return (int)GetLastError(); osal_fseek(env->me_overlapped_fd, safe_parking_lot_offset); } #else if (mode == 0) { /* pickup mode for lck-file */ struct stat st; - if (fstat(env->me_lazy_fd, &st)) { - rc = errno; - goto bailout; - } + if (unlikely(fstat(env->me_lazy_fd, &st))) + return errno; mode = st.st_mode; } mode = (/* inherit read permissions for group and others */ mode & @@ -15446,24 +15368,24 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, ((mode & S_IROTH) ? 
/* +write if readable by others */ S_IWOTH : 0); #endif /* !Windows */ const int lck_rc = setup_lck(env, mode); - if (MDBX_IS_ERROR(lck_rc)) { - rc = lck_rc; - goto bailout; - } - osal_fseek(env->me_lfd, safe_parking_lot_offset); + if (unlikely(MDBX_IS_ERROR(lck_rc))) + return lck_rc; + if (env->me_lfd != INVALID_HANDLE_VALUE) + osal_fseek(env->me_lfd, safe_parking_lot_offset); eASSERT(env, env->me_dsync_fd == INVALID_HANDLE_VALUE); - if (!(flags & (MDBX_RDONLY | MDBX_SAFE_NOSYNC | MDBX_DEPRECATED_MAPASYNC + if (!(env->me_flags & + (MDBX_RDONLY | MDBX_SAFE_NOSYNC | MDBX_DEPRECATED_MAPASYNC #if defined(_WIN32) || defined(_WIN64) - | MDBX_EXCLUSIVE + | MDBX_EXCLUSIVE #endif /* !Windows */ - ))) { + ))) { rc = osal_openfile(MDBX_OPEN_DXB_DSYNC, env, env->me_pathname.dxb, &env->me_dsync_fd, 0); - if (MDBX_IS_ERROR(rc)) - goto bailout; + if (unlikely(MDBX_IS_ERROR(rc))) + return rc; if (env->me_dsync_fd != INVALID_HANDLE_VALUE) { - if ((flags & MDBX_NOMETASYNC) == 0) + if ((env->me_flags & MDBX_NOMETASYNC) == 0) env->me_fd4meta = env->me_dsync_fd; osal_fseek(env->me_dsync_fd, safe_parking_lot_offset); } @@ -15538,17 +15460,14 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, ERROR("current mode/flags 0x%X incompatible with requested 0x%X, " "rigorous diff 0x%X", env->me_flags, snap_flags, rigorous_diff); - rc = MDBX_INCOMPATIBLE; - goto bailout; + return MDBX_INCOMPATIBLE; } } mincore_clean_cache(env); const int dxb_rc = setup_dxb(env, lck_rc, mode); - if (MDBX_IS_ERROR(dxb_rc)) { - rc = dxb_rc; - goto bailout; - } + if (MDBX_IS_ERROR(dxb_rc)) + return dxb_rc; rc = osal_check_fs_incore(env->me_lazy_fd); env->me_incore = false; @@ -15557,18 +15476,18 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, NOTICE("%s", "in-core database"); } else if (unlikely(rc != MDBX_SUCCESS)) { ERROR("check_fs_incore(), err %d", rc); - goto bailout; + return rc; } if (unlikely(/* recovery mode */ env->me_stuck_meta >= 0) && (lck_rc != /* exclusive */ 
MDBX_RESULT_TRUE || - (flags & MDBX_EXCLUSIVE) == 0)) { + (env->me_flags & MDBX_EXCLUSIVE) == 0)) { ERROR("%s", "recovery requires exclusive mode"); - rc = MDBX_BUSY; - goto bailout; + return MDBX_BUSY; } DEBUG("opened dbenv %p", (void *)env); + env->me_flags |= MDBX_ENV_ACTIVE; if (!lck || lck_rc == MDBX_RESULT_TRUE) { env->me_lck->mti_envmode.weak = env->me_flags & mode_flags; env->me_lck->mti_meta_sync_txnid.weak = @@ -15581,14 +15500,96 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, DEBUG("lck-downgrade-%s: rc %i", (env->me_flags & MDBX_EXCLUSIVE) ? "partial" : "full", rc); if (rc != MDBX_SUCCESS) - goto bailout; + return rc; } else { rc = cleanup_dead_readers(env, false, NULL); if (MDBX_IS_ERROR(rc)) - goto bailout; + return rc; } } + rc = (env->me_flags & MDBX_RDONLY) + ? MDBX_SUCCESS + : osal_ioring_create(&env->me_ioring +#if defined(_WIN32) || defined(_WIN64) + , + ior_direct, env->me_overlapped_fd +#endif /* Windows */ + ); + return rc; +} + +__cold int mdbx_env_open(MDBX_env *env, const char *pathname, + MDBX_env_flags_t flags, mdbx_mode_t mode) { +#if defined(_WIN32) || defined(_WIN64) + wchar_t *pathnameW = nullptr; + int rc = osal_mb2w(pathname, &pathnameW); + if (likely(rc == MDBX_SUCCESS)) { + rc = mdbx_env_openW(env, pathnameW, flags, mode); + osal_free(pathnameW); + if (rc == MDBX_SUCCESS) + /* force to make cache of the multi-byte pathname representation */ + mdbx_env_get_path(env, &pathname); + } + return rc; +} + +__cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, + MDBX_env_flags_t flags, mdbx_mode_t mode) { +#endif /* Windows */ + + int rc = check_env(env, false); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + + if (unlikely(flags & ~ENV_USABLE_FLAGS)) + return MDBX_EINVAL; + + if (unlikely(env->me_lazy_fd != INVALID_HANDLE_VALUE || + (env->me_flags & MDBX_ENV_ACTIVE) != 0 || env->me_map)) + return MDBX_EPERM; + + /* Pickup previously mdbx_env_set_flags(), + * but avoid MDBX_UTTERLY_NOSYNC by 
disjunction */ + const uint32_t saved_me_flags = env->me_flags; + flags = merge_sync_flags(flags | MDBX_DEPRECATED_COALESCE, env->me_flags); + + if (flags & MDBX_RDONLY) { + /* Silently ignore irrelevant flags when we're only getting read access */ + flags &= ~(MDBX_WRITEMAP | MDBX_DEPRECATED_MAPASYNC | MDBX_SAFE_NOSYNC | + MDBX_NOMETASYNC | MDBX_DEPRECATED_COALESCE | MDBX_LIFORECLAIM | + MDBX_NOMEMINIT | MDBX_ACCEDE); + mode = 0; + } else { +#if MDBX_MMAP_INCOHERENT_FILE_WRITE + /* Temporary `workaround` for OpenBSD kernel's flaw. + * See https://libmdbx.dqdkfa.ru/dead-github/issues/67 */ + if ((flags & MDBX_WRITEMAP) == 0) { + if (flags & MDBX_ACCEDE) + flags |= MDBX_WRITEMAP; + else { + debug_log(MDBX_LOG_ERROR, __func__, __LINE__, + "System (i.e. OpenBSD) requires MDBX_WRITEMAP because " + "of an internal flaw(s) in a file/buffer/page cache.\n"); + return 42 /* ENOPROTOOPT */; + } + } +#endif /* MDBX_MMAP_INCOHERENT_FILE_WRITE */ + } + + env->me_flags = (flags & ~MDBX_FATAL_ERROR); + rc = env_handle_pathname(env, pathname, mode); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + env->me_dbxs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbxs[0])); + env->me_db_flags = osal_calloc(env->me_maxdbs, sizeof(env->me_db_flags[0])); + env->me_dbi_seqs = osal_calloc(env->me_maxdbs, sizeof(env->me_dbi_seqs[0])); + if (unlikely(!(env->me_dbxs && env->me_db_flags && env->me_dbi_seqs))) { + rc = MDBX_ENOMEM; + goto bailout; + } + if ((flags & MDBX_RDONLY) == 0) { MDBX_txn *txn = nullptr; const intptr_t bitmap_bytes = @@ -15606,73 +15607,73 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, (sizeof(txn->mt_dbs[0]) + sizeof(txn->mt_cursors[0]) + sizeof(txn->mt_dbi_seqs[0]) + sizeof(txn->mt_dbi_state[0])); rc = alloc_page_buf(env); - if (rc == MDBX_SUCCESS) { - memset(env->me_pbuf, -1, env->me_psize * (size_t)2); - memset(ptr_disp(env->me_pbuf, env->me_psize * (size_t)2), 0, - env->me_psize); - txn = osal_calloc(1, size); - if (txn) { - txn->mt_dbs = 
ptr_disp(txn, base); - txn->mt_cursors = - ptr_disp(txn->mt_dbs, env->me_maxdbs * sizeof(txn->mt_dbs[0])); - txn->mt_dbi_seqs = ptr_disp( - txn->mt_cursors, env->me_maxdbs * sizeof(txn->mt_cursors[0])); - txn->mt_dbi_state = - ptr_disp(txn, size - env->me_maxdbs * sizeof(txn->mt_dbi_state[0])); -#if MDBX_ENABLE_DBI_SPARSE - txn->mt_dbi_sparse = ptr_disp(txn->mt_dbi_state, -bitmap_bytes); -#endif /* MDBX_ENABLE_DBI_SPARSE */ - txn->mt_env = env; - txn->mt_flags = MDBX_TXN_FINISHED; - env->me_txn0 = txn; - txn->tw.retired_pages = pnl_alloc(MDBX_PNL_INITIAL); - txn->tw.relist = pnl_alloc(MDBX_PNL_INITIAL); - if (unlikely(!txn->tw.retired_pages || !txn->tw.relist)) - rc = MDBX_ENOMEM; - } else - rc = MDBX_ENOMEM; + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + memset(env->me_pbuf, -1, env->me_psize * (size_t)2); + memset(ptr_disp(env->me_pbuf, env->me_psize * (size_t)2), 0, env->me_psize); + txn = osal_calloc(1, size); + if (unlikely(!txn)) { + rc = MDBX_ENOMEM; + goto bailout; } - if (rc == MDBX_SUCCESS) - rc = osal_ioring_create(&env->me_ioring -#if defined(_WIN32) || defined(_WIN64) - , - ior_direct, env->me_overlapped_fd -#endif /* Windows */ - ); - if (rc == MDBX_SUCCESS) - adjust_defaults(env); + txn->mt_dbs = ptr_disp(txn, base); + txn->mt_cursors = + ptr_disp(txn->mt_dbs, env->me_maxdbs * sizeof(txn->mt_dbs[0])); + txn->mt_dbi_seqs = + ptr_disp(txn->mt_cursors, env->me_maxdbs * sizeof(txn->mt_cursors[0])); + txn->mt_dbi_state = + ptr_disp(txn, size - env->me_maxdbs * sizeof(txn->mt_dbi_state[0])); +#if MDBX_ENABLE_DBI_SPARSE + txn->mt_dbi_sparse = ptr_disp(txn->mt_dbi_state, -bitmap_bytes); +#endif /* MDBX_ENABLE_DBI_SPARSE */ + txn->mt_env = env; + txn->mt_flags = MDBX_TXN_FINISHED; + env->me_txn0 = txn; + txn->tw.retired_pages = pnl_alloc(MDBX_PNL_INITIAL); + txn->tw.relist = pnl_alloc(MDBX_PNL_INITIAL); + if (unlikely(!txn->tw.retired_pages || !txn->tw.relist)) { + rc = MDBX_ENOMEM; + goto bailout; + } + adjust_defaults(env); } + rc = env_open(env, 
mode); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + #if MDBX_DEBUG - if (rc == MDBX_SUCCESS) { - const meta_troika_t troika = meta_tap(env); - const meta_ptr_t head = meta_recent(env, &troika); - const MDBX_db *db = &head.ptr_c->mm_dbs[MAIN_DBI]; + const meta_troika_t troika = meta_tap(env); + const meta_ptr_t head = meta_recent(env, &troika); + const MDBX_db *db = &head.ptr_c->mm_dbs[MAIN_DBI]; - DEBUG("opened database version %u, pagesize %u", - (uint8_t)unaligned_peek_u64(4, head.ptr_c->mm_magic_and_version), - env->me_psize); - DEBUG("using meta page %" PRIaPGNO ", txn %" PRIaTXN, - data_page(head.ptr_c)->mp_pgno, head.txnid); - DEBUG("depth: %u", db->md_depth); - DEBUG("entries: %" PRIu64, db->md_entries); - DEBUG("branch pages: %" PRIaPGNO, db->md_branch_pages); - DEBUG("leaf pages: %" PRIaPGNO, db->md_leaf_pages); - DEBUG("large/overflow pages: %" PRIaPGNO, db->md_overflow_pages); - DEBUG("root: %" PRIaPGNO, db->md_root); - DEBUG("schema_altered: %" PRIaTXN, db->md_mod_txnid); - } -#endif + DEBUG("opened database version %u, pagesize %u", + (uint8_t)unaligned_peek_u64(4, head.ptr_c->mm_magic_and_version), + env->me_psize); + DEBUG("using meta page %" PRIaPGNO ", txn %" PRIaTXN, + data_page(head.ptr_c)->mp_pgno, head.txnid); + DEBUG("depth: %u", db->md_depth); + DEBUG("entries: %" PRIu64, db->md_entries); + DEBUG("branch pages: %" PRIaPGNO, db->md_branch_pages); + DEBUG("leaf pages: %" PRIaPGNO, db->md_leaf_pages); + DEBUG("large/overflow pages: %" PRIaPGNO, db->md_overflow_pages); + DEBUG("root: %" PRIaPGNO, db->md_root); + DEBUG("schema_altered: %" PRIaTXN, db->md_mod_txnid); +#endif /* MDBX_DEBUG */ -bailout: - if (rc != MDBX_SUCCESS) { - rc = env_close(env) ? MDBX_PANIC : rc; - env->me_flags = - saved_me_flags | ((rc != MDBX_PANIC) ? 
0 : MDBX_FATAL_ERROR); - } else { + if (likely(rc == MDBX_SUCCESS)) { #if defined(ENABLE_MEMCHECK) || defined(__SANITIZE_ADDRESS__) txn_valgrind(env, nullptr); #endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ + } else { + bailout: + if (likely(env_close(env) == MDBX_SUCCESS)) { + env->me_flags = saved_me_flags; + } else { + rc = MDBX_PANIC; + env->me_flags = saved_me_flags | MDBX_FATAL_ERROR; + } } return rc; } @@ -15713,7 +15714,7 @@ __cold static int env_close(MDBX_env *env) { #ifdef ENABLE_MEMCHECK VALGRIND_DISCARD(env->me_valgrind_handle); env->me_valgrind_handle = -1; -#endif +#endif /* ENABLE_MEMCHECK */ } #if defined(_WIN32) || defined(_WIN64) From a22ec56938fb539fb335d46360ea5255e1a607d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 20:21:18 +0300 Subject: [PATCH 045/137] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB?= =?UTF-8?q?=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20`pthread=5Fa?= =?UTF-8?q?tfork(after=5Ffork)`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/core.c b/src/core.c index 39ba89b1..8138dcaf 100644 --- a/src/core.c +++ b/src/core.c @@ -25910,6 +25910,26 @@ __cold int mdbx_env_warmup(const MDBX_env *env, const MDBX_txn *txn, return rc; } +#if !defined(_WIN32) && !defined(_WIN64) +__cold static void rthc_afterfork(void) { + NOTICE("drown %d rthc entries", rthc_count); + for (size_t i = 0; i < rthc_count; ++i) { + MDBX_env *const env = rthc_table[i].env; + NOTICE("drown env %p", __Wpedantic_format_voidptr(env)); + env->me_dxb_mmap.base = nullptr; + env->me_lck_mmap.base = nullptr; + env->me_lck = lckless_stub(env); + rthc_drown(env); + } + if (rthc_table != rthc_table_static) + osal_free(rthc_table); + rthc_count = 0; + rthc_table = rthc_table_static; + rthc_limit = 
RTHC_INITIAL_LIMIT; + rthc_pending.weak = 0; +} +#endif /* ! Windows */ + __cold void global_ctor(void) { osal_ctor(); rthc_limit = RTHC_INITIAL_LIMIT; @@ -25917,6 +25937,7 @@ __cold void global_ctor(void) { #if defined(_WIN32) || defined(_WIN64) InitializeCriticalSection(&rthc_critical_section); #else + ENSURE(nullptr, pthread_atfork(nullptr, nullptr, rthc_afterfork) == 0); ENSURE(nullptr, pthread_key_create(&rthc_key, thread_dtor) == 0); TRACE("pid %d, &mdbx_rthc_key = %p, value 0x%x", osal_getpid(), __Wpedantic_format_voidptr(&rthc_key), (unsigned)rthc_key); From af4dfe541b83938ff67ea48012b8c78f3226691a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 20:08:04 +0300 Subject: [PATCH 046/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fenv=5Fresurrect=5Fafte?= =?UTF-8?q?r=5Ffork()`=20=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 5 +++ src/core.c | 109 +++++++++++++++++++++++++++++++++++------------------ 2 files changed, 78 insertions(+), 36 deletions(-) diff --git a/mdbx.h b/mdbx.h index e2da78b3..ce2dad5e 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2895,6 +2895,11 @@ LIBMDBX_INLINE_API(int, mdbx_env_close, (MDBX_env * env)) { return mdbx_env_close_ex(env, false); } +#if !(defined(_WIN32) || defined(_WIN64)) +/** FIXME */ +LIBMDBX_API int mdbx_env_resurrect_after_fork(MDBX_env *env); +#endif /* Windows */ + /** \brief Warming up options * \ingroup c_settings * \anchor warmup_flags diff --git a/src/core.c b/src/core.c index 8138dcaf..81d62e69 100644 --- a/src/core.c +++ b/src/core.c @@ -3317,7 +3317,7 @@ static int __must_check_result read_header(MDBX_env *env, MDBX_meta *meta, static int __must_check_result sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending, meta_troika_t *const troika); -static int 
env_close(MDBX_env *env); +static int env_close(MDBX_env *env, bool resurrect_after_fork); struct node_result { MDBX_node *node; @@ -15668,7 +15668,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, #endif /* ENABLE_MEMCHECK || __SANITIZE_ADDRESS__ */ } else { bailout: - if (likely(env_close(env) == MDBX_SUCCESS)) { + if (likely(env_close(env, false) == MDBX_SUCCESS)) { env->me_flags = saved_me_flags; } else { rc = MDBX_PANIC; @@ -15679,7 +15679,7 @@ __cold int mdbx_env_openW(MDBX_env *env, const wchar_t *pathname, } /* Destroy resources from mdbx_env_open(), clear our readers & DBIs */ -__cold static int env_close(MDBX_env *env) { +__cold static int env_close(MDBX_env *env, bool resurrect_after_fork) { const unsigned flags = env->me_flags; env->me_flags &= ~ENV_INTERNAL_FLAGS; if (flags & MDBX_ENV_TXKEY) { @@ -15724,6 +15724,7 @@ __cold static int env_close(MDBX_env *env) { CloseHandle(env->me_data_lock_event); env->me_data_lock_event = INVALID_HANDLE_VALUE; } + eASSERT(env, !resurrect_after_fork); if (env->me_pathname_char) { osal_free(env->me_pathname_char); env->me_pathname_char = nullptr; @@ -15745,43 +15746,79 @@ __cold static int env_close(MDBX_env *env) { env->me_lfd = INVALID_HANDLE_VALUE; } - if (env->me_dbxs) { - for (size_t i = CORE_DBS; i < env->me_numdbs; ++i) - if (env->me_dbxs[i].md_name.iov_len) - osal_free(env->me_dbxs[i].md_name.iov_base); - osal_free(env->me_dbxs); - env->me_numdbs = CORE_DBS; - env->me_dbxs = nullptr; - } - if (env->me_pbuf) { - osal_memalign_free(env->me_pbuf); - env->me_pbuf = nullptr; - } - if (env->me_dbi_seqs) { - osal_free(env->me_dbi_seqs); - env->me_dbi_seqs = nullptr; - } - if (env->me_db_flags) { - osal_free(env->me_db_flags); - env->me_db_flags = nullptr; - } - if (env->me_pathname.buffer) { - osal_free(env->me_pathname.buffer); - env->me_pathname.buffer = nullptr; - } - if (env->me_txn0) { - dpl_free(env->me_txn0); - txl_free(env->me_txn0->tw.lifo_reclaimed); - 
pnl_free(env->me_txn0->tw.retired_pages); - pnl_free(env->me_txn0->tw.spilled.list); - pnl_free(env->me_txn0->tw.relist); - osal_free(env->me_txn0); - env->me_txn0 = nullptr; + if (!resurrect_after_fork) { + if (env->me_dbxs) { + for (size_t i = CORE_DBS; i < env->me_numdbs; ++i) + if (env->me_dbxs[i].md_name.iov_len) + osal_free(env->me_dbxs[i].md_name.iov_base); + osal_free(env->me_dbxs); + env->me_numdbs = CORE_DBS; + env->me_dbxs = nullptr; + } + if (env->me_pbuf) { + osal_memalign_free(env->me_pbuf); + env->me_pbuf = nullptr; + } + if (env->me_dbi_seqs) { + osal_free(env->me_dbi_seqs); + env->me_dbi_seqs = nullptr; + } + if (env->me_db_flags) { + osal_free(env->me_db_flags); + env->me_db_flags = nullptr; + } + if (env->me_pathname.buffer) { + osal_free(env->me_pathname.buffer); + env->me_pathname.buffer = nullptr; + } + if (env->me_txn0) { + dpl_free(env->me_txn0); + txl_free(env->me_txn0->tw.lifo_reclaimed); + pnl_free(env->me_txn0->tw.retired_pages); + pnl_free(env->me_txn0->tw.spilled.list); + pnl_free(env->me_txn0->tw.relist); + osal_free(env->me_txn0); + env->me_txn0 = nullptr; + } } env->me_stuck_meta = -1; return rc; } +#if !(defined(_WIN32) || defined(_WIN64)) +__cold int mdbx_env_resurrect_after_fork(MDBX_env *env) { + if (unlikely(!env)) + return MDBX_EINVAL; + + if (unlikely(env->me_signature.weak != MDBX_ME_SIGNATURE)) + return MDBX_EBADSIGN; + + if (unlikely(env->me_flags & MDBX_FATAL_ERROR)) + return MDBX_PANIC; + + const uint32_t new_pid = osal_getpid(); + if (unlikely(env->me_pid == new_pid)) + return MDBX_SUCCESS; + + if (!atomic_cas32(&env->me_signature, MDBX_ME_SIGNATURE, ~MDBX_ME_SIGNATURE)) + return MDBX_EBADSIGN; + + if (env->me_txn) + txn_abort(env->me_txn0); + env->me_live_reader = 0; + int rc = env_close(env, true); + env->me_signature.weak = MDBX_ME_SIGNATURE; + if (likely(rc == MDBX_SUCCESS)) { + rc = env_open(env, 0); + if (unlikely(rc != MDBX_SUCCESS && env_close(env, false) != MDBX_SUCCESS)) { + rc = MDBX_PANIC; + env->me_flags |= 
MDBX_FATAL_ERROR; + } + } + return rc; +} +#endif /* Windows */ + __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { MDBX_page *dp; int rc = MDBX_SUCCESS; @@ -15834,7 +15871,7 @@ __cold int mdbx_env_close_ex(MDBX_env *env, bool dont_sync) { } eASSERT(env, env->me_signature.weak == 0); - rc = env_close(env) ? MDBX_PANIC : rc; + rc = env_close(env, false) ? MDBX_PANIC : rc; ENSURE(env, osal_fastmutex_destroy(&env->me_dbi_lock) == MDBX_SUCCESS); #if defined(_WIN32) || defined(_WIN64) /* me_remap_guard don't have destructor (Slim Reader/Writer Lock) */ From d9f49b17dee1ab709c0d825fb32843415a398edd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 8 Nov 2023 19:58:18 +0300 Subject: [PATCH 047/137] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=82=D0=B5=D1=81=D1=82?= =?UTF-8?q?=D0=BE=D0=B2=20=D0=B4=D0=BB=D1=8F=20`mdbx=5Fenv=5Fresurrect=5Fa?= =?UTF-8?q?fter=5Ffork()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 1 + test/cases.c++ | 4 + test/config.h++ | 4 + test/fork.c++ | 224 ++++++++++++++++++++++++++++++++++++++++++++ test/log.c++ | 9 +- test/log.h++ | 1 + test/main.c++ | 16 ++++ test/osal-unix.c++ | 16 ++-- test/osal.h++ | 4 + test/test.c++ | 6 ++ 10 files changed, 276 insertions(+), 9 deletions(-) create mode 100644 test/fork.c++ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index f6901916..23789be0 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -28,6 +28,7 @@ set(LIBMDBX_TEST_SOURCES append.c++ ttl.c++ nested.c++ + fork.c++ ) if(NOT MDBX_BUILD_CXX) diff --git a/test/cases.c++ b/test/cases.c++ index 97421e7d..5ccb87ae 100644 --- a/test/cases.c++ +++ b/test/cases.c++ @@ -105,6 +105,10 @@ void testcase_setup(const char *casename, const actor_params &params, configure_actor(last_space_id, ac_try, 
nullptr, params); configure_actor(last_space_id, ac_jitter, nullptr, params); configure_actor(last_space_id, ac_try, nullptr, params); +#if !defined(_WIN32) && !defined(_WIN64) + configure_actor(last_space_id, ac_forkread, nullptr, params); + configure_actor(last_space_id, ac_forkwrite, nullptr, params); +#endif /* Windows */ log_notice("<<< testcase_setup(%s): done", casename); } else { failure("unknown testcase `%s`", casename); diff --git a/test/config.h++ b/test/config.h++ index f57dce7c..80996157 100644 --- a/test/config.h++ +++ b/test/config.h++ @@ -25,6 +25,10 @@ enum actor_testcase { ac_hill, ac_deadread, ac_deadwrite, +#if !defined(_WIN32) && !defined(_WIN64) + ac_forkread, + ac_forkwrite, +#endif /* Windows */ ac_jitter, ac_try, ac_copy, diff --git a/test/fork.c++ b/test/fork.c++ new file mode 100644 index 00000000..7f1c9b19 --- /dev/null +++ b/test/fork.c++ @@ -0,0 +1,224 @@ +/* + * Copyright 2023 Leonid Yuriev + * and other libmdbx authors: please see AUTHORS file. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted only as authorized by the OpenLDAP + * Public License. + * + * A copy of this license is available in the file LICENSE in the + * top-level directory of the distribution or, alternatively, at + * <http://www.OpenLDAP.org/license.html>. 
+ */ + +#include "test.h++" + +#if !defined(_WIN32) && !defined(_WIN64) + +#include <sys/types.h> +#include <sys/wait.h> + +class testcase_smoke4fork : public testcase { + using inherited = testcase; + +public: + testcase_smoke4fork(const actor_config &config, const mdbx_pid_t pid) + : testcase(config, pid) {} + bool run() override; + virtual bool smoke() = 0; +}; + +bool testcase_smoke4fork::run() { + static std::vector<pid_t> history; + const pid_t current_pid = getpid(); + if (history.empty() || current_pid != history.front()) { + history.push_back(current_pid); + if (history.size() > /* TODO: add test option */ 2) { + log_notice("force exit to avoid fork-bomb: deep %zu, pid stack", + history.size()); + for (const auto pid : history) + logging::feed(" %d", pid); + logging::ln(); + log_flush(); + exit(0); + } + } + const int deep = (int)history.size(); + + int err = db_open__begin__table_create_open_clean(dbi); + if (unlikely(err != MDBX_SUCCESS)) { + log_notice("fork[deep %d, pid %d]: bailout-prepare due '%s'", deep, + current_pid, mdbx_strerror(err)); + return false; + } + + if (flipcoin()) { + if (!smoke()) { + log_notice("%s[deep %d, pid %d] probe %s", "pre-fork", deep, current_pid, + "failed"); + return false; + } + log_verbose("%s[deep %d, pid %d] probe %s", "pre-fork", deep, current_pid, + "done"); + } else { + log_verbose("%s[deep %d, pid %d] probe %s", "pre-fork", deep, current_pid, + "skipped"); +#ifdef __SANITIZE_ADDRESS__ + const bool abort_txn_to_avoid_memleak = true; +#else + const bool abort_txn_to_avoid_memleak = !RUNNING_ON_VALGRIND && flipcoin(); +#endif + if (abort_txn_to_avoid_memleak && txn_guard) + txn_end(false); + } + + log_flush(); + const pid_t child = fork(); + if (child < 0) + failure_perror("fork()", errno); + + if (child == 0) { + const pid_t new_pid = getpid(); + log_verbose(">>> %s, deep %d, parent-pid %d, child-pid %d", + "mdbx_env_resurrect_after_fork()", deep, current_pid, new_pid); + log_flush(); + int err = mdbx_env_resurrect_after_fork(db_guard.get()); + 
log_verbose("<<< %s, deep %d, parent-pid %d, child-pid %d, err %d", + "mdbx_env_resurrect_after_fork()", deep, current_pid, new_pid, + err); + log_flush(); + if (err != MDBX_SUCCESS) + failure_perror("mdbx_env_resurrect_after_fork()", err); + if (txn_guard) + mdbx_txn_abort(txn_guard.release()); + if (!smoke()) { + log_notice("%s[deep %d, pid %d] probe %s", "fork-child", deep, new_pid, + "failed"); + return false; + } + log_verbose("%s[deep %d, pid %d] probe %s", "fork-child", deep, new_pid, + "done"); + log_flush(); + return true; + } + + if (txn_guard) + txn_end(false); + + int status = 0xdeadbeef; + if (waitpid(child, &status, 0) != child) + failure_perror("waitpid()", errno); + + if (WIFEXITED(status)) { + const int code = WEXITSTATUS(status); + if (code != EXIT_SUCCESS) { + log_notice("%s[deep %d, pid %d] child-pid %d failed, err %d", + "fork-child", deep, current_pid, child, code); + return false; + } + log_notice("%s[deep %d, pid %d] child-pid %d done", "fork-child", deep, + current_pid, child); + } else if (WIFSIGNALED(status)) { + const int sig = WTERMSIG(status); + switch (sig) { + case SIGABRT: + case SIGBUS: + case SIGFPE: + case SIGILL: + case SIGSEGV: + log_notice("%s[deep %d, pid %d] child-pid %d %s by SIG%s", "fork-child", + deep, current_pid, child, "terminated", signal_name(sig)); + break; + default: + log_notice("%s[deep %d, pid %d] child-id %d %s by SIG%s", "fork-child", + deep, current_pid, child, "killed", signal_name(sig)); + } + return false; + } else { + assert(false); + } + + if (!smoke()) { + log_notice("%s[deep %d, pid %d] probe %s", "post-fork", deep, current_pid, + "failed"); + return false; + } + log_verbose("%s[deep %d, pid %d] probe %s", "post-fork", deep, current_pid, + "done"); + return true; +} + +//----------------------------------------------------------------------------- + +class testcase_forkread : public testcase_smoke4fork { + using inherited = testcase_smoke4fork; + +public: + testcase_forkread(const actor_config 
&config, const mdbx_pid_t pid) + : testcase_smoke4fork(config, pid) {} + bool smoke() override; +}; +REGISTER_TESTCASE(forkread); + +bool testcase_forkread::smoke() { + MDBX_envinfo env_info; + int err = mdbx_env_info_ex(db_guard.get(), txn_guard.get(), &env_info, + sizeof(env_info)); + if (err) + failure_perror("mdbx_env_info_ex()", err); + + if (!txn_guard) + txn_begin(true); + + MDBX_txn_info txn_info; + err = mdbx_txn_info(txn_guard.get(), &txn_info, sizeof(txn_info)); + if (err) + failure_perror("mdbx_txn_info()", err); + fetch_canary(); + err = mdbx_env_info_ex(db_guard.get(), txn_guard.get(), &env_info, + sizeof(env_info)); + if (err) + failure_perror("mdbx_env_info_ex()", err); + + uint64_t seq; + err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 0); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_dbi_sequence(get)", err); + txn_end(false); + return true; +} + +//----------------------------------------------------------------------------- + +class testcase_forkwrite : public testcase_forkread { + using inherited = testcase_forkread; + +public: + testcase_forkwrite(const actor_config &config, const mdbx_pid_t pid) + : testcase_forkread(config, pid) {} + bool smoke() override; +}; +REGISTER_TESTCASE(forkwrite); + +bool testcase_forkwrite::smoke() { + const bool firstly_read = flipcoin(); + if (firstly_read) { + if (!testcase_forkread::smoke()) + return false; + } + + if (!txn_guard) + txn_begin(false); + uint64_t seq; + int err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 1); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_dbi_sequence(inc)", err); + txn_end(false); + + if (!firstly_read && !testcase_forkread::smoke()) + return false; + return true; +} + +#endif /* Windows */ diff --git a/test/log.c++ b/test/log.c++ index 04dad84d..5fe485c8 100644 --- a/test/log.c++ +++ b/test/log.c++ @@ -108,8 +108,7 @@ bool output(const loglevel priority, const char *format, ...) 
{ return true; } -void output_nocheckloglevel_ap(const logging::loglevel priority, - const char *format, va_list ap) { +bool ln() { if (last) { putc('\n', last); fflush(last); @@ -118,8 +117,14 @@ void output_nocheckloglevel_ap(const logging::loglevel priority, fflush(stdout); } last = nullptr; + return true; } + return false; +} +void output_nocheckloglevel_ap(const logging::loglevel priority, + const char *format, va_list ap) { + ln(); chrono::time now = chrono::now_realtime(); struct tm tm; #ifdef _MSC_VER diff --git a/test/log.h++ b/test/log.h++ index aa111ac9..96d68848 100644 --- a/test/log.h++ +++ b/test/log.h++ @@ -55,6 +55,7 @@ bool MDBX_PRINTF_ARGS(2, 3) output(const loglevel priority, const char *format, ...); bool feed_ap(const char *format, va_list ap); bool MDBX_PRINTF_ARGS(1, 2) feed(const char *format, ...); +bool ln(); void inline MDBX_PRINTF_ARGS(2, 3) output_nocheckloglevel(const loglevel priority, const char *format, ...) { diff --git a/test/main.c++ b/test/main.c++ index 2b8ff655..ba086e90 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -60,6 +60,10 @@ MDBX_NORETURN void usage(void) { " --append Append-mode insertions\n" " --dead.reader Dead-reader simulator\n" " --dead.writer Dead-writer simulator\n" +#if !defined(_WIN32) && !defined(_WIN64) + " --fork.reader After-fork reader\n" + " --fork.writer After-fork writer\n" +#endif /* Windows */ "Actor options:\n" " --batch.read=N Read-operations batch size\n" " --batch.write=N Write-operations batch size\n" @@ -591,6 +595,18 @@ int main(int argc, char *const argv[]) { configure_actor(last_space_id, ac_nested, value, params); continue; } +#if !defined(_WIN32) && !defined(_WIN64) + if (config::parse_option(argc, argv, narg, "fork.reader", nullptr)) { + fixup4qemu(params); + configure_actor(last_space_id, ac_forkread, value, params); + continue; + } + if (config::parse_option(argc, argv, narg, "fork.writer", nullptr)) { + fixup4qemu(params); + configure_actor(last_space_id, ac_forkwrite, value, 
params); + continue; + } +#endif /* Windows */ if (*argv[narg] != '-') { fixup4qemu(params); diff --git a/test/osal-unix.c++ b/test/osal-unix.c++ index 094d6769..0554000a 100644 --- a/test/osal-unix.c++ +++ b/test/osal-unix.c++ @@ -356,6 +356,7 @@ mdbx_pid_t osal_getpid(void) { return getpid(); } int osal_delay(unsigned seconds) { return sleep(seconds) ? errno : 0; } int osal_actor_start(const actor_config &config, mdbx_pid_t &pid) { + static sigset_t mask; if (children.empty()) { struct sigaction act; memset(&act, 0, sizeof(act)); @@ -366,7 +367,6 @@ int osal_actor_start(const actor_config &config, mdbx_pid_t &pid) { sigaction(SIGUSR1, &act, nullptr); sigaction(SIGUSR2, &act, nullptr); - sigset_t mask; sigemptyset(&mask); sigaddset(&mask, SIGCHLD); sigaddset(&mask, SIGUSR1); @@ -377,6 +377,7 @@ int osal_actor_start(const actor_config &config, mdbx_pid_t &pid) { pid = fork(); if (pid == 0) { + sigprocmask(SIG_BLOCK, &mask, nullptr); overlord_pid = getppid(); const bool result = test_execute(config); exit(result ? EXIT_SUCCESS : EXIT_FAILURE); @@ -400,7 +401,7 @@ void osal_killall_actors(void) { } } -static const char *signal_name(const int sig) { +const char *signal_name(const int sig) { if (sig == SIGHUP) return "HUP"; if (sig == SIGINT) @@ -532,24 +533,25 @@ int osal_actor_poll(mdbx_pid_t &pid, unsigned timeout) { children[pid] = (WEXITSTATUS(status) == EXIT_SUCCESS) ? 
as_successful : as_failed; else if (WIFSIGNALED(status)) { + int sig = WTERMSIG(status); #ifdef WCOREDUMP if (WCOREDUMP(status)) children[pid] = as_coredump; else #endif /* WCOREDUMP */ - switch (WTERMSIG(status)) { + switch (sig) { case SIGABRT: case SIGBUS: case SIGFPE: case SIGILL: case SIGSEGV: - log_notice("child pid %lu terminated by SIG%s", (long)pid, - signal_name(WTERMSIG(status))); + log_notice("child pid %lu %s by SIG%s", (long)pid, "terminated", + signal_name(sig)); children[pid] = as_coredump; break; default: - log_notice("child pid %lu killed by SIG%s", (long)pid, - signal_name(WTERMSIG(status))); + log_notice("child pid %lu %s by SIG%s", (long)pid, "killed", + signal_name(sig)); children[pid] = as_killed; } } else if (WIFSTOPPED(status)) diff --git a/test/osal.h++ b/test/osal.h++ index ef3b5562..5c92b2e9 100644 --- a/test/osal.h++ +++ b/test/osal.h++ @@ -46,3 +46,7 @@ std::string osal_tempdir(void); #define STDERR_FILENO _fileno(stderr) #endif #endif /* _MSC_VER */ + +#if !defined(_WIN32) && !defined(_WIN64) +const char *signal_name(const int sig); +#endif /* Windows */ diff --git a/test/test.c++ b/test/test.c++ index e590d3ce..79ca8a43 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -39,6 +39,12 @@ const char *testcase2str(const actor_testcase testcase) { return "ttl"; case ac_nested: return "nested"; +#if !defined(_WIN32) && !defined(_WIN64) + case ac_forkread: + return "forkread"; + case ac_forkwrite: + return "forkwrite"; +#endif /* Windows */ } } From cfce4ef4d3bd3f821474e72f530de71adf926e95 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 10 Nov 2023 21:14:32 +0300 Subject: [PATCH 048/137] =?UTF-8?q?mdbx-test:=20=D1=8F=D0=B2=D0=BD=D0=B0?= =?UTF-8?q?=D1=8F=20=D1=83=D1=81=D1=82=D0=B0=D0=BD=D0=BE=D0=B2=D0=BA=D0=B0?= =?UTF-8?q?=20append-=D1=80=D0=B5=D0=B6=D0=B8=D0=BC=D0=B0=20=D0=B4=D0=BB?= =?UTF-8?q?=D1=8F=20stdout/stderr.?= 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/log.c++ | 100 ++++++++++++++++++++++---------------------------- test/log.h++ | 2 +- test/main.c++ | 13 ++++++- 3 files changed, 57 insertions(+), 58 deletions(-) diff --git a/test/log.c++ b/test/log.c++ index 5fe485c8..dd55fb70 100644 --- a/test/log.c++ +++ b/test/log.c++ @@ -56,7 +56,7 @@ namespace logging { static std::string prefix; static std::string suffix; static loglevel level; -static FILE *last; +static FILE *flow; void setlevel(loglevel priority) { level = priority; @@ -67,13 +67,13 @@ void setlevel(loglevel priority) { log_trace("set mdbx debug-opts: 0x%02x", rc); } +void setup(const std::string &_prefix) { prefix = _prefix; } + void setup(loglevel priority, const std::string &_prefix) { setlevel(priority); - prefix = _prefix; + setup(_prefix); } -void setup(const std::string &_prefix) { prefix = _prefix; } - const char *level2str(const loglevel alevel) { switch (alevel) { default: @@ -108,18 +108,13 @@ bool output(const loglevel priority, const char *format, ...) 
{ return true; } -bool ln() { - if (last) { - putc('\n', last); - fflush(last); - if (last == stderr) { +void ln() { + if (flow) { + putc('\n', flow); + if (flow != stdout) putc('\n', stdout); - fflush(stdout); - } - last = nullptr; - return true; + flow = nullptr; } - return false; } void output_nocheckloglevel_ap(const logging::loglevel priority, @@ -139,8 +134,7 @@ void output_nocheckloglevel_ap(const logging::loglevel priority, if (rc != MDBX_SUCCESS) failure_perror("localtime_r()", rc); - last = stdout; - fprintf(last, + fprintf(stdout, "[ %02d%02d%02d-%02d:%02d:%02d.%06d_%05lu %-10s %.4s ] %s" /* TODO */, tm.tm_year - 100, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, chrono::fractional2us(now.fractional), (long)osal_getpid(), @@ -150,19 +144,17 @@ void output_nocheckloglevel_ap(const logging::loglevel priority, memset(&ones, 0, sizeof(ones)) /* zap MSVC and other goofy compilers */; if (same_or_higher(priority, error)) va_copy(ones, ap); - vfprintf(last, format, ap); + vfprintf(stdout, format, ap); size_t len = strlen(format); char end = len ? 
format[len - 1] : '\0'; switch (end) { default: - putc('\n', last); - MDBX_CXX17_FALLTHROUGH; // fall through + putc('\n', stdout); + break; case '\n': - fflush(last); - last = nullptr; - MDBX_CXX17_FALLTHROUGH; // fall through + break; case ' ': case '_': case ':': @@ -172,46 +164,39 @@ void output_nocheckloglevel_ap(const logging::loglevel priority, case '\b': case '\r': case '\0': + flow = stdout; break; } if (same_or_higher(priority, error)) { - if (last != stderr) { - fprintf(stderr, "[ %05lu %-10s %.4s ] %s", (long)osal_getpid(), - prefix.c_str(), level2str(priority), suffix.c_str()); - vfprintf(stderr, format, ones); - if (end == '\n') - fflush(stderr); - else - last = stderr; - } + if (flow) + flow = stderr; + fprintf(stderr, "[ %05lu %-10s %.4s ] %s", (long)osal_getpid(), + prefix.c_str(), level2str(priority), suffix.c_str()); + vfprintf(stderr, format, ones); va_end(ones); } } bool feed_ap(const char *format, va_list ap) { - if (!last) + if (!flow) return false; - if (last == stderr) { + if (flow == stderr) { va_list ones; va_copy(ones, ap); vfprintf(stdout, format, ones); va_end(ones); } - vfprintf(last, format, ap); + vfprintf(flow, format, ap); size_t len = strlen(format); - if (len && format[len - 1] == '\n') { - fflush(last); - if (last == stderr) - fflush(stdout); - last = nullptr; - } + if (len && format[len - 1] == '\n') + flow = nullptr; return true; } bool feed(const char *format, ...) { - if (!last) + if (!flow) return false; va_list ap; @@ -299,73 +284,73 @@ void progress_canary(bool active) { } // namespace logging void log_extra(const char *msg, ...) { + logging::ln(); if (logging::same_or_higher(logging::extra, logging::level)) { va_list ap; va_start(ap, msg); logging::output_nocheckloglevel_ap(logging::extra, msg, ap); va_end(ap); - } else - logging::last = nullptr; + } } void log_trace(const char *msg, ...) 
{ + logging::ln(); if (logging::same_or_higher(logging::trace, logging::level)) { va_list ap; va_start(ap, msg); logging::output_nocheckloglevel_ap(logging::trace, msg, ap); va_end(ap); - } else - logging::last = nullptr; + } } void log_debug(const char *msg, ...) { + logging::ln(); if (logging::same_or_higher(logging::debug, logging::level)) { va_list ap; va_start(ap, msg); logging::output_nocheckloglevel_ap(logging::debug, msg, ap); va_end(ap); - } else - logging::last = nullptr; + } } void log_verbose(const char *msg, ...) { + logging::ln(); if (logging::same_or_higher(logging::verbose, logging::level)) { va_list ap; va_start(ap, msg); logging::output_nocheckloglevel_ap(logging::verbose, msg, ap); va_end(ap); - } else - logging::last = nullptr; + } } void log_notice(const char *msg, ...) { + logging::ln(); if (logging::same_or_higher(logging::notice, logging::level)) { va_list ap; va_start(ap, msg); logging::output_nocheckloglevel_ap(logging::notice, msg, ap); va_end(ap); - } else - logging::last = nullptr; + } } void log_warning(const char *msg, ...) { + logging::ln(); if (logging::same_or_higher(logging::warning, logging::level)) { va_list ap; va_start(ap, msg); logging::output_nocheckloglevel_ap(logging::warning, msg, ap); va_end(ap); - } else - logging::last = nullptr; + } } void log_error(const char *msg, ...) 
{ + logging::ln(); if (logging::same_or_higher(logging::error, logging::level)) { va_list ap; va_start(ap, msg); logging::output_nocheckloglevel_ap(logging::error, msg, ap); va_end(ap); - } else - logging::last = nullptr; + } } void log_trouble(const char *where, const char *what, int errnum) { @@ -376,4 +361,7 @@ bool log_enabled(const logging::loglevel priority) { return logging::same_or_higher(priority, logging::level); } -void log_flush(void) { fflushall(); } +void log_flush(void) { + logging::ln(); + fflushall(); +} diff --git a/test/log.h++ b/test/log.h++ index 96d68848..cf955551 100644 --- a/test/log.h++ +++ b/test/log.h++ @@ -55,7 +55,7 @@ bool MDBX_PRINTF_ARGS(2, 3) output(const loglevel priority, const char *format, ...); bool feed_ap(const char *format, va_list ap); bool MDBX_PRINTF_ARGS(1, 2) feed(const char *format, ...); -bool ln(); +void ln(); void inline MDBX_PRINTF_ARGS(2, 3) output_nocheckloglevel(const loglevel priority, const char *format, ...) { diff --git a/test/main.c++ b/test/main.c++ index ba086e90..6242a05d 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -267,8 +267,19 @@ static void fixup4qemu(actor_params ¶ms) { (void)params; } -int main(int argc, char *const argv[]) { +static void set_linebuf_append(FILE *out) { + setvbuf(out, NULL, _IOLBF, 65536); +#if !defined(_WIN32) && !defined(_WIN64) + int fd = fileno(out); + int flags = fcntl(fd, F_GETFD); + if (flags != -1) + (void)fcntl(fd, F_SETFD, O_APPEND | flags); +#endif /* !Windows */ +} +int main(int argc, char *const argv[]) { + set_linebuf_append(stdout); + set_linebuf_append(stderr); #ifdef _DEBUG log_trace("#argc = %d", argc); for (int i = 0; i < argc; ++i) From 100e95957c9b71d81ef9b8e9645dbe61b77d9b4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 11 Nov 2023 12:27:42 +0300 Subject: [PATCH 049/137] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D1=80=D0=B0?= 
=?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20=D0=BB=D0=BE=D0=B3=D0=B8=D1=80?= =?UTF-8?q?=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=B4=D0=BB=D1=8F=20?= =?UTF-8?q?=D0=B8=D1=81=D0=BF=D0=BE=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D1=8F=20=D0=BF=D0=BE=D1=81=D0=BB=D0=B5/=D0=B8?= =?UTF-8?q?=D0=B7=20=D0=B3=D0=BB=D0=BE=D0=B1=D0=B0=D0=BB=D1=8C=D0=BD=D1=8B?= =?UTF-8?q?=D1=85=20=D0=B4=D0=B5=D1=81=D1=82=D1=80=D1=83=D0=BA=D1=82=D0=BE?= =?UTF-8?q?=D1=80=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/extra/pcrf/pcrf_test.c | 2 +- test/log.c++ | 48 +++++++++++++++++++++++++------------ 2 files changed, 34 insertions(+), 16 deletions(-) diff --git a/test/extra/pcrf/pcrf_test.c b/test/extra/pcrf/pcrf_test.c index 1d1f1e7e..c33ee797 100644 --- a/test/extra/pcrf/pcrf_test.c +++ b/test/extra/pcrf/pcrf_test.c @@ -34,7 +34,7 @@ #define IP_PRINTF_ARG_HOST(addr) \ (int)((addr) >> 24), (int)((addr) >> 16 & 0xff), (int)((addr) >> 8 & 0xff), \ - (int)((addr)&0xff) + (int)((addr) & 0xff) char opt_db_path[PATH_MAX] = "./mdbx_bench2"; static MDBX_env *env; diff --git a/test/log.c++ b/test/log.c++ index dd55fb70..037e7509 100644 --- a/test/log.c++ +++ b/test/log.c++ @@ -53,8 +53,16 @@ static void mdbx_logger(MDBX_log_level_t priority, const char *function, namespace logging { -static std::string prefix; -static std::string suffix; +/* логирование может быть вызвано после деструкторов */ +static char prefix_buf[64]; +static size_t prefix_len; +static std::string suffix_buf; +static const char *suffix_ptr = "~~~"; +struct suffix_cleaner { + suffix_cleaner() { suffix_ptr = ""; } + ~suffix_cleaner() { suffix_ptr = "~~~"; } +} static anchor; + static loglevel level; static FILE *flow; @@ -67,11 +75,14 @@ void setlevel(loglevel priority) { log_trace("set mdbx debug-opts: 0x%02x", rc); } -void setup(const std::string &_prefix) { prefix = _prefix; } +void setup(const std::string &prefix) { + prefix_len = 
std::min(prefix.size(), sizeof(prefix_buf) - 1); + memcpy(prefix_buf, prefix.data(), prefix_len); +} -void setup(loglevel priority, const std::string &_prefix) { +void setup(loglevel priority, const std::string &prefix) { setlevel(priority); - setup(_prefix); + setup(prefix); } const char *level2str(const loglevel alevel) { @@ -138,7 +149,7 @@ void output_nocheckloglevel_ap(const logging::loglevel priority, "[ %02d%02d%02d-%02d:%02d:%02d.%06d_%05lu %-10s %.4s ] %s" /* TODO */, tm.tm_year - 100, tm.tm_mon + 1, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, chrono::fractional2us(now.fractional), (long)osal_getpid(), - prefix.c_str(), level2str(priority), suffix.c_str()); + prefix_buf, level2str(priority), suffix_ptr); va_list ones; memset(&ones, 0, sizeof(ones)) /* zap MSVC and other goofy compilers */; @@ -171,8 +182,8 @@ void output_nocheckloglevel_ap(const logging::loglevel priority, if (same_or_higher(priority, error)) { if (flow) flow = stderr; - fprintf(stderr, "[ %05lu %-10s %.4s ] %s", (long)osal_getpid(), - prefix.c_str(), level2str(priority), suffix.c_str()); + fprintf(stderr, "[ %05lu %-10s %.4s ] %s", (long)osal_getpid(), prefix_buf, + level2str(priority), suffix_ptr); vfprintf(stderr, format, ones); va_end(ones); } @@ -207,29 +218,36 @@ bool feed(const char *format, ...) 
{ } local_suffix::local_suffix(const char *c_str) - : trim_pos(suffix.size()), indent(0) { - suffix.append(c_str); + : trim_pos(suffix_buf.size()), indent(0) { + suffix_buf.append(c_str); + suffix_ptr = suffix_buf.c_str(); } local_suffix::local_suffix(const std::string &str) - : trim_pos(suffix.size()), indent(0) { - suffix.append(str); + : trim_pos(suffix_buf.size()), indent(0) { + suffix_buf.append(str); + suffix_ptr = suffix_buf.c_str(); } void local_suffix::push() { indent += 1; - suffix.push_back('\t'); + suffix_buf.push_back('\t'); + suffix_ptr = suffix_buf.c_str(); } void local_suffix::pop() { assert(indent > 0); if (indent > 0) { indent -= 1; - suffix.pop_back(); + suffix_buf.pop_back(); + suffix_ptr = suffix_buf.c_str(); } } -local_suffix::~local_suffix() { suffix.erase(trim_pos); } +local_suffix::~local_suffix() { + suffix_buf.erase(trim_pos); + suffix_ptr = suffix_buf.c_str(); +} void progress_canary(bool active) { static chrono::time progress_timestamp; From b7605e8033eb487bb70b3eba29fd989bbb7a371f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Nov 2023 16:33:13 +0300 Subject: [PATCH 050/137] =?UTF-8?q?mdbx:=20=D1=80=D0=B5=D1=84=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20=D0=BE=D0=B1=D1=80=D0=B0?= =?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=BA=D0=B8=20`MDBX=5FGET=5FMULTIPLE`=20?= =?UTF-8?q?=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20?= =?UTF-8?q?=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B8=20`key`=20?= =?UTF-8?q?=D0=BD=D0=B0=20`NULL`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/src/core.c b/src/core.c index 81d62e69..94a16328 100644 --- a/src/core.c +++ b/src/core.c @@ -17268,24 +17268,30 @@ static __hot int cursor_get(MDBX_cursor *mc, MDBX_val 
*key, MDBX_val *data, return MDBX_EINVAL; if (unlikely((mc->mc_db->md_flags & MDBX_DUPFIXED) == 0)) return MDBX_INCOMPATIBLE; - rc = (mc->mc_flags & C_INITIALIZED) - ? MDBX_SUCCESS - : cursor_set(mc, key, data, MDBX_SET).err; - if ((mc->mc_xcursor->mx_cursor.mc_flags & (C_INITIALIZED | C_EOF)) != - C_INITIALIZED) + if ((mc->mc_flags & C_INITIALIZED) == 0) { + if (unlikely(!key)) + return MDBX_EINVAL; + rc = cursor_set(mc, key, data, MDBX_SET).err; + if (unlikely(rc != MDBX_SUCCESS)) + break; + } + rc = MDBX_SUCCESS; + if (unlikely(C_INITIALIZED != (mc->mc_xcursor->mx_cursor.mc_flags & + (C_INITIALIZED | C_EOF)))) { + rc = MDBX_NOTFOUND; break; - goto fetchm; + } + goto fetch_multiple; case MDBX_NEXT_MULTIPLE: - if (unlikely(data == NULL)) + if (unlikely(!data)) return MDBX_EINVAL; if (unlikely(!(mc->mc_db->md_flags & MDBX_DUPFIXED))) return MDBX_INCOMPATIBLE; rc = cursor_next(mc, key, data, MDBX_NEXT_DUP); if (rc == MDBX_SUCCESS) { if (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { - MDBX_cursor *mx; - fetchm: - mx = &mc->mc_xcursor->mx_cursor; + fetch_multiple:; + MDBX_cursor *mx = &mc->mc_xcursor->mx_cursor; data->iov_len = page_numkeys(mx->mc_pg[mx->mc_top]) * mx->mc_db->md_xsize; data->iov_base = page_data(mx->mc_pg[mx->mc_top]); @@ -17296,21 +17302,20 @@ static __hot int cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, } break; case MDBX_PREV_MULTIPLE: - if (data == NULL) + if (unlikely(!data)) return MDBX_EINVAL; if (!(mc->mc_db->md_flags & MDBX_DUPFIXED)) return MDBX_INCOMPATIBLE; rc = MDBX_SUCCESS; - if (!(mc->mc_flags & C_INITIALIZED)) + if ((mc->mc_flags & C_INITIALIZED) == 0) rc = cursor_last(mc, key, data); if (rc == MDBX_SUCCESS) { MDBX_cursor *mx = &mc->mc_xcursor->mx_cursor; + rc = MDBX_NOTFOUND; if (mx->mc_flags & C_INITIALIZED) { rc = cursor_sibling(mx, SIBLING_LEFT); if (rc == MDBX_SUCCESS) - goto fetchm; - } else { - rc = MDBX_NOTFOUND; + goto fetch_multiple; } } break; From 44beae00ec30f7e3476286d0d2eb1f41b54d9524 Mon Sep 17 
00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Nov 2023 16:30:14 +0300 Subject: [PATCH 051/137] =?UTF-8?q?mdbx:=20`const`=20=D0=B4=D0=BB=D1=8F=20?= =?UTF-8?q?=D0=BD=D0=B0=D1=87=D0=B0=D0=BB=D0=B0=20=D0=B8=20=D0=BA=D0=BE?= =?UTF-8?q?=D0=BD=D1=86=D0=B0=20=D0=B4=D0=B8=D0=B0=D0=BF=D0=B0=D0=B7=D0=BE?= =?UTF-8?q?=D0=BD=D0=B0=20=D0=B2=20=D0=B0=D1=80=D0=B3=D1=83=D0=BC=D0=B5?= =?UTF-8?q?=D0=BD=D1=82=D0=B0=D1=85=20`mdbx=5Festimate=5Frange()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 6 ++++-- src/core.c | 34 ++++++++++++++++++++-------------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/mdbx.h b/mdbx.h index ce2dad5e..eb8e4ff6 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5244,8 +5244,10 @@ LIBMDBX_API int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, * * \returns A non-zero error value on failure and 0 on success. 
*/ LIBMDBX_API int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, - MDBX_val *begin_key, MDBX_val *begin_data, - MDBX_val *end_key, MDBX_val *end_data, + const MDBX_val *begin_key, + const MDBX_val *begin_data, + const MDBX_val *end_key, + const MDBX_val *end_data, ptrdiff_t *distance_items); /** \brief The EPSILON value for mdbx_estimate_range() diff --git a/src/core.c b/src/core.c index 94a16328..119d39d9 100644 --- a/src/core.c +++ b/src/core.c @@ -24725,9 +24725,10 @@ int mdbx_estimate_move(const MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, return mdbx_estimate_distance(cursor, &next.outer, distance_items); } -int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, - MDBX_val *begin_data, MDBX_val *end_key, - MDBX_val *end_data, ptrdiff_t *size_items) { +int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, + const MDBX_val *begin_key, const MDBX_val *begin_data, + const MDBX_val *end_key, const MDBX_val *end_data, + ptrdiff_t *size_items) { int rc = check_txn(txn, MDBX_TXN_BLOCKED); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -24755,13 +24756,13 @@ int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, return MDBX_SUCCESS; } + MDBX_val stub; if (!begin_key) { if (unlikely(!end_key)) { /* LY: FIRST..LAST case */ *size_items = (ptrdiff_t)begin.outer.mc_db->md_entries; return MDBX_SUCCESS; } - MDBX_val stub = {0, 0}; rc = cursor_first(&begin.outer, &stub, &stub); if (unlikely(end_key == MDBX_EPSILON)) { /* LY: FIRST..+epsilon case */ @@ -24773,7 +24774,6 @@ int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, if (unlikely(begin_key == MDBX_EPSILON)) { if (end_key == NULL) { /* LY: -epsilon..LAST case */ - MDBX_val stub = {0, 0}; rc = cursor_last(&begin.outer, &stub, &stub); return (rc == MDBX_SUCCESS) ? 
mdbx_cursor_count(&begin.outer, (size_t *)size_items) @@ -24791,7 +24791,7 @@ int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, (begin_key == end_key || begin.outer.mc_dbx->md_cmp(begin_key, end_key) == 0)) { /* LY: single key case */ - rc = cursor_set(&begin.outer, begin_key, NULL, MDBX_SET).err; + rc = cursor_set(&begin.outer, (MDBX_val *)begin_key, NULL, MDBX_SET).err; if (unlikely(rc != MDBX_SUCCESS)) { *size_items = 0; return (rc == MDBX_NOTFOUND) ? MDBX_SUCCESS : rc; @@ -24812,10 +24812,14 @@ int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, } } return MDBX_SUCCESS; - } else { - rc = cursor_set(&begin.outer, begin_key, begin_data, - begin_data ? MDBX_GET_BOTH_RANGE : MDBX_SET_RANGE) + } else if (begin_data) { + stub = *begin_data; + rc = cursor_set(&begin.outer, (MDBX_val *)begin_key, &stub, + MDBX_GET_BOTH_RANGE) .err; + } else { + stub = *begin_key; + rc = cursor_set(&begin.outer, &stub, nullptr, MDBX_SET_RANGE).err; } } @@ -24828,13 +24832,15 @@ int mdbx_estimate_range(const MDBX_txn *txn, MDBX_dbi dbi, MDBX_val *begin_key, rc = cursor_init(&end.outer, txn, dbi); if (unlikely(rc != MDBX_SUCCESS)) return rc; - if (!end_key) { - MDBX_val stub = {0, 0}; + if (!end_key) rc = cursor_last(&end.outer, &stub, &stub); - } else { - rc = cursor_set(&end.outer, end_key, end_data, - end_data ? 
MDBX_GET_BOTH_RANGE : MDBX_SET_RANGE) + else if (end_data) { + stub = *end_data; + rc = cursor_set(&end.outer, (MDBX_val *)end_key, &stub, MDBX_GET_BOTH_RANGE) .err; + } else { + stub = *end_key; + rc = cursor_set(&end.outer, &stub, nullptr, MDBX_SET_RANGE).err; } if (unlikely(rc != MDBX_SUCCESS)) { if (rc != MDBX_NOTFOUND || !(end.outer.mc_flags & C_INITIALIZED)) From 6cef39c32fbe9d9f0bd60580f5960e853f15be24 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 12 Nov 2023 18:34:23 +0300 Subject: [PATCH 052/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`cursor::estimation=5Fre?= =?UTF-8?q?sult`=20=D0=B8=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B4=D0=B5=D0=BB?= =?UTF-8?q?=D0=BA=D0=B0=20`cursor::estimate()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 92 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 54 insertions(+), 38 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 6c33a0b3..1a7bc510 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4048,10 +4048,12 @@ public: put_multiple(map, key, vector.data(), vector.size(), mode); } - inline ptrdiff_t estimate(map_handle map, pair from, pair to) const; - inline ptrdiff_t estimate(map_handle map, slice from, slice to) const; - inline ptrdiff_t estimate_from_first(map_handle map, slice to) const; - inline ptrdiff_t estimate_to_last(map_handle map, slice from) const; + inline ptrdiff_t estimate(map_handle map, const pair &from, + const pair &to) const; + inline ptrdiff_t estimate(map_handle map, const slice &from, + const slice &to) const; + inline ptrdiff_t estimate_from_first(map_handle map, const slice &to) const; + inline ptrdiff_t estimate_to_last(map_handle map, const slice &from) const; }; /// \brief Managed database transaction. 
@@ -4164,10 +4166,11 @@ public: struct move_result : public pair_result { inline move_result(const cursor &cursor, bool throw_notfound); - inline move_result(cursor &cursor, move_operation operation, - bool throw_notfound); - inline move_result(cursor &cursor, move_operation operation, - const slice &key, bool throw_notfound); + move_result(cursor &cursor, move_operation operation, bool throw_notfound) + : move_result(cursor, operation, slice(), slice(), throw_notfound) {} + move_result(cursor &cursor, move_operation operation, const slice &key, + bool throw_notfound) + : move_result(cursor, operation, key, slice(), throw_notfound) {} inline move_result(cursor &cursor, move_operation operation, const slice &key, const slice &value, bool throw_notfound); @@ -4175,6 +4178,19 @@ public: move_result &operator=(const move_result &) noexcept = default; }; + struct estimate_result : public pair { + ptrdiff_t approximate_quantity; + estimate_result(const cursor &cursor, move_operation operation) + : estimate_result(cursor, operation, slice(), slice()) {} + estimate_result(const cursor &cursor, move_operation operation, + const slice &key) + : estimate_result(cursor, operation, key, slice()) {} + inline estimate_result(const cursor &cursor, move_operation operation, + const slice &key, const slice &value); + estimate_result(const estimate_result &) noexcept = default; + estimate_result &operator=(const estimate_result &) noexcept = default; + }; + protected: inline bool move(move_operation operation, MDBX_val *key, MDBX_val *value, bool throw_notfound) const @@ -4219,9 +4235,10 @@ public: inline bool eof() const; inline bool on_first() const; inline bool on_last() const; - inline ptrdiff_t estimate(slice key, slice value) const; - inline ptrdiff_t estimate(slice key) const; - inline ptrdiff_t estimate(move_operation operation) const; + inline estimate_result estimate(const slice &key, const slice &value) const; + inline estimate_result estimate(const slice &key) const; + 
inline estimate_result estimate(move_operation operation) const; + inline estimate_result estimate(move_operation operation, slice &key) const; //---------------------------------------------------------------------------- @@ -5875,28 +5892,32 @@ inline size_t txn::put_multiple(map_handle map, const slice &key, return args[1].iov_len /* done item count */; } -inline ptrdiff_t txn::estimate(map_handle map, pair from, pair to) const { +inline ptrdiff_t txn::estimate(map_handle map, const pair &from, + const pair &to) const { ptrdiff_t result; error::success_or_throw(mdbx_estimate_range( handle_, map.dbi, &from.key, &from.value, &to.key, &to.value, &result)); return result; } -inline ptrdiff_t txn::estimate(map_handle map, slice from, slice to) const { +inline ptrdiff_t txn::estimate(map_handle map, const slice &from, + const slice &to) const { ptrdiff_t result; error::success_or_throw(mdbx_estimate_range(handle_, map.dbi, &from, nullptr, &to, nullptr, &result)); return result; } -inline ptrdiff_t txn::estimate_from_first(map_handle map, slice to) const { +inline ptrdiff_t txn::estimate_from_first(map_handle map, + const slice &to) const { ptrdiff_t result; error::success_or_throw(mdbx_estimate_range(handle_, map.dbi, nullptr, nullptr, &to, nullptr, &result)); return result; } -inline ptrdiff_t txn::estimate_to_last(map_handle map, slice from) const { +inline ptrdiff_t txn::estimate_to_last(map_handle map, + const slice &from) const { ptrdiff_t result; error::success_or_throw(mdbx_estimate_range(handle_, map.dbi, &from, nullptr, nullptr, nullptr, &result)); @@ -5945,22 +5966,8 @@ MDBX_CXX11_CONSTEXPR bool operator!=(const cursor &a, inline cursor::move_result::move_result(const cursor &cursor, bool throw_notfound) - : pair_result(key, value, false) { - done = cursor.move(get_current, &key, &value, throw_notfound); -} - -inline cursor::move_result::move_result(cursor &cursor, - move_operation operation, - bool throw_notfound) - : pair_result(key, value, false) { - done 
= cursor.move(operation, &key, &value, throw_notfound); -} - -inline cursor::move_result::move_result(cursor &cursor, - move_operation operation, - const slice &key, bool throw_notfound) - : pair_result(key, slice(), false) { - this->done = cursor.move(operation, &this->key, &this->value, throw_notfound); + : pair_result(slice(), slice(), false) { + done = cursor.move(get_current, &this->key, &this->value, throw_notfound); } inline cursor::move_result::move_result(cursor &cursor, @@ -5987,6 +5994,14 @@ inline bool cursor::move(move_operation operation, MDBX_val *key, } } +inline cursor::estimate_result::estimate_result(const cursor &cursor, + move_operation operation, + const slice &key, + const slice &value) + : pair(key, value), approximate_quantity(PTRDIFF_MIN) { + approximate_quantity = cursor.estimate(operation, &this->key, &this->value); +} + inline ptrdiff_t cursor::estimate(move_operation operation, MDBX_val *key, MDBX_val *value) const { ptrdiff_t result; @@ -6109,17 +6124,18 @@ inline bool cursor::on_last() const { return error::boolean_or_throw(::mdbx_cursor_on_last(*this)); } -inline ptrdiff_t cursor::estimate(slice key, slice value) const { - return estimate(multi_exactkey_lowerboundvalue, &key, &value); +inline cursor::estimate_result cursor::estimate(const slice &key, + const slice &value) const { + return estimate_result(*this, multi_exactkey_lowerboundvalue, key, value); } -inline ptrdiff_t cursor::estimate(slice key) const { - return estimate(key_lowerbound, &key, nullptr); +inline cursor::estimate_result cursor::estimate(const slice &key) const { + return estimate_result(*this, key_lowerbound, key); } -inline ptrdiff_t cursor::estimate(move_operation operation) const { - slice unused_key; - return estimate(operation, &unused_key, nullptr); +inline cursor::estimate_result +cursor::estimate(move_operation operation) const { + return estimate_result(*this, operation); } inline void cursor::renew(const ::mdbx::txn &txn) { From 
7f21515940ff513cc2d0df7012858df0cdb5abcf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 13 Nov 2023 20:52:35 +0300 Subject: [PATCH 053/137] =?UTF-8?q?mdbx:=20=D0=BC=D0=B8=D0=BA=D1=80=D0=BE-?= =?UTF-8?q?=D0=BE=D0=BF=D1=82=D0=B8=D0=BC=D0=B8=D0=B7=D0=B0=D1=86=D0=B8?= =?UTF-8?q?=D1=8F=20`cursor=5Fset()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Чуть меньше сравнений и переходов. --- src/core.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/src/core.c b/src/core.c index 119d39d9..fec11370 100644 --- a/src/core.c +++ b/src/core.c @@ -16818,7 +16818,7 @@ cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { } MDBX_val aligned_key = *key; - uint64_t aligned_keybytes; + uint64_t aligned_key_buf; if (mc->mc_db->md_flags & MDBX_INTEGERKEY) { switch (aligned_key.iov_len) { default: @@ -16829,13 +16829,13 @@ cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { if (unlikely(3 & (uintptr_t)aligned_key.iov_base)) /* copy instead of return error to avoid break compatibility */ aligned_key.iov_base = - memcpy(&aligned_keybytes, aligned_key.iov_base, 4); + memcpy(&aligned_key_buf, aligned_key.iov_base, 4); break; case 8: if (unlikely(7 & (uintptr_t)aligned_key.iov_base)) /* copy instead of return error to avoid break compatibility */ aligned_key.iov_base = - memcpy(&aligned_keybytes, aligned_key.iov_base, 8); + memcpy(&aligned_key_buf, aligned_key.iov_base, 8); break; } } @@ -16874,7 +16874,7 @@ cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { } if (cmp > 0) { const size_t nkeys = page_numkeys(mp); - if (nkeys > 1) { + if (likely(nkeys > 1)) { if (IS_LEAF2(mp)) { nodekey.iov_base = page_leaf2key(mp, nkeys - 1, nodekey.iov_len); } else { @@ -16918,23 +16918,22 @@ 
cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { } /* If any parents have right-sibs, search. * Otherwise, there's nothing further. */ - size_t i; - for (i = 0; i < mc->mc_top; i++) + for (size_t i = 0; i < mc->mc_top; i++) if (mc->mc_ki[i] < page_numkeys(mc->mc_pg[i]) - 1) - break; - if (i == mc->mc_top) { - /* There are no other pages */ - cASSERT(mc, nkeys <= UINT16_MAX); - mc->mc_ki[mc->mc_top] = (uint16_t)nkeys; - mc->mc_flags |= C_EOF; - ret.err = MDBX_NOTFOUND; - return ret; - } + goto continue_other_pages; + + /* There are no other pages */ + cASSERT(mc, nkeys <= UINT16_MAX); + mc->mc_ki[mc->mc_top] = (uint16_t)nkeys; + mc->mc_flags |= C_EOF; + ret.err = MDBX_NOTFOUND; + return ret; } + continue_other_pages: if (!mc->mc_top) { /* There are no other pages */ mc->mc_ki[mc->mc_top] = 0; - if (op == MDBX_SET_RANGE) + if (op >= MDBX_SET_RANGE) goto got_node; cASSERT(mc, mc->mc_ki[mc->mc_top] < page_numkeys(mc->mc_pg[mc->mc_top]) || @@ -16996,7 +16995,7 @@ got_node: } if (IS_LEAF2(mp)) { - if (op == MDBX_SET_RANGE || op == MDBX_SET_KEY) { + if (op >= MDBX_SET_KEY) { key->iov_len = mc->mc_db->md_xsize; key->iov_base = page_leaf2key(mp, mc->mc_ki[mc->mc_top], key->iov_len); } @@ -17076,7 +17075,7 @@ got_node: } /* The key already matches in all other cases */ - if (op == MDBX_SET_RANGE || op == MDBX_SET_KEY) + if (op >= MDBX_SET_KEY) get_key_optional(node, key); DEBUG("==> cursor placed on key [%s], data [%s]", DKEY_DEBUG(key), From c8319aabe7e5d2f30abce66b6f75fa4c7b4f69fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 16 Nov 2023 13:46:35 +0300 Subject: [PATCH 054/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=81=D1=82=D0=B0=D1=82?= =?UTF-8?q?=D0=B8=D1=87=D0=B5=D1=81=D0=BA=D0=B8=D1=85=20=D0=BC=D0=B5=D1=82?= 
=?UTF-8?q?=D0=BE=D0=B4=D0=BE=D0=B2=20`buffer::hex()`,=20`base64()`,=20`ba?= =?UTF-8?q?se58()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 1a7bc510..24ceea19 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -2340,6 +2340,77 @@ public: return slice_.as_pod(); } + /// \brief Returns a new buffer with a hexadecimal dump of the slice content. + static buffer hex(const ::mdbx::slice &source, bool uppercase = false, + unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) { + return source.template encode_hex( + uppercase, wrap_width, allocator); + } + + /// \brief Returns a new buffer with a + /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of the slice content. + static buffer base58(const ::mdbx::slice &source, unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) { + return source.template encode_base58(wrap_width, + allocator); + } + /// \brief Returns a new buffer with a + /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of the slice content. + static buffer base64(const ::mdbx::slice &source, unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) { + return source.template encode_base64(wrap_width, + allocator); + } + + /// \brief Returns a new buffer with a hexadecimal dump of the given pod. + template + static buffer hex(const POD &pod, bool uppercase = false, + unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) { + return hex(mdbx::slice::wrap(pod), uppercase, wrap_width, allocator); + } + + /// \brief Returns a new buffer with a + /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of the given pod. 
+ template + static buffer base58(const POD &pod, unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) { + return base58(mdbx::slice::wrap(pod), wrap_width, allocator); + } + + /// \brief Returns a new buffer with a + /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of the given pod. + template + static buffer base64(const POD &pod, unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) { + return base64(mdbx::slice::wrap(pod), wrap_width, allocator); + } + + /// \brief Returns a new buffer with a hexadecimal dump of the slice content. + buffer encode_hex(bool uppercase = false, unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) const { + return slice().template encode_hex( + uppercase, wrap_width, allocator); + } + + /// \brief Returns a new buffer with a + /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of the slice content. + buffer + encode_base58(unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) const { + return slice().template encode_base58( + wrap_width, allocator); + } + /// \brief Returns a new buffer with a + /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of the slice content. + buffer + encode_base64(unsigned wrap_width = 0, + const allocator_type &allocator = allocator_type()) const { + return slice().template encode_base64( + wrap_width, allocator); + } + /// \brief Reserves storage space. 
void reserve(size_t wanna_headroom, size_t wanna_tailroom) { wanna_headroom = ::std::min(::std::max(headroom(), wanna_headroom), From 649bbb9d902e3ba6fc4470ae3652e81b7b19f6f3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 16 Nov 2023 13:48:27 +0300 Subject: [PATCH 055/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=82=D0=B8=D0=BF=D0=B0?= =?UTF-8?q?=20`mdbx::comparator`=20=D0=B8=20=D1=84=D1=83=D0=BD=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D0=B9=20`mdbx::default=5Fcomparator()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 24ceea19..d197167e 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3080,6 +3080,14 @@ struct LIBMDBX_API_TYPE map_handle { }; }; +using comparator = ::MDBX_cmp_func *; +inline comparator default_comparator(key_mode mode) noexcept { + return ::mdbx_get_keycmp(static_cast(mode)); +} +inline comparator default_comparator(value_mode mode) noexcept { + return ::mdbx_get_datacmp(static_cast(mode)); /* values use the data comparator, not the key one */ +} + /// \brief Key-value pairs put mode. enum put_mode { insert_unique = MDBX_NOOVERWRITE, ///< Insert only unique keys. 
From e66df2c21b64622b4b5eb0fd142526a7b094a7ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 16 Nov 2023 14:44:49 +0300 Subject: [PATCH 056/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BC=D0=B5=D1=82=D0=BE?= =?UTF-8?q?=D0=B4=D0=BE=D0=B2=20`buffer::hex=5Fdecode()`,=20`base64=5Fdeco?= =?UTF-8?q?de()`,=20`base58=5Fdecode()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index d197167e..2fd02072 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -2411,6 +2411,55 @@ public: wrap_width, allocator); } + /// \brief Decodes hexadecimal dump from the slice content to returned buffer. + static buffer hex_decode(const ::mdbx::slice &source, + bool ignore_spaces = false, + const allocator_type &allocator = allocator_type()) { + return source.template hex_decode(ignore_spaces, + allocator); + } + + /// \brief Decodes [Base58](https://en.wikipedia.org/wiki/Base58) dump + /// from the slice content to returned buffer. + static buffer + base58_decode(const ::mdbx::slice &source, bool ignore_spaces = false, + const allocator_type &allocator = allocator_type()) { + return source.template base58_decode( + ignore_spaces, allocator); + } + + /// \brief Decodes [Base64](https://en.wikipedia.org/wiki/Base64) dump + /// from the slice content to returned buffer. + static buffer + base64_decode(const ::mdbx::slice &source, bool ignore_spaces = false, + const allocator_type &allocator = allocator_type()) { + return source.template base64_decode( + ignore_spaces, allocator); + } + + /// \brief Decodes hexadecimal dump + /// from the buffer content to new returned buffer. 
+ buffer hex_decode(bool ignore_spaces = false, + const allocator_type &allocator = allocator_type()) const { + return hex_decode(slice(), ignore_spaces, allocator); + } + + /// \brief Decodes [Base58](https://en.wikipedia.org/wiki/Base58) dump + /// from the buffer content to new returned buffer. + buffer + base58_decode(bool ignore_spaces = false, + const allocator_type &allocator = allocator_type()) const { + return base58_decode(slice(), ignore_spaces, allocator); + } + + /// \brief Decodes [Base64](https://en.wikipedia.org/wiki/Base64) dump + /// from the buffer content to new returned buffer. + buffer + base64_decode(bool ignore_spaces = false, + const allocator_type &allocator = allocator_type()) const { + return base64_decode(slice(), ignore_spaces, allocator); + } + /// \brief Reserves storage space. void reserve(size_t wanna_headroom, size_t wanna_tailroom) { wanna_headroom = ::std::min(::std::max(headroom(), wanna_headroom), From 6facd20b2bbe3d8604341d4af7dc0b17ebfe0474 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 17 Nov 2023 23:34:22 +0300 Subject: [PATCH 057/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`buffer::as=5Fuint64()`?= =?UTF-8?q?=20=D0=B8=20=D1=82.=D0=B4.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 2fd02072..af2aa246 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -2340,6 +2340,22 @@ public: return slice_.as_pod(); } +#ifdef MDBX_U128_TYPE + MDBX_U128_TYPE as_uint128() const { return slice().as_uint128(); } +#endif /* MDBX_U128_TYPE */ + uint64_t as_uint64() const { return slice().as_uint64(); } + uint32_t as_uint32() const { return slice().as_uint32(); } + uint16_t as_uint16() const { return slice().as_uint16(); } + uint8_t 
as_uint8() const { return slice().as_uint8(); } + +#ifdef MDBX_I128_TYPE + MDBX_I128_TYPE as_int128() const { return slice().as_int128(); } +#endif /* MDBX_I128_TYPE */ + int64_t as_int64() const { return slice().as_int64(); } + int32_t as_int32() const { return slice().as_int32(); } + int16_t as_int16() const { return slice().as_int16(); } + int8_t as_int8() const { return slice().as_int8(); } + /// \brief Returns a new buffer with a hexadecimal dump of the slice content. static buffer hex(const ::mdbx::slice &source, bool uppercase = false, unsigned wrap_width = 0, From d6a79a9c5fdd6b66923fbe0f31be28aa495fb70d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 17 Nov 2023 23:55:37 +0300 Subject: [PATCH 058/137] =?UTF-8?q?mdbx++:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=B8=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20?= =?UTF-8?q?`slice::as=5FintXX()`=20=D0=B2=20`slice::as=5FintXX=5Fadapt()`?= =?UTF-8?q?=20=D0=B8=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20`slice::as=5FintXX()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 86 ++++++++++++++++++++++++++++++++++++++++------------ src/mdbx.c++ | 36 +++++++++++----------- 2 files changed, 84 insertions(+), 38 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index af2aa246..486dfdeb 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -1058,20 +1058,40 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { } #ifdef MDBX_U128_TYPE - MDBX_U128_TYPE as_uint128() const; + MDBX_CXX14_CONSTEXPR MDBX_U128_TYPE as_uint128() const { + return as_pod(); + } #endif /* MDBX_U128_TYPE */ - uint64_t as_uint64() const; - uint32_t as_uint32() const; - uint16_t as_uint16() const; - uint8_t as_uint8() const; + MDBX_CXX14_CONSTEXPR uint64_t as_uint64() const { return as_pod(); } + MDBX_CXX14_CONSTEXPR uint32_t as_uint32() const { return 
as_pod(); } + MDBX_CXX14_CONSTEXPR uint16_t as_uint16() const { return as_pod(); } + MDBX_CXX14_CONSTEXPR uint8_t as_uint8() const { return as_pod(); } #ifdef MDBX_I128_TYPE - MDBX_I128_TYPE as_int128() const; + MDBX_CXX14_CONSTEXPR MDBX_I128_TYPE as_int128() const { + return as_pod(); + } #endif /* MDBX_I128_TYPE */ - int64_t as_int64() const; - int32_t as_int32() const; - int16_t as_int16() const; - int8_t as_int8() const; + MDBX_CXX14_CONSTEXPR int64_t as_int64() const { return as_pod(); } + MDBX_CXX14_CONSTEXPR int32_t as_int32() const { return as_pod(); } + MDBX_CXX14_CONSTEXPR int16_t as_int16() const { return as_pod(); } + MDBX_CXX14_CONSTEXPR int8_t as_int8() const { return as_pod(); } + +#ifdef MDBX_U128_TYPE + MDBX_U128_TYPE as_uint128_adapt() const; +#endif /* MDBX_U128_TYPE */ + uint64_t as_uint64_adapt() const; + uint32_t as_uint32_adapt() const; + uint16_t as_uint16_adapt() const; + uint8_t as_uint8_adapt() const; + +#ifdef MDBX_I128_TYPE + MDBX_I128_TYPE as_int128_adapt() const; +#endif /* MDBX_I128_TYPE */ + int64_t as_int64_adapt() const; + int32_t as_int32_adapt() const; + int16_t as_int16_adapt() const; + int8_t as_int8_adapt() const; protected: MDBX_CXX11_CONSTEXPR slice(size_t invalid_length) noexcept @@ -2341,20 +2361,46 @@ public: } #ifdef MDBX_U128_TYPE - MDBX_U128_TYPE as_uint128() const { return slice().as_uint128(); } + MDBX_CXX14_CONSTEXPR MDBX_U128_TYPE as_uint128() const { + return slice().as_uint128(); + } #endif /* MDBX_U128_TYPE */ - uint64_t as_uint64() const { return slice().as_uint64(); } - uint32_t as_uint32() const { return slice().as_uint32(); } - uint16_t as_uint16() const { return slice().as_uint16(); } - uint8_t as_uint8() const { return slice().as_uint8(); } + MDBX_CXX14_CONSTEXPR uint64_t as_uint64() const { + return slice().as_uint64(); + } + MDBX_CXX14_CONSTEXPR uint32_t as_uint32() const { + return slice().as_uint32(); + } + MDBX_CXX14_CONSTEXPR uint16_t as_uint16() const { + return slice().as_uint16(); + } + 
MDBX_CXX14_CONSTEXPR uint8_t as_uint8() const { return slice().as_uint8(); } #ifdef MDBX_I128_TYPE - MDBX_I128_TYPE as_int128() const { return slice().as_int128(); } + MDBX_CXX14_CONSTEXPR MDBX_I128_TYPE as_int128() const { + return slice().as_int128(); + } #endif /* MDBX_I128_TYPE */ - int64_t as_int64() const { return slice().as_int64(); } - int32_t as_int32() const { return slice().as_int32(); } - int16_t as_int16() const { return slice().as_int16(); } - int8_t as_int8() const { return slice().as_int8(); } + MDBX_CXX14_CONSTEXPR int64_t as_int64() const { return slice().as_int64(); } + MDBX_CXX14_CONSTEXPR int32_t as_int32() const { return slice().as_int32(); } + MDBX_CXX14_CONSTEXPR int16_t as_int16() const { return slice().as_int16(); } + MDBX_CXX14_CONSTEXPR int8_t as_int8() const { return slice().as_int8(); } + +#ifdef MDBX_U128_TYPE + MDBX_U128_TYPE as_uint128_adapt() const { return slice().as_uint128_adapt(); } +#endif /* MDBX_U128_TYPE */ + uint64_t as_uint64_adapt() const { return slice().as_uint64_adapt(); } + uint32_t as_uint32_adapt() const { return slice().as_uint32_adapt(); } + uint16_t as_uint16_adapt() const { return slice().as_uint16_adapt(); } + uint8_t as_uint8_adapt() const { return slice().as_uint8_adapt(); } + +#ifdef MDBX_I128_TYPE + MDBX_I128_TYPE as_int128_adapt() const { return slice().as_int128_adapt(); } +#endif /* MDBX_I128_TYPE */ + int64_t as_int64_adapt() const { return slice().as_int64_adapt(); } + int32_t as_int32_adapt() const { return slice().as_int32_adapt(); } + int16_t as_int16_adapt() const { return slice().as_int16_adapt(); } + int8_t as_int8_adapt() const { return slice().as_int8_adapt(); } /// \brief Returns a new buffer with a hexadecimal dump of the slice content. 
static buffer hex(const ::mdbx::slice &source, bool uppercase = false, diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 1b52739a..dd75aaa0 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -492,48 +492,48 @@ bool slice::is_printable(bool disable_utf8) const noexcept { } #ifdef MDBX_U128_TYPE -MDBX_U128_TYPE slice::as_uint128() const { +MDBX_U128_TYPE slice::as_uint128_adapt() const { static_assert(sizeof(MDBX_U128_TYPE) == 16, "WTF?"); if (size() == 16) { MDBX_U128_TYPE r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_uint64(); + return as_uint64_adapt(); } #endif /* MDBX_U128_TYPE */ -uint64_t slice::as_uint64() const { +uint64_t slice::as_uint64_adapt() const { static_assert(sizeof(uint64_t) == 8, "WTF?"); if (size() == 8) { uint64_t r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_uint32(); + return as_uint32_adapt(); } -uint32_t slice::as_uint32() const { +uint32_t slice::as_uint32_adapt() const { static_assert(sizeof(uint32_t) == 4, "WTF?"); if (size() == 4) { uint32_t r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_uint16(); + return as_uint16_adapt(); } -uint16_t slice::as_uint16() const { +uint16_t slice::as_uint16_adapt() const { static_assert(sizeof(uint16_t) == 2, "WTF?"); if (size() == 2) { uint16_t r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_uint8(); + return as_uint8_adapt(); } -uint8_t slice::as_uint8() const { +uint8_t slice::as_uint8_adapt() const { static_assert(sizeof(uint8_t) == 1, "WTF?"); if (size() == 1) return *static_cast(data()); @@ -544,48 +544,48 @@ uint8_t slice::as_uint8() const { } #ifdef MDBX_I128_TYPE -MDBX_I128_TYPE slice::as_int128() const { +MDBX_I128_TYPE slice::as_int128_adapt() const { static_assert(sizeof(MDBX_I128_TYPE) == 16, "WTF?"); if (size() == 16) { MDBX_I128_TYPE r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_int64(); + return as_int64_adapt(); } #endif /* MDBX_I128_TYPE */ -int64_t slice::as_int64() const { +int64_t slice::as_int64_adapt() 
const { static_assert(sizeof(int64_t) == 8, "WTF?"); if (size() == 8) { uint64_t r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_int32(); + return as_int32_adapt(); } -int32_t slice::as_int32() const { +int32_t slice::as_int32_adapt() const { static_assert(sizeof(int32_t) == 4, "WTF?"); if (size() == 4) { int32_t r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_int16(); + return as_int16_adapt(); } -int16_t slice::as_int16() const { +int16_t slice::as_int16_adapt() const { static_assert(sizeof(int16_t) == 2, "WTF?"); if (size() == 2) { int16_t r; memcpy(&r, data(), sizeof(r)); return r; } else - return as_int8(); + return as_int8_adapt(); } -int8_t slice::as_int8() const { +int8_t slice::as_int8_adapt() const { if (size() == 1) return *static_cast(data()); else if (size() == 0) From 992eee4f0f6e8ad7ea2e5a6327573d3f447d453f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 18 Nov 2023 00:40:10 +0300 Subject: [PATCH 059/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`cursor::clone()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/mdbx.h++ b/mdbx.h++ index 486dfdeb..aeeb2252 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4324,6 +4324,7 @@ public: inline cursor &operator=(cursor &&other) noexcept; inline cursor(cursor &&other) noexcept; inline ~cursor() noexcept; + inline cursor_managed clone(void *your_context = nullptr) const; MDBX_CXX14_CONSTEXPR operator bool() const noexcept; MDBX_CXX14_CONSTEXPR operator const MDBX_cursor *() const; MDBX_CXX14_CONSTEXPR operator MDBX_cursor *(); @@ -4495,7 +4496,8 @@ class LIBMDBX_API_TYPE cursor_managed : public cursor { public: /// \brief Creates a new managed cursor with underlying object. 
- cursor_managed() : cursor_managed(::mdbx_cursor_create(nullptr)) { + cursor_managed(void *your_context = nullptr) + : cursor_managed(::mdbx_cursor_create(your_context)) { if (MDBX_UNLIKELY(!handle_)) MDBX_CXX20_UNLIKELY error::throw_exception(MDBX_ENOMEM); } @@ -6118,6 +6120,12 @@ inline ptrdiff_t txn::estimate_to_last(map_handle map, MDBX_CXX11_CONSTEXPR cursor::cursor(MDBX_cursor *ptr) noexcept : handle_(ptr) {} +inline cursor_managed cursor::clone(void *your_context) const { + cursor_managed clone(your_context); + error::success_or_throw(::mdbx_cursor_copy(handle_, clone.handle_)); + return clone; +} + inline cursor &cursor::operator=(cursor &&other) noexcept { handle_ = other.handle_; other.handle_ = nullptr; From 225f54833903d08e428c6f155b114d157c57653b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 18 Nov 2023 00:53:42 +0300 Subject: [PATCH 060/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20get=5F/set=5Fcontext=20?= =?UTF-8?q?=D0=B4=D0=BB=D1=8F=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA?= =?UTF-8?q?=D1=86=D0=B8=D0=B9=20=D0=B8=20=D0=BA=D1=83=D1=80=D1=81=D0=BE?= =?UTF-8?q?=D1=80=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/mdbx.h++ b/mdbx.h++ index aeeb2252..9be7e341 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3636,7 +3636,7 @@ public: inline void *get_context() const noexcept; /// \brief Sets the application context associated with the environment. - inline env &set_context(void *); + inline env &set_context(void *your_context); /// \brief Sets threshold to force flush the data buffers to disk, for /// non-sync durability modes. @@ -3994,6 +3994,12 @@ public: /// \brief Return the transaction's ID. 
inline uint64_t id() const; + /// \brief Returns the application context associated with the transaction. + inline void *get_context() const noexcept; + + /// \brief Sets the application context associated with the transaction. + inline txn &set_context(void *your_context); + /// \brief Checks whether the given data is on a dirty page. inline bool is_dirty(const void *ptr) const; @@ -4333,6 +4339,12 @@ public: friend MDBX_CXX11_CONSTEXPR bool operator!=(const cursor &a, const cursor &b) noexcept; + /// \brief Returns the application context associated with the cursor. + inline void *get_context() const noexcept; + + /// \brief Sets the application context associated with the cursor. + inline cursor &set_context(void *your_context); + enum move_operation { first = MDBX_FIRST, last = MDBX_LAST, @@ -5639,6 +5651,15 @@ MDBX_CXX11_CONSTEXPR bool operator!=(const txn &a, const txn &b) noexcept { return a.handle_ != b.handle_; } +inline void *txn::get_context() const noexcept { + return mdbx_txn_get_userctx(handle_); +} + +inline txn &txn::set_context(void *ptr) { + error::success_or_throw(::mdbx_txn_set_userctx(handle_, ptr)); + return *this; +} + inline bool txn::is_dirty(const void *ptr) const { int err = ::mdbx_is_dirty(handle_, ptr); switch (err) { @@ -6126,6 +6147,15 @@ inline cursor_managed cursor::clone(void *your_context) const { return clone; } +inline void *cursor::get_context() const noexcept { + return mdbx_cursor_get_userctx(handle_); +} + +inline cursor &cursor::set_context(void *ptr) { + error::success_or_throw(::mdbx_cursor_set_userctx(handle_, ptr)); + return *this; +} + inline cursor &cursor::operator=(cursor &&other) noexcept { handle_ = other.handle_; other.handle_ = nullptr; From 10abf731917e82a0c45c516653e9456abf4868bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 26 Nov 2023 01:11:12 +0300 Subject: [PATCH 061/137] 
=?UTF-8?q?mdbx:=20=D1=80=D0=B5=D1=84=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20=D0=B8=20=D0=BC=D0=B8?= =?UTF-8?q?=D0=BA=D1=80=D0=BE-=D0=BE=D0=BF=D1=82=D0=B8=D0=BC=D0=B8=D0=B7?= =?UTF-8?q?=D0=B0=D1=86=D0=B8=D1=8F=20`cursor=5Fnext|=5Fprev()`=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20dupsort-=D1=83=D0=B7=D0=BB=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - меньше сравнений и переходов. - вложенный курсор всегда сбрасывается/очищается при переходе с dupsort-узла. --- src/core.c | 75 +++++++++++++++++++++++++++++------------------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/src/core.c b/src/core.c index fec11370..eadfb491 100644 --- a/src/core.c +++ b/src/core.c @@ -3391,7 +3391,8 @@ static int __must_check_result cursor_last(MDBX_cursor *mc, MDBX_val *key, static int __must_check_result cursor_init(MDBX_cursor *mc, const MDBX_txn *txn, size_t dbi); static int __must_check_result cursor_xinit0(MDBX_cursor *mc); -static int __must_check_result cursor_xinit1(MDBX_cursor *mc, MDBX_node *node, +static int __must_check_result cursor_xinit1(MDBX_cursor *mc, + const MDBX_node *node, const MDBX_page *mp); static int __must_check_result cursor_xinit2(MDBX_cursor *mc, MDBX_xcursor *src_mx, @@ -16616,39 +16617,41 @@ static int cursor_sibling(MDBX_cursor *mc, int dir) { /* Move the cursor to the next data item. 
*/ static int cursor_next(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { - MDBX_page *mp; - MDBX_node *node; + assert(op == MDBX_NEXT || op == MDBX_NEXT_DUP || op == MDBX_NEXT_NODUP); int rc; if (unlikely(mc->mc_flags & C_DEL) && op == MDBX_NEXT_DUP) return MDBX_NOTFOUND; - if (unlikely(!(mc->mc_flags & C_INITIALIZED))) + if (unlikely(!(mc->mc_flags & C_INITIALIZED))) { + if (unlikely(mc->mc_flags & C_SUB)) + return MDBX_NOTFOUND; return cursor_first(mc, key, data); + } - mp = mc->mc_pg[mc->mc_top]; + const MDBX_page *mp = mc->mc_pg[mc->mc_top]; if (unlikely(mc->mc_flags & C_EOF)) { if (mc->mc_ki[mc->mc_top] + (size_t)1 >= page_numkeys(mp)) return MDBX_NOTFOUND; mc->mc_flags ^= C_EOF; } - if (mc->mc_db->md_flags & MDBX_DUPSORT) { - node = page_node(mp, mc->mc_ki[mc->mc_top]); - if (node_flags(node) & F_DUPDATA) { - if (op == MDBX_NEXT || op == MDBX_NEXT_DUP) { + if (mc->mc_xcursor) { + if (op != MDBX_NEXT_NODUP) { + const MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); + if (node_flags(node) & F_DUPDATA) { rc = cursor_next(&mc->mc_xcursor->mx_cursor, data, NULL, MDBX_NEXT); - if (op != MDBX_NEXT || rc != MDBX_NOTFOUND) { - if (likely(rc == MDBX_SUCCESS)) - get_key_optional(node, key); - return rc; + if (likely(rc == MDBX_SUCCESS)) { + get_key_optional(node, key); + return MDBX_SUCCESS; } + if (unlikely(rc != MDBX_NOTFOUND)) + return rc; } - } else { - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); - if (op == MDBX_NEXT_DUP) + if (op != MDBX_NEXT) return MDBX_NOTFOUND; } + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); } DEBUG("cursor_next: top page is %" PRIaPGNO " in cursor %p", mp->mp_pgno, @@ -16692,7 +16695,7 @@ skip: return MDBX_SUCCESS; } - node = page_node(mp, mc->mc_ki[mc->mc_top]); + const MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); if (node_flags(node) & F_DUPDATA) { rc = cursor_xinit1(mc, node, mp); if (unlikely(rc != MDBX_SUCCESS)) @@ -16713,40 +16716,41 @@ skip: /* Move the 
cursor to the previous data item. */ static int cursor_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { - MDBX_page *mp; - MDBX_node *node; + assert(op == MDBX_PREV || op == MDBX_PREV_DUP || op == MDBX_PREV_NODUP); int rc; if (unlikely(mc->mc_flags & C_DEL) && op == MDBX_PREV_DUP) return MDBX_NOTFOUND; if (unlikely(!(mc->mc_flags & C_INITIALIZED))) { + if (unlikely(mc->mc_flags & C_SUB)) + return MDBX_NOTFOUND; rc = cursor_last(mc, key, data); - if (unlikely(rc)) + if (unlikely(rc != MDBX_SUCCESS)) return rc; mc->mc_ki[mc->mc_top]++; } - mp = mc->mc_pg[mc->mc_top]; - if ((mc->mc_db->md_flags & MDBX_DUPSORT) && - mc->mc_ki[mc->mc_top] < page_numkeys(mp)) { - node = page_node(mp, mc->mc_ki[mc->mc_top]); - if (node_flags(node) & F_DUPDATA) { - if (op == MDBX_PREV || op == MDBX_PREV_DUP) { - rc = cursor_prev(&mc->mc_xcursor->mx_cursor, data, NULL, MDBX_PREV); - if (op != MDBX_PREV || rc != MDBX_NOTFOUND) { + const MDBX_page *mp = mc->mc_pg[mc->mc_top]; + if (mc->mc_xcursor) { + if (op != MDBX_PREV_NODUP) { + if (likely(mc->mc_ki[mc->mc_top] < page_numkeys(mp))) { + const MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); + if (node_flags(node) & F_DUPDATA) { + rc = cursor_prev(&mc->mc_xcursor->mx_cursor, data, NULL, MDBX_PREV); if (likely(rc == MDBX_SUCCESS)) { get_key_optional(node, key); mc->mc_flags &= ~C_EOF; + return MDBX_SUCCESS; } - return rc; + if (unlikely(rc != MDBX_NOTFOUND)) + return rc; } } - } else { - mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); - if (op == MDBX_PREV_DUP) + if (op != MDBX_PREV) return MDBX_NOTFOUND; } + mc->mc_xcursor->mx_cursor.mc_flags &= ~(C_INITIALIZED | C_EOF); } DEBUG("cursor_prev: top page is %" PRIaPGNO " in cursor %p", mp->mp_pgno, @@ -16782,8 +16786,7 @@ static int cursor_prev(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, return MDBX_SUCCESS; } - node = page_node(mp, mc->mc_ki[mc->mc_top]); - + const MDBX_node *node = page_node(mp, mc->mc_ki[mc->mc_top]); if (node_flags(node) & 
F_DUPDATA) { rc = cursor_xinit1(mc, node, mp); if (unlikely(rc != MDBX_SUCCESS)) @@ -17234,6 +17237,8 @@ static __hot int cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, return rc; } } else { + cASSERT(mc, !mc->mc_xcursor || !(mc->mc_xcursor->mx_cursor.mc_flags & + C_INITIALIZED)); rc = node_read(mc, node, data, mp); if (unlikely(rc)) return rc; @@ -19025,7 +19030,7 @@ static int cursor_xinit0(MDBX_cursor *mc) { * [in] mc The main cursor whose sorted-dups cursor is to be initialized. * [in] node The data containing the MDBX_db record for the sorted-dup database. */ -static int cursor_xinit1(MDBX_cursor *mc, MDBX_node *node, +static int cursor_xinit1(MDBX_cursor *mc, const MDBX_node *node, const MDBX_page *mp) { MDBX_xcursor *mx = mc->mc_xcursor; if (!MDBX_DISABLE_VALIDATION && unlikely(mx == nullptr)) { From adcbb393790fb3128910564c5b058d92319c2804 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 18 Nov 2023 01:55:29 +0300 Subject: [PATCH 062/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fcursor=5Fcompare()`=20?= =?UTF-8?q?=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 5 ++++ src/core.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) diff --git a/mdbx.h b/mdbx.h index eb8e4ff6..765f11e3 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4891,6 +4891,11 @@ LIBMDBX_API MDBX_dbi mdbx_cursor_dbi(const MDBX_cursor *cursor); * \returns A non-zero error value on failure and 0 on success. */ LIBMDBX_API int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest); +/** FIXME */ +LIBMDBX_API int mdbx_cursor_compare(const MDBX_cursor *left, + const MDBX_cursor *right, + bool ignore_nested); + /** \brief Retrieve by cursor. 
* \ingroup c_crud * diff --git a/src/core.c b/src/core.c index eadfb491..909b8469 100644 --- a/src/core.c +++ b/src/core.c @@ -19355,6 +19355,83 @@ int mdbx_cursor_renew(const MDBX_txn *txn, MDBX_cursor *mc) { return likely(mc) ? mdbx_cursor_bind(txn, mc, mc->mc_dbi) : MDBX_EINVAL; } +int mdbx_cursor_compare(const MDBX_cursor *l, const MDBX_cursor *r, + bool ignore_nested) { + const int incomparable = INT16_MAX + 1; + if (unlikely(!l)) + return r ? -incomparable * 9 : 0; + if (unlikely(!r)) + return l ? incomparable * 9 : 0; + + if (unlikely(l->mc_signature != MDBX_MC_LIVE)) + return (r->mc_signature == MDBX_MC_LIVE) ? -incomparable * 8 : 0; + if (unlikely(r->mc_signature != MDBX_MC_LIVE)) + return (l->mc_signature == MDBX_MC_LIVE) ? incomparable * 8 : 0; + + if (unlikely(l->mc_dbx != r->mc_dbx)) { + if (l->mc_txn->mt_env != r->mc_txn->mt_env) + return (l->mc_txn->mt_env > r->mc_txn->mt_env) ? incomparable * 7 + : -incomparable * 7; + if (l->mc_txn->mt_txnid != r->mc_txn->mt_txnid) + return (l->mc_txn->mt_txnid > r->mc_txn->mt_txnid) ? incomparable * 6 + : -incomparable * 6; + return (l->mc_dbx > r->mc_dbx) ? incomparable * 5 : -incomparable * 5; + } + assert(l->mc_dbi == r->mc_dbi); + + int diff = (l->mc_flags & C_INITIALIZED) - (r->mc_flags & C_INITIALIZED); + if (unlikely(diff)) + return (diff > 0) ? incomparable * 4 : -incomparable * 4; + if (unlikely((l->mc_flags & C_INITIALIZED) == 0)) + return 0; + + size_t detent = (l->mc_snum <= r->mc_snum) ? l->mc_snum : r->mc_snum; + for (size_t i = 0; i < detent; ++i) { + diff = l->mc_ki[i] - r->mc_ki[i]; + if (diff) + return diff; + } + if (unlikely(l->mc_snum != r->mc_snum)) + return (l->mc_snum > r->mc_snum) ? incomparable * 3 : -incomparable * 3; + + assert((l->mc_xcursor != nullptr) == (r->mc_xcursor != nullptr)); + if (unlikely((l->mc_xcursor != nullptr) != (r->mc_xcursor != nullptr))) + return l->mc_xcursor ? 
incomparable * 2 : -incomparable * 2; + if (ignore_nested || !l->mc_xcursor) + return 0; + +#if MDBX_DEBUG + if (l->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { + const MDBX_page *mp = l->mc_pg[l->mc_top]; + const MDBX_node *node = page_node(mp, l->mc_ki[l->mc_top]); + assert(node_flags(node) & F_DUPDATA); + } + if (r->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED) { + const MDBX_page *mp = r->mc_pg[r->mc_top]; + const MDBX_node *node = page_node(mp, r->mc_ki[r->mc_top]); + assert(node_flags(node) & F_DUPDATA); + } +#endif /* MDBX_DEBUG */ + + l = &l->mc_xcursor->mx_cursor; + r = &r->mc_xcursor->mx_cursor; + diff = (l->mc_flags & C_INITIALIZED) - (r->mc_flags & C_INITIALIZED); + if (unlikely(diff)) + return (diff > 0) ? incomparable * 2 : -incomparable * 2; + if (unlikely((l->mc_flags & C_INITIALIZED) == 0)) + return 0; + + detent = (l->mc_snum <= r->mc_snum) ? l->mc_snum : r->mc_snum; + for (size_t i = 0; i < detent; ++i) { + diff = l->mc_ki[i] - r->mc_ki[i]; + if (diff) + return diff; + } + if (unlikely(l->mc_snum != r->mc_snum)) + return (l->mc_snum > r->mc_snum) ? 
incomparable : -incomparable; + return 0; +} + int mdbx_cursor_copy(const MDBX_cursor *src, MDBX_cursor *dest) { if (unlikely(!src)) return MDBX_EINVAL; From eee3e6eb6be31843efc8bf81f5345d8c81405d70 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 18 Nov 2023 02:32:55 +0300 Subject: [PATCH 063/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`compare=5Fpositions()`?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20=D0=BA=D1=83=D1=80=D1=81=D0=BE=D1=80?= =?UTF-8?q?=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 44 ++++++++++++++++++++++++++++++++++++++++++++ src/mdbx.c++ | 5 +++++ 2 files changed, 49 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 9be7e341..c973573b 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -567,6 +567,7 @@ MDBX_DECLARE_EXCEPTION(dangling_map_id); [[noreturn]] LIBMDBX_API void throw_out_range(); [[noreturn]] LIBMDBX_API void throw_allocators_mismatch(); [[noreturn]] LIBMDBX_API void throw_bad_value_size(); +[[noreturn]] LIBMDBX_API void throw_incomparable_cursors(); static MDBX_CXX14_CONSTEXPR size_t check_length(size_t bytes); static MDBX_CXX14_CONSTEXPR size_t check_length(size_t headroom, size_t payload); @@ -4339,6 +4340,34 @@ public: friend MDBX_CXX11_CONSTEXPR bool operator!=(const cursor &a, const cursor &b) noexcept; + friend inline int compare_position_nothrow(const cursor &left, + const cursor &right, + bool ignore_nested) noexcept; + friend inline int compare_position(const cursor &left, const cursor &right, + bool ignore_nested); + + bool is_before_than(const cursor &other, bool ignore_nested = false) const { + return compare_position(*this, other, ignore_nested) < 0; + } + + bool is_same_or_before_than(const cursor &other, + bool ignore_nested = false) const { + return compare_position(*this, other, ignore_nested) 
<= 0; + } + + bool is_same_position(const cursor &other, bool ignore_nested = false) const { + return compare_position(*this, other, ignore_nested) == 0; + } + + bool is_after_than(const cursor &other, bool ignore_nested = false) const { + return compare_position(*this, other, ignore_nested) > 0; + } + + bool is_same_or_after_than(const cursor &other, + bool ignore_nested = false) const { + return compare_position(*this, other, ignore_nested) >= 0; + } + /// \brief Returns the application context associated with the cursor. inline void *get_context() const noexcept; @@ -6192,6 +6221,21 @@ MDBX_CXX11_CONSTEXPR bool operator!=(const cursor &a, return a.handle_ != b.handle_; } +inline int compare_position_nothrow(const cursor &left, const cursor &right, + bool ignore_nested = false) noexcept { + return mdbx_cursor_compare(left.handle_, right.handle_, ignore_nested); +} + +inline int compare_position(const cursor &left, const cursor &right, + bool ignore_nested = false) { + const auto diff = compare_position_nothrow(left, right, ignore_nested); + assert(compare_position_nothrow(right, left, ignore_nested) == -diff); + if (MDBX_LIKELY(int16_t(diff) == diff)) + MDBX_CXX20_LIKELY + return int(diff); + throw_incomparable_cursors(); +} + inline cursor::move_result::move_result(const cursor &cursor, bool throw_notfound) : pair_result(slice(), slice(), false) { diff --git a/src/mdbx.c++ b/src/mdbx.c++ index dd75aaa0..4381b8e8 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -233,6 +233,11 @@ namespace mdbx { "into an incompatible memory allocation scheme."); } +[[noreturn]] __cold void throw_incomparable_cursors() { + throw std::logic_error( + "mdbx:: incomparable and/or invalid cursors to compare positions."); +} + [[noreturn]] __cold void throw_bad_value_size() { throw bad_value_size(MDBX_BAD_VALSIZE); } From ed59ad22c64f53d6af7ee46010605228059dd3fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= 
=?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 18 Nov 2023 02:34:07 +0300 Subject: [PATCH 064/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fcursor=5Fon=5Ffirst/la?= =?UTF-8?q?st=5Fdup()`=20=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 8 ++++++++ mdbx.h++ | 10 ++++++++++ src/core.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 65 insertions(+) diff --git a/mdbx.h b/mdbx.h index 765f11e3..0123a139 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5138,6 +5138,10 @@ mdbx_cursor_eof(const MDBX_cursor *cursor); MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_on_first(const MDBX_cursor *cursor); +/** FIXME */ +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int +mdbx_cursor_on_first_dup(const MDBX_cursor *cursor); + /** \brief Determines whether the cursor is pointed to the last key-value pair * or not. * \ingroup c_cursors @@ -5152,6 +5156,10 @@ mdbx_cursor_on_first(const MDBX_cursor *cursor); MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int mdbx_cursor_on_last(const MDBX_cursor *cursor); +/** FIXME */ +MDBX_NOTHROW_PURE_FUNCTION LIBMDBX_API int +mdbx_cursor_on_last_dup(const MDBX_cursor *cursor); + /** \addtogroup c_rqest * \details \note The estimation result varies greatly depending on the filling * of specific pages and the overall balance of the b-tree: diff --git a/mdbx.h++ b/mdbx.h++ index c973573b..eaa5279c 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4467,6 +4467,8 @@ public: inline bool eof() const; inline bool on_first() const; inline bool on_last() const; + inline bool on_first_multival() const; + inline bool on_last_multival() const; inline estimate_result estimate(const slice &key, const slice &value) const; inline estimate_result estimate(const slice &key) const; inline estimate_result estimate(move_operation operation) const; @@ -6396,6 +6398,14 @@ inline bool cursor::on_last() const { return 
error::boolean_or_throw(::mdbx_cursor_on_last(*this)); } +inline bool cursor::on_first_multival() const { + return error::boolean_or_throw(::mdbx_cursor_on_first_dup(*this)); +} + +inline bool cursor::on_last_multival() const { + return error::boolean_or_throw(::mdbx_cursor_on_last_dup(*this)); +} + inline cursor::estimate_result cursor::estimate(const slice &key, const slice &value) const { return estimate_result(*this, multi_exactkey_lowerboundvalue, key, value); diff --git a/src/core.c b/src/core.c index 909b8469..2e577a93 100644 --- a/src/core.c +++ b/src/core.c @@ -24533,6 +24533,29 @@ int mdbx_cursor_on_first(const MDBX_cursor *mc) { return MDBX_RESULT_TRUE; } +int mdbx_cursor_on_first_dup(const MDBX_cursor *mc) { + if (unlikely(mc == NULL)) + return MDBX_EINVAL; + + if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + if (!(mc->mc_flags & C_INITIALIZED)) + return mc->mc_db->md_entries ? MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; + + if (!mc->mc_xcursor) + return MDBX_RESULT_TRUE; + + mc = &mc->mc_xcursor->mx_cursor; + for (size_t i = 0; i < mc->mc_snum; ++i) { + if (mc->mc_ki[i]) + return MDBX_RESULT_FALSE; + } + + return MDBX_RESULT_TRUE; +} + int mdbx_cursor_on_last(const MDBX_cursor *mc) { if (unlikely(mc == NULL)) return MDBX_EINVAL; @@ -24553,6 +24576,30 @@ int mdbx_cursor_on_last(const MDBX_cursor *mc) { return MDBX_RESULT_TRUE; } +int mdbx_cursor_on_last_dup(const MDBX_cursor *mc) { + if (unlikely(mc == NULL)) + return MDBX_EINVAL; + + if (unlikely(mc->mc_signature != MDBX_MC_LIVE)) + return (mc->mc_signature == MDBX_MC_READY4CLOSE) ? MDBX_EINVAL + : MDBX_EBADSIGN; + + if (!(mc->mc_flags & C_INITIALIZED)) + return mc->mc_db->md_entries ? 
MDBX_RESULT_FALSE : MDBX_RESULT_TRUE; + + if (!mc->mc_xcursor) + return MDBX_RESULT_TRUE; + + mc = &mc->mc_xcursor->mx_cursor; + for (size_t i = 0; i < mc->mc_snum; ++i) { + size_t nkeys = page_numkeys(mc->mc_pg[i]); + if (mc->mc_ki[i] < nkeys - 1) + return MDBX_RESULT_FALSE; + } + + return MDBX_RESULT_TRUE; +} + int mdbx_cursor_eof(const MDBX_cursor *mc) { if (unlikely(mc == NULL)) return MDBX_EINVAL; From bc562d5c06a3c1efab0f0834d7a1fba0145b0adf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 18 Nov 2023 22:52:13 +0300 Subject: [PATCH 065/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fcursor=5Fscan()`=20?= =?UTF-8?q?=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 8 ++++++++ src/core.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/mdbx.h b/mdbx.h index 0123a139..c1fdd90e 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4929,6 +4929,14 @@ LIBMDBX_API int mdbx_cursor_compare(const MDBX_cursor *left, * \retval MDBX_EINVAL An invalid parameter was specified. */ LIBMDBX_API int mdbx_cursor_get(MDBX_cursor *cursor, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op); +/** FIXME */ +typedef int(MDBX_predicate_func)(void *context, MDBX_val *key, MDBX_val *value, + void *arg) MDBX_CXX17_NOEXCEPT; +/** FIXME */ +LIBMDBX_API int mdbx_cursor_scan(MDBX_cursor *cursor, + MDBX_predicate_func *predicate, void *context, + MDBX_cursor_op start_op, + MDBX_cursor_op turn_op, void *arg); /** \brief Retrieve multiple non-dupsort key/value pairs by cursor. 
* \ingroup c_crud diff --git a/src/core.c b/src/core.c index 2e577a93..23eaaa92 100644 --- a/src/core.c +++ b/src/core.c @@ -17433,6 +17433,39 @@ int mdbx_cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, return cursor_get(mc, key, data, op); } +int mdbx_cursor_scan(MDBX_cursor *mc, MDBX_predicate_func *predicate, + void *context, MDBX_cursor_op start_op, + MDBX_cursor_op turn_op, void *arg) { + if (unlikely(!predicate)) + return MDBX_EINVAL; + + const unsigned valid_start_mask = + 1 << MDBX_FIRST | 1 << MDBX_FIRST_DUP | 1 << MDBX_LAST | + 1 << MDBX_LAST_DUP | 1 << MDBX_GET_CURRENT | 1 << MDBX_GET_MULTIPLE; + if (unlikely(start_op > 30 || ((1 << start_op) & valid_start_mask) == 0)) + return MDBX_EINVAL; + + const unsigned valid_turn_mask = + 1 << MDBX_NEXT | 1 << MDBX_NEXT_DUP | 1 << MDBX_NEXT_NODUP | + 1 << MDBX_PREV | 1 << MDBX_PREV_DUP | 1 << MDBX_PREV_NODUP | + 1 << MDBX_NEXT_MULTIPLE | 1 << MDBX_PREV_MULTIPLE; + if (unlikely(turn_op > 30 || ((1 << turn_op) & valid_turn_mask) == 0)) + return MDBX_EINVAL; + + MDBX_val key, data; + int rc = mdbx_cursor_get(mc, &key, &data, start_op); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + for (;;) { + rc = predicate(context, &key, &data, arg); + if (rc != MDBX_RESULT_FALSE) + return rc; + rc = cursor_get(mc, &key, &data, turn_op); + if (rc != MDBX_SUCCESS) + return (rc == MDBX_NOTFOUND) ? 
MDBX_RESULT_FALSE : rc; + } +} + static int cursor_first_batch(MDBX_cursor *mc) { if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { int err = page_search(mc, NULL, MDBX_PS_FIRST); From 5cf6542fa002a1cd543ac524b5bfb8de43e00337 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 18 Nov 2023 23:20:53 +0300 Subject: [PATCH 066/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fcursor=5Fscan=5Ffrom()?= =?UTF-8?q?`=20=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 7 +++++++ src/core.c | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/mdbx.h b/mdbx.h index c1fdd90e..611efd1e 100644 --- a/mdbx.h +++ b/mdbx.h @@ -4938,6 +4938,13 @@ LIBMDBX_API int mdbx_cursor_scan(MDBX_cursor *cursor, MDBX_cursor_op start_op, MDBX_cursor_op turn_op, void *arg); +/** FIXME */ +LIBMDBX_API int mdbx_cursor_scan_from(MDBX_cursor *cursor, + MDBX_predicate_func *predicate, + void *context, MDBX_cursor_op from_op, + MDBX_val *from_key, MDBX_val *from_value, + MDBX_cursor_op turn_op, void *arg); + /** \brief Retrieve multiple non-dupsort key/value pairs by cursor. 
* \ingroup c_crud * diff --git a/src/core.c b/src/core.c index 23eaaa92..861c266f 100644 --- a/src/core.c +++ b/src/core.c @@ -17466,6 +17466,50 @@ int mdbx_cursor_scan(MDBX_cursor *mc, MDBX_predicate_func *predicate, } } +int mdbx_cursor_scan_from(MDBX_cursor *mc, MDBX_predicate_func *predicate, + void *context, MDBX_cursor_op from_op, MDBX_val *key, + MDBX_val *value, MDBX_cursor_op turn_op, void *arg) { + if (unlikely(!predicate)) + return MDBX_EINVAL; + + const unsigned valid_start_mask = + 1 << MDBX_GET_BOTH | 1 << MDBX_GET_BOTH_RANGE | 1 << MDBX_SET_KEY | + 1 << MDBX_GET_MULTIPLE | 1 << MDBX_SET_LOWERBOUND | + 1 << MDBX_SET_UPPERBOUND; + ; + if (unlikely(from_op < MDBX_TO_KEY_LESSER_THAN && + ((1 << from_op) & valid_start_mask) == 0)) + return MDBX_EINVAL; + + const unsigned valid_turn_mask = + 1 << MDBX_NEXT | 1 << MDBX_NEXT_DUP | 1 << MDBX_NEXT_NODUP | + 1 << MDBX_PREV | 1 << MDBX_PREV_DUP | 1 << MDBX_PREV_NODUP | + 1 << MDBX_NEXT_MULTIPLE | 1 << MDBX_PREV_MULTIPLE; + if (unlikely(turn_op > 30 || ((1 << turn_op) & valid_turn_mask) == 0)) + return MDBX_EINVAL; + + int rc = mdbx_cursor_get(mc, key, value, from_op); + if (unlikely(MDBX_IS_ERROR(rc))) + return rc; + + cASSERT(mc, key != nullptr); + MDBX_val stub; + if (!value) { + value = &stub; + rc = cursor_get(mc, key, value, MDBX_GET_CURRENT); + if (unlikely(rc != MDBX_SUCCESS)) + return rc; + } + for (;;) { + rc = predicate(context, key, value, arg); + if (rc != MDBX_RESULT_FALSE) + return rc; + rc = cursor_get(mc, key, value, turn_op); + if (rc != MDBX_SUCCESS) + return (rc == MDBX_NOTFOUND) ? 
MDBX_RESULT_FALSE : rc; + } +} + static int cursor_first_batch(MDBX_cursor *mc) { if (!(mc->mc_flags & C_INITIALIZED) || mc->mc_top) { int err = page_search(mc, NULL, MDBX_PS_FIRST); From 25015c54e136ef60c3328c197bf221faf83ccdb0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 13 Nov 2023 12:52:17 +0300 Subject: [PATCH 067/137] =?UTF-8?q?mdbx:=20=D0=BE=D0=B1=D0=B5=D1=89=D0=B0?= =?UTF-8?q?=D0=BD=D0=BD=D0=BE=D0=B5=20"doubtless"=20API=20=D0=B4=D0=BB?= =?UTF-8?q?=D1=8F=20=D0=BF=D0=BE=D0=B7=D0=B8=D1=86=D0=B8=D0=BE=D0=BD=D0=B8?= =?UTF-8?q?=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=BA=D1=83=D1=80?= =?UTF-8?q?=D1=81=D0=BE=D1=80=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 25 +++++- mdbx.h++ | 256 +++++++++++++++++++++++++++++++++-------------------- src/core.c | 154 +++++++++++++++++++++++++++++++- 3 files changed, 336 insertions(+), 99 deletions(-) diff --git a/mdbx.h b/mdbx.h index 611efd1e..43d4eca3 100644 --- a/mdbx.h +++ b/mdbx.h @@ -1778,7 +1778,7 @@ enum MDBX_cursor_op { * return both key and data, and the return code depends on whether a * upper-bound was found. * - * For non DUPSORT-ed collections this work the same to \ref MDBX_SET_RANGE, + * For non DUPSORT-ed collections this work like \ref MDBX_SET_RANGE, * but returns \ref MDBX_SUCCESS if the greater key was found or * \ref MDBX_NOTFOUND otherwise. * @@ -1786,7 +1786,28 @@ enum MDBX_cursor_op { * i.e. for a pairs/tuples of a key and an each data value of duplicates. * Returns \ref MDBX_SUCCESS if the greater pair was returned or * \ref MDBX_NOTFOUND otherwise. */ - MDBX_SET_UPPERBOUND + MDBX_SET_UPPERBOUND, + + /* Doubtless cursor positioning at a specified key. 
*/ + MDBX_TO_KEY_LESSER_THAN, + MDBX_TO_KEY_LESSER_OR_EQUAL, + MDBX_TO_KEY_EQUAL, + MDBX_TO_KEY_GREATER_OR_EQUAL, + MDBX_TO_KEY_GREATER_THAN, + + /* Doubtless cursor positioning at a specified key-value pair + * for dupsort/multi-value hives. */ + MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN, + MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL, + MDBX_TO_EXACT_KEY_VALUE_EQUAL, + MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL, + MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN, + + MDBX_TO_PAIR_LESSER_THAN, + MDBX_TO_PAIR_LESSER_OR_EQUAL, + MDBX_TO_PAIR_EQUAL, + MDBX_TO_PAIR_GREATER_OR_EQUAL, + MDBX_TO_PAIR_GREATER_THAN }; #ifndef __cplusplus /** \ingroup c_cursors */ diff --git a/mdbx.h++ b/mdbx.h++ index eaa5279c..bdeeb05b 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4391,9 +4391,33 @@ public: multi_find_pair = MDBX_GET_BOTH, multi_exactkey_lowerboundvalue = MDBX_GET_BOTH_RANGE, - find_key = MDBX_SET, + seek_key = MDBX_SET, key_exact = MDBX_SET_KEY, - key_lowerbound = MDBX_SET_RANGE + key_lowerbound = MDBX_SET_RANGE, + + /* Doubtless cursor positioning at a specified key. */ + key_lesser_than = MDBX_TO_KEY_LESSER_THAN, + key_lesser_or_equal = MDBX_TO_KEY_LESSER_OR_EQUAL, + key_equal = MDBX_TO_KEY_EQUAL, + key_greater_or_equal = MDBX_TO_KEY_GREATER_OR_EQUAL, + key_greater_than = MDBX_TO_KEY_GREATER_THAN, + + /* Doubtless cursor positioning at a specified key-value pair + * for dupsort/multi-value hives. 
*/ + multi_exactkey_value_lesser_than = MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN, + multi_exactkey_value_lesser_or_equal = + MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL, + multi_exactkey_value_equal = MDBX_TO_EXACT_KEY_VALUE_EQUAL, + multi_exactkey_value_greater_or_equal = + MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL, + multi_exactkey_value_greater = MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN, + + pair_lesser_than = MDBX_TO_PAIR_LESSER_THAN, + pair_lesser_or_equal = MDBX_TO_PAIR_LESSER_OR_EQUAL, + pair_equal = MDBX_TO_PAIR_EQUAL, + pair_exact = pair_equal, + pair_greater_or_equal = MDBX_TO_PAIR_GREATER_OR_EQUAL, + pair_greater_than = MDBX_TO_PAIR_GREATER_THAN, }; struct move_result : public pair_result { @@ -4424,45 +4448,154 @@ public: }; protected: + /* fake const, i.e. for some move/get operations */ inline bool move(move_operation operation, MDBX_val *key, MDBX_val *value, - bool throw_notfound) const - /* fake const, i.e. for some operations */; + bool throw_notfound) const; + inline ptrdiff_t estimate(move_operation operation, MDBX_val *key, MDBX_val *value) const; public: - inline move_result move(move_operation operation, bool throw_notfound); - inline move_result to_first(bool throw_notfound = true); - inline move_result to_previous(bool throw_notfound = true); - inline move_result to_previous_last_multi(bool throw_notfound = true); - inline move_result to_current_first_multi(bool throw_notfound = true); - inline move_result to_current_prev_multi(bool throw_notfound = true); - inline move_result current(bool throw_notfound = true) const; - inline move_result to_current_next_multi(bool throw_notfound = true); - inline move_result to_current_last_multi(bool throw_notfound = true); - inline move_result to_next_first_multi(bool throw_notfound = true); - inline move_result to_next(bool throw_notfound = true); - inline move_result to_last(bool throw_notfound = true); + move_result move(move_operation operation, bool throw_notfound) { + return move_result(*this, operation, 
throw_notfound); + } + move_result move(move_operation operation, const slice &key, + bool throw_notfound) { + return move_result(*this, operation, key, throw_notfound); + } + move_result move(move_operation operation, const slice &key, + const slice &value, bool throw_notfound) { + return move_result(*this, operation, key, value, throw_notfound); + } + bool move(move_operation operation, slice &key, slice &value, + bool throw_notfound) { + return move(operation, &key, &value, throw_notfound); + } - inline move_result move(move_operation operation, const slice &key, - bool throw_notfound); + move_result to_first(bool throw_notfound = true) { + return move(first, throw_notfound); + } + move_result to_previous(bool throw_notfound = true) { + return move(previous, throw_notfound); + } + move_result to_previous_last_multi(bool throw_notfound = true) { + return move(multi_prevkey_lastvalue, throw_notfound); + } + move_result to_current_first_multi(bool throw_notfound = true) { + return move(multi_currentkey_firstvalue, throw_notfound); + } + move_result to_current_prev_multi(bool throw_notfound = true) { + return move(multi_currentkey_prevvalue, throw_notfound); + } + move_result current(bool throw_notfound = true) const { + return move_result(*this, throw_notfound); + } + move_result to_current_next_multi(bool throw_notfound = true) { + return move(multi_currentkey_nextvalue, throw_notfound); + } + move_result to_current_last_multi(bool throw_notfound = true) { + return move(multi_currentkey_lastvalue, throw_notfound); + } + move_result to_next_first_multi(bool throw_notfound = true) { + return move(multi_nextkey_firstvalue, throw_notfound); + } + move_result to_next(bool throw_notfound = true) { + return move(next, throw_notfound); + } + move_result to_last(bool throw_notfound = true) { + return move(last, throw_notfound); + } + + move_result to_key_lesser_than(const slice &key, bool throw_notfound = true) { + return move(key_lesser_than, key, throw_notfound); + } + 
move_result to_key_lesser_or_equal(const slice &key, + bool throw_notfound = true) { + return move(key_lesser_or_equal, key, throw_notfound); + } + move_result to_key_equal(const slice &key, bool throw_notfound = true) { + return move(key_equal, key, throw_notfound); + } + move_result to_key_exact(const slice &key, bool throw_notfound = true) { + return move(key_exact, key, throw_notfound); + } + move_result to_key_greater_or_equal(const slice &key, + bool throw_notfound = true) { + return move(key_greater_or_equal, key, throw_notfound); + } + move_result to_key_greater_than(const slice &key, + bool throw_notfound = true) { + return move(key_greater_than, key, throw_notfound); + } + + move_result to_exact_key_value_lesser_than(const slice &key, + const slice &value, + bool throw_notfound = true) { + return move(multi_exactkey_value_lesser_than, key, value, throw_notfound); + } + move_result to_exact_key_value_lesser_or_equal(const slice &key, + const slice &value, + bool throw_notfound = true) { + return move(multi_exactkey_value_lesser_or_equal, key, value, + throw_notfound); + } + move_result to_exact_key_value_equal(const slice &key, const slice &value, + bool throw_notfound = true) { + return move(multi_exactkey_value_equal, key, value, throw_notfound); + } + move_result to_exact_key_value_greater_or_equal(const slice &key, + const slice &value, + bool throw_notfound = true) { + return move(multi_exactkey_value_greater_or_equal, key, value, + throw_notfound); + } + move_result to_exact_key_value_greater_than(const slice &key, + const slice &value, + bool throw_notfound = true) { + return move(multi_exactkey_value_greater, key, value, throw_notfound); + } + + move_result to_pair_lesser_than(const slice &key, const slice &value, + bool throw_notfound = true) { + return move(pair_lesser_than, key, value, throw_notfound); + } + move_result to_pair_lesser_or_equal(const slice &key, const slice &value, + bool throw_notfound = true) { + return 
move(pair_lesser_or_equal, key, value, throw_notfound); + } + move_result to_pair_equal(const slice &key, const slice &value, + bool throw_notfound = true) { + return move(pair_equal, key, value, throw_notfound); + } + move_result to_pair_exact(const slice &key, const slice &value, + bool throw_notfound = true) { + return move(pair_exact, key, value, throw_notfound); + } + move_result to_pair_greater_or_equal(const slice &key, const slice &value, + bool throw_notfound = true) { + return move(pair_greater_or_equal, key, value, throw_notfound); + } + move_result to_pair_greater_than(const slice &key, const slice &value, + bool throw_notfound = true) { + return move(pair_greater_than, key, value, throw_notfound); + } + + inline bool seek(const slice &key); inline move_result find(const slice &key, bool throw_notfound = true); - inline move_result lower_bound(const slice &key, bool throw_notfound = true); + inline move_result lower_bound(const slice &key, bool throw_notfound = false); + inline move_result upper_bound(const slice &key, bool throw_notfound = false); + + /// \brief Return count of duplicates for current key. + inline size_t count_multivalue() const; - inline move_result move(move_operation operation, const slice &key, - const slice &value, bool throw_notfound); inline move_result find_multivalue(const slice &key, const slice &value, bool throw_notfound = true); inline move_result lower_bound_multivalue(const slice &key, const slice &value, bool throw_notfound = false); - - inline bool seek(const slice &key); - inline bool move(move_operation operation, slice &key, slice &value, - bool throw_notfound); - - /// \brief Return count of duplicates for current key. 
- inline size_t count_multivalue() const; + inline move_result upper_bound_multivalue(const slice &key, + const slice &value, + bool throw_notfound = false); inline bool eof() const; inline bool on_first() const; @@ -6290,60 +6423,6 @@ inline ptrdiff_t estimate(const cursor &from, const cursor &to) { return result; } -inline cursor::move_result cursor::move(move_operation operation, - bool throw_notfound) { - return move_result(*this, operation, throw_notfound); -} - -inline cursor::move_result cursor::to_first(bool throw_notfound) { - return move(first, throw_notfound); -} - -inline cursor::move_result cursor::to_previous(bool throw_notfound) { - return move(previous, throw_notfound); -} - -inline cursor::move_result cursor::to_previous_last_multi(bool throw_notfound) { - return move(multi_prevkey_lastvalue, throw_notfound); -} - -inline cursor::move_result cursor::to_current_first_multi(bool throw_notfound) { - return move(multi_currentkey_firstvalue, throw_notfound); -} - -inline cursor::move_result cursor::to_current_prev_multi(bool throw_notfound) { - return move(multi_currentkey_prevvalue, throw_notfound); -} - -inline cursor::move_result cursor::current(bool throw_notfound) const { - return move_result(*this, throw_notfound); -} - -inline cursor::move_result cursor::to_current_next_multi(bool throw_notfound) { - return move(multi_currentkey_nextvalue, throw_notfound); -} - -inline cursor::move_result cursor::to_current_last_multi(bool throw_notfound) { - return move(multi_currentkey_lastvalue, throw_notfound); -} - -inline cursor::move_result cursor::to_next_first_multi(bool throw_notfound) { - return move(multi_nextkey_firstvalue, throw_notfound); -} - -inline cursor::move_result cursor::to_next(bool throw_notfound) { - return move(next, throw_notfound); -} - -inline cursor::move_result cursor::to_last(bool throw_notfound) { - return move(last, throw_notfound); -} - -inline cursor::move_result cursor::move(move_operation operation, - const slice &key, bool 
throw_notfound) { - return move_result(*this, operation, key, throw_notfound); -} - inline cursor::move_result cursor::find(const slice &key, bool throw_notfound) { return move(key_exact, key, throw_notfound); } @@ -6353,12 +6432,6 @@ inline cursor::move_result cursor::lower_bound(const slice &key, return move(key_lowerbound, key, throw_notfound); } -inline cursor::move_result cursor::move(move_operation operation, - const slice &key, const slice &value, - bool throw_notfound) { - return move_result(*this, operation, key, value, throw_notfound); -} - inline cursor::move_result cursor::find_multivalue(const slice &key, const slice &value, bool throw_notfound) { @@ -6372,12 +6445,7 @@ inline cursor::move_result cursor::lower_bound_multivalue(const slice &key, } inline bool cursor::seek(const slice &key) { - return move(find_key, const_cast(&key), nullptr, false); -} - -inline bool cursor::move(move_operation operation, slice &key, slice &value, - bool throw_notfound) { - return move(operation, &key, &value, throw_notfound); + return move(seek_key, const_cast(&key), nullptr, false); } inline size_t cursor::count_multivalue() const { diff --git a/src/core.c b/src/core.c index 861c266f..bd955953 100644 --- a/src/core.c +++ b/src/core.c @@ -16961,7 +16961,7 @@ search_node:; node = nsr.node; ret.exact = nsr.exact; if (!ret.exact) { - if (op != MDBX_SET_RANGE) { + if (op < MDBX_SET_RANGE) { /* MDBX_SET specified and not an exact match. 
*/ if (unlikely(mc->mc_ki[mc->mc_top] >= page_numkeys(mc->mc_pg[mc->mc_top]))) @@ -17010,7 +17010,7 @@ got_node: ret.err = cursor_xinit1(mc, node, mp); if (unlikely(ret.err != MDBX_SUCCESS)) return ret; - if (op == MDBX_SET || op == MDBX_SET_KEY || op == MDBX_SET_RANGE) { + if (op >= MDBX_SET) { MDBX_ANALYSIS_ASSUME(mc->mc_xcursor != nullptr); ret.err = cursor_first(&mc->mc_xcursor->mx_cursor, data, NULL); if (unlikely(ret.err != MDBX_SUCCESS)) @@ -17026,7 +17026,7 @@ got_node: } } } else if (likely(data)) { - if (op == MDBX_GET_BOTH || op == MDBX_GET_BOTH_RANGE) { + if (op <= MDBX_GET_BOTH_RANGE) { if (unlikely(data->iov_len < mc->mc_dbx->md_vlen_min || data->iov_len > mc->mc_dbx->md_vlen_max)) { cASSERT(mc, !"Invalid data-size"); @@ -17365,6 +17365,7 @@ static __hot int cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, case MDBX_LAST_DUP: mfunc = cursor_last; goto move; + case MDBX_SET_UPPERBOUND: /* mostly same as MDBX_SET_LOWERBOUND */ case MDBX_SET_LOWERBOUND: { if (unlikely(key == NULL || data == NULL)) @@ -17408,6 +17409,153 @@ static __hot int cursor_get(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, } break; } + + /* Doubtless API to positioning of the cursor at a specified key. 
*/ + case MDBX_TO_KEY_LESSER_THAN: + case MDBX_TO_KEY_LESSER_OR_EQUAL: + case MDBX_TO_KEY_EQUAL: + case MDBX_TO_KEY_GREATER_OR_EQUAL: + case MDBX_TO_KEY_GREATER_THAN: { + if (unlikely(key == NULL)) + return MDBX_EINVAL; + struct cursor_set_result csr = cursor_set(mc, key, data, MDBX_SET_RANGE); + rc = csr.err; + if (csr.exact) { + cASSERT(mc, csr.err == MDBX_SUCCESS); + if (op == MDBX_TO_KEY_LESSER_THAN) + rc = cursor_prev(mc, key, data, MDBX_PREV_NODUP); + else if (op == MDBX_TO_KEY_GREATER_THAN) + rc = cursor_next(mc, key, data, MDBX_NEXT_NODUP); + } else if (op < MDBX_TO_KEY_EQUAL && + (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + rc = cursor_prev(mc, key, data, MDBX_PREV_NODUP); + else if (op == MDBX_TO_KEY_EQUAL && rc == MDBX_SUCCESS) + rc = MDBX_NOTFOUND; + break; + } + + /* Doubtless API to positioning of the cursor at a specified key-value pair + * for multi-value hives. */ + case MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN: + case MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL: + case MDBX_TO_EXACT_KEY_VALUE_EQUAL: + case MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL: + case MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN: { + if (unlikely(key == NULL || data == NULL)) + return MDBX_EINVAL; + MDBX_val save_data = *data; + struct cursor_set_result csr = cursor_set(mc, key, data, MDBX_SET_KEY); + rc = csr.err; + if (rc == MDBX_SUCCESS) { + cASSERT(mc, csr.exact); + MDBX_cursor *const mx = + (mc->mc_xcursor && + (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) + ? 
&mc->mc_xcursor->mx_cursor + : nullptr; + if (mx) { + csr = cursor_set(mx, &save_data, NULL, MDBX_SET_RANGE); + rc = csr.err; + if (csr.exact) { + cASSERT(mc, csr.err == MDBX_SUCCESS); + if (op == MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN) + rc = cursor_prev(mx, data, NULL, MDBX_PREV); + else if (op == MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN) + rc = cursor_next(mx, data, NULL, MDBX_NEXT); + } else if (op < MDBX_TO_EXACT_KEY_VALUE_EQUAL && + (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + rc = cursor_prev(mx, data, NULL, MDBX_PREV); + else if (op == MDBX_TO_EXACT_KEY_VALUE_EQUAL && rc == MDBX_SUCCESS) + rc = MDBX_NOTFOUND; + } else { + int cmp = mc->mc_dbx->md_dcmp(data, &save_data); + switch (op) { + default: + __unreachable(); + case MDBX_TO_EXACT_KEY_VALUE_LESSER_THAN: + rc = (cmp < 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + case MDBX_TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL: + rc = (cmp <= 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + case MDBX_TO_EXACT_KEY_VALUE_EQUAL: + rc = (cmp == 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + case MDBX_TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL: + rc = (cmp >= 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + case MDBX_TO_EXACT_KEY_VALUE_GREATER_THAN: + rc = (cmp > 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + } + } + } + break; + } + case MDBX_TO_PAIR_LESSER_THAN: + case MDBX_TO_PAIR_LESSER_OR_EQUAL: + case MDBX_TO_PAIR_EQUAL: + case MDBX_TO_PAIR_GREATER_OR_EQUAL: + case MDBX_TO_PAIR_GREATER_THAN: { + if (unlikely(key == NULL || data == NULL)) + return MDBX_EINVAL; + MDBX_val save_data = *data; + struct cursor_set_result csr = cursor_set(mc, key, data, MDBX_SET_RANGE); + rc = csr.err; + if (csr.exact) { + cASSERT(mc, csr.err == MDBX_SUCCESS); + MDBX_cursor *const mx = + (mc->mc_xcursor && + (mc->mc_xcursor->mx_cursor.mc_flags & C_INITIALIZED)) + ? 
&mc->mc_xcursor->mx_cursor + : nullptr; + if (mx) { + csr = cursor_set(mx, &save_data, NULL, MDBX_SET_RANGE); + rc = csr.err; + if (csr.exact) { + cASSERT(mc, csr.err == MDBX_SUCCESS); + if (op == MDBX_TO_PAIR_LESSER_THAN) + rc = cursor_prev(mc, key, data, MDBX_PREV); + else if (op == MDBX_TO_PAIR_GREATER_THAN) + rc = cursor_next(mc, key, data, MDBX_NEXT); + } else if (op < MDBX_TO_PAIR_EQUAL && + (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + rc = cursor_prev(mc, key, data, MDBX_PREV); + else if (op == MDBX_TO_PAIR_EQUAL && rc == MDBX_SUCCESS) + rc = MDBX_NOTFOUND; + else if (op > MDBX_TO_PAIR_EQUAL && rc == MDBX_NOTFOUND) + rc = cursor_next(mc, key, data, MDBX_NEXT); + } else { + int cmp = mc->mc_dbx->md_dcmp(data, &save_data); + switch (op) { + default: + __unreachable(); + case MDBX_TO_PAIR_LESSER_THAN: + rc = (cmp < 0) ? MDBX_SUCCESS : cursor_prev(mc, key, data, MDBX_PREV); + break; + case MDBX_TO_PAIR_LESSER_OR_EQUAL: + rc = + (cmp <= 0) ? MDBX_SUCCESS : cursor_prev(mc, key, data, MDBX_PREV); + break; + case MDBX_TO_PAIR_EQUAL: + rc = (cmp == 0) ? MDBX_SUCCESS : MDBX_NOTFOUND; + break; + case MDBX_TO_PAIR_GREATER_OR_EQUAL: + rc = + (cmp >= 0) ? MDBX_SUCCESS : cursor_next(mc, key, data, MDBX_NEXT); + break; + case MDBX_TO_PAIR_GREATER_THAN: + rc = (cmp > 0) ? 
MDBX_SUCCESS : cursor_next(mc, key, data, MDBX_NEXT); + break; + } + } + } else if (op < MDBX_TO_PAIR_EQUAL && + (rc == MDBX_NOTFOUND || rc == MDBX_SUCCESS)) + rc = cursor_prev(mc, key, data, MDBX_PREV_NODUP); + else if (op == MDBX_TO_PAIR_EQUAL && rc == MDBX_SUCCESS) + rc = MDBX_NOTFOUND; + break; + } default: DEBUG("unhandled/unimplemented cursor operation %u", op); return MDBX_EINVAL; From 4999264460a8a35cf277473e15d50dd4ad2928aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 20 Nov 2023 21:22:12 +0300 Subject: [PATCH 068/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`error::boolean=5For=5Ft?= =?UTF-8?q?hrow(exception=5Fthunk)`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/mdbx.h++ b/mdbx.h++ index bdeeb05b..e4bf7c15 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -485,6 +485,7 @@ public: static inline void throw_on_failure(int error_code); static inline bool boolean_or_throw(int error_code); static inline void success_or_throw(int error_code, const exception_thunk &); + static inline bool boolean_or_throw(int error_code, const exception_thunk &); static inline void panic_on_failure(int error_code, const char *context_where, const char *func_who) noexcept; static inline void success_or_panic(int error_code, const char *context_where, @@ -4883,7 +4884,8 @@ inline void error::success_or_throw() const { inline void error::success_or_throw(const exception_thunk &thunk) const { assert(thunk.is_clean() || code() != MDBX_SUCCESS); if (MDBX_UNLIKELY(!is_success())) { - MDBX_CXX20_UNLIKELY if (!thunk.is_clean()) thunk.rethrow_captured(); + MDBX_CXX20_UNLIKELY if (MDBX_UNLIKELY(!thunk.is_clean())) + thunk.rethrow_captured(); else throw_exception(); } } @@ -4944,6 +4946,13 @@ 
inline void error::success_or_panic(int error_code, const char *context_where, rc.success_or_panic(context_where, func_who); } +inline bool error::boolean_or_throw(int error_code, + const exception_thunk &thunk) { + if (MDBX_UNLIKELY(!thunk.is_clean())) + MDBX_CXX20_UNLIKELY thunk.rethrow_captured(); + return boolean_or_throw(error_code); +} + //------------------------------------------------------------------------------ MDBX_CXX11_CONSTEXPR slice::slice() noexcept : ::MDBX_val({nullptr, 0}) {} @@ -6392,6 +6401,8 @@ inline bool cursor::move(move_operation operation, MDBX_val *key, switch (err) { case MDBX_SUCCESS: MDBX_CXX20_LIKELY return true; + case MDBX_RESULT_TRUE: + return false; case MDBX_NOTFOUND: if (!throw_notfound) return false; From 0b68980489e8ded9cafa4be926d900e09b224709 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 20 Nov 2023 21:22:36 +0300 Subject: [PATCH 069/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`cursor::scan(predicate.?= =?UTF-8?q?..)`=20=D0=B8=20=D1=82.=D0=BF.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index e4bf7c15..d1acd860 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4457,6 +4457,89 @@ protected: MDBX_val *value) const; public: + template + bool scan(CALLABLE_PREDICATE predicate, move_operation start = first, + move_operation turn = next) { + struct wrapper : public exception_thunk { + static int probe(void *context, MDBX_val *key, MDBX_val *value, + void *arg) noexcept { + auto thunk = static_cast(context); + assert(thunk->is_clean()); + auto &predicate = *static_cast(arg); + try { + return predicate(pair(*key, *value)) ? MDBX_RESULT_TRUE + : MDBX_RESULT_FALSE; + } catch (... 
/* capture any exception to rethrow it over C code */) { + thunk->capture(); + return MDBX_RESULT_TRUE; + } + } + } thunk; + return error::boolean_or_throw( + ::mdbx_cursor_scan(handle_, wrapper::probe, &thunk, + MDBX_cursor_op(start), MDBX_cursor_op(turn), + &predicate), + thunk); + } + + template + bool fullscan(CALLABLE_PREDICATE predicate, bool backward = false) { + return scan(std::move(predicate), backward ? last : first, + backward ? previous : next); + } + + template + bool scan_from(CALLABLE_PREDICATE predicate, slice &from, + move_operation start = key_greater_or_equal, + move_operation turn = next) { + struct wrapper : public exception_thunk { + static int probe(void *context, MDBX_val *key, MDBX_val *value, + void *arg) noexcept { + auto thunk = static_cast(context); + assert(thunk->is_clean()); + auto &predicate = *static_cast(arg); + try { + return predicate(pair(*key, *value)) ? MDBX_RESULT_TRUE + : MDBX_RESULT_FALSE; + } catch (... /* capture any exception to rethrow it over C code */) { + thunk->capture(); + return MDBX_RESULT_TRUE; + } + } + } thunk; + return error::boolean_or_throw( + ::mdbx_cursor_scan_from(handle_, wrapper::probe, &thunk, + MDBX_cursor_op(start), &from, nullptr, + MDBX_cursor_op(turn), &predicate), + thunk); + } + + template + bool scan_from(CALLABLE_PREDICATE predicate, pair &from, + move_operation start = pair_greater_or_equal, + move_operation turn = next) { + struct wrapper : public exception_thunk { + static int probe(void *context, MDBX_val *key, MDBX_val *value, + void *arg) noexcept { + auto thunk = static_cast(context); + assert(thunk->is_clean()); + auto &predicate = *static_cast(arg); + try { + return predicate(pair(*key, *value)) ? MDBX_RESULT_TRUE + : MDBX_RESULT_FALSE; + } catch (... 
/* capture any exception to rethrow it over C code */) { + thunk->capture(); + return MDBX_RESULT_TRUE; + } + } + } thunk; + return error::boolean_or_throw( + ::mdbx_cursor_scan_from(handle_, wrapper::probe, &thunk, + MDBX_cursor_op(start), &from.key, &from.value, + MDBX_cursor_op(turn), &predicate), + thunk); + } + move_result move(move_operation operation, bool throw_notfound) { return move_result(*this, operation, throw_notfound); } From bf21ee7bde54317c27985b8482512d878479364c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 20 Nov 2023 22:18:44 +0300 Subject: [PATCH 070/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B5=D1=80?= =?UTF-8?q?=D0=B0=D1=82=D0=BE=D1=80=D0=BE=D0=B2=20=D1=81=D1=80=D0=B0=D0=B2?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=B4=D0=BB=D1=8F=20`mdbx::pa?= =?UTF-8?q?ir`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 70 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index d1acd860..5dc57b52 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3044,6 +3044,26 @@ struct pair { assert(bool(key) == bool(value)); return key; } + + /// \brief Three-way fast non-lexicographically length-based comparison. + MDBX_NOTHROW_PURE_FUNCTION static MDBX_CXX14_CONSTEXPR intptr_t + compare_fast(const pair &a, const pair &b) noexcept; + + /// \brief Three-way lexicographically comparison. 
+ MDBX_NOTHROW_PURE_FUNCTION static MDBX_CXX14_CONSTEXPR intptr_t + compare_lexicographically(const pair &a, const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator==(const pair &a, + const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator<(const pair &a, + const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator>(const pair &a, + const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator<=(const pair &a, + const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator>=(const pair &a, + const pair &b) noexcept; + friend MDBX_CXX14_CONSTEXPR bool operator!=(const pair &a, + const pair &b) noexcept; }; /// \brief Combines pair of slices for key and value with boolean flag to @@ -5410,6 +5430,56 @@ slice::is_base64(bool ignore_spaces) const noexcept { //------------------------------------------------------------------------------ +MDBX_CXX14_CONSTEXPR intptr_t pair::compare_fast(const pair &a, + const pair &b) noexcept { + const auto diff = slice::compare_fast(a.key, b.key); + return diff ? diff : slice::compare_fast(a.value, b.value); +} + +MDBX_CXX14_CONSTEXPR intptr_t +pair::compare_lexicographically(const pair &a, const pair &b) noexcept { + const auto diff = slice::compare_lexicographically(a.key, b.key); + return diff ? 
diff : slice::compare_lexicographically(a.value, b.value); +} + +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool +operator==(const pair &a, const pair &b) noexcept { + return a.key.length() == b.key.length() && + a.value.length() == b.value.length() && + memcmp(a.key.data(), b.key.data(), a.key.length()) == 0 && + memcmp(a.value.data(), b.value.data(), a.value.length()) == 0; +} + +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool +operator<(const pair &a, const pair &b) noexcept { + return pair::compare_lexicographically(a, b) < 0; +} + +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool +operator>(const pair &a, const pair &b) noexcept { + return pair::compare_lexicographically(a, b) > 0; +} + +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool +operator<=(const pair &a, const pair &b) noexcept { + return pair::compare_lexicographically(a, b) <= 0; +} + +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool +operator>=(const pair &a, const pair &b) noexcept { + return pair::compare_lexicographically(a, b) >= 0; +} + +MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX14_CONSTEXPR bool +operator!=(const pair &a, const pair &b) noexcept { + return a.key.length() != b.key.length() || + a.value.length() != b.value.length() || + memcmp(a.key.data(), b.key.data(), a.key.length()) != 0 || + memcmp(a.value.data(), b.value.data(), a.value.length()) != 0; +} + +//------------------------------------------------------------------------------ + template inline buffer::buffer( const txn &txn, const struct slice &src, const allocator_type &allocator) From b9e2f6dc0907cf0f9e11ef4796c1b169226ce769 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 15:14:55 +0300 Subject: [PATCH 071/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`MDBX=5FCXXnn=5FCONSTEXP?= =?UTF-8?q?R=5FENUM`.?= MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 5dc57b52..e8614e41 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -162,6 +162,20 @@ #define MDBX_CXX20_CONSTEXPR inline #endif /* MDBX_CXX20_CONSTEXPR */ +#if CONSTEXPR_ENUM_FLAGS_OPERATIONS || defined(DOXYGEN) +#define MDBX_CXX01_CONSTEXPR_ENUM MDBX_CXX01_CONSTEXPR +#define MDBX_CXX11_CONSTEXPR_ENUM MDBX_CXX11_CONSTEXPR +#define MDBX_CXX14_CONSTEXPR_ENUM MDBX_CXX14_CONSTEXPR +#define MDBX_CXX17_CONSTEXPR_ENUM MDBX_CXX17_CONSTEXPR +#define MDBX_CXX20_CONSTEXPR_ENUM MDBX_CXX20_CONSTEXPR +#else +#define MDBX_CXX01_CONSTEXPR_ENUM inline +#define MDBX_CXX11_CONSTEXPR_ENUM inline +#define MDBX_CXX14_CONSTEXPR_ENUM inline +#define MDBX_CXX17_CONSTEXPR_ENUM inline +#define MDBX_CXX20_CONSTEXPR_ENUM inline +#endif /* CONSTEXPR_ENUM_FLAGS_OPERATIONS */ + /** Workaround for old compilers without support assertion inside `constexpr` * functions. 
*/ #if defined(CONSTEXPR_ASSERT) @@ -3198,18 +3212,8 @@ struct LIBMDBX_API_TYPE map_handle { map_handle::state state) noexcept; info(const info &) noexcept = default; info &operator=(const info &) noexcept = default; -#if CONSTEXPR_ENUM_FLAGS_OPERATIONS - MDBX_CXX11_CONSTEXPR -#else - inline -#endif - ::mdbx::key_mode key_mode() const noexcept; -#if CONSTEXPR_ENUM_FLAGS_OPERATIONS - MDBX_CXX11_CONSTEXPR -#else - inline -#endif - ::mdbx::value_mode value_mode() const noexcept; + MDBX_CXX11_CONSTEXPR_ENUM mdbx::key_mode key_mode() const noexcept; + MDBX_CXX11_CONSTEXPR_ENUM mdbx::value_mode value_mode() const noexcept; }; }; @@ -5491,17 +5495,13 @@ MDBX_CXX11_CONSTEXPR map_handle::info::info(map_handle::flags flags, map_handle::state state) noexcept : flags(flags), state(state) {} -#if CONSTEXPR_ENUM_FLAGS_OPERATIONS -MDBX_CXX11_CONSTEXPR -#endif -::mdbx::key_mode map_handle::info::key_mode() const noexcept { +MDBX_CXX11_CONSTEXPR_ENUM mdbx::key_mode +map_handle::info::key_mode() const noexcept { return ::mdbx::key_mode(flags & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)); } -#if CONSTEXPR_ENUM_FLAGS_OPERATIONS -MDBX_CXX11_CONSTEXPR -#endif -::mdbx::value_mode map_handle::info::value_mode() const noexcept { +MDBX_CXX11_CONSTEXPR_ENUM mdbx::value_mode +map_handle::info::value_mode() const noexcept { return ::mdbx::value_mode(flags & (MDBX_DUPSORT | MDBX_REVERSEDUP | MDBX_DUPFIXED | MDBX_INTEGERDUP)); } From 55142d8d6f3a1b5649241872da6af4a9ec7f00cb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 15:17:07 +0300 Subject: [PATCH 072/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`txn::commit=5Fembark=5F?= =?UTF-8?q?read()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 4 ++++ src/mdbx.c++ | 7 +++++++ 2 files changed, 11 insertions(+) diff --git 
a/mdbx.h++ b/mdbx.h++ index e8614e41..27df86de 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4318,6 +4318,10 @@ public: /// \brief Commit all the operations of a transaction into the database. void commit(); + /// \brief Commit all the operations of a transaction into the database + /// and then start read transaction. + void commit_embark_read(); + using commit_latency = MDBX_commit_latency; /// \brief Commit all the operations of a transaction into the database diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 4381b8e8..621c2695 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -1555,6 +1555,13 @@ void txn_managed::commit(commit_latency *latency) { MDBX_CXX20_UNLIKELY err.throw_exception(); } +void txn_managed::commit_embark_read() { + auto env = this->env(); + commit(); + error::success_or_throw( + ::mdbx_txn_begin(env, nullptr, MDBX_TXN_RDONLY, &handle_)); +} + //------------------------------------------------------------------------------ bool txn::drop_map(const char *name, bool throw_if_absent) { From 355090f02e9bc715b30c7142458daeb9ad7cc4ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 15:18:16 +0300 Subject: [PATCH 073/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`is=5Fxyz()`=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20`key=5Fmode`=20=D0=B8=20`value=5Fmode`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 62 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 54 insertions(+), 8 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 27df86de..6da418e3 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3119,6 +3119,26 @@ enum class key_mode { ///< \note Not yet implemented and PRs are welcome. 
}; +MDBX_CXX01_CONSTEXPR_ENUM bool is_usual(key_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & (MDBX_REVERSEKEY | MDBX_INTEGERKEY)) == 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_ordinal(key_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & MDBX_INTEGERKEY) != 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_samelength(key_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & MDBX_INTEGERKEY) != 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_reverse(key_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & MDBX_REVERSEKEY) != 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_msgpack(key_mode mode) noexcept { + return mode == key_mode::msgpack; +} + /// \brief Kind of the values and sorted multi-values with corresponding /// comparison. enum class value_mode { @@ -3171,6 +3191,15 @@ enum class value_mode { ///< end of the keys to the beginning. In terms of keys, ///< they are not unique, i.e. has duplicates which are ///< sorted by associated data values. +#else + multi_reverse = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_REVERSEDUP), + multi_samelength = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_DUPFIXED), + multi_ordinal = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_DUPFIXED) | + uint32_t(MDBX_INTEGERDUP), + multi_reverse_samelength = uint32_t(MDBX_DUPSORT) | + uint32_t(MDBX_REVERSEDUP) | + uint32_t(MDBX_DUPFIXED), +#endif msgpack = -1 ///< A more than one data value could be associated with each ///< key. Values are in [MessagePack](https://msgpack.org/) ///< format with appropriate comparison. Internally each key is @@ -3178,16 +3207,33 @@ enum class value_mode { ///< In terms of keys, they are not unique, i.e. has duplicates ///< which are sorted by associated data values. ///< \note Not yet implemented and PRs are welcome. 
-#else - multi_reverse = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_REVERSEDUP), - multi_samelength = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_DUPFIXED), - multi_ordinal = uint32_t(MDBX_DUPSORT) | uint32_t(MDBX_DUPFIXED) | - uint32_t(MDBX_INTEGERDUP), - multi_reverse_samelength = uint32_t(MDBX_DUPSORT) | - uint32_t(MDBX_REVERSEDUP) | uint32_t(MDBX_DUPFIXED) -#endif }; +MDBX_CXX01_CONSTEXPR_ENUM bool is_usual(value_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & (MDBX_DUPSORT | MDBX_INTEGERDUP | + MDBX_DUPFIXED | MDBX_REVERSEDUP)) == 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_multi(value_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & MDBX_DUPSORT) != 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_ordinal(value_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & MDBX_INTEGERDUP) != 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_samelength(value_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & MDBX_DUPFIXED) != 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_reverse(value_mode mode) noexcept { + return (MDBX_db_flags_t(mode) & MDBX_REVERSEDUP) != 0; +} + +MDBX_CXX01_CONSTEXPR_ENUM bool is_msgpack(value_mode mode) noexcept { + return mode == value_mode::msgpack; +} + /// \brief A handle for an individual database (key-value spaces) in the /// environment. 
/// \see txn::open_map() \see txn::create_map() From 0a58601cdff51d6d3815ccc78effc5158fc4dcf6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 17:35:42 +0300 Subject: [PATCH 074/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`pair::invalid()`=20?= =?UTF-8?q?=D0=B8=20=D0=B2=D0=B7=D0=B0=D0=B8=D0=BC=D0=BE=D0=B4=D0=B5=D0=B9?= =?UTF-8?q?=D1=81=D1=82=D0=B2=D0=B8=D1=8F=20=D1=81=20`std::pair<>`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 6da418e3..4be188bb 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3049,15 +3049,24 @@ struct value_result { /// \brief Combines pair of slices for key and value to represent result of /// certain operations. struct pair { + using stl_pair = std::pair<slice, slice>; slice key, value; - pair(const slice &key, const slice &value) noexcept + MDBX_CXX11_CONSTEXPR pair(const slice &key, const slice &value) noexcept : key(key), value(value) {} + MDBX_CXX11_CONSTEXPR pair(const stl_pair &couple) noexcept + : key(couple.first), value(couple.second) {} + MDBX_CXX11_CONSTEXPR operator stl_pair() const noexcept { + return stl_pair(key, value); + } pair(const pair &) noexcept = default; pair &operator=(const pair &) noexcept = default; MDBX_CXX14_CONSTEXPR operator bool() const noexcept { assert(bool(key) == bool(value)); return key; } + MDBX_CXX14_CONSTEXPR static pair invalid() noexcept { + return pair(slice::invalid(), slice::invalid()); + } /// \brief Three-way fast non-lexicographically length-based comparison. MDBX_NOTHROW_PURE_FUNCTION static MDBX_CXX14_CONSTEXPR intptr_t @@ -3084,7 +3093,10 @@ struct pair { /// represent result of certain operations.
struct pair_result : public pair { bool done; - pair_result(const slice &key, const slice &value, bool done) noexcept + MDBX_CXX11_CONSTEXPR pair_result() noexcept + : pair(pair::invalid()), done(false) {} + MDBX_CXX11_CONSTEXPR pair_result(const slice &key, const slice &value, + bool done) noexcept : pair(key, value), done(done) {} pair_result(const pair_result &) noexcept = default; pair_result &operator=(const pair_result &) noexcept = default; @@ -6585,7 +6597,7 @@ inline int compare_position(const cursor &left, const cursor &right, inline cursor::move_result::move_result(const cursor &cursor, bool throw_notfound) - : pair_result(slice(), slice(), false) { + : pair_result() { done = cursor.move(get_current, &this->key, &this->value, throw_notfound); } From 869cfb3fae41d5dde9f686c5453d0ac4eb3b43d5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 19:12:53 +0300 Subject: [PATCH 075/137] =?UTF-8?q?mdbx++:=20=D0=B8=D1=81=D0=BF=D1=80?= =?UTF-8?q?=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B5?= =?UTF-8?q?=D1=87=D0=B0=D1=82=D0=BA=D0=B8=20=D0=B2=20doxygen-=D0=BE=D0=BF?= =?UTF-8?q?=D0=B8=D1=81=D0=B0=D0=BD=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdbx.h++ b/mdbx.h++ index 4be188bb..307e4175 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -376,7 +376,7 @@ using default_allocator = polymorphic_allocator; using default_allocator = legacy_allocator; #endif /* __cpp_lib_memory_resource >= 201603L */ -/// \brief Default singe-byte string. +/// \brief Default single-byte string. 
template using string = ::std::basic_string, ALLOCATOR>; From 304cf25149f6f741bf575aea2b8b1a23b15a6292 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 19:14:26 +0300 Subject: [PATCH 076/137] =?UTF-8?q?mdbx++:=20=D0=B8=D1=81=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20`slice?= =?UTF-8?q?::invalid()`=20=D0=B4=D0=BB=D1=8F=20=D0=BF=D1=80=D0=B5=D0=B4?= =?UTF-8?q?=D0=BE=D1=82=D0=B2=D1=80=D0=B0=D1=89=D0=B5=D0=BD=D0=B8=D1=8F=20?= =?UTF-8?q?=D0=BD=D0=B5=D0=B7=D0=B0=D0=BC=D0=B5=D1=82=D0=BD=D0=BE=D0=B3?= =?UTF-8?q?=D0=BE=20=D0=BD=D0=B5=D0=B2=D0=B5=D1=80=D0=BD=D0=BE=D0=B3=D0=BE?= =?UTF-8?q?=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D1=8F=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 307e4175..175ef2ca 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -4510,10 +4510,12 @@ public: struct move_result : public pair_result { inline move_result(const cursor &cursor, bool throw_notfound); move_result(cursor &cursor, move_operation operation, bool throw_notfound) - : move_result(cursor, operation, slice(), slice(), throw_notfound) {} + : move_result(cursor, operation, slice::invalid(), slice::invalid(), + throw_notfound) {} move_result(cursor &cursor, move_operation operation, const slice &key, bool throw_notfound) - : move_result(cursor, operation, key, slice(), throw_notfound) {} + : move_result(cursor, operation, key, slice::invalid(), + throw_notfound) {} inline move_result(cursor &cursor, move_operation operation, const slice &key, const slice &value, bool throw_notfound); @@ -4524,10 +4526,11 @@ public: struct estimate_result : public pair { ptrdiff_t approximate_quantity; estimate_result(const cursor &cursor, 
move_operation operation) - : estimate_result(cursor, operation, slice(), slice()) {} + : estimate_result(cursor, operation, slice::invalid(), + slice::invalid()) {} estimate_result(const cursor &cursor, move_operation operation, const slice &key) - : estimate_result(cursor, operation, key, slice()) {} + : estimate_result(cursor, operation, key, slice::invalid()) {} inline estimate_result(const cursor &cursor, move_operation operation, const slice &key, const slice &value); estimate_result(const estimate_result &) noexcept = default; @@ -4631,7 +4634,7 @@ public: } move_result move(move_operation operation, const slice &key, bool throw_notfound) { - return move_result(*this, operation, key, throw_notfound); + return move_result(*this, operation, key, slice::invalid(), throw_notfound); } move_result move(move_operation operation, const slice &key, const slice &value, bool throw_notfound) { From 8a6bddef4474edb4126d6281bca92bd884990a0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 19:34:38 +0300 Subject: [PATCH 077/137] =?UTF-8?q?mdbx++:=20=D0=BF=D0=BE=D0=BF=D1=80?= =?UTF-8?q?=D0=B0=D0=B2=D0=BA=D0=B0=20=D1=84=D0=BE=D1=80=D0=BC=D0=B0=D1=82?= =?UTF-8?q?=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D1=82=D0=B8=D0=B2=20=D0=B1=D0=B0=D0=B3=D0=B0=20clang-for?= =?UTF-8?q?mat.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 175ef2ca..7b64eaf9 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -6593,9 +6593,9 @@ inline int compare_position(const cursor &left, const cursor &right, const auto diff = compare_position_nothrow(left, right, ignore_nested); assert(compare_position_nothrow(right, left, ignore_nested) == -diff); if (MDBX_LIKELY(int16_t(diff) == diff)) - 
MDBX_CXX20_LIKELY - return int(diff); - throw_incomparable_cursors(); + MDBX_CXX20_LIKELY return int(diff); + else + throw_incomparable_cursors(); } inline cursor::move_result::move_result(const cursor &cursor, From b412807fc10718ee6e32a5d824df7e366e807842 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 21 Nov 2023 19:35:46 +0300 Subject: [PATCH 078/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx::default=5Fbuffer`?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/mdbx.h++ b/mdbx.h++ index 7b64eaf9..a056e279 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -376,8 +376,11 @@ using default_allocator = polymorphic_allocator; using default_allocator = legacy_allocator; #endif /* __cpp_lib_memory_resource >= 201603L */ +/// \brief Default buffer. +using default_buffer = buffer<default_allocator, default_capacity_policy>; + /// \brief Default single-byte string.
-template <class ALLOCATOR = legacy_allocator> +template <class ALLOCATOR = default_allocator> using string = ::std::basic_string<char, ::std::char_traits<char>, ALLOCATOR>; using filehandle = ::mdbx_filehandle_t; From 1f2ff0779698493d79c806219cf51a304d6e029a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 22 Nov 2023 19:31:05 +0300 Subject: [PATCH 079/137] =?UTF-8?q?mdbx++:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BA=D0=B0/=D0=B8=D1=81=D0=BF?= =?UTF-8?q?=D1=80=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`to=5Fhex()`?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/mdbx.c++ | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/mdbx.c++ b/src/mdbx.c++ index 621c2695..b3187e82 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -607,7 +607,7 @@ char *to_hex::write_bytes(char *__restrict const dest, size_t dest_size) const { auto ptr = dest; auto src = source.byte_ptr(); - const char alphabase = (uppercase ? 'A' : 'a') - 10; + const char alpha_shift = (uppercase ? 'A' : 'a') - '9' - 1; auto line = ptr; for (const auto end = source.end_byte_ptr(); src != end; ++src) { if (wrap_width && size_t(ptr - line) >= wrap_width) { @@ -616,8 +616,8 @@ char *to_hex::write_bytes(char *__restrict const dest, size_t dest_size) const { } const int8_t hi = *src >> 4; const int8_t lo = *src & 15; - ptr[0] = char(alphabase + hi + (((hi - 10) >> 7) & -7)); - ptr[1] = char(alphabase + lo + (((lo - 10) >> 7) & -7)); + ptr[0] = char('0' + hi + (((9 - hi) >> 7) & alpha_shift)); + ptr[1] = char('0' + lo + (((9 - lo) >> 7) & alpha_shift)); ptr += 2; assert(ptr <= dest + dest_size); } @@ -629,7 +629,7 @@ char *to_hex::write_bytes(char *__restrict const dest, size_t dest_size) const { MDBX_CXX20_LIKELY { ::std::ostream::sentry sentry(out); auto src = source.byte_ptr(); - const char alphabase = (uppercase ? 'A' : 'a') - 10; + const char alpha_shift = (uppercase ?
'A' : 'a') - '9' - 1; unsigned width = 0; for (const auto end = source.end_byte_ptr(); src != end; ++src) { if (wrap_width && width >= wrap_width) { @@ -638,8 +638,8 @@ char *to_hex::write_bytes(char *__restrict const dest, size_t dest_size) const { } const int8_t hi = *src >> 4; const int8_t lo = *src & 15; - out.put(char(alphabase + hi + (((hi - 10) >> 7) & -7))); - out.put(char(alphabase + lo + (((lo - 10) >> 7) & -7))); + out.put(char('0' + hi + (((9 - hi) >> 7) & alpha_shift))); + out.put(char('0' + lo + (((9 - lo) >> 7) & alpha_shift))); width += 2; } } @@ -670,11 +670,11 @@ char *from_hex::write_bytes(char *__restrict const dest, int8_t hi = src[0]; hi = (hi | 0x20) - 'a'; - hi += 10 + ((hi >> 7) & 7); + hi += 10 + ((hi >> 7) & 39); int8_t lo = src[1]; lo = (lo | 0x20) - 'a'; - lo += 10 + ((lo >> 7) & 7); + lo += 10 + ((lo >> 7) & 39); *ptr++ = hi << 4 | lo; src += 2; From dd47f1bfd9077278bbc6442f0863280626a873e4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 24 Nov 2023 08:47:55 +0300 Subject: [PATCH 080/137] =?UTF-8?q?mdbx++:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D1=85=D0=BE=D0=B4=20=D0=BD=D0=B0=20=D0=B8=D1=81=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20=D0=BF?= =?UTF-8?q?=D0=BE-=D1=83=D0=BC=D0=BE=D0=BB=D1=87=D0=B0=D0=BD=D0=B8=D1=8E?= =?UTF-8?q?=20default=5Fallocator=20=D0=B2=D0=BC=D0=B5=D1=81=D1=82=D0=BE?= =?UTF-8?q?=20legacy=5Fallocator.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 76 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index a056e279..321550ed 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -354,18 +354,6 @@ static MDBX_CXX20_CONSTEXPR int memcmp(const void *a, const void *b, /// but it is recommended to use \ref polymorphic_allocator. 
using legacy_allocator = ::std::string::allocator_type; -struct slice; -struct default_capacity_policy; -template -class buffer; -class env; -class env_managed; -class txn; -class txn_managed; -class cursor; -class cursor_managed; - #if defined(DOXYGEN) || \ (defined(__cpp_lib_memory_resource) && \ __cpp_lib_memory_resource >= 201603L && _GLIBCXX_USE_CXX11_ABI) @@ -376,6 +364,18 @@ using default_allocator = polymorphic_allocator; using default_allocator = legacy_allocator; #endif /* __cpp_lib_memory_resource >= 201603L */ +struct slice; +struct default_capacity_policy; +template +class buffer; +class env; +class env_managed; +class txn; +class txn_managed; +class cursor; +class cursor_managed; + /// \brief Default buffer. using default_buffer = buffer; @@ -633,24 +633,24 @@ concept SliceTranscoder = #endif /* MDBX_HAVE_CXX20_CONCEPTS */ -template inline buffer make_buffer(PRODUCER &producer, const ALLOCATOR &allocator = ALLOCATOR()); -template inline buffer make_buffer(const PRODUCER &producer, const ALLOCATOR &allocator = ALLOCATOR()); -template inline string make_string(PRODUCER &producer, const ALLOCATOR &allocator = ALLOCATOR()); -template inline string make_string(const PRODUCER &producer, const ALLOCATOR &allocator = ALLOCATOR()); @@ -783,7 +783,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { #endif /* __cpp_lib_string_view >= 201606L */ template , - class ALLOCATOR = legacy_allocator> + class ALLOCATOR = default_allocator> MDBX_CXX20_CONSTEXPR ::std::basic_string as_string(const ALLOCATOR &allocator = ALLOCATOR()) const { static_assert(sizeof(CHAR) == 1, "Must be single byte characters"); @@ -798,27 +798,27 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { } /// \brief Returns a string with a hexadecimal dump of the slice content. 
- template + template inline string as_hex_string(bool uppercase = false, unsigned wrap_width = 0, const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Returns a string with a /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of the slice content. - template + template inline string as_base58_string(unsigned wrap_width = 0, const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Returns a string with a /// [Base58](https://en.wikipedia.org/wiki/Base64) dump of the slice content. - template + template inline string as_base64_string(unsigned wrap_width = 0, const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Returns a buffer with a hexadecimal dump of the slice content. - template inline buffer encode_hex(bool uppercase = false, unsigned wrap_width = 0, @@ -826,7 +826,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { /// \brief Returns a buffer with a /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of the slice content. - template inline buffer encode_base58(unsigned wrap_width = 0, @@ -834,14 +834,14 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { /// \brief Returns a buffer with a /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of the slice content. - template inline buffer encode_base64(unsigned wrap_width = 0, const ALLOCATOR &allocator = ALLOCATOR()) const; /// \brief Decodes hexadecimal dump from the slice content to returned buffer. - template inline buffer hex_decode(bool ignore_spaces = false, @@ -849,7 +849,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { /// \brief Decodes [Base58](https://en.wikipedia.org/wiki/Base58) dump /// from the slice content to returned buffer. - template inline buffer base58_decode(bool ignore_spaces = false, @@ -857,7 +857,7 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { /// \brief Decodes [Base64](https://en.wikipedia.org/wiki/Base64) dump /// from the slice content to returned buffer. 
- template inline buffer base64_decode(bool ignore_spaces = false, @@ -1294,13 +1294,13 @@ struct LIBMDBX_API to_hex { } /// \brief Returns a string with a hexadecimal dump of a passed slice. - template + template string as_string(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_string(*this, allocator); } /// \brief Returns a buffer with a hexadecimal dump of a passed slice. - template buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { @@ -1345,14 +1345,14 @@ struct LIBMDBX_API to_base58 { /// \brief Returns a string with a /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of a passed slice. - template + template string as_string(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_string(*this, allocator); } /// \brief Returns a buffer with a /// [Base58](https://en.wikipedia.org/wiki/Base58) dump of a passed slice. - template buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { @@ -1400,14 +1400,14 @@ struct LIBMDBX_API to_base64 { /// \brief Returns a string with a /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of a passed slice. - template + template string as_string(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_string(*this, allocator); } /// \brief Returns a buffer with a /// [Base64](https://en.wikipedia.org/wiki/Base64) dump of a passed slice. - template buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { @@ -1464,13 +1464,13 @@ struct LIBMDBX_API from_hex { } /// \brief Decodes hexadecimal dump from a passed slice to returned string. - template + template string as_string(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_string(*this, allocator); } /// \brief Decodes hexadecimal dump from a passed slice to returned buffer. 
- template buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { @@ -1510,14 +1510,14 @@ struct LIBMDBX_API from_base58 { /// \brief Decodes [Base58](https://en.wikipedia.org/wiki/Base58) dump from a /// passed slice to returned string. - template + template string as_string(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_string(*this, allocator); } /// \brief Decodes [Base58](https://en.wikipedia.org/wiki/Base58) dump from a /// passed slice to returned buffer. - template buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { @@ -1559,14 +1559,14 @@ struct LIBMDBX_API from_base64 { /// \brief Decodes [Base64](https://en.wikipedia.org/wiki/Base64) dump from a /// passed slice to returned string. - template + template string as_string(const ALLOCATOR &allocator = ALLOCATOR()) const { return make_string(*this, allocator); } /// \brief Decodes [Base64](https://en.wikipedia.org/wiki/Base64) dump from a /// passed slice to returned buffer. - template buffer as_buffer(const ALLOCATOR &allocator = ALLOCATOR()) const { From be8428257d11c98c1961428c4f4212942ab76bc7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 24 Nov 2023 08:49:43 +0300 Subject: [PATCH 081/137] =?UTF-8?q?mdbx++:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=B4=D0=B5=D0=BB=D0=BA=D0=B0=20=D0=BF=D0=BE=D0=B4=D0=B4=D0=B5?= =?UTF-8?q?=D1=80=D0=B6=D0=BA=D0=B8=20base58=20=D0=BF=D0=BE=20RFC-draft.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 5 +- src/mdbx.c++ | 416 +++++++++++++++++++++++++-------------------------- 2 files changed, 204 insertions(+), 217 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index 321550ed..f5e906c8 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -1362,8 +1362,7 @@ struct LIBMDBX_API to_base58 { /// \brief Returns the buffer size in bytes needed for /// 
[Base58](https://en.wikipedia.org/wiki/Base58) dump of passed slice. MDBX_CXX11_CONSTEXPR size_t envisage_result_length() const noexcept { - const size_t bytes = - source.length() / 8 * 11 + (source.length() % 8 * 43 + 31) / 32; + const size_t bytes = (source.length() * 11 + 7) / 8; return wrap_width ? bytes + bytes / wrap_width : bytes; } @@ -1528,7 +1527,7 @@ struct LIBMDBX_API from_base58 { /// [Base58](https://en.wikipedia.org/wiki/Base58) dump from a passed slice to /// decoded data. MDBX_CXX11_CONSTEXPR size_t envisage_result_length() const noexcept { - return source.length() / 11 * 8 + source.length() % 11 * 32 / 43; + return source.length() /* могут быть все нули кодируемые один-к-одному */; } /// \brief Fills the destination with data decoded from diff --git a/src/mdbx.c++ b/src/mdbx.c++ index b3187e82..823404b2 100644 --- a/src/mdbx.c++ +++ b/src/mdbx.c++ @@ -207,6 +207,44 @@ __cold bug::~bug() noexcept {} #endif /* Unused*/ +struct line_wrapper { + char *line, *ptr; + line_wrapper(char *buf) noexcept : line(buf), ptr(buf) {} + void put(char c, size_t wrap_width) noexcept { + *ptr++ = c; + if (wrap_width && ptr >= wrap_width + line) { + *ptr++ = '\n'; + line = ptr; + } + } + void put(const ::mdbx::slice &chunk, size_t wrap_width) noexcept { + if (!wrap_width || wrap_width > (ptr - line) + chunk.length()) { + memcpy(ptr, chunk.data(), chunk.length()); + ptr += chunk.length(); + } else { + for (size_t i = 0; i < chunk.length(); ++i) + put(chunk.char_ptr()[i], wrap_width); + } + } +}; + +template +struct temp_buffer { + TYPE inplace[(INPLACE_BYTES + sizeof(TYPE) - 1) / sizeof(TYPE)]; + const size_t size; + TYPE *const area; + temp_buffer(size_t bytes) + : size((bytes + sizeof(TYPE) - 1) / sizeof(TYPE)), + area((bytes > sizeof(inplace)) ? 
new TYPE[size] : inplace) { + memset(area, 0, sizeof(TYPE) * size); + } + ~temp_buffer() { + if (area != inplace) + delete[] area; + } + TYPE *end() const { return area + size; } +}; + } // namespace //------------------------------------------------------------------------------ @@ -717,156 +755,135 @@ enum : signed char { IL /* invalid */ = -1 }; -static const byte b58_alphabet[58] = { - '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', - 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', - 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', - 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'}; - -#ifndef bswap64 -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -static inline uint64_t bswap64(uint64_t v) noexcept { -#if __GNUC_PREREQ(4, 4) || __CLANG_PREREQ(4, 0) || \ - __has_builtin(__builtin_bswap64) - return __builtin_bswap64(v); -#elif defined(_MSC_VER) && !defined(__clang__) - return _byteswap_uint64(v); -#elif defined(__bswap_64) - return __bswap_64(v); -#elif defined(bswap_64) - return bswap_64(v); +#if MDBX_WORDBITS > 32 +using b58_uint = uint_fast64_t; #else - return v << 56 | v >> 56 | ((v << 40) & UINT64_C(0x00ff000000000000)) | - ((v << 24) & UINT64_C(0x0000ff0000000000)) | - ((v << 8) & UINT64_C(0x000000ff00000000)) | - ((v >> 8) & UINT64_C(0x00000000ff000000)) | - ((v >> 24) & UINT64_C(0x0000000000ff0000)) | - ((v >> 40) & UINT64_C(0x000000000000ff00)); +using b58_uint = uint_fast32_t; #endif -} -#endif /* __BYTE_ORDER__ */ -#endif /* ifndef bswap64 */ -static inline char b58_8to11(uint64_t &v) noexcept { - const unsigned i = unsigned(v % 58); +struct b58_buffer : public temp_buffer { + b58_buffer(size_t bytes, size_t estimation_ratio_numerator, + size_t estimation_ratio_denominator, size_t extra = 0) + : temp_buffer((/* пересчитываем по указанной пропорции */ + bytes = (bytes * estimation_ratio_numerator + + estimation_ratio_denominator - 1) / + 
estimation_ratio_denominator, + /* учитываем резервный старший байт в каждом слове */ + ((bytes + sizeof(b58_uint) - 2) / (sizeof(b58_uint) - 1) * + sizeof(b58_uint) + + extra) * + sizeof(b58_uint))) {} +}; + +static byte b58_8to11(b58_uint &v) noexcept { + static const char b58_alphabet[58] = { + '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', + 'G', 'H', 'J', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', + 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z'}; + + const auto i = size_t(v % 58); v /= 58; return b58_alphabet[i]; } +static slice b58_encode(b58_buffer &buf, const byte *begin, const byte *end) { + auto high = buf.end(); + const auto modulo = + b58_uint((sizeof(b58_uint) > 4) ? UINT64_C(0x1A636A90B07A00) /* 58^9 */ + : UINT32_C(0xACAD10) /* 58^4 */); + static_assert(sizeof(modulo) == 4 || sizeof(modulo) == 8, "WTF?"); + while (begin < end) { + b58_uint carry = *begin++; + auto ptr = buf.end(); + do { + assert(ptr > buf.area); + carry += *--ptr << CHAR_BIT; + *ptr = carry % modulo; + carry /= modulo; + } while (carry || ptr > high); + high = ptr; + } + + byte *output = static_cast(static_cast(buf.area)); + auto ptr = output; + for (auto porous = high; porous < buf.end();) { + auto chunk = *porous++; + static_assert(sizeof(chunk) == 4 || sizeof(chunk) == 8, "WTF?"); + assert(chunk < modulo); + if (sizeof(chunk) > 4) { + ptr[8] = b58_8to11(chunk); + ptr[7] = b58_8to11(chunk); + ptr[6] = b58_8to11(chunk); + ptr[5] = b58_8to11(chunk); + ptr[4] = b58_8to11(chunk); + ptr[3] = b58_8to11(chunk); + ptr[2] = b58_8to11(chunk); + ptr[1] = b58_8to11(chunk); + ptr[0] = b58_8to11(chunk); + ptr += 9; + } else { + ptr[3] = b58_8to11(chunk); + ptr[2] = b58_8to11(chunk); + ptr[1] = b58_8to11(chunk); + ptr[0] = b58_8to11(chunk); + ptr += 4; + } + assert(static_cast(ptr) < static_cast(porous)); + } + + while (output < ptr && *output == '1') + 
++output; + return slice(output, ptr); +} + char *to_base58::write_bytes(char *__restrict const dest, size_t dest_size) const { if (MDBX_UNLIKELY(envisage_result_length() > dest_size)) MDBX_CXX20_UNLIKELY throw_too_small_target_buffer(); - auto ptr = dest; - auto src = source.byte_ptr(); - size_t left = source.length(); - auto line = ptr; - while (MDBX_LIKELY(left > 7)) { - uint64_t v; - std::memcpy(&v, src, 8); - src += 8; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - v = bswap64(v); -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#else -#error "FIXME: Unsupported byte order" -#endif /* __BYTE_ORDER__ */ - ptr[10] = b58_8to11(v); - ptr[9] = b58_8to11(v); - ptr[8] = b58_8to11(v); - ptr[7] = b58_8to11(v); - ptr[6] = b58_8to11(v); - ptr[5] = b58_8to11(v); - ptr[4] = b58_8to11(v); - ptr[3] = b58_8to11(v); - ptr[2] = b58_8to11(v); - ptr[1] = b58_8to11(v); - ptr[0] = b58_8to11(v); - assert(v == 0); - ptr += 11; - left -= 8; - if (wrap_width && size_t(ptr - line) >= wrap_width && left) { - *ptr = '\n'; - line = ++ptr; - } - assert(ptr <= dest + dest_size); + auto begin = source.byte_ptr(); + auto end = source.end_byte_ptr(); + line_wrapper wrapper(dest); + while (MDBX_LIKELY(begin < end) && *begin == 0) { + wrapper.put('1', wrap_width); + assert(wrapper.ptr <= dest + dest_size); + ++begin; } - if (left) { - uint64_t v = 0; - unsigned parrots = 31; - do { - v = (v << 8) + *src++; - parrots += 43; - } while (--left); - - auto tail = ptr += parrots >> 5; - assert(ptr <= dest + dest_size); - do { - *--tail = b58_8to11(v); - parrots -= 32; - } while (parrots > 31); - assert(v == 0); - } - - return ptr; + b58_buffer buf(end - begin, 11, 8); + wrapper.put(b58_encode(buf, begin, end), wrap_width); + return wrapper.ptr; } ::std::ostream &to_base58::output(::std::ostream &out) const { if (MDBX_LIKELY(!is_empty())) MDBX_CXX20_LIKELY { ::std::ostream::sentry sentry(out); - auto src = source.byte_ptr(); - size_t left = source.length(); + auto begin = source.byte_ptr(); + auto end = 
source.end_byte_ptr(); unsigned width = 0; - std::array buf; - - while (MDBX_LIKELY(left > 7)) { - uint64_t v; - std::memcpy(&v, src, 8); - src += 8; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - v = bswap64(v); -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#else -#error "FIXME: Unsupported byte order" -#endif /* __BYTE_ORDER__ */ - buf[10] = b58_8to11(v); - buf[9] = b58_8to11(v); - buf[8] = b58_8to11(v); - buf[7] = b58_8to11(v); - buf[6] = b58_8to11(v); - buf[5] = b58_8to11(v); - buf[4] = b58_8to11(v); - buf[3] = b58_8to11(v); - buf[2] = b58_8to11(v); - buf[1] = b58_8to11(v); - buf[0] = b58_8to11(v); - assert(v == 0); - out.write(&buf.front(), 11); - left -= 8; - if (wrap_width && (width += 11) >= wrap_width && left) { + while (MDBX_LIKELY(begin < end) && *begin == 0) { + out.put('1'); + if (wrap_width && ++width >= wrap_width) { out << ::std::endl; width = 0; } + ++begin; } - if (left) { - uint64_t v = 0; - unsigned parrots = 31; - do { - v = (v << 8) + *src++; - parrots += 43; - } while (--left); - - auto ptr = buf.end(); - do { - *--ptr = b58_8to11(v); - parrots -= 32; - } while (parrots > 31); - assert(v == 0); - out.write(&*ptr, buf.end() - ptr); + b58_buffer buf(end - begin, 11, 8); + const auto chunk = b58_encode(buf, begin, end); + if (!wrap_width || wrap_width > width + chunk.length()) + out.write(chunk.char_ptr(), chunk.length()); + else { + for (size_t i = 0; i < chunk.length(); ++i) { + out.put(chunk.char_ptr()[i]); + if (wrap_width && ++width >= wrap_width) { + out << ::std::endl; + width = 0; + } + } } } return out; @@ -892,10 +909,46 @@ const signed char b58_map[256] = { IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL, IL // f0 }; -static inline signed char b58_11to8(uint64_t &v, const byte c) noexcept { - const signed char m = b58_map[c]; - v = v * 58 + m; - return m; +static slice b58_decode(b58_buffer &buf, const byte *begin, const byte *end, + bool ignore_spaces) { + auto high = buf.end(); + while (begin < end) { + const auto c = 
b58_map[*begin++]; + if (MDBX_LIKELY(c >= 0)) { + b58_uint carry = c; + auto ptr = buf.end(); + do { + assert(ptr > buf.area); + carry += *--ptr * 58; + *ptr = carry & (~b58_uint(0) >> CHAR_BIT); + carry >>= CHAR_BIT * (sizeof(carry) - 1); + } while (carry || ptr > high); + high = ptr; + } else if (MDBX_UNLIKELY(!ignore_spaces || !isspace(begin[-1]))) + MDBX_CXX20_UNLIKELY + throw std::domain_error("mdbx::from_base58:: invalid base58 string"); + } + + byte *output = static_cast(static_cast(buf.area)); + auto ptr = output; + for (auto porous = high; porous < buf.end(); ++porous) { + auto chunk = *porous; + static_assert(sizeof(chunk) == 4 || sizeof(chunk) == 8, "WTF?"); + assert(chunk <= (~b58_uint(0) >> CHAR_BIT)); + if (sizeof(chunk) > 4) { + *ptr++ = byte(uint_fast64_t(chunk) >> CHAR_BIT * 6); + *ptr++ = byte(uint_fast64_t(chunk) >> CHAR_BIT * 5); + *ptr++ = byte(uint_fast64_t(chunk) >> CHAR_BIT * 4); + *ptr++ = byte(chunk >> CHAR_BIT * 3); + } + *ptr++ = byte(chunk >> CHAR_BIT * 2); + *ptr++ = byte(chunk >> CHAR_BIT * 1); + *ptr++ = byte(chunk >> CHAR_BIT * 0); + } + + while (output < ptr && *output == 0) + ++output; + return slice(output, ptr); } char *from_base58::write_bytes(char *__restrict const dest, @@ -904,98 +957,33 @@ char *from_base58::write_bytes(char *__restrict const dest, MDBX_CXX20_UNLIKELY throw_too_small_target_buffer(); auto ptr = dest; - auto src = source.byte_ptr(); - for (auto left = source.length(); left > 0;) { - if (MDBX_UNLIKELY(isspace(*src)) && ignore_spaces) { - ++src; - --left; - continue; - } - - if (MDBX_LIKELY(left > 10)) { - uint64_t v = 0; - if (MDBX_UNLIKELY((b58_11to8(v, src[0]) | b58_11to8(v, src[1]) | - b58_11to8(v, src[2]) | b58_11to8(v, src[3]) | - b58_11to8(v, src[4]) | b58_11to8(v, src[5]) | - b58_11to8(v, src[6]) | b58_11to8(v, src[7]) | - b58_11to8(v, src[8]) | b58_11to8(v, src[9]) | - b58_11to8(v, src[10])) < 0)) - MDBX_CXX20_UNLIKELY goto bailout; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - v = bswap64(v); 
-#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#else -#error "FIXME: Unsupported byte order" -#endif /* __BYTE_ORDER__ */ - std::memcpy(ptr, &v, 8); - ptr += 8; - src += 11; - left -= 11; - assert(ptr <= dest + dest_size); - continue; - } - - constexpr unsigned invalid_length_mask = 1 << 1 | 1 << 4 | 1 << 8; - if (MDBX_UNLIKELY(invalid_length_mask & (1 << left))) - MDBX_CXX20_UNLIKELY goto bailout; - - uint64_t v = 1; - unsigned parrots = 0; - do { - if (MDBX_UNLIKELY(b58_11to8(v, *src++) < 0)) - MDBX_CXX20_UNLIKELY goto bailout; - parrots += 32; - } while (--left); - - auto tail = ptr += parrots / 43; - assert(ptr <= dest + dest_size); - do { - *--tail = byte(v); - v >>= 8; - } while (v > 255); - break; + auto begin = source.byte_ptr(); + auto const end = source.end_byte_ptr(); + while (begin < end && *begin <= '1') { + if (MDBX_LIKELY(*begin == '1')) + MDBX_CXX20_LIKELY *ptr++ = 0; + else if (MDBX_UNLIKELY(!ignore_spaces || !isspace(*begin))) + MDBX_CXX20_UNLIKELY + throw std::domain_error("mdbx::from_base58:: invalid base58 string"); + ++begin; } - return ptr; -bailout: - throw std::domain_error("mdbx::from_base58:: invalid base58 string"); + b58_buffer buf(end - begin, 47, 64); + auto slice = b58_decode(buf, begin, end, ignore_spaces); + memcpy(ptr, slice.data(), slice.length()); + return ptr + slice.length(); } bool from_base58::is_erroneous() const noexcept { - bool got = false; - auto src = source.byte_ptr(); - for (auto left = source.length(); left > 0;) { - if (MDBX_UNLIKELY(*src <= ' ') && - MDBX_LIKELY(ignore_spaces && isspace(*src))) { - ++src; - --left; - continue; - } - - if (MDBX_LIKELY(left > 10)) { - if (MDBX_UNLIKELY((b58_map[src[0]] | b58_map[src[1]] | b58_map[src[2]] | - b58_map[src[3]] | b58_map[src[4]] | b58_map[src[5]] | - b58_map[src[6]] | b58_map[src[7]] | b58_map[src[8]] | - b58_map[src[9]] | b58_map[src[10]]) < 0)) - MDBX_CXX20_UNLIKELY return true; - src += 11; - left -= 11; - got = true; - continue; - } - - constexpr unsigned 
invalid_length_mask = 1 << 1 | 1 << 4 | 1 << 8; - if (invalid_length_mask & (1 << left)) - return false; - - do - if (MDBX_UNLIKELY(b58_map[*src++] < 0)) - MDBX_CXX20_UNLIKELY return true; - while (--left); - got = true; - break; + auto begin = source.byte_ptr(); + auto const end = source.end_byte_ptr(); + while (begin < end) { + if (MDBX_UNLIKELY(b58_map[*begin] < 0 && + !(ignore_spaces && isspace(*begin)))) + return true; + ++begin; } - return !got; + return false; } //------------------------------------------------------------------------------ From 0e250a4457144090ea9248b6c2786424026727b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 24 Nov 2023 15:49:05 +0300 Subject: [PATCH 082/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BF=D0=BE=D0=B4=D0=B4?= =?UTF-8?q?=D0=B5=D1=80=D0=B6=D0=BA=D0=B8=20`std::span<>`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index f5e906c8..d42b5942 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -85,6 +85,10 @@ #include #endif +#if defined(__cpp_lib_span) && __cpp_lib_span >= 202002L +#include +#endif + #if __cplusplus >= 201103L #include #include @@ -699,6 +703,47 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { MDBX_CXX14_CONSTEXPR slice(MDBX_val &&src); MDBX_CXX14_CONSTEXPR slice(slice &&src) noexcept; +#if defined(DOXYGEN) || (defined(__cpp_lib_span) && __cpp_lib_span >= 202002L) + template + MDBX_CXX14_CONSTEXPR slice(const ::std::span &span) + : slice(span.begin(), span.end()) { + static_assert(::std::is_standard_layout::value && + !::std::is_pointer::value, + "Must be a standard layout type!"); + } + + template + MDBX_CXX14_CONSTEXPR ::std::span as_span() const { + 
static_assert(::std::is_standard_layout::value && + !::std::is_pointer::value, + "Must be a standard layout type!"); + if (MDBX_LIKELY(size() % sizeof(POD) == 0)) + MDBX_CXX20_LIKELY + return ::std::span(static_cast(data()), + size() / sizeof(POD)); + throw_bad_value_size(); + } + + template MDBX_CXX14_CONSTEXPR ::std::span as_span() { + static_assert(::std::is_standard_layout::value && + !::std::is_pointer::value, + "Must be a standard layout type!"); + if (MDBX_LIKELY(size() % sizeof(POD) == 0)) + MDBX_CXX20_LIKELY + return ::std::span(static_cast(data()), size() / sizeof(POD)); + throw_bad_value_size(); + } + + MDBX_CXX14_CONSTEXPR ::std::span bytes() const { + return as_span(); + } + MDBX_CXX14_CONSTEXPR ::std::span bytes() { return as_span(); } + MDBX_CXX14_CONSTEXPR ::std::span chars() const { + return as_span(); + } + MDBX_CXX14_CONSTEXPR ::std::span chars() { return as_span(); } +#endif /* __cpp_lib_span >= 202002L */ + #if defined(DOXYGEN) || \ (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) /// \brief Create a slice that refers to the same contents as "string_view" @@ -2368,6 +2413,33 @@ public: return slice_; } +#if defined(DOXYGEN) || (defined(__cpp_lib_span) && __cpp_lib_span >= 202002L) + template + MDBX_CXX14_CONSTEXPR buffer(const ::std::span &span) + : buffer(span.begin(), span.end()) { + static_assert(::std::is_standard_layout::value && + !::std::is_pointer::value, + "Must be a standard layout type!"); + } + + template + MDBX_CXX14_CONSTEXPR ::std::span as_span() const { + return slice_.template as_span(); + } + template MDBX_CXX14_CONSTEXPR ::std::span as_span() { + return slice_.template as_span(); + } + + MDBX_CXX14_CONSTEXPR ::std::span bytes() const { + return as_span(); + } + MDBX_CXX14_CONSTEXPR ::std::span bytes() { return as_span(); } + MDBX_CXX14_CONSTEXPR ::std::span chars() const { + return as_span(); + } + MDBX_CXX14_CONSTEXPR ::std::span chars() { return as_span(); } +#endif /* __cpp_lib_span >= 202002L */ + 
template static buffer wrap(const POD &pod, bool make_reference = false, const allocator_type &allocator = allocator_type()) { From ef69336189b02ad3d48c4d952a25d19b805d54d8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 24 Nov 2023 14:11:41 +0300 Subject: [PATCH 083/137] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=82=D0=B5=D1=81=D1=82?= =?UTF-8?q?=D0=B0=20=D0=B4=D0=BB=D1=8F=20=D1=82=D1=80=D0=B0=D0=BD=D1=81?= =?UTF-8?q?=D0=BA=D0=BE=D0=B4=D0=B5=D1=80=D0=BE=D0=B2=20hex/base64/base58.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 8 ++ test/extra/hex_base64_base58.c++ | 128 +++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+) create mode 100644 test/extra/hex_base64_base58.c++ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 23789be0..08866fe4 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -88,6 +88,13 @@ if(UNIX AND NOT SUBPROJECT) set_target_properties(test_extra_dupfixed_multiple PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) endif() + add_executable(test_extra_hex_base64_base58 extra/hex_base64_base58.c++) + target_include_directories(test_extra_hex_base64_base58 PRIVATE "${PROJECT_SOURCE_DIR}") + target_link_libraries(test_extra_hex_base64_base58 ${TOOL_MDBX_LIB}) + if(MDBX_CXX_STANDARD) + set_target_properties(test_extra_hex_base64_base58 PROPERTIES + CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) + endif() endif() endif() @@ -172,6 +179,7 @@ else() if(MDBX_BUILD_CXX) add_test(NAME extra_maindb_ordinal COMMAND test_extra_maindb_ordinal) add_test(NAME extra_dupfixed_multiple COMMAND test_extra_dupfixed_multiple) + add_test(NAME extra_hex_base64_base58 COMMAND test_extra_hex_base64_base58) endif() endif() diff --git 
a/test/extra/hex_base64_base58.c++ b/test/extra/hex_base64_base58.c++ new file mode 100644 index 00000000..879e8f7b --- /dev/null +++ b/test/extra/hex_base64_base58.c++ @@ -0,0 +1,133 @@ +#include "mdbx.h++" +#include +#include +#include + +#include +#include + +using buffer = mdbx::default_buffer; + +std::default_random_engine prng(42); + +static buffer random(size_t length) { + buffer result(length); +#if defined(__cpp_lib_span) && __cpp_lib_span >= 202002L + for (auto &i : result.bytes()) + i = prng(); +#else + for (auto p = result.byte_ptr(); p < result.end_byte_ptr(); ++p) + *p = mdbx::byte(prng()); +#endif + return result; +} + +static bool basic() { + bool ok = true; + const char *const hex_dump = "1D58fa\n2e46E3\nBd9c7A\nC0bF"; + const uint8_t native[] = {0x1D, 0x58, 0xfa, 0x2e, 0x46, 0xE3, + 0xBd, 0x9c, 0x7A, 0xC0, 0xbF}; + + /* A failed hex round-trip must fail the test, like base64/base58 below. */ + if (mdbx::slice(hex_dump).hex_decode(true) != mdbx::slice::wrap(native)) { + std::cerr << "hex_decode() failed\n"; + ok = false; + } else if (mdbx::slice::wrap(native).encode_hex(true, 4).hex_decode(true) != + mdbx::slice::wrap(native)) { + std::cerr << "hex_encode(UPPERCASE) failed\n"; + ok = false; + } else if (mdbx::slice::wrap(native).encode_hex(false).hex_decode(true) != + mdbx::slice::wrap(native)) { + std::cerr << "hex_encode(lowercase) failed\n"; + ok = false; + } + + if (mdbx::slice("").as_base64_string() != "" || + mdbx::slice(" ").encode_base64().as_string() != "IA==" || + mdbx::slice("~0").encode_base64().as_string() != "fjA=" || + mdbx::slice("A_z").encode_base64().as_string() != "QV96" || + mdbx::slice("Ka9q").encode_base64().as_string() != "S2E5cQ==" || + mdbx::slice("123456789").encode_base64().as_string() != "MTIzNDU2Nzg5") { + std::cerr << "encode_base64() failed\n"; + ok = false; + } + + const uint8_t base58_rfc[] = {0x00, 0x00, 0x28, 0x7f, 0xb4, 0xcd}; + if (mdbx::slice("").as_base58_string() != "" || + mdbx::slice(" ").encode_base58().as_string() != "Z" || + mdbx::slice("Hello World!").as_base58_string() != "2NEpo7TZRRrLZSi2U" || + mdbx::slice("The quick brown fox
jumps over the lazy dog.") + .encode_base58() + .as_string() != + "USm3fpXnKG5EUBx2ndxBDMPVciP5hGey2Jh4NDv6gmeo1LkMeiKrLJUUBk6Z" || + mdbx::slice::wrap(base58_rfc).as_base58_string() != "11233QC4" || + mdbx::slice("~0").encode_base58().as_string() != "Aby" || + mdbx::slice("A_z").encode_base58().as_string() != "NxZw" || + mdbx::slice("Ka9q").encode_base58().as_string() != "2vkjDi" || + mdbx::slice("123456789").encode_base58().as_string() != "dKYWwnRHc7Ck") { + std::cerr << "encode_base58() failed\n"; + ok = false; + } + + if (mdbx::slice("").base58_decode() != mdbx::slice() || + mdbx::slice("Z").base58_decode() != mdbx::slice(" ") || + mdbx::slice("2NEpo7TZRRrLZSi2U").base58_decode() != "Hello World!" || + mdbx::slice( + "USm3fpXnKG5EUBx2ndxBDMPVciP5hGey2Jh4NDv6gmeo1LkMeiKrLJUUBk6Z") + .base58_decode() != + mdbx::slice("The quick brown fox jumps over the lazy dog.") || + mdbx::slice("11233QC4").base58_decode() != + mdbx::slice::wrap(base58_rfc) || + mdbx::slice("Aby").base58_decode() != mdbx::slice("~0") || + mdbx::slice("NxZw").base58_decode() != mdbx::slice("A_z") || + mdbx::slice("2vkjDi").base58_decode() != mdbx::slice("Ka9q") || + mdbx::slice("dKYWwnRHc7Ck").base58_decode() != mdbx::slice("123456789")) { + std::cerr << "decode_base58() failed\n"; + ok = false; + } + + return ok; +} + +int main(int argc, const char *argv[]) { + (void)argc; + (void)argv; + + auto ok = basic(); + for (size_t n = 0; n < 1000; ++n) { + for (size_t length = 0; ok && length < 111; ++length) { + const auto pattern = random(length); + if (pattern != pattern.encode_hex(bool(prng() & 1), prng() % 111) + .hex_decode(true) + .encode_hex() + .hex_decode(false)) { + std::cerr << "hex encode/decode failed: n " << n << ", length " + << length << std::endl; + ok = false; + } + if (pattern != pattern.encode_base64(unsigned(prng() % 111)) + .base64_decode(true) + .encode_base64() + .base64_decode(false)) { + std::cerr << "base64 encode/decode failed: n " << n << ", length " + << length << 
std::endl; + ok = false; + } + if (pattern != pattern.encode_base58(unsigned(prng() % 111)) + .base58_decode(true) + .encode_base58() + .base58_decode(false)) { + std::cerr << "base58 encode/decode failed: n " << n << ", length " + << length << std::endl; + ok = false; + } + } + } + + if (!ok) { + std::cerr << "Fail\n"; + return EXIT_FAILURE; + } + std::cout << "OK\n"; + return EXIT_SUCCESS; +} From e68771df18fe874be1b97baf06f3d6841b122141 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 26 Nov 2023 12:34:06 +0300 Subject: [PATCH 084/137] =?UTF-8?q?mdbx++:=20=D0=94=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`buffer=5Fpair<>`=20?= =?UTF-8?q?=D0=B8=20`buffer=5Fpair=5Fspec<>`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index d42b5942..74373039 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -3180,6 +3180,92 @@ struct pair_result : public pair { } }; +template +struct buffer_pair_spec { + using buffer_type = buffer; + using allocator_type = typename buffer_type::allocator_type; + using allocator_traits = typename buffer_type::allocator_traits; + using reservation_policy = CAPACITY_POLICY; + using stl_pair = ::std::pair; + buffer_type key, value; + + MDBX_CXX20_CONSTEXPR buffer_pair_spec() noexcept = default; + MDBX_CXX20_CONSTEXPR + buffer_pair_spec(const allocator_type &allocator) noexcept + : key(allocator), value(allocator) {} + + buffer_pair_spec(const buffer_type &key, const buffer_type &value, + const allocator_type &allocator = allocator_type()) + : key(key, allocator), value(value, allocator) {} + buffer_pair_spec(const buffer_type &key, const buffer_type &value, + bool make_reference, + const allocator_type &allocator = allocator_type()) + : 
key(key, make_reference, allocator), + value(value, make_reference, allocator) {} + + buffer_pair_spec(const stl_pair &pair, + const allocator_type &allocator = allocator_type()) + : buffer_pair_spec(pair.first, pair.second, allocator) {} + buffer_pair_spec(const stl_pair &pair, bool make_reference, + const allocator_type &allocator = allocator_type()) + : buffer_pair_spec(pair.first, pair.second, make_reference, allocator) {} + + buffer_pair_spec(const slice &key, const slice &value, + const allocator_type &allocator = allocator_type()) + : key(key, allocator), value(value, allocator) {} + buffer_pair_spec(const slice &key, const slice &value, bool make_reference, + const allocator_type &allocator = allocator_type()) + : key(key, make_reference, allocator), + value(value, make_reference, allocator) {} + + buffer_pair_spec(const pair &pair, + const allocator_type &allocator = allocator_type()) + : buffer_pair_spec(pair.key, pair.value, allocator) {} + buffer_pair_spec(const pair &pair, bool make_reference, + const allocator_type &allocator = allocator_type()) + : buffer_pair_spec(pair.key, pair.value, make_reference, allocator) {} + + buffer_pair_spec(const txn &txn, const slice &key, const slice &value, + const allocator_type &allocator = allocator_type()) + : key(txn, key, allocator), value(txn, value, allocator) {} + buffer_pair_spec(const txn &txn, const pair &pair, + const allocator_type &allocator = allocator_type()) + : buffer_pair_spec(txn, pair.key, pair.value, allocator) {} + + buffer_pair_spec(buffer_type &&key, buffer_type &&value) noexcept( + buffer_type::move_assign_alloc::is_nothrow()) + : key(::std::move(key)), value(::std::move(value)) {} + buffer_pair_spec(buffer_pair_spec &&pair) noexcept( + buffer_type::move_assign_alloc::is_nothrow()) + : buffer_pair_spec(::std::move(pair.key), ::std::move(pair.value)) {} + + /// \brief Checks whether both buffers store their data chunks internally, + /// i.e. neither the key nor the value merely refers to data located
outside. + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR bool + is_freestanding() const noexcept { + return key.is_freestanding() && value.is_freestanding(); + } + /// \brief Checks whether at least one of the buffers just refers to data + /// located outside the buffer, rather than storing it. + MDBX_NOTHROW_PURE_FUNCTION MDBX_CXX20_CONSTEXPR bool + is_reference() const noexcept { + return key.is_reference() || value.is_reference(); + } + /// \brief Makes both buffers own their data. + /// \details If a buffer refers to external data, then makes it the owner + /// of a clone by allocating storage and copying the data. + void make_freestanding() { + key.make_freestanding(); + value.make_freestanding(); + } + + operator pair() const noexcept { return pair(key, value); } +}; + +template +using buffer_pair = buffer_pair_spec; + /// end of cxx_data @} //------------------------------------------------------------------------------ From 100f07e89ac4e34ba990ab630916af1c5a8215b2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 26 Nov 2023 21:51:41 +0300 Subject: [PATCH 085/137] =?UTF-8?q?mdbx++:=20=D0=B4=D0=BE=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D0=BD=D0=B8=D1=82=D0=B5=D0=BB=D1=8C=D0=BD=D1=8B=D0=B5=20?= =?UTF-8?q?=D0=BE=D0=BF=D0=B5=D1=80=D0=B0=D1=82=D0=BE=D1=80=D1=8B=20=D0=BF?= =?UTF-8?q?=D1=80=D0=B5=D0=BE=D0=B1=D1=80=D0=B0=D0=B7=D0=BE=D0=B2=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D1=8F=20=D0=BA=20=D1=82=D0=B8=D0=BF=D0=B0=D0=BC=20?= =?UTF-8?q?C=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 3 +++ 1 file changed, 3 insertions(+) diff --git a/mdbx.h++ b/mdbx.h++ index 74373039..c945545b 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -798,6 +798,8 @@ struct LIBMDBX_API_TYPE slice : public ::MDBX_val { slice &operator=(const slice &) noexcept = default; inline slice &operator=(slice &&src) noexcept; inline slice &operator=(::MDBX_val
&&src); + operator MDBX_val *() noexcept { return this; } + operator const MDBX_val *() const noexcept { return this; } #if defined(DOXYGEN) || \ (defined(__cpp_lib_string_view) && __cpp_lib_string_view >= 201606L) @@ -3420,6 +3422,7 @@ struct LIBMDBX_API_TYPE map_handle { map_handle(const map_handle &) noexcept = default; map_handle &operator=(const map_handle &) noexcept = default; operator bool() const noexcept { return dbi != 0; } + operator MDBX_dbi() const { return dbi; } using flags = ::MDBX_db_flags_t; using state = ::MDBX_dbi_state_t; From f97c127455517f130b3206f2090acdceb3d644ed Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 15 Nov 2023 23:38:31 +0300 Subject: [PATCH 086/137] =?UTF-8?q?mdbx-test:=20=D1=82=D0=B5=D1=81=D1=82?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20doubtless-API=20=D0=BF=D0=BE=D0=B7?= =?UTF-8?q?=D0=B8=D1=86=D0=B8=D0=BE=D0=BD=D0=B8=D1=80=D0=BE=D0=B2=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D1=8F=20=D0=BA=D1=83=D1=80=D1=81=D0=BE=D1=80=D0=BE?= =?UTF-8?q?=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 8 + test/extra/doubtless_positioning.c++ | 263 +++++++++++++++++++++++++++ 2 files changed, 271 insertions(+) create mode 100644 test/extra/doubtless_positioning.c++ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 08866fe4..59dc315c 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -95,6 +95,13 @@ if(UNIX AND NOT SUBPROJECT) set_target_properties(test_extra_hex_base64_base58 PROPERTIES CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) endif() + add_executable(test_extra_doubtless_positioning extra/doubtless_positioning.c++) + target_include_directories(test_extra_doubtless_positioning PRIVATE "${PROJECT_SOURCE_DIR}") + target_link_libraries(test_extra_doubtless_positioning ${TOOL_MDBX_LIB}) + if(MDBX_CXX_STANDARD) + 
set_target_properties(test_extra_doubtless_positioning PROPERTIES + CXX_STANDARD ${MDBX_CXX_STANDARD} CXX_STANDARD_REQUIRED ON) + endif() endif() endif() @@ -180,6 +187,7 @@ else() add_test(NAME extra_maindb_ordinal COMMAND test_extra_maindb_ordinal) add_test(NAME extra_dupfixed_multiple COMMAND test_extra_dupfixed_multiple) add_test(NAME extra_hex_base64_base58 COMMAND test_extra_hex_base64_base58) + add_test(NAME extra_doubtless_positioning COMMAND test_extra_doubtless_positioning) endif() endif() diff --git a/test/extra/doubtless_positioning.c++ b/test/extra/doubtless_positioning.c++ new file mode 100644 index 00000000..aff30792 --- /dev/null +++ b/test/extra/doubtless_positioning.c++ @@ -0,0 +1,263 @@ +#include "mdbx.h++" +#include +#include +#include +#include +#include + +static ::std::ostream &operator<<(::std::ostream &out, + const mdbx::cursor::move_operation op) { + static const char *const str[] = {"FIRST", + "FIRST_DUP", + "GET_BOTH", + "GET_BOTH_RANGE", + "GET_CURRENT", + "GET_MULTIPLE", + "LAST", + "LAST_DUP", + "NEXT", + "NEXT_DUP", + "NEXT_MULTIPLE", + "NEXT_NODUP", + "PREV", + "PREV_DUP", + "PREV_NODUP", + "SET", + "SET_KEY", + "SET_RANGE", + "PREV_MULTIPLE", + "SET_LOWERBOUND", + "SET_UPPERBOUND", + "TO_KEY_LESSER_THAN", + "TO_KEY_LESSER_OR_EQUAL", + "TO_KEY_EQUAL", + "TO_KEY_GREATER_OR_EQUAL", + "TO_KEY_GREATER_THAN", + "TO_EXACT_KEY_VALUE_LESSER_THAN", + "TO_EXACT_KEY_VALUE_LESSER_OR_EQUAL", + "TO_EXACT_KEY_VALUE_EQUAL", + "TO_EXACT_KEY_VALUE_GREATER_OR_EQUAL", + "TO_EXACT_KEY_VALUE_GREATER_THAN", + "TO_PAIR_LESSER_THAN", + "TO_PAIR_LESSER_OR_EQUAL", + "TO_PAIR_EQUAL", + "TO_PAIR_GREATER_OR_EQUAL", + "TO_PAIR_GREATER_THAN"}; + return out << str[op]; +} + +using buffer = mdbx::default_buffer; +using buffer_pair = mdbx::buffer_pair; + +std::default_random_engine prng(42); + +static buffer random(const unsigned &value) { + switch (prng() % 3) { + default: + return buffer::hex(value); + case 1: + return buffer::base64(value); + case 2: + return 
buffer::base58(value); + } +} + +static buffer random_key() { return random(prng() % 10007); } + +static buffer random_value() { return random(prng() % 47); } + +using predicate = std::function; + +static bool probe(mdbx::txn txn, mdbx::map_handle dbi, + mdbx::cursor::move_operation op, predicate cmp, + const buffer_pair &pair) { + auto seeker = txn.open_cursor(dbi); + auto scanner = seeker.clone(); + + const bool scan_backward = + op == mdbx::cursor::key_lesser_than || + op == mdbx::cursor::key_lesser_or_equal || + op == mdbx::cursor::multi_exactkey_value_lesser_than || + op == mdbx::cursor::multi_exactkey_value_lesser_or_equal || + op == mdbx::cursor::pair_lesser_than || + op == mdbx::cursor::pair_lesser_or_equal; + + const bool is_multi = mdbx::is_multi(txn.get_handle_info(dbi).value_mode()); + + auto seek_result = seeker.move(op, pair.key, pair.value, false); + auto scan_result = scanner.fullscan( + [cmp, &pair](const mdbx::pair &scan) -> bool { return cmp(scan, pair); }, + scan_backward); + if (seek_result.done == scan_result && + (!scan_result || + seeker.is_same_position( + scanner, + op < mdbx::cursor::multi_exactkey_value_lesser_than && is_multi))) + return true; + + std::cerr << std::endl; + std::cerr << "bug:"; + std::cerr << std::endl; + std::cerr << std::string(is_multi ? "multi" : "single") << "-map, op " << op + << ", key " << pair.key << ", value " << pair.value; + std::cerr << std::endl; + std::cerr << "\tscanner: "; + if (scan_result) + std::cerr << " done, key " << scanner.current(false).key << ", value " + << scanner.current(false).value; + else + std::cerr << "not-found"; + std::cerr << std::endl; + std::cerr << "\t seeker: " << (seek_result.done ?
" done" : "not-found") + << ", key " << seek_result.key << ", value " << seek_result.value; + std::cerr << std::endl; + return false; +} + +static bool probe(mdbx::txn txn, mdbx::map_handle dbi, + mdbx::cursor::move_operation op, predicate cmp) { + const auto pair = buffer_pair(random_key(), random_value()); + const bool ok = probe(txn, dbi, op, cmp, pair); +#if MDBX_DEBUG + if (!ok) + // rerun for debugging and finding the cause + probe(txn, dbi, op, cmp, pair); +#endif /* MDBX_DEBUG */ + return ok; +} + +static bool test(mdbx::txn txn, mdbx::map_handle dbi) { + bool ok = true; + + ok = probe(txn, dbi, mdbx::cursor::key_lesser_than, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) < 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::key_lesser_or_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) <= 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::key_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) == 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::key_greater_or_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) >= 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::key_greater_than, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) > 0; + }) && + ok; + + ok = probe(txn, dbi, mdbx::cursor::multi_exactkey_value_lesser_than, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && + mdbx_dcmp(txn, dbi, l.value, r.value) < 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::multi_exactkey_value_lesser_or_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && + mdbx_dcmp(txn, dbi, l.value, r.value) <= 0; + }) && + ok; + ok =
probe(txn, dbi, mdbx::cursor::multi_exactkey_value_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && + mdbx_dcmp(txn, dbi, l.value, r.value) == 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::multi_exactkey_value_greater_or_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && + mdbx_dcmp(txn, dbi, l.value, r.value) >= 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::multi_exactkey_value_greater, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return mdbx_cmp(txn, dbi, l.key, r.key) == 0 && + mdbx_dcmp(txn, dbi, l.value, r.value) > 0; + }) && + ok; + + ok = probe(txn, dbi, mdbx::cursor::pair_lesser_than, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + auto cmp = mdbx_cmp(txn, dbi, l.key, r.key); + if (cmp == 0) + cmp = mdbx_dcmp(txn, dbi, l.value, r.value); + return cmp < 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::pair_lesser_or_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + auto cmp = mdbx_cmp(txn, dbi, l.key, r.key); + if (cmp == 0) + cmp = mdbx_dcmp(txn, dbi, l.value, r.value); + return cmp <= 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::pair_equal, + [](const mdbx::pair &l, const mdbx::pair &r) -> bool { + return l == r; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::pair_greater_or_equal, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + auto cmp = mdbx_cmp(txn, dbi, l.key, r.key); + if (cmp == 0) + cmp = mdbx_dcmp(txn, dbi, l.value, r.value); + return cmp >= 0; + }) && + ok; + ok = probe(txn, dbi, mdbx::cursor::pair_greater_than, + [txn, dbi](const mdbx::pair &l, const mdbx::pair &r) -> bool { + auto cmp = mdbx_cmp(txn, dbi, l.key, r.key); + if (cmp == 0) + cmp = mdbx_dcmp(txn, dbi, l.value, r.value); + return cmp > 0; + }) && + ok; + return ok; +} + +int main(int argc, const char *argv[]) { + (void)argc; + 
(void)argv; + + unlink("." MDBX_DATANAME); + unlink("." MDBX_LOCKNAME); + mdbx::env_managed env(".", mdbx::env_managed::create_parameters(), + mdbx::env::operate_parameters(3)); + + auto txn = env.start_write(); + auto single = + txn.create_map("single", mdbx::key_mode::usual, mdbx::value_mode::single); + auto multi = + txn.create_map("multi", mdbx::key_mode::usual, mdbx::value_mode::multi); + for (size_t i = 0; i < 1000; ++i) { + auto key = random_key(); + txn.upsert(single, key, random_value()); + for (auto n = prng() % 5 + 1; n > 0; --n) + txn.upsert(multi, key, random_value()); + } + txn.commit_embark_read(); + + bool ok = true; + for (size_t i = 0; ok && i < 3333; ++i) { + ok = test(txn, single) && ok; + ok = test(txn, multi) && ok; + } + + if (!ok) { + std::cerr << "Fail\n"; + return EXIT_FAILURE; + } + std::cout << "OK\n"; + return EXIT_SUCCESS; +} From 1db44c7914406c9d525a4f3167584a32b2a7b1f9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 26 Nov 2023 22:27:26 +0300 Subject: [PATCH 087/137] =?UTF-8?q?mdbx++:=20=D0=BF=D0=B5=D1=80=D0=B5?= =?UTF-8?q?=D0=BD=D0=BE=D1=81=20=D0=B2=20public=20=D1=82=D0=B8=D0=BF=D0=BE?= =?UTF-8?q?=D0=B2=20`buffer::move=5Fassign=5Falloc`=20=D0=B8=20`buffer::co?= =?UTF-8?q?py=5Fassign=5Falloc`=20=D0=B4=D0=BB=D1=8F=20=D1=81=D1=82=D0=B0?= =?UTF-8?q?=D1=80=D1=8B=D1=85=20=D1=81=D1=82=D0=B0=D0=BD=D0=B4=D0=B0=D1=80?= =?UTF-8?q?=D1=82=D0=BE=D0=B2=20C++.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h++ | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/mdbx.h++ b/mdbx.h++ index c945545b..9ab336d1 100644 --- a/mdbx.h++ +++ b/mdbx.h++ @@ -1662,10 +1662,6 @@ public: private: friend class txn; struct silo; - using move_assign_alloc = - allocation_aware_details::move_assign_alloc; - using copy_assign_alloc = - allocation_aware_details::copy_assign_alloc; 
using swap_alloc = allocation_aware_details::swap_alloc; struct silo /* Empty Base Class Optimization */ : public allocator_type { MDBX_CXX20_CONSTEXPR const allocator_type &get_allocator() const noexcept { @@ -2157,6 +2153,11 @@ public: /// \todo buffer& operator>>(buffer&, ...) for reading (delegated to slice) /// \todo template key(X) for encoding keys while writing + using move_assign_alloc = + allocation_aware_details::move_assign_alloc; + using copy_assign_alloc = + allocation_aware_details::copy_assign_alloc; + /// \brief Returns the associated allocator. MDBX_CXX20_CONSTEXPR allocator_type get_allocator() const { return silo_.get_allocator(); From 14558fa90bcc93fcf47c0b67087a177c5707ef66 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 26 Nov 2023 23:18:05 +0300 Subject: [PATCH 088/137] =?UTF-8?q?mdbx-test:=20=D0=BF=D1=80=D0=B0=D0=B2?= =?UTF-8?q?=D0=BA=D0=B0=20=D1=82=D0=B5=D1=81=D1=82=D0=BE=D0=B2=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D1=81=D0=BE=D0=B2=D0=BC=D0=B5=D1=81=D1=82=D0=B8?= =?UTF-8?q?=D0=BC=D0=BE=D1=81=D1=82=D0=B8=20=D1=81=20=D0=BF=D1=80=D0=BE?= =?UTF-8?q?=D0=B1=D0=BB=D0=B5=D0=BC=D0=BD=D1=8B=D0=BC=D0=B8=20=D0=B2=D0=B5?= =?UTF-8?q?=D1=80=D1=81=D0=B8=D1=8F=D0=BC=D0=B8=20glibc=20=D0=B8=20glibc++?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/config.c++ | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/config.c++ b/test/config.c++ index 31cf9395..922f7b37 100644 --- a/test/config.c++ +++ b/test/config.c++ @@ -667,7 +667,10 @@ bool actor_config::deserialize(const char *str, actor_config &config) { } str = slash + 1; - uint64_t verify = std::stoull(std::string(str)); + uint64_t verify = 0; + while (*str >= '0' && *str <= '9') + verify = verify * 10 + *str++ - '0'; + if (checksum.value != verify) { TRACE("<< actor_config::deserialize: checksum 
mismatch\n"); return false; From 43dbf8ec4f953971f47adfa7e470745a53ca7c8d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 27 Nov 2023 11:29:46 +0300 Subject: [PATCH 089/137] =?UTF-8?q?mdbx:=20=D0=BF=D1=80=D0=B0=D0=B2=D0=BA?= =?UTF-8?q?=D0=B0=20`txn=5Fvalgrind()`=20=D0=B4=D0=BB=D1=8F=20=D1=81=D0=BB?= =?UTF-8?q?=D1=83=D1=87=D0=B0=D1=8F=20resurrect-after-fork.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index bd955953..fc0318fe 100644 --- a/src/core.c +++ b/src/core.c @@ -8761,7 +8761,10 @@ static void txn_valgrind(MDBX_env *env, MDBX_txn *txn) { } else { /* transaction end */ bool should_unlock = false; pgno_t last = MAX_PAGENO + 1; - if (env->me_txn0 && env->me_txn0->mt_owner == osal_thread_self()) { + if (env->me_pid != osal_getpid()) { + /* resurrect after fork */ + return; + } else if (env->me_txn0 && env->me_txn0->mt_owner == osal_thread_self()) { /* inside write-txn */ last = meta_recent(env, &env->me_txn0->tw.troika).ptr_v->mm_geo.next; } else if (env->me_flags & MDBX_RDONLY) { From 349759648d65c98c1e8a7c49270af1c59f31432c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 27 Nov 2023 12:35:20 +0300 Subject: [PATCH 090/137] =?UTF-8?q?mdbx:=20=D0=BF=D1=80=D0=B0=D0=B2=D0=BA?= =?UTF-8?q?=D0=B0=20`rthc=5Fafterfork()`=20=D0=B4=D0=BB=D1=8F=20Valgrind.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index fc0318fe..3eb8c028 100644 --- a/src/core.c +++ b/src/core.c @@ -26320,8 +26320,15 @@ __cold static void rthc_afterfork(void) { for 
(size_t i = 0; i < rthc_count; ++i) { MDBX_env *const env = rthc_table[i].env; NOTICE("drown env %p", __Wpedantic_format_voidptr(env)); - env->me_dxb_mmap.base = nullptr; - env->me_lck_mmap.base = nullptr; + if (env->me_lck_mmap.lck) + osal_munmap(&env->me_lck_mmap); + if (env->me_map) { + osal_munmap(&env->me_dxb_mmap); +#ifdef ENABLE_MEMCHECK + VALGRIND_DISCARD(env->me_valgrind_handle); + env->me_valgrind_handle = -1; +#endif /* ENABLE_MEMCHECK */ + } env->me_lck = lckless_stub(env); rthc_drown(env); } From 5a86afaac3a49eff0707914d48a0b4c126d7d801 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 27 Nov 2023 12:52:40 +0300 Subject: [PATCH 091/137] =?UTF-8?q?mdbx-test:=203-=D1=87=D0=B0=D1=81=D0=BE?= =?UTF-8?q?=D0=B2=D0=BE=D0=B9=20=D1=82=D0=B0=D0=B9=D0=BC=D0=B0=D1=83=D1=82?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20`extra=5Fdoubtless=5Fpositioning`=20?= =?UTF-8?q?=D0=BF=D1=80=D0=B8=20=D0=B8=D1=81=D0=BF=D0=BE=D0=BB=D1=8C=D0=B7?= =?UTF-8?q?=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B8=20Valgrind.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 59dc315c..bb3ab448 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -188,6 +188,9 @@ else() add_test(NAME extra_dupfixed_multiple COMMAND test_extra_dupfixed_multiple) add_test(NAME extra_hex_base64_base58 COMMAND test_extra_hex_base64_base58) add_test(NAME extra_doubtless_positioning COMMAND test_extra_doubtless_positioning) + if (ENABLE_MEMCHECK) + set_tests_properties(extra_doubtless_positioning PROPERTIES TIMEOUT 10800) + endif() endif() endif() From e316bc8b2912e0fbe083eada2d2c558883838dc5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 27 
Nov 2023 13:23:06 +0300 Subject: [PATCH 092/137] =?UTF-8?q?mdbx-test:=20=D1=83=D0=B2=D0=B5=D0=BB?= =?UTF-8?q?=D0=B8=D1=87=D0=B5=D0=BD=D0=B8=D1=8F=20=D1=82=D0=B0=D0=B9=D0=BC?= =?UTF-8?q?=D0=B0=D1=83=D1=82=D0=B0=20=D0=B4=D0=BB=D1=8F=20smoke-=D1=82?= =?UTF-8?q?=D0=B5=D1=81=D1=82=D0=B0=20=D0=BD=D0=B0=20=D1=81=D0=BB=D1=83?= =?UTF-8?q?=D1=87=D0=B0=D0=B9=20=D0=BF=D0=B0=D1=80=D0=B0=D0=BB=D0=BB=D0=B5?= =?UTF-8?q?=D0=BB=D1=8C=D0=BD=D0=BE=D0=B3=D0=BE=20=D0=B2=D1=8B=D0=BF=D0=BE?= =?UTF-8?q?=D0=BB=D0=BD=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=BF=D0=BE=D0=B4=20Val?= =?UTF-8?q?grind.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index bb3ab448..232ff2e6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -144,7 +144,7 @@ else() --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=2 --pathname=dupsort_writemap.db --dont-cleanup-after basic) set_tests_properties(dupsort_writemap PROPERTIES - TIMEOUT 600 + TIMEOUT 3600 RUN_SERIAL OFF) if(MDBX_BUILD_TOOLS) add_test(NAME dupsort_writemap_chk COMMAND ${MDBX_OUTPUT_DIR}/mdbx_chk -nvvwc dupsort_writemap.db) From eb90ec61924ad68558ffb30997733ed60c6a404c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 28 Nov 2023 21:33:57 +0300 Subject: [PATCH 093/137] =?UTF-8?q?mdbx:=20=D0=BD=D0=BE=D0=B2=D1=8B=D0=B9?= =?UTF-8?q?=20=D1=80=D0=B0=D0=B7=D0=BC=D0=B5=D1=80=20MDBX=5Fopt=5Frp=5Faug?= =?UTF-8?q?ment=5Flimit=20=D0=BF=D0=BE=20=D1=83=D0=BC=D0=BE=D0=BB=D1=87?= =?UTF-8?q?=D0=B0=D0=BD=D0=B8=D1=8E=20=D0=B2=201/3=20=D0=BE=D1=82=20=D1=82?= =?UTF-8?q?=D0=B5=D0=BA=D1=83=D1=89=D0=B5=D0=B3=D0=BE=20=D0=BA=D0=BE=D0=BB?= =?UTF-8?q?-=D0=B2=D0=B0=20=D1=81=D1=82=D1=80=D0=B0=D0=BD=D0=B8=D1=86=20?= 
=?UTF-8?q?=D0=B2=20=D0=91=D0=94.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 3 ++- src/core.c | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/mdbx.h b/mdbx.h index 43d4eca3..b8f3eac5 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2146,7 +2146,8 @@ enum MDBX_option_t { * growth, or/and to the inability of put long values. * * The `MDBX_opt_rp_augment_limit` controls described limit for the current - * process. Default is 262144, it is usually enough for most cases. */ + * process. By default this limit is adjusted dynamically to 1/3 of the + * current number of DB pages, which is usually enough for most cases. */ MDBX_opt_rp_augment_limit, /** \brief Controls the in-process limit to grow a cache of dirty diff --git a/src/core.c b/src/core.c index 3eb8c028..24e0353c 100644 --- a/src/core.c +++ b/src/core.c @@ -6423,8 +6423,8 @@ __cold static void munlock_all(const MDBX_env *env) { } __cold static unsigned default_rp_augment_limit(const MDBX_env *env) { - /* default rp_augment_limit = ceil(npages / gold_ratio) */ - const size_t augment = (env->me_dbgeo.now >> (env->me_psize2log + 10)) * 633u; + /* default rp_augment_limit = npages / 3 */ + const size_t augment = env->me_dbgeo.now / 3 >> env->me_psize2log; eASSERT(env, augment < MDBX_PGL_LIMIT); return pnl_bytes2size(pnl_size2bytes( (augment > MDBX_PNL_INITIAL) ? 
augment : MDBX_PNL_INITIAL)); From 508cf83c321226aefc3f38519f6cfa3b4b1e4afb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 28 Nov 2023 21:40:21 +0300 Subject: [PATCH 094/137] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BD=D0=B5=D1=81=D1=83?= =?UTF-8?q?=D1=89=D0=B5=D1=81=D1=82=D0=B2=D0=B5=D0=BD=D0=BD=D0=BE=D0=B9=20?= =?UTF-8?q?=D0=BE=D0=BF=D0=B5=D1=87=D0=B0=D1=82=D0=BA=D0=B8=20=D0=B2=20?= =?UTF-8?q?=D0=BA=D0=BE=D0=BC=D0=BC=D0=B5=D0=BD=D1=82=D0=B0=D1=80=D0=B8?= =?UTF-8?q?=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index 24e0353c..670a61e3 100644 --- a/src/core.c +++ b/src/core.c @@ -7743,7 +7743,7 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *const mc, const size_t num, * простейших случаях (тривиальный бенчмарк) интегральная производительность * становится вдвое меньше. А на платформах без mincore() и с проблемной * подсистемой виртуальной памяти ситуация может быть многократно хуже. - * Поэтому избегаем затрат в ситуациях когда prefaukt-write скорее всего не + * Поэтому избегаем затрат в ситуациях когда prefault-write скорее всего не * нужна. 
*/ const bool readahead_enabled = env->me_lck->mti_readahead_anchor & 1; const pgno_t readahead_edge = env->me_lck->mti_readahead_anchor >> 1; From eeec44f56df34a01f23d94e76dc9651525220d4c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 29 Nov 2023 00:35:25 +0300 Subject: [PATCH 095/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20MDBX=5Fopt=5Fgc=5Ftime=5Flimit?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 28 ++++++++++++++++++++++ src/core.c | 64 +++++++++++++++++++++++++++++++++++++++++++------ src/internals.h | 2 ++ 3 files changed, 87 insertions(+), 7 deletions(-) diff --git a/mdbx.h b/mdbx.h index b8f3eac5..bf5bc560 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2133,6 +2133,7 @@ enum MDBX_option_t { /** \brief Controls the in-process limit to grow a list of reclaimed/recycled * page's numbers for finding a sequence of contiguous pages for large data * items. + * \see MDBX_opt_gc_time_limit * * \details A long values requires allocation of contiguous database pages. * To find such sequences, it may be necessary to accumulate very large lists, @@ -2293,6 +2294,33 @@ enum MDBX_option_t { * in the \ref MDBX_WRITEMAP mode by clearing ones through file handle before * touching. */ MDBX_opt_prefault_write_enable, + + /** \brief Controls the in-process spending time limit of searching + * consecutive pages inside GC. + * \see MDBX_opt_rp_augment_limit + * + * \details Задаёт ограничение времени в 1/65536 долях секунды, которое может + * быть потрачено в ходе пишущей транзакции на поиск последовательностей + * страниц внутри GC/freelist после достижения ограничения задаваемого опцией + * \ref MDBX_opt_rp_augment_limit. 
Контроль по времени не выполняется при + * поиске/выделении одиночных страниц и выделении страниц под нужды GC (при + * обновлении GC в ходе фиксации транзакции). + * + * Задаваемый лимит времени исчисляется по "настенным часам" и контролируется + * в рамках транзакции, наследуется для вложенных транзакций и с + * аккумулированием в родительской при их фиксации. Контроль по времени + * производится только при достижении ограничения задаваемого опцией \ref + * MDBX_opt_rp_augment_limit. Это позволяет гибко управлять поведением + * используя обе опции. + * + * По умолчанию ограничение устанавливается в 0, что приводит к + * незамедлительной остановке поиска в GC при достижении \ref + * MDBX_opt_rp_augment_limit во внутреннем состоянии транзакции и + * соответствует поведению до появления опции `MDBX_opt_gc_time_limit`. + * С другой стороны, при минимальном значении (включая 0) + * `MDBX_opt_rp_augment_limit` переработка GC будет ограничиваться + * преимущественно затраченным временем. */ + MDBX_opt_gc_time_limit }; #ifndef __cplusplus /** \ingroup c_settings */ diff --git a/src/core.c b/src/core.c index 670a61e3..1e0a400b 100644 --- a/src/core.c +++ b/src/core.c @@ -7674,12 +7674,24 @@ bailout: return ret; } +struct monotime_cache { + uint64_t value; + int expire_countdown; +}; + +static __inline uint64_t monotime_since_cached(uint64_t begin_timestamp, + struct monotime_cache *cache) { + if (cache->expire_countdown) + cache->expire_countdown -= 1; + else { + cache->value = osal_monotime(); + cache->expire_countdown = 42 / 3; + } + return cache->value - begin_timestamp; +} + static pgr_t page_alloc_slowpath(const MDBX_cursor *const mc, const size_t num, uint8_t flags) { -#if MDBX_ENABLE_PROFGC - const uint64_t monotime_before = osal_monotime(); -#endif /* MDBX_ENABLE_PROFGC */ - pgr_t ret; MDBX_txn *const txn = mc->mc_txn; MDBX_env *const env = txn->mt_env; @@ -7694,8 +7706,19 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *const mc, const size_t num, 
eASSERT(env, pnl_check_allocated(txn->tw.relist, txn->mt_next_pgno - MDBX_ENABLE_REFUND)); - pgno_t pgno = 0; size_t newnext; + const uint64_t monotime_begin = + (MDBX_ENABLE_PROFGC || (num > 1 && env->me_options.gc_time_limit)) + ? osal_monotime() + : 0; + struct monotime_cache now_cache; + now_cache.expire_countdown = + 1 /* старт с 1 позволяет избавиться как от лишних системных вызовов когда + лимит времени задан нулевой или уже исчерпан, так и от подсчета + времени при не-достижении rp_augment_limit */ + ; + now_cache.value = monotime_begin; + pgno_t pgno = 0; if (num > 1) { #if MDBX_ENABLE_PROFGC prof->xpages += 1; @@ -7871,7 +7894,10 @@ next_gc:; txn->tw.relist) >= env->me_options.rp_augment_limit) && ((/* not a slot-request from gc-update */ num && /* have enough unallocated space */ txn->mt_geo.upper >= - txn->mt_next_pgno + num) || + txn->mt_next_pgno + num && + monotime_since_cached(monotime_begin, &now_cache) + + txn->tw.gc_time_acc >= + env->me_options.gc_time_limit) || gc_len + MDBX_PNL_GETSIZE(txn->tw.relist) >= MDBX_PGL_LIMIT)) { /* Stop reclaiming to avoid large/overflow the page list. 
This is a rare * case while search for a continuously multi-page region in a @@ -8173,6 +8199,8 @@ done: (size_t)txn->mt_dbs[FREE_DBI].md_entries); ret.page = NULL; } + if (num > 1) + txn->tw.gc_time_acc += monotime_since_cached(monotime_begin, &now_cache); } else { early_exit: DEBUG("return NULL for %zu pages for ALLOC_%s, rc %d", num, @@ -8181,7 +8209,7 @@ done: } #if MDBX_ENABLE_PROFGC - prof->rtime_monotonic += osal_monotime() - monotime_before; + prof->rtime_monotonic += osal_monotime() - monotime_begin; #endif /* MDBX_ENABLE_PROFGC */ return ret; } @@ -9352,6 +9380,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { MDBX_PNL_SETSIZE(txn->tw.retired_pages, 0); txn->tw.spilled.list = NULL; txn->tw.spilled.least_removed = 0; + txn->tw.gc_time_acc = 0; txn->tw.last_reclaimed = 0; if (txn->tw.lifo_reclaimed) MDBX_PNL_SETSIZE(txn->tw.lifo_reclaimed, 0); @@ -9800,6 +9829,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, = parent->mt_next_pgno) - MDBX_ENABLE_REFUND)); + txn->tw.gc_time_acc = parent->tw.gc_time_acc; txn->tw.last_reclaimed = parent->tw.last_reclaimed; if (parent->tw.lifo_reclaimed) { txn->tw.lifo_reclaimed = parent->tw.lifo_reclaimed; @@ -12037,6 +12067,7 @@ int mdbx_txn_commit_ex(MDBX_txn *txn, MDBX_commit_latency *latency) { pnl_free(parent->tw.relist); parent->tw.relist = txn->tw.relist; txn->tw.relist = NULL; + parent->tw.gc_time_acc = txn->tw.gc_time_acc; parent->tw.last_reclaimed = txn->tw.last_reclaimed; parent->mt_geo = txn->mt_geo; @@ -25875,6 +25906,21 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, } break; + case MDBX_opt_gc_time_limit: + if (value == /* default */ UINT64_MAX) + value = 0; + if (unlikely(value > UINT32_MAX)) + return MDBX_EINVAL; + if (unlikely(env->me_flags & MDBX_RDONLY)) + return MDBX_EACCESS; + value = osal_16dot16_to_monotime((uint32_t)value); + if (value != env->me_options.gc_time_limit) { + if (env->me_txn && env->me_txn0->mt_owner != 
osal_thread_self()) + return MDBX_EPERM; + env->me_options.gc_time_limit = value; + } + break; + case MDBX_opt_txn_dp_limit: case MDBX_opt_txn_dp_initial: if (value == /* default */ UINT64_MAX) @@ -26027,6 +26073,10 @@ __cold int mdbx_env_get_option(const MDBX_env *env, const MDBX_option_t option, *pvalue = env->me_options.rp_augment_limit; break; + case MDBX_opt_gc_time_limit: + *pvalue = osal_monotime_to_16dot16(env->me_options.gc_time_limit); + break; + case MDBX_opt_txn_dp_limit: *pvalue = env->me_options.dp_limit; break; diff --git a/src/internals.h b/src/internals.h index 7f9aedd0..f4e37ac3 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1260,6 +1260,7 @@ struct MDBX_txn { size_t writemap_dirty_npages; size_t writemap_spilled_npages; }; + uint64_t gc_time_acc; } tw; }; }; @@ -1422,6 +1423,7 @@ struct MDBX_env { unsigned rp_augment_limit; unsigned dp_limit; unsigned dp_initial; + uint64_t gc_time_limit; uint8_t dp_loose_limit; uint8_t spill_max_denominator; uint8_t spill_min_denominator; From 169e69c52e3f5389ae8b8729e02a279c48a919bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 30 Nov 2023 22:53:28 +0300 Subject: [PATCH 096/137] =?UTF-8?q?mdbx:=20=D0=BF=D0=BE=D0=B4=D1=81=D1=82?= =?UTF-8?q?=D1=80=D0=BE=D0=B9=D0=BA=D0=B0=20rp=5Faugment=5Flimit=20=D0=B2?= =?UTF-8?q?=20=D0=B7=D0=B0=D0=B2=D0=B8=D1=81=D0=B8=D0=BC=D0=BE=D1=81=D1=82?= =?UTF-8?q?=D0=B8=20=D0=BE=D1=82=20gc=5Ftime=5Flimit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Когда rp_augment_limit не задан пользователем посредством `MDBX_opt_rp_augment_limit`, то как и ранее он подстраивается в зависимости от текущего размера БД (актуального кол-ва страниц). 
Теперь-же авто-устанавливаемое значение rp_augment_limit вычисляется обратно-пропорционально `MDBX_opt_gc_time_limit`: - Если gc_time_limit == 0, то rp_augment_limit устанавливается в 1/3 от общего кол-ва страниц БД, но не меньше рационального минимума. Это соответствует прежнему поведению и обеспечивает достаточно глубокую переработку GC во всех не-экстремальных сценариях. - При gc_time_limit >= 16_секунд rp_augment_limit устанавливается в минимальное значение. - Когда 0 < gc_time_limit < 16_секунд rp_augment_limit устанавливается между минимумом и 1/3 от размера БД пропорционально остатку gc_time_limit до 16 секунд. Соответственно, при больших значениях gc_time_limit, выбирается меньшее значение rp_augment_limit, и контроль глубины переработки GC ограничивается в основном по-времени. --- src/core.c | 22 +++++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/src/core.c b/src/core.c index 1e0a400b..c8f47537 100644 --- a/src/core.c +++ b/src/core.c @@ -6423,11 +6423,21 @@ __cold static void munlock_all(const MDBX_env *env) { } __cold static unsigned default_rp_augment_limit(const MDBX_env *env) { - /* default rp_augment_limit = npages / 3 */ - const size_t augment = env->me_dbgeo.now / 3 >> env->me_psize2log; - eASSERT(env, augment < MDBX_PGL_LIMIT); - return pnl_bytes2size(pnl_size2bytes( - (augment > MDBX_PNL_INITIAL) ? augment : MDBX_PNL_INITIAL)); + const size_t timeframe = 16 << 16; + const size_t remain_1sec = + (env->me_options.gc_time_limit < timeframe) + ? timeframe - (size_t)env->me_options.gc_time_limit + : 0; + const size_t minimum = (env->me_maxgc_ov1page * 2 > MDBX_PNL_INITIAL) + ? env->me_maxgc_ov1page * 2 + : MDBX_PNL_INITIAL; + const size_t one_third = env->me_dbgeo.now / 3 >> env->me_psize2log; + const size_t augment_limit = + (one_third > minimum) + ? 
minimum + (one_third - minimum) / timeframe * remain_1sec + : minimum; + eASSERT(env, augment_limit < MDBX_PGL_LIMIT); + return pnl_bytes2size(pnl_size2bytes(augment_limit)); } static bool default_prefault_write(const MDBX_env *env) { @@ -25918,6 +25928,8 @@ __cold int mdbx_env_set_option(MDBX_env *env, const MDBX_option_t option, if (env->me_txn && env->me_txn0->mt_owner != osal_thread_self()) return MDBX_EPERM; env->me_options.gc_time_limit = value; + if (!env->me_options.flags.non_auto.rp_augment_limit) + env->me_options.rp_augment_limit = default_rp_augment_limit(env); } break; From 4c139b36190b861dffef5c2d0c611c33c7243975 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 4 Dec 2023 21:39:29 +0300 Subject: [PATCH 097/137] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B2=D0=BD=D1=83=D1=82=D1=80?= =?UTF-8?q?=D0=B5=D0=BD=D0=BD=D0=B5=D0=B3=D0=BE=20=D1=80=D0=B5=D0=B3=D1=80?= =?UTF-8?q?=D0=B5=D1=81=D1=81=D0=B0=20=D0=BF=D1=80=D0=BE=D0=B2=D0=B5=D1=80?= =?UTF-8?q?=D0=BA=D0=B8=20=D1=81=D0=BE=D1=81=D1=82=D0=BE=D1=8F=D0=BD=D0=B8?= =?UTF-8?q?=D0=B5=20=D0=B2=D0=BD=D1=83=D1=82=D1=80=D0=B8=20=D1=82=D1=80?= =?UTF-8?q?=D0=B0=D0=BD=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit После f0d523c507042cc70eeeb690778c9b2be6a8b33f, при использовании добавленного API блокировок, возможно ложно-положительное определение состояние "внутри транзакции". 
--- src/core.c | 35 ++++++++++++++++------------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/src/core.c b/src/core.c index c8f47537..774eeb0e 100644 --- a/src/core.c +++ b/src/core.c @@ -8581,7 +8581,8 @@ retry:; goto bailout; } - const bool inside_txn = (env->me_txn0->mt_owner == osal_thread_self()); + const bool inside_txn = + (!locked && env->me_txn0->mt_owner == osal_thread_self()); const meta_troika_t troika = (inside_txn | locked) ? env->me_txn0->tw.troika : meta_tap(env); const meta_ptr_t head = meta_recent(env, &troika); @@ -8594,7 +8595,7 @@ retry:; goto bailout; } - if (!inside_txn && locked && (env->me_flags & MDBX_WRITEMAP) && + if (locked && (env->me_flags & MDBX_WRITEMAP) && unlikely(head.ptr_c->mm_geo.next > bytes2pgno(env, env->me_dxb_mmap.current))) { @@ -8934,7 +8935,7 @@ __cold int mdbx_thread_register(const MDBX_env *env) { } const uintptr_t tid = osal_thread_self(); - if (env->me_txn0 && unlikely(env->me_txn0->mt_owner == tid)) + if (env->me_txn0 && unlikely(env->me_txn0->mt_owner == tid) && env->me_txn) return MDBX_TXN_OVERLAPPING; return bind_rslot((MDBX_env *)env, tid).err; } @@ -9726,7 +9727,7 @@ int mdbx_txn_begin_ex(MDBX_env *env, MDBX_txn *parent, MDBX_txn_flags_t flags, flags |= parent->mt_flags & (MDBX_TXN_RW_BEGIN_FLAGS | MDBX_TXN_SPILLS); } else if (flags & MDBX_TXN_RDONLY) { if (env->me_txn0 && - unlikely(env->me_txn0->mt_owner == osal_thread_self()) && + unlikely(env->me_txn0->mt_owner == osal_thread_self()) && env->me_txn && (runtime_flags & MDBX_DBG_LEGACY_OVERLAP) == 0) return MDBX_TXN_OVERLAPPING; } else { @@ -13140,9 +13141,7 @@ static int sync_locked(MDBX_env *env, unsigned flags, MDBX_meta *const pending, const txnid_t txnid = safe64_txnid_next(pending->unsafe_txnid); NOTICE("force-forward pending-txn %" PRIaTXN " -> %" PRIaTXN, pending->unsafe_txnid, txnid); - ENSURE(env, !env->me_txn0 || - (env->me_txn0->mt_owner != osal_thread_self() && - !env->me_txn)); + ENSURE(env, !env->me_txn0 || 
!env->me_txn); if (unlikely(txnid > MAX_TXNID)) { rc = MDBX_TXN_FULL; ERROR("txnid overflow, raise %d", rc); @@ -13654,8 +13653,9 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, if (unlikely(rc != MDBX_SUCCESS)) return rc; - const bool inside_txn = - (env->me_txn0 && env->me_txn0->mt_owner == osal_thread_self()); + const bool need_lock = + !env->me_txn0 || env->me_txn0->mt_owner != osal_thread_self(); + const bool inside_txn = !need_lock && env->me_txn; #if MDBX_DEBUG if (growth_step < 0) { @@ -13666,17 +13666,17 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, #endif /* MDBX_DEBUG */ intptr_t reasonable_maxsize = 0; - bool need_unlock = false; + bool should_unlock = false; if (env->me_map) { /* env already mapped */ if (unlikely(env->me_flags & MDBX_RDONLY)) return MDBX_EACCESS; - if (!inside_txn) { + if (need_lock) { int err = osal_txn_lock(env, false); if (unlikely(err != MDBX_SUCCESS)) return err; - need_unlock = true; + should_unlock = true; env->me_txn0->tw.troika = meta_tap(env); eASSERT(env, !env->me_txn && !env->me_txn0->mt_child); env->me_txn0->mt_txnid = @@ -13902,7 +13902,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, MDBX_meta meta; memset(&meta, 0, sizeof(meta)); if (!inside_txn) { - eASSERT(env, need_unlock); + eASSERT(env, should_unlock); const meta_ptr_t head = meta_recent(env, &env->me_txn0->tw.troika); uint64_t timestamp = 0; @@ -14023,7 +14023,7 @@ __cold int mdbx_env_set_geometry(MDBX_env *env, intptr_t size_lower, } bailout: - if (need_unlock) + if (should_unlock) osal_txn_unlock(env); return rc; } @@ -22737,10 +22737,6 @@ __cold int mdbx_env_set_flags(MDBX_env *env, MDBX_env_flags_t flags, if (unlikely(env->me_flags & MDBX_RDONLY)) return MDBX_EACCESS; - if ((env->me_flags & MDBX_ENV_ACTIVE) && - unlikely(env->me_txn0->mt_owner == osal_thread_self())) - return MDBX_BUSY; - const bool lock_needed = (env->me_flags & MDBX_ENV_ACTIVE) && env->me_txn0->mt_owner != 
osal_thread_self(); bool should_unlock = false; @@ -22974,7 +22970,8 @@ __cold int mdbx_env_stat_ex(const MDBX_env *env, const MDBX_txn *txn, if (unlikely(err != MDBX_SUCCESS)) return err; - if (env->me_txn0 && env->me_txn0->mt_owner == osal_thread_self()) + if (env->me_txn0 && env->me_txn0->mt_owner == osal_thread_self() && + env->me_txn) /* inside write-txn */ return stat_acc(env->me_txn, dest, bytes); From 6b72d88fde2eee67a00af5b4853c34f00882669a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 4 Dec 2023 22:32:25 +0300 Subject: [PATCH 098/137] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B2=D0=BD=D1=83=D1=82?= =?UTF-8?q?=D1=80=D0=B5=D0=BD=D0=BD=D0=B5=D0=B3=D0=BE=20=D1=80=D0=B5=D0=B3?= =?UTF-8?q?=D1=80=D0=B5=D1=81=D1=81=D0=B0=20lck=5Fserize=5Frc=20=D0=B2=20?= =?UTF-8?q?=D1=81=D1=86=D0=B5=D0=BD=D0=B0=D1=80=D0=B8=D0=B8=20=D0=BF=D0=BE?= =?UTF-8?q?=D0=B2=D1=82=D0=BE=D1=80=D0=BD=D0=BE=D0=B3=D0=BE=20=D0=BE=D1=82?= =?UTF-8?q?=D0=BA=D1=80=D1=8B=D1=82=D0=B8=D1=8F=20=D0=91=D0=94=20=D0=BE?= =?UTF-8?q?=D0=B4=D0=BD=D0=B8=D0=BC=20=D0=BF=D1=80=D0=BE=D1=86=D0=B5=D1=81?= =?UTF-8?q?=D1=81=D0=BE=D0=BC.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 295 ++++++++++++++++++++++++++--------------------------- 1 file changed, 146 insertions(+), 149 deletions(-) diff --git a/src/core.c b/src/core.c index 774eeb0e..1fdc8528 100644 --- a/src/core.c +++ b/src/core.c @@ -14579,6 +14579,150 @@ __cold static int setup_dxb(MDBX_env *env, const int lck_rc, /******************************************************************************/ +__cold static int setup_lck_locked(MDBX_env *env) { + int err = rthc_register(env); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + int lck_seize_rc = osal_lck_seize(env); + if (unlikely(MDBX_IS_ERROR(lck_seize_rc))) + return 
lck_seize_rc; + + if (env->me_lfd == INVALID_HANDLE_VALUE) { + env->me_lck = lckless_stub(env); + env->me_maxreaders = UINT_MAX; + DEBUG("lck-setup:%s%s%s", " lck-less", + (env->me_flags & MDBX_RDONLY) ? " readonly" : "", + (lck_seize_rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); + return lck_seize_rc; + } + + DEBUG("lck-setup:%s%s%s", " with-lck", + (env->me_flags & MDBX_RDONLY) ? " readonly" : "", + (lck_seize_rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); + + MDBX_env *inprocess_neighbor = nullptr; + err = rthc_uniq_check(&env->me_lck_mmap, &inprocess_neighbor); + if (unlikely(MDBX_IS_ERROR(err))) + return err; + if (inprocess_neighbor) { + if ((runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 || + (inprocess_neighbor->me_flags & MDBX_EXCLUSIVE) != 0) + return MDBX_BUSY; + if (lck_seize_rc == MDBX_RESULT_TRUE) { + err = osal_lck_downgrade(env); + if (unlikely(err != MDBX_SUCCESS)) + return err; + lck_seize_rc = MDBX_RESULT_FALSE; + } + } + + uint64_t size = 0; + err = osal_filesize(env->me_lfd, &size); + if (unlikely(err != MDBX_SUCCESS)) + return err; + + if (lck_seize_rc == MDBX_RESULT_TRUE) { + size = ceil_powerof2(env->me_maxreaders * sizeof(MDBX_reader) + + sizeof(MDBX_lockinfo), + env->me_os_psize); + jitter4testing(false); + } else { + if (env->me_flags & MDBX_EXCLUSIVE) + return MDBX_BUSY; + if (size > INT_MAX || (size & (env->me_os_psize - 1)) != 0 || + size < env->me_os_psize) { + ERROR("lck-file has invalid size %" PRIu64 " bytes", size); + return MDBX_PROBLEM; + } + } + + const size_t maxreaders = + ((size_t)size - sizeof(MDBX_lockinfo)) / sizeof(MDBX_reader); + if (maxreaders < 4) { + ERROR("lck-size too small (up to %" PRIuPTR " readers)", maxreaders); + return MDBX_PROBLEM; + } + env->me_maxreaders = (maxreaders <= MDBX_READERS_LIMIT) + ? (unsigned)maxreaders + : (unsigned)MDBX_READERS_LIMIT; + + err = osal_mmap((env->me_flags & MDBX_EXCLUSIVE) | MDBX_WRITEMAP, + &env->me_lck_mmap, (size_t)size, (size_t)size, + lck_seize_rc ? 
MMAP_OPTION_TRUNCATE | MMAP_OPTION_SEMAPHORE + : MMAP_OPTION_SEMAPHORE); + if (unlikely(err != MDBX_SUCCESS)) + return err; + +#if MDBX_ENABLE_MADVISE +#ifdef MADV_DODUMP + err = madvise(env->me_lck_mmap.lck, size, MADV_DODUMP) ? ignore_enosys(errno) + : MDBX_SUCCESS; + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#endif /* MADV_DODUMP */ + +#ifdef MADV_WILLNEED + err = madvise(env->me_lck_mmap.lck, size, MADV_WILLNEED) + ? ignore_enosys(errno) + : MDBX_SUCCESS; + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#elif defined(POSIX_MADV_WILLNEED) + err = ignore_enosys( + posix_madvise(env->me_lck_mmap.lck, size, POSIX_MADV_WILLNEED)); + if (unlikely(MDBX_IS_ERROR(err))) + return err; +#endif /* MADV_WILLNEED */ +#endif /* MDBX_ENABLE_MADVISE */ + + struct MDBX_lockinfo *lck = env->me_lck_mmap.lck; + if (lck_seize_rc == MDBX_RESULT_TRUE) { + /* If we succeeded in getting the exclusive lock, then nobody is using the + * lock region and we should initialize it. */ + memset(lck, 0, (size_t)size); + jitter4testing(false); + lck->mti_magic_and_version = MDBX_LOCK_MAGIC; + lck->mti_os_and_format = MDBX_LOCK_FORMAT; +#if MDBX_ENABLE_PGOP_STAT + lck->mti_pgop_stat.wops.weak = 1; +#endif /* MDBX_ENABLE_PGOP_STAT */ + err = osal_msync(&env->me_lck_mmap, 0, (size_t)size, + MDBX_SYNC_DATA | MDBX_SYNC_SIZE); + if (unlikely(err != MDBX_SUCCESS)) { + ERROR("initial-%s for lck-file failed, err %d", "msync/fsync", err); + eASSERT(env, MDBX_IS_ERROR(err)); + return err; + } + } else { + if (lck->mti_magic_and_version != MDBX_LOCK_MAGIC) { + const bool invalid = (lck->mti_magic_and_version >> 8) != MDBX_MAGIC; + ERROR("lock region has %s", + invalid + ? "invalid magic" + : "incompatible version (only applications with nearly or the " + "same versions of libmdbx can share the same database)"); + return invalid ? 
MDBX_INVALID : MDBX_VERSION_MISMATCH; + } + if (lck->mti_os_and_format != MDBX_LOCK_FORMAT) { + ERROR("lock region has os/format signature 0x%" PRIx32 + ", expected 0x%" PRIx32, + lck->mti_os_and_format, MDBX_LOCK_FORMAT); + return MDBX_VERSION_MISMATCH; + } + } + + err = osal_lck_init(env, inprocess_neighbor, lck_seize_rc); + if (unlikely(err != MDBX_SUCCESS)) { + eASSERT(env, MDBX_IS_ERROR(err)); + return err; + } + + env->me_lck = lck; + eASSERT(env, !MDBX_IS_ERROR(lck_seize_rc)); + return lck_seize_rc; +} + /* Open and/or initialize the lock region for the environment. */ __cold static int setup_lck(MDBX_env *env, mdbx_mode_t mode) { eASSERT(env, env->me_lazy_fd != INVALID_HANDLE_VALUE); @@ -14615,157 +14759,10 @@ __cold static int setup_lck(MDBX_env *env, mdbx_mode_t mode) { env->me_lfd = INVALID_HANDLE_VALUE; } - /* beginning of a locked section ------------------------------------------ */ rthc_lock(); - err = rthc_register(env); - if (likely(err == MDBX_SUCCESS)) - err = osal_lck_seize(env); - - const int lck_seize_rc = err; - if (MDBX_IS_ERROR(err)) - goto bailout; - - struct MDBX_lockinfo *lck = nullptr; - if (env->me_lfd == INVALID_HANDLE_VALUE) { - lck = lckless_stub(env); - env->me_maxreaders = UINT_MAX; - DEBUG("lck-setup:%s%s%s", " lck-less", - (env->me_flags & MDBX_RDONLY) ? " readonly" : "", - (lck_seize_rc == MDBX_RESULT_TRUE) ? " exclusive" : " cooperative"); - goto done; - } - - DEBUG("lck-setup:%s%s%s", " with-lck", - (env->me_flags & MDBX_RDONLY) ? " readonly" : "", - (lck_seize_rc == MDBX_RESULT_TRUE) ? 
" exclusive" : " cooperative"); - - uint64_t size = 0; - err = osal_filesize(env->me_lfd, &size); - if (unlikely(err != MDBX_SUCCESS)) - goto bailout; - - if (lck_seize_rc == MDBX_RESULT_TRUE) { - size = ceil_powerof2(env->me_maxreaders * sizeof(MDBX_reader) + - sizeof(MDBX_lockinfo), - env->me_os_psize); - jitter4testing(false); - } else { - if (env->me_flags & MDBX_EXCLUSIVE) { - err = MDBX_BUSY; - goto bailout; - } - if (size > INT_MAX || (size & (env->me_os_psize - 1)) != 0 || - size < env->me_os_psize) { - ERROR("lck-file has invalid size %" PRIu64 " bytes", size); - err = MDBX_PROBLEM; - goto bailout; - } - } - - const size_t maxreaders = - ((size_t)size - sizeof(MDBX_lockinfo)) / sizeof(MDBX_reader); - if (maxreaders < 4) { - ERROR("lck-size too small (up to %" PRIuPTR " readers)", maxreaders); - err = MDBX_PROBLEM; - goto bailout; - } - env->me_maxreaders = (maxreaders <= MDBX_READERS_LIMIT) - ? (unsigned)maxreaders - : (unsigned)MDBX_READERS_LIMIT; - - err = osal_mmap((env->me_flags & MDBX_EXCLUSIVE) | MDBX_WRITEMAP, - &env->me_lck_mmap, (size_t)size, (size_t)size, - lck_seize_rc ? MMAP_OPTION_TRUNCATE | MMAP_OPTION_SEMAPHORE - : MMAP_OPTION_SEMAPHORE); - if (unlikely(err != MDBX_SUCCESS)) - goto bailout; - -#if MDBX_ENABLE_MADVISE -#ifdef MADV_DODUMP - err = madvise(env->me_lck_mmap.lck, size, MADV_DODUMP) ? ignore_enosys(errno) - : MDBX_SUCCESS; - if (unlikely(MDBX_IS_ERROR(err))) - goto bailout; -#endif /* MADV_DODUMP */ - -#ifdef MADV_WILLNEED - err = madvise(env->me_lck_mmap.lck, size, MADV_WILLNEED) - ? 
ignore_enosys(errno) - : MDBX_SUCCESS; - if (unlikely(MDBX_IS_ERROR(err))) - goto bailout; -#elif defined(POSIX_MADV_WILLNEED) - err = ignore_enosys( - posix_madvise(env->me_lck_mmap.lck, size, POSIX_MADV_WILLNEED)); - if (unlikely(MDBX_IS_ERROR(err))) - goto bailout; -#endif /* MADV_WILLNEED */ -#endif /* MDBX_ENABLE_MADVISE */ - - lck = env->me_lck_mmap.lck; - if (lck_seize_rc == MDBX_RESULT_TRUE) { - /* If we succeed got exclusive lock, then nobody is using the lock region - * and we should initialize it. */ - memset(lck, 0, (size_t)size); - jitter4testing(false); - lck->mti_magic_and_version = MDBX_LOCK_MAGIC; - lck->mti_os_and_format = MDBX_LOCK_FORMAT; -#if MDBX_ENABLE_PGOP_STAT - lck->mti_pgop_stat.wops.weak = 1; -#endif /* MDBX_ENABLE_PGOP_STAT */ - err = osal_msync(&env->me_lck_mmap, 0, (size_t)size, - MDBX_SYNC_DATA | MDBX_SYNC_SIZE); - if (unlikely(err != MDBX_SUCCESS)) { - ERROR("initial-%s for lck-file failed, err %d", "msync/fsync", err); - goto bailout; - } - } else { - if (lck->mti_magic_and_version != MDBX_LOCK_MAGIC) { - const bool invalid = (lck->mti_magic_and_version >> 8) != MDBX_MAGIC; - ERROR("lock region has %s", - invalid - ? "invalid magic" - : "incompatible version (only applications with nearly or the " - "same versions of libmdbx can share the same database)"); - err = invalid ? 
MDBX_INVALID : MDBX_VERSION_MISMATCH; - goto bailout; - } - if (lck->mti_os_and_format != MDBX_LOCK_FORMAT) { - ERROR("lock region has os/format signature 0x%" PRIx32 - ", expected 0x%" PRIx32, - lck->mti_os_and_format, MDBX_LOCK_FORMAT); - err = MDBX_VERSION_MISMATCH; - goto bailout; - } - } - - MDBX_env *inprocess_neighbor = nullptr; - if (lck_seize_rc == MDBX_RESULT_TRUE) { - err = rthc_uniq_check(&env->me_lck_mmap, &inprocess_neighbor); - if (MDBX_IS_ERROR(err)) - goto bailout; - if (inprocess_neighbor && - ((runtime_flags & MDBX_DBG_LEGACY_MULTIOPEN) == 0 || - (inprocess_neighbor->me_flags & MDBX_EXCLUSIVE) != 0)) { - err = MDBX_BUSY; - goto bailout; - } - } - - err = osal_lck_init(env, inprocess_neighbor, lck_seize_rc); - if (MDBX_IS_ERROR(err)) - goto bailout; - -done: - env->me_lck = lck; - eASSERT(env, !MDBX_IS_ERROR(lck_seize_rc)); - -bailout: - /* Calling osal_lck_destroy() is required to restore POSIX-filelock - * and this job will be done by env_close(). */ + err = setup_lck_locked(env); rthc_unlock(); - /* end of a locked section ------------------------------------------------ */ - return lck_seize_rc; + return err; } __cold int mdbx_is_readahead_reasonable(size_t volume, intptr_t redundancy) { From 72332a8f9e8dae39b33ee4c9add1b0c26097fc83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 8 Dec 2023 15:07:37 +0300 Subject: [PATCH 099/137] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BD=D0=B5=D1=81=D1=83=D1=89?= =?UTF-8?q?=D0=B5=D1=81=D1=82=D0=B2=D0=B5=D0=BD=D0=BD=D0=BE=D0=B3=D0=BE=20?= =?UTF-8?q?=D0=B7=D0=B0=D0=BC=D0=B5=D1=87=D0=B0=D0=BD=D0=B8=D1=8F=20Coveri?= =?UTF-8?q?ty.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index 
1fdc8528..d588bb5c 100644 --- a/src/core.c +++ b/src/core.c @@ -19626,8 +19626,8 @@ int mdbx_cursor_compare(const MDBX_cursor *l, const MDBX_cursor *r, const int incomparable = INT16_MAX + 1; if (unlikely(!l)) return r ? -incomparable * 9 : 0; - if (unlikely(!r)) - return l ? incomparable * 9 : 0; + else if (unlikely(!r)) + return incomparable * 9; if (unlikely(l->mc_signature != MDBX_MC_LIVE)) return (r->mc_signature == MDBX_MC_LIVE) ? -incomparable * 8 : 0; From f16c4303bff02328da08f9bc82bc9b8cf2e62062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 8 Dec 2023 16:47:40 +0300 Subject: [PATCH 100/137] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B5=D1=87?= =?UTF-8?q?=D0=B0=D1=82=D0=BA=D0=B8=20=D0=B2=20=D0=BA=D0=BE=D0=BC=D0=BC?= =?UTF-8?q?=D0=B5=D0=BD=D1=82=D0=B0=D1=80=D0=B8=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mdbx.h b/mdbx.h index bf5bc560..693cfee6 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5868,7 +5868,7 @@ typedef struct MDBX_chk_context { problems_gc, problems_kv, total_problems; uint64_t steady_txnid, recent_txnid; /** Указатель на массив размером subdb_total с указателями на экземпляры - * структур MDBX_chk_subdb_t с информацией о всех таблицах ключ-значние, + * структур MDBX_chk_subdb_t с информацией о всех таблицах ключ-значение, * включая MainDB и GC/FreeDB. 
*/ const MDBX_chk_subdb_t *const *subdbs; } result; From fbc83dd069a5d955ba378733de7f8204aacb9ff8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 19 Feb 2024 01:20:27 +0300 Subject: [PATCH 101/137] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BB=D0=BE=D0=B6=D0=BD?= =?UTF-8?q?=D0=BE=D0=B9=20=D0=BE=D1=88=D0=B8=D0=B1=D0=BA=D0=B8=20`MDBX=5FC?= =?UTF-8?q?ORRUPTED=20(-30796)`=20=D0=B2=20=D1=81=D1=86=D0=B5=D0=BD=D0=B0?= =?UTF-8?q?=D1=80=D0=B8=D0=B8=20"odd=20dupfixed".?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Повреждение БД и/или потери данных не происходило, проблема лишь в возврате ложной ошибки. Благодарю пользователя/разработчика @Dvirsw (https://t.me/Dvirsw) за сообщения о проблеме и предоставление минимального/оптимального сценария воспроизведения. -- Проблема была из-за излишнего условия при контроле внутреннего поля mp_upper в ходе проверки структуры страниц БД. Поле mp_upper указывает на нижнюю границу заполнения страницы от конца к началу. Вследствие того, что значения ключей выравниваются на четную границу, это поле четно во всех случаях за исключением LEAF2-страницы (листовая страница вложенного дерева для множественных значений фиксированной/одинаковой длины одного ключа), на которой размещено нечетное количество значений нечетной длины. Ошибка не проявлялась в большинстве случаев (в том числе в стохастических тестах), так как штатно лишняя проверка производилась только при чтении страницы и перебалансировке ключей, но не при каждом добавлении значения. Тем не менее, сценарии тестов требуют доработки/расширения для явного добавления нечетных dupfixed-сценариев.
--- src/core.c | 66 ++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 17 deletions(-) diff --git a/src/core.c b/src/core.c index d588bb5c..cba3f642 100644 --- a/src/core.c +++ b/src/core.c @@ -8416,7 +8416,7 @@ __hot static int page_touch(MDBX_cursor *mc) { np->mp_txnid = txn->mt_front; return MDBX_SUCCESS; } - tASSERT(txn, !IS_OVERFLOW(mp)); + tASSERT(txn, !IS_OVERFLOW(mp) && !IS_SUBP(mp)); if (IS_FROZEN(txn, mp)) { /* CoW the page */ @@ -16102,8 +16102,12 @@ __hot static __always_inline int page_get_checker_lite(const uint16_t ILL, if (((ILL & P_OVERFLOW) || !IS_OVERFLOW(page)) && (ILL & (P_BRANCH | P_LEAF | P_LEAF2)) == 0) { - if (unlikely(page->mp_upper < page->mp_lower || - ((page->mp_lower | page->mp_upper) & 1) || + /* Контроль четности page->mp_upper тут либо приводит к ложным ошибкам, + * либо слишком дорог по количеству операций. Заковырка в том, что mp_upper + * может быть нечетным на LEAF2-страницах, при нечетном количестве элементов + * нечетной длины. Поэтому четность page->mp_upper здесь не проверяется, но + * соответствующие полные проверки есть в page_check(). 
*/ + if (unlikely(page->mp_upper < page->mp_lower || (page->mp_lower & 1) || PAGEHDRSZ + page->mp_upper > txn->mt_env->me_psize)) return bad_page(page, "invalid page' lower(%u)/upper(%u) with limit %zu\n", @@ -18082,9 +18086,9 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, mc->mc_xcursor->mx_dbx.md_klen_min = mc->mc_xcursor->mx_dbx.md_klen_max = data->iov_len); + if (mc->mc_flags & C_SUB) + npr.page->mp_flags |= P_LEAF2; } - if ((mc->mc_db->md_flags & (MDBX_DUPSORT | MDBX_DUPFIXED)) == MDBX_DUPFIXED) - npr.page->mp_flags |= P_LEAF2; mc->mc_flags |= C_INITIALIZED; } @@ -18361,7 +18365,11 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (unlikely(fp_flags & P_LEAF2)) { memcpy(page_data(mp), page_data(fp), page_numkeys(fp) * fp->mp_leaf2_ksize); + cASSERT(mc, + (((mp->mp_leaf2_ksize & page_numkeys(mp)) ^ mp->mp_upper) & + 1) == 0); } else { + cASSERT(mc, (mp->mp_upper & 1) == 0); memcpy(ptr_disp(mp, mp->mp_upper + PAGEHDRSZ), ptr_disp(fp, fp->mp_upper + PAGEHDRSZ), olddata.iov_len - fp->mp_upper - PAGEHDRSZ); @@ -18979,6 +18987,7 @@ __hot static int __must_check_result node_add_leaf2(MDBX_cursor *mc, const size_t ksize = mc->mc_db->md_xsize; cASSERT(mc, ksize == key->iov_len); const size_t nkeys = page_numkeys(mp); + cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); /* Just using these for counting */ const intptr_t lower = mp->mp_lower + sizeof(indx_t); @@ -18998,6 +19007,8 @@ __hot static int __must_check_result node_add_leaf2(MDBX_cursor *mc, memmove(ptr_disp(ptr, ksize), ptr, diff * ksize); /* insert new key */ memcpy(ptr, key->iov_base, ksize); + + cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); return MDBX_SUCCESS; } @@ -19164,6 +19175,7 @@ __hot static void node_del(MDBX_cursor *mc, size_t ksize) { mp->mp_lower -= sizeof(indx_t); cASSERT(mc, (size_t)UINT16_MAX - mp->mp_upper >= ksize - sizeof(indx_t)); mp->mp_upper += (indx_t)(ksize - sizeof(indx_t)); + 
cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); return; } @@ -20830,8 +20842,7 @@ __cold static int page_check(const MDBX_cursor *const mc, break; } - if (unlikely(mp->mp_upper < mp->mp_lower || - ((mp->mp_lower | mp->mp_upper) & 1) || + if (unlikely(mp->mp_upper < mp->mp_lower || (mp->mp_lower & 1) || PAGEHDRSZ + mp->mp_upper > env->me_psize)) rc = bad_page(mp, "invalid page lower(%u)/upper(%u) with limit %zu\n", mp->mp_lower, mp->mp_upper, page_space(env)); @@ -20847,11 +20858,6 @@ __cold static int page_check(const MDBX_cursor *const mc, bad_page(mp, "%s-page nkeys (%zu) < %u\n", IS_BRANCH(mp) ? "branch" : "leaf", nkeys, 1 + IS_BRANCH(mp)); } - if (!IS_LEAF2(mp) && unlikely(PAGEHDRSZ + mp->mp_upper + - nkeys * sizeof(MDBX_node) + nkeys - 1 > - env->me_psize)) - rc = bad_page(mp, "invalid page upper (%u) for nkeys %zu with limit %zu\n", - mp->mp_upper, nkeys, page_space(env)); const size_t ksize_max = keysize_max(env->me_psize, 0); const size_t leaf2_ksize = mp->mp_leaf2_ksize; @@ -20860,8 +20866,20 @@ __cold static int page_check(const MDBX_cursor *const mc, (mc->mc_db->md_flags & MDBX_DUPFIXED) == 0)) rc = bad_page(mp, "unexpected leaf2-page (db-flags 0x%x)\n", mc->mc_db->md_flags); - if (unlikely(leaf2_ksize < 1 || leaf2_ksize > ksize_max)) - rc = bad_page(mp, "invalid leaf2-key length (%zu)\n", leaf2_ksize); + else if (unlikely(leaf2_ksize != mc->mc_db->md_xsize)) + rc = bad_page(mp, "invalid leaf2_ksize %zu\n", leaf2_ksize); + else if (unlikely(((leaf2_ksize & nkeys) ^ mp->mp_upper) & 1)) + rc = bad_page( + mp, "invalid page upper (%u) for nkeys %zu with leaf2-length %zu\n", + mp->mp_upper, nkeys, leaf2_ksize); + } else { + if (unlikely((mp->mp_upper & 1) || PAGEHDRSZ + mp->mp_upper + + nkeys * sizeof(MDBX_node) + + nkeys - 1 > + env->me_psize)) + rc = + bad_page(mp, "invalid page upper (%u) for nkeys %zu with limit %zu\n", + mp->mp_upper, nkeys, page_space(env)); } MDBX_val here, prev = {0, 0}; @@ -20869,7 +20887,7 @@ __cold static int 
page_check(const MDBX_cursor *const mc, if (IS_LEAF2(mp)) { const char *const key = page_leaf2key(mp, i, leaf2_ksize); if (unlikely(end_of_page < key + leaf2_ksize)) { - rc = bad_page(mp, "leaf2-key beyond (%zu) page-end\n", + rc = bad_page(mp, "leaf2-item beyond (%zu) page-end\n", key + leaf2_ksize - end_of_page); continue; } @@ -20878,7 +20896,7 @@ __cold static int page_check(const MDBX_cursor *const mc, if (unlikely(leaf2_ksize < mc->mc_dbx->md_klen_min || leaf2_ksize > mc->mc_dbx->md_klen_max)) rc = bad_page( - mp, "leaf2-key size (%zu) <> min/max key-length (%zu/%zu)\n", + mp, "leaf2-item size (%zu) <> min/max length (%zu/%zu)\n", leaf2_ksize, mc->mc_dbx->md_klen_min, mc->mc_dbx->md_klen_max); else mc->mc_dbx->md_klen_min = mc->mc_dbx->md_klen_max = leaf2_ksize; @@ -20887,7 +20905,7 @@ __cold static int page_check(const MDBX_cursor *const mc, here.iov_base = (void *)key; here.iov_len = leaf2_ksize; if (prev.iov_base && unlikely(mc->mc_dbx->md_cmp(&prev, &here) >= 0)) - rc = bad_page(mp, "leaf2-key #%zu wrong order (%s >= %s)\n", i, + rc = bad_page(mp, "leaf2-item #%zu wrong order (%s >= %s)\n", i, DKEY(&prev), DVAL(&here)); prev = here; } @@ -21299,6 +21317,8 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, DKBUF; MDBX_page *const mp = mc->mc_pg[mc->mc_top]; + cASSERT(mc, (mp->mp_flags & P_ILL_BITS) == 0); + const size_t newindx = mc->mc_ki[mc->mc_top]; size_t nkeys = page_numkeys(mp); if (AUDIT_ENABLED()) { @@ -21414,6 +21434,15 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, if (page_room(mn.mc_pg[ptop]) < branch_size(env, &sepkey)) split_indx = minkeys; } + if (foliage) { + TRACE("pure-left: foliage %u, top %i, ptop %zu, split_indx %zi, " + "minkeys %zi, sepkey %s, parent-room %zu, need4split %zu", + foliage, mc->mc_top, ptop, split_indx, minkeys, + DKEY_DEBUG(&sepkey), page_room(mc->mc_pg[ptop]), + branch_size(env, &sepkey)); + TRACE("pure-left: newkey %s, newdata %s, newindx %zu", + DKEY_DEBUG(newkey), 
DVAL_DEBUG(newdata), newindx); + } } } @@ -21459,6 +21488,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, mp->mp_lower += sizeof(indx_t); cASSERT(mc, mp->mp_upper >= ksize - sizeof(indx_t)); mp->mp_upper -= (indx_t)(ksize - sizeof(indx_t)); + cASSERT(mc, (((ksize & page_numkeys(mp)) ^ mp->mp_upper) & 1) == 0); } else { memcpy(sister->mp_ptrs, split, distance * ksize); void *const ins = page_leaf2key(sister, distance, ksize); @@ -21471,6 +21501,8 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, sister->mp_upper -= (indx_t)(ksize - sizeof(indx_t)); cASSERT(mc, distance <= (int)UINT16_MAX); mc->mc_ki[mc->mc_top] = (indx_t)distance; + cASSERT(mc, + (((ksize & page_numkeys(sister)) ^ sister->mp_upper) & 1) == 0); } if (AUDIT_ENABLED()) { From f0cfedc26f5557ab5283868765897fbe326a82f1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 20 Feb 2024 11:20:09 +0300 Subject: [PATCH 102/137] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`extra/dupfixed=5Faddodd?= =?UTF-8?q?`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 5 ++ test/extra/dupfixed_addodd.c | 93 ++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) create mode 100644 test/extra/dupfixed_addodd.c diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 232ff2e6..ac11ef63 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -73,6 +73,10 @@ if(UNIX AND NOT SUBPROJECT) target_include_directories(test_extra_upsert_alldups PRIVATE "${PROJECT_SOURCE_DIR}") target_link_libraries(test_extra_upsert_alldups ${TOOL_MDBX_LIB}) + add_executable(test_extra_dupfixed_addodd extra/dupfixed_addodd.c) + target_include_directories(test_extra_dupfixed_addodd PRIVATE "${PROJECT_SOURCE_DIR}") + 
target_link_libraries(test_extra_dupfixed_addodd ${TOOL_MDBX_LIB}) + if(MDBX_BUILD_CXX) add_executable(test_extra_maindb_ordinal extra/maindb_ordinal.c++) target_include_directories(test_extra_maindb_ordinal PRIVATE "${PROJECT_SOURCE_DIR}") @@ -183,6 +187,7 @@ else() if(UNIX AND NOT SUBPROJECT) add_test(NAME extra_upsert_alldups COMMAND test_extra_upsert_alldups) + add_test(NAME extra_dupfixed_addodd COMMAND test_extra_dupfixed_addodd) if(MDBX_BUILD_CXX) add_test(NAME extra_maindb_ordinal COMMAND test_extra_maindb_ordinal) add_test(NAME extra_dupfixed_multiple COMMAND test_extra_dupfixed_multiple) diff --git a/test/extra/dupfixed_addodd.c b/test/extra/dupfixed_addodd.c new file mode 100644 index 00000000..da9ba944 --- /dev/null +++ b/test/extra/dupfixed_addodd.c @@ -0,0 +1,93 @@ +/* + * @Dvirsw (https://t.me/Dvirsw) + * I think there is a bug with DUPFIXED. The following code fails. + * + * https://t.me/libmdbx/5368 + */ + +#include +#include + +#include "mdbx.h" +#include +#include +#include +#include +#include +#include +#include +#include + +int main() { + int rc; + MDBX_env *env = NULL; + MDBX_dbi dbi = 0; + MDBX_val key, data; + MDBX_txn *txn = NULL; + + rc = mdbx_env_create(&env); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_env_create: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + + rc = mdbx_env_set_maxdbs(env, 1); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_env_create: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + + rc = mdbx_env_open(env, "./example-db", + MDBX_NOSUBDIR | MDBX_COALESCE | MDBX_LIFORECLAIM, 0664); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_env_open: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + + rc = mdbx_txn_begin(env, NULL, 0, &txn); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_txn_begin: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + + rc = mdbx_dbi_open(txn, "test", MDBX_DUPSORT | MDBX_DUPFIXED | MDBX_CREATE, + &dbi); + if (rc != 
MDBX_SUCCESS) { + fprintf(stderr, "mdbx_dbi_open: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + + char key_bytes[32] = {0}; + key.iov_len = 32; + key.iov_base = key_bytes; + + // Another put after this will fail. + unsigned char idx; + for (idx = 0; idx < 129; idx++) { + char data_bytes[15] = {idx}; + data.iov_len = 15; + data.iov_base = data_bytes; + rc = mdbx_put(txn, dbi, &key, &data, 0); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_put: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } + } + + // This will fail and exit. + char data_bytes[15] = {idx}; + data.iov_len = 15; + data.iov_base = data_bytes; + rc = mdbx_put(txn, dbi, &key, &data, 0); + if (rc != MDBX_SUCCESS) { + fprintf(stderr, "mdbx_put: (%d) %s\n", rc, mdbx_strerror(rc)); + fprintf(stderr, "expected failure\n"); + exit(EXIT_FAILURE); + } + + rc = mdbx_txn_commit(txn); + if (rc) { + fprintf(stderr, "mdbx_txn_commit: (%d) %s\n", rc, mdbx_strerror(rc)); + exit(EXIT_FAILURE); + } +} From ba719ef12a413f5dba1ddc4c3879627f859ab4ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 21 Feb 2024 01:28:51 +0300 Subject: [PATCH 103/137] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D1=80=D0=B0?= =?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20after-fork=20=D1=81=D1=86?= =?UTF-8?q?=D0=B5=D0=BD=D0=B0=D1=80=D0=B8=D0=B5=D0=B2=20=D1=81=20=D1=83?= =?UTF-8?q?=D1=81=D1=82=D1=80=D0=B0=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=D0=BC=20?= =?UTF-8?q?=D0=BB=D0=BE=D0=B3=D0=B8=D1=87=D0=B5=D1=81=D0=BA=D0=BE=D0=B9=20?= =?UTF-8?q?=D0=BE=D1=88=D0=B8=D0=B1=D0=BA=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/fork.c++ | 93 +++++++++++++++++++++++++++++++++++++++++++++------ test/test.h++ | 5 +-- 2 files changed, 86 insertions(+), 12 deletions(-) diff --git a/test/fork.c++ b/test/fork.c++ index 7f1c9b19..81af98b4 100644 --- a/test/fork.c++ +++ 
b/test/fork.c++ @@ -22,13 +22,58 @@ class testcase_smoke4fork : public testcase { using inherited = testcase; +protected: + bool dbi_invalid{true}; + bool dbi_stable{false}; + unsigned dbi_state{0}; + public: testcase_smoke4fork(const actor_config &config, const mdbx_pid_t pid) : testcase(config, pid) {} + virtual void txn_end(bool abort) override; bool run() override; virtual bool smoke() = 0; + bool open_dbi(); }; +bool testcase_smoke4fork::open_dbi() { + if (!dbi || dbi_invalid) { + if (dbi_stable || + (mdbx_txn_flags(txn_guard.get()) & int(MDBX_TXN_RDONLY)) == 0) { + dbi = db_table_open(!dbi_stable); + dbi_invalid = false; + } + } + + dbi_state = 0; + if (dbi && !dbi_invalid) { + unsigned unused_dbi_flags; + int err = + mdbx_dbi_flags_ex(txn_guard.get(), dbi, &unused_dbi_flags, &dbi_state); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_dbi_flags_ex()", err); + if ((dbi_state & (MDBX_DBI_CREAT | MDBX_DBI_FRESH)) == 0) + dbi_stable = true; + } + return !dbi_invalid; +} + +void testcase_smoke4fork::txn_end(bool abort) { + if (dbi) { + if (abort) { + if (dbi_state & MDBX_DBI_CREAT) + dbi_stable = false; + if (dbi_state & MDBX_DBI_FRESH) + dbi_invalid = true; + } else { + if (dbi_state & (MDBX_DBI_CREAT | MDBX_DBI_FRESH)) + dbi_stable = true; + } + dbi_state = 0; + } + inherited::txn_end(abort); +} + bool testcase_smoke4fork::run() { static std::vector history; const pid_t current_pid = getpid(); @@ -52,6 +97,7 @@ bool testcase_smoke4fork::run() { current_pid, mdbx_strerror(err)); return false; } + open_dbi(); if (flipcoin()) { if (!smoke()) { @@ -65,11 +111,11 @@ bool testcase_smoke4fork::run() { log_verbose("%s[deep %d, pid %d] probe %s", "pre-fork", deep, current_pid, "skipped"); #ifdef __SANITIZE_ADDRESS__ - const bool abort_txn_to_avoid_memleak = true; + const bool commit_txn_to_avoid_memleak = true; #else - const bool abort_txn_to_avoid_memleak = !RUNNING_ON_VALGRIND && flipcoin(); + const bool commit_txn_to_avoid_memleak = !RUNNING_ON_VALGRIND 
&& flipcoin(); #endif - if (abort_txn_to_avoid_memleak && txn_guard) + if (commit_txn_to_avoid_memleak && txn_guard) txn_end(false); } @@ -90,8 +136,14 @@ bool testcase_smoke4fork::run() { log_flush(); if (err != MDBX_SUCCESS) failure_perror("mdbx_env_resurrect_after_fork()", err); - if (txn_guard) + if (txn_guard) { + if (dbi_state & MDBX_DBI_CREAT) + dbi_invalid = true; + // if (dbi_state & MDBX_DBI_FRESH) + // dbi_invalid = true; + dbi_state = 0; mdbx_txn_abort(txn_guard.release()); + } if (!smoke()) { log_notice("%s[deep %d, pid %d] probe %s", "fork-child", deep, new_pid, "failed"); @@ -182,9 +234,19 @@ bool testcase_forkread::smoke() { failure_perror("mdbx_env_info_ex()", err); uint64_t seq; - err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 0); - if (unlikely(err != MDBX_SUCCESS)) - failure_perror("mdbx_dbi_sequence(get)", err); + if (dbi_invalid) { + err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 0); + if (unlikely(err != (dbi ? MDBX_BAD_DBI : MDBX_SUCCESS))) + failure("unexpected '%s' from mdbx_dbi_sequence(get, bad_dbi %d)", + mdbx_strerror(err), dbi); + open_dbi(); + } + if (!dbi_invalid) { + err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 0); + if (unlikely(err != MDBX_SUCCESS)) + failure("unexpected '%s' from mdbx_dbi_sequence(get, dbi %d)", + mdbx_strerror(err), dbi); + } txn_end(false); return true; } @@ -210,10 +272,21 @@ bool testcase_forkwrite::smoke() { if (!txn_guard) txn_begin(false); + uint64_t seq; - int err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 1); - if (unlikely(err != MDBX_SUCCESS)) - failure_perror("mdbx_dbi_sequence(inc)", err); + if (dbi_invalid) { + int err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 0); + if (unlikely(err != (dbi ? 
MDBX_BAD_DBI : MDBX_EACCESS))) + failure("unexpected '%s' from mdbx_dbi_sequence(get, bad_dbi %d)", + mdbx_strerror(err), dbi); + open_dbi(); + } + if (!dbi_invalid) { + int err = mdbx_dbi_sequence(txn_guard.get(), dbi, &seq, 1); + if (unlikely(err != MDBX_SUCCESS)) + failure("unexpected '%s' from mdbx_dbi_sequence(inc, dbi %d)", + mdbx_strerror(err), dbi); + } txn_end(false); if (!firstly_read && !testcase_forkread::smoke()) diff --git a/test/test.h++ b/test/test.h++ index 96d93a7c..b03b80e1 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -248,9 +248,10 @@ protected: void db_prepare(); void db_open(); void db_close(); - void txn_begin(bool readonly, MDBX_txn_flags_t flags = MDBX_TXN_READWRITE); + virtual void txn_begin(bool readonly, + MDBX_txn_flags_t flags = MDBX_TXN_READWRITE); int breakable_commit(); - void txn_end(bool abort); + virtual void txn_end(bool abort); int breakable_restart(); void txn_restart(bool abort, bool readonly, MDBX_txn_flags_t flags = MDBX_TXN_READWRITE); From 603ce05435dddbf865ae69631c9a9eb3e31ad947 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 22 Feb 2024 16:35:44 +0300 Subject: [PATCH 104/137] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20vlen=5Fmin=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D1=80=D0=B5=D0=B6=D0=B8=D0=BC=D0=B0=20dupfixed.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index cba3f642..6abf0d06 100644 --- a/src/core.c +++ b/src/core.c @@ -16295,7 +16295,7 @@ static int setup_dbx(MDBX_dbx *const dbx, const MDBX_db *const db, dbx->md_vlen_min = (db->md_flags & MDBX_INTEGERDUP) ? 4 /* sizeof(uint32_t) */ - : ((db->md_flags & MDBX_DUPFIXED) ? 1 : 0); + : ((db->md_flags & MDBX_DUPFIXED) ? 
sizeof(indx_t) : 0); dbx->md_vlen_max = valsize_max(pagesize, db->md_flags); assert(dbx->md_vlen_max != (size_t)-1); From b1dcd07be475114ddc9fd9759d22b9fbb31bfdc1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 21 Feb 2024 01:28:19 +0300 Subject: [PATCH 105/137] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D1=88=D0=B8=D0=B1=D0=BA?= =?UTF-8?q?=D0=B8=20=D0=B2=20=D0=BF=D0=BE=D0=B4=D0=B4=D0=B5=D1=80=D0=B6?= =?UTF-8?q?=D0=BA=D0=B5=20MDBX=5FENABLE=5FDBI=5FLOCKFREE.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/src/core.c b/src/core.c index 6abf0d06..e6528c7c 100644 --- a/src/core.c +++ b/src/core.c @@ -15742,6 +15742,7 @@ __cold static int env_close(MDBX_env *env, bool resurrect_after_fork) { next = ptr->next; osal_free(ptr); } + env->me_defer_free = nullptr; #endif /* MDBX_ENABLE_DBI_LOCKFREE */ if (!(env->me_flags & MDBX_RDONLY)) From fb5dbbdf202f0d0a682326f8af92682f3ace8915 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 20 Feb 2024 13:45:46 +0300 Subject: [PATCH 106/137] =?UTF-8?q?mdbx-test:=20=D1=81=D0=BE=D0=BE=D1=82?= =?UTF-8?q?=D0=B2=D0=B5=D1=82=D1=81=D1=82=D0=B2=D0=B8=D0=B5=20=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D1=82=D0=BE=D0=BA=D0=BE=D0=BB=D0=B8=D1=80=D1=83=D0=B5?= =?UTF-8?q?=D0=BC=D1=8B=D1=85=20=D0=B8=D0=BC=D0=B5=D0=BD=20=D1=82=D0=B5?= =?UTF-8?q?=D1=81=D1=82=D0=BE=D0=B2=20=D0=BE=D0=BF=D1=86=D0=B8=D1=8F=D0=BC?= =?UTF-8?q?=20=D0=BA=D0=BE=D0=BC=D0=B0=D0=BD=D0=B4=D0=BD=D0=BE=D0=B9=20?= =?UTF-8?q?=D1=81=D1=82=D1=80=D0=BE=D0=BA=D0=B8=20(=D0=BA=D0=BE=D1=81?= =?UTF-8?q?=D0=BC=D0=B5=D1=82=D0=B8=D0=BA=D0=B0).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit --- test/test.c++ | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test.c++ b/test/test.c++ index 79ca8a43..b2d9da86 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -24,9 +24,9 @@ const char *testcase2str(const actor_testcase testcase) { case ac_hill: return "hill"; case ac_deadread: - return "deadread"; + return "dead.reader"; case ac_deadwrite: - return "deadwrite"; + return "dead.writer"; case ac_jitter: return "jitter"; case ac_try: @@ -41,9 +41,9 @@ const char *testcase2str(const actor_testcase testcase) { return "nested"; #if !defined(_WIN32) && !defined(_WIN64) case ac_forkread: - return "forkread"; + return "fork.reader"; case ac_forkwrite: - return "forkwrite"; + return "fork.writer"; #endif /* Windows */ } } From 164d1125075f16e0f74afa451bd6c9d966e4c7b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 20 Feb 2024 23:52:09 +0300 Subject: [PATCH 107/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20chk-=D1=84=D1=83=D0=BD=D0=BA=D1=86?= =?UTF-8?q?=D0=B8=D0=BE=D0=BD=D0=B0=D0=BB=D0=B0=20=D1=81=20=D1=83=D1=81?= =?UTF-8?q?=D1=82=D1=80=D0=B0=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=D0=BC=20=D0=BE?= =?UTF-8?q?=D1=88=D0=B8=D0=B1=D0=BE=D0=BA=20=D0=B8=20=D0=BD=D0=B5=D0=B4?= =?UTF-8?q?=D0=BE=D1=87=D0=B5=D1=82=D0=BE=D0=B2.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 100 ++++++++++++++++++++++++++++--------------------- src/mdbx_chk.c | 3 +- 2 files changed, 60 insertions(+), 43 deletions(-) diff --git a/src/core.c b/src/core.c index e6528c7c..1ba69ff2 100644 --- a/src/core.c +++ b/src/core.c @@ -24529,32 +24529,35 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, (mp ? 
page_room(mp) : pagesize - header_size) - payload_size; size_t align_bytes = 0; - for (size_t i = 0; err == MDBX_SUCCESS && i < nentries; - align_bytes += ((payload_size + align_bytes) & 1), ++i) { + for (size_t i = 0; err == MDBX_SUCCESS && i < nentries; ++i) { if (type == MDBX_page_dupfixed_leaf) { /* LEAF2 pages have no mp_ptrs[] or node headers */ payload_size += mp->mp_leaf2_ksize; continue; } - MDBX_node *node = page_node(mp, i); - payload_size += NODESIZE + node_ks(node); + const MDBX_node *node = page_node(mp, i); + header_size += NODESIZE; + const size_t node_key_size = node_ks(node); + payload_size += node_key_size; if (type == MDBX_page_branch) { assert(i > 0 || node_ks(node) == 0); + align_bytes += node_key_size & 1; continue; } + const size_t node_data_size = node_ds(node); assert(type == MDBX_page_leaf); switch (node_flags(node)) { case 0 /* usual node */: - payload_size += node_ds(node); + payload_size += node_data_size; + align_bytes += (node_key_size + node_data_size) & 1; break; case F_BIGDATA /* long data on the large/overflow page */: { - payload_size += sizeof(pgno_t); const pgno_t large_pgno = node_largedata_pgno(node); - const size_t over_payload = node_ds(node); + const size_t over_payload = node_data_size; const size_t over_header = PAGEHDRSZ; npages = 1; @@ -24573,27 +24576,31 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, over_payload, over_header, over_unused); if (unlikely(rc != MDBX_SUCCESS)) return (rc == MDBX_RESULT_TRUE) ? 
MDBX_SUCCESS : rc; + payload_size += sizeof(pgno_t); + align_bytes += node_key_size & 1; } break; case F_SUBDATA /* sub-db */: { - const size_t namelen = node_ks(node); - payload_size += node_ds(node); - if (unlikely(namelen == 0 || node_ds(node) != sizeof(MDBX_db))) { + const size_t namelen = node_key_size; + if (unlikely(namelen == 0 || node_data_size != sizeof(MDBX_db))) { assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } + header_size += node_data_size; + align_bytes += (node_key_size + node_data_size) & 1; } break; case F_SUBDATA | F_DUPDATA /* dupsorted sub-tree */: - payload_size += sizeof(MDBX_db); - if (unlikely(node_ds(node) != sizeof(MDBX_db))) { + if (unlikely(node_data_size != sizeof(MDBX_db))) { assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; } + header_size += node_data_size; + align_bytes += (node_key_size + node_data_size) & 1; break; case F_DUPDATA /* short sub-page */: { - if (unlikely(node_ds(node) <= PAGEHDRSZ)) { + if (unlikely(node_data_size <= PAGEHDRSZ || (node_data_size & 1))) { assert(err == MDBX_CORRUPTED); err = MDBX_CORRUPTED; break; @@ -24621,16 +24628,17 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, err = MDBX_CORRUPTED; } - for (size_t j = 0; err == MDBX_SUCCESS && j < nsubkeys; - subalign_bytes += ((subpayload_size + subalign_bytes) & 1), ++j) { - + for (size_t j = 0; err == MDBX_SUCCESS && j < nsubkeys; ++j) { if (subtype == MDBX_subpage_dupfixed_leaf) { /* LEAF2 pages have no mp_ptrs[] or node headers */ subpayload_size += sp->mp_leaf2_ksize; } else { assert(subtype == MDBX_subpage_leaf); - MDBX_node *subnode = page_node(sp, j); - subpayload_size += NODESIZE + node_ks(subnode) + node_ds(subnode); + const MDBX_node *subnode = page_node(sp, j); + const size_t subnode_size = node_ks(subnode) + node_ds(subnode); + subheader_size += NODESIZE; + subpayload_size += subnode_size; + subalign_bytes += subnode_size & 1; if (unlikely(node_flags(subnode) != 0)) { assert(err == MDBX_CORRUPTED); err = 
MDBX_CORRUPTED; @@ -24639,7 +24647,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, } const int rc = - ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, sdb, node_ds(node), + ctx->mw_visitor(pgno, 0, ctx->mw_user, deep + 1, sdb, node_data_size, subtype, err, nsubkeys, subpayload_size, subheader_size, subunused_size + subalign_bytes); if (unlikely(rc != MDBX_SUCCESS)) @@ -24647,7 +24655,7 @@ __cold static int walk_tree(mdbx_walk_ctx_t *ctx, const pgno_t pgno, header_size += subheader_size; unused_size += subunused_size; payload_size += subpayload_size; - align_bytes += subalign_bytes; + align_bytes += subalign_bytes + (node_key_size & 1); } break; default: @@ -27581,19 +27589,12 @@ chk_pgvisitor(const size_t pgno, const unsigned npages, void *const ctx, pagetype_caption, sizeof(long), header_bytes, env->me_psize - sizeof(long)); } - if (payload_bytes < 1) { - if (nentries > 1) { - chk_object_issue(scope, "page", pgno, "zero size-of-entry", - "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE - " entries", - pagetype_caption, payload_bytes, nentries); - } else { - chk_object_issue(scope, "page", pgno, "empty", - "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE - " entries, deep %i", - pagetype_caption, payload_bytes, nentries, deep); - sdb->pages.empty += 1; - } + if (nentries < 1 || (pagetype == MDBX_page_branch && nentries < 2)) { + chk_object_issue(scope, "page", pgno, nentries ? 
"half-empty" : "empty", + "%s-page: payload %" PRIuSIZE " bytes, %" PRIuSIZE + " entries, deep %i", + pagetype_caption, payload_bytes, nentries, deep); + sdb->pages.empty += 1; } if (npages) { @@ -28402,13 +28403,28 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { chk_line_end( chk_puts(chk_line_begin(inner, MDBX_chk_verbose), "performs full check recent-txn-id with meta-pages")); - if (prefer_steady_txnid != chk->envinfo.mi_recent_txnid) { - chk_scope_issue( - inner, - "steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64 - " != %" PRIi64 ")", - prefer_steady_metanum, prefer_steady_txnid, - chk->envinfo.mi_recent_txnid); + eASSERT(env, recent_txnid == chk->envinfo.mi_recent_txnid); + if (prefer_steady_txnid != recent_txnid) { + if ((chk->flags & MDBX_CHK_READWRITE) != 0 && + (env->me_flags & MDBX_RDONLY) == 0 && + recent_txnid > prefer_steady_txnid && + (chk->envinfo.mi_bootid.current.x | + chk->envinfo.mi_bootid.current.y) != 0 && + chk->envinfo.mi_bootid.current.x == + chk->envinfo.mi_bootid.meta[recent_metanum].x && + chk->envinfo.mi_bootid.current.y == + chk->envinfo.mi_bootid.meta[recent_metanum].y) { + chk_line_end( + chk_print(chk_line_begin(inner, MDBX_chk_verbose), + "recent meta-%u is weak, but boot-id match current" + " (will synced upon successful check)", + recent_metanum)); + } else + chk_scope_issue( + inner, + "steady meta-%d txn-id mismatch recent-txn-id (%" PRIi64 + " != %" PRIi64 ")", + prefer_steady_metanum, prefer_steady_txnid, recent_txnid); } } else if (chk->write_locked) { chk_line_end( @@ -28441,7 +28457,6 @@ __cold static int env_chk(MDBX_chk_scope_t *const scope) { //-------------------------------------------------------------------------- - eASSERT(env, err == MDBX_SUCCESS); if (chk->flags & MDBX_CHK_SKIP_BTREE_TRAVERSAL) chk_line_end(chk_print(chk_line_begin(scope, MDBX_chk_processing), "Skipping %s traversal...", "b-tree")); @@ -28699,7 +28714,8 @@ __cold int mdbx_env_chk(MDBX_env *env, const struct MDBX_chk_callbacks 
*cb, rc = chk_scope_begin( chk, 0, MDBX_chk_lock, nullptr, nullptr, "Taking %slock...", (env->me_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) ? "" : "read "); - if (likely(!rc) && (env->me_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0) { + if (likely(!rc) && (env->me_flags & (MDBX_RDONLY | MDBX_EXCLUSIVE)) == 0 && + (flags & MDBX_CHK_READWRITE)) { rc = mdbx_txn_lock(env, false); if (unlikely(rc)) chk_error_rc(ctx->scope, rc, "mdbx_txn_lock"); diff --git a/src/mdbx_chk.c b/src/mdbx_chk.c index 55e6f98d..4f0790c1 100644 --- a/src/mdbx_chk.c +++ b/src/mdbx_chk.c @@ -691,7 +691,8 @@ int main(int argc, char *argv[]) { bailout: if (env) { - const bool dont_sync = rc != 0 || chk.result.total_problems; + const bool dont_sync = rc != 0 || chk.result.total_problems || + (chk_flags & MDBX_CHK_READWRITE) == 0; mdbx_env_close_ex(env, dont_sync); } flush(); From 544c6bc1e49b46d35c1d6a2b8eae64d3b2c11e7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 20 Feb 2024 13:45:12 +0300 Subject: [PATCH 108/137] =?UTF-8?q?mdbx-test:=20=D0=BF=D0=BE=D0=B4=D0=B4?= =?UTF-8?q?=D0=B5=D1=80=D0=B6=D0=BA=D0=B0=20rnd/rand/random=20=D0=B4=D0=BB?= =?UTF-8?q?=D1=8F=20=D0=BE=D0=BF=D1=86=D0=B8=D0=B9=20`--keylen`=20=D0=B8?= =?UTF-8?q?=20`--datalen`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/config.c++ | 14 +++++++++++++- test/config.h++ | 2 +- test/main.c++ | 50 +++++++++++++++++++++++++++++-------------------- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/test/config.c++ b/test/config.c++ index 922f7b37..acad8fb6 100644 --- a/test/config.c++ +++ b/test/config.c++ @@ -145,6 +145,16 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, return true; } + if (strcmp(value_cstr, "rnd") == 0 || strcmp(value_cstr, "rand") == 0 || + strcmp(value_cstr, "random") == 0) { + value = minval; + if (maxval > 
minval) + value += (prng32() + UINT64_C(44263400549519813)) % (maxval - minval); + if (scale == intkey) + value &= ~3u; + return true; + } + char *suffix = nullptr; errno = 0; unsigned long long raw = strtoull(value_cstr, &suffix, 0); @@ -159,7 +169,7 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, uint64_t multiplier = 1; if (suffix && *suffix) { - if (scale == no_scale) + if (scale == no_scale || scale == intkey) failure("Option '--%s' doesn't accepts suffixes, so '%s' is unexpected\n", option, suffix); if (strcmp(suffix, "K") == 0 || strcasecmp(suffix, "Kilo") == 0) @@ -203,6 +213,8 @@ bool parse_option(int argc, char *const argv[], int &narg, const char *option, if (value < minval) failure("The minimal value for option '--%s' is %" PRIu64 "\n", option, minval); + if (scale == intkey) + value &= ~3u; return true; } diff --git a/test/config.h++ b/test/config.h++ index 80996157..be369171 100644 --- a/test/config.h++ +++ b/test/config.h++ @@ -63,7 +63,7 @@ const char *keygencase2str(const keygen_case); namespace config { -enum scale_mode { no_scale, decimal, binary, duration }; +enum scale_mode { no_scale, decimal, binary, duration, intkey }; bool parse_option(int argc, char *const argv[], int &narg, const char *option, const char **value, const char *default_value = nullptr); diff --git a/test/main.c++ b/test/main.c++ index 6242a05d..fe159142 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -468,49 +468,59 @@ int main(int argc, char *const argv[]) { keycase_setup(value, params); continue; } - if (config::parse_option(argc, argv, narg, "keylen.min", params.keylen_min, - config::no_scale, params.mdbx_keylen_min(), - params.mdbx_keylen_max())) { + if (config::parse_option( + argc, argv, narg, "keylen.min", params.keylen_min, + (params.table_flags & MDBX_INTEGERKEY) ? 
config::intkey + : config::no_scale, + params.mdbx_keylen_min(), params.mdbx_keylen_max())) { if ((params.table_flags & MDBX_INTEGERKEY) || params.keylen_max < params.keylen_min) params.keylen_max = params.keylen_min; continue; } - if (config::parse_option(argc, argv, narg, "keylen.max", params.keylen_max, - config::no_scale, params.mdbx_keylen_min(), - params.mdbx_keylen_max())) { + if (config::parse_option( + argc, argv, narg, "keylen.max", params.keylen_max, + (params.table_flags & MDBX_INTEGERKEY) ? config::intkey + : config::no_scale, + params.mdbx_keylen_min(), params.mdbx_keylen_max())) { if ((params.table_flags & MDBX_INTEGERKEY) || params.keylen_min > params.keylen_max) params.keylen_min = params.keylen_max; continue; } - if (config::parse_option(argc, argv, narg, "keylen", params.keylen_min, - config::no_scale, params.mdbx_keylen_min(), - params.mdbx_keylen_max())) { + if (config::parse_option( + argc, argv, narg, "keylen", params.keylen_min, + (params.table_flags & MDBX_INTEGERKEY) ? config::intkey + : config::no_scale, + params.mdbx_keylen_min(), params.mdbx_keylen_max())) { params.keylen_max = params.keylen_min; continue; } - if (config::parse_option(argc, argv, narg, "datalen.min", - params.datalen_min, config::no_scale, - params.mdbx_datalen_min(), - params.mdbx_datalen_max())) { + if (config::parse_option( + argc, argv, narg, "datalen.min", params.datalen_min, + (params.table_flags & MDBX_INTEGERDUP) ? config::intkey + : config::no_scale, + params.mdbx_datalen_min(), params.mdbx_datalen_max())) { if ((params.table_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) || params.datalen_max < params.datalen_min) params.datalen_max = params.datalen_min; continue; } - if (config::parse_option(argc, argv, narg, "datalen.max", - params.datalen_max, config::no_scale, - params.mdbx_datalen_min(), - params.mdbx_datalen_max())) { + if (config::parse_option( + argc, argv, narg, "datalen.max", params.datalen_max, + (params.table_flags & MDBX_INTEGERDUP) ? 
config::intkey + : config::no_scale, + params.mdbx_datalen_min(), params.mdbx_datalen_max())) { if ((params.table_flags & (MDBX_INTEGERDUP | MDBX_DUPFIXED)) || params.datalen_min > params.datalen_max) params.datalen_min = params.datalen_max; continue; } - if (config::parse_option(argc, argv, narg, "datalen", params.datalen_min, - config::no_scale, params.mdbx_datalen_min(), - params.mdbx_datalen_max())) { + if (config::parse_option( + argc, argv, narg, "datalen", params.datalen_min, + (params.table_flags & MDBX_INTEGERDUP) ? config::intkey + : config::no_scale, + params.mdbx_datalen_min(), params.mdbx_datalen_max())) { params.datalen_max = params.datalen_min; continue; } From c5ac7b25c90a722c13fdf1a26b340f7663100f45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 20 Feb 2024 22:51:35 +0300 Subject: [PATCH 109/137] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D1=80=D0=B0?= =?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20=D0=B3=D0=B5=D0=BD=D0=B5=D1=80?= =?UTF-8?q?=D0=B0=D1=82=D0=BE=D1=80=D0=B0=20=D0=BF=D0=B0=D1=80=20key-value?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20=D0=BF=D0=BE=D0=B4=D0=B4=D0=B5=D1=80?= =?UTF-8?q?=D0=B6=D0=BA=D0=B8=20=D0=BA=D0=BE=D1=80=D0=BE=D1=82=D0=BA=D0=B8?= =?UTF-8?q?=D1=85=20=D0=BA=D0=BB=D1=8E=D1=87=D0=B5=D0=B9=20=D0=B2=20=D1=80?= =?UTF-8?q?=D0=B5=D0=B6=D0=B8=D0=BC=D0=B5=20`MDBX=5FDUPFIXED`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/keygen.c++ | 56 +++++++++++++++++++++++++++++++++---------------- test/keygen.h++ | 10 +++------ 2 files changed, 41 insertions(+), 25 deletions(-) diff --git a/test/keygen.c++ b/test/keygen.c++ index a6d20f33..1829e0db 100644 --- a/test/keygen.c++ +++ b/test/keygen.c++ @@ -79,7 +79,7 @@ void __hot maker::pair(serial_t serial, const buffer &key, buffer &value, unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT)))); assert(!(value_essentials.flags & 
~(essentials::prng_fill_flag | - unsigned(MDBX_INTEGERDUP | MDBX_REVERSEDUP)))); + unsigned(MDBX_INTEGERDUP | MDBX_REVERSEDUP | MDBX_DUPFIXED)))); log_trace("keygen-pair: serial %" PRIu64 ", data-age %" PRIu64, serial, value_age); @@ -126,15 +126,14 @@ void __hot maker::pair(serial_t serial, const buffer &key, buffer &value, actor_params::serial_mask(mapping.split); } - value_serial |= value_age << mapping.split; log_trace("keygen-pair: split@%u => k%" PRIu64 ", v%" PRIu64, mapping.split, key_serial, value_serial); } log_trace("keygen-pair: key %" PRIu64 ", value %" PRIu64, key_serial, value_serial); - mk_begin(key_serial, key_essentials, *key); - mk_begin(value_serial, value_essentials, *value); + key_serial = mk_begin(key_serial, key_essentials, *key); + value_serial = mk_begin(value_serial, value_essentials, *value); #if 0 /* unused for now */ if (key->value.iov_len + value->value.iov_len > pair_maxlen) { @@ -193,11 +192,13 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, unsigned thread_number) { #if CONSTEXPR_ENUM_FLAGS_OPERATIONS static_assert(unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT | - MDBX_INTEGERDUP | MDBX_REVERSEDUP) < UINT16_MAX, + MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP) < + UINT16_MAX, "WTF?"); #else assert(unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT | - MDBX_INTEGERDUP | MDBX_REVERSEDUP) < UINT16_MAX); + MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP) < + UINT16_MAX); #endif key_essentials.flags = uint16_t( actor.table_flags & @@ -205,20 +206,19 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, assert(actor.keylen_min <= UINT16_MAX); key_essentials.minlen = uint16_t(actor.keylen_min); assert(actor.keylen_max <= UINT32_MAX); - key_essentials.maxlen = - std::min(uint32_t(actor.keylen_max), - uint32_t(mdbx_limits_keysize_max( - actor.pagesize, MDBX_db_flags_t(key_essentials.flags)))); + key_essentials.maxlen = std::min( + uint32_t(actor.keylen_max), + 
uint32_t(mdbx_limits_keysize_max(actor.pagesize, actor.table_flags))); value_essentials.flags = uint16_t( - actor.table_flags & MDBX_db_flags_t(MDBX_INTEGERDUP | MDBX_REVERSEDUP)); + actor.table_flags & + MDBX_db_flags_t(MDBX_INTEGERDUP | MDBX_REVERSEDUP | MDBX_DUPFIXED)); assert(actor.datalen_min <= UINT16_MAX); value_essentials.minlen = uint16_t(actor.datalen_min); assert(actor.datalen_max <= UINT32_MAX); - value_essentials.maxlen = - std::min(uint32_t(actor.datalen_max), - uint32_t(mdbx_limits_valsize_max( - actor.pagesize, MDBX_db_flags_t(key_essentials.flags)))); + value_essentials.maxlen = std::min( + uint32_t(actor.datalen_max), + uint32_t(mdbx_limits_valsize_max(actor.pagesize, actor.table_flags))); if (!actor.keygen.zero_fill) { key_essentials.flags |= essentials::prng_fill_flag; @@ -227,6 +227,16 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, (void)thread_number; mapping = actor.keygen; + while (mapping.split > + essentials::value_age_width + value_essentials.maxlen * CHAR_BIT || + mapping.split >= mapping.width) + mapping.split -= 1; + + while (unsigned((actor.table_flags & MDBX_DUPSORT) + ? mapping.width - mapping.split + : mapping.width) > key_essentials.maxlen * CHAR_BIT) + mapping.width -= 1; + salt = (actor.keygen.seed + uint64_t(actor_id)) * UINT64_C(14653293970879851569); @@ -307,11 +317,20 @@ buffer alloc(size_t limit) { return buffer(ptr); } -void __hot maker::mk_begin(const serial_t serial, const essentials &params, - result &out) { +serial_t __hot maker::mk_begin(serial_t serial, const essentials &params, + result &out) { assert(out.limit >= params.maxlen); assert(params.maxlen >= params.minlen); - assert(params.maxlen >= length(serial)); + if (params.maxlen < sizeof(serial_t)) { + const serial_t max = actor_params::serial_mask(params.maxlen * CHAR_BIT); + if (serial > max) { + serial ^= (serial >> max / 2) * serial_t((sizeof(serial_t) > 4) + ?
UINT64_C(40719303417517073) + : UINT32_C(3708688457)); + serial &= max; + } + assert(params.maxlen >= length(serial)); + } out.value.iov_len = std::max(unsigned(params.minlen), length(serial)); const auto variation = params.maxlen - params.minlen; @@ -328,6 +347,7 @@ void __hot maker::mk_begin(const serial_t serial, const essentials &params, assert(length(serial) <= out.value.iov_len); assert(out.value.iov_len >= params.minlen); assert(out.value.iov_len <= params.maxlen); + return serial; } void __hot maker::mk_continue(const serial_t serial, const essentials &params, diff --git a/test/keygen.h++ b/test/keygen.h++ index 9e2410fd..8c49eabb 100644 --- a/test/keygen.h++ +++ b/test/keygen.h++ @@ -108,19 +108,15 @@ class maker { struct essentials { uint16_t minlen{0}; - enum { prng_fill_flag = 1 }; + enum { prng_fill_flag = 1, value_age_width = 8 }; uint16_t flags{0}; uint32_t maxlen{0}; } key_essentials, value_essentials; - static void mk_begin(const serial_t serial, const essentials &params, - result &out); + static serial_t mk_begin(serial_t serial, const essentials &params, + result &out); static void mk_continue(const serial_t serial, const essentials &params, result &out); - static void mk(const serial_t serial, const essentials &params, result &out) { - mk_begin(serial, params, out); - mk_continue(serial, params, out); - } public: void pair(serial_t serial, const buffer &key, buffer &value, From 3373631cffc0a20831553700eca90f665a1814e2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 3 Mar 2024 17:38:23 +0300 Subject: [PATCH 110/137] =?UTF-8?q?mdbx-test:=20=D1=83=D0=BD=D0=B8=D1=84?= =?UTF-8?q?=D0=B8=D0=BA=D0=B0=D1=86=D0=B8=D1=8F=20PRNG=20=D0=B8=20=D0=B8?= =?UTF-8?q?=D0=B7=D0=BC=D0=B5=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF?= =?UTF-8?q?=D1=86=D0=B8=D0=B8=20=D0=BA=D0=BE=D0=BC=D0=B0=D0=BD=D0=B4=D0=BD?= =?UTF-8?q?=D0=BE=D0=B9=20=D1=81=D1=82=D1=80=D0=BE=D0=BA=D0=B8=20=D0=BD?=
=?UTF-8?q?=D0=B0=20`--prng-seed`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/CMakeLists.txt | 4 +- test/append.c++ | 6 +-- test/cases.c++ | 13 +++--- test/config.c++ | 2 +- test/config.h++ | 1 + test/hill.c++ | 2 +- test/keygen.c++ | 4 +- test/keygen.h++ | 3 +- test/long_stochastic.sh | 95 +++++++++++++++++------------------------ test/main.c++ | 10 +++-- test/nested.c++ | 5 +-- test/test.c++ | 4 +- test/test.h++ | 9 ++-- test/ttl.c++ | 5 +-- test/utils.c++ | 2 +- test/utils.h++ | 1 + 16 files changed, 75 insertions(+), 91 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index ac11ef63..7098cfed 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -122,7 +122,7 @@ else() add_test(NAME smoke COMMAND ${MDBX_OUTPUT_DIR}/mdbx_test --loglevel=verbose - --keygen.seed=${test_seed} + --prng-seed=${test_seed} --progress --console=no --pathname=smoke.db --dont-cleanup-after basic) set_tests_properties(smoke PROPERTIES TIMEOUT 600 @@ -144,7 +144,7 @@ else() add_test(NAME dupsort_writemap COMMAND ${MDBX_OUTPUT_DIR}/mdbx_test --loglevel=notice - --keygen.seed=${test_seed} + --prng-seed=${test_seed} --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=2 --pathname=dupsort_writemap.db --dont-cleanup-after basic) set_tests_properties(dupsort_writemap PROPERTIES diff --git a/test/append.c++ b/test/append.c++ index d2486001..5c7adf3d 100644 --- a/test/append.c++ +++ b/test/append.c++ @@ -20,8 +20,8 @@ public: : testcase(config, pid) {} bool run() override; - static bool review_params(actor_params &params) { - if (!testcase::review_params(params)) + static bool review_params(actor_params &params, unsigned space_id) { + if (!testcase::review_params(params, space_id)) return false; const bool ordered = !flipcoin_x3(); log_notice("the '%s' key-generation mode is selected", @@ -45,7 +45,7 @@ bool testcase_append::run() { } cursor_open(dbi); -
keyvalue_maker.setup(config.params, 0 /* thread_number */); /* LY: тест наполнения таблиц в append-режиме, * при котором записи добавляются строго в конец (в порядке сортировки) */ const MDBX_put_flags_t flags = diff --git a/test/cases.c++ b/test/cases.c++ index 5ccb87ae..1c650a91 100644 --- a/test/cases.c++ +++ b/test/cases.c++ @@ -41,8 +41,9 @@ testcase *registry::create_actor(const actor_config &config, } bool registry::review_actor_params(const actor_testcase id, - actor_params &params) { - return instance()->id2record.at(id)->review_params(params); + actor_params &params, + const unsigned space_id) { + return instance()->id2record.at(id)->review_params(params, space_id); } //----------------------------------------------------------------------------- @@ -78,13 +79,13 @@ void configure_actor(unsigned &last_space_id, const actor_testcase testcase, failure("The '%s' is unexpected for space-id\n", end); } - if (!registry::review_actor_params(testcase, params)) - failure("Actor config-review failed for space-id %lu\n", space_id); - if (space_id > ACTOR_ID_MAX) failure("Invalid space-id %lu\n", space_id); - last_space_id = unsigned(space_id); + if (!registry::review_actor_params(testcase, params, unsigned(space_id))) + failure("Actor config-review failed for space-id %lu\n", space_id); + + last_space_id = unsigned(space_id); log_trace("configure_actor: space %lu for %s", space_id, testcase2str(testcase)); global::actors.emplace_back( diff --git a/test/config.c++ b/test/config.c++ index acad8fb6..f675dee1 100644 --- a/test/config.c++ +++ b/test/config.c++ @@ -434,6 +434,7 @@ void dump(const char *title) { log_verbose("#%u, testcase %s, space_id/table %u\n", i->actor_id, testcase2str(i->testcase), i->space_id); indent.push(); + log_verbose("prng-seed: %u\n", i->params.prng_seed); if (i->params.loglevel) { log_verbose("log: level %u, %s\n", i->params.loglevel, @@ -473,7 +474,6 @@ void dump(const
char *title) { i->params.keygen.mesh, i->params.keygen.rotate, i->params.keygen.offset, i->params.keygen.split, i->params.keygen.width - i->params.keygen.split); - log_verbose("keygen.seed: %u\n", i->params.keygen.seed); log_verbose("keygen.zerofill: %s\n", i->params.keygen.zero_fill ? "Yes" : "No"); log_verbose("key: minlen %u, maxlen %u\n", i->params.keylen_min, diff --git a/test/config.h++ b/test/config.h++ index be369171..18617e34 100644 --- a/test/config.h++ +++ b/test/config.h++ @@ -274,6 +274,7 @@ struct actor_params_pod { unsigned batch_read{0}; unsigned batch_write{0}; + unsigned prng_seed{0}; unsigned delaystart{0}; unsigned waitfor_nops{0}; unsigned inject_writefaultn{0}; diff --git a/test/hill.c++ b/test/hill.c++ index 79234b7d..f5ca1026 100644 --- a/test/hill.c++ +++ b/test/hill.c++ @@ -52,7 +52,7 @@ bool testcase_hill::run() { speculum_committed.clear(); /* TODO: работа в несколько потоков */ - keyvalue_maker.setup(config.params, config.actor_id, 0 /* thread_number */); + keyvalue_maker.setup(config.params, 0 /* thread_number */); keygen::buffer a_key = keygen::alloc(config.params.keylen_max); keygen::buffer a_data_0 = keygen::alloc(config.params.datalen_max); diff --git a/test/keygen.c++ b/test/keygen.c++ index 1829e0db..46b64ecf 100644 --- a/test/keygen.c++ +++ b/test/keygen.c++ @@ -188,7 +188,7 @@ void __hot maker::pair(serial_t serial, const buffer &key, buffer &value, log_pair(logging::trace, "kv", key, value); } -void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, +void maker::setup(const config::actor_params_pod &actor, unsigned thread_number) { #if CONSTEXPR_ENUM_FLAGS_OPERATIONS static_assert(unsigned(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT | @@ -238,7 +238,7 @@ void maker::setup(const config::actor_params_pod &actor, unsigned actor_id, mapping.width -= 1; salt = - (actor.keygen.seed + uint64_t(actor_id)) * UINT64_C(14653293970879851569); + (prng_state + uint64_t(thread_number)) * 
UINT64_C(14653293970879851569); base = actor.serial_base(); } diff --git a/test/keygen.h++ b/test/keygen.h++ index 8c49eabb..0ded8130 100644 --- a/test/keygen.h++ +++ b/test/keygen.h++ @@ -121,8 +121,7 @@ class maker { public: void pair(serial_t serial, const buffer &key, buffer &value, serial_t value_age, const bool keylen_changeable); - void setup(const config::actor_params_pod &actor, unsigned actor_id, - unsigned thread_number); + void setup(const config::actor_params_pod &actor, unsigned thread_number); bool is_unordered() const; void seek2end(serial_t &serial) const; diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index ff73726c..b508b5f6 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -453,91 +453,72 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 split=30 caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE 
--size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} split=24 caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - 
--keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} split=16 caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops 
--batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min 
--datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} if [ "$EXTRA" != "no" ]; then split=10 caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M 
--table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} fi split=4 caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE 
--size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) int-key,int-data, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.integer --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ - --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ - --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} \ - --keygen.seed=${seed} + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} done # options loop=$((loop + 1)) if [ -n "$LOOPS" ] && [ $loop -ge 
"$LOOPS" ]; then break; fi diff --git a/test/main.c++ b/test/main.c++ index fe159142..84ab801c 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -37,6 +37,7 @@ MDBX_NORETURN void usage(void) { " --console[=yes/no] Enable/disable console-like output\n" " --cleanup-before[=YES/no] Cleanup/remove and re-create database\n" " --cleanup-after[=YES/no] Cleanup/remove database after completion\n" + " --prng-seed=N Seed PRNG\n" "Database size control:\n" " --pagesize=... Database page size: min, max, 256..65536\n" " --size-lower=N[K|M|G|T] Lower-bound of size in Kb/Mb/Gb/Tb\n" @@ -88,7 +89,6 @@ MDBX_NORETURN void usage(void) { " --datalen=N Set both min/max for data length\n" " --keygen.width=N TBD (see the source code)\n" " --keygen.mesh=N TBD (see the source code)\n" - " --keygen.seed=N TBD (see the source code)\n" " --keygen.zerofill=yes|NO TBD (see the source code)\n" " --keygen.split=N TBD (see the source code)\n" " --keygen.rotate=N TBD (see the source code)\n" @@ -144,7 +144,7 @@ void actor_params::set_defaults(const std::string &tmpdir) { growth_step = -1; pagesize = -1; - keygen.seed = 1; + prng_seed = 0; keygen.zero_fill = false; keygen.keycase = kc_random; keygen.width = (table_flags & MDBX_DUPSORT) ? 
32 : 64; @@ -449,9 +449,11 @@ int main(int argc, char *const argv[]) { if (config::parse_option(argc, argv, narg, "keygen.mesh", params.keygen.mesh, 0, 64)) continue; - if (config::parse_option(argc, argv, narg, "keygen.seed", - params.keygen.seed, config::no_scale)) + if (config::parse_option(argc, argv, narg, "prng-seed", params.prng_seed, + config::no_scale)) { + prng_seed(params.prng_seed); continue; + } if (config::parse_option(argc, argv, narg, "keygen.zerofill", params.keygen.zero_fill)) continue; diff --git a/test/nested.c++ b/test/nested.c++ index 48299c79..55c7ab11 100644 --- a/test/nested.c++ +++ b/test/nested.c++ @@ -74,7 +74,7 @@ bool testcase_nested::setup() { return false; } - keyvalue_maker.setup(config.params, config.actor_id, 0 /* thread_number */); + keyvalue_maker.setup(config.params, 0 /* thread_number */); key = keygen::alloc(config.params.keylen_max); data = keygen::alloc(config.params.datalen_max); serial = 0; @@ -292,8 +292,7 @@ retry: } bool testcase_nested::run() { - uint64_t seed = - prng64_map2_white(config.params.keygen.seed) + config.actor_id; + uint64_t seed = prng64_map2_white(prng_state) + config.space_id; clear_wholetable_passed = 0; clear_stepbystep_passed = 0; diff --git a/test/test.c++ b/test/test.c++ index b2d9da86..10107980 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -668,8 +668,8 @@ bool test_execute(const actor_config &config_const) { size_t(config.params.nrepeat)); else log_verbose("test successfully (iteration %zi)", iter); - config.params.keygen.seed += INT32_C(0xA4F4D37B); - log_verbose("turn keygen to %u", config.params.keygen.seed); + prng_seed(config.params.prng_seed += INT32_C(0xA4F4D37B)); + log_verbose("turn PRNG to %u", config.params.prng_seed); } } while (config.params.nrepeat == 0 || iter < config.params.nrepeat); diff --git a/test/test.h++ b/test/test.h++ index b03b80e1..9db1bc6c 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -103,7 +103,7 @@ class registry { struct record { actor_testcase id = 
ac_none; std::string name; - bool (*review_params)(actor_params &) = nullptr; + bool (*review_params)(actor_params &, unsigned space_id) = nullptr; testcase *(*constructor)(const actor_config &, const mdbx_pid_t) = nullptr; }; std::unordered_map name2id; @@ -124,8 +124,8 @@ public: add(this); } }; - static bool review_actor_params(const actor_testcase id, - actor_params &params); + static bool review_actor_params(const actor_testcase id, actor_params &params, + const unsigned space_id); static testcase *create_actor(const actor_config &config, const mdbx_pid_t pid); }; @@ -301,8 +301,9 @@ public: memset(&last, 0, sizeof(last)); } - static bool review_params(actor_params &params) { + static bool review_params(actor_params &params, unsigned space_id) { // silently fix key/data length for fixed-length modes + params.prng_seed += bleach32(space_id); if ((params.table_flags & MDBX_INTEGERKEY) && params.keylen_min != params.keylen_max) params.keylen_min = params.keylen_max; diff --git a/test/ttl.c++ b/test/ttl.c++ index a7049022..b3839357 100644 --- a/test/ttl.c++ +++ b/test/ttl.c++ @@ -119,9 +119,8 @@ bool testcase_ttl::run() { return false; } - uint64_t seed = - prng64_map2_white(config.params.keygen.seed) + config.actor_id; - keyvalue_maker.setup(config.params, config.actor_id, 0 /* thread_number */); + uint64_t seed = prng64_map2_white(prng_state) + config.space_id; + keyvalue_maker.setup(config.params, 0 /* thread_number */); key = keygen::alloc(config.params.keylen_max); data = keygen::alloc(config.params.datalen_max); const MDBX_put_flags_t insert_flags = diff --git a/test/utils.c++ b/test/utils.c++ index 71d56eb8..399ea472 100644 --- a/test/utils.c++ +++ b/test/utils.c++ @@ -136,7 +136,7 @@ void prng_fill(uint64_t &state, void *ptr, size_t bytes) { } } -static __thread uint64_t prng_state; +/* __thread */ uint64_t prng_state; void prng_seed(uint64_t seed) { prng_state = bleach64(seed); } diff --git a/test/utils.h++ b/test/utils.h++ index 4e91226d..f8083437 100644 --- 
a/test/utils.h++ +++ b/test/utils.h++ @@ -346,6 +346,7 @@ uint64_t prng64_white(uint64_t &state); uint32_t prng32(uint64_t &state); void prng_fill(uint64_t &state, void *ptr, size_t bytes); +extern uint64_t prng_state; void prng_seed(uint64_t seed); uint32_t prng32(void); uint64_t prng64(void); From 00c4e2636ec0febc549a9fea5499655155ced093 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 27 Feb 2024 20:46:27 +0300 Subject: [PATCH 111/137] =?UTF-8?q?mdbx-test:=20=D0=BE=D0=B1=D0=BD=D0=BE?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=93=D0=9F=D0=A1=D0=A7?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/utils.c++ | 14 +++++++++----- test/utils.h++ | 29 ++++++++++++++++------------- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/test/utils.c++ b/test/utils.c++ index 399ea472..fd32c689 100644 --- a/test/utils.c++ +++ b/test/utils.c++ @@ -107,18 +107,22 @@ uint64_t prng64_white(uint64_t &state) { return bleach64(state); } -uint32_t prng32(uint64_t &state) { - return (uint32_t)(prng64_careless(state) >> 32); +uint32_t prng32_fast(uint64_t &state) { + return uint32_t(prng64_careless(state) >> 32); +} + +uint32_t prng32_white(uint64_t &state) { + return bleach32(uint32_t(prng64_careless(state) >> 32)); } void prng_fill(uint64_t &state, void *ptr, size_t bytes) { - uint32_t u32 = prng32(state); + uint32_t u32 = prng32_fast(state); while (bytes >= 4) { memcpy(ptr, &u32, 4); ptr = (uint32_t *)ptr + 1; bytes -= 4; - u32 = prng32(state); + u32 = prng32_fast(state); } switch (bytes & 3) { @@ -140,7 +144,7 @@ void prng_fill(uint64_t &state, void *ptr, size_t bytes) { void prng_seed(uint64_t seed) { prng_state = bleach64(seed); } -uint32_t prng32(void) { return prng32(prng_state); } +uint32_t prng32(void) { return prng32_white(prng_state); } uint64_t prng64(void) { return 
prng64_white(prng_state); } diff --git a/test/utils.h++ b/test/utils.h++ index f8083437..055e7912 100644 --- a/test/utils.h++ +++ b/test/utils.h++ @@ -288,24 +288,26 @@ inline bool is_samedata(const MDBX_val &a, const MDBX_val &b) { } std::string format(const char *fmt, ...); -static inline uint64_t bleach64(uint64_t v) { - // Tommy Ettinger, https://www.blogger.com/profile/04953541827437796598 - // http://mostlymangling.blogspot.com/2019/01/better-stronger-mixer-and-test-procedure.html - v ^= rot64(v, 25) ^ rot64(v, 50); - v *= UINT64_C(0xA24BAED4963EE407); - v ^= rot64(v, 24) ^ rot64(v, 49); - v *= UINT64_C(0x9FB21C651E98DF25); - return v ^ v >> 28; +static inline uint64_t bleach64(uint64_t x) { + // NASAM from Tommy Ettinger, + // https://www.blogger.com/profile/04953541827437796598 + // http://mostlymangling.blogspot.com/2020/01/nasam-not-another-strange-acronym-mixer.html + x ^= rot64(x, 25) ^ rot64(x, 47); + x *= UINT64_C(0x9E6C63D0676A9A99); + x ^= x >> 23 ^ x >> 51; + x *= UINT64_C(0x9E6D62D06F6A9A9B); + x ^= x >> 23 ^ x >> 51; + return x; } static inline uint32_t bleach32(uint32_t x) { // https://github.com/skeeto/hash-prospector - // exact bias: 0.17353355999581582 + // exact bias: 0.10760229515479501 x ^= x >> 16; - x *= UINT32_C(0x7feb352d); + x *= UINT32_C(0x21f0aaad); x ^= 0x3027C563 ^ (x >> 15); - x *= UINT32_C(0x846ca68b); - x ^= x >> 16; + x *= UINT32_C(0x0d35a2d97); + x ^= x >> 15; return x; } @@ -343,7 +345,8 @@ static inline double u64_to_double1(uint64_t v) { } uint64_t prng64_white(uint64_t &state); -uint32_t prng32(uint64_t &state); +uint32_t prng32_white(uint64_t &state); +uint32_t prng32_fast(uint64_t &state); void prng_fill(uint64_t &state, void *ptr, size_t bytes); extern uint64_t prng_state; From 826441741def28251fe437fefd85b4663bb99697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Fri, 23 Feb 2024 12:43:18 +0300 Subject: [PATCH 
112/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20keysize=5Fmin()=20=D0=B8=20val?= =?UTF-8?q?size=5Fmin()=20=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- mdbx.h | 12 ++++++++++++ src/core.c | 28 +++++++++++++++++++++++----- test/config.c++ | 4 ++-- 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/mdbx.h b/mdbx.h index 693cfee6..1fda47e2 100644 --- a/mdbx.h +++ b/mdbx.h @@ -3369,6 +3369,12 @@ mdbx_limits_dbsize_max(intptr_t pagesize); MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_keysize_max(intptr_t pagesize, MDBX_db_flags_t flags); +/** \brief Returns minimal key size in bytes for given database flags. + * \ingroup c_statinfo + * \see db_flags */ +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t +mdbx_limits_keysize_min(MDBX_db_flags_t flags); + /** \brief Returns maximal data size in bytes for given page size * and database flags, or -1 if pagesize is invalid. * \ingroup c_statinfo @@ -3376,6 +3382,12 @@ mdbx_limits_keysize_max(intptr_t pagesize, MDBX_db_flags_t flags); MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t mdbx_limits_valsize_max(intptr_t pagesize, MDBX_db_flags_t flags); +/** \brief Returns minimal data size in bytes for given database flags. + * \ingroup c_statinfo + * \see db_flags */ +MDBX_NOTHROW_CONST_FUNCTION LIBMDBX_API intptr_t +mdbx_limits_valsize_min(MDBX_db_flags_t flags); + /** \brief Returns maximal size of key-value pair to fit in a single page with * the given size and database flags, or -1 if pagesize is invalid. * \ingroup c_statinfo diff --git a/src/core.c b/src/core.c index 1ba69ff2..405da396 100644 --- a/src/core.c +++ b/src/core.c @@ -458,6 +458,19 @@ static __inline size_t keysize_max(size_t pagesize, MDBX_db_flags_t flags) { return max_branch_key; } +static __inline size_t keysize_min(MDBX_db_flags_t flags) { + return (flags & MDBX_INTEGERKEY) ? 
4 /* sizeof(uint32_t) */ : 0; +} + +static __inline size_t valsize_min(MDBX_db_flags_t flags) { + if (flags & MDBX_INTEGERDUP) + return 4 /* sizeof(uint32_t) */; + else if (flags & MDBX_DUPFIXED) + return sizeof(indx_t); + else + return 0; +} + static __inline size_t valsize_max(size_t pagesize, MDBX_db_flags_t flags) { assert(pagesize >= MIN_PAGESIZE && pagesize <= MAX_PAGESIZE && is_powerof2(pagesize)); @@ -510,6 +523,10 @@ __cold intptr_t mdbx_limits_keysize_max(intptr_t pagesize, return keysize_max(pagesize, flags); } +__cold intptr_t mdbx_limits_keysize_min(MDBX_db_flags_t flags) { + return keysize_min(flags); +} + __cold int mdbx_env_get_maxvalsize_ex(const MDBX_env *env, MDBX_db_flags_t flags) { if (unlikely(!env || env->me_signature.weak != MDBX_ME_SIGNATURE)) @@ -530,6 +547,10 @@ __cold intptr_t mdbx_limits_valsize_max(intptr_t pagesize, return valsize_max(pagesize, flags); } +__cold intptr_t mdbx_limits_valsize_min(MDBX_db_flags_t flags) { + return valsize_min(flags); +} + __cold intptr_t mdbx_limits_pairsize4page_max(intptr_t pagesize, MDBX_db_flags_t flags) { if (pagesize < 1) @@ -16289,14 +16310,11 @@ static int setup_dbx(MDBX_dbx *const dbx, const MDBX_db *const db, dbx->md_dcmp = get_default_datacmp(db->md_flags); } - dbx->md_klen_min = - (db->md_flags & MDBX_INTEGERKEY) ? 4 /* sizeof(uint32_t) */ : 0; + dbx->md_klen_min = keysize_min(db->md_flags); dbx->md_klen_max = keysize_max(pagesize, db->md_flags); assert(dbx->md_klen_max != (unsigned)-1); - dbx->md_vlen_min = (db->md_flags & MDBX_INTEGERDUP) - ? 4 /* sizeof(uint32_t) */ - : ((db->md_flags & MDBX_DUPFIXED) ? 
sizeof(indx_t) : 0); + dbx->md_vlen_min = valsize_min(db->md_flags); dbx->md_vlen_max = valsize_max(pagesize, db->md_flags); assert(dbx->md_vlen_max != (size_t)-1); diff --git a/test/config.c++ b/test/config.c++ index f675dee1..1ac2101d 100644 --- a/test/config.c++ +++ b/test/config.c++ @@ -693,7 +693,7 @@ bool actor_config::deserialize(const char *str, actor_config &config) { } unsigned actor_params::mdbx_keylen_min() const { - return (table_flags & MDBX_INTEGERKEY) ? 4 : 0; + return unsigned(mdbx_limits_keysize_min(table_flags)); } unsigned actor_params::mdbx_keylen_max() const { @@ -701,7 +701,7 @@ unsigned actor_params::mdbx_keylen_max() const { } unsigned actor_params::mdbx_datalen_min() const { - return (table_flags & MDBX_INTEGERDUP) ? 4 : 0; + return unsigned(mdbx_limits_valsize_min(table_flags)); } unsigned actor_params::mdbx_datalen_max() const { From 2e863cf7e0fd9a35c71407fcd24d47b6048042a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 25 Feb 2024 19:32:16 +0300 Subject: [PATCH 113/137] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BD=D0=B5=D0=B4=D0=BE?= =?UTF-8?q?=D1=87=D0=B5=D1=82=D0=B0=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5=D0=BA?= =?UTF-8?q?=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B8=20=D1=81=D0=BE=D0=BF?= =?UTF-8?q?=D1=83=D1=82=D1=81=D1=82=D0=B2=D1=83=D1=8E=D1=89=D0=B8=D1=85=20?= =?UTF-8?q?=D0=BA=D1=83=D1=80=D1=81=D0=BE=D1=80=D0=BE=D0=B2=20=D0=BF=D1=80?= =?UTF-8?q?=D0=B8=20=D1=80=D0=B0=D0=B7=D0=B4=D0=B5=D0=BB=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D0=B8=20=D1=81=D1=82=D1=80=D0=B0=D0=BD=D0=B8=D1=86=D1=8B=20?= =?UTF-8?q?=D0=BF=D0=BE=20=D1=81=D1=86=D0=B5=D0=BD=D0=B0=D1=80=D0=B8=D1=8E?= =?UTF-8?q?=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D1=8F?= =?UTF-8?q?=20=D0=BF=D1=83=D1=81=D1=82=D0=BE=D0=B9=20=D1=81=D1=82=D1=80?= 
=?UTF-8?q?=D0=B0=D0=BD=D0=B8=D1=86=D1=8B=20=D1=81=D0=BB=D0=B5=D0=B2=D0=B0?= =?UTF-8?q?.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/src/core.c b/src/core.c index 405da396..0527e279 100644 --- a/src/core.c +++ b/src/core.c @@ -21429,7 +21429,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, /* It is reasonable and possible to split the page at the begin */ if (unlikely(newindx < minkeys)) { split_indx = minkeys; - if (newindx == 0 && foliage == 0 && !(naf & MDBX_SPLIT_REPLACE)) { + if (newindx == 0 && !(naf & MDBX_SPLIT_REPLACE)) { split_indx = 0; /* Checking for ability of splitting by the left-side insertion * of a pure page with the new key */ @@ -21449,8 +21449,8 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, } else get_key(page_node(mp, 0), &sepkey); cASSERT(mc, mc->mc_dbx->md_cmp(newkey, &sepkey) < 0); - /* Avoiding rare complex cases of split the parent page */ - if (page_room(mn.mc_pg[ptop]) < branch_size(env, &sepkey)) + /* Avoiding rare complex cases of nested split the parent page(s) */ + if (page_room(mc->mc_pg[ptop]) < branch_size(env, &sepkey)) split_indx = minkeys; } if (foliage) { @@ -21474,9 +21474,10 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, sepkey = *newkey; } else if (unlikely(pure_left)) { /* newindx == split_indx == 0 */ - TRACE("no-split, but add new pure page at the %s", "left/before"); + TRACE("pure-left: no-split, but add new pure page at the %s", + "left/before"); cASSERT(mc, newindx == 0 && split_indx == 0 && minkeys == 1); - TRACE("old-first-key is %s", DKEY_DEBUG(&sepkey)); + TRACE("pure-left: old-first-key is %s", DKEY_DEBUG(&sepkey)); } else { if (IS_LEAF2(sister)) { /* Move half of the keys to the right sibling */ @@ -21690,18 +21691,20 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const 
newkey, } } else if (unlikely(pure_left)) { MDBX_page *ptop_page = mc->mc_pg[ptop]; - DEBUG("adding to parent page %u node[%u] left-leaf page #%u key %s", + TRACE("pure-left: adding to parent page %u node[%u] left-leaf page #%u key " + "%s", ptop_page->mp_pgno, mc->mc_ki[ptop], sister->mp_pgno, DKEY(mc->mc_ki[ptop] ? newkey : NULL)); - mc->mc_top--; + assert(mc->mc_top == ptop + 1); + mc->mc_top = (uint8_t)ptop; rc = node_add_branch(mc, mc->mc_ki[ptop], mc->mc_ki[ptop] ? newkey : NULL, sister->mp_pgno); cASSERT(mc, mp == mc->mc_pg[ptop + 1] && newindx == mc->mc_ki[ptop + 1] && ptop == mc->mc_top); if (likely(rc == MDBX_SUCCESS) && mc->mc_ki[ptop] == 0) { - DEBUG("update prev-first key on parent %s", DKEY(&sepkey)); MDBX_node *node = page_node(mc->mc_pg[ptop], 1); + TRACE("pure-left: update prev-first key on parent to %s", DKEY(&sepkey)); cASSERT(mc, node_ks(node) == 0 && node_pgno(node) == mp->mp_pgno); cASSERT(mc, mc->mc_top == ptop && mc->mc_ki[ptop] == 0); mc->mc_ki[ptop] = 1; @@ -21709,6 +21712,9 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, cASSERT(mc, mc->mc_top == ptop && mc->mc_ki[ptop] == 1); cASSERT(mc, mp == mc->mc_pg[ptop + 1] && newindx == mc->mc_ki[ptop + 1]); mc->mc_ki[ptop] = 0; + } else { + TRACE("pure-left: no-need-update prev-first key on parent %s", + DKEY(&sepkey)); } mc->mc_top++; @@ -21757,7 +21763,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, &sepkey); if (mc->mc_dbx->md_cmp(newkey, &sepkey) < 0) { mc->mc_top -= (uint8_t)i; - DEBUG("update new-first on parent [%i] page %u key %s", + DEBUG("pure-left: update new-first on parent [%i] page %u key %s", mc->mc_ki[mc->mc_top], mc->mc_pg[mc->mc_top]->mp_pgno, DKEY(newkey)); rc = update_key(mc, newkey); @@ -21768,7 +21774,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, break; } } - } else if (tmp_ki_copy /* !IS_LEAF2(mp) */) { + } else if (tmp_ki_copy) { /* !IS_LEAF2(mp) */ /* Move nodes */ mc->mc_pg[mc->mc_top] = 
sister; i = split_indx; @@ -21887,7 +21893,7 @@ static int page_split(MDBX_cursor *mc, const MDBX_val *const newkey, m3->mc_ki[k + 1] = m3->mc_ki[k]; m3->mc_pg[k + 1] = m3->mc_pg[k]; } - m3->mc_ki[0] = m3->mc_ki[0] >= nkeys; + m3->mc_ki[0] = m3->mc_ki[0] >= nkeys + pure_left; m3->mc_pg[0] = mc->mc_pg[0]; m3->mc_snum++; m3->mc_top++; From d53dc4572c6ecc6a31383511d234bd3118039c8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 28 Feb 2024 02:21:59 +0300 Subject: [PATCH 114/137] =?UTF-8?q?mdbx:=20=D1=80=D0=B5=D1=84=D0=B0=D0=BA?= =?UTF-8?q?=D1=82=D0=BE=D1=80=D0=B8=D0=BD=D0=B3=20node=5Fshrink()=20=D0=B4?= =?UTF-8?q?=D0=BB=D1=8F=20=D1=8F=D1=81=D0=BD=D0=BE=D1=81=D1=82=D0=B8=20?= =?UTF-8?q?=D0=B8=D1=81=D1=85=D0=BE=D0=B4=D0=BD=D0=BE=D0=B3=D0=BE=20=D0=BA?= =?UTF-8?q?=D0=BE=D0=B4=D0=B0.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 58 ++++++++++++++++++++++++------------------------------ 1 file changed, 26 insertions(+), 32 deletions(-) diff --git a/src/core.c b/src/core.c index 0527e279..f7a5e433 100644 --- a/src/core.c +++ b/src/core.c @@ -3357,7 +3357,7 @@ static int __must_check_result node_add_leaf2(MDBX_cursor *mc, size_t indx, const MDBX_val *key); static void node_del(MDBX_cursor *mc, size_t ksize); -static void node_shrink(MDBX_page *mp, size_t indx); +static MDBX_node *node_shrink(MDBX_page *mp, size_t indx, MDBX_node *node); static int __must_check_result node_move(MDBX_cursor *csrc, MDBX_cursor *cdst, bool fromleft); static int __must_check_result node_read(MDBX_cursor *mc, const MDBX_node *leaf, @@ -18766,7 +18766,7 @@ static __hot int cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { if (!(node_flags(node) & F_SUBDATA)) mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node); rc = cursor_del(&mc->mc_xcursor->mx_cursor, 0); - if (unlikely(rc)) + if (unlikely(rc != MDBX_SUCCESS)) return 
rc; /* If sub-DB still has entries, we're done */ if (mc->mc_xcursor->mx_db.md_entries) { @@ -18775,11 +18775,10 @@ static __hot int cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { mc->mc_xcursor->mx_db.md_mod_txnid = mc->mc_txn->mt_txnid; memcpy(node_data(node), &mc->mc_xcursor->mx_db, sizeof(MDBX_db)); } else { - /* shrink fake page */ - node_shrink(mp, mc->mc_ki[mc->mc_top]); - node = page_node(mp, mc->mc_ki[mc->mc_top]); + /* shrink sub-page */ + node = node_shrink(mp, mc->mc_ki[mc->mc_top], node); mc->mc_xcursor->mx_cursor.mc_pg[0] = node_data(node); - /* fix other sub-DB cursors pointed at fake pages on this page */ + /* fix other sub-DB cursors pointed at sub-pages on this page */ for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) @@ -19234,35 +19233,28 @@ __hot static void node_del(MDBX_cursor *mc, size_t ksize) { /* Compact the main page after deleting a node on a subpage. * [in] mp The main page to operate on. * [in] indx The index of the subpage on the main page. 
*/ -static void node_shrink(MDBX_page *mp, size_t indx) { - MDBX_node *node; - MDBX_page *sp, *xp; - size_t nsize, delta, len, ptr; - intptr_t i; - - node = page_node(mp, indx); - sp = (MDBX_page *)node_data(node); - delta = page_room(sp); - assert(delta > 0); +static MDBX_node *node_shrink(MDBX_page *mp, size_t indx, MDBX_node *node) { + assert(node = page_node(mp, indx)); + MDBX_page *sp = (MDBX_page *)node_data(node); + assert(IS_SUBP(sp) && page_numkeys(sp) > 0); + const size_t delta = + EVEN_FLOOR(page_room(sp) /* avoid the node uneven-sized */); + if (unlikely(delta) == 0) + return node; /* Prepare to shift upward, set len = length(subpage part to shift) */ - if (IS_LEAF2(sp)) { - delta &= /* do not make the node uneven-sized */ ~(size_t)1; - if (unlikely(delta) == 0) - return; - nsize = node_ds(node) - delta; - assert(nsize % 1 == 0); - len = nsize; - } else { - xp = ptr_disp(sp, delta); /* destination subpage */ - for (i = page_numkeys(sp); --i >= 0;) { + size_t nsize = node_ds(node) - delta, len = nsize; + assert(nsize % 1 == 0); + if (!IS_LEAF2(sp)) { + len = PAGEHDRSZ; + MDBX_page *xp = ptr_disp(sp, delta); /* destination subpage */ + for (intptr_t i = page_numkeys(sp); --i >= 0;) { assert(sp->mp_ptrs[i] >= delta); xp->mp_ptrs[i] = (indx_t)(sp->mp_ptrs[i] - delta); } - nsize = node_ds(node) - delta; - len = PAGEHDRSZ; } - sp->mp_upper = sp->mp_lower; + assert(sp->mp_upper >= sp->mp_lower + delta); + sp->mp_upper -= (indx_t)delta; sp->mp_pgno = mp->mp_pgno; node_set_ds(node, nsize); @@ -19270,15 +19262,17 @@ static void node_shrink(MDBX_page *mp, size_t indx) { void *const base = ptr_disp(mp, mp->mp_upper + PAGEHDRSZ); memmove(ptr_disp(base, delta), base, ptr_dist(sp, base) + len); - ptr = mp->mp_ptrs[indx]; - for (i = page_numkeys(mp); --i >= 0;) { - if (mp->mp_ptrs[i] <= ptr) { + const size_t pivot = mp->mp_ptrs[indx]; + for (intptr_t i = page_numkeys(mp); --i >= 0;) { + if (mp->mp_ptrs[i] <= pivot) { assert((size_t)UINT16_MAX - mp->mp_ptrs[i] >= 
delta); mp->mp_ptrs[i] += (indx_t)delta; } } assert((size_t)UINT16_MAX - mp->mp_upper >= delta); mp->mp_upper += (indx_t)delta; + + return ptr_disp(node, delta); } /* Initial setup of a sorted-dups cursor. From d7f259110c408fdab8a4b33afbc4ab96078ef538 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 28 Feb 2024 02:24:53 +0300 Subject: [PATCH 115/137] =?UTF-8?q?mdbx-test:=20=D1=84=D0=B8=D0=BA=D1=81?= =?UTF-8?q?=D0=B0=D1=86=D0=B8=D1=8F=20=D1=82=D1=80=D0=B0=D0=BD=D0=B7=D0=B0?= =?UTF-8?q?=D0=BA=D1=86=D0=B8=D0=B8=20=D0=BF=D1=80=D0=B8=20=D0=BE=D1=88?= =?UTF-8?q?=D0=B8=D0=B1=D0=BA=D0=B0=D1=85=20=D1=82=D0=B5=D1=81=D1=82=D0=B0?= =?UTF-8?q?=20=D0=B4=D0=BB=D1=8F=20=D0=BF=D0=BE=D1=81=D0=BB=D0=B5=D0=B4?= =?UTF-8?q?=D1=83=D1=8E=D1=89=D0=B5=D0=B3=D0=BE=20=D0=B0=D0=BD=D0=B0=D0=BB?= =?UTF-8?q?=D0=B8=D0=B7=D0=B0=20=D0=91=D0=94.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test.c++ | 12 ++++++++++++ test/test.h++ | 1 + 2 files changed, 13 insertions(+) diff --git a/test/test.c++ b/test/test.c++ index 10107980..bb7bd818 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -753,6 +753,18 @@ void testcase::speculum_check_iterator(const char *where, const char *stage, mdbx_dump_val(&v, dump_value, sizeof(dump_value))); } +void testcase::failure(const char *fmt, ...) 
const { + va_list ap; + va_start(ap, fmt); + fflush(nullptr); + logging::output_nocheckloglevel_ap(logging::failure, fmt, ap); + va_end(ap); + fflush(nullptr); + if (txn_guard) + mdbx_txn_commit(const_cast(this)->txn_guard.release()); + exit(EXIT_FAILURE); +} + #if SPECULUM_CURSORS void testcase::speculum_check_cursor(const char *where, const char *stage, const testcase::SET::const_iterator &it, diff --git a/test/test.h++ b/test/test.h++ index 9db1bc6c..d99ba4f8 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -281,6 +281,7 @@ protected: void signal(); bool should_continue(bool check_timeout_only = false) const; + void failure(const char *fmt, ...) const; void generate_pair(const keygen::serial_t serial, keygen::buffer &out_key, keygen::buffer &out_value, keygen::serial_t data_age) { keyvalue_maker.pair(serial, out_key, out_value, data_age, false); From fa0017591d25caf5e13967d630270a1d0d323c31 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 28 Feb 2024 02:32:43 +0300 Subject: [PATCH 116/137] =?UTF-8?q?mdbx:=20=D0=BF=D1=80=D0=BE=D0=B4=D0=BE?= =?UTF-8?q?=D0=BB=D0=B6=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D1=87=D0=B8=D1=81?= =?UTF-8?q?=D1=82=D0=BA=D0=B8/=D1=80=D0=B5=D1=84=D0=B0=D0=BA=D1=82=D0=BE?= =?UTF-8?q?=D1=80=D0=B8=D0=BD=D0=B3=D0=B0=20=D1=83=D0=BD=D0=B0=D1=81=D0=BB?= =?UTF-8?q?=D0=B5=D0=B4=D0=BE=D0=B2=D0=B0=D0=BD=D0=BD=D1=8B=D1=85=20=D1=80?= =?UTF-8?q?=D0=B5=D0=B1=D1=83=D1=81=D0=BE=D0=B2=20=D0=B2=20`cursor=5Fput?= =?UTF-8?q?=5Fnochecklen()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 225 ++++++++++++++++++++++++++--------------------------- 1 file changed, 111 insertions(+), 114 deletions(-) diff --git a/src/core.c b/src/core.c index f7a5e433..3e99a8dd 100644 --- a/src/core.c +++ b/src/core.c @@ -17997,11 +17997,11 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val 
*key, rc = MDBX_NO_ROOT; } else if ((flags & MDBX_CURRENT) == 0) { bool exact = false; - MDBX_val lastkey, olddata; + MDBX_val last_key, old_data; if ((flags & MDBX_APPEND) && mc->mc_db->md_entries > 0) { - rc = cursor_last(mc, &lastkey, &olddata); + rc = cursor_last(mc, &last_key, &old_data); if (likely(rc == MDBX_SUCCESS)) { - const int cmp = mc->mc_dbx->md_cmp(key, &lastkey); + const int cmp = mc->mc_dbx->md_cmp(key, &last_key); if (likely(cmp > 0)) { mc->mc_ki[mc->mc_top]++; /* step forward for appending */ rc = MDBX_NOTFOUND; @@ -18016,7 +18016,7 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, } else { struct cursor_set_result csr = /* olddata may not be updated in case LEAF2-page of dupfixed-subDB */ - cursor_set(mc, (MDBX_val *)key, &olddata, MDBX_SET); + cursor_set(mc, (MDBX_val *)key, &old_data, MDBX_SET); rc = csr.err; exact = csr.exact; } @@ -18024,14 +18024,14 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (exact) { if (unlikely(flags & MDBX_NOOVERWRITE)) { DEBUG("duplicate key [%s]", DKEY_DEBUG(key)); - *data = olddata; + *data = old_data; return MDBX_KEYEXIST; } if (unlikely(mc->mc_flags & C_SUB)) { /* nested subtree of DUPSORT-database with the same key, * nothing to update */ eASSERT(env, data->iov_len == 0 && - (olddata.iov_len == 0 || + (old_data.iov_len == 0 || /* olddata may not be updated in case LEAF2-page of dupfixed-subDB */ (mc->mc_db->md_flags & MDBX_DUPFIXED))); @@ -18047,8 +18047,8 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, exact = false; } else if (!(flags & (MDBX_RESERVE | MDBX_MULTIPLE))) { /* checking for early exit without dirtying pages */ - if (unlikely(eq_fast(data, &olddata))) { - cASSERT(mc, mc->mc_dbx->md_dcmp(data, &olddata) == 0); + if (unlikely(eq_fast(data, &old_data))) { + cASSERT(mc, mc->mc_dbx->md_dcmp(data, &old_data) == 0); if (mc->mc_xcursor) { if (flags & MDBX_NODUPDATA) return MDBX_KEYEXIST; @@ -18058,7 +18058,7 
@@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, /* the same data, nothing to update */ return MDBX_SUCCESS; } - cASSERT(mc, mc->mc_dbx->md_dcmp(data, &olddata) != 0); + cASSERT(mc, mc->mc_dbx->md_dcmp(data, &old_data) != 0); } } } else if (unlikely(rc != MDBX_NOTFOUND)) @@ -18066,17 +18066,16 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, } mc->mc_flags &= ~C_DEL; - MDBX_val xdata, *rdata = data; - size_t mcount = 0, dcount = 0; + MDBX_val xdata, *ref_data = data; + size_t *batch_dupfixed_done = nullptr, batch_dupfixed_given = 0; if (unlikely(flags & MDBX_MULTIPLE)) { - dcount = data[1].iov_len; - data[1].iov_len = 0 /* reset done item counter */; - rdata = &xdata; - xdata.iov_len = data->iov_len * dcount; + batch_dupfixed_given = data[1].iov_len; + batch_dupfixed_done = &data[1].iov_len; + *batch_dupfixed_done = 0; } /* Cursor is positioned, check for room in the dirty list */ - err = cursor_touch(mc, key, rdata); + err = cursor_touch(mc, key, ref_data); if (unlikely(err)) return err; @@ -18111,7 +18110,7 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, mc->mc_flags |= C_INITIALIZED; } - MDBX_val dkey, olddata; + MDBX_val old_singledup, old_data; MDBX_db nested_dupdb; MDBX_page *sub_root = nullptr; bool insert_key, insert_data; @@ -18119,19 +18118,19 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, MDBX_page *fp = env->me_pbuf; fp->mp_txnid = mc->mc_txn->mt_front; insert_key = insert_data = (rc != MDBX_SUCCESS); - dkey.iov_base = nullptr; + old_singledup.iov_base = nullptr; if (insert_key) { /* The key does not exist */ DEBUG("inserting key at index %i", mc->mc_ki[mc->mc_top]); if ((mc->mc_db->md_flags & MDBX_DUPSORT) && node_size(key, data) > env->me_leaf_nodemax) { /* Too big for a node, insert in sub-DB. Set up an empty - * "old sub-page" for prep_subDB to expand to a full page. 
*/ + * "old sub-page" for convert_to_subtree to expand to a full page. */ fp->mp_leaf2_ksize = (mc->mc_db->md_flags & MDBX_DUPFIXED) ? (uint16_t)data->iov_len : 0; fp->mp_lower = fp->mp_upper = 0; - olddata.iov_len = PAGEHDRSZ; - goto prep_subDB; + old_data.iov_len = PAGEHDRSZ; + goto convert_to_subtree; } } else { /* there's only a key anyway, so this is a no-op */ @@ -18176,7 +18175,8 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (unlikely(err != MDBX_SUCCESS)) return err; } - MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); + MDBX_node *const node = + page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); /* Large/Overflow page overwrites need special handling */ if (unlikely(node_flags(node) & F_BIGDATA)) { @@ -18250,19 +18250,18 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if ((err = page_retire(mc, lp.page)) != MDBX_SUCCESS) return err; } else { - olddata.iov_len = node_ds(node); - olddata.iov_base = node_data(node); - cASSERT(mc, ptr_disp(olddata.iov_base, olddata.iov_len) <= + old_data.iov_len = node_ds(node); + old_data.iov_base = node_data(node); + cASSERT(mc, ptr_disp(old_data.iov_base, old_data.iov_len) <= ptr_disp(mc->mc_pg[mc->mc_top], env->me_psize)); /* DB has dups? */ if (mc->mc_db->md_flags & MDBX_DUPSORT) { /* Prepare (sub-)page/sub-DB to accept the new item, if needed. * fp: old sub-page or a header faking it. - * mp: new (sub-)page. offset: growth in page size. - * xdata: node data with new page or DB. */ - size_t i; - size_t offset = 0; + * mp: new (sub-)page. + * xdata: node data with new sub-page or sub-DB. */ + size_t growth = 0; /* growth in page size.*/ MDBX_page *mp = fp = xdata.iov_base = env->me_pbuf; mp->mp_pgno = mc->mc_pg[mc->mc_top]->mp_pgno; @@ -18270,19 +18269,19 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (!(node_flags(node) & F_DUPDATA)) { /* does data match? 
*/ if (flags & MDBX_APPENDDUP) { - const int cmp = mc->mc_dbx->md_dcmp(data, &olddata); - cASSERT(mc, cmp != 0 || eq_fast(data, &olddata)); + const int cmp = mc->mc_dbx->md_dcmp(data, &old_data); + cASSERT(mc, cmp != 0 || eq_fast(data, &old_data)); if (unlikely(cmp <= 0)) return MDBX_EKEYMISMATCH; - } else if (eq_fast(data, &olddata)) { - cASSERT(mc, mc->mc_dbx->md_dcmp(data, &olddata) == 0); + } else if (eq_fast(data, &old_data)) { + cASSERT(mc, mc->mc_dbx->md_dcmp(data, &old_data) == 0); if (flags & MDBX_NODUPDATA) return MDBX_KEYEXIST; /* data is match exactly byte-to-byte, nothing to update */ rc = MDBX_SUCCESS; - if (likely((flags & MDBX_MULTIPLE) == 0)) - return rc; - goto continue_multiple; + if (unlikely(batch_dupfixed_done)) + goto batch_dupfixed_continue; + return rc; } /* Just overwrite the current item */ @@ -18292,13 +18291,13 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, } /* Back up original data item */ - memcpy(dkey.iov_base = fp + 1, olddata.iov_base, - dkey.iov_len = olddata.iov_len); + memcpy(old_singledup.iov_base = fp + 1, old_data.iov_base, + old_singledup.iov_len = old_data.iov_len); /* Make sub-page header for the dup items, with dummy body */ fp->mp_flags = P_LEAF | P_SUBP; fp->mp_lower = 0; - xdata.iov_len = PAGEHDRSZ + dkey.iov_len + data->iov_len; + xdata.iov_len = PAGEHDRSZ + old_data.iov_len + data->iov_len; if (mc->mc_db->md_flags & MDBX_DUPFIXED) { fp->mp_flags |= P_LEAF2; fp->mp_leaf2_ksize = (uint16_t)data->iov_len; @@ -18306,27 +18305,26 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, cASSERT(mc, xdata.iov_len <= env->me_psize); } else { xdata.iov_len += 2 * (sizeof(indx_t) + NODESIZE) + - (dkey.iov_len & 1) + (data->iov_len & 1); - cASSERT(mc, xdata.iov_len <= env->me_psize); + (old_data.iov_len & 1) + (data->iov_len & 1); } fp->mp_upper = (uint16_t)(xdata.iov_len - PAGEHDRSZ); - olddata.iov_len = xdata.iov_len; /* pretend olddata is fp */ + old_data.iov_len = 
xdata.iov_len; /* pretend olddata is fp */ } else if (node_flags(node) & F_SUBDATA) { /* Data is on sub-DB, just store it */ flags |= F_DUPDATA | F_SUBDATA; - goto put_sub; + goto dupsort_put; } else { /* Data is on sub-page */ - fp = olddata.iov_base; + fp = old_data.iov_base; switch (flags) { default: if (!(mc->mc_db->md_flags & MDBX_DUPFIXED)) { - offset = node_size(data, nullptr) + sizeof(indx_t); + growth = node_size(data, nullptr) + sizeof(indx_t); break; } - offset = fp->mp_leaf2_ksize; - if (page_room(fp) < offset) { - offset *= 4; /* space for 4 more */ + growth = fp->mp_leaf2_ksize; + if (page_room(fp) < growth) { + growth *= 4; /* space for 4 more */ break; } /* FALLTHRU: Big enough MDBX_DUPFIXED sub-page */ @@ -18337,17 +18335,17 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, fp->mp_pgno = mp->mp_pgno; mc->mc_xcursor->mx_cursor.mc_pg[0] = fp; flags |= F_DUPDATA; - goto put_sub; + goto dupsort_put; } - xdata.iov_len = olddata.iov_len + offset; + xdata.iov_len = old_data.iov_len + growth; } fp_flags = fp->mp_flags; if (node_size_len(node_ks(node), xdata.iov_len) > env->me_leaf_nodemax) { /* Too big for a sub-page, convert to sub-DB */ + convert_to_subtree: fp_flags &= ~P_SUBP; - prep_subDB: nested_dupdb.md_xsize = 0; nested_dupdb.md_flags = flags_db2sub(mc->mc_db->md_flags); if (mc->mc_db->md_flags & MDBX_DUPFIXED) { @@ -18366,8 +18364,8 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (unlikely(par.err != MDBX_SUCCESS)) return par.err; mc->mc_db->md_leaf_pages += 1; - cASSERT(mc, env->me_psize > olddata.iov_len); - offset = env->me_psize - (unsigned)olddata.iov_len; + cASSERT(mc, env->me_psize > old_data.iov_len); + growth = env->me_psize - (unsigned)old_data.iov_len; flags |= F_DUPDATA | F_SUBDATA; nested_dupdb.md_root = mp->mp_pgno; nested_dupdb.md_seq = 0; @@ -18379,8 +18377,8 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, mp->mp_txnid = 
mc->mc_txn->mt_front; mp->mp_leaf2_ksize = fp->mp_leaf2_ksize; mp->mp_lower = fp->mp_lower; - cASSERT(mc, fp->mp_upper + offset <= UINT16_MAX); - mp->mp_upper = (indx_t)(fp->mp_upper + offset); + cASSERT(mc, fp->mp_upper + growth < UINT16_MAX); + mp->mp_upper = fp->mp_upper + (indx_t)growth; if (unlikely(fp_flags & P_LEAF2)) { memcpy(page_data(mp), page_data(fp), page_numkeys(fp) * fp->mp_leaf2_ksize); @@ -18391,21 +18389,21 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, cASSERT(mc, (mp->mp_upper & 1) == 0); memcpy(ptr_disp(mp, mp->mp_upper + PAGEHDRSZ), ptr_disp(fp, fp->mp_upper + PAGEHDRSZ), - olddata.iov_len - fp->mp_upper - PAGEHDRSZ); + old_data.iov_len - fp->mp_upper - PAGEHDRSZ); memcpy(mp->mp_ptrs, fp->mp_ptrs, page_numkeys(fp) * sizeof(mp->mp_ptrs[0])); - for (i = 0; i < page_numkeys(fp); i++) { - cASSERT(mc, mp->mp_ptrs[i] + offset <= UINT16_MAX); - mp->mp_ptrs[i] += (indx_t)offset; + for (size_t i = 0; i < page_numkeys(fp); i++) { + cASSERT(mc, mp->mp_ptrs[i] + growth <= UINT16_MAX); + mp->mp_ptrs[i] += (indx_t)growth; } } } if (!insert_key) node_del(mc, 0); - rdata = &xdata; + ref_data = &xdata; flags |= F_DUPDATA; - goto new_sub; + goto insert_node; } /* MDBX passes F_SUBDATA in 'flags' to write a DB record */ @@ -18413,15 +18411,15 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, return MDBX_INCOMPATIBLE; current: - if (data->iov_len == olddata.iov_len) { + if (data->iov_len == old_data.iov_len) { cASSERT(mc, EVEN(key->iov_len) == EVEN(node_ks(node))); /* same size, just replace it. Note that we could * also reuse this node if the new data is smaller, * but instead we opt to shrink the node in that case. 
*/ if (flags & MDBX_RESERVE) - data->iov_base = olddata.iov_base; + data->iov_base = old_data.iov_base; else if (!(mc->mc_flags & C_SUB)) - memcpy(olddata.iov_base, data->iov_base, data->iov_len); + memcpy(old_data.iov_base, data->iov_base, data->iov_len); else { cASSERT(mc, page_numkeys(mc->mc_pg[mc->mc_top]) == 1); cASSERT(mc, PAGETYPE_COMPAT(mc->mc_pg[mc->mc_top]) == P_LEAF); @@ -18446,14 +18444,15 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, node_del(mc, 0); } - rdata = data; + ref_data = data; -new_sub:; +insert_node:; const unsigned naf = flags & NODE_ADD_FLAGS; - size_t nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) ? key->iov_len - : leaf_size(env, key, rdata); + size_t nsize = IS_LEAF2(mc->mc_pg[mc->mc_top]) + ? key->iov_len + : leaf_size(env, key, ref_data); if (page_room(mc->mc_pg[mc->mc_top]) < nsize) { - rc = page_split(mc, key, rdata, P_INVALID, + rc = page_split(mc, key, ref_data, P_INVALID, insert_key ? naf : naf | MDBX_SPLIT_REPLACE); if (rc == MDBX_SUCCESS && AUDIT_ENABLED()) rc = insert_key ? cursor_check(mc) : cursor_check_updating(mc); @@ -18461,25 +18460,25 @@ new_sub:; /* There is room already in this leaf page. */ if (IS_LEAF2(mc->mc_pg[mc->mc_top])) { cASSERT(mc, !(naf & (F_BIGDATA | F_SUBDATA | F_DUPDATA)) && - rdata->iov_len == 0); + ref_data->iov_len == 0); rc = node_add_leaf2(mc, mc->mc_ki[mc->mc_top], key); } else - rc = node_add_leaf(mc, mc->mc_ki[mc->mc_top], key, rdata, naf); + rc = node_add_leaf(mc, mc->mc_ki[mc->mc_top], key, ref_data, naf); if (likely(rc == 0)) { /* Adjust other cursors pointing to mp */ const MDBX_dbi dbi = mc->mc_dbi; - const size_t i = mc->mc_top; - MDBX_page *const mp = mc->mc_pg[i]; + const size_t top = mc->mc_top; + MDBX_page *const mp = mc->mc_pg[top]; for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[dbi]; m2; m2 = m2->mc_next) { MDBX_cursor *m3 = (mc->mc_flags & C_SUB) ? 
&m2->mc_xcursor->mx_cursor : m2; - if (m3 == mc || m3->mc_snum < mc->mc_snum || m3->mc_pg[i] != mp) + if (m3 == mc || m3->mc_snum < mc->mc_snum || m3->mc_pg[top] != mp) continue; - if (m3->mc_ki[i] >= mc->mc_ki[i]) - m3->mc_ki[i] += insert_key; + if (m3->mc_ki[top] >= mc->mc_ki[top]) + m3->mc_ki[top] += insert_key; if (XCURSOR_INITED(m3)) - XCURSOR_REFRESH(m3, mp, m3->mc_ki[i]); + XCURSOR_REFRESH(m3, mp, m3->mc_ki[top]); } } } @@ -18490,18 +18489,18 @@ new_sub:; * size limits on dupdata. The actual data fields of the child * DB are all zero size. */ if (flags & F_DUPDATA) { - unsigned xflags; - size_t ecount; - put_sub: - xdata.iov_len = 0; - xdata.iov_base = nullptr; + MDBX_val empty; + dupsort_put: + empty.iov_len = 0; + empty.iov_base = nullptr; MDBX_node *node = page_node(mc->mc_pg[mc->mc_top], mc->mc_ki[mc->mc_top]); #define SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE 1 STATIC_ASSERT( (MDBX_NODUPDATA >> SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE) == MDBX_NOOVERWRITE); - xflags = MDBX_CURRENT | ((flags & MDBX_NODUPDATA) >> - SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE); + unsigned xflags = + MDBX_CURRENT | ((flags & MDBX_NODUPDATA) >> + SHIFT_MDBX_NODUPDATA_TO_MDBX_NOOVERWRITE); if ((flags & MDBX_CURRENT) == 0) { xflags -= MDBX_CURRENT; err = cursor_xinit1(mc, node, mc->mc_pg[mc->mc_top]); @@ -18511,80 +18510,78 @@ new_sub:; if (sub_root) mc->mc_xcursor->mx_cursor.mc_pg[0] = sub_root; /* converted, write the original data first */ - if (dkey.iov_base) { - rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, &dkey, &xdata, - xflags); + if (old_singledup.iov_base) { + rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, &old_singledup, + &empty, xflags); if (unlikely(rc)) - goto bad_sub; + goto dupsort_error; } if (!(node_flags(node) & F_SUBDATA) || sub_root) { /* Adjust other cursors pointing to mp */ - MDBX_cursor *m2; - MDBX_xcursor *mx = mc->mc_xcursor; - size_t i = mc->mc_top; - MDBX_page *mp = mc->mc_pg[i]; + MDBX_xcursor *const mx = mc->mc_xcursor; + const 
size_t top = mc->mc_top; + MDBX_page *const mp = mc->mc_pg[top]; const intptr_t nkeys = page_numkeys(mp); - for (m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; m2 = m2->mc_next) { + for (MDBX_cursor *m2 = mc->mc_txn->mt_cursors[mc->mc_dbi]; m2; + m2 = m2->mc_next) { if (m2 == mc || m2->mc_snum < mc->mc_snum) continue; if (!(m2->mc_flags & C_INITIALIZED)) continue; - if (m2->mc_pg[i] == mp) { - if (m2->mc_ki[i] == mc->mc_ki[i]) { - err = cursor_xinit2(m2, mx, dkey.iov_base != nullptr); + if (m2->mc_pg[top] == mp) { + if (m2->mc_ki[top] == mc->mc_ki[top]) { + err = cursor_xinit2(m2, mx, old_singledup.iov_base != nullptr); if (unlikely(err != MDBX_SUCCESS)) return err; - } else if (!insert_key && m2->mc_ki[i] < nkeys) { - XCURSOR_REFRESH(m2, mp, m2->mc_ki[i]); + } else if (!insert_key && m2->mc_ki[top] < nkeys) { + XCURSOR_REFRESH(m2, mp, m2->mc_ki[top]); } } } } cASSERT(mc, mc->mc_xcursor->mx_db.md_entries < PTRDIFF_MAX); - ecount = (size_t)mc->mc_xcursor->mx_db.md_entries; + const size_t probe = (size_t)mc->mc_xcursor->mx_db.md_entries; #define SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND 1 STATIC_ASSERT((MDBX_APPENDDUP >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND) == MDBX_APPEND); xflags |= (flags & MDBX_APPENDDUP) >> SHIFT_MDBX_APPENDDUP_TO_MDBX_APPEND; - rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, data, &xdata, + rc = cursor_put_nochecklen(&mc->mc_xcursor->mx_cursor, data, &empty, xflags); if (flags & F_SUBDATA) { void *db = node_data(node); mc->mc_xcursor->mx_db.md_mod_txnid = mc->mc_txn->mt_txnid; memcpy(db, &mc->mc_xcursor->mx_db, sizeof(MDBX_db)); } - insert_data = (ecount != (size_t)mc->mc_xcursor->mx_db.md_entries); + insert_data = (probe != (size_t)mc->mc_xcursor->mx_db.md_entries); } /* Increment count unless we just replaced an existing item. 
*/ if (insert_data) mc->mc_db->md_entries++; if (insert_key) { - /* Invalidate txn if we created an empty sub-DB */ - if (unlikely(rc)) - goto bad_sub; + if (unlikely(rc != MDBX_SUCCESS)) + goto dupsort_error; /* If we succeeded and the key didn't exist before, * make sure the cursor is marked valid. */ mc->mc_flags |= C_INITIALIZED; } - if (unlikely(flags & MDBX_MULTIPLE)) { - if (likely(rc == MDBX_SUCCESS)) { - continue_multiple: - mcount++; + if (likely(rc == MDBX_SUCCESS)) { + if (unlikely(batch_dupfixed_done)) { + batch_dupfixed_continue: /* let caller know how many succeeded, if any */ - data[1].iov_len = mcount; - if (mcount < dcount) { + if ((*batch_dupfixed_done += 1) < batch_dupfixed_given) { data[0].iov_base = ptr_disp(data[0].iov_base, data[0].iov_len); insert_key = insert_data = false; - dkey.iov_base = nullptr; + old_singledup.iov_base = nullptr; goto more; } } + if (AUDIT_ENABLED()) + rc = cursor_check(mc); } - if (rc == MDBX_SUCCESS && AUDIT_ENABLED()) - rc = cursor_check(mc); return rc; - bad_sub: + + dupsort_error: if (unlikely(rc == MDBX_KEYEXIST)) { /* should not happen, we deleted that item */ ERROR("Unexpected %i error while put to nested dupsort's hive", rc); From aa9d2387e59da96d527b7f4dc47f068ec6740964 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 28 Feb 2024 15:49:05 +0300 Subject: [PATCH 117/137] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`me=5Fdxb=5Fmmap.current?= =?UTF-8?q?=20>=20me=5Fdxb=5Fmmap.limit`=20=D0=B8=20=D1=81=D1=80=D0=B0?= =?UTF-8?q?=D0=B1=D0=B0=D1=82=D1=8B=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=20=D1=81?= =?UTF-8?q?=D0=BE=D0=BE=D1=82=D0=B2=D0=B5=D1=82=D1=81=D1=82=D0=B2=D1=83?= =?UTF-8?q?=D1=8E=D1=89=D0=B5=D0=B9=20assert-=D0=BF=D1=80=D0=BE=D0=B2?= =?UTF-8?q?=D0=B5=D1=80=D0=BA=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit Устранение упущения приводящего к нелогичной ситуации `me_dxb_mmap.current > me_dxb_mmap.limit` при "дребезге" размера БД. В текущем понимании, последствий кроме срабатывания assert-проверки нет, а вероятность проявления близка к нулю. --- src/core.c | 11 ++++++++++- src/osal.c | 3 ++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index 3e99a8dd..c163fe14 100644 --- a/src/core.c +++ b/src/core.c @@ -6670,6 +6670,7 @@ __cold static int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, #endif /* MDBX_ENABLE_MADVISE */ rc = osal_mresize(mresize_flags, &env->me_dxb_mmap, size_bytes, limit_bytes); + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); #if MDBX_ENABLE_MADVISE if (rc == MDBX_SUCCESS) { @@ -6695,6 +6696,7 @@ __cold static int dxb_resize(MDBX_env *const env, const pgno_t used_pgno, bailout: if (rc == MDBX_SUCCESS) { + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); eASSERT(env, limit_bytes == env->me_dxb_mmap.limit); eASSERT(env, size_bytes <= env->me_dxb_mmap.filesize); if (mode == explicit_resize) @@ -6725,6 +6727,7 @@ bailout: "present %" PRIuPTR " -> %" PRIuPTR ", " "limit %" PRIuPTR " -> %" PRIuPTR ", errcode %d", prev_size, size_bytes, prev_limit, limit_bytes, rc); + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); } if (!env->me_dxb_mmap.base) { env->me_flags |= MDBX_FATAL_ERROR; @@ -9534,6 +9537,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { const size_t used_bytes = pgno2bytes(env, txn->mt_next_pgno); const size_t required_bytes = (txn->mt_flags & MDBX_TXN_RDONLY) ? 
used_bytes : size_bytes; + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); if (unlikely(required_bytes > env->me_dxb_mmap.current)) { /* Размер БД (для пишущих транзакций) или используемых данных (для * читающих транзакций) больше предыдущего/текущего размера внутри @@ -9551,6 +9555,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { txn->mt_geo.upper, implicit_grow); if (unlikely(rc != MDBX_SUCCESS)) goto bailout; + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); } else if (unlikely(size_bytes < env->me_dxb_mmap.current)) { /* Размер БД меньше предыдущего/текущего размера внутри процесса, можно * уменьшить, но всё сложнее: @@ -9576,11 +9581,15 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { rc = osal_fastmutex_acquire(&env->me_remap_guard); #endif if (likely(rc == MDBX_SUCCESS)) { + eASSERT(env, env->me_dxb_mmap.limit >= env->me_dxb_mmap.current); rc = osal_filesize(env->me_dxb_mmap.fd, &env->me_dxb_mmap.filesize); if (likely(rc == MDBX_SUCCESS)) { eASSERT(env, env->me_dxb_mmap.filesize >= required_bytes); if (env->me_dxb_mmap.current > env->me_dxb_mmap.filesize) - env->me_dxb_mmap.current = (size_t)env->me_dxb_mmap.filesize; + env->me_dxb_mmap.current = + (env->me_dxb_mmap.limit < env->me_dxb_mmap.filesize) + ? env->me_dxb_mmap.limit + : (size_t)env->me_dxb_mmap.filesize; } #if defined(_WIN32) || defined(_WIN64) osal_srwlock_ReleaseShared(&env->me_remap_guard); diff --git a/src/osal.c b/src/osal.c index 5559b204..3865025a 100644 --- a/src/osal.c +++ b/src/osal.c @@ -2581,7 +2581,7 @@ retry_mapview:; ptr_disp(map->base, size), ((map->current < map->limit) ? map->current : map->limit) - size); } - map->current = size; + map->current = (size < map->limit) ? 
size : map->limit; } if (limit == map->limit) @@ -2742,6 +2742,7 @@ retry_mapview:; map->base = ptr; } map->limit = limit; + map->current = size; #if MDBX_ENABLE_MADVISE #ifdef MADV_DONTFORK From 72e51ee370c1e6d1f012c367f5f8155597d1ad5c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 28 Feb 2024 16:11:09 +0300 Subject: [PATCH 118/137] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20y=D0=BD=D0=B0=D1=81=D0=BB?= =?UTF-8?q?=D0=B5=D0=B4=D0=BE=D0=B2=D0=B0=D0=BD=D0=BD=D0=BE=D0=B9=20=D0=BE?= =?UTF-8?q?=D1=82=20LMDB=20=D0=BE=D1=88=D0=B8=D0=B1=D0=BA=D0=B8=20=D0=BF?= =?UTF-8?q?=D1=80=D0=B8=D0=B2=D0=BE=D0=B4=D1=8F=D1=89=D0=B5=D0=B9=20=D0=BA?= =?UTF-8?q?=20=D0=BF=D0=BE=D0=B2=D1=80=D0=B5=D0=B6=D0=B4=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D1=8E=20=D0=91=D0=94=20=D0=BF=D1=80=D0=B8=20=D0=B8=D1=81=D0=BF?= =?UTF-8?q?=D0=BE=D0=BB=D1=8C=D0=B7=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B8=20?= =?UTF-8?q?MDBX=5FDUPFIXED.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Тезисно: - Использование DUPFIXED (включая INTEGERDUP) могло приводить к повреждению БД и/или потере данных. Этот коммит устраняет эту угрозу. - Вероятность проявления существенно увеличивается с увеличением размера/длины мульти-значений/дубликатов (не ключей). - В MDBX проблема унаследована от LMDB, где существует более 11 лет, начиная с коммита https://github.com/LMDB/lmdb/commit/ccc4d23e749edc5ea461261427a0ee0a663fdfe5 и до настоящего времени. Для вложенных страниц типа LEAF2 (которые содержат только значения одинаковой длины, без таблицы смещений к ним), упомянутым выше коммитом, было добавлено резервирование места (что в целом спорно, но в некоторых сценариях позволяет уменьшить накладные расходы). 
Ошибка была в том, что в коде не исключалась возможность превышения размера страницы БД, что далее приводило к арифметическому переполнению, повреждению БД и/или просписи памяти. --- src/core.c | 145 ++++++++++++++++++++++++++++++++++++++++++------ src/internals.h | 8 ++- 2 files changed, 135 insertions(+), 18 deletions(-) diff --git a/src/core.c b/src/core.c index c163fe14..af570585 100644 --- a/src/core.c +++ b/src/core.c @@ -13499,13 +13499,23 @@ __cold static void setup_pagesize(MDBX_env *env, const size_t pagesize) { leaf_nodemax > (intptr_t)(sizeof(MDBX_db) + NODESIZE + 42) && leaf_nodemax >= branch_nodemax && leaf_nodemax < (int)UINT16_MAX && leaf_nodemax % 2 == 0); - env->me_leaf_nodemax = (unsigned)leaf_nodemax; - env->me_branch_nodemax = (unsigned)branch_nodemax; + env->me_leaf_nodemax = (uint16_t)leaf_nodemax; + env->me_branch_nodemax = (uint16_t)branch_nodemax; env->me_psize2log = (uint8_t)log2n_powerof2(pagesize); eASSERT(env, pgno2bytes(env, 1) == pagesize); eASSERT(env, bytes2pgno(env, pagesize + pagesize) == 2); recalculate_merge_threshold(env); + /* TODO: recalculate me_subpage_xyz values from MDBX_opt_subpage_xyz. 
*/ + env->me_subpage_limit = env->me_leaf_nodemax - NODESIZE; + env->me_subpage_room_threshold = 0; + env->me_subpage_reserve_prereq = env->me_leaf_nodemax; + env->me_subpage_reserve_limit = env->me_subpage_limit / 42; + eASSERT(env, + env->me_subpage_reserve_prereq > + env->me_subpage_room_threshold + env->me_subpage_reserve_limit); + eASSERT(env, env->me_leaf_nodemax >= env->me_subpage_limit + NODESIZE); + const pgno_t max_pgno = bytes2pgno(env, MAX_MAPSIZE); if (!env->me_options.flags.non_auto.dp_limit) { /* auto-setup dp_limit by "The42" ;-) */ @@ -17939,6 +17949,26 @@ static __hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, return rc; } +static size_t leaf2_reserve(const MDBX_env *const env, size_t host_page_room, + size_t subpage_len, size_t item_len) { + eASSERT(env, (subpage_len & 1) == 0); + eASSERT(env, + env->me_subpage_reserve_prereq > env->me_subpage_room_threshold + + env->me_subpage_reserve_limit && + env->me_leaf_nodemax >= env->me_subpage_limit + NODESIZE); + size_t reserve = 0; + for (size_t n = 0; + n < 5 && reserve + item_len <= env->me_subpage_reserve_limit && + EVEN(subpage_len + item_len) <= env->me_subpage_limit && + host_page_room >= + env->me_subpage_reserve_prereq + EVEN(subpage_len + item_len); + ++n) { + subpage_len += item_len; + reserve += item_len; + } + return reserve + (subpage_len & 1); +} + static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, MDBX_val *data, unsigned flags) { int err; @@ -18310,12 +18340,21 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, if (mc->mc_db->md_flags & MDBX_DUPFIXED) { fp->mp_flags |= P_LEAF2; fp->mp_leaf2_ksize = (uint16_t)data->iov_len; - xdata.iov_len += 2 * data->iov_len; /* leave space for 2 more */ - cASSERT(mc, xdata.iov_len <= env->me_psize); + /* Будем создавать LEAF2-страницу, как минимум с двумя элементами. 
+ * При коротких значениях и наличии свободного места можно сделать + * некоторое резервирование места, чтобы при последующих добавлениях + * не сразу расширять созданную под-страницу. + * Резервирование в целом сомнительно (см ниже), но может сработать + * в плюс (а если в минус то несущественный) при коротких ключах. */ + xdata.iov_len += leaf2_reserve( + env, page_room(mc->mc_pg[mc->mc_top]) + old_data.iov_len, + xdata.iov_len, data->iov_len); + cASSERT(mc, (xdata.iov_len & 1) == 0); } else { xdata.iov_len += 2 * (sizeof(indx_t) + NODESIZE) + (old_data.iov_len & 1) + (data->iov_len & 1); } + cASSERT(mc, (xdata.iov_len & 1) == 0); fp->mp_upper = (uint16_t)(xdata.iov_len - PAGEHDRSZ); old_data.iov_len = xdata.iov_len; /* pretend olddata is fp */ } else if (node_flags(node) & F_SUBDATA) { @@ -18327,19 +18366,85 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, fp = old_data.iov_base; switch (flags) { default: - if (!(mc->mc_db->md_flags & MDBX_DUPFIXED)) { - growth = node_size(data, nullptr) + sizeof(indx_t); - break; + growth = IS_LEAF2(fp) ? fp->mp_leaf2_ksize + : (node_size(data, nullptr) + sizeof(indx_t)); + if (page_room(fp) >= growth) { + /* На текущей под-странице есть место для добавления элемента. + * Оптимальнее продолжить использовать эту страницу, ибо + * добавление вложенного дерева увеличит WAF на одну страницу. */ + goto continue_subpage; } - growth = fp->mp_leaf2_ksize; - if (page_room(fp) < growth) { - growth *= 4; /* space for 4 more */ - break; - } - /* FALLTHRU: Big enough MDBX_DUPFIXED sub-page */ - __fallthrough; + /* На текущей под-странице нет места для еще одного элемента. + * Можно либо увеличить эту под-страницу, либо вынести куст + * значений во вложенное дерево. + * + * Продолжать использовать текущую под-страницу возможно + * только пока и если размер после добавления элемента будет + * меньше me_leaf_nodemax. Соответственно, при превышении + * просто сразу переходим на вложенное дерево. 
*/ + xdata.iov_len = old_data.iov_len + (growth += growth & 1); + if (xdata.iov_len > env->me_subpage_limit) + goto convert_to_subtree; + + /* Можно либо увеличить под-страницу, в том числе с некоторым + * запасом, либо перейти на вложенное поддерево. + * + * Резервирование места на под-странице представляется сомнительным: + * - Резервирование увеличит рыхлость страниц, в том числе + * вероятность разделения основной/гнездовой страницы; + * - Сложно предсказать полезный размер резервирования, + * особенно для не-MDBX_DUPFIXED; + * - Наличие резерва позволяет сэкономить только на перемещении + * части элементов основной/гнездовой страницы при последующих + * добавлениях в нее элементов. Причем после первого изменения + * размера под-страницы, её тело будет примыкать + * к неиспользуемому месту на основной/гнездовой странице, + * поэтому последующие последовательные добавления потребуют + * только передвижения в mp_ptrs[]. + * + * Соответственно, более важным/определяющим представляется + * своевременный переход к вложенному дереву, но тут достаточно + * сложный конфликт интересов: + * - При склонности к переходу к вложенным деревьям, суммарно + * в БД будет большее кол-во более рыхлых страниц. Это увеличит + * WAF, а также RAF при последовательных чтениях большой БД. + * Однако, при коротких ключах и большом кол-ве + * дубликатов/мультизначений, плотность ключей в листовых + * страницах основного дерева будет выше. Соответственно, будет + * пропорционально меньше branch-страниц. Поэтому будет выше + * вероятность оседания/не-вымывания страниц основного дерева из + * LRU-кэша, а также попадания в write-back кэш при записи. + * - Наоборот, при склонности к использованию под-страниц, будут + * наблюдаться обратные эффекты. Плюс некоторые накладные расходы + * на лишнее копирование данных под-страниц в сценариях + * нескольких обновлений дубликатов одного куста в одной + * транзакции. 
+ * + * Суммарно наиболее рациональным представляется такая тактика: + * - Вводим три порога subpage_limit, subpage_room_threshold + * и subpage_reserve_prereq, которые могут быть + * заданы/скорректированы пользователем в ‰ от me_leaf_nodemax; + * - Используем под-страницу пока её размер меньше subpage_limit + * и на основной/гнездовой странице не-менее + * subpage_room_threshold свободного места; + * - Резервируем место только для 1-3 коротких dupfixed-элементов, + * расширяя размер под-страницы на размер кэш-линии ЦПУ, но + * только если на странице не менее subpage_reserve_prereq + * свободного места. + * - По-умолчанию устанавливаем: + * subpage_limit = me_leaf_nodemax (1000‰); + * subpage_room_threshold = 0; + * subpage_reserve_prereq = me_leaf_nodemax (1000‰). + */ + if (IS_LEAF2(fp)) + growth += leaf2_reserve( + env, page_room(mc->mc_pg[mc->mc_top]) + old_data.iov_len, + xdata.iov_len, data->iov_len); + break; + case MDBX_CURRENT | MDBX_NODUPDATA: case MDBX_CURRENT: + continue_subpage: fp->mp_txnid = mc->mc_txn->mt_front; fp->mp_pgno = mp->mp_pgno; mc->mc_xcursor->mx_cursor.mc_pg[0] = fp; @@ -18347,11 +18452,18 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, goto dupsort_put; } xdata.iov_len = old_data.iov_len + growth; + cASSERT(mc, (xdata.iov_len & 1) == 0); } fp_flags = fp->mp_flags; - if (node_size_len(node_ks(node), xdata.iov_len) > - env->me_leaf_nodemax) { + if (xdata.iov_len > env->me_subpage_limit || + node_size_len(node_ks(node), xdata.iov_len) > + env->me_leaf_nodemax || + (env->me_subpage_room_threshold && + page_room(mc->mc_pg[mc->mc_top]) + + node_size_len(node_ks(node), old_data.iov_len) < + env->me_subpage_room_threshold + + node_size_len(node_ks(node), xdata.iov_len))) { /* Too big for a sub-page, convert to sub-DB */ convert_to_subtree: fp_flags &= ~P_SUBP; @@ -18375,6 +18487,7 @@ static __hot int cursor_put_nochecklen(MDBX_cursor *mc, const MDBX_val *key, mc->mc_db->md_leaf_pages += 1; cASSERT(mc, 
env->me_psize > old_data.iov_len); growth = env->me_psize - (unsigned)old_data.iov_len; + cASSERT(mc, (growth & 1) == 0); flags |= F_DUPDATA | F_SUBDATA; nested_dupdb.md_root = mp->mp_pgno; nested_dupdb.md_seq = 0; diff --git a/src/internals.h b/src/internals.h index f4e37ac3..eaba18d0 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1388,8 +1388,12 @@ struct MDBX_env { #define me_lfd me_lck_mmap.fd struct MDBX_lockinfo *me_lck; - unsigned me_leaf_nodemax; /* max size of a leaf-node */ - unsigned me_branch_nodemax; /* max size of a branch-node */ + uint16_t me_leaf_nodemax; /* max size of a leaf-node */ + uint16_t me_branch_nodemax; /* max size of a branch-node */ + uint16_t me_subpage_limit; + uint16_t me_subpage_room_threshold; + uint16_t me_subpage_reserve_prereq; + uint16_t me_subpage_reserve_limit; atomic_pgno_t me_mlocked_pgno; uint8_t me_psize2log; /* log2 of DB page size */ int8_t me_stuck_meta; /* recovery-only: target meta page or less that zero */ From 9480599afa2fa89990cab5d5ce9eb931254321aa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 29 Feb 2024 09:19:54 +0300 Subject: [PATCH 119/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20`rebalance()`=20=D1=80=D0=B0=D0=B4?= =?UTF-8?q?=D0=B8=20=D1=83=D0=BC=D0=B5=D0=BD=D1=8C=D1=88=D0=B5=D0=BD=D0=B8?= =?UTF-8?q?=D1=8F=20WAF.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit После предыдущей серии доработок весной 2021 года, функция `rebalance()` обеспечивала слияние мало заполненной страницы с менее заполненной соседней, одновременно пытаясь не вовлекать соседних страниц, если те еще не были скопированы/клонированы/изменены в текущей транзакции. В целом, реализованная тактика представляется успешной. Однако, при обновлении GC она иногда приводила к исчерпанию подготовленного резерва извлеченных из GC страниц. 
Это не является проблемой, если не считать вероятность срабатывания `assert(txn->mt_flags & MDBX_TXN_DRAINED_GC)` в отладочных сборках. Тем не менее, из этой ситуации можно сделать вывод, что поведение `rebalance()`, как минимум, может быть обогащено опцией уменьшения WAF ценой меньшей сбалансированности дерева. Технически при этом слияние выполняется преимущественно с грязной страницей, если на ней достаточно места и соседняя страница с другой стороны еще чистая. Соответствующая опция в `enum MDBX_option_t` будет добавлена чуть позже. --- src/core.c | 59 +++++++++++++++++++++++++++++++------------------ src/internals.h | 2 ++ 2 files changed, 39 insertions(+), 22 deletions(-) diff --git a/src/core.c b/src/core.c index af570585..09095f8a 100644 --- a/src/core.c +++ b/src/core.c @@ -10642,7 +10642,9 @@ static int gcu_prepare_backlog(MDBX_txn *txn, gcu_context_t *ctx) { const size_t for_all_before_touch = for_relist + for_tree_before_touch; const size_t for_all_after_touch = for_relist + for_tree_after_touch; - if (likely(for_relist < 2 && gcu_backlog_size(txn) > for_all_before_touch)) + if (likely(for_relist < 2 && gcu_backlog_size(txn) > for_all_before_touch) && + (ctx->cursor.mc_snum == 0 || + IS_MODIFIABLE(txn, ctx->cursor.mc_pg[ctx->cursor.mc_top]))) return MDBX_SUCCESS; TRACE(">> retired-stored %zu, left %zi, backlog %zu, need %zu (4list %zu, " @@ -18867,6 +18869,7 @@ static __hot int cursor_del(MDBX_cursor *mc, MDBX_put_flags_t flags) { return rc; MDBX_page *mp = mc->mc_pg[mc->mc_top]; + cASSERT(mc, IS_MODIFIABLE(mc->mc_txn, mp)); if (!MDBX_DISABLE_VALIDATION && unlikely(!CHECK_LEAF_TYPE(mc, mp))) { ERROR("unexpected leaf-page #%" PRIaPGNO " type 0x%x seen by cursor", mp->mp_pgno, mp->mp_flags); @@ -20386,7 +20389,8 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { IS_LEAF(cdst->mc_pg[cdst->mc_db->md_depth - 1])); cASSERT(csrc, csrc->mc_snum < csrc->mc_db->md_depth || IS_LEAF(csrc->mc_pg[csrc->mc_db->md_depth - 1])); - cASSERT(cdst, 
page_room(pdst) >= page_used(cdst->mc_txn->mt_env, psrc)); + cASSERT(cdst, csrc->mc_txn->mt_env->me_options.prefer_waf_insteadof_balance || + page_room(pdst) >= page_used(cdst->mc_txn->mt_env, psrc)); const int pagetype = PAGETYPE_WHOLE(psrc); /* Move all nodes from src to dst */ @@ -20397,7 +20401,9 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { size_t j = dst_nkeys; if (unlikely(pagetype & P_LEAF2)) { /* Mark dst as dirty. */ - if (unlikely(rc = page_touch(cdst))) + rc = page_touch(cdst); + cASSERT(cdst, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) return rc; key.iov_len = csrc->mc_db->md_xsize; @@ -20405,6 +20411,7 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { size_t i = 0; do { rc = node_add_leaf2(cdst, j++, &key); + cASSERT(cdst, rc != MDBX_RESULT_TRUE); if (unlikely(rc != MDBX_SUCCESS)) return rc; key.iov_base = ptr_disp(key.iov_base, key.iov_len); @@ -20418,7 +20425,8 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { cursor_copy(csrc, &mn); /* must find the lowest key below src */ rc = page_search_lowest(&mn); - if (unlikely(rc)) + cASSERT(csrc, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) return rc; const MDBX_page *mp = mn.mc_pg[mn.mc_top]; @@ -20443,7 +20451,9 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { } /* Mark dst as dirty. 
*/ - if (unlikely(rc = page_touch(cdst))) + rc = page_touch(cdst); + cASSERT(cdst, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) return rc; size_t i = 0; @@ -20457,6 +20467,7 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { cASSERT(csrc, node_flags(srcnode) == 0); rc = node_add_branch(cdst, j++, &key, node_pgno(srcnode)); } + cASSERT(cdst, rc != MDBX_RESULT_TRUE); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -20483,7 +20494,8 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { if (csrc->mc_ki[csrc->mc_top] == 0) { const MDBX_val nullkey = {0, 0}; rc = update_key(csrc, &nullkey); - if (unlikely(rc)) { + cASSERT(csrc, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) { csrc->mc_top++; return rc; } @@ -20518,7 +20530,8 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { } rc = page_retire(csrc, (MDBX_page *)psrc); - if (unlikely(rc)) + cASSERT(csrc, rc != MDBX_RESULT_TRUE); + if (unlikely(rc != MDBX_SUCCESS)) return rc; cASSERT(cdst, cdst->mc_db->md_entries > 0); @@ -20531,7 +20544,7 @@ static int page_merge(MDBX_cursor *csrc, MDBX_cursor *cdst) { const uint16_t save_depth = cdst->mc_db->md_depth; cursor_pop(cdst); rc = rebalance(cdst); - if (unlikely(rc)) + if (unlikely(rc != MDBX_SUCCESS)) return rc; cASSERT(cdst, cdst->mc_db->md_entries > 0); @@ -20719,11 +20732,9 @@ static int rebalance(MDBX_cursor *mc) { mc->mc_snum = 0; mc->mc_top = 0; mc->mc_flags &= ~C_INITIALIZED; - - rc = page_retire(mc, mp); - if (unlikely(rc != MDBX_SUCCESS)) - return rc; - } else if (IS_BRANCH(mp) && nkeys == 1) { + return page_retire(mc, mp); + } + if (IS_BRANCH(mp) && nkeys == 1) { DEBUG("%s", "collapsing root page!"); mc->mc_db->md_root = node_pgno(page_node(mp, 0)); rc = page_get(mc, mc->mc_db->md_root, &mc->mc_pg[0], mp->mp_txnid); @@ -20756,15 +20767,10 @@ static int rebalance(MDBX_cursor *mc) { PAGETYPE_WHOLE(mc->mc_pg[mc->mc_top]) == pagetype); cASSERT(mc, mc->mc_snum < mc->mc_db->md_depth || 
IS_LEAF(mc->mc_pg[mc->mc_db->md_depth - 1])); - - rc = page_retire(mc, mp); - if (likely(rc == MDBX_SUCCESS)) - rc = page_touch(mc); - return rc; - } else { - DEBUG("root page %" PRIaPGNO " doesn't need rebalancing (flags 0x%x)", - mp->mp_pgno, mp->mp_flags); + return page_retire(mc, mp); } + DEBUG("root page %" PRIaPGNO " doesn't need rebalancing (flags 0x%x)", + mp->mp_pgno, mp->mp_flags); return MDBX_SUCCESS; } @@ -20813,6 +20819,7 @@ static int rebalance(MDBX_cursor *mc) { const size_t right_nkeys = right ? page_numkeys(right) : 0; bool involve = false; retry: + cASSERT(mc, mc->mc_snum > 1); if (left_room > room_threshold && left_room >= right_room && (IS_MODIFIABLE(mc->mc_txn, left) || involve)) { /* try merge with left */ @@ -20884,7 +20891,15 @@ retry: return MDBX_SUCCESS; } - if (likely(!involve)) { + if (mc->mc_txn->mt_env->me_options.prefer_waf_insteadof_balance && + likely(room_threshold > 0)) { + room_threshold = 0; + goto retry; + } + if (likely(!involve) && + (likely(mc->mc_dbi != FREE_DBI) || mc->mc_txn->tw.loose_pages || + MDBX_PNL_GETSIZE(mc->mc_txn->tw.relist) || (mc->mc_flags & C_GCU) || + (mc->mc_txn->mt_flags & MDBX_TXN_DRAINED_GC) || room_threshold)) { involve = true; goto retry; } diff --git a/src/internals.h b/src/internals.h index eaba18d0..483d545c 100644 --- a/src/internals.h +++ b/src/internals.h @@ -1437,6 +1437,8 @@ struct MDBX_env { unsigned writethrough_threshold; #endif /* Windows */ bool prefault_write; + bool prefer_waf_insteadof_balance; /* Strive to minimize WAF instead of + balancing pages fullment */ union { unsigned all; /* tracks options with non-auto values but tuned by user */ From e29cb076d379a1cab1367884e0c62a77252276db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 2 Mar 2024 01:08:22 +0300 Subject: [PATCH 120/137] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D1=80=D0=B0?= 
=?UTF-8?q?=D0=B1=D0=BE=D1=82=D0=BA=D0=B0=20=D0=B3=D0=B5=D0=BD=D0=B5=D1=80?= =?UTF-8?q?=D0=B0=D1=82=D0=BE=D1=80=D0=B0=20=D0=BA=D0=BB=D1=8E=D1=87=D0=B5?= =?UTF-8?q?=D0=B9/=D0=B7=D0=BD=D0=B0=D1=87=D0=B5=D0=BD=D0=B8=D0=B9=20?= =?UTF-8?q?=D0=B4=D0=BB=D1=8F=20=D0=BD=D0=B0=D0=B4=D0=B5=D0=B6=D0=BD=D0=BE?= =?UTF-8?q?=D0=B9=20=D0=B3=D0=B5=D0=BD=D0=B5=D1=80=D0=B0=D1=86=D0=B8=D0=B8?= =?UTF-8?q?=20=D1=83=D0=BD=D0=B8=D0=BA=D0=B0=D0=BB=D1=8C=D0=BD=D1=8B=D1=85?= =?UTF-8?q?=20=D0=B7=D0=BD=D0=B0=D1=87=D0=B5=D0=BD=D0=B8=D0=B9.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit В текущем понимании коммитом этим устраняется застарелая проблема редких сбоев стохастического теста из-за вероятности ошибочной генерации повторяющихся пар key-value. --- test/hill.c++ | 4 +- test/keygen.c++ | 97 ++++++++++++++++++++++++++++++++++++++++--------- test/keygen.h++ | 11 +++++- 3 files changed, 91 insertions(+), 21 deletions(-) diff --git a/test/hill.c++ b/test/hill.c++ index f5ca1026..bbb3b3cf 100644 --- a/test/hill.c++ +++ b/test/hill.c++ @@ -90,7 +90,7 @@ bool testcase_hill::run() { assert(b_serial > a_serial); // создаем первую запись из пары - const keygen::serial_t age_shift = UINT64_C(1) << (a_serial % 31); + const keygen::serial_t age_shift = keyvalue_maker.remix_age(a_serial); log_trace("uphill: insert-a (age %" PRIu64 ") %" PRIu64, age_shift, a_serial); generate_pair(a_serial, a_key, a_data_1, age_shift); @@ -302,7 +302,7 @@ bool testcase_hill::run() { assert(b_serial > a_serial); // обновляем первую запись из пары - const keygen::serial_t age_shift = UINT64_C(1) << (a_serial % 31); + const keygen::serial_t age_shift = keyvalue_maker.remix_age(a_serial); log_trace("downhill: update-a (age 0->%" PRIu64 ") %" PRIu64, age_shift, a_serial); generate_pair(a_serial, a_key, a_data_0, 0); diff --git a/test/keygen.c++ b/test/keygen.c++ index 46b64ecf..69b0550c 100644 --- a/test/keygen.c++ +++ b/test/keygen.c++ @@ -14,6 +14,39 @@ #include "test.h++" +static 
const uint64_t primes[64] = { + /* */ + 0, 1, 3, 7, 13, 31, 61, 127, 251, 509, 1021, 2039, 4093, 8191, 16381, + /* */ + UINT64_C(32749), UINT64_C(65521), UINT64_C(131071), UINT64_C(262139), + UINT64_C(524287), UINT64_C(1048573), UINT64_C(2097143), UINT64_C(4194301), + UINT64_C(8388593), UINT64_C(16777213), UINT64_C(33554393), + UINT64_C(67108859), UINT64_C(134217689), UINT64_C(268435399), + UINT64_C(536870909), UINT64_C(1073741789), UINT64_C(2147483647), + UINT64_C(4294967291), UINT64_C(8589934583), UINT64_C(17179869143), + UINT64_C(34359738337), UINT64_C(68719476731), UINT64_C(137438953447), + UINT64_C(274877906899), UINT64_C(549755813881), UINT64_C(1099511627689), + UINT64_C(2199023255531), UINT64_C(4398046511093), UINT64_C(8796093022151), + UINT64_C(17592186044399), UINT64_C(35184372088777), + UINT64_C(70368744177643), UINT64_C(140737488355213), + UINT64_C(281474976710597), UINT64_C(562949953421231), + UINT64_C(1125899906842597), UINT64_C(2251799813685119), + UINT64_C(4503599627370449), UINT64_C(9007199254740881), + UINT64_C(18014398509481951), UINT64_C(36028797018963913), + UINT64_C(72057594037927931), UINT64_C(144115188075855859), + UINT64_C(288230376151711717), UINT64_C(576460752303423433), + UINT64_C(1152921504606846883), UINT64_C(2305843009213693951), + UINT64_C(4611686018427387847), UINT64_C(9223372036854775783)}; + +/* static unsigned supid_log2(uint64_t v) { + unsigned r = 0; + while (v > 1) { + v >>= 1; + r += 1; + } + return r; +} */ + namespace keygen { /* LY: https://en.wikipedia.org/wiki/Injective_function */ @@ -48,19 +81,19 @@ serial_t injective(const serial_t serial, 10, 14, 22, 19, 3, 21, 18, 19, 26, 24, 2, 21, 25, 29, 24, 10, 11, 14, 20, 19}; + const auto mask = actor_params::serial_mask(bits); const auto mult = m[bits - 8]; const auto shift = s[bits - 8]; serial_t result = serial * mult; if (salt) { const unsigned left = bits / 2; const unsigned right = bits - left; - result = (result << left) | - ((result & actor_params::serial_mask(bits)) >> 
right); + result = (result << left) | ((result & mask) >> right); result = (result ^ salt) * mult; } - result ^= result << shift; - result &= actor_params::serial_mask(bits); + result ^= (result & mask) >> shift; + result &= mask; log_trace("keygen-injective: serial %" PRIu64 "/%u @%" PRIx64 ",%u,%" PRIu64 " => %" PRIu64 "/%u", serial, bits, mult, shift, salt, result, bits); @@ -111,7 +144,7 @@ void __hot maker::pair(serial_t serial, const buffer &key, buffer &value, } serial_t key_serial = serial; - serial_t value_serial = value_age << mapping.split; + serial_t value_serial = (value_age & value_age_mask) << mapping.split; if (mapping.split) { if (MDBX_db_flags_t(key_essentials.flags) & MDBX_DUPSORT) { key_serial >>= mapping.split; @@ -200,6 +233,7 @@ void maker::setup(const config::actor_params_pod &actor, MDBX_DUPFIXED | MDBX_INTEGERDUP | MDBX_REVERSEDUP) < UINT16_MAX); #endif + key_essentials.flags = uint16_t( actor.table_flags & MDBX_db_flags_t(MDBX_INTEGERKEY | MDBX_REVERSEKEY | MDBX_DUPSORT)); @@ -209,6 +243,12 @@ void maker::setup(const config::actor_params_pod &actor, key_essentials.maxlen = std::min( uint32_t(actor.keylen_max), uint32_t(mdbx_limits_keysize_max(actor.pagesize, actor.table_flags))); + key_essentials.bits = (key_essentials.maxlen < sizeof(serial_t)) + ? key_essentials.maxlen * CHAR_BIT + : sizeof(serial_t) * CHAR_BIT; + key_essentials.mask = actor_params::serial_mask(key_essentials.bits); + assert(key_essentials.bits > 63 || + key_essentials.mask > primes[key_essentials.bits]); value_essentials.flags = uint16_t( actor.table_flags & @@ -219,27 +259,44 @@ void maker::setup(const config::actor_params_pod &actor, value_essentials.maxlen = std::min( uint32_t(actor.datalen_max), uint32_t(mdbx_limits_valsize_max(actor.pagesize, actor.table_flags))); + value_essentials.bits = (value_essentials.maxlen < sizeof(serial_t)) + ? 
value_essentials.maxlen * CHAR_BIT + : sizeof(serial_t) * CHAR_BIT; + value_essentials.mask = actor_params::serial_mask(value_essentials.bits); + assert(value_essentials.bits > 63 || + value_essentials.mask > primes[value_essentials.bits]); if (!actor.keygen.zero_fill) { key_essentials.flags |= essentials::prng_fill_flag; value_essentials.flags |= essentials::prng_fill_flag; } - (void)thread_number; mapping = actor.keygen; + const auto split = mapping.split; while (mapping.split > - essentials::value_age_width + value_essentials.maxlen * CHAR_BIT || + value_essentials.bits - essentials::value_age_minwidth || mapping.split >= mapping.width) mapping.split -= 1; + if (split != mapping.width) + log_notice("keygen: reduce mapping-split from %u to %u", split, + mapping.split); + const auto width = mapping.width; while (unsigned((actor.table_flags & MDBX_DUPSORT) ? mapping.width - mapping.split - : mapping.width) > key_essentials.maxlen * CHAR_BIT) + : mapping.width) > key_essentials.bits) mapping.width -= 1; + if (width != mapping.width) + log_notice("keygen: reduce mapping-width from %u to %u", width, + mapping.width); - salt = - (prng_state + uint64_t(thread_number)) * UINT64_C(14653293970879851569); + value_age_bits = value_essentials.bits - mapping.split; + value_age_mask = actor_params::serial_mask(value_age_bits); + assert(value_age_bits >= essentials::value_age_minwidth); + salt = (prng_state ^ + (thread_number * 1575554837) * UINT64_C(59386707711075671)) * + UINT64_C(14653293970879851569); base = actor.serial_base(); } @@ -321,14 +378,18 @@ serial_t __hot maker::mk_begin(serial_t serial, const essentials ¶ms, result &out) { assert(out.limit >= params.maxlen); assert(params.maxlen >= params.minlen); - if (params.maxlen < sizeof(serial_t)) { - const serial_t max = actor_params::serial_mask(params.maxlen * CHAR_BIT); - if (serial > max) { - serial ^= (serial >> max / 2) * serial_t((sizeof(serial_t) > 4) - ? 
UINT64_C(40719303417517073) - : UINT32_C(3708688457)); - serial &= max; - } + assert(serial <= params.mask); + if (unlikely(serial > params.mask)) { +#if 1 + serial %= primes[params.bits]; + assert(params.mask > primes[params.bits]); +#else + const serial_t maxbits = params.maxlen * CHAR_BIT; + serial ^= (serial >> maxbits / 2) * + serial_t((sizeof(serial_t) > 4) ? UINT64_C(40719303417517073) + : UINT32_C(3708688457)); + serial &= params.mask; +#endif assert(params.maxlen >= length(serial)); } diff --git a/test/keygen.h++ b/test/keygen.h++ index 0ded8130..8eb78118 100644 --- a/test/keygen.h++ +++ b/test/keygen.h++ @@ -108,10 +108,14 @@ class maker { struct essentials { uint16_t minlen{0}; - enum { prng_fill_flag = 1, value_age_width = 8 }; + enum { prng_fill_flag = 1, value_age_minwidth = 5 }; uint16_t flags{0}; uint32_t maxlen{0}; + serial_t mask{0}; + unsigned bits; } key_essentials, value_essentials; + unsigned value_age_bits; + serial_t value_age_mask{0}; static serial_t mk_begin(serial_t serial, const essentials ¶ms, result &out); @@ -136,6 +140,11 @@ public: } return increment(serial, int64_t(uint64_t(delta) << mapping.split)); } + + serial_t remix_age(serial_t serial) const { + return (UINT64_C(768097847591) * (serial ^ UINT64_C(768097847591))) & + value_age_mask; + } }; void log_pair(logging::loglevel level, const char *prefix, const buffer &key, From d8db63a67d151b29d20938cf2f81e1dc0492b653 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 2 Mar 2024 01:11:24 +0300 Subject: [PATCH 121/137] =?UTF-8?q?mdbx-test:=20=D1=87=D1=83=D1=82=D1=8C?= =?UTF-8?q?=20=D0=B1=D0=BE=D0=BB=D0=B5=D0=B5=20=D1=80=D0=B0=D0=B7=D1=83?= =?UTF-8?q?=D0=BC=D0=BD=D0=BE=D0=B5/=D1=83=D0=B4=D0=BE=D0=B1=D0=BD=D0=BE?= =?UTF-8?q?=D0=B5=20=D0=BF=D0=BE=D0=B2=D0=B5=D0=B4=D0=B5=D0=BD=D0=B8=D0=B5?= =?UTF-8?q?=20=D0=BF=D1=80=D0=B8=20=D0=BA=D0=BE=D0=BB=D0=BB=D0=B8=D0=B7?= 
=?UTF-8?q?=D0=B8=D0=B8=20=D0=B3=D0=B5=D0=BD=D0=B5=D1=80=D0=B0=D1=86=D0=B8?= =?UTF-8?q?=D0=B8=20=D0=BD=D0=B5-=D1=83=D0=BD=D0=B8=D0=BA=D0=B0=D0=BB?= =?UTF-8?q?=D1=8C=D0=BD=D1=8B=D1=85=20=D0=BF=D0=B0=D1=80=20=D0=BA=D0=BB?= =?UTF-8?q?=D1=8E=D1=87-=D0=B7=D0=BD=D0=B0=D1=87=D0=B5=D0=BD=D0=B8=D0=B5.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test.c++ | 55 +++++++++++++++++++++++++++++++-------------------- test/test.h++ | 5 +++-- 2 files changed, 37 insertions(+), 23 deletions(-) diff --git a/test/test.c++ b/test/test.c++ index bb7bd818..05f1be60 100644 --- a/test/test.c++ +++ b/test/test.c++ @@ -599,9 +599,9 @@ void testcase::db_table_drop(MDBX_dbi handle) { void testcase::db_table_clear(MDBX_dbi handle, MDBX_txn *txn) { log_trace(">> testcase::db_table_clear, handle %u", handle); - int rc = mdbx_drop(txn ? txn : txn_guard.get(), handle, false); - if (unlikely(rc != MDBX_SUCCESS)) - failure_perror("mdbx_drop(delete=false)", rc); + int err = mdbx_drop(txn ? 
txn : txn_guard.get(), handle, false); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_drop(delete=false)", err); speculum.clear(); log_trace("<< testcase::db_table_clear"); } @@ -609,21 +609,25 @@ void testcase::db_table_clear(MDBX_dbi handle, MDBX_txn *txn) { void testcase::db_table_close(MDBX_dbi handle) { log_trace(">> testcase::db_table_close, handle %u", handle); assert(!txn_guard); - int rc = mdbx_dbi_close(db_guard.get(), handle); - if (unlikely(rc != MDBX_SUCCESS)) - failure_perror("mdbx_dbi_close()", rc); + int err = mdbx_dbi_close(db_guard.get(), handle); + if (unlikely(err != MDBX_SUCCESS)) + failure_perror("mdbx_dbi_close()", err); log_trace("<< testcase::db_table_close"); } -void testcase::checkdata(const char *step, MDBX_dbi handle, MDBX_val key2check, +bool testcase::checkdata(const char *step, MDBX_dbi handle, MDBX_val key2check, MDBX_val expected_valued) { MDBX_val actual_value = expected_valued; - int rc = mdbx_get_equal_or_great(txn_guard.get(), handle, &key2check, - &actual_value); - if (unlikely(rc != MDBX_SUCCESS)) - failure_perror(step, rc); + int err = mdbx_get_equal_or_great(txn_guard.get(), handle, &key2check, + &actual_value); + if (unlikely(err != MDBX_SUCCESS)) { + if (!config.params.speculum || err != MDBX_RESULT_TRUE) + failure_perror(step, (err == MDBX_RESULT_TRUE) ? 
MDBX_NOTFOUND : err); + return false; + } if (!is_samedata(&actual_value, &expected_valued)) failure("%s data mismatch", step); + return true; } //----------------------------------------------------------------------------- @@ -988,7 +992,9 @@ int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, } auto it_lowerbound = insertion_result.first; - if (++it_lowerbound != speculum.end()) { + if (insertion_result.second) + ++it_lowerbound; + if (it_lowerbound != speculum.end()) { const auto cursor_lowerbound = speculum_cursors[lowerbound].get(); speculum_check_cursor("after-insert", "lowerbound", it_lowerbound, cursor_lowerbound, MDBX_GET_CURRENT); @@ -1015,30 +1021,37 @@ int testcase::insert(const keygen::buffer &akey, const keygen::buffer &adata, int testcase::replace(const keygen::buffer &akey, const keygen::buffer &new_data, - const keygen::buffer &old_data, MDBX_put_flags_t flags) { + const keygen::buffer &old_data, MDBX_put_flags_t flags, + bool hush_keygen_mistakes) { + int expected_err = MDBX_SUCCESS; if (config.params.speculum) { const auto S_key = iov2dataview(akey); const auto S_old = iov2dataview(old_data); const auto S_new = iov2dataview(new_data); const auto removed = speculum.erase(SET::key_type(S_key, S_old)); - if (unlikely(removed != 1)) { + if (unlikely(!removed)) { char dump_key[128], dump_value[128]; log_error( - "speculum-%s: %s old value {%s, %s}", "replace", - (removed > 1) ? 
"multi" : "no", + "speculum-%s: no old pair {%s, %s} (keygen mistake)", "replace", mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), mdbx_dump_val(&old_data->value, dump_value, sizeof(dump_value))); - } - if (unlikely(!speculum.emplace(S_key, S_new).second)) { + expected_err = MDBX_NOTFOUND; + } else if (unlikely(!speculum.emplace(S_key, S_new).second)) { char dump_key[128], dump_value[128]; log_error( - "speculum-replace: new pair not inserted {%s, %s}", + "speculum-%s: %s {%s, %s}", "replace", "new pair not inserted", mdbx_dump_val(&akey->value, dump_key, sizeof(dump_key)), mdbx_dump_val(&new_data->value, dump_value, sizeof(dump_value))); + expected_err = MDBX_KEYEXIST; } } - return mdbx_replace(txn_guard.get(), dbi, &akey->value, &new_data->value, - &old_data->value, flags); + int err = mdbx_replace(txn_guard.get(), dbi, &akey->value, &new_data->value, + &old_data->value, flags); + if (err && err == expected_err && hush_keygen_mistakes) { + log_notice("speculum-%s: %s %d", "replace", "hust keygen mistake", err); + err = MDBX_SUCCESS; + } + return err; } int testcase::remove(const keygen::buffer &akey, const keygen::buffer &adata) { diff --git a/test/test.h++ b/test/test.h++ index d99ba4f8..ef9ea0c1 100644 --- a/test/test.h++ +++ b/test/test.h++ @@ -232,7 +232,8 @@ protected: int insert(const keygen::buffer &akey, const keygen::buffer &adata, MDBX_put_flags_t flags); int replace(const keygen::buffer &akey, const keygen::buffer &new_value, - const keygen::buffer &old_value, MDBX_put_flags_t flags); + const keygen::buffer &old_value, MDBX_put_flags_t flags, + bool hush_keygen_mistakes = true); int remove(const keygen::buffer &akey, const keygen::buffer &adata); static int hsr_callback(const MDBX_env *env, const MDBX_txn *txn, @@ -262,7 +263,7 @@ protected: void txn_inject_writefault(MDBX_txn *txn); void fetch_canary(); void update_canary(uint64_t increment); - void checkdata(const char *step, MDBX_dbi handle, MDBX_val key2check, + bool checkdata(const char 
*step, MDBX_dbi handle, MDBX_val key2check, MDBX_val expected_valued); unsigned txn_underutilization_x256(MDBX_txn *txn) const; From 0c24b49bbf88486ef751d1ee37d5e4556a5b9da6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 20 Feb 2024 13:47:44 +0300 Subject: [PATCH 122/137] =?UTF-8?q?mdbx-test:=20=D1=80=D0=B0=D1=81=D1=88?= =?UTF-8?q?=D0=B8=D1=80=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=81=D1=82=D0=BE=D1=85?= =?UTF-8?q?=D0=B0=D1=81=D1=82=D0=B8=D1=87=D0=B5=D1=81=D0=BA=D0=BE=D0=B3?= =?UTF-8?q?=D0=BE=20=D1=82=D0=B5=D1=81=D1=82=D0=B0=20dupfixed-=D1=81=D1=86?= =?UTF-8?q?=D0=B5=D0=BD=D0=B0=D1=80=D0=B8=D1=8F=D0=BC=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/long_stochastic.sh | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/long_stochastic.sh b/test/long_stochastic.sh index b508b5f6..12b493cb 100755 --- a/test/long_stochastic.sh +++ b/test/long_stochastic.sh @@ -461,6 +461,12 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of 
${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} split=24 caption="Probe #$((++count)) int-key,with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ @@ -472,6 +478,13 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + split=16 caption="Probe #$((++count)) int-key,w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ @@ -489,6 +502,12 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups 
--keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} if [ "$EXTRA" != "no" ]; then split=10 @@ -507,6 +526,12 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 caption="Probe #$((++count)) with-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=max \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed 
--keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} fi split=4 @@ -519,6 +544,12 @@ for nops in 10 33 100 333 1000 3333 10000 33333 100000 333333 1000000 3333333 10 caption="Probe #$((++count)) w/o-dups, split=${split}, case $((++subcase)) of ${cases}" probe \ --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=-data.dups --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen.min=min --datalen.max=1111 \ --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) int-key,fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+key.integer,+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} + caption="Probe #$((++count)) fixdups, split=${split}, case $((++subcase)) of ${cases}" probe \ + --prng-seed=${seed} --pagesize=$PAGESIZE --size-upper-upto=${db_size_mb}M --table=+data.fixed --keygen.split=${split} --keylen.min=min --keylen.max=max --datalen=rnd \ + --nops=$nops --batch.write=$wbatch --mode=$(bits2options $bits)${syncmodes[count%4]} done # options loop=$((loop + 1)) if [ -n "$LOOPS" ] && [ $loop -ge "$LOOPS" ]; then break; fi From 4ed05689bcb39f57019ddb1263a88d811d737ed1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 29 Feb 2024 15:58:01 +0300 Subject: [PATCH 123/137] =?UTF-8?q?mdbx:=20=D0=BF=D0=B5=D1=80=D0=B5=D0=B8?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D0=BE=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20setup?= =?UTF-8?q?=5Fsdb()=20=D0=B4=D0=BB=D1=8F=20=D1=87=D0=B8=D1=82=D0=B0=D0=B5?= 
=?UTF-8?q?=D0=BC=D0=BE=D1=81=D1=82=D0=B8=20=D0=BA=D0=BE=D0=B4=D0=B0=20(?= =?UTF-8?q?=D0=BA=D0=BE=D1=81=D0=BC=D0=B5=D1=82=D0=B8=D0=BA=D0=B0).?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/core.c b/src/core.c index 09095f8a..9d1f8f8e 100644 --- a/src/core.c +++ b/src/core.c @@ -3423,7 +3423,7 @@ static void cursor_copy(const MDBX_cursor *csrc, MDBX_cursor *cdst); static int __must_check_result drop_tree(MDBX_cursor *mc, const bool may_have_subDBs); static int __must_check_result fetch_sdb(MDBX_txn *txn, size_t dbi); -static int __must_check_result setup_dbx(MDBX_dbx *const dbx, +static int __must_check_result setup_sdb(MDBX_dbx *const dbx, const MDBX_db *const db, const unsigned pagesize); @@ -9493,7 +9493,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { txn->mt_dbs[MAIN_DBI].md_flags); env->me_db_flags[MAIN_DBI] = DB_POISON; atomic_store32(&env->me_dbi_seqs[MAIN_DBI], seq, mo_AcquireRelease); - rc = setup_dbx(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], + rc = setup_sdb(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize); if (likely(rc == MDBX_SUCCESS)) { seq = dbi_seq_next(env, MAIN_DBI); @@ -16320,7 +16320,7 @@ __hot __noinline static int page_search_root(MDBX_cursor *mc, return MDBX_SUCCESS; } -static int setup_dbx(MDBX_dbx *const dbx, const MDBX_db *const db, +static int setup_sdb(MDBX_dbx *const dbx, const MDBX_db *const db, const unsigned pagesize) { if (unlikely(!db_check_flags(db->md_flags))) { ERROR("incompatible or invalid db.md_flags (%u) ", db->md_flags); @@ -16415,7 +16415,7 @@ static int fetch_sdb(MDBX_txn *txn, size_t dbi) { return MDBX_CORRUPTED; } #endif /* !MDBX_DISABLE_VALIDATION */ - rc = setup_dbx(dbx, db, txn->mt_env->me_psize); + rc = setup_sdb(dbx, db, txn->mt_env->me_psize); if (unlikely(rc != MDBX_SUCCESS)) return rc; @@ -19596,7 +19596,7 @@ static __inline int 
couple_init(MDBX_cursor_couple *couple, const size_t dbi, rc = page_search(&couple->outer, NULL, MDBX_PS_ROOTONLY); rc = (rc != MDBX_NOTFOUND) ? rc : MDBX_SUCCESS; } else if (unlikely(dbx->md_klen_max == 0)) { - rc = setup_dbx(dbx, db, txn->mt_env->me_psize); + rc = setup_sdb(dbx, db, txn->mt_env->me_psize); } if (couple->outer.mc_db->md_flags & MDBX_DUPSORT) { @@ -23511,7 +23511,7 @@ static int dbi_bind(MDBX_txn *txn, const size_t dbi, unsigned user_flags, datacmp ? datacmp : get_default_datacmp(user_flags); txn->mt_dbs[dbi].md_flags = db_flags; txn->mt_dbs[dbi].md_xsize = 0; - if (unlikely(setup_dbx(&env->me_dbxs[dbi], &txn->mt_dbs[dbi], + if (unlikely(setup_sdb(&env->me_dbxs[dbi], &txn->mt_dbs[dbi], env->me_psize))) { txn->mt_dbi_state[dbi] = DBI_LINDO; txn->mt_flags |= MDBX_TXN_ERROR; @@ -23588,7 +23588,7 @@ static int dbi_open_locked(MDBX_txn *txn, unsigned user_flags, MDBX_dbi *dbi, env->me_dbxs[MAIN_DBI].md_dcmp = get_default_datacmp(main_flags); txn->mt_dbs[MAIN_DBI].md_flags = main_flags; txn->mt_dbs[MAIN_DBI].md_xsize = 0; - if (unlikely(setup_dbx(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], + if (unlikely(setup_sdb(&env->me_dbxs[MAIN_DBI], &txn->mt_dbs[MAIN_DBI], env->me_psize) != MDBX_SUCCESS)) { txn->mt_dbi_state[MAIN_DBI] = DBI_LINDO; txn->mt_flags |= MDBX_TXN_ERROR; From fe498de323a2836ce7ef0b06a8026b2da5f92797 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sat, 2 Mar 2024 01:06:05 +0300 Subject: [PATCH 124/137] =?UTF-8?q?mdbx:=20=D1=83=D1=81=D1=82=D1=80=D0=B0?= =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B2=20`cursor=5Fset()`=20?= =?UTF-8?q?=D0=BF=D0=BE=D0=B2=D1=82=D0=BE=D1=80=D0=BD=D0=BE=D0=B3=D0=BE=20?= =?UTF-8?q?=D1=81=D1=80=D0=B0=D0=B2=D0=BD=D0=B5=D0=BD=D0=B8=D1=8F=20=D1=81?= =?UTF-8?q?=20=D0=BD=D1=83=D0=BB=D0=B5=D0=B2=D1=8B=D0=BC=20=D1=8D=D0=BB?= =?UTF-8?q?=D0=B5=D0=BC=D0=B5=D0=BD=D1=82=D0=BE=D0=BC=20=D0=BD=D0=B0=20?= 
=?UTF-8?q?=D0=BB=D0=B8=D1=81=D1=82=D0=BE=D0=B2=D0=BE=D0=B9=20=D1=81=D1=82?= =?UTF-8?q?=D1=80=D0=B0=D0=BD=D0=B8=D1=86=D0=B5.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/core.c b/src/core.c index 9d1f8f8e..c8691237 100644 --- a/src/core.c +++ b/src/core.c @@ -16981,8 +16981,9 @@ cursor_set(MDBX_cursor *mc, MDBX_val *key, MDBX_val *data, MDBX_cursor_op op) { goto got_node; } if (cmp < 0) { - if (mc->mc_ki[mc->mc_top] < page_numkeys(mp)) { - /* This is definitely the right page, skip search_page */ + /* This is definitely the right page, skip search_page */ + if (mc->mc_ki[mc->mc_top] != 0 /* уже проверяли выше */ && + mc->mc_ki[mc->mc_top] < page_numkeys(mp)) { if (IS_LEAF2(mp)) { nodekey.iov_base = page_leaf2key(mp, mc->mc_ki[mc->mc_top], nodekey.iov_len); From eca0f463689def0092bd4ac20d4f1678df582976 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 3 Mar 2024 23:10:52 +0300 Subject: [PATCH 125/137] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20assert-=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B8=20=D0=B2=D0=BD=D1=83=D1=82?= =?UTF-8?q?=D1=80=D0=B8=20`check=5Ftxn()`=20=D0=B4=D0=BB=D1=8F=20=D1=81?= =?UTF-8?q?=D0=BB=D1=83=D1=87=D0=B0=D1=8F=20=D0=B7=D0=B0=D0=B2=D0=B5=D1=80?= =?UTF-8?q?=D1=88=D0=B5=D0=BD=D0=BD=D1=8B=D1=85=20=D1=82=D1=80=D0=B0=D0=BD?= =?UTF-8?q?=D0=B7=D0=B0=D0=BA=D1=86=D0=B8=D0=B9=20=D0=B2=20=D1=80=D0=B5?= =?UTF-8?q?=D0=B6=D0=B8=D0=BC=D0=B5=20`MDBX=5FNO=5FTLS`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit По сообщению о проблеме https://t.me/libmdbx/5424 --- src/core.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/core.c b/src/core.c index c8691237..666998b4 
100644 --- a/src/core.c +++ b/src/core.c @@ -9646,10 +9646,11 @@ static __always_inline int check_txn(const MDBX_txn *txn, int bad_bits) { if (unlikely(txn->mt_flags & bad_bits)) return MDBX_BAD_TXN; - tASSERT(txn, (txn->mt_flags & MDBX_NOTLS) == - ((txn->mt_flags & MDBX_TXN_RDONLY) - ? txn->mt_env->me_flags & MDBX_NOTLS - : 0)); + tASSERT(txn, (txn->mt_flags & MDBX_TXN_FINISHED) || + (txn->mt_flags & MDBX_NOTLS) == + ((txn->mt_flags & MDBX_TXN_RDONLY) + ? txn->mt_env->me_flags & MDBX_NOTLS + : 0)); #if MDBX_TXN_CHECKOWNER STATIC_ASSERT(MDBX_NOTLS > MDBX_TXN_FINISHED + MDBX_TXN_RDONLY); if (unlikely(txn->mt_owner != osal_thread_self()) && From 0b87ddc6d4eda23fb8a7728e6c900a2fb2a9ff9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 3 Mar 2024 23:07:45 +0300 Subject: [PATCH 126/137] =?UTF-8?q?mdbx-test:=20=D0=B4=D0=BE=D0=B1=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=B2=20jitter=20=D0=BF?= =?UTF-8?q?=D1=80=D0=BE=D1=81=D1=82=D0=BE=D0=B3=D0=BE=20=D1=82=D0=B5=D1=81?= =?UTF-8?q?=D1=82=D0=B0=20txn=5Freset+txn=5Frenew.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/jitter.c++ | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/test/jitter.c++ b/test/jitter.c++ index 993631e8..3e7a2b52 100644 --- a/test/jitter.c++ +++ b/test/jitter.c++ @@ -124,6 +124,37 @@ bool testcase_jitter::run() { jitter_delay(); txn_begin(true); fetch_canary(); + if (flipcoin()) { + MDBX_txn_info info; + err = mdbx_txn_reset(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_reset()", err); + err = mdbx_txn_info(txn_guard.get(), &info, false); + if (err != MDBX_BAD_TXN) + failure_perror("mdbx_txn_info(MDBX_BAD_TXN)", err); + err = mdbx_txn_reset(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_reset(again)", err); + err = mdbx_txn_break(txn_guard.get()); + if (err) + 
failure_perror("mdbx_txn_break()", err); + + err = mdbx_txn_abort(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_abort()", err); + txn_guard.release(); + txn_begin(true); + err = mdbx_txn_reset(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_reset()", err); + + err = mdbx_txn_renew(txn_guard.get()); + if (err) + failure_perror("mdbx_txn_renew()", err); + err = mdbx_txn_info(txn_guard.get(), &info, false); + if (err) + failure_perror("mdbx_txn_info()", err); + } jitter_delay(); txn_end(flipcoin()); } From 1c174e84c4e8d6a1a9b851b2132e07e5590fefb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 5 Mar 2024 01:56:04 +0300 Subject: [PATCH 127/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20`mdbx=5Fpreopen=5Fsnapinfo()`?= =?UTF-8?q?=20=D0=B2=20API.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit https://gitflic.ru/project/erthink/libmdbx/issue/15 --- mdbx.h | 12 ++++++++ src/core.c | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++ test/main.c++ | 8 +++++ 3 files changed, 103 insertions(+) diff --git a/mdbx.h b/mdbx.h index 1fda47e2..080b2509 100644 --- a/mdbx.h +++ b/mdbx.h @@ -5714,6 +5714,18 @@ LIBMDBX_API int mdbx_env_open_for_recoveryW(MDBX_env *env, * leg(s). */ LIBMDBX_API int mdbx_env_turn_for_recovery(MDBX_env *env, unsigned target_meta); +/** \brief FIXME + */ +LIBMDBX_API int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *arg, + size_t bytes); +#if defined(_WIN32) || defined(_WIN64) || defined(DOXYGEN) +/** \copydoc mdbx_preopen_snapinfo() + * \note Available only on Windows. + * \see mdbx_preopen_snapinfo() */ +LIBMDBX_API int mdbx_preopen_snapinfoW(const wchar_t *pathname, + MDBX_envinfo *arg, size_t bytes); +#endif /* Windows */ + /** \brief Флаги/опции для проверки целостности БД. 
* \see mdbx_env_chk() */ enum MDBX_chk_flags_t { diff --git a/src/core.c b/src/core.c index 666998b4..eb217b0a 100644 --- a/src/core.c +++ b/src/core.c @@ -23396,6 +23396,89 @@ __cold int env_info(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *out, } } +__cold int mdbx_preopen_snapinfo(const char *pathname, MDBX_envinfo *out, + size_t bytes) { +#if defined(_WIN32) || defined(_WIN64) + wchar_t *pathnameW = nullptr; + int rc = osal_mb2w(pathname, &pathnameW); + if (likely(rc == MDBX_SUCCESS)) { + rc = mdbx_preopen_snapinfoW(pathnameW, out, bytes); + osal_free(pathnameW); + } + return rc; +} + +__cold int mdbx_preopen_snapinfoW(const wchar_t *pathname, MDBX_envinfo *out, + size_t bytes) { +#endif /* Windows */ + if (unlikely(!out)) + return MDBX_EINVAL; + + const size_t size_before_bootid = offsetof(MDBX_envinfo, mi_bootid); + const size_t size_before_pgop_stat = offsetof(MDBX_envinfo, mi_pgop_stat); + if (unlikely(bytes != sizeof(MDBX_envinfo)) && bytes != size_before_bootid && + bytes != size_before_pgop_stat) + return MDBX_EINVAL; + + memset(out, 0, bytes); + if (likely(bytes > size_before_bootid)) { + out->mi_bootid.current.x = bootid.x; + out->mi_bootid.current.y = bootid.y; + } + + MDBX_env env; + memset(&env, 0, sizeof(env)); + env.me_pid = osal_getpid(); + const size_t os_psize = osal_syspagesize(); + if (unlikely(!is_powerof2(os_psize) || os_psize < MIN_PAGESIZE)) { + ERROR("unsuitable system pagesize %" PRIuPTR, os_psize); + return MDBX_INCOMPATIBLE; + } + out->mi_sys_pagesize = env.me_os_psize = (unsigned)os_psize; + env.me_flags = MDBX_RDONLY | MDBX_NORDAHEAD | MDBX_ACCEDE | MDBX_VALIDATION; + env.me_stuck_meta = -1; + env.me_lfd = INVALID_HANDLE_VALUE; + env.me_lazy_fd = INVALID_HANDLE_VALUE; + env.me_dsync_fd = INVALID_HANDLE_VALUE; + env.me_fd4meta = INVALID_HANDLE_VALUE; +#if defined(_WIN32) || defined(_WIN64) + env.me_data_lock_event = INVALID_HANDLE_VALUE; + env.me_overlapped_fd = INVALID_HANDLE_VALUE; +#endif /* Windows */ + + int rc = 
env_handle_pathname(&env, pathname, 0); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + rc = osal_openfile(MDBX_OPEN_DXB_READ, &env, env.me_pathname.dxb, + &env.me_lazy_fd, 0); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + MDBX_meta header; + rc = read_header(&env, &header, 0, 0); + if (unlikely(rc != MDBX_SUCCESS)) + goto bailout; + + setup_pagesize(&env, header.mm_psize); + out->mi_dxb_pagesize = env.me_psize; + out->mi_geo.lower = pgno2bytes(&env, header.mm_geo.lower); + out->mi_geo.upper = pgno2bytes(&env, header.mm_geo.upper); + out->mi_geo.shrink = pgno2bytes(&env, pv2pages(header.mm_geo.shrink_pv)); + out->mi_geo.grow = pgno2bytes(&env, pv2pages(header.mm_geo.grow_pv)); + out->mi_geo.current = pgno2bytes(&env, header.mm_geo.now); + out->mi_last_pgno = header.mm_geo.next - 1; + + const unsigned n = 0; + out->mi_recent_txnid = constmeta_txnid(&header); + out->mi_meta_sign[n] = unaligned_peek_u64(4, &header.mm_sign); + if (likely(bytes > size_before_bootid)) + memcpy(&out->mi_bootid.meta[n], &header.mm_bootid, 16); + +bailout: + env_close(&env, false); + return rc; +} + __cold int mdbx_env_info_ex(const MDBX_env *env, const MDBX_txn *txn, MDBX_envinfo *arg, size_t bytes) { if (unlikely((env == NULL && txn == NULL) || arg == NULL)) diff --git a/test/main.c++ b/test/main.c++ index 84ab801c..8732f5f7 100644 --- a/test/main.c++ +++ b/test/main.c++ @@ -743,6 +743,14 @@ int main(int argc, char *const argv[]) { log_trace("=== done..."); } + if (!failed) { + MDBX_envinfo info; + int err = + mdbx_preopen_snapinfo(params.pathname_db.c_str(), &info, sizeof(info)); + if (err != MDBX_SUCCESS) + failure_perror("mdbx_preopen_snapinfo()", err); + } + log_notice("RESULT: %s\n", failed ? 
"Failed" : "Successful"); if (global::config::cleanup_after) { if (failed) From 1549d3970c8b08873f57a318e2a8fa5c89138c3d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 5 Mar 2024 15:07:00 +0300 Subject: [PATCH 128/137] =?UTF-8?q?mdbx:=20=D0=BA=D0=BE=D1=80=D1=80=D0=B5?= =?UTF-8?q?=D0=BA=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=BA=D0=B0=20=D1=83=D1=81?= =?UTF-8?q?=D0=BB=D0=BE=D0=B2=D0=B8=D1=8F=20=D0=B2=20assert-=D0=BF=D1=80?= =?UTF-8?q?=D0=BE=D0=B2=D0=B5=D1=80=D0=BA=D0=B5=20=D0=B4=D0=BB=D1=8F=20MDB?= =?UTF-8?q?X=5FTXN=5FDRAINED=5FGC.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index eb217b0a..0f8663ba 100644 --- a/src/core.c +++ b/src/core.c @@ -7772,7 +7772,7 @@ static pgr_t page_alloc_slowpath(const MDBX_cursor *const mc, const size_t num, //--------------------------------------------------------------------------- if (unlikely(!is_gc_usable(txn, mc, flags))) { - eASSERT(env, txn->mt_flags & MDBX_TXN_DRAINED_GC); + eASSERT(env, (txn->mt_flags & MDBX_TXN_DRAINED_GC) || num > 1); goto no_gc; } From 5df3eb6449867f97fd5c7492af38eba078dc51db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 6 Mar 2024 11:27:22 +0300 Subject: [PATCH 129/137] =?UTF-8?q?mdbx-test:=20=D1=83=D1=81=D0=B8=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=81=D1=86=D0=B5=D0=BD=D0=B0=D1=80?= =?UTF-8?q?=D0=B8=D0=B5=D0=B2=20=D1=82=D0=B5=D1=81=D1=82=D0=BE=D0=B2=D1=8B?= =?UTF-8?q?=D1=85=20=D1=86=D0=B5=D0=BB=D0=B5=D0=B9=20=D0=B2=20`GNUmakefile?= =?UTF-8?q?`=20=D0=B8=20`CMake`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- GNUmakefile | 8 ++++---- test/CMakeLists.txt | 2 +- 2 files changed, 
5 insertions(+), 5 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 0ddce68e..1694df6b 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -424,12 +424,12 @@ smoke-fault: build-test test: build-test @echo ' RUNNING `test/long_stochastic.sh --loops 2`...' - $(QUIET)test/long_stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) + $(QUIET)test/long_stochastic.sh --dont-check-ram-size --loops 2 --db-upto-mb 256 --extra --skip-make --taillog >$(TEST_LOG) || (cat $(TEST_LOG) && false) long-test: test-long test-long: build-test @echo ' RUNNING `test/long_stochastic.sh --loops 42`...' - $(QUIET)test/long_stochastic.sh --loops 42 --db-upto-mb 1024 --skip-make --taillog + $(QUIET)test/long_stochastic.sh --loops 42 --db-upto-mb 1024 --extra --skip-make --taillog test-singleprocess: build-test @echo ' RUNNING `test/long_stochastic.sh --single --loops 2`...' @@ -439,7 +439,7 @@ test-valgrind: test-memcheck test-memcheck: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK test-memcheck: build-test @echo ' RUNNING `test/long_stochastic.sh --with-valgrind --loops 2`...' - $(QUIET)test/long_stochastic.sh --with-valgrind --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) + $(QUIET)test/long_stochastic.sh --with-valgrind --extra --loops 2 --db-upto-mb 256 --skip-make >$(TEST_LOG) || (cat $(TEST_LOG) && false) memcheck: smoke-memcheck smoke-memcheck: VALGRIND=valgrind --trace-children=yes --log-file=valgrind-%p.log --leak-check=full --track-origins=yes --read-var-info=yes --error-exitcode=42 --suppressions=test/valgrind_suppress.txt @@ -447,7 +447,7 @@ smoke-memcheck: CFLAGS_EXTRA=-Ofast -DENABLE_MEMCHECK smoke-memcheck: build-test @echo " SMOKE \`mdbx_test basic\` under Valgrind's memcheck..." 
$(QUIET)rm -f valgrind-*.log $(TEST_DB) $(TEST_LOG).gz && (set -o pipefail; ( \ - $(VALGRIND) ./mdbx_test --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-after $(MDBX_SMOKE_EXTRA) basic && \ + $(VALGRIND) ./mdbx_test --table=+data.fixed --keygen.split=29 --datalen=35 --progress --console=no --repeat=2 --pathname=$(TEST_DB) --dont-cleanup-after $(MDBX_SMOKE_EXTRA) basic && \ $(VALGRIND) ./mdbx_test --progress --console=no --pathname=$(TEST_DB) --dont-cleanup-before --dont-cleanup-after --copy && \ $(VALGRIND) ./mdbx_test --mode=-writemap,-nosync-safe,-lifo --progress --console=no --repeat=4 --pathname=$(TEST_DB) --dont-cleanup-after $(MDBX_SMOKE_EXTRA) basic && \ $(VALGRIND) ./mdbx_chk -vvn $(TEST_DB) && \ diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 7098cfed..61531a57 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -145,7 +145,7 @@ else() add_test(NAME dupsort_writemap COMMAND ${MDBX_OUTPUT_DIR}/mdbx_test --loglevel=notice --prng-seed=${test_seed} - --table=+data.integer --keygen.split=29 --datalen.min=min --datalen.max=max --progress --console=no + --table=+data.fixed --keygen.split=29 --datalen=rnd --progress --console=no --repeat=2 --pathname=dupsort_writemap.db --dont-cleanup-after basic) set_tests_properties(dupsort_writemap PROPERTIES TIMEOUT 3600 From a6f7d74a32a3cbcc310916a624a31302dbebfa07 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 7 Mar 2024 19:25:19 +0300 Subject: [PATCH 130/137] =?UTF-8?q?mdbx:=20=D0=BC=D0=B8=D0=BA=D1=80=D0=BE?= =?UTF-8?q?=D0=BE=D0=BF=D1=82=D0=B8=D0=BC=D0=B8=D0=B7=D0=B0=D1=86=D0=B8?= =?UTF-8?q?=D1=8F=20`cursor=5Ftouch()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git 
a/src/core.c b/src/core.c index 0f8663ba..10718222 100644 --- a/src/core.c +++ b/src/core.c @@ -17940,7 +17940,8 @@ static __hot int cursor_touch(MDBX_cursor *const mc, const MDBX_val *key, } int rc = MDBX_SUCCESS; - if (likely(mc->mc_snum)) { + if (likely(mc->mc_snum) && + !IS_MODIFIABLE(mc->mc_txn, mc->mc_pg[mc->mc_snum - 1])) { mc->mc_top = 0; do { rc = page_touch(mc); From 471085788c427e5643ce1b7b8b41d2c413d78ad9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 10 Mar 2024 23:47:19 +0300 Subject: [PATCH 131/137] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D1=88=D0=B8=D0=B1?= =?UTF-8?q?=D0=BA=D0=B8=20=D0=BE=D1=82=D0=BA=D1=80=D1=8B=D1=82=D0=B8=D1=8F?= =?UTF-8?q?=20=D0=91=D0=94=20=D0=BD=D0=B0=20=D0=A4=D0=A1=20=D1=82=D0=BE?= =?UTF-8?q?=D0=BB=D1=8C=D0=BA=D0=BE-=D0=B4=D0=BB=D1=8F-=D1=87=D1=82=D0=B5?= =?UTF-8?q?=D0=BD=D0=B8=D1=8F.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 22 +++++++++++----------- src/osal.c | 4 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/src/core.c b/src/core.c index 10718222..d7edca17 100644 --- a/src/core.c +++ b/src/core.c @@ -6054,6 +6054,14 @@ __cold static void meta_troika_dump(const MDBX_env *env, /*----------------------------------------------------------------------------*/ +static __inline MDBX_CONST_FUNCTION MDBX_lockinfo * +lckless_stub(const MDBX_env *env) { + uintptr_t stub = (uintptr_t)&env->x_lckless_stub; + /* align to avoid false-positive alarm from UndefinedBehaviorSanitizer */ + stub = (stub + MDBX_CACHELINE_SIZE - 1) & ~(MDBX_CACHELINE_SIZE - 1); + return (MDBX_lockinfo *)stub; +} + /* Find oldest txnid still referenced. 
*/ static txnid_t find_oldest_reader(MDBX_env *const env, const txnid_t steady) { const uint32_t nothing_changed = MDBX_STRING_TETRAD("None"); @@ -6061,7 +6069,7 @@ static txnid_t find_oldest_reader(MDBX_env *const env, const txnid_t steady) { MDBX_lockinfo *const lck = env->me_lck_mmap.lck; if (unlikely(lck == NULL /* exclusive without-lck mode */)) { - eASSERT(env, env->me_lck == (void *)&env->x_lckless_stub); + eASSERT(env, env->me_lck == lckless_stub(env)); env->me_lck->mti_readers_refresh_flag.weak = nothing_changed; return env->me_lck->mti_oldest_reader.weak = steady; } @@ -9296,8 +9304,7 @@ static int txn_renew(MDBX_txn *txn, const unsigned flags) { mo_AcquireRelease); } else { /* exclusive mode without lck */ - eASSERT(env, !env->me_lck_mmap.lck && - env->me_lck == (void *)&env->x_lckless_stub); + eASSERT(env, !env->me_lck_mmap.lck && env->me_lck == lckless_stub(env)); } jitter4testing(true); @@ -13548,14 +13555,6 @@ __cold static void setup_pagesize(MDBX_env *env, const size_t pagesize) { env->me_options.dp_initial = env->me_options.dp_limit; } -static __inline MDBX_CONST_FUNCTION MDBX_lockinfo * -lckless_stub(const MDBX_env *env) { - uintptr_t stub = (uintptr_t)&env->x_lckless_stub; - /* align to avoid false-positive alarm from UndefinedBehaviorSanitizer */ - stub = (stub + MDBX_CACHELINE_SIZE - 1) & ~(MDBX_CACHELINE_SIZE - 1); - return (MDBX_lockinfo *)stub; -} - __cold int mdbx_env_create(MDBX_env **penv) { if (unlikely(!penv)) return MDBX_EINVAL; @@ -15559,6 +15558,7 @@ __cold static int env_open(MDBX_env *env, mdbx_mode_t mode) { if (rc == MDBX_RESULT_TRUE) { env->me_incore = true; NOTICE("%s", "in-core database"); + rc = MDBX_SUCCESS; } else if (unlikely(rc != MDBX_SUCCESS)) { ERROR("check_fs_incore(), err %d", rc); return rc; diff --git a/src/osal.c b/src/osal.c index 3865025a..161bed91 100644 --- a/src/osal.c +++ b/src/osal.c @@ -1836,8 +1836,8 @@ MDBX_INTERNAL_FUNC int osal_check_fs_rdonly(mdbx_filehandle_t handle, #else struct statvfs info; if 
(err != MDBX_ENOFILE) { - if (statvfs(pathname, &info) == 0 && (info.f_flag & ST_RDONLY) == 0) - return err; + if (statvfs(pathname, &info) == 0) + return (info.f_flag & ST_RDONLY) ? MDBX_SUCCESS : err; if (errno != MDBX_ENOFILE) return errno; } From aae6a0395acc2bd85349043f9a7202c425e7c0b3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 14 Mar 2024 23:03:33 +0300 Subject: [PATCH 132/137] =?UTF-8?q?mdbx:=20=D0=B8=D1=81=D0=BF=D1=80=D0=B0?= =?UTF-8?q?=D0=B2=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20=D0=BE=D0=BF=D0=B5=D1=87?= =?UTF-8?q?=D0=B0=D1=82=D0=BA=D0=B8=20`=D1=80=D0=B0=D0=B2=D0=BD=D0=BE`/`?= =?UTF-8?q?=D0=BD=D0=B5=D1=80=D0=B0=D0=B2=D0=BD=D0=BE`=20=D0=B2=20=D1=83?= =?UTF-8?q?=D1=81=D0=BB=D0=BE=D0=B2=D0=B8=D0=B8=20=D0=B2=D0=BD=D1=83=D1=82?= =?UTF-8?q?=D1=80=D0=B8=20`update=5Fgc()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Существенных последствий ошибки не было (иначе бы давно было замечено). Но в определенных сценариях, сходимость требовала еще одного цикла повтора внутри update_gc(). 
--- src/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index d7edca17..31586aad 100644 --- a/src/core.c +++ b/src/core.c @@ -10967,7 +10967,7 @@ retry: if (unlikely(!ctx->retired_stored)) { /* Make sure last page of GC is touched and on retired-list */ rc = cursor_last(&ctx->cursor, nullptr, nullptr); - if (likely(rc != MDBX_SUCCESS)) + if (likely(rc == MDBX_SUCCESS)) rc = gcu_touch(ctx); if (unlikely(rc != MDBX_SUCCESS) && rc != MDBX_NOTFOUND) goto bailout; From 93f76f43ac5e3b9af11305fe70f23123d07fb8df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Tue, 12 Mar 2024 02:25:13 +0300 Subject: [PATCH 133/137] =?UTF-8?q?mdbx-chk:=20=D0=BD=D0=B5=20=D1=81=D1=87?= =?UTF-8?q?=D0=B8=D1=82=D0=B0=D0=B5=D0=BC=20=D0=BE=D1=88=D0=B8=D0=B1=D0=BE?= =?UTF-8?q?=D1=87=D0=BD=D1=8B=D0=BC=D0=B8/=D0=BF=D1=80=D0=BE=D0=B1=D0=BB?= =?UTF-8?q?=D0=B5=D0=BC=D0=BD=D1=8B=D0=BC=D0=B8=20=D0=B7=D0=B0=D0=BF=D0=B8?= =?UTF-8?q?=D1=81=D0=B8=20=D0=BD=D1=83=D0=BB=D0=B5=D0=B2=D0=BE=D0=B9=20?= =?UTF-8?q?=D0=B4=D0=BB=D0=B8=D0=BD=D1=8B=20=D0=B2=20GC.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Технически такие записи не являются проблемными, а образовываются в случае когда внутри update_gc() резервируется больше места, чем реально остается номеров свободных страниц для возврата в GC. Изначально такое избыточное резервирование считалось алгоритмическим недостатком update_gc(). Поэтому утилита mdbx_chk была временно доработана для выявления таких случаев в ходе стохастических тестов. Постепенно все реальные недочеты update_gc() (если не считать запутанности и неочевидности кода) были устранены, формирование пустых записей в GC не наблюдалось и излишне строгий контроль в mdbx_chk не создавал проблем. 
В ходе же последних точечных доработок была предпринята попытка еще немного уменьшить затраты ЦПУ внутри update_gc(), в частности уменьшить кол-во циклов/повторов посредством улучшения сходимости, а также уменьшить WAF. При этом образование пустых записей в GC стало возможным в достаточно редких ситуациях, когда (например) для возврата в GC остается только одна страница и добавление записи единичной длины приводит к перебалансировке или разделению листовой страницы по легковесному пути, без вовлечения других страниц дерева и без переработки дополнительных записей GC, но с поглощением остававшейся на возврат страницы. Проще говоря, в актуальной версии MDBX пустые записи в GC могут образовываться, когда это энергетически выгодно. Тогда как в предыдущих выпусках в таких ситуациях выполнялось более дорогое обновление GC с переработкой и возвратом дополнительных записей. --- src/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index 31586aad..e0b66d30 100644 --- a/src/core.c +++ b/src/core.c @@ -28387,7 +28387,7 @@ __cold static int chk_handle_gc(MDBX_chk_scope_t *const scope, chk_object_issue(scope, "entry", txnid, "wrong idl size", "%" PRIuPTR, data->iov_len); size_t number = (data->iov_len >= sizeof(pgno_t)) ? 
*iptr++ : 0; - if (number < 1 || number > MDBX_PGL_LIMIT) + if (number > MDBX_PGL_LIMIT) chk_object_issue(scope, "entry", txnid, "wrong idl length", "%" PRIuPTR, number); else if ((number + 1) * sizeof(pgno_t) > data->iov_len) { From baaa26bb322799496d4fa32f9e59f750ea64c21c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Sun, 17 Mar 2024 00:25:08 +0300 Subject: [PATCH 134/137] =?UTF-8?q?mdbx:=20=D0=B4=D0=BE=D1=80=D0=B0=D0=B1?= =?UTF-8?q?=D0=BE=D1=82=D0=BA=D0=B0=20`update=5Fgc()`=20=D0=B4=D0=BB=D1=8F?= =?UTF-8?q?=20=D1=83=D0=BB=D1=83=D1=87=D1=88=D0=B5=D0=BD=D0=B8=D1=8F=20?= =?UTF-8?q?=D1=81=D1=85=D0=BE=D0=B4=D0=B8=D0=BC=D0=BE=D1=81=D1=82=D0=B8.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 143 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 87 insertions(+), 56 deletions(-) diff --git a/src/core.c b/src/core.c index e0b66d30..81c768c1 100644 --- a/src/core.c +++ b/src/core.c @@ -10551,8 +10551,9 @@ __cold static int audit_ex(MDBX_txn *txn, size_t retired_stored, } typedef struct gc_update_context { - size_t retired_stored, loop; - size_t settled, cleaned_slot, reused_slot, filled_slot; + size_t loop, reserve_adj; + size_t retired_stored; + size_t reserved, cleaned_slot, reused_slot, fill_idx; txnid_t cleaned_id, rid; bool lifo, dense; #if MDBX_ENABLE_BIGFOOT @@ -10597,7 +10598,8 @@ static int gcu_clean_stored_retired(MDBX_txn *txn, gcu_context_t *ctx) { err = cursor_del(gc, 0); TRACE("== clear-4linear, backlog %zu, err %d", gcu_backlog_size(txn), err); - } + } else + err = (err == MDBX_NOTFOUND) ? MDBX_SUCCESS : err; } #if MDBX_ENABLE_BIGFOOT while (!err && --ctx->bigfoot >= txn->mt_txnid); @@ -10736,7 +10738,8 @@ static int update_gc(MDBX_txn *txn, gcu_context_t *ctx) { /* txn->tw.relist[] can grow and shrink during this call. 
* txn->tw.last_reclaimed and txn->tw.retired_pages[] can only grow. * But page numbers cannot disappear from txn->tw.retired_pages[]. */ - +retry_clean_adj: + ctx->reserve_adj = 0; retry: if (ctx->loop++) TRACE("%s", " >> restart"); @@ -10756,10 +10759,10 @@ retry: goto bailout; } - ctx->settled = 0; + ctx->reserved = 0; ctx->cleaned_slot = 0; ctx->reused_slot = 0; - ctx->filled_slot = ~0u; + ctx->fill_idx = ~0u; ctx->cleaned_id = 0; ctx->rid = txn->tw.last_reclaimed; while (true) { @@ -10781,10 +10784,10 @@ retry: if (ctx->cleaned_slot < (txn->tw.lifo_reclaimed ? MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) : 0)) { - ctx->settled = 0; + ctx->reserved = 0; ctx->cleaned_slot = 0; ctx->reused_slot = 0; - ctx->filled_slot = ~0u; + ctx->fill_idx = ~0u; /* LY: cleanup reclaimed records. */ do { ctx->cleaned_id = txn->tw.lifo_reclaimed[++ctx->cleaned_slot]; @@ -10827,7 +10830,7 @@ retry: goto bailout; } ctx->rid = ctx->cleaned_id; - ctx->settled = 0; + ctx->reserved = 0; ctx->reused_slot = 0; ctx->cleaned_id = unaligned_peek_u64(4, key.iov_base); if (ctx->cleaned_id > txn->tw.last_reclaimed) @@ -11098,10 +11101,10 @@ retry: DEBUG_EXTRA_PRINT("%s\n", "."); } if (unlikely(amount != MDBX_PNL_GETSIZE(txn->tw.relist) && - ctx->settled)) { + ctx->reserved)) { TRACE("%s: reclaimed-list changed %zu -> %zu, retry", dbg_prefix_mode, amount, MDBX_PNL_GETSIZE(txn->tw.relist)); - goto retry /* rare case, but avoids GC fragmentation + goto retry_clean_adj /* rare case, but avoids GC fragmentation and one cycle. 
*/ ; } @@ -11119,10 +11122,11 @@ retry: if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } - const size_t left = amount - ctx->settled; - TRACE("%s: amount %zu, settled %zd, left %zd, lifo-reclaimed-slots %zu, " + const size_t left = amount - ctx->reserved - ctx->reserve_adj; + TRACE("%s: amount %zu, settled %zd, reserve_adj %zu, left %zd, " + "lifo-reclaimed-slots %zu, " "reused-gc-slots %zu", - dbg_prefix_mode, amount, ctx->settled, left, + dbg_prefix_mode, amount, ctx->reserved, ctx->reserve_adj, left, txn->tw.lifo_reclaimed ? MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) : 0, ctx->reused_slot); if (0 >= (intptr_t)left) @@ -11280,7 +11284,7 @@ retry: if (unlikely(ctx->rid == 0)) { ERROR("%s", "** no GC tail-space to store (going dense-mode)"); ctx->dense = true; - goto retry; + goto retry_clean_adj; } } else if (rc != MDBX_NOTFOUND) goto bailout; @@ -11369,7 +11373,7 @@ retry: key.iov_base = &reservation_gc_id; data.iov_len = (chunk + 1) * sizeof(pgno_t); TRACE("%s: reserve %zu [%zu...%zu) @%" PRIaTXN, dbg_prefix_mode, chunk, - ctx->settled + 1, ctx->settled + chunk + 1, reservation_gc_id); + ctx->reserved + 1, ctx->reserved + chunk + 1, reservation_gc_id); gcu_prepare_backlog(txn, ctx); rc = cursor_put_nochecklen(&ctx->cursor, &key, &data, MDBX_RESERVE | MDBX_NOOVERWRITE); @@ -11379,17 +11383,17 @@ retry: goto bailout; gcu_clean_reserved(env, data); - ctx->settled += chunk; - TRACE("%s: settled %zu (+%zu), continue", dbg_prefix_mode, ctx->settled, + ctx->reserved += chunk; + TRACE("%s: settled %zu (+%zu), continue", dbg_prefix_mode, ctx->reserved, chunk); if (txn->tw.lifo_reclaimed && unlikely(amount < MDBX_PNL_GETSIZE(txn->tw.relist)) && - (ctx->loop < 5 || - MDBX_PNL_GETSIZE(txn->tw.relist) - amount > env->me_maxgc_ov1page)) { + (ctx->loop < 5 || MDBX_PNL_GETSIZE(txn->tw.relist) - amount > + env->me_maxgc_ov1page / 2)) { NOTICE("** restart: reclaimed-list growth %zu -> %zu", amount, MDBX_PNL_GETSIZE(txn->tw.relist)); - goto retry; + goto retry_clean_adj; } 
continue; @@ -11402,7 +11406,8 @@ retry: TRACE("%s", " >> filling"); /* Fill in the reserved records */ - ctx->filled_slot = + size_t excess_slots = 0; + ctx->fill_idx = txn->tw.lifo_reclaimed ? MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - ctx->reused_slot : ctx->reused_slot; @@ -11410,18 +11415,21 @@ retry: tASSERT(txn, pnl_check_allocated(txn->tw.relist, txn->mt_next_pgno - MDBX_ENABLE_REFUND)); tASSERT(txn, dirtylist_check(txn)); - if (MDBX_PNL_GETSIZE(txn->tw.relist)) { + if (ctx->reserved || MDBX_PNL_GETSIZE(txn->tw.relist)) { MDBX_val key, data; key.iov_len = data.iov_len = 0; /* avoid MSVC warning */ key.iov_base = data.iov_base = NULL; const size_t amount = MDBX_PNL_GETSIZE(txn->tw.relist); - size_t left = amount; + size_t left = amount, excess = 0; if (txn->tw.lifo_reclaimed == nullptr) { tASSERT(txn, ctx->lifo == 0); rc = cursor_first(&ctx->cursor, &key, &data); - if (unlikely(rc != MDBX_SUCCESS)) + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc == MDBX_NOTFOUND && ctx->reserve_adj) + goto retry_clean_adj; goto bailout; + } } else { tASSERT(txn, ctx->lifo != 0); } @@ -11433,24 +11441,33 @@ retry: if (txn->tw.lifo_reclaimed == nullptr) { tASSERT(txn, ctx->lifo == 0); fill_gc_id = unaligned_peek_u64(4, key.iov_base); - if (ctx->filled_slot-- == 0 || fill_gc_id > txn->tw.last_reclaimed) { - NOTICE( - "** restart: reserve depleted (filled_slot %zu, fill_id %" PRIaTXN - " > last_reclaimed %" PRIaTXN, - ctx->filled_slot, fill_gc_id, txn->tw.last_reclaimed); + if (ctx->fill_idx == 0 || fill_gc_id > txn->tw.last_reclaimed) { + if (!left) + break; + NOTICE("** restart: reserve depleted (fill_idx %zu, fill_id %" PRIaTXN + " > last_reclaimed %" PRIaTXN ", left %zu", + ctx->fill_idx, fill_gc_id, txn->tw.last_reclaimed, left); + ctx->reserve_adj = + (ctx->reserve_adj > left) ? 
ctx->reserve_adj - left : 0; goto retry; } + ctx->fill_idx -= 1; } else { tASSERT(txn, ctx->lifo != 0); - if (++ctx->filled_slot > MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)) { - NOTICE("** restart: reserve depleted (filled_gc_slot %zu > " - "lifo_reclaimed %zu" PRIaTXN, - ctx->filled_slot, MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)); + if (ctx->fill_idx >= MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed)) { + if (!left) + break; + NOTICE("** restart: reserve depleted (fill_idx %zu >= " + "lifo_reclaimed %zu, left %zu", + ctx->fill_idx, MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed), left); + ctx->reserve_adj = + (ctx->reserve_adj > left) ? ctx->reserve_adj - left : 0; goto retry; } - fill_gc_id = txn->tw.lifo_reclaimed[ctx->filled_slot]; + ctx->fill_idx += 1; + fill_gc_id = txn->tw.lifo_reclaimed[ctx->fill_idx]; TRACE("%s: seek-reservation @%" PRIaTXN " at lifo_reclaimed[%zu]", - dbg_prefix_mode, fill_gc_id, ctx->filled_slot); + dbg_prefix_mode, fill_gc_id, ctx->fill_idx); key.iov_base = &fill_gc_id; key.iov_len = sizeof(fill_gc_id); rc = cursor_set(&ctx->cursor, &key, &data, MDBX_SET_KEY).err; @@ -11469,12 +11486,17 @@ retry: tASSERT(txn, data.iov_len >= sizeof(pgno_t) * 2); size_t chunk = data.iov_len / sizeof(pgno_t) - 1; if (unlikely(chunk > left)) { + const size_t delta = chunk - left; + excess += delta; + if (!left) { + excess_slots += 1; + goto next; + } TRACE("%s: chunk %zu > left %zu, @%" PRIaTXN, dbg_prefix_mode, chunk, left, fill_gc_id); - if ((ctx->loop < 5 && chunk - left > ctx->loop / 2) || - chunk - left > env->me_maxgc_ov1page) { + if ((ctx->loop < 5 && delta > (ctx->loop / 2)) || + delta > env->me_maxgc_ov1page) data.iov_len = (left + 1) * sizeof(pgno_t); - } chunk = left; } rc = cursor_put_nochecklen(&ctx->cursor, &key, &data, @@ -11487,14 +11509,14 @@ retry: amount != MDBX_PNL_GETSIZE(txn->tw.relist))) { NOTICE("** restart: reclaimed-list growth (%zu -> %zu, loose +%zu)", amount, MDBX_PNL_GETSIZE(txn->tw.relist), txn->tw.loose_count); - goto retry; + goto 
retry_clean_adj; } if (unlikely(txn->tw.lifo_reclaimed ? ctx->cleaned_slot < MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) : ctx->cleaned_id < txn->tw.last_reclaimed)) { NOTICE("%s", "** restart: reclaimed-slots changed"); - goto retry; + goto retry_clean_adj; } if (unlikely(ctx->retired_stored != MDBX_PNL_GETSIZE(txn->tw.retired_pages))) { @@ -11502,7 +11524,7 @@ retry: ctx->retired_stored < MDBX_PNL_GETSIZE(txn->tw.retired_pages)); NOTICE("** restart: retired-list growth (%zu -> %zu)", ctx->retired_stored, MDBX_PNL_GETSIZE(txn->tw.retired_pages)); - goto retry; + goto retry_clean_adj; } pgno_t *dst = data.iov_base; @@ -11520,35 +11542,44 @@ retry: if (unlikely(rc != MDBX_SUCCESS)) goto bailout; } - if (left == 0) { - rc = MDBX_SUCCESS; - break; - } + next: if (txn->tw.lifo_reclaimed == nullptr) { tASSERT(txn, ctx->lifo == 0); rc = cursor_next(&ctx->cursor, &key, &data, MDBX_NEXT); - if (unlikely(rc != MDBX_SUCCESS)) - goto bailout; + if (unlikely(rc != MDBX_SUCCESS)) { + if (rc != MDBX_NOTFOUND) + goto bailout; + rc = MDBX_SUCCESS; + break; + } } else { tASSERT(txn, ctx->lifo != 0); } } + + if (excess) { + size_t n = excess, adj = excess; + while (n >= env->me_maxgc_ov1page) + adj -= n /= env->me_maxgc_ov1page; + ctx->reserve_adj += adj; + TRACE("%s: extra %zu reserved space, adj +%zu (%zu)", dbg_prefix_mode, + excess, adj, ctx->reserve_adj); + } } tASSERT(txn, rc == MDBX_SUCCESS); if (unlikely(txn->tw.loose_count != 0)) { NOTICE("** restart: got %zu loose pages", txn->tw.loose_count); - goto retry; + goto retry_clean_adj; } - if (unlikely(ctx->filled_slot != - (txn->tw.lifo_reclaimed - ? MDBX_PNL_GETSIZE(txn->tw.lifo_reclaimed) - : 0))) { - const bool will_retry = ctx->loop < 9; - NOTICE("** %s: reserve excess (filled-slot %zu, loop %zu)", - will_retry ? 
"restart" : "ignore", ctx->filled_slot, ctx->loop); + if (unlikely(excess_slots)) { + const bool will_retry = ctx->loop < 5 || excess_slots > 1; + NOTICE("** %s: reserve excess (excess-slots %zu, filled-slot %zu, adj %zu, " + "loop %zu)", + will_retry ? "restart" : "ignore", excess_slots, ctx->fill_idx, + ctx->reserve_adj, ctx->loop); if (will_retry) goto retry; } From 94a6bc140dc5199c2578faa160fdea95710cfd99 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Mon, 18 Mar 2024 21:57:38 +0300 Subject: [PATCH 135/137] =?UTF-8?q?mdbx-doc:=20=D0=B4=D0=BE=D0=BA=D1=83?= =?UTF-8?q?=D0=BC=D0=B5=D0=BD=D1=82=D0=B8=D1=80=D0=BE=D0=B2=D0=B0=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20`mdbx=5Fenv=5Fresurrect=5Fafter=5Ffork()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/_restrictions.md | 6 ++++ mdbx.h | 76 +++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/docs/_restrictions.md b/docs/_restrictions.md index 170924f7..d967cca7 100644 --- a/docs/_restrictions.md +++ b/docs/_restrictions.md @@ -106,6 +106,7 @@ reservation can deplete system resources (trigger ENOMEM error, etc) when setting an inadequately large upper DB size using \ref mdbx_env_set_geometry() or \ref mdbx::env::geometry. So just avoid this. + ## Remote filesystems Do not use MDBX databases on remote filesystems, even between processes on the same host. This breaks file locks on some platforms, possibly @@ -132,6 +133,11 @@ corruption in such cases. On the other hand, MDBX allow calling \ref mdbx_env_close() in such cases to release resources, but no more and in general this is a wrong way. 
+#### Since v0.13.1 and later +Начиная с версии 0.13.1 в API доступна функция \ref mdbx_env_resurrect_after_fork(), +которая позволяет пере-использовать в дочерних процессах уже открытую среду БД, +но строго без наследования транзакций от родительского процесса. + ## Read-only mode There is no pure read-only mode in a normal explicitly way, since diff --git a/mdbx.h b/mdbx.h index e91e8170..a7e78072 100644 --- a/mdbx.h +++ b/mdbx.h @@ -2944,8 +2944,80 @@ LIBMDBX_INLINE_API(int, mdbx_env_close, (MDBX_env * env)) { return mdbx_env_close_ex(env, false); } -#if !(defined(_WIN32) || defined(_WIN64)) -/** FIXME */ +#if defined(DOXYGEN) || !(defined(_WIN32) || defined(_WIN64)) +/** \brief Восстанавливает экземпляр среды в дочернем процессе после ветвления + * родительского процесса посредством `fork()` и родственных системных вызовов. + * \ingroup c_extra + * + * Без вызова \ref mdbx_env_resurrect_after_fork() использование открытого + * экземпляра среды в дочернем процессе невозможно, включая все выполняющиеся + * на момент ветвления транзакции. + * + * Выполняемые функцией действия можно рассматривать как повторное открытие БД + * в дочернем процессе, с сохранением заданных опций и адресов уже созданных + * экземпляров объектов связанных с API. + * + * \note Функция недоступна в ОС семейства Windows по причине отсутствия + * функционала ветвления процесса в API операционной системы. + * + * Ветвление не оказывает влияния на состояние MDBX-среды в родительском + * процессе. Все транзакции, которые были в родительском процессе на момент + * ветвления, после ветвления в родительском процессе продолжат выполняться без + * помех. Но в дочернем процессе все соответствующие транзакции безальтернативно + * перестают быть валидными, а попытка их использования приведет к возврату + * ошибки или отправке `SIGSEGV`.
+ * + * Использование экземпляра среды в дочернем процессе невозможно до вызова + * \ref mdbx_env_resurrect_after_fork(), так как в результате ветвления у + * процесса меняется PID, значение которого используется для организации + * совместной работы с БД, в том числе, для отслеживания процессов/потоков + * выполняющих читающие транзакции связанные с соответствующими снимками данных. + * Все активные на момент ветвления транзакции не могут продолжаться в дочернем + * процессе, так как не владеют какими-либо блокировками или каким-либо снимком + * данных и не удерживают его от переработки при сборке мусора. + * + * Функция \ref mdbx_env_resurrect_after_fork() восстанавливает переданный + * экземпляр среды в дочернем процессе после ветвления, а именно: обновляет + * используемые системные идентификаторы, повторно открывает дескрипторы файлов, + * производит захват необходимых блокировок связанных с LCK- и DXB-файлами БД, + * восстанавливает отображения в память страниц БД, таблицы читателей и + * служебных/вспомогательных данных. Однако унаследованные от + * родительского процесса транзакции не восстанавливаются, причём пишущие и + * читающие транзакции обрабатываются по-разному: + * + * - Пишущая транзакция, если таковая была на момент ветвления, + * прерывается в дочернем процессе с освобождением связанных с ней ресурсов, + * включая все вложенные транзакции. + * + * - Читающие же транзакции, если таковые были в родительском процессе, + * в дочернем процессе логически прерываются, но без освобождения ресурсов. + * Поэтому необходимо обеспечить вызов \ref mdbx_txn_abort() для каждой + * такой читающей транзакции в дочернем процессе, либо смириться с утечкой + * ресурсов до завершения дочернего процесса.
+ * + * Причина не-освобождения ресурсов читающих транзакций в том, что исторически + * MDBX не ведет какой-либо общий список экземпляров читающих транзакций, так как это не + * требуется для штатных режимов работы, но требует использования атомарных + * операций или дополнительных объектов синхронизации при создании/разрушении + * экземпляров \ref MDBX_txn. + * + * Вызов \ref mdbx_env_resurrect_after_fork() без ветвления, не в дочернем + * процессе, либо повторные вызовы не приводят к каким-либо действиям или + * изменениям. + * + * \returns Ненулевое значение ошибки при сбое и 0 при успешном выполнении, + * некоторые возможные ошибки таковы: + * + * \retval MDBX_BUSY В родительском процессе БД была открыта + * в режиме \ref MDBX_EXCLUSIVE. + * + * \retval MDBX_EBADSIGN При повреждении сигнатуры экземпляра объекта, а также + * в случае одновременного вызова \ref + * mdbx_env_resurrect_after_fork() из разных потоков. + * + * \retval MDBX_PANIC Произошла критическая ошибка при восстановлении + * экземпляра среды, либо такая ошибка уже была + * до вызова функции.
*/ LIBMDBX_API int mdbx_env_resurrect_after_fork(MDBX_env *env); #endif /* Windows */ From 236afee80b39a35a46a6c60297aab3c47378d4a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Wed, 20 Mar 2024 03:09:33 +0300 Subject: [PATCH 136/137] =?UTF-8?q?mdbx:=20=D0=B1=D1=8B=D1=81=D1=82=D1=80?= =?UTF-8?q?=D0=B0=D1=8F=20=D0=BE=D0=B1=D1=80=D0=B0=D0=B1=D0=BE=D1=82=D0=BA?= =?UTF-8?q?=D0=B0=20=D1=80=D0=B5=D0=B6=D0=B8=D0=BC=D0=B0=20`MDBX=5FEXCLUSI?= =?UTF-8?q?VE`=20=D0=B4=D0=BB=D1=8F=20`mdbx=5Fenv=5Fresurrect=5Fafter=5Ffo?= =?UTF-8?q?rk()`.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core.c b/src/core.c index 24a6d892..8f061af2 100644 --- a/src/core.c +++ b/src/core.c @@ -15926,7 +15926,7 @@ __cold int mdbx_env_resurrect_after_fork(MDBX_env *env) { int rc = env_close(env, true); env->me_signature.weak = MDBX_ME_SIGNATURE; if (likely(rc == MDBX_SUCCESS)) { - rc = env_open(env, 0); + rc = (env->me_flags & MDBX_EXCLUSIVE) ? 
MDBX_BUSY : env_open(env, 0); if (unlikely(rc != MDBX_SUCCESS && env_close(env, false) != MDBX_SUCCESS)) { rc = MDBX_PANIC; env->me_flags |= MDBX_FATAL_ERROR; From 20d6d39ab35ad09932a33ded9c9f3aad089051bc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9B=D0=B5=D0=BE=D0=BD=D0=B8=D0=B4=20=D0=AE=D1=80=D1=8C?= =?UTF-8?q?=D0=B5=D0=B2=20=28Leonid=20Yuriev=29?= Date: Thu, 21 Mar 2024 11:44:25 +0300 Subject: [PATCH 137/137] =?UTF-8?q?mdbx:=20=D0=BE=D0=B1=D0=BD=D0=BE=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B8=D0=B5=20ChangeLog.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- ChangeLog.md | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/ChangeLog.md b/ChangeLog.md index 32143bf4..b637c62b 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -4,8 +4,42 @@ ChangeLog English version [by Google](https://gitflic-ru.translate.goog/project/erthink/libmdbx/blob?file=ChangeLog.md&_x_tr_sl=ru&_x_tr_tl=en) and [by Yandex](https://translated.turbopages.org/proxy_u/ru-en.en/https/gitflic.ru/project/erthink/libmdbx/blob?file=ChangeLog.md). +## v0.13.1 (в процессе подготовки релиза) -## v0.13.0 at 2023-04-23 +Новая версия с существенным расширением API и добавлением функционала. + +Новое: + + - Управление основной блокировкой lock/unlock/upgrade/downgrade для координации пишущих транзакций. + - `mdbx_env_chk()` для проверки целостности структуры БД, с переработкой и переносом функционала утилиты `mdbx_chk` внутрь библиотеки. + - `mdbx_dbi_rename()` и `mdbx_dbi_rename2()` для переименования таблиц. + - `mdbx_cursor_unbind()` и `mdbx_txn_release_all_cursors()` для управления курсорами. + - `mdbx_env_resurrect_after_fork()` для восстановления открытой среды работы с БД в дочернем процессе после ветвления/расщепления процесса. + - `mdbx_cursor_compare()` для сравнения позиций курсоров.
+ - `mdbx_cursor_scan()` и `mdbx_cursor_scan_from()` для сканирования таблиц с использованием функционального предиката. + - `mdbx_cursor_on_first_dup()` и `mdbx_cursor_on_last_dup()` для проверки позиции курсора. + - `mdbx_preopen_snapinfo()` для получения информации о БД без её открытия. + + - Расширение и доработка C++ API: + + - добавлен тип `mdbx::cursor::estimation_result`, а поведение методов + `cursor::estimate()` унифицировано с `cursor::move()`; + - для предотвращения незаметного неверного использования API, для инициализации + возвращаемых по ссылке срезов, вместо пустых срезов задействован `slice::invalid()`; + - добавлены дополнительные C++ операторы преобразования к типам C API; + - для совместимости со старыми стандартами C++ и старыми версиями STL перенесены + в public классы `buffer::move_assign_alloc` и `buffer::copy_assign_alloc`; + - добавлен тип `mdbx::default_buffer`; + - для срезов и буферов добавлены методы `hex_decode()`, `base64_decode()`, `base58_decode()`; + - добавлен тип `mdbx::comparator` и функция `mdbx::default_comparator()`; + - добавлены статические методы `buffer::hex()`, `base64()`, `base58()`; + - для транзакций и курсоров добавлены методы `get_/set_context`; + - добавлен метод `cursor::clone()`; + - поддержка base58 переработана и приведена в соответствие с черновиком RFC, в текущем понимании теперь это одна из самых высокопроизводительных реализаций; + - переработка `to_hex()` и `from_hex()`. + + +## v0.13.0 от 2023-04-23 Не выпуск, а начало ветки `0.13` с новым функционалом и изменением API.